Content deleted Content added
Updating published sources: General: * Have the bot script watch for changes and automatically re-exec itself. SourceUploader: * Change things around so task metadata is stored with the task. |
Updating published sources: General: * Function to transparently handle continuations in an API query. * Fix bot.pl to correctly check the ChangeLog mtime. WatchlistUpdater: * Change around to handle arbitrary API queries, with filtering. SourceUploa |
||
Line 29:
use Data::Dumper;
my
{
page
beginmarker => "\n<!-- SNIP
endmarker
frequency => 6*60*60,
maxrows => 10000,
query => {
list => 'allpages',
apprefix => 'Uw-',
apnamespace => '10',
aplimit => 'max'
},
result
match
summary
},
{
page => 'User:AnomieBOT/index',
beginmarker => "\n<!-- SNIP HERE -->\n",
endmarker => '',
frequency => 6*60*60,
maxrows => 10000,
query => {
list => 'allpages',
apprefix => 'AnomieBOT/',
apnamespace => '2',
aplimit => 'max'
},
gcontinue => 'apcontinue',
result => 'allpages',
match => {},
summary => 'Automatically updating userspace index',
botflag => 1
}
);
# Constructor.
#
# Delegates to the parent class constructor, then attaches the list of page
# configurations under the 'pages' key and reblesses the object into $class.
#
# Note that only the list itself is copied from @cfg_pages: the individual
# configuration hashes are the same references, so anything stored in them is
# visible through @cfg_pages as well.
sub new {
    my ($class) = @_;

    my $self  = $class->SUPER::new;
    my @pages = @cfg_pages;
    $self->{'pages'} = \@pages;

    return bless $self, $class;
}
=pod
Line 61 ⟶ 92:
sub run {
my ($self, $api)=@_;
$api->task('WatchlistUpdater');
Line 67 ⟶ 97:
$api->edit_throttle(10);
my $endtime=time()+600;
foreach my $data
# We've run too long, wait on the rest until next time
return 0 if time()>=$endtime;
# Check last run time if we haven't already recorded it
if(!exists($data->{'lastrun'})){
my $res=$api->query(
titles => $
prop => 'revisions',
rvuser => $api->user,
Line 79 ⟶ 115:
);
if($res->{'code'} ne 'success'){
warn "Failed to retrieve last edit date for $
return 60;
}
$res=[values(%{$res->{'query'}{'pages'}})];
if(exists($res->[0]{'revisions'}[0]{'timestamp'})){
$
} else {
$
}
}
# Time to check again?
next unless time()>=$
# Get edit token
my $tok=$api->edittoken($
if($tok->{'code'} eq 'shutoff'){
$self->warn("Task disabled: ".$tok->{'content'}."\n");
Line 102 ⟶ 136:
}
if($tok->{'code'} ne 'success'){
$self->warn("Failed to retrieve edit token for $
next;
}
if(exists($tok->{'missing'})){
$self->warn("Page $
next;
}
my $intxt=$tok->{'revisions'}[0]{'*'};
my %
my
my
do {
my $res=$self->fullquery($api, $data->{'gcontinue'}, %{$data->{'query
if($res->{'code'} ne 'success'){
$self->warn("Failed to retrieve data for $
next;
}
%cont=();
if(exists($res->{'query-continue'})){
foreach my $
%cont=(%cont, %$n);
}
}
my @r;
if(ref($res) eq 'ARRAY'){
@r=@$res;
} elsif(ref($res) eq 'HASH'){
@r=values %$res;
} else {
$self->warn("Invalid data for $page: Not an array or hash ref");
return 60;
}
foreach (@r){
next if ($_->{'ns'}&1)==1;
next unless _match($data->{'match'}, $_);
my ($main, $talk);
if($_->{'ns'}==14 || $_->{'ns'}==6){
$main=':'.$_->{'title'};
} else {
$main=$_->{'title'};
}
if($_->{'ns'}==0){
$
} else {
$
substr($
}
$
last if
}
} while
my $table="{| class=\"wikitable\"\n";
map { $table.="|-\n|[[".$out{$_}[0]."]]||[[".$out{$_}[1]."]]\n" } sort keys %out;
$table.="|-\n|colspan=\"2\"|<strong class=\"error\">List truncated at $rows rows</strong>\n" if $rows>$data->{'maxrows'};
$table.="|}";
# Perform edit, if needed
my $outtxt=$intxt;
if($data->{'beginmarker'} eq ''){
$begin=0;
} else {
$
$begin+=length($data->{'beginmarker'}) if $begin>=0;
}
if($data->{'endmarker'} eq ''){
$end=length($outtxt);
} else {
$end=index($outtxt, $data->{'endmarker'}, $begin);
}
if($begin<0 || $end<0){
warn $outtxt;
$self->warn("Begin/end markers not found, refusing to edit $page\n");
} else {
substr($outtxt,$begin,$end-$begin)=$table;
if($intxt eq $outtxt){
$self->warn("No update needed for $page\n");
} else {
my $res=$api->edit($tok, $outtxt, $data->{'summary'}, 0, $data->{'botflag'});
if($res->{'code'} ne 'success'){
$self->warn("Write for $page failed: ".$res->{'error'});
next;
}
}
}
# Record last update time
$data->{'lastrun'}=time();
}
# We processed all pages, calculate the number of seconds until the next
# time we're needed.
my $t=864000; # arbitrary initial/max value
foreach (@{$self->{'pages'}}){
my $tt=$_->{'lastrun'}+$_->{'frequency'}-time();
$t=$tt if $tt<$t;
}
return $t;
}
# Recursively test whether $value satisfies the pattern $match.
#
# Rules, applied in this order:
#   * $match is an array ref  - true if ANY element of @$match matches $value.
#   * $value is an array ref  - true if ANY element of @$value matches $match.
#   * $match is undef         - true only if $value is undef too.
#   * $value is undef         - false (a defined pattern never matches undef).
#   * $match is a plain scalar- string equality ($match eq $value).
#   * $match is a hash ref    - $value must be a hash ref, and every key of
#                               %$match must match the entry of the same name
#                               in %$value (a missing key is treated as
#                               undef). An empty pattern hash therefore
#                               matches any hash.
#   * any other reference     - false.
#
# Returns a value meant to be used in boolean context only.
sub _match {
    my ($match, $value) = @_;

    # A list of alternative patterns: one hit is enough.
    if (ref($match) eq 'ARRAY') {
        foreach my $alternative (@$match) {
            return 1 if _match($alternative, $value);
        }
        return 0;
    }

    # A list of candidate values: one hit is enough.
    if (ref($value) eq 'ARRAY') {
        foreach my $element (@$value) {
            return 1 if _match($match, $element);
        }
        return 0;
    }

    return !defined($value) if !defined($match);
    return 0 if !defined($value);
    return ($match eq $value) if !ref($match);

    if (ref($match) eq 'HASH') {
        return 0 if ref($value) ne 'HASH';

        # Iterate with keys() rather than each(): each() keeps a per-hash
        # iterator that is NOT reset when a loop is left early, so bailing
        # out on the first mismatch would make the next call on the same
        # pattern hash resume mid-way and skip keys (yielding false
        # positives). keys() resets the iterator every time.
        foreach my $key (keys %$match) {
            my $candidate = exists($value->{$key}) ? $value->{$key} : undef;
            return 0 unless _match($match->{$key}, $candidate);
        }
        return 1;
    }

    # Unsupported pattern type (code ref, scalar ref, ...).
    return 0;
}
|