User:AnomieBOT/source/tasks/WatchlistUpdater.pm: Difference between revisions

Content deleted Content added
AnomieBOT (talk | contribs)
Updating published sources: General: * Have the bot script watch for changes and automatically re-exec itself. SourceUploader: * Change things around so task metadata is stored with the task.
AnomieBOT (talk | contribs)
Updating published sources: General: * Function to transparently handle continuations in an API query. * Fix bot.pl to correctly check the ChangeLog mtime. WatchlistUpdater: * Change around to handle arbitrary API queries, with filtering. SourceUploa
Line 29:
use Data::Dumper;
 
# Pages maintained by this task.  Each entry is a hash describing one
# generated list; the fields are consumed by run():
#   page        - title of the wiki page that holds the list
#   beginmarker - text after which the generated table is inserted
#                 (an empty string means "start of page")
#   endmarker   - text before which the generated table ends
#                 (an empty string means "end of page")
#   frequency   - minimum number of seconds between updates
#   maxrows     - the list is truncated once it exceeds this many rows
#   query       - parameters of the API query that produces the rows
#   gcontinue   - continuation parameter name handed to fullquery()
#                 NOTE(review): exact semantics live in fullquery(), which
#                 is not defined in this file section - confirm there
#   result      - key under the API response's 'query' node holding the rows
#   match       - filter applied to each row via _match(); {} accepts any row
#   summary     - edit summary used when the page is updated
#   botflag     - passed as the final argument to $api->edit()
my @cfg_pages=(
    {
        page => 'User:Anomie/uw-templates',
        beginmarker => "\n<!-- SNIP HERE -->\n",
        endmarker => '',
        frequency => 6*60*60,
        maxrows => 10000,
        query => {
            list => 'allpages',
            apprefix => 'Uw-',
            apnamespace => '10',
            aplimit => 'max'
        },
        gcontinue => 'apcontinue',
        result => 'allpages',
        match => {},
        summary => 'Automatically updating list of uw-* templates',
        botflag => 1
    },
    {
        page => 'User:AnomieBOT/index',
        beginmarker => "\n<!-- SNIP HERE -->\n",
        endmarker => '',
        frequency => 6*60*60,
        maxrows => 10000,
        query => {
            list => 'allpages',
            apprefix => 'AnomieBOT/',
            apnamespace => '2',
            aplimit => 'max'
        },
        gcontinue => 'apcontinue',
        result => 'allpages',
        match => {},
        summary => 'Automatically updating userspace index',
        botflag => 1
    }
);
 
# Constructor.  Builds the object through the parent class constructor and
# attaches the list of configured pages under $self->{'pages'}.
#
# Returns the new object, blessed into the invoking class.
sub new {
    my $class=shift;

    my $self=$class->SUPER::new;
    # The array itself is copied, but the per-page hashes are shared with
    # @cfg_pages, so anything stored in them (e.g. 'lastrun') is visible
    # through both.
    $self->{'pages'}=[@cfg_pages];
    bless $self, $class;
    return $self;
}
 
=pod
Line 61 ⟶ 92:
sub run {
my ($self, $api)=@_;
my ($k,$v,$k2,$v2,$k3,$v3,@x);
 
$api->task('WatchlistUpdater');
Line 67 ⟶ 97:
$api->edit_throttle(10);
 
my $endtime=time()+600;
if(!exists($self->{'lastrun'})){
 
my %lastrun=();
foreach my $data values(%@{$self->{'pages'}});{
while((my $k,$v)page=each(%pages))$data->{'page'};
 
# We've run too long, wait on the rest until next time
return 0 if time()>=$endtime;
 
# Check last run time if we haven't already recorded it
if(!exists($data->{'lastrun'})){
my $res=$api->query(
titles => $kpage,
prop => 'revisions',
rvuser => $api->user,
Line 79 ⟶ 115:
);
if($res->{'code'} ne 'success'){
warn "Failed to retrieve last edit date for $k, cannot run taskpage";
return 60;
}
$res=[values(%{$res->{'query'}{'pages'}})];
if(exists($res->[0]{'revisions'}[0]{'timestamp'})){
$lastrundata->{$k'lastrun'}=$self->ISO2timestamp($res->[0]{'revisions'}[0]{'timestamp'});
} else {
$lastrundata->{$k'lastrun'}=0;
}
}
$self->{'lastrun'}=\%lastrun;
}
 
# Time to check again?
while(($k,$v)=each(%pages)){
next unless time()>=$selfdata->{'lastrun'}{$k}+$vdata->{'frequency'};
 
# Get edit token
my $starttime=strftime("%d %b %Y %T", gmtime());
my $tok=$api->edittoken($kpage);
if($tok->{'code'} eq 'shutoff'){
$self->warn("Task disabled: ".$tok->{'content'}."\n");
Line 102 ⟶ 136:
}
if($tok->{'code'} ne 'success'){
$self->warn("Failed to retrieve edit token for $kpage: ".$tok->{'error'});
next;
}
if(exists($tok->{'missing'})){
$self->warn("Page $kpage does not exist");
next;
}
my $intxt=$tok->{'revisions'}[0]{'*'};
 
my# $table="{|Generate class=\"wikitable\"\n";new table
my %qout=();
my list $rows=> 'allpages',0;
my apprefix %cont=> $v->{'prefix'},();
apnamespace => $v->{'namespace'},
aplimit => 'max'
);
my $res;
do {
my $res=$self->fullquery($api, $data->{'gcontinue'}, %{$data->{'query('}}, %qcont);
if($res->{'code'} ne 'success'){
$self->warn("Failed to retrieve data for $kpage: ".$res->{'error'});
next;
}
%cont=();
if(exists($res->{'query-continue'})){
foreach my $qn (values %{'apfrom'}=$res->{'query-continue'}{'allpages'}){'apfrom'};
%cont=(%cont, %$n);
}
}
foreach (@{$res=$res->{'query'}{$data->{'allpagesresult'}}){;
my @r;
if(ref($res) eq 'ARRAY'){
@r=@$res;
} elsif(ref($res) eq 'HASH'){
@r=values %$res;
} else {
$self->warn("Invalid data for $page: Not an array or hash ref");
return 60;
}
foreach (@r){
next if ($_->{'ns'}&1)==1;
next unless _match($data->{'match'}, $_);
my ($main, $talk);
if($_->{'ns'}==14 || $_->{'ns'}==6){
$main=':'.$_->{'title'};
} else {
$main=$_->{'title'};
}
if($_->{'ns'}==0){
$v2talk="\x5b\x5bTalk:".$_->{'title'}."\x5d\x5d";
$v3="\x5b\x5bTalk:".$_->{'title'}."\x5d\x5d";
} elsif($_->{'ns'}==1){
$v3="\x5b\x5b".$_->{'title'}."\x5d\x5d";
($v2=$v3)=~s/Talk://i;
} elsif(($_->{'ns'}&1)==0){
$v2="\x5b\x5b".$_->{'title'}."\x5d\x5d";
($v3=$v2)=~s/^([^:]+):/$1 talk:/;
} else {
$v3talk="\x5b\x5b".$_->{'title'}."\x5d\x5d";
substr($v2=talk, index($v3talk, ':'), 0)=~s/' talk:/:/i';
}
$table.="|out{$_-\n|>{'title'}}=[$v2 || main,$v3\n"talk];
last if length(++$table)rows>$maxlendata->{'maxrows'};
}
} while(length($table)rows<=$maxlen && exists($resdata->{'query-continuemaxrows'} && values(%cont));
my $table="{| class=\"wikitable\"\n";
map { $table.="|-\n|[[".$out{$_}[0]."]]||[[".$out{$_}[1]."]]\n" } sort keys %out;
$table.="|-\n|colspan=\"2\"|<strong class=\"error\">List truncated at $rows rows</strong>\n" if $rows>$data->{'maxrows'};
$table.="|}";
$table='<strong class="error">List of pages is too long</strong>' if length($table)>$maxlen;
$table=~s/\x5b\x5b(Category|Image):/\x5b\x5b:$1:/ig;
 
my $endtime=strftime("%d %b %Y %T", gmtime());
my ($i,$j);
$i=index($intxt,'<!-- SNIP HERE -->');
$j=index($intxt,"{| class=\"wikitable\"\n",($i<0)?0:$i);
my $t=($j<0)?'':substr($intxt,$j);
$t=~s/\s+$//;
if($t eq $table){
$self->warn("No update needed for $k\n");
$self->{'lastrun'}{$k}=time();
next;
}
 
# Perform edit, if needed
my $outtxt=$intxt;
$outtxt=substrmy ($outtxt,0begin,$iend) if $i>=0;
if($data->{'beginmarker'} eq ''){
$outtxt.="<!-- SNIP HERE -->\n<small>Last update happened between $starttime GMT and $endtime GMT</small>\n$table";
$begin=0;
 
$res=$api->edit($tok, $outtxt, 'Updating list of watched pages', 0, 1);
if($res->{'code'} eq 'success'){
$self->warn("Updated $k\n");
$self->{'lastrun'}{$k}=time();
} else {
$self->warnbegin=index("Write for $k failed:outtxt, ".$resdata->{'errorbeginmarker'});
$begin+=length($data->{'beginmarker'}) if $begin>=0;
}
if($data->{'endmarker'} eq ''){
$end=length($outtxt);
} else {
$end=index($outtxt, $data->{'endmarker'}, $begin);
}
if($begin<0 || $end<0){
warn $outtxt;
$self->warn("Begin/end markers not found, refusing to edit $page\n");
} else {
substr($outtxt,$begin,$end-$begin)=$table;
if($intxt eq $outtxt){
$self->warn("No update needed for $page\n");
} else {
my $res=$api->edit($tok, $outtxt, $data->{'summary'}, 0, $data->{'botflag'});
if($res->{'code'} ne 'success'){
$self->warn("Write for $page failed: ".$res->{'error'});
next;
}
}
}
 
# Record last update time
$data->{'lastrun'}=time();
}
 
# We processed all pages, calculate the number of seconds until the next
my $t=864000;
# time we're needed.
while(($k,$v)=each(%pages)){
my $t=864000; # arbitrary initial/max value
my $tt=$self->{'lastrun'}{$k}+$v->{'frequency'}-time();
foreach (@{$self->{'pages'}}){
my $tt=$_->{'lastrun'}+$_->{'frequency'}-time();
$t=$tt if $tt<$t;
}
$t=300 if $t<300;
return $t;
}
 
# Recursively test whether $value satisfies the pattern $match.
#
# Rules, applied in this order:
#   * $match is an array ref: true if ANY element of it matches $value.
#   * $value is an array ref: true if $match matches ANY element of it.
#   * $match is undef:        true only if $value is undef as well.
#   * $value is undef:        false.
#   * $match is a plain scalar: string equality with $value.
#   * $match is a hash ref:   $value must be a hash ref too, and every key
#                             of $match must match the same key of $value
#                             (a key missing from $value is treated as
#                             undef).  An empty hash matches any hash ref.
#   * anything else (e.g. a code ref as pattern): false.
#
# Returns a true value on match, a false value otherwise.
sub _match {
    my ($match, $value) = @_;

    if(ref($match) eq 'ARRAY'){
        foreach my $alternative (@$match){
            return 1 if _match($alternative, $value);
        }
        return 0;
    }
    if(ref($value) eq 'ARRAY'){
        foreach my $candidate (@$value){
            return 1 if _match($match, $candidate);
        }
        return 0;
    }

    return !defined($value) if !defined($match);
    return 0 if !defined($value);
    return ($match eq $value) if !ref($match);

    if(ref($match) eq 'HASH'){
        return 0 if ref($value) ne 'HASH';
        # Iterate over keys() rather than each(): bailing out of an each()
        # loop early leaves the hash's iterator mid-way, so the next call
        # with the same pattern hash would skip the keys already visited
        # and could report a false match.
        foreach my $key (keys %$match){
            my $have=exists($value->{$key})?$value->{$key}:undef;
            return 0 unless _match($match->{$key}, $have);
        }
        return 1;
    }

    return 0;
}