User:AnomieBOT/source/tasks/WatchlistUpdater.pm: Difference between revisions

Content deleted Content added
AnomieBOT (talk | contribs)
Updating published sources: General: * Function to transparently handle continuations in an API query. * Fix bot.pl to correctly check the ChangeLog mtime. WatchlistUpdater: * Change around to handle arbitrary API queries, with filtering. SourceUploa
AnomieBOT (talk | contribs)
Updating published sources: General: * Update for the addition of 'rvslots'. DatedCategoryDeleterTest: * Disable. It's clear that task won't be needed. BrokenRedirectDeleter: * Handle pages with newlines before the <code>#REDIRECT</code>.
 
(28 intermediate revisions by 3 users not shown)
Line 1:
{{ombox|type=notice|text= Per [[WP:BOT#Approval]], any bot or automated editing process that affects only the operators' user and talk pages (or subpages thereof), and which are not otherwise disruptive, may be run without prior approval.}}
<syntaxhighlight lang="perl">
package tasks::WatchlistUpdater;
 
Line 7:
=begin metadata
 
Bot: AnomieBOT
Task: WatchlistUpdater
BRFA: N/A
Status: Begun 2008-08-15
Created: 2008-08-16
Rate: As needed, at most every 6 hours
 
Updates algorithmically-defined "watchlists" (like [[User:Anomie/uw-templates]])
Line 20 ⟶ 21:
=cut
 
use utf8;
use strict;
 
Line 26 ⟶ 28:
@ISA=qw/AnomieBOT::Task/;
 
use POSIX qw/strftime/;
use Data::Dumper;
 
Line 36 ⟶ 37:
frequency => 6*60*60,
maxrows => 10000,
query => [{
list => 'allpages',
apprefix => 'Uw-',
apnamespace => '10',
aplimit => 'max'
}],
gcontinue => 'apcontinueallpages',
result => 'allpages',
match => {},
summary => 'Automatically updating list of uw-* templates',
botflag => 1,
outprefix => sub { "{| class=\"wikitable\"\n" },
outformat => sub {
my ($main, $talk);
if($_[1]{'ns'}==14 || $_[1]{'ns'}==6){
$main=':'.$_[1]{'title'};
} else {
$main=$_[1]{'title'};
}
if($_[1]{'ns'}==0){
$talk="Talk:".$_[1]{'title'};
} else {
$talk=$_[1]{'title'};
substr($talk, index($talk, ':'), 0)=' talk';
}
return "|-\n|[[$main]]||[[$talk]]\n",
},
outerror => sub { "|-\n|colspan=\"2\"|".$_[1]."\n" },
outsuffix => sub { "|}" }
},
{
Line 54 ⟶ 73:
frequency => 6*60*60,
maxrows => 10000,
query => [{
list => 'allpages',
apprefix => 'AnomieBOT/',
apnamespace => '2',
aplimit => 'max'
},{
gcontinue list => 'apcontinueallpages',
apprefix => 'AnomieBOT II/',
apnamespace => '2',
aplimit => 'max'
},{
list => 'allpages',
apprefix => 'AnomieBOT III/',
apnamespace => '2',
aplimit => 'max'
},{
list => 'allpages',
apprefix => 'MediationBot/',
apnamespace => '2',
aplimit => 'max'
},{
list => 'allpages',
apprefix => 'MedcabBot/',
apnamespace => '2',
aplimit => 'max'
}],
gcontinue => 'allpages',
result => 'allpages',
match => {},
summary => 'Automatically updating userspace index',
botflag => 1,
outprefix => sub { "{| class=\"wikitable\"\n" },
outformat => sub {
my ($main, $talk);
if($_[1]{'ns'}==14 || $_[1]{'ns'}==6){
$main=':'.$_[1]{'title'};
} else {
$main=$_[1]{'title'};
}
if($_[1]{'ns'}==0){
$talk="Talk:".$_[1]{'title'};
} else {
$talk=$_[1]{'title'};
substr($talk, index($talk, ':'), 0)=' talk';
}
return "|-\n|[[$main]]||[[$talk]]\n",
},
outerror => sub { "|-\n|colspan=\"2\"|".$_[1]."\n" },
outsuffix => sub { "|}" }
},
{
page => 'User:Anomie/index',
beginmarker => "\n<!-- SNIP HERE -->\n",
endmarker => '',
frequency => 6*60*60,
maxrows => 10000,
query => [{
list => 'allpages',
apprefix => 'Anomie/',
apnamespace => '2',
aplimit => 'max'
}],
gcontinue => 'allpages',
result => 'allpages',
match => {},
summary => 'Automatically updating userspace index',
botflag => 1,
outprefix => sub { "{| class=\"wikitable\"\n" },
outformat => sub {
my ($main, $talk);
if($_[1]{'ns'}==14 || $_[1]{'ns'}==6){
$main=':'.$_[1]{'title'};
} else {
$main=$_[1]{'title'};
}
if($_[1]{'ns'}==0){
$talk="Talk:".$_[1]{'title'};
} else {
$talk=$_[1]{'title'};
substr($talk, index($talk, ':'), 0)=' talk';
}
return "|-\n|[[$main]]||[[$talk]]\n",
},
outerror => sub { "|-\n|colspan=\"2\"|".$_[1]."\n" },
outsuffix => sub { "|}" }
},
{
page => 'User:AnomieBOT/nobots tests',
beginmarker => "\n<!-- SNIP HERE -->\n",
endmarker => '',
frequency => 6*60*60,
maxrows => 10000,
query => [{
list => 'allpages',
apprefix => 'AnomieBOT/nobots test ',
apnamespace => '2',
aplimit => 'max'
}],
gcontinue => 'allpages',
result => 'allpages',
match => {},
summary => 'Automatically updating list of bot exclusion tests',
botflag => 1,
outprefix => sub { "{{div col}}\n" },
keyforpage => sub { my $t=$_[0]{'title'}; return $t unless $t=~/ (\d+)$/; return sprintf("%08d", $1)."|$t"; },
outformat => sub {
my $t=$_[1]{'title'};
$t=~s/^[^|]*\|//;
return "* [[$t]]\n";
},
outerror => sub { "* <strong class=\"error\">".$_[1]."</strong>\n" },
outsuffix => sub { "\n{{div col end}}" }
}
);
Line 87 ⟶ 207:
 
# Report this task's approval value.
#
# Takes no arguments that are used; always yields the same constant.
# Presumably the bot framework calls this to decide whether the task may
# run -- confirm against the AnomieBOT::Task base class.
#
# NOTE(review): this text was recovered from a rendered revision diff, so
# the literal below may be a fusion of an older and a newer value; verify
# it against the published source before relying on it.
sub approved {
my $approval_value = 1999;
return $approval_value;
}
 
Line 93 ⟶ 213:
my ($self, $api)=@_;
 
$api->task('WatchlistUpdater', 0, 10, qw(d::Timestamp));
$api->read_throttle(6);
$api->edit_throttle(10);
 
my $endtime=time()+600300;
 
foreach my $data (@{$self->{'pages'}}){
Line 112 ⟶ 230:
rvuser => $api->user,
rvprop => 'timestamp',
rvlimit => 1 # Only need the last rev
);
if($res->{'code'} ne 'success'){
$api->warn ("Failed to retrieve last edit date for $page: ".$res->{'error'}."\n");
return 60;
}
$res=[values(%{$res->{'query'}{'pages'}})];
if(exists($res->[0]{'revisions'}[0]{'timestamp'})){
$data->{'lastrun'}=$selfapi->ISO2timestamp($res->[0]{'revisions'}[0]{'timestamp'});
} else {
$data->{'lastrun'}=0;
Line 132 ⟶ 250:
my $tok=$api->edittoken($page);
if($tok->{'code'} eq 'shutoff'){
$selfapi->warn("Task disabled: ".$tok->{'content'}."\n");
return 300;
}
if($tok->{'code'} ne 'success'){
$selfapi->warn("Failed to retrieve edit token for $page: ".$tok->{'error'});
nextreturn 60;
}
if(exists($tok->{'missing'})){
$selfapi->warn("Page $page does not exist");
$data->{'lastrun'}=time();
next;
}
my $intxt=$tok->{'revisions'}[0]{'slots'}{'main'}{'*'};
 
# Generate new table
Line 149 ⟶ 268:
my $rows=0;
my %cont=();
my @queries=@{$data->{'query'}};
my $query=shift @queries;
do {
my $res=$selfapi->fullqueryquery($api, [$data->{'gcontinue'}], %{$data->{'query'}}, %cont);
if($res->{'code'} ne 'success'){
$selfapi->warn("Failed to retrieve data for $page: ".$res->{'error'});
nextreturn 60;
}
%cont=();
Line 161 ⟶ 282:
}
}
$query=shift @queries unless(%cont);
$res=$res->{'query'}{$data->{'result'}};
my @r;
Line 168 ⟶ 290:
@r=values %$res;
} else {
$selfapi->warn("Invalid data for $page: Not an array or hash ref");
return 60;
}
Line 174 ⟶ 296:
next if ($_->{'ns'}&1)==1;
next unless _match($data->{'match'}, $_);
my $k=exists($main, data->{'keyforpage'})?$talkdata->{'keyforpage'}($_):$_->{'title'};
if($_->out{'ns'$k}==14 || $_->{'ns'}==6){;
$main=':'.$_->{'title'};
} else {
$main=$_->{'title'};
}
if($_->{'ns'}==0){
$talk="Talk:".$_->{'title'};
} else {
$talk=$_->{'title'};
substr($talk, index($talk, ':'), 0)=' talk';
}
$out{$_->{'title'}}=[$main,$talk];
last if ++$rows>$data->{'maxrows'};
}
} while($rows<=$data->{'maxrows'} && values(%cont)$query);
my $tablex="{| class=\"wikitable\"\n"};
map {my $table.="|$data-\n|[[".$out>{$_'outprefix'}[0]."]]||[[".($out{$_}[1]."]]\n" } sort keys %outx);
map { $table.=$data->{'outformat'}($x,$out{$_}); } sort keys %out;
$table.="|-\n|colspan=\"2\"|<strong class=\"error\">List truncated at $rows rows</strong>\n" if $rows>$data->{'maxrows'};
$table.=$data->{'outerror'}($x,"<strong class=\"error\">List truncated at $rows rows</strong>") if $rows>$data->{'maxrows'};
$table.="|}";
$table.=$data->{'outsuffix'}($x);
 
# Perform edit, if needed
Line 210 ⟶ 322:
}
if($begin<0 || $end<0){
$api->warn("Begin/end markers not found, refusing to edit $outtxtpage\n");
$self->warn("Begin/end markers not found, refusing to edit $page\n");
} else {
substr($outtxt,$begin,$end-$begin)=$table;
if($intxt eq $outtxt){
$selfapi->warnlog("No update needed for $page\n");
} else {
my $res=$api->edit($tok, $outtxt, $data->{'summary'}, 0, $data->{'botflag'});
if($res->{'code'} ne 'success'){
$selfapi->warn("Write for $page failed: ".$res->{'error'});
next;
}
$api->log("Updated $page");
}
}
Line 233 ⟶ 345:
my $t=864000; # arbitrary initial/max value
foreach (@{$self->{'pages'}}){
next if $_->{'lastrun'}==0;
my $tt=$_->{'lastrun'}+$_->{'frequency'}-time();
$t=$tt if $tt<$t;
Line 242 ⟶ 355:
my $match = shift;
my $value = shift;
 
return $match->($value) if(ref($match) eq 'CODE');
 
if(ref($match) eq 'ARRAY'){
Line 257 ⟶ 372:
return 0 if !defined($value);
return ($match eq $value) if !ref($match);
return $value=~/$match/ if(ref($match) eq 'Regexp');
 
if(ref($match) eq 'HASH'){
return 0 if ref($value) ne 'HASH';
my $ok=1;
while(my ($k,$v);=each(%$match)){
while($ok && (( my $k,$v)v2=each(%$match)))value->{$k} // undef;
my $v2=exists($value->{$k})?$value->{$k}:undef;
$ok=($ok && _match($v,$v2));
}
Line 274 ⟶ 389:
1;
 
</syntaxhighlight>