User:AnomieBOT/source/tasks/WatchlistUpdater.pm: Difference between revisions

Content deleted Content added
AnomieBOT (talk | contribs)
Updating published sources: General: * fullquery: If multiple continues are returned, process them in parallel. WatchlistUpdater: * More changing around, to allow output other than a table. * Improve error handling.
AnomieBOT (talk | contribs)
Updating published sources: General: * Update for the addition of 'rvslots'. DatedCategoryDeleterTest: * Disable. It's clear that task won't be needed. BrokenRedirectDeleter: * Handle pages with newlines before the <code>#REDIRECT</code>.
 
(27 intermediate revisions by 3 users not shown)
Line 1:
{{ombox|type=notice|text= Per [[WP:BOT#Approval]], any bot or automated editing process that affects only the operator's user and talk pages (or subpages thereof), and which is not otherwise disruptive, may be run without prior approval.}}
<syntaxhighlight lang="perl">
package tasks::WatchlistUpdater;
 
Line 7:
=begin metadata
 
Bot: AnomieBOT
Task: WatchlistUpdater
BRFA: N/A
Status: Begun 2008-08-15
Created: 2008-08-16
Rate: As needed, at most every 6 hours
 
Updates algorithmically-defined "watchlists" (like [[User:Anomie/uw-templates]])
Line 20 ⟶ 21:
=cut
 
use utf8;
use strict;
 
Line 26 ⟶ 28:
@ISA=qw/AnomieBOT::Task/;
 
use POSIX qw/strftime/;
use Data::Dumper;
 
Line 36 ⟶ 37:
frequency => 6*60*60,
maxrows => 10000,
query => [{
list => 'allpages',
apprefix => 'Uw-',
apnamespace => '10',
aplimit => 'max'
}],
gcontinue => 'allpages',
result => 'allpages',
match => {},
Line 72 ⟶ 73:
frequency => 6*60*60,
maxrows => 10000,
query => [{
list => 'allpages',
apprefix => 'AnomieBOT/',
apnamespace => '2',
aplimit => 'max'
},{
list => 'allpages',
apprefix => 'AnomieBOT II/',
apnamespace => '2',
aplimit => 'max'
},{
list => 'allpages',
apprefix => 'AnomieBOT III/',
apnamespace => '2',
aplimit => 'max'
},{
list => 'allpages',
apprefix => 'MediationBot/',
apnamespace => '2',
aplimit => 'max'
},{
list => 'allpages',
apprefix => 'MedcabBot/',
apnamespace => '2',
aplimit => 'max'
}],
gcontinue => 'allpages',
result => 'allpages',
match => {},
Line 101 ⟶ 122:
outerror => sub { "|-\n|colspan=\"2\"|".$_[1]."\n" },
outsuffix => sub { "|}" }
},
{
page => 'User:Anomie/index',
beginmarker => "\n<!-- SNIP HERE -->\n",
endmarker => '',
frequency => 6*60*60,
maxrows => 10000,
query => [{
list => 'allpages',
apprefix => 'Anomie/',
apnamespace => '2',
aplimit => 'max'
}],
gcontinue => 'allpages',
result => 'allpages',
match => {},
summary => 'Automatically updating userspace index',
botflag => 1,
outprefix => sub { "{| class=\"wikitable\"\n" },
outformat => sub {
my ($main, $talk);
if($_[1]{'ns'}==14 || $_[1]{'ns'}==6){
$main=':'.$_[1]{'title'};
} else {
$main=$_[1]{'title'};
}
if($_[1]{'ns'}==0){
$talk="Talk:".$_[1]{'title'};
} else {
$talk=$_[1]{'title'};
substr($talk, index($talk, ':'), 0)=' talk';
}
return "|-\n|[[$main]]||[[$talk]]\n",
},
outerror => sub { "|-\n|colspan=\"2\"|".$_[1]."\n" },
outsuffix => sub { "|}" }
},
{
page => 'User:AnomieBOT/nobots tests',
beginmarker => "\n<!-- SNIP HERE -->\n",
endmarker => '',
frequency => 6*60*60,
maxrows => 10000,
query => [{
list => 'allpages',
apprefix => 'AnomieBOT/nobots test ',
apnamespace => '2',
aplimit => 'max'
}],
gcontinue => 'allpages',
result => 'allpages',
match => {},
summary => 'Automatically updating list of bot exclusion tests',
botflag => 1,
outprefix => sub { "{{div col}}\n" },
keyforpage => sub { my $t=$_[0]{'title'}; return $t unless $t=~/ (\d+)$/; return sprintf("%08d", $1)."|$t"; },
outformat => sub {
my $t=$_[1]{'title'};
$t=~s/^[^|]*\|//;
return "* [[$t]]\n";
},
outerror => sub { "* <strong class=\"error\">".$_[1]."</strong>\n" },
outsuffix => sub { "\n{{div col end}}" }
}
);
Line 123 ⟶ 207:
 
# Returns the fixed number 1999 from this task's approved() method.
# How callers interpret the number is not visible in this file.
# NOTE(review): this listing is a revision diff; confirm 1999 is the intended
# value and not two revisions' numbers run together.
sub approved {
    my $approval=1999;
    return $approval;
}
 
Line 129 ⟶ 213:
my ($self, $api)=@_;
 
$api->task('WatchlistUpdater', 0, 10, qw(d::Timestamp));
$api->read_throttle(6);
$api->edit_throttle(10);
 
my $endtime=time()+300;
 
foreach my $data (@{$self->{'pages'}}){
Line 148 ⟶ 230:
rvuser => $api->user,
rvprop => 'timestamp',
rvlimit => 1 # Only need the last rev
);
if($res->{'code'} ne 'success'){
$api->warn ("Failed to retrieve last edit date for $page: ".$res->{'error'}."\n");
return 60;
}
$res=[values(%{$res->{'query'}{'pages'}})];
if(exists($res->[0]{'revisions'}[0]{'timestamp'})){
$data->{'lastrun'}=$api->ISO2timestamp($res->[0]{'revisions'}[0]{'timestamp'});
} else {
$data->{'lastrun'}=0;
Line 168 ⟶ 250:
my $tok=$api->edittoken($page);
if($tok->{'code'} eq 'shutoff'){
$api->warn("Task disabled: ".$tok->{'content'}."\n");
return 300;
}
if($tok->{'code'} ne 'success'){
$api->warn("Failed to retrieve edit token for $page: ".$tok->{'error'});
return 60;
}
if(exists($tok->{'missing'})){
$api->warn("Page $page does not exist");
$data->{'lastrun'}=time();
next;
}
my $intxt=$tok->{'revisions'}[0]{'slots'}{'main'}{'*'};
 
# Generate new table
Line 186 ⟶ 268:
my $rows=0;
my %cont=();
my @queries=@{$data->{'query'}};
my $query=shift @queries;
do {
my $res=$api->query(%$query, %cont);
if($res->{'code'} ne 'success'){
$api->warn("Failed to retrieve data for $page: ".$res->{'error'});
return 60;
}
Line 198 ⟶ 282:
}
}
$query=shift @queries unless(%cont);
$res=$res->{'query'}{$data->{'result'}};
my @r;
Line 205 ⟶ 290:
@r=values %$res;
} else {
$api->warn("Invalid data for $page: Not an array or hash ref");
return 60;
}
Line 211 ⟶ 296:
next if ($_->{'ns'}&1)==1;
next unless _match($data->{'match'}, $_);
my $k=exists($data->{'keyforpage'})?$data->{'keyforpage'}($_):$_->{'title'};
$out{$k}=$_;
last if ++$rows>$data->{'maxrows'};
}
} while($rows<=$data->{'maxrows'} && $query);
my $x={};
my $table=$data->{'outprefix'}($x);
Line 236 ⟶ 322:
}
if($begin<0 || $end<0){
$api->warn("Begin/end markers not found, refusing to edit $page\n");
} else {
substr($outtxt,$begin,$end-$begin)=$table;
if($intxt eq $outtxt){
$api->log("No update needed for $page\n");
} else {
my $res=$api->edit($tok, $outtxt, $data->{'summary'}, 0, $data->{'botflag'});
if($res->{'code'} ne 'success'){
$api->warn("Write for $page failed: ".$res->{'error'});
next;
}
$api->log("Updated $page\n");
}
}
Line 293 ⟶ 378:
my $ok=1;
while(my ($k,$v)=each(%$match)){
my $v2=$value->{$k} // undef;
$ok=($ok && _match($v,$v2));
}
Line 304 ⟶ 389:
1;
 
</syntaxhighlight>