User:AnomieBOT/source/tasks/WatchlistUpdater.pm: Difference between revisions

Content deleted Content added
AnomieBOT (talk | contribs)
Updating published sources: SourceUploader: * BUGFIX: Just don't allow lines to wrap in the embedded notices, for some reason it breaks when rendered.
AnomieBOT (talk | contribs)
Updating published sources: General: * Update for the addition of 'rvslots'. DatedCategoryDeleterTest: * Disable. It's clear that task won't be needed. BrokenRedirectDeleter: * Handle pages with newlines before the <code>#REDIRECT</code>.
 
(32 intermediate revisions by 3 users not shown)
Line 1:
{{ombox|type=notice|text= Per [[WP:BOT#Approval]], any bot or automated editing process that only affects only the operators' user and talk pages (or subpages thereof), and which are not otherwise disruptive, may be run without prior approval.}}
<syntaxhighlight lang="perl">
package tasks::WatchlistUpdater;
 
=pod
 
=begin metadata
 
Bot: AnomieBOT
Task: WatchlistUpdater
BRFA: N/A
Status: Begun 2008-08-15
Created: 2008-08-16
 
Updates algorithmically-defined "watchlists" (like [[User:Anomie/uw-templates]])
when pages are created or deleted. The bot only edits when something actually
changes.
 
=end metadata
 
=cut
 
use utf8;
use strict;
 
Line 9 ⟶ 28:
@ISA=qw/AnomieBOT::Task/;
 
use POSIX qw/strftime/;
use Data::Dumper;
 
# Output callbacks shared by every page that is rendered as a two-column
# wikitable (link to the page, link to its talk page). Each callback is
# invoked by run() with a scratch hashref as its first argument; the row and
# error callbacks read their payload from the second argument.
my $wikitable_prefix = sub { "{| class=\"wikitable\"\n" };
my $wikitable_row = sub {
    my $entry = $_[1];
    my ($main, $talk);

    # Category (14) and File (6) links need a leading colon so that they
    # render as links rather than categorising the page / embedding the file.
    if($entry->{'ns'}==14 || $entry->{'ns'}==6){
        $main=':'.$entry->{'title'};
    } else {
        $main=$entry->{'title'};
    }

    # Namespace 0 titles carry no prefix, so the talk title gets "Talk:"
    # prepended; every other title gets " talk" inserted before its first colon.
    if($entry->{'ns'}==0){
        $talk="Talk:".$entry->{'title'};
    } else {
        $talk=$entry->{'title'};
        substr($talk, index($talk, ':'), 0)=' talk';
    }
    return "|-\n|[[$main]]||[[$talk]]\n";
};
my $wikitable_error = sub { "|-\n|colspan=\"2\"|".$_[1]."\n" };
my $wikitable_suffix = sub { "|}" };

# One hashref per maintained list page. Keys, as consumed by run():
#   page        - title of the wiki page holding the list
#   beginmarker - text after which the generated list starts ('' = start of page)
#   endmarker   - text at which the generated list ends ('' = end of page)
#   frequency   - minimum number of seconds between updates of the page
#   maxrows     - row limit; the list is truncated (with an error row) beyond it
#   query       - arrayref of API query parameter sets, run one after another
#   gcontinue   - passed (wrapped in an arrayref) as first argument to $api->query
#   result      - key under {'query'} in the API response that holds the rows
#   match       - pattern handed to _match() to filter rows ({} matches any hash)
#   summary     - edit summary
#   botflag     - passed as the last argument to $api->edit
#   keyforpage  - optional; maps a row to its sort key (default: the title)
#   outprefix / outformat / outerror / outsuffix - rendering callbacks
my @cfg_pages=(
    {
        page => 'User:Anomie/uw-templates',
        beginmarker => "\n<!-- SNIP HERE -->\n",
        endmarker => '',
        frequency => 6*60*60,
        maxrows => 10000,
        query => [{
            list => 'allpages',
            apprefix => 'Uw-',
            apnamespace => '10',
            aplimit => 'max'
        }],
        gcontinue => 'allpages',
        result => 'allpages',
        match => {},
        summary => 'Automatically updating list of uw-* templates',
        botflag => 1,
        outprefix => $wikitable_prefix,
        outformat => $wikitable_row,
        outerror => $wikitable_error,
        outsuffix => $wikitable_suffix
    },
    {
        page => 'User:AnomieBOT/index',
        beginmarker => "\n<!-- SNIP HERE -->\n",
        endmarker => '',
        frequency => 6*60*60,
        maxrows => 10000,
        query => [{
            list => 'allpages',
            apprefix => 'AnomieBOT/',
            apnamespace => '2',
            aplimit => 'max'
        },{
            list => 'allpages',
            apprefix => 'AnomieBOT II/',
            apnamespace => '2',
            aplimit => 'max'
        },{
            list => 'allpages',
            apprefix => 'AnomieBOT III/',
            apnamespace => '2',
            aplimit => 'max'
        },{
            list => 'allpages',
            apprefix => 'MediationBot/',
            apnamespace => '2',
            aplimit => 'max'
        },{
            list => 'allpages',
            apprefix => 'MedcabBot/',
            apnamespace => '2',
            aplimit => 'max'
        }],
        gcontinue => 'allpages',
        result => 'allpages',
        match => {},
        summary => 'Automatically updating userspace index',
        botflag => 1,
        outprefix => $wikitable_prefix,
        outformat => $wikitable_row,
        outerror => $wikitable_error,
        outsuffix => $wikitable_suffix
    },
    {
        page => 'User:Anomie/index',
        beginmarker => "\n<!-- SNIP HERE -->\n",
        endmarker => '',
        frequency => 6*60*60,
        maxrows => 10000,
        query => [{
            list => 'allpages',
            apprefix => 'Anomie/',
            apnamespace => '2',
            aplimit => 'max'
        }],
        gcontinue => 'allpages',
        result => 'allpages',
        match => {},
        summary => 'Automatically updating userspace index',
        botflag => 1,
        outprefix => $wikitable_prefix,
        outformat => $wikitable_row,
        outerror => $wikitable_error,
        outsuffix => $wikitable_suffix
    },
    {
        page => 'User:AnomieBOT/nobots tests',
        beginmarker => "\n<!-- SNIP HERE -->\n",
        endmarker => '',
        frequency => 6*60*60,
        maxrows => 10000,
        query => [{
            list => 'allpages',
            apprefix => 'AnomieBOT/nobots test ',
            apnamespace => '2',
            aplimit => 'max'
        }],
        gcontinue => 'allpages',
        result => 'allpages',
        match => {},
        summary => 'Automatically updating list of bot exclusion tests',
        botflag => 1,
        outprefix => sub { "{{div col}}\n" },
        # Titles ending in " <number>" get a zero-padded numeric sort key so
        # that "test 10" sorts after "test 9"; other titles sort by title.
        keyforpage => sub {
            my $t=$_[0]{'title'};
            return $t unless $t=~/ (\d+)$/;
            return sprintf("%08d", $1)."|$t";
        },
        outformat => sub {
            my $t=$_[1]{'title'};
            # Drop anything up to and including the first "|" in the title.
            $t=~s/^[^|]*\|//;
            return "* [[$t]]\n";
        },
        outerror => sub { "* <strong class=\"error\">".$_[1]."</strong>\n" },
        outsuffix => sub { "\n{{div col end}}" }
    }
);
 
# Constructor: builds the task object via the parent class constructor and
# attaches the list of page configurations as $self->{'pages'}.
#
# The array is a fresh (shallow) copy of @cfg_pages: the array itself is new,
# but its elements are the same hashrefs as in @cfg_pages.
sub new {
    my $class=shift;
    my $self=$class->SUPER::new;
    $self->{'pages'}=[@cfg_pages];
    bless $self, $class;
    return $self;
}
 
=pod
 
=for info
Per [[WP:BOT#Approval]], any bot or automated editing process that
affects only the operators' user and talk pages (or subpages thereof),
and which are not otherwise disruptive, may be run without prior
approval.
 
=cut
 
# Reports this task's approval value; always returns 999.
# NOTE(review): the meaning of 999 is defined by the bot framework, not by
# this file -- presumably it marks a task that runs without a BRFA; confirm
# against AnomieBOT::Task.
sub approved {
    return 999;
}
 
# Main entry point: brings every configured list page up to date.
#
# Parameters:
#   $self - the task object; $self->{'pages'} is an arrayref of page
#           configuration hashrefs (see @cfg_pages for the keys).
#   $api  - the bot API object used for all wiki access and logging.
#
# Returns a number of seconds:
#   0    - the 300-second time budget ran out; remaining pages are left for
#          the next call
#   60   - an API read failed
#   300  - the task was shut off (reported by edittoken)
#   else - time until the earliest page is due again, clamped to 300..864000
#
# Side effect: records the time of the last update attempt in each page
# configuration hashref under 'lastrun'.
sub run {
    my ($self, $api)=@_;

    $api->task('WatchlistUpdater', 0, 10, qw(d::Timestamp));

    my $endtime=time()+300;
    foreach my $data (@{$self->{'pages'}}){
        my $page=$data->{'page'};

        # We've run too long, wait on the rest until next time
        return 0 if time()>=$endtime;

        # Check last run time if we haven't already recorded it
        if(!exists($data->{'lastrun'})){
            my $res=$api->query(
                titles => $page,
                prop => 'revisions',
                rvuser => $api->user,
                rvprop => 'timestamp',
                rvlimit => 1 # Only need the last rev
            );
            if($res->{'code'} ne 'success'){
                $api->warn("Failed to retrieve last edit date for $page: ".$res->{'error'}."\n");
                return 60;
            }
            $res=[values(%{$res->{'query'}{'pages'}})];
            if(exists($res->[0]{'revisions'}[0]{'timestamp'})){
                $data->{'lastrun'}=$api->ISO2timestamp($res->[0]{'revisions'}[0]{'timestamp'});
            } else {
                # The bot has never edited this page
                $data->{'lastrun'}=0;
            }
        }

        # Time to check again?
        next unless time()>=$data->{'lastrun'}+$data->{'frequency'};

        # Get edit token
        my $tok=$api->edittoken($page);
        if($tok->{'code'} eq 'shutoff'){
            $api->warn("Task disabled: ".$tok->{'content'}."\n");
            return 300;
        }
        if($tok->{'code'} ne 'success'){
            $api->warn("Failed to retrieve edit token for $page: ".$tok->{'error'});
            return 60;
        }
        if(exists($tok->{'missing'})){
            $api->warn("Page $page does not exist");
            $data->{'lastrun'}=time();
            next;
        }
        my $intxt=$tok->{'revisions'}[0]{'slots'}{'main'}{'*'};

        # Generate new table: run each configured query in turn, following
        # continuation data until a query is exhausted, and collect the
        # matching rows keyed by their sort key.
        my %out=();
        my $rows=0;
        my %cont=();
        my @queries=@{$data->{'query'}};
        my $query=shift @queries;
        do {
            my $res=$api->query([$data->{'gcontinue'}], %$query, %cont);
            if($res->{'code'} ne 'success'){
                $api->warn("Failed to retrieve data for $page: ".$res->{'error'});
                return 60;
            }

            # Merge all continuation parameters for the next request
            %cont=();
            if(exists($res->{'query-continue'})){
                foreach my $n (values %{$res->{'query-continue'}}){
                    %cont=(%cont, %$n);
                }
            }
            # No continuation means this query is done; move on to the next
            $query=shift @queries unless(%cont);

            $res=$res->{'query'}{$data->{'result'}};
            my @r;
            if(ref($res) eq 'ARRAY'){
                @r=@$res;
            } elsif(ref($res) eq 'HASH'){
                @r=values %$res;
            } else {
                $api->warn("Invalid data for $page: Not an array or hash ref");
                return 60;
            }
            foreach my $entry (@r){
                # Odd namespaces (talk pages) are never listed on their own
                next if ($entry->{'ns'}&1)==1;
                next unless _match($data->{'match'}, $entry);
                my $k=exists($data->{'keyforpage'})?$data->{'keyforpage'}($entry):$entry->{'title'};
                $out{$k}=$entry;
                last if ++$rows>$data->{'maxrows'};
            }
        } while($rows<=$data->{'maxrows'} && $query);

        # Render the rows in sort-key order
        my $x={};
        my $table=$data->{'outprefix'}($x);
        foreach my $k (sort keys %out){
            $table.=$data->{'outformat'}($x,$out{$k});
        }
        $table.=$data->{'outerror'}($x,"<strong class=\"error\">List truncated at $rows rows</strong>") if $rows>$data->{'maxrows'};
        $table.=$data->{'outsuffix'}($x);

        # Perform edit, if needed: replace the text between the markers
        # (an empty marker means start/end of page) with the new table.
        my $outtxt=$intxt;
        my ($begin,$end);
        if($data->{'beginmarker'} eq ''){
            $begin=0;
        } else {
            $begin=index($outtxt, $data->{'beginmarker'});
            $begin+=length($data->{'beginmarker'}) if $begin>=0;
        }
        if($data->{'endmarker'} eq ''){
            $end=length($outtxt);
        } else {
            $end=index($outtxt, $data->{'endmarker'}, $begin);
        }
        if($begin<0 || $end<0){
            $api->warn("Begin/end markers not found, refusing to edit $page\n");
        } else {
            substr($outtxt,$begin,$end-$begin)=$table;
            if($intxt eq $outtxt){
                $api->log("No update needed for $page");
            } else {
                my $res=$api->edit($tok, $outtxt, $data->{'summary'}, 0, $data->{'botflag'});
                if($res->{'code'} ne 'success'){
                    $api->warn("Write for $page failed: ".$res->{'error'});
                    # 'lastrun' is deliberately left alone so the page is
                    # still due on the next call
                    next;
                }
                $api->log("Updated $page");
            }
        }

        # Record last update time
        $data->{'lastrun'}=time();
    }

    # We processed all pages, calculate the number of seconds until the next
    # time we're needed.
    my $t=864000; # arbitrary initial/max value
    foreach my $data (@{$self->{'pages'}}){
        next if $data->{'lastrun'}==0;
        my $tt=$data->{'lastrun'}+$data->{'frequency'}-time();
        $t=$tt if $tt<$t;
    }
    $t=300 if $t<300;
    return $t;
}
 
# Recursively tests $value against the pattern $match.
#
# Pattern kinds, checked in this order:
#   CODE ref   - the result of calling it with $value
#   ARRAY ref  - true if any element of the pattern matches $value
#   (value is an ARRAY ref) - true if any element of the value matches
#   undef      - matches only an undefined value
#   plain scalar - string equality
#   Regexp     - regex match against $value
#   HASH ref   - $value must be a hash and every key of the pattern must
#                match the corresponding entry of $value (so {} matches any hash)
# Anything else does not match. An undefined $value only matches an undef
# pattern (or whatever a CODE/ARRAY pattern decides).
sub _match {
    my ($match, $value) = @_;
    my $kind = ref($match);

    return $match->($value) if $kind eq 'CODE';

    if($kind eq 'ARRAY'){
        # First true result wins; otherwise the last false result is returned.
        my $found = 0;
        for my $alternative (@$match){
            $found = _match($alternative, $value);
            return $found if $found;
        }
        return $found;
    }

    if(ref($value) eq 'ARRAY'){
        my $found = 0;
        for my $candidate (@$value){
            $found = _match($match, $candidate);
            return $found if $found;
        }
        return $found;
    }

    return !defined($value) unless defined($match);
    return 0 unless defined($value);
    return ($match eq $value) if $kind eq '';
    return $value=~/$match/ if $kind eq 'Regexp';

    if($kind eq 'HASH'){
        return 0 unless ref($value) eq 'HASH';
        # First false result wins; otherwise the last true result is returned.
        my $all = 1;
        for my $key (keys %$match){
            my $candidate = $value->{$key};
            $all = _match($match->{$key}, $candidate);
            return $all unless $all;
        }
        return $all;
    }

    return 0;
}
 
1;
 
</syntaxhighlight>