package tasks::WatchlistUpdater;
=pod
=begin metadata
Task: WatchlistUpdater
BRFA: N/A
Status: Begun 2008-08-15
Rate: As needed, at most every 6 hours
Updates algorithmically-defined "watchlists" (like [[User:Anomie/uw-templates]])
when pages are created or deleted. The bot only edits when something actually
changes.
=end metadata
=cut
use strict;
use AnomieBOT::Task;
use vars qw/@ISA/;
@ISA=qw/AnomieBOT::Task/;
use POSIX qw/strftime/;
use Data::Dumper;
# Watchlist pages maintained by this task, one hash per page.
# Keys, as they are read by run():
#   page        - title of the wiki page whose generated table is rewritten
#   summary     - edit summary used when the page is saved
#   botflag     - passed as the final argument of the edit call
#   beginmarker - text after which the generated table starts ('' = start of page)
#   endmarker   - text before which the generated table ends ('' = end of page)
#   frequency   - minimum number of seconds between two updates of the page
#   maxrows     - row count above which the list is cut off with an error row
#   query       - parameters handed to fullquery()
#   gcontinue   - handed to fullquery() ahead of the query parameters
#   result      - key below {'query'} in the response that holds the rows
#   match       - pattern every row must satisfy (see _match); {} accepts any row
my @cfg_pages = (
    {
        page        => 'User:Anomie/uw-templates',
        summary     => 'Automatically updating list of uw-* templates',
        botflag     => 1,

        beginmarker => "\n<!-- SNIP HERE -->\n",
        endmarker   => '',

        frequency   => 6 * 60 * 60,
        maxrows     => 10000,

        query       => {
            list        => 'allpages',
            apnamespace => '10',
            apprefix    => 'Uw-',
            aplimit     => 'max',
        },
        gcontinue   => 'apcontinue',
        result      => 'allpages',
        match       => {},
    },
    {
        page        => 'User:AnomieBOT/index',
        summary     => 'Automatically updating userspace index',
        botflag     => 1,

        beginmarker => "\n<!-- SNIP HERE -->\n",
        endmarker   => '',

        frequency   => 6 * 60 * 60,
        maxrows     => 10000,

        query       => {
            list        => 'allpages',
            apnamespace => '2',
            apprefix    => 'AnomieBOT/',
            aplimit     => 'max',
        },
        gcontinue   => 'apcontinue',
        result      => 'allpages',
        match       => {},
    },
);
# Constructor: builds the object through the parent class constructor and
# attaches the list of pages to maintain under $self->{'pages'}.
# The array is new, but its elements are the very hash refs stored in
# @cfg_pages, so anything recorded on an entry is visible through both.
sub new {
    my ($class) = @_;

    my $self = $class->SUPER::new;
    $self->{'pages'} = [ @cfg_pages ];

    # Re-bless into the requested class and hand the object back.
    return bless $self, $class;
}
=pod
=for info
Per [[WP:BOT#Approval]], any bot or automated editing process that
affects only the operators' user and talk pages (or subpages thereof),
and which is not otherwise disruptive, may be run without prior
approval.
=cut
# Approval flag for this task: unconditionally true.
# The POD note preceding this sub gives the policy rationale.
# NOTE(review): presumably consulted by the bot framework before run() is
# scheduled -- confirm against AnomieBOT::Task.
sub approved {
    return 1;
}
# Refresh every configured watchlist page that is due for an update.
#
# Parameters:
#   $self - the task object; $self->{'pages'} holds the per-page config
#           hashes, each of which gets a 'lastrun' epoch time recorded on it.
#   $api  - the bot API object used for queries, edit tokens and edits.
#
# Returns a delay in seconds until the task wants to be run again:
#   0   - the 600-second time budget ran out before every page was visited
#   60  - a query failed or returned rows in an unexpected shape
#   300 - the edit token request reported 'shutoff'
#   otherwise the smallest (lastrun + frequency - now) over all pages, capped
#   at 864000.  This can be zero or negative when a page was skipped because
#   of an error, since its 'lastrun' is not advanced in that case.
sub run {
    my ($self, $api)=@_;

    $api->task('WatchlistUpdater');
    $api->read_throttle(6);
    $api->edit_throttle(10);

    my $endtime=time()+600;

    PAGE: foreach my $data (@{$self->{'pages'}}){
        my $page=$data->{'page'};

        # We've run too long, wait on the rest until next time
        return 0 if time()>=$endtime;

        # Check last run time if we haven't already recorded it: use the
        # timestamp of the bot's own most recent revision of the page.
        if(!exists($data->{'lastrun'})){
            my $res=$api->query(
                titles  => $page,
                prop    => 'revisions',
                rvuser  => $api->user,
                rvprop  => 'timestamp',
                rvlimit => 1
            );
            if($res->{'code'} ne 'success'){
                $self->warn("Failed to retrieve last edit date for $page");
                return 60;
            }
            $res=[values(%{$res->{'query'}{'pages'}})];
            if(exists($res->[0]{'revisions'}[0]{'timestamp'})){
                $data->{'lastrun'}=$self->ISO2timestamp($res->[0]{'revisions'}[0]{'timestamp'});
            } else {
                # No revision by the bot was returned: treat the page as never updated
                $data->{'lastrun'}=0;
            }
        }

        # Time to check again?
        next PAGE unless time()>=$data->{'lastrun'}+$data->{'frequency'};

        # Get edit token
        my $tok=$api->edittoken($page);
        if($tok->{'code'} eq 'shutoff'){
            $self->warn("Task disabled: ".$tok->{'content'}."\n");
            return 300;
        }
        if($tok->{'code'} ne 'success'){
            $self->warn("Failed to retrieve edit token for $page: ".$tok->{'error'});
            next PAGE;
        }
        if(exists($tok->{'missing'})){
            $self->warn("Page $page does not exist");
            next PAGE;
        }
        my $intxt=$tok->{'revisions'}[0]{'*'};

        # Generate new table
        my %out=();     # title => [ link target, talk page title ]
        my $rows=0;
        my %cont=();    # continuation parameters for the next query batch
        do {
            my $res=$self->fullquery($api, $data->{'gcontinue'}, %{$data->{'query'}}, %cont);
            if($res->{'code'} ne 'success'){
                $self->warn("Failed to retrieve data for $page: ".$res->{'error'});
                # do {} while is not a loop as far as loop control goes, so
                # name the page loop explicitly: give up on this page only.
                next PAGE;
            }

            # Merge all continuation parameters for the following batch
            %cont=();
            if(exists($res->{'query-continue'})){
                foreach my $n (values %{$res->{'query-continue'}}){
                    %cont=(%cont, %$n);
                }
            }

            $res=$res->{'query'}{$data->{'result'}};
            my @r;
            if(ref($res) eq 'ARRAY'){
                @r=@$res;
            } elsif(ref($res) eq 'HASH'){
                @r=values %$res;
            } else {
                $self->warn("Invalid data for $page: Not an array or hash ref");
                return 60;
            }

            ROW: foreach my $row (@r){
                # Odd namespace numbers are skipped; the talk title is derived below
                next ROW if ($row->{'ns'}&1)==1;
                next ROW unless _match($data->{'match'}, $row);

                my ($main, $talk);
                if($row->{'ns'}==14 || $row->{'ns'}==6){
                    # Leading colon so the wikilink links to the page
                    # (namespaces 14 and 6) rather than categorising/embedding
                    $main=':'.$row->{'title'};
                } else {
                    $main=$row->{'title'};
                }
                if($row->{'ns'}==0){
                    $talk="Talk:".$row->{'title'};
                } else {
                    # Insert " talk" in front of the first colon: "Foo:Bar" -> "Foo talk:Bar"
                    $talk=$row->{'title'};
                    substr($talk, index($talk, ':'), 0)=' talk';
                }
                $out{$row->{'title'}}=[$main,$talk];
                last ROW if ++$rows>$data->{'maxrows'};
            }
        } while($rows<=$data->{'maxrows'} && scalar(keys %cont));

        my $table="{| class=\"wikitable\"\n";
        foreach my $title (sort keys %out){
            $table.="|-\n|[[".$out{$title}[0]."]]||[[".$out{$title}[1]."]]\n";
        }
        $table.="|-\n|colspan=\"2\"|<strong class=\"error\">List truncated at $rows rows</strong>\n" if $rows>$data->{'maxrows'};
        $table.="|}";

        # Perform edit, if needed: replace the text between the markers
        my $outtxt=$intxt;
        my ($begin,$end);
        if($data->{'beginmarker'} eq ''){
            $begin=0;
        } else {
            $begin=index($outtxt, $data->{'beginmarker'});
            $begin+=length($data->{'beginmarker'}) if $begin>=0;
        }
        if($data->{'endmarker'} eq ''){
            $end=length($outtxt);
        } else {
            $end=index($outtxt, $data->{'endmarker'}, $begin);
        }
        if($begin<0 || $end<0){
            # Dump the fetched page text to STDERR to help diagnose the missing marker
            warn $outtxt;
            $self->warn("Begin/end markers not found, refusing to edit $page\n");
        } else {
            substr($outtxt,$begin,$end-$begin)=$table;
            if($intxt eq $outtxt){
                $self->warn("No update needed for $page\n");
            } else {
                my $res=$api->edit($tok, $outtxt, $data->{'summary'}, 0, $data->{'botflag'});
                if($res->{'code'} ne 'success'){
                    $self->warn("Write for $page failed: ".$res->{'error'});
                    next PAGE;
                }
            }
        }

        # Record last update time
        $data->{'lastrun'}=time();
    }

    # We processed all pages, calculate the number of seconds until the next
    # time we're needed.
    my $t=864000; # arbitrary initial/max value
    foreach my $cfg (@{$self->{'pages'}}){
        my $tt=$cfg->{'lastrun'}+$cfg->{'frequency'}-time();
        $t=$tt if $tt<$t;
    }
    return $t;
}
# Recursively test whether $value satisfies the pattern $match.
#
# Rules, applied in this order:
#   * $match is an array ref: true if any element of it matches $value.
#   * $value is an array ref: true if $match matches any element of it.
#   * $match is undef:        true only if $value is undef as well.
#   * $value is undef:        false.
#   * $match is a plain scalar: string equality with $value.
#   * $match is a hash ref:   $value must be a hash ref too, and every key of
#                             $match must match the corresponding entry of
#                             $value (a missing key is treated as undef).
#                             An empty hash therefore matches any hash ref.
#   * any other kind of reference in $match: false.
#
# Returns a true value on a match and a false value otherwise.
sub _match {
    my ($match, $value) = @_;

    # A list of alternative patterns: one hit is enough.
    if(ref($match) eq 'ARRAY'){
        foreach my $alternative (@$match){
            return 1 if _match($alternative, $value);
        }
        return 0;
    }

    # A list of candidate values: one hit is enough.
    if(ref($value) eq 'ARRAY'){
        foreach my $candidate (@$value){
            return 1 if _match($match, $candidate);
        }
        return 0;
    }

    return !defined($value) if !defined($match);
    return 0 if !defined($value);
    return ($match eq $value) if !ref($match);

    if(ref($match) eq 'HASH'){
        return 0 if ref($value) ne 'HASH';

        # Iterate over keys() rather than each(): the pattern hash is reused
        # across calls, and bailing out of an each() loop on the first
        # mismatch would leave its iterator part-way through, so the next
        # call would skip the keys already visited and could report a match.
        foreach my $key (keys %$match){
            my $candidate = exists($value->{$key}) ? $value->{$key} : undef;
            return 0 unless _match($match->{$key}, $candidate);
        }
        return 1;
    }

    return 0;
}
1;