User:AnomieBOT/source/tasks/WatchlistUpdater.pm

This is an old revision of this page, as edited by AnomieBOT (talk | contribs) at 21:11, 21 August 2008 (Updating published sources: General: * Have the bot script watch for changes and automatically re-exec itself. SourceUploader: * Change things around so task metadata is stored with the task.). The present address (URL) is a permanent link to this revision, which may differ significantly from the current revision.
package tasks::WatchlistUpdater;

=pod

=begin metadata

Task:   WatchlistUpdater
BRFA:   N/A
Status: Begun 2008-08-15
Rate:   As needed, at most every 6 hours

Updates algorithmically-defined "watchlists" (like [[User:Anomie/uw-templates]])
when pages are created or deleted. The bot only edits when something actually
changes.

=end metadata

=cut

use strict;

use AnomieBOT::Task;
use vars qw/@ISA/;
@ISA=qw/AnomieBOT::Task/;

use POSIX qw/strftime/;
use Data::Dumper;

# Watchlist pages maintained by this task, keyed by the title of the wiki page
# that holds the generated list. Each entry has:
#   frequency - minimum number of seconds between checks of this page; run()
#               compares time() against lastrun + frequency.
#   prefix    - title prefix passed to the API as "apprefix" (list=allpages).
#   namespace - numeric namespace passed to the API as "apnamespace"
#               (on a standard MediaWiki install 10 is Template and 2 is
#               User -- confirm for the target wiki).
my %pages=(
        'User:Anomie/uw-templates' => {
            frequency => 6*60*60,
            prefix    => 'Uw-',
            namespace => '10'
        },
        'User:AnomieBOT/index' => {
            frequency => 6*60*60,
            prefix    => 'AnomieBOT/',
            namespace => '2'
        }
);

# Upper bound on length() of the generated table text. Once run() builds a
# table longer than this it stops paging and publishes an error message in
# place of the list.
# NOTE(review): 100*1024*1024 is far larger than a wiki page is normally
# allowed to be; confirm whether a smaller limit was intended.
my $maxlen=100*1024*1024;


=pod

=for info
Per [[WP:BOT#Approval]], any bot or automated editing process that affects
only the operator's user and talk pages (or subpages thereof), and which is
not otherwise disruptive, may be run without prior approval.

=cut

# Approval hook for this task. Takes no meaningful arguments and
# unconditionally answers with the true value 1.
sub approved {
    my $is_approved=1;
    return $is_approved;
}

# Build the subject-page and talk-page wikilinks for one allpages entry.
#
# Parameters:
#   $ns    - numeric namespace of the entry (even = subject, odd = talk).
#   $title - full page title as returned by the API.
#
# Returns a two-element list: (subject wikilink, talk wikilink). The link
# for the "other" side is derived from the title by adding or removing the
# talk part of the namespace prefix.
#
# The brackets are written as \x5b / \x5d escapes, as in the rest of this
# file, so the source contains no literal wikilinks to these titles.
sub _subject_and_talk_links {
    my ($ns, $title)=@_;
    my $link="\x5b\x5b".$title."\x5d\x5d";
    my ($subject, $talk);

    if($ns==0){
        # Main namespace: the talk page just gains a "Talk:" prefix.
        $subject=$link;
        $talk="\x5b\x5bTalk:".$title."\x5d\x5d";
    } elsif($ns==1){
        # Main talk namespace: drop the "Talk:" prefix for the subject.
        $talk=$link;
        ($subject=$talk)=~s/Talk://i;
    } elsif(($ns&1)==0){
        # Other subject namespace: "Foo:Bar" becomes "Foo talk:Bar".
        $subject=$link;
        ($talk=$subject)=~s/^([^:]+):/$1 talk:/;
    } else {
        # Other talk namespace: "Foo talk:Bar" becomes "Foo:Bar".
        $talk=$link;
        ($subject=$talk)=~s/ talk:/:/i;
    }
    return ($subject, $talk);
}

# Run one pass of the task: for every configured watchlist page that is due,
# rebuild its table of matching pages and save it if the table changed.
#
# Parameters:
#   $self - the task object. $self->{'lastrun'} is created on first use and
#           maps each watchlist page title to the epoch time it was last
#           checked or edited.
#   $api  - the bot API object used for queries, edit tokens and edits.
#
# Returns a delay in seconds: 60 if the initial last-edit lookup failed, 300
# if the edit token reported the task as shut off, otherwise the time until
# the earliest page is next due (at least 300, at most 864000).
sub run {
    my ($self, $api)=@_;

    $api->task('WatchlistUpdater');
    $api->read_throttle(6);
    $api->edit_throttle(10);

    # Iterate over a snapshot of the keys rather than each(%pages): each()
    # keeps one shared iterator per hash, so an early return from inside a
    # loop would make the next call resume mid-hash and skip pages.
    my @titles=sort keys %pages;

    if(!exists($self->{'lastrun'})){
        # First pass: seed lastrun from the bot's most recent edit to each page.
        my %lastrun=();
        foreach my $title (@titles){
            my $res=$api->query(
                titles  => $title,
                prop    => 'revisions',
                rvuser  => $api->user,
                rvprop  => 'timestamp',
                rvlimit => 1
            );
            if($res->{'code'} ne 'success'){
                warn "Failed to retrieve last edit date for $title, cannot run task";
                return 60;
            }
            $res=[values(%{$res->{'query'}{'pages'}})];
            if(exists($res->[0]{'revisions'}[0]{'timestamp'})){
                $lastrun{$title}=$self->ISO2timestamp($res->[0]{'revisions'}[0]{'timestamp'});
            } else {
                # The bot never edited this page, so treat it as due now.
                $lastrun{$title}=0;
            }
        }
        $self->{'lastrun'}=\%lastrun;
    }

    PAGE: foreach my $title (@titles){
        my $conf=$pages{$title};
        next PAGE unless time()>=$self->{'lastrun'}{$title}+$conf->{'frequency'};

        my $starttime=strftime("%d %b %Y %T", gmtime());
        my $tok=$api->edittoken($title);
        if($tok->{'code'} eq 'shutoff'){
            $self->warn("Task disabled: ".$tok->{'content'}."\n");
            return 300;
        }
        if($tok->{'code'} ne 'success'){
            $self->warn("Failed to retrieve edit token for $title: ".$tok->{'error'});
            next PAGE;
        }
        if(exists($tok->{'missing'})){
            $self->warn("Page $title does not exist");
            next PAGE;
        }
        my $intxt=$tok->{'revisions'}[0]{'*'};

        # Build the replacement table, following query-continue until the
        # listing is exhausted or the table grows past $maxlen.
        my $table="{| class=\"wikitable\"\n";
        my %q=(
            list        => 'allpages',
            apprefix    => $conf->{'prefix'},
            apnamespace => $conf->{'namespace'},
            aplimit     => 'max'
        );
        my $res;
        my $more=1;
        while($more){
            $res=$api->query(%q);
            if($res->{'code'} ne 'success'){
                $self->warn("Failed to retrieve data for $title: ".$res->{'error'});
                # Give up on this page only; lastrun is left untouched so
                # it is retried on the next pass.
                next PAGE;
            }
            $more=exists($res->{'query-continue'});
            if($more){
                $q{'apfrom'}=$res->{'query-continue'}{'allpages'}{'apfrom'};
            }
            foreach my $entry (@{$res->{'query'}{'allpages'}}){
                my ($subject, $talk)=_subject_and_talk_links($entry->{'ns'}, $entry->{'title'});
                $table.="|-\n|$subject || $talk\n";
                last if length($table)>$maxlen;
            }
            $more=0 if length($table)>$maxlen;
        }
        $table.="|}";
        $table='<strong class="error">List of pages is too long</strong>' if length($table)>$maxlen;
        # Prefix category and image links with a colon so they render as
        # plain links.
        $table=~s/\x5b\x5b(Category|Image):/\x5b\x5b:$1:/ig;

        my $endtime=strftime("%d %b %Y %T", gmtime());

        # Locate the previously generated table (after the SNIP marker when
        # there is one) and skip the edit when nothing changed.
        my $snip=index($intxt,'<!-- SNIP HERE -->');
        my $tstart=index($intxt,"{| class=\"wikitable\"\n",($snip<0)?0:$snip);
        my $oldtable=($tstart<0)?'':substr($intxt,$tstart);
        $oldtable=~s/\s+$//;
        if($oldtable eq $table){
            $self->warn("No update needed for $title\n");
            $self->{'lastrun'}{$title}=time();
            next PAGE;
        }

        # Keep everything before the marker and regenerate the rest.
        my $outtxt=$intxt;
        $outtxt=substr($outtxt,0,$snip) if $snip>=0;
        $outtxt.="<!-- SNIP HERE -->\n<small>Last update happened between $starttime GMT and $endtime GMT</small>\n$table";

        $res=$api->edit($tok, $outtxt, 'Updating list of watched pages', 0, 1);
        if($res->{'code'} eq 'success'){
            $self->warn("Updated $title\n");
            $self->{'lastrun'}{$title}=time();
        } else {
            $self->warn("Write for $title failed: ".$res->{'error'});
        }
    }

    # Work out how long until the earliest page is due again.
    my $delay=864000;
    foreach my $title (@titles){
        my $due=$self->{'lastrun'}{$title}+$pages{$title}{'frequency'}-time();
        $delay=$due if $due<$delay;
    }
    $delay=300 if $delay<300;
    return $delay;
}

1;