User:AnomieBOT/source/tasks/AFDMergeFromCleaner.pm

This is an old revision of this page, as edited by AnomieBOT (talk | contribs) at 17:08, 3 January 2009 (Updating published sources: AFDMergeFromCleaner: * Approved! * Use a link template for the report table, so the various pages have useful links associated. * Bugfix in parsing the report page.). The present address (URL) is a permanent link to this revision, which may differ significantly from the current revision.
package tasks::AFDMergeFromCleaner;

=pod

=begin metadata

Task:   AFDMergeFromCleaner
BRFA:   Wikipedia:Bots/Requests for approval/AnomieBOT 20
Status: Approved 2009-01-03
Rate:   Max 6 edits/minute

Remove instances of {{tl|afd-mergefrom}} where the merge has been completed.
Report instances of {{tl|afd-mergefrom}} where the AFDed page is now a redirect
to a different target.

=end metadata

=cut

use utf8;
use strict;

use AnomieBOT::Task;
use vars qw/@ISA/;
@ISA=qw/AnomieBOT::Task/;

sub new {
    my $class=shift;
    my $self=$class->SUPER::new();
    $self->{'nextrun'}=0;
    bless $self, $class;
    return $self;
}

=pod

=for info
Approved 2009-01-03<br />[[Wikipedia:Bots/Requests for approval/AnomieBOT 20]]

=cut

sub approved {
    return 2;
}

sub run {
    my ($self, $api)=@_;
    my $res;

    if($self->{'nextrun'}==0){
        my $t=$api->fetch('nextrun');
        $self->{'nextrun'}=$$t if(defined($t) && $$t=~/^\d+$/ && $$t<=time());
    }
    my $starttime=time();
    my $t=$self->{'nextrun'}-$starttime;
    return $t if $t>0;

    $api->task('AFDMergeFromCleaner');
    $api->read_throttle(0);
    $api->edit_throttle(10);

    my @templates=('Afd-mergefrom');
    my $screwup=' Errors? [[User:'.$api->user.'/shutoff/AFDMergeFromCleaner]]';
    my $report='User:'.$api->user.'/Afd-mergefrom report';

    # Get a list of templates redirecting to our targets
    my %templates=();
    foreach my $template (@templates){
        $templates{"Template:$template"}=1;
        $res=$self->fullquery($api, undef,
            list          => 'backlinks',
            bltitle       => "Template:$template",
            blfilterredir => 'redirects',
            bllimit       => 'max',
        );
        $templates{$_->{'title'}}=1 foreach (@{$res->{'query'}{'backlinks'}});
    }

    my %redirected=();
    my %deleted=();
    my $ret=21600;

    my $linktmpl='User:'.$api->user.'/la';

    MAINLOOP:
    foreach my $template (@templates){
        # Get the list of pages to check
        my %q=(
            list        => 'embeddedin',
            eititle     => "Template:$template",
            einamespace => 1,
            eilimit     => 'max',
        );
        do {
            $res=$api->query(%q);
            if($res->{'code'} ne 'success'){
                $self->warn("Failed to retrieve transclusion list for $template: ".$res->{'error'}."\n");
                $ret=60;
                last MAINLOOP;
            }
            if(exists($res->{'query-continue'})){
                $q{'eicontinue'}=$res->{'query-continue'}{'embeddedin'}{'eicontinue'};
            } else {
                delete $q{'eicontinue'};
            }

            # Process found pages
            foreach (@{$res->{'query'}{'embeddedin'}}){
                my $title=$_->{'title'};
                my $subject=$title;
                if($_->{'ns'}==1){
                    $subject=~s/^Talk://;
                } else {
                    $subject=~s/^([^:]*) talk:/$1:/;
                }

                $self->warn("Checking for $template in $title\n");

                # WTF?
                if(exists($_->{'missing'})){
                    $self->warn("$title is missing? WTF?\n");
                    next;
                }

                # Ok, check the page
                my $tok=$api->edittoken($title);
                if($tok->{'code'} eq 'shutoff'){
                    $self->warn("Task disabled: ".$tok->{'content'}."\n");
                    return 300;
                }
                if($tok->{'code'} ne 'success'){
                    $self->warn("Failed to get edit token for $title: ".$tok->{'error'}."\n");
                    next;
                }
                next if exists($tok->{'missing'});

                # Get page text
                my $intxt=$tok->{'revisions'}[0]{'*'};

                # First, find the template and pull out the relevant parameter
                my @from=();
                $self->process_templates($intxt, sub {
                    return undef unless exists($templates{'Template:'.$_[0]});
                    my $f=undef;
                    foreach ($self->process_paramlist(@{$_[1]})){
                        ($f=$_->{'value'})=~s/^\s+|\s+$//g if $_->{'name'} eq 1;
                    }
                    push @from, $f if defined($f);
                    return undef;
                });

                # Now, query the found pages
                my %remove=();
                while(@from){
                    my @f=splice(@from, 0, 500);
                    $res=$api->query(titles => join('|',@f), redirects=>1);
                    if($res->{'code'} ne 'success'){
                        $self->warn("Failed to retrieve mergefrom page list for $title: ".$res->{'error'}."\n");
                        $ret=60;
                        last MAINLOOP;
                    }
                    my %norm=map { $_->{'to'}, $_->{'from'} } @{$res->{'query'}{'normalized'}} if exists($res->{'query'}{'normalized'});
                    foreach (@{$res->{'query'}{'redirects'}}){
                        my ($f,$t)=($_->{'from'}, $_->{'to'});
                        if($t eq $subject){
                            $f=$norm{$f} if exists($norm{$f});
                            $remove{$f}=1;
                        } else {
                            $redirected{"$f>$subject>$t"}="| {{$linktmpl|$f}} || {{$linktmpl|$subject}} || {{$linktmpl|$t}} || \n";
                        }
                    }
                    foreach (values %{$res->{'query'}{'pages'}}){
                        next unless exists($_->{'missing'});
                        # Missing pages should have the template removed, and
                        # be logged.
                        my $f=$_->{'title'};
                        $f=$norm{$f} if exists($norm{$f});
                        $remove{exists($norm{$f})?$norm{$f}:$f}=1;
                        $deleted{"$f>$subject"}="| {{$linktmpl|$f}} || {{$linktmpl|$subject}} || \n";
                    }
                }

                # Remove the removable templates
                my $outtxt=$self->process_templates($intxt, sub {
                    return undef unless exists($templates{'Template:'.$_[0]});
                    my $f=undef;
                    foreach ($self->process_paramlist(@{$_[1]})){
                        ($f=$_->{'value'})=~s/^\s+|\s+$//g if $_->{'name'} eq 1;
                    }
                    return exists($remove{$f})?'':undef;
                });

                # Need to edit?
                if($outtxt ne $intxt){
                    my $summary="Removing obsolete {{$template}}";
                    $self->warn("$summary in $title\n");
                    my $r=$api->edit($tok, $outtxt, $summary.$screwup, 1, 1);
                    if($r->{'code'} ne 'success'){
                        $self->warn("Write failed on $title: ".$r->{'error'}."\n");
                        next;
                    }
                }
            }
        } while(exists($q{'eicontinue'}));
    }

    my $tok=$api->edittoken($report);
    if($tok->{'code'} eq 'shutoff'){
        $self->warn("Task disabled: ".$tok->{'content'}."\n");
        return 300;
    }
    if($tok->{'code'} ne 'success'){
        $self->warn("Failed to get edit token for $report: ".$tok->{'error'}."\n");
        $ret=60;
        last;
    }
    my $intxt=exists($tok->{'revisions'}[0]{'*'})?$tok->{'revisions'}[0]{'*'}:'';
    foreach my $s (split(/(?=(?:^|\n)==)/, $intxt)){
        if($s=~/^==\s*Redirected\s*==/){
            foreach (split /\n/, $s){
                next unless /^\Q| {{$linktmpl|\E([^]]+)\Q}} || {{$linktmpl|\E([^]]+)\Q}} || {{$linktmpl|\E([^]]+)\Q}} ||/;
                next unless exists($redirected{"$1>$2>$3"});
                $redirected{"$1>$2>$3"}=$_;
            }
        } elsif($s=~/^==\s*Deleted\s*==/){
            foreach (split /\n/, $s){
                next unless /^\Q| {{$linktmpl|\E([^]]+)\Q}} || {{$linktmpl|\E([^]]+)\Q}} ||/;
                $deleted{"$1>$2"}=$_;
            }
        }
    }
    my $outtxt="== Redirected ==\n";
    $outtxt.="The following table lists pages referred to by {{tl|afd-mergefrom}} are redirects to some page other than that with the {{tl|afd-mergefrom}}. Please correct the {{tl|afd-mergefrom}}, either by removing it (if the page was correctly merged elsewhere), undoing the incorrect redirection, or pointing it to the correct page. This table will be updated automatically.\n\n";
    $outtxt.="{| class=\"wikitable sortable\"\n";
    $outtxt.="! Page !! AFD merge to !! Redirect to !! Note\n";
    $outtxt.="|-\n";
    $outtxt.=join("|-\n", sort values %redirected);
    $outtxt.="|}\n\n";
    $outtxt.="== Deleted ==\n";
    $outtxt.="The following table lists deleted pages that were referred to by {{tl|afd-mergefrom}}; the offending {{tl|afd-mergefrom}} has already been removed. Please double-check whether the deletion was correct. If so, just remove the row from the table; if not, undelete the page and restore the {{tl|afd-mergefrom}}. This table will '''not''' be updated automatically.\n\n";
    $outtxt.="{| class=\"wikitable sortable\"\n";
    $outtxt.="! Page !! AFD merge to !! Note\n";
    $outtxt.="|-\n";
    $outtxt.=join("|-\n", sort values %deleted);
    $outtxt.="|}\n\n";

    if($outtxt ne $intxt){
        my $summary="Updating list of non-matching merges";
        $self->warn("$summary in $report\n");
        my $r=$api->edit($tok, $outtxt, $summary.$screwup, 1, 1);
        if($r->{'code'} ne 'success'){
            $self->warn("Write failed on $report: ".$r->{'error'}."\n");
            $ret=60;
        }
    }

    $starttime+=$ret;
    $self->{'nextrun'}=$starttime;
    $api->store('nextrun', \$starttime);

    return $ret;
}

1;