User:AnomieBOT/source/tasks/WikiProjectWorker.pm

This is an old revision of this page, as edited by AnomieBOT (talk | contribs) at 17:28, 8 October 2009 (Updating published sources: WikiProjectWorker: * Don't reset the iterator every 5 minutes.). The present address (URL) is a permanent link to this revision, which may differ significantly from the current revision.
package tasks::WikiProjectWorker;

=pod

=begin metadata

Task:     WikiProjectWorker
BRFA:     Wikipedia:Bots/Requests for approval/AnomieBOT 28
Status:   Approved 2009-04-08
Rate:     Max 6 edits/minute
Created:  2009-03-27
OnDemand: true

Perform various tasks at the request of the affected WikiProjects:
* Add or remove banners on a specific set of pages (e.g. pages in a category, pages transcluding a template).
* Adjust banner parameters, particularly assessments and task forces.
* Fix banner shells on pages edited for the above reasons.

=end metadata

=cut

use utf8;
use strict;

use Data::Dumper;
use Digest::SHA qw/sha256_base64/;
use AnomieBOT::Task;
use vars qw/@ISA/;
@ISA=qw/AnomieBOT::Task/;

### Link to the request page; run() appends it to every edit summary.
my $req='[[User:AnomieBOT/req/WikiProject California 2|request]]';

### Run sequence number. Increment it every time a new run is started: run()
### records each handled page under the current number and skips pages whose
### recorded number is already >= $seq, so bumping it makes previous runs'
### database entries stale without having to delete them.
my $seq=8;

### How to find the pages?
### Categories walked by the 'categorymembers' iterator below.
my @cats=(
    'Category:Unassessed California articles',
    'Category:Unknown-importance California articles',
    'Category:California ranchos',
);

### Page properties requested by both iterators: the page's templates, plus
### whichever of the three listed categories it belongs to (run() uses these
### to choose an importance rating).
my %page_props=(
    prop         => 'categories|templates',
    clcategories => join('|',
        'Category:Unincorporated communities in California',
        'Category:Ghost towns in California',
        'Category:California ranchos',
    ),
    cllimit      => 'max',
    tllimit      => 'max',
);

### Iterator specifications. new() copies this list and run() starts the
### iterators one at a time, in this order.
my @iterators=(
    # Non-redirect pages in namespace 1 that transclude one of the banners
    # being merged.
    {
        generator      => 'embeddedin',
        geititle       => ['Template:SFBAProject','Template:WikiProject Los Angeles'],
        geinamespace   => 1,
        geifilterredir => 'nonredirects',
        geilimit       => '100',
        %page_props,
    },
    # Members (namespaces 0 and 1) of the categories in @cats.
    {
        generator    => 'categorymembers',
        gcmtitle     => \@cats,
        gcmnamespace => '0|1',
        gcmlimit     => '100',
        %page_props,
    },
);

### Filter function: manipulate the found data as necessary, returning the talk
### page to tag (or undef to skip).
###
### Argument: the page hash produced by an iterator. Keys read here are
### 'redirect', 'ns' and 'title'.
###
### Side effect: for a namespace-0 page the hash's 'title' is rewritten IN
### PLACE to "Talk:<title>", so the caller sees the talk title afterwards.
###
### Returns the (possibly rewritten) title, or undef for a redirect.
sub filter {
    my $page=$_[0];

    # Skip redirects. MediaWiki's legacy JSON output marks boolean flags by
    # the mere presence of the key, with an empty-string value, and run()
    # tests the edit token's 'redirect' with exists() accordingly. A plain
    # truthiness test would miss that form, so accept either an empty string
    # or any true value. A key that is present but undef or 0 still means
    # "not a redirect".
    # NOTE(review): whether the iterator passes the flag through in the
    # legacy form is not visible from this file - confirm against the API
    # wrapper.
    if(exists($page->{'redirect'})){
        my $flag=$page->{'redirect'};
        return undef if defined($flag) && ($flag eq '' || $flag);
    }

    # Articles are tagged on their talk page; anything else keeps its title.
    $page->{'title'}='Talk:'.$page->{'title'} if $page->{'ns'}==0;
    return $page->{'title'};
}

### How to copy other projects' assessments.
###
### run() calls this as copy_class($api, $title, $wikitext). The wikitext is
### handed to the API object's WPBmax() in list context and the first element
### of the result is returned; the title is not used. run() treats a HASH
### reference result as an error and an empty string as "no class to copy".
### NOTE(review): WPBmax() presumably reports the highest assessment among the
### banners already on the page - confirm against its documentation.
sub copy_class {
    my ($api, undef, $wikitext)=@_;
    return ($api->WPBmax($wikitext))[0];
}

### Importance counterpart of copy_class(). Always returns the empty string,
### whatever arguments it is given; run() computes the importance itself from
### the subject page's categories and infobox.
sub copy_importance {
    my $no_importance='';
    return $no_importance;
}

# Banner configurations.
#
# $main_banner          - the banner run() adds or updates on each page.
# @preprocess_templates - banners run() replaces with (merges into) the main
#                         banner.
# %banner_cfgs          - per-banner settings, keyed by banner name. On its
#                         first pass run() feeds each entry that has a 'meta'
#                         key through $api->WPBMetaConfig and hands the result
#                         to $api->WPBconfig.
my $main_banner='WikiProject California';
my @preprocess_templates=(
    'SFBAProject',
    'WikiProject Los Angeles',
);
my %banner_cfgs=(
    'WikiProject California' => { stubauto => 'auto', meta => 0 },
);

# Constructor. Builds the object through the parent class's new(), then sets
# the state run() keeps between calls:
#   'config loaded' - false until run() has loaded the banner configuration
#   'iter'          - the iterator run() is currently consuming, if any
#   'iterators'     - this object's own copy of the iterator specs that have
#                     not been started yet (run() shifts them off one by one)
sub new {
    my ($class)=@_;
    my $self=$class->SUPER::new();
    @{$self}{'config loaded', 'iter', 'iterators'}=(0, undef, [@iterators]);
    return bless $self, $class;
}

=pod

=for info
Approved 2009-04-08<br />[[Wikipedia:Bots/Requests for approval/AnomieBOT 28]]

=cut

# Approval flag for this task: always returns 1.
sub approved {
    my $is_approved=1;
    return $is_approved;
}

# Do one slice of work: walk the configured iterators and, for each page found,
# merge the preprocess banners into the main banner, add or update the main
# banner with class/importance parameters, tidy the banner shell and save.
#
# Arguments:
#   $self - the task object from new(); holds the current iterator and the
#           iterator specs not yet started, so work resumes where the last
#           call stopped.
#   $api  - the bot API object used for every query, edit, log and warning,
#           and for the per-page store.
#
# Returns a number, which this code's own comments treat as a retry delay in
# seconds:
#   0   - the 5-minute budget is used up; let another task have a go
#   60  - a failure that stops the pass (iterator error, redirect list or
#         subject-page load failure)
#   300 - the task has been shut off
#   600 - every iterator is exhausted; the iterator list is reset first so a
#         later call re-checks pages that failed this time
#
# Pages are marked done by storing $seq under their page id in $api->store.
# Pages that fail with an error are left unmarked on purpose so they are
# retried on a later pass.
sub run {
    my ($self, $api)=@_;

    $api->task('WikiProjectWorker', 0, 10, qw/d::WikiProjectTagging/);
    my $errto = 'Errors? [[User:'.$api->user.'/shutoff/WikiProjectWorker]]';

    # Load configs, if necessary
    if(!$self->{'config loaded'}){
        my %loaded=();
        while(my ($banner,$banner_cfg)=each %banner_cfgs){
            # Entries with a 'meta' key are expanded by WPBMetaConfig first.
            $banner_cfg=$api->WPBMetaConfig($banner_cfg->{'meta'}, %$banner_cfg) if exists($banner_cfg->{'meta'});
            $loaded{$banner}=$banner_cfg;
        }
        $api->WPBconfig(%loaded);

        # Names that redirect to the settlement infobox, keyed by title; used
        # below to recognise the infobox under any of its names. An entry
        # under the empty key signals an error.
        my %r=$api->redirects_to('Template:Infobox settlement');
        if(exists($r{''})){
            $api->warn("Failed to get Infobox Settlement redirects: ".$r{''}{'error'}."\n");
            return 60;
        }
        $self->{'settle'}=\%r;

        $self->{'config loaded'}=1;
    }
    if(($api->store->{'configured'} // 0) < $seq){
        ### Initialize configuration here
    }

    # Spend a max of 5 minutes on this task before restarting
    my $endtime=time()+300;

    while(1){
        # Resume the iterator left over from the previous call, or start the
        # next one from the list; stop when the list is empty.
        my $iter=$self->{'iter'};
        if(!defined($iter)){
            my $i=shift @{$self->{'iterators'}};
            last unless $i;
            $iter=$api->iterator(%$i);
            $self->{'iter'}=$iter;
        }
        while(my $page=$iter->next()){
            if(!$page->{'_ok_'}){
                $api->warn("Could not retrieve page from iterator: ".$page->{'error'}."\n");
                return 60;
            }

            # Skip pages already handled in this (or a later) run sequence.
            my $pageid=$page->{'pageid'};
            next if ($api->store->{$pageid} // 0) >= $seq;

            # filter() may rewrite $page->{'title'} to the talk page title.
            my $title=filter($page);
            if(!defined($title)){
                $api->log("Skipping ".$page->{'title'}.", filter returned undef");
                $api->store->{$pageid}=$seq;
                next;
            }

            my $tok=$api->edittoken($title, EditRedir => 1);
            if($tok->{'code'} eq 'shutoff'){
                $api->warn("Task disabled: ".$tok->{'content'}."\n");
                return 300;
            }
            if($tok->{'code'} ne 'success'){
                # Not marked as done, so the page is retried on a later pass.
                $api->warn("Failed to get edit token for $title: ".$tok->{'error'}."\n");
                next;
            }
            # Talk namespaces have odd numbers; never tag anything else.
            if(($tok->{'ns'}&1)==0){
                $api->log("Cannot edit $title: namespace ".$tok->{'ns'}." is non-talk");
                $api->store->{$pageid}=$seq;
                next;
            }
            if(exists($tok->{'redirect'})){
                $api->log("$title is a redirect, skipping.");
                $api->store->{$pageid}=$seq;
                next;
            }

            $api->log("Checking $title...");

            # Current talk page text ('' when there is no revision), with
            # nowiki sections stripped out until just before saving.
            my $intxt=exists($tok->{'revisions'}[0]{'*'})?$tok->{'revisions'}[0]{'*'}:'';
            my ($outtxt,$nowiki)=$api->strip_nowiki($intxt);

            # Parameters to set on the main banner, as "key=value" strings.
            my @params=();

            # Class: copy it from the page's existing banners if possible,
            # otherwise ask WPBassess and flag auto-detected stubs.
            my $assess = undef;
            my $class = copy_class($api, $title, $intxt);
            if(ref($class) eq 'HASH'){
                $api->warn("Processing $title failed: ".$class->{'error'}."\n");
                next;
            }
            if($class eq ''){
                $assess=$api->WPBassess($title);
                if(ref($assess) eq 'HASH'){
                    if($assess->{'code'} eq 'pagemissing'){
                        # No subject page, doesn't matter
                        $assess=undef;
                    } else {
                        $api->warn("Processing $title failed: ".$assess->{'error'}."\n");
                        next;
                    }
                }
                push @params, 'auto=yes' if ($assess // '') eq 'stub';
            } else {
                push @params, "class=$class";
                my $imp = copy_importance($api, $title, $intxt);
                if(ref($imp) eq 'HASH'){
                    $api->warn("Processing $title failed: ".$imp->{'error'}."\n");
                    next;
                }
                #push @params, "importance=$imp" if $imp eq '';
            }

            # True while the page still lacks the main banner.
            # NOTE(review): relies on WPBcheck returning 0 in scalar context
            # when the banner is absent - confirm.
            my $need_main_banner = ($api->WPBcheck($outtxt, $main_banner) == 0);

            ### PROCESSING ###

            my @merge=();
            my @summary=();

            if(@preprocess_templates){
                # The callback is invoked for each preprocess banner found.
                # It records the banner for the summary, carries its
                # importance over as a task-force parameter, and replaces the
                # banner with the main banner (first one only, and only if
                # the page lacks it) or with nothing.
                (undef,$outtxt)=$api->WPBcheck($outtxt, sub {
                    my $banner=shift;
                    my $name=shift;
                    my $oname=shift;
                    my $params=shift;
                    my $wikitext=shift;

                    my $imp=undef;
                    foreach (@$params){
                        $imp=$1 if /^\s*importance\s*=\s*(\S.*?)\s*$/;
                    }
                    push @merge, $banner;
                    push @params, 'sfba=yes', defined($imp)?"sfba-importance=$imp":() if $banner eq 'SFBAProject';
                    push @params, 'la=yes', defined($imp)?"la-importance=$imp":() if $banner eq 'WikiProject Los Angeles';

                    return '' unless $need_main_banner;
                    $need_main_banner=0;
                    return "{{$main_banner}}";
                }, @preprocess_templates);
                if(ref($outtxt) eq 'HASH'){
                    $api->warn("Preprocessing $title failed: ".$outtxt->{'error'}."\n");
                    next;
                }
                if(@merge){
                    # "A", "A and B" or "A, B, and C"
                    $merge[-1]='and '.$merge[-1] if @merge>1;
                    push @summary, "merging ".join((@merge>2)?', ':' ', @merge)." into $main_banner";
                #} else {
                #    $api->warn("$title contains ".$iter->iterval.", but not in section 0\n");
                #    next;
                }
            }

            # Importance is derived from the subject page. Reuse the iterator's
            # data only if it already describes that page; otherwise fetch
            # the subject page's categories and templates.
            my $subject_title=$title;
            $subject_title=~s/^Talk://;
            my $subject=$page;
            if($page->{'title'} ne $subject_title){
                my $res=$api->query(
                    titles       => $subject_title,
                    prop         => 'categories|templates',
                    clcategories => 'Category:Unincorporated communities in California|Category:Ghost towns in California|Category:California ranchos',
                    cllimit      => 'max',
                    tllimit      => 'max',
                );
                if($res->{'code'} eq 'shutoff'){
                    $api->warn("Task disabled: ".$res->{'content'}."\n");
                    return 300;
                }
                if($res->{'code'} ne 'success'){
                    $api->warn("Could not load $subject_title info: ".$res->{'error'}."\n");
                    return 60;
                }
                $subject=(values %{$res->{'query'}{'pages'}})[0];
            }

            # Either list may be missing from the result; fall back to an
            # empty list explicitly instead of relying on autovivification.
            my $cats=$subject->{'categories'} // [];
            my $tmpls=$subject->{'templates'} // [];

            my $imp='';
            if(grep($_->{'title'} eq 'Category:Unincorporated communities in California', @$cats)){
                $imp='low';
            } elsif(grep($_->{'title'} eq 'Category:Ghost towns in California', @$cats)){
                $imp='low';
            } elsif(grep($_->{'title'} eq 'Category:California ranchos', @$cats)){
                $imp='mid';
            } elsif(grep($_->{'title'} eq 'Template:Infobox settlement', @$tmpls)){
                # Settlements are rated by the population given in the infobox.
                my $raw=$api->rawpage($subject->{'title'});
                if($raw->{'code'} eq 'shutoff'){
                    $api->warn("Task disabled: ".$raw->{'content'}."\n");
                    return 300;
                }
                if($raw->{'code'} ne 'success'){
                    $api->warn("Could not load $subject->{title} content: ".$raw->{'error'}."\n");
                    return 60;
                }
                my $content=$raw->{'content'};
                my $pop=undef;
                # NOTE(review): apart from the early "return undef", this
                # callback has no explicit return, so it yields the value of
                # its last statement. The result of process_templates is
                # discarded here, and the callback is intentionally left as it
                # was in case process_templates reacts to that value.
                $api->process_templates($content, sub {
                    my $name=shift;
                    my $params=shift;
                    my $wikitext=shift;
                    shift; # $data
                    my $oname=shift;
                    return undef unless(exists($self->{'settle'}{"Template:$name"}));
                    my %pop=();
                    foreach (@$params){
                        $pop{$1}=$2 if /^\s*(population(?:|_total|_metro|_urban|_est))\s*=\s*([0-9,]+)\s*$/;
                    }
                    $pop=($pop{'population'} // $pop{'population_total'} // $pop{'population_est'} // $pop{'population_metro'} // $pop{'population_urban'} // undef);
                    $pop=~s/,//g if defined($pop);
                }, undef);
                if(defined($pop)){
                    $imp='low';
                    $imp='mid' if $pop>=25000;
                    $imp='high' if $pop>=100000;
                }
            }
            # Pushed even when empty; the callback below does not count an
            # empty importance as a change worth an edit.
            push @params, "importance=$imp";

            # Add the main banner, or update an existing one via the callback.
            $outtxt=$api->WPBadd($outtxt, $assess, sub {
                shift; # banner
                shift; # name
                my $oname=shift;
                my $params=shift;
                my $wikitext=shift;
                my $new=shift;
                # A banner that is newly added needs no adjusting here.
                return undef if $new;

                my $any=0;
                foreach my $p (@params){
                    next unless $p=~/^(.+?)=(.*)$/;
                    my ($k,$v)=($1,$2);

                    # An existing non-blank value always wins.
                    next if grep(/^\s*\Q$k\E\s*=\s*\S/, @$params);
                    # Already set to exactly this (possibly blank) value.
                    next if grep(/^\s*\Q$k\E\s*=\s*\Q$v\E\s*$/s, @$params);

                    # Fill in every blank occurrence of the parameter in
                    # place, keeping its surrounding whitespace; append the
                    # parameter if there was none to fill.
                    my $filled=0;
                    foreach my $existing (@$params){
                        $filled++ if $existing=~s/^(\s*\Q$k\E\s*=\s*?)(?:\S.*?)?(\s*)$/$1$v$2/s;
                    }
                    push @$params, "$k=$v" unless $filled;
                    $any=1 if($v ne '' || $k ne 'importance');
                }
                return $wikitext unless $any;

                my $out="{{$oname";
                $out.="|".join("|", @$params) if @$params;
                $out.="}}";
                return $wikitext if $out eq $wikitext;
                push @summary, "updating {{$main_banner}}" unless @summary;
                return $out;
            }, $main_banner, @params);
            if(ref($outtxt) eq 'HASH'){
                $api->warn("Processing $title failed: ".$outtxt->{'error'}."\n");
                next;
            }
            # Nothing merged and nothing updated: any change is a new banner.
            push @summary, "Tagging with {{$main_banner}}" unless @summary;

            $outtxt=$api->replace_nowiki($outtxt, $nowiki);

            # Need to edit?
            if($outtxt ne $intxt){
                # Safety net only: the push above currently guarantees that
                # @summary is non-empty by this point.
                if(!@summary){
                    $api->warn("$title changed, but nothing in \@summary\n");
                    next;
                }
                $summary[-1]='and '.$summary[-1] if @summary>1;
                my $summary = ucfirst(join((@summary>2)?', ':' ', @summary))." per $req $errto";

                # Tidy the banner shell only on pages being edited anyway.
                my @cleanup=();
                $outtxt=$api->WPBfixshell($outtxt, \@cleanup);
                if(ref($outtxt) eq 'HASH'){
                    $api->warn("Processing $title failed: ".$outtxt->{'error'}."\n");
                    next;
                }
                $summary.="; general banner cleanup (".join(', ', @cleanup).")" if @cleanup;

                $api->log("$summary in $title");
                my $r=$api->edit($tok, $outtxt, $summary, 1, 1);
                if($r->{'code'} ne 'success'){
                    $api->warn("Write failed on $title: ".$r->{'error'}."\n");
                    next;
                }
            } else {
                $api->log("Nothing to do in $title");
            }

            # Remember that we processed this page already
            $api->store->{$pageid}=$seq;
        } continue {
            # If we've been at it long enough, let another task have a go.
            # This sits in the continue block so that pages left via "next"
            # (already done, skipped or failed) count against the budget too.
            # The iterator stays in $self->{'iter'}, so the next call resumes
            # with the following page.
            return 0 if time()>=$endtime;
        }
        $self->{'iter'}=undef;
    }

    # No more pages to check, try again in 10 minutes or so in case of errors.
    $self->{'iter'}=undef;
    $self->{'iterators'}=[@iterators];
    $api->log("WikiProjectWorker may be DONE!");
    return 600;
}