User:AnomieBOT/source/tasks/PERTableUpdater.pm

This is an old revision of this page, as edited by AnomieBOT (talk | contribs) at 01:24, 30 September 2013 (Updating published sources: PERTableUpdater: * Add a comment to document the structure of the @data array.). The present address (URL) is a permanent link to this revision, which may differ significantly from the current revision.
package tasks::PERTableUpdater;

=pod

=begin metadata

Bot:     AnomieBOT
Task:    PERTableUpdater
BRFA:    N/A
Status:  Begun 2011-12-04
Created: 2011-12-01

Update [[User:AnomieBOT/PERTable]], [[User:AnomieBOT/SPERTable]], and [[User:AnomieBOT/EDITREQTable]].

=end metadata

=cut

use utf8;
use strict;

use AnomieBOT::Task qw/:time bunchlist/;
use URI::Escape;
use Data::Dumper;
use vars qw/@ISA/;
@ISA=qw/AnomieBOT::Task/;

my %colors=(
    'red'    => ' style="background-color:#ffbfdc"',
    'yellow' => ' style="background-color:#fff9bf"',
    'green'  => ' style="background-color:#e4ffcc"',
    'clear'  => '',
);
my %protact=(
    'modify'    => 'Modified',
    'protect'   => 'Protected',
    'unprotect' => 'Unprotected',
);

sub new {
    my $class=shift;
    my $self=$class->SUPER::new();
    bless $self, $class;
    return $self;
}

=pod

=for info
Per [[WP:BOT#Approval]], any bot or automated editing process that only
affects only the operators' user and talk pages (or subpages thereof),
and which are not otherwise disruptive, may be run without prior
approval.

=cut

sub approved {
    return 999;
}

sub run {
    my ($self, $api)=@_;
    my $res;

    $api->task('PERTableUpdater', 0, 10, qw/d::Sections d::Timestamp d::Talk d::IWNS/);

    my $screwup=' Errors? [[User:'.$api->user.'/shutoff/PERTableUpdater]]';

    # Upgrade
    if(exists($api->store->{"pages0"})){
        $api->store->{"PER pages"}=$api->store->{"pages0"};
        delete $api->store->{"pages0"};
    }
    if(exists($api->store->{"pages1"})){
        $api->store->{"SPER pages"}=$api->store->{"pages1"};
        delete $api->store->{"pages1"};
    }

    # First, load MediaWiki:Titleblacklist to catch pages protected by
    # that mechanism
    my %tb=();
    my %sources=(
        'Title blacklist' => 'MediaWiki:Titleblacklist',
        'Global title blacklist' => 'meta:Title blacklist',
    );
    while(my ($k,$page)=each %sources){
        my $tb=$api->rawpage($page);
        if($tb->{'code'} ne 'success'){
            $api->warn("Failed to load $page: ".$tb->{'error'}."\n");
            return 60;
        }
        for my $line (split /\r?\n/, $tb->{'content'}){
            my $re=$line;
            my %opts=();
            $re=~s/^\s*([^#]*?)\s*(?:#.*)?$/$1/;
            if($re=~s/\s*<([^<>]*)>$//){
                my $opts=$1;
                $opts=~s/^\s+|\s+$//g;
                for my $opt (split /\s*\|\s*/, $opts){
                    if($opt=~/^([^=]*?)\s*=\s*(.+)$/){
                        $opts{lc($1)}=$2;
                    } else {
                        $opts{lc($opt)}=1;
                    }
                }
            }
            $re=~s/_/ /g;
            $re="(?-i:$re)" if($opts{'casesensitive'}//0);

            $re=~s!({{\s*ns\s*:\s*(.+?)\s*}})! replace_ns($api,$2) // $1 !ge;

            # Let's just hope no one ever uses {{int:}} here...

            $tb{$re}={ source=>"[[$page|$k]]", line=>$line, opts=>\%opts } unless(($opts{'moveonly'}//0) || ($opts{'newaccountonly'}//0));
        }
    }

    # Fields are:
    # 0: Namespaces to color green instead of clear
    # 1: "Tag", also used in the name of the subpage the table is put on
    # 2: Category name, no prefix
    # 3: Type of request, i.e. "$type edit requests"
    # 4: URL fragment for request links
    # 5: NID component of the urn links
    # 6: List of colors to apply based on page protection level:
    #    0: unprotected
    #    1: semi-protected
    #    2: semi-protected via title blacklist or cascading
    #    3: User CSS/JS page
    #    4: fully protected
    #    5: fully protected via title blacklist or cascading
    #    6: MediaWiki-namespace page
    my @data=(
        [[10,828],'PER','Wikipedia protected edit requests','protected','editprotected','x-wp-editprotected',[qw/red red red yellow clear yellow yellow/]],
        [[10,828],'SPER','Wikipedia semi-protected edit requests','semi-protected','editsemiprotected','x-wp-editsemiprotected',[qw/red clear yellow red red red red/]],
        [[0],'EDITREQ','Requested edits','COI','requestedit','x-wp-requestedit',[qw/clear yellow yellow red red red red/]],
    );
    my $starttime=time;
    for my $data (@data){
        my ($greenns,$tag,$cat,$type,$tgt,$urn,$colors)=@$data;
        my $iter=$api->iterator(
            generator      => 'categorymembers',
            gcmtitle       => "Category:$cat",
            gcmlimit       => 'max',
            prop           => 'info|extlinks',
            elprotocol     => 'urn',
            ellimit        => 'max',
        );
        my %oldpages=%{$api->store->{"$tag pages"}//{}};
        my %pages=();
        while(my $p=$iter->next){
            if(!$p->{'_ok_'}){
                $api->warn("Failed to retrieve members for CAT:$tag: ".$p->{'error'}."\n");
                return 60;
            }
            next unless $p->{'ns'}&1;
            my @pages = map {
                return () unless $_->{'*'}=~/^urn:$urn:(.+)$/i;
                my $url = $1;
                $url=~s/\+/ /g;
                $url = uri_unescape( $url );
                utf8::decode( $url );
                $url;
            } @{$p->{'extlinks'}//[]};
            unless(@pages){
                my $t=$p->{'title'};
                if($p->{'ns'}==1){
                    $t=~s/^Talk://;
                } else {
                    $t=~s/^([^:]+) talk:/$1:/;
                }
                push @pages, $t;
            }
            for my $t (@pages) {
                $pages{$t}=($oldpages{$t} // {
                        ns      => $p->{'ns'}&~1,
                        title   => $t,
                        talk    => $p->{'title'},
                        touched => ISO2timestamp($p->{'touched'}),
                    });
                delete $pages{$t}{'color'};
                delete $pages{$t}{'prottype'};
                delete $pages{$t}{'reason'};
                delete $pages{$t}{'logtitle'};
            }
        }
        $api->store->{"$tag pages"}=\%pages;

        if(%pages){
            $iter=$api->iterator(
                titles => bunchlist(500, keys %pages),
                prop   => 'info',
                inprop => 'protection',
            );
            while(my $p=$iter->next){
                if(!$p->{'_ok_'}){
                    $api->warn("Failed to retrieve members for CAT:$tag: ".$p->{'error'}."\n");
                    return 60;
                }
                my $t=$p->{'title'};
                my $pd=$pages{$t};

                # Protection scoring "bitmap":
                # 0x80 = MediaWiki-namespace auto-protection
                # 0x40 = Full protection
                # 0x20 = User script auto-protection
                # 0x10 = Semi-protection
                # 0x08 = Directly-applied protection
                # 0x04 = Cascading protection
                # 0x02 = Title blacklist protection
                # Highest score by int value "wins".
                my $protscore=0;
                $pd->{'prottype'}='Not protected';
                $pd->{'reason'}='';
                if($p->{'ns'}==8){
                    $pd->{'prottype'}='MediaWiki page';
                    $protscore=0x80;
                } elsif($p->{'ns'}==2 && $t=~m!/.*\.js$!){
                    $pd->{'prottype'}='User JS page';
                    $protscore=0x20;
                } elsif($p->{'ns'}==2 && $t=~m!/.*\.css$!){
                    $pd->{'prottype'}='User CSS page';
                    $protscore=0x20;
                }
                while(my ($re,$data)=each %tb){
                    next unless $t=~/^(?:$re)$/si;
                    next unless(exists($p->{'missing'}) || ($data->{'opts'}{'noedit'}//0));
                    my $sc=exists($data->{'opts'}{'autoconfirmed'})?0x12:0x42;
                    next if $sc<$protscore;
                    $pd->{'prottype'}=$data->{'source'};
                    my $line=$data->{'line'};
                    $pd->{'reason'}=qq(Matching line: <syntaxhighlight lang="text" enclose="none">$line</syntax).qq(highlight>);
                    $protscore=$sc;
                }

                my $expiry=undef;
                my $pg=$t;
                for my $pp (@{$p->{'protection'}//[]}){
                    next unless $pp->{'type'} eq 'edit';
                    my $sc=0;
                    $sc|=($pp->{'level'} eq 'sysop')?0x40:0x10;
                    $sc|=exists($pp->{'source'})?0x04:0x08;
                    next if $sc<$protscore;
                    if(exists($pp->{'source'})){
                        $pg=$pp->{'source'};
                        $pd->{'prottype'}="Cascade-protected from [[:$pg]]";
                    } else {
                        $pg=$t;
                        $pd->{'prottype'}='Fully protected' if $pp->{'level'} eq 'sysop';
                        $pd->{'prottype'}='Semiprotected' if $pp->{'level'} eq 'autoconfirmed';
                    }
                    $pd->{'prottype'}.=strftime(', expires %F at %T UTC', gmtime ISO2timestamp($pp->{'expiry'})) if $pp->{'expiry'} ne 'infinity';
                    $protscore=$sc;
                    $pd->{'reason'}='';
                }

                $pd->{'color'}=$colors->[0];
                if($protscore & 0x10){
                    $pd->{'color'}=$colors->[1];
                    $pd->{'color'}=$colors->[2] if($protscore & 0x06);
                }
                $pd->{'color'}=$colors->[3] if($protscore & 0x20);
                if($protscore & 0x40){
                    $pd->{'color'}=$colors->[4];
                    $pd->{'color'}=$colors->[5] if($protscore & 0x06);
                }
                $pd->{'color'}=$colors->[6] if($protscore & 0x80);
                $pd->{'color'}='green' if($pd->{'color'} eq 'clear' && grep($p->{'ns'}==$_, @$greenns));

                if($pd->{'reason'} eq ''){
                    my $iter=$api->iterator(
                        list    => 'logevents',
                        letype  => 'protect',
                        letitle => $pg,
                    );
                    my $from='';
                    while(my $le=$iter->next){
                        if(!$le->{'_ok_'}){
                            $api->warn("Failed to retrieve protection log for $pg: ".$le->{'error'}."\n");
                            return 60;
                        }
                        if($le->{'action'} eq 'move_prot'){
                            $from="From [[:".$le->{'0'}."]]: ";
                            $iter=$api->iterator(
                                list    => 'logevents',
                                letype  => 'protect',
                                letitle => $le->{'0'},
                                lestart => $le->{'timestamp'},
                            );
                            next;
                        }
                        next unless exists($protact{$le->{'action'}});
                        $le->{'timestamp'}=~s/T.*//;
                        my $comment=$le->{'comment'};
                        $comment=~s/\s*\[[^]]*\](?: \(expires [^)]*\))?$//;
                        $pd->{'reason'}=$from.$protact{$le->{'action'}}.' by [[User:'.$le->{'user'}.'|'.$le->{'user'}.']] on '.$le->{'timestamp'}.': "'.esccomment($comment).'"';
                        last;
                    }
                }
                $pd->{'logtitle'}=$pg;
            }
        }

        # The formatting here is a little strange, for backwards compat
        my $txt=qq(<div class="veblenbot-pertable">\n);
        $txt.=qq({| class="wikitable" style="padding:0em"\n);
        $txt.=qq(|-\n);
        my $ct=scalar keys %pages;
        my $s=($ct==1?'':'s');
        my $pg='User:AnomieBOT/'.$tag.'Table';
        $txt.=qq(! $ct [[:Category:$cat|$type edit request$s]] <div style="float:right;white-space:nowrap">[[$pg|v]]&middot;<span class="plainlinks">[//en.wikipedia.org/w/index.php?title=$pg&action=history h]</span></div>\n);
        $txt.=qq(|-\n);
        $txt.=qq(|\n);
        $txt.=qq({| class="wikitable sortable" width=100% style="margin:0em"\n);
        $txt.=qq(! Page\n);
        $txt.=qq(! Tagged since\n);
        $txt.=qq(! Protection level\n);
        $txt.=qq(! class = "unsortable" | Last protection log entry\n);
        for my $p (sort { $a->{'touched'} <=> $b->{'touched'} } values %pages){
            my $c=$colors{$p->{'color'}};
            my $t=$p->{'title'};
            my $et=encodetitle($p->{'logtitle'});
            $t=":$t" if($p->{'ns'}==6 || $p->{'ns'}==14);
            my $tt=$p->{'talk'};
            my $pt=$p->{'prottype'};
            my $r=$p->{'reason'};
            $txt.=qq(|-$c\n);
            $txt.=qq(| [[$t]] ([[$tt#$tgt|request]])\n);
            $txt.=strftime("| %F %H:%M\n", gmtime $p->{'touched'});
            $txt.=qq(| $pt <span class="plainlinks">([//en.wikipedia.org/w/index.php?title=Special:Log&type=protect&page=$et log])</span>\n);
            $txt.=qq(| $r\n);
        }
        $txt.=qq(|}\n);
        $txt.=qq(|-\n);
        $txt.=qq(|style="text-align:right;font-size:smaller"| Updated as needed. Last updated: <!--TS-->~~~~~<!--/TS-->\n);
        $txt.=qq(|}</div>);

        my $tok=$api->edittoken($pg, EditRedir=>1);
        if($tok->{'code'} eq 'shutoff'){
            $api->warn("Task disabled: ".$tok->{'content'}."\n");
            return 300;
        }
        if($tok->{'code'} ne 'success'){
            $api->warn("Failed to get edit token for $pg: ".$tok->{'error'}."\n");
            next;
        }
        my $intxt=$tok->{'revisions'}[0]{'*'}//'';
        $intxt=~s/^\s+|\s+$//;
        $intxt=~s#<!--TS-->.*<!--/TS-->#<!--TS-->~~~~~<!--/TS-->#g;
        if($intxt ne $txt){
            $api->log("Updating $pg ($ct request$s)");
            my $r=$api->edit($tok, $txt, "Update table ($ct request$s)", 0, 0);
            if($r->{'code'} ne 'success'){
                $api->warn("Write failed on $pg: ".$r->{'error'}."\n");
            }
        }
    }

    my $t=$starttime-time+300;
    $t=0 if $t<0;
    return $t;
}

sub esccomment {
    my $c=shift;
    $c=~s/{/&#x7b;/g;
    $c=~s/</&lt;/g;
    $c=~s/>/&gt;/g;
    $c=~s/~/&#x7e;/g;
    $c=~s/\|\]\]/]]/g; # Pipe trick
    $c=~s/\[\[\|/[[/g; # Reverse pipe trick
    return $c;
}

sub encodetitle {
    my $t=shift;
    $t=~s/ /_/g;
    $t=uri_escape_utf8($t, '^A-Za-z0-9_\-.:/~');
    return $t;
}

sub replace_ns {
    my ($api,$ns)=@_;

    if($ns=~/^([+-]?[0-9]+)/){
        $ns=int($1);
    } else {
        $ns=~s/_/ /g;
        my %x=$api->namespace_map();
        $ns=$x{$ns} // undef;
        return undef unless defined($ns);
    }
    my %x=$api->namespace_reverse_map();
    return $x{$ns} // undef;
}

1;