User:AnomieBOT/source/tasks/SourceUploader.pm

This is an old revision of this page, as edited by AnomieBOT (talk | contribs) at 22:28, 26 August 2010 (Updating published sources: AnomieBOT::API: * Add configuration variable (and accessor function) for the bot operator's username * Better logging when testing uploads All tasks: * Remove useless metadata field). The present address (URL) is a permanent link to this revision, which may differ significantly from the current revision.
package tasks::SourceUploader;

=pod

=begin metadata

Bot:     AnomieBOT
Task:    SourceUploader
BRFA:    N/A
Status:  Begun 2008-08-15
Created: 2008-08-16

Updates the pages under [[User:AnomieBOT/source]] to reflect the current source
of the bot.

=end metadata

=cut

use utf8;
use strict;

use AnomieBOT::Task;
use vars qw/@ISA/;
@ISA=qw/AnomieBOT::Task/;

use POSIX qw/strftime/;
use Pod::Simple::Wiki;
use tasks::SourceUploader::Pod;
use Data::Dumper;
use Fcntl ':mode';

# Maps a filename extension to the syntax-highlighting language name used
# when the file's contents are wrapped for display. Files whose extension is
# not listed here are published as a plain pre-formatted block instead.
my %extensions=(
    pl  => 'perl',
    pm  => 'perl',
    ini => 'ini',
    sh  => 'bash',
);

# Binary unit suffixes for human-readable file sizes, indexed by how many
# times the byte count was divided by 1024 (index 0 = plain bytes, no suffix).
my @sizes=('', qw/KiB MiB GiB TiB PiB EiB ZiB YiB/);


=pod

=for info
Per [[WP:BOT#Approval]], any bot or automated editing process that
affects only the operators' user and talk pages (or subpages thereof),
and which is not otherwise disruptive, may be run without prior
approval.

=cut

# Reports this task's approval value. It is a fixed positive number;
# determine_task_section() in this file treats any task whose approved()
# value is greater than zero as active.
sub approved { return 999 }

# Constructor. Walks the bot's source tree once and prepares everything that
# run() will later upload.
#
# Fields set on the returned object:
#   pages         - hashref: path relative to the source root => wikitext.
#                   Holds one entry per world-readable file, an optional
#                   "<path>/doc" (rendered POD) and "<path>/metadata" (task
#                   table row) for Perl files, and an index page per directory.
#   tasklist      - wikitext of the task list page, built from the metadata rows.
#   shutoff_pages - arrayref of "User:<bot>/shutoff/<task>" titles for tasks
#                   that did not declare shutoff=false.
#   summary       - edit summary: a fixed prefix plus the first ChangeLog section.
#   loadexisting  - flag telling run() to fetch the existing page list once.
#   order         - set to -1000 (NOTE(review): presumably a scheduling
#                   priority consumed by the task framework - confirm).
#
# Error handling: on any I/O failure a warning is logged and the object is
# returned with 'pages' empty, 'shutoff_pages' empty and no 'tasklist'. The
# results are only published to $self after the whole walk succeeded, so a
# failure part-way through can never leave run() with a partial shutoff list
# (which would make it treat every already-published page as deleted and
# blank it).
sub new {
    my $class=shift;
    my $self=$class->SUPER::new();
    $self->{'pages'}={};
    # Always defined, so run() can dereference it even after an early return.
    $self->{'shutoff_pages'}=[];
    $self->{'loadexisting'}=1;
    $self->{'order'}=-1000;

    my $basedir=$AnomieBOT::API::basedir;
    $basedir=~s{/$}{};
    if(!-d $basedir){
        AnomieBOT::API->warn("Cannot find source directory\n");
        return $self;
    }

    # Build the edit summary from the first ChangeLog section: everything
    # between the first "==...==" heading and the next one. The file is read
    # with the same :utf8 layer used for it in the tree walk below, so
    # non-ASCII text in the summary is decoded consistently.
    # NOTE(review): the path is relative to the current directory, not
    # $basedir - presumably the bot runs from its source directory; confirm.
    $self->{'summary'}='Updating published sources: ';
    my $changelog;
    if(!open($changelog, '<:utf8', 'ChangeLog')){
        AnomieBOT::API->warn("Cannot load changelog\n");
        return $self;
    }
    my $intro=1;
    while(defined(my $line=<$changelog>)){
        if($line=~/^==.*==$/){
            last if !$intro;
            $intro=0;
            next;
        }
        $self->{'summary'}.=$line if !$intro;
    }
    close($changelog);
    $self->{'summary'}=~s/\s+/ /g;
    $self->{'summary'}=~s/\s+$//;

    my %pages=();
    my %tasks=(
        '01 Current'           => [],
        '05 Awaiting approval' => [],
        '06 Past'              => [],
    );
    my @shutoff_pages=();
    my @dirs=($basedir);
    # Breadth-first walk: subdirectories are queued as they are found.
    while(my $dir=shift @dirs){
        my $dh;
        if(!opendir($dh, $dir)){
            AnomieBOT::API->warn("Cannot open directory $dir: $!\n");
            return $self;
        }
        my $dirpage=substr($dir,length($basedir));
        $dirpage='/'.$dirpage if(substr($dirpage,0,1) ne '/');

        $dirpage ="==Index of $dirpage==\n";
        $dirpage.="{| class=\"wikitable sortable\" style=\"width:100%\"\n";
        $dirpage.="! Filename !! Size !! Modified\n";
        # defined() so that an entry literally named "0" does not end the loop.
        while(defined(my $page=readdir($dh))){
            next if substr($page,0,1) eq '.';

            my $p="$dir/$page";
            my $pp=substr($p,length($basedir));
            my @stat=stat($p);
            my $img='Gnome-fs-executable.svg';
            if(-d $p){
                # Only publish directories that are world-readable and -searchable.
                next if ($stat[2]&(S_IROTH|S_IXOTH))!=(S_IROTH|S_IXOTH);
                push @dirs, $p;
                $img='Gnome-fs-directory-visiting.svg';
            } elsif(-f $p){
                # Only publish world-readable files.
                next if ($stat[2]&(S_IROTH))!=(S_IROTH);
                my $fh;
                if(!open($fh, '<:utf8', $p)){
                    AnomieBOT::API->warn("Cannot open file $p: $!\n");
                    return $self;
                }
                do {
                    local $/=undef;
                    $pages{$pp}=<$fh>;
                };
                close($fh);
                my $top='';
                if($page eq 'ChangeLog'){
                    # Pull out most recent 64K of changelog entries
                    my @x=split(/\n/, $pages{$pp});
                    $pages{$pp}='';
                    while(defined(my $x=shift @x)){
                        last if(length($pages{$pp})>65536 && $x=~/^==.*==$/);
                        $pages{$pp}.=$x."\n";
                    }
                    $pages{$pp}.="\x7b\x7bombox|type=notice|text=See the \x7b\x7bsubst:history|\x7b\x7bsubst:FULLPAGENAME\x7d\x7d|page history|subst=subst:\x7d\x7d for further ChangeLog entries\x7d\x7d\n" if @x;
                } elsif($page=~/\.([^.]+)$/ && exists($extensions{$1})){
                    # Save the highlighter language now rather than relying on
                    # $1 surviving until the end of this branch.
                    my $lang=$extensions{$1};
                    if($lang eq 'perl'){
                        # Try to construct POD documentation
                        my $parser=Pod::Simple::Wiki->new('mediawiki');
                        my $x='';
                        $parser->output_string(\$x);
                        $parser->parse_string_document($pages{$pp});
                        $pages{"$pp/doc"}=$x if($parser->content_seen);

                        # Handle embedded notices and metadata
                        $x='';
                        $parser=tasks::SourceUploader::Pod->new;
                        $parser->output_string(\$x);
                        $parser->parse_string_document($pages{$pp});
                        $top.=$x if($parser->content_seen);
                        my %metadata=$parser->metadata;
                        if(%metadata){
                            # Build a one-row table; the header and footer are
                            # inside noinclude so only the row is transcluded
                            # into the task list.
                            $x ="<noinclude>\n";
                            $x.="{| class=\"wikitable\"\n";
                            $x.="! Account !! Task !! Disable !! {{tlx|bots}} !! Status !! Description\n";
                            $x.="</noinclude>\n";
                            $x.="|- valign=\"top\"\n";
                            my $task=$metadata{'task'};
                            # Placeholder replaced with the real bot username
                            # by upload_page() at upload time.
                            my $mainbot="\x02BOT\x03";
                            my $bot=$metadata{'bot'} // $mainbot;
                            $x.="|align=\"center\"| [[User:$bot|$bot]]\n";
                            $x.="|align=\"center\"| [[User:$mainbot/source$pp|$task]]\n";
                            if(exists($metadata{'shutoff'}) && $metadata{'shutoff'} eq 'false'){
                                $x.="|align=\"center\"| No\n";
                            } else {
                                $x.="|align=\"center\"| <span class=\"plainlinks\">[{{fullurl:User:$bot/shutoff/$task|action=edit}} Here]</span>\n";
                                push @shutoff_pages, "User:$bot/shutoff/$task";
                            }
                            if(exists($metadata{'exclusion'}) && $metadata{'exclusion'} eq 'false'){
                                $x.="|align=\"center\"| {{N}}\n";
                            } else {
                                $x.="|align=\"center\"| {{Y}}\n";
                            }
                            my $brfa=$metadata{'brfa'};
                            my $status=$metadata{'status'};
                            if($brfa eq 'N/A'){
                                $x.="| \x7b\x7bsort|$status|\x7d\x7d\x5b\x5bWikipedia:Bot policy#Approval|N/A\x5d\x5d, only edits bot's/owner's userspace. $status\n";
                            } elsif($brfa eq 'None'){
                                $x.="| $status\n";
                            } else {
                                $x.="| \x5b\x5b$brfa|$status\x5d\x5d\n";
                            }
                            if(exists($metadata{'+brfa'})){
                                $x.="<p style=\"margin:0;padding:0;font-size:smaller\">Supplemental:<br />\n";
                                for(my $i=0; $i<@{$metadata{'+brfa'}}; $i++){
                                    $x.="+ \x5b\x5b".$metadata{'+brfa'}[$i].'|'.$metadata{'+status'}[$i]."\x5d\x5d<br />\n";
                                }
                                $x.="</p>\n";
                            }
                            $x.="|\n".$metadata{'*'}."\n";
                            $x.="<noinclude>\n";
                            $x.="|}\n";
                            $x.="</noinclude>";
                            $pages{"$pp/metadata"}=$x;
                            my $section=determine_task_section(".$pp", %metadata);
                            $tasks{$section}=[] unless exists($tasks{$section});
                            # Prefix with the creation date so the rows sort
                            # chronologically; the prefix is stripped again
                            # when the task list is assembled below.
                            push @{$tasks{$section}}, $metadata{'created'}." {{User:$mainbot/source$pp/metadata}}";
                        }
                    }
                    # The closing tag is split in two so this file can itself
                    # be published inside the same kind of tag.
                    $pages{$pp}="<source lang=\"".$lang."\">\n".$pages{$pp}."\n</so"."urce>";
                } else {
                    $pages{$pp}="<pre>\n".$pages{$pp}."\n</pre>";
                }
                $top.="\x7b\x7bombox|text=See \x5b\x5b/doc\x5d\x5d for formatted documentation\x7d\x7d\n" if(exists($pages{"$pp/doc"}));
                $pages{$pp}=$top.$pages{$pp};
                $img='Gnome-fs-regular.svg';
            } else {
                AnomieBOT::API->warn("Unusual filetype on $p\n");
            }

            # Human-readable size, truncated to two decimals. The loop stops
            # at the last available suffix so $sizes[$i] is always defined.
            my $sz=$stat[7];
            my $i=0;
            while($sz>=1024 && $i<$#sizes){ $sz/=1024; $i++; }
            ($sz="$sz")=~s/(\.\d\d)\d+/$1/;
            $sz.='&nbsp;'.$sizes[$i] if $i>0;
            $dirpage.="|-\n";
            $dirpage.="| \x5b\x5bImage:$img|32x32px\x5d\x5d";
            $dirpage.=" \x5b\x5b/$page|$page\x5d\x5d\n";
            # Zero-padded raw values are used as sort keys for the table.
            $dirpage.="|align=\"right\"| \x7b\x7bsort|".sprintf("%020d",$stat[7])."|$sz\x7d\x7d\n";
            $dirpage.="|align=\"center\"| \x7b\x7bsort|".sprintf("%020d",$stat[9]).strftime("|%F %T (UTC)", gmtime($stat[9]))."\x7d\x7d\n";
        }
        closedir($dh);
        $dirpage.="|}";
        $pages{substr($dir,length($basedir))}=$dirpage;
    }

    # Assemble the task list: one subsection per category, ordered by the
    # numeric prefix of the category key (the prefix itself is not displayed).
    my $tasklist="<noinclude>\nThis page lists basic information about every task coded for this bot, including links to the bot approval and individual shutoff pages.\n\n== Tasks ==\n</noinclude>";
    foreach my $section (sort keys %tasks){
        my @links=sort @{$tasks{$section}};
        $section=~s/^(\d+) //;
        my $n=$1;
        $tasklist.="\n=== $section ===\n";
        if(@links){
            $tasklist.="{| class=\"wikitable sortable\"\n";
            $tasklist.="! Account !! Task !! Disable !! {{tlx|bots}} !! ".(($n==1 || $n==2)?'Approval':'Status')." !! Description\n";
            # substr(...,11) drops the "YYYY-MM-DD " sort prefix added above.
            $tasklist.=join("\n", map { substr($_,11) } @links)."\n";
            $tasklist.="|}\n";
        } else {
            $tasklist.="None at this time.\n";
        }
    }

    # Publish the results only now that the whole walk has succeeded.
    $self->{'pages'}={%pages};
    $self->{'shutoff_pages'}=[@shutoff_pages];
    $self->{'tasklist'}=$tasklist;
    return $self;
}

# Performs one pass of uploading the prepared source pages.
#
# Parameters:
#   $self - task object prepared by new()
#   $api  - API object used for all queries, edits and logging
#
# Work done, in order:
#   1. On the first pass only, list the pages already under
#      "User:<bot>/source" and queue any that no longer correspond to a
#      source file for upload with empty text.
#   2. Upload every queued page via upload_page(); pages that uploaded
#      successfully are removed from $self->{'pages'} so they are not retried.
#   3. Upload the task list page.
#   4. For each shutoff page: create it if missing, and make the matching
#      talk page a redirect to the owning user's (redirect-resolved) talk page.
#
# Returns undef when there is nothing left to do, 0 when the pass ran to the
# end, 60 on a token/write/redirect-lookup failure, or 300 when the task is
# shut off or the page list cannot be fetched.
# NOTE(review): the numeric values look like retry delays in seconds
# interpreted by the task framework - confirm against the caller.
sub run {
    my ($self, $api)=@_;

    my @keys=keys(%{$self->{'pages'}});
    # Tolerate a missing list (e.g. a constructor that bailed out early)
    # instead of dying on an undefined array reference under strict.
    my @shutoff=@{$self->{'shutoff_pages'} // []};
    if(!@keys && !@shutoff && !exists($self->{'tasklist'})){
        $api->debug(2, "Source uploaded, terminating");
        return undef;
    }

    $api->task('SourceUploader', 0, 10, qw/d::Redirects/);

    my $src='User:'.$api->user.'/source';

    if($self->{'loadexisting'}){
        my %q=(
            list        => 'allpages',
            apprefix    => $api->user.'/source',
            apnamespace => '2',
            aplimit     => 'max'
        );

        my $res;
        do {
            $res=$api->query(%q);
            if($res->{'code'} ne 'success'){
                $api->warn("Failed to retrieve source tree: ".$res->{'error'}."\n");
                return 300;
            }
            if(exists($res->{'query-continue'})){
                $q{'apfrom'}=$res->{'query-continue'}{'allpages'}{'apfrom'};
            }
            foreach my $existing (@{$res->{'query'}{'allpages'}}){
                # Key pages by their suffix below the source root, matching
                # the keys built by new().
                my $suffix=substr($existing->{'title'}, length($src));
                if(!exists($self->{'pages'}{$suffix})){
                    # Page exists on-wiki but not in the source tree: queue it
                    # for upload with empty text.
                    $self->{'pages'}{$suffix}='';
                    push @keys, $suffix;
                }
            }
        } while(exists($res->{'query-continue'}));

        $self->{'loadexisting'}=0;
    }

    while(@keys){
        my $page=shift @keys;
        my $text=$self->{'pages'}{$page};
        my $ret=$self->upload_page($api, $src.$page, $text);
        return $ret if $ret>60;
        # A smaller non-zero result skips this page but keeps it queued.
        next if $ret>0;
        delete($self->{'pages'}{$page});
    }

    if(exists($self->{'tasklist'})){
        my $ret=$self->upload_page($api, 'User:'.$api->user.'/TaskList', $self->{'tasklist'});
        return $ret if $ret>60;
        delete($self->{'tasklist'});
    }

    while(@shutoff){
        my $page=shift @shutoff;
        my $tok=$api->edittoken($page);
        if($tok->{'code'} eq 'shutoff'){
            $api->warn("Task disabled: ".$tok->{'content'}."\n");
            return 300;
        }
        return 60 if($tok->{'code'} ne 'success');
        if(exists($tok->{'missing'})){
            my $r=$api->edit($tok, '{{subst:void}}', 'Creating empty shutoff page to avoid redlinks in summaries and to avoid confusing people', 1, 1);
            if($r->{'code'} ne 'success'){
                $api->warn("Write error for $page: ".$r->{'error'}."\n");
                return 60;
            } else {
                $api->log("Created $page");
            }
        }

        # Redirect the shutoff page's talk page to the owning user's talk
        # page, following any redirect that talk page itself has.
        my $tpage=$page;
        $tpage=~s/^User:/User talk:/;
        my $txt=$tpage;
        $txt=~s!^(User talk:[^/]+)/.*!$1!;
        my $r={$api->resolve_redirects($txt)};
        if(exists($r->{''})){
            $api->warn("Error fetching redirects for $page: ".$r->{''}{'error'}."\n");
            return 60;
        }
        $txt=$r->{$txt};
        $txt="#REDIRECT [[$txt]]\n\nThis page is unwatched. Please direct comments to [[$txt]].";
        $tok=$api->edittoken($tpage, EditRedir=>1, NoExclusion=>1);
        if($tok->{'code'} eq 'shutoff'){
            $api->warn("Task disabled: ".$tok->{'content'}."\n");
            return 300;
        }
        return 60 if($tok->{'code'} ne 'success');
        if(($tok->{'revisions'}[0]{'*'} // '') ne $txt){
            my $r=$api->edit($tok, $txt, 'Redirect useless unwatched talk page to someplace useful.', 1, 1);
            if($r->{'code'} ne 'success'){
                $api->warn("Write error for $tpage: ".$r->{'error'}."\n");
                return 60;
            } else {
                $api->log("Created $tpage");
            }
        }
        # Record progress so a later pass does not redo finished pages.
        $self->{'shutoff_pages'}=[@shutoff];
    }

    return 0;
}

# Writes $text to $page unless the page already holds exactly that text.
#
# Parameters:
#   $self - task object; $self->{'summary'} is used as the edit summary
#   $api  - API object (edittoken, user, edit, warn, log, debug are called)
#   $page - full title of the page to write
#   $text - desired wikitext; every "\x02BOT\x03" placeholder is replaced by
#           the bot's username, and trailing whitespace is ignored on both
#           sides of the comparison
#
# Returns 0 when the page is up to date or was written, 60 when the edit
# token could not be obtained or the write failed, 300 when the token
# request reports the task as shut off.
sub upload_page {
    my ($self, $api, $page, $text)=@_;

    my $tok=$api->edittoken($page);
    my $code=$tok->{'code'};
    if($code eq 'shutoff'){
        $api->warn("Task disabled: ".$tok->{'content'}."\n");
        return 300;
    }
    return 60 unless $code eq 'success';

    # Normalise the new text.
    my $bot=$api->user;
    $text=~s/\x02BOT\x03/$bot/g;
    $text=~s/\s+$//;

    # Normalise the current text the same way, then skip the edit if equal.
    my $is_missing=exists($tok->{'missing'});
    $tok->{'revisions'}[0]{'*'}=~s/\s+$// unless $is_missing;
    if(!$is_missing && $tok->{'revisions'}[0]{'*'} eq $text){
        $api->debug(2, "No update needed for $page\n");
        return 0;
    }

    my $r=$api->edit($tok, $text, $self->{'summary'}, 0, 1);
    if($r->{'code'} ne 'success'){
        $api->warn("Write error for $page: ".$r->{'error'}."\n");
        return 60;
    }
    $api->log("Updated $page");
    return 0;
}

# Validates a task's metadata and decides which section of the task list the
# task belongs in.
#
# Parameters:
#   $file     - path of the task's source file; it is passed to
#               AnomieBOT::API::load(), and its basename without ".pm" must
#               equal the metadata's 'task' value
#   %metadata - metadata fields parsed from the file
#
# Returns a section key of the form "NN Title"; the catch-all for anything
# inconsistent or unrecognised is '99 Invalid metadata'.
sub determine_task_section {
    my ($file, %metadata)=@_;
    my $invalid='99 Invalid metadata';

    # The task name the metadata has to declare, derived from the filename.
    (my $expected=$file)=~s{^.*/}{};
    $expected=~s/\.pm$//;

    # Mandatory fields.
    foreach my $field (qw/bot task brfa status created/){
        return $invalid unless exists($metadata{$field});
    }
    return $invalid unless $metadata{'task'} eq $expected;
    return $invalid unless $metadata{'created'}=~/^\d{4}-\d\d-\d\d$/;

    # Supplemental approvals need exactly one status per BRFA.
    if(exists($metadata{'+brfa'})){
        return $invalid unless exists($metadata{'+status'});
        return $invalid unless scalar(@{$metadata{'+status'}}) == scalar(@{$metadata{'+brfa'}});
    }

    # Optional boolean flags must be spelled 'true' or 'false'.
    foreach my $flag (qw/ondemand shutoff exclusion/){
        next unless exists($metadata{$flag});
        return $invalid unless($metadata{$flag} eq 'true' || $metadata{$flag} eq 'false');
    }

    # Load the task so its approved() value can be compared with the status.
    AnomieBOT::API::load($file);
    my $class='tasks::'.$metadata{'task'};
    return $invalid unless $class->can('approved');
    my $botnum=$class->approved;

    my $status=$metadata{'status'};
    my $no_brfa=($metadata{'brfa'} eq 'N/A');
    my $ondemand=(exists($metadata{'ondemand'}) && $metadata{'ondemand'} eq 'true');

    if($no_brfa){
        return "01 Current" if($botnum>0 && $status=~/^Begun \d{4}-\d{2}-\d{2}$/);
        return "02 On demand" if $ondemand;
    }
    if($status=~/^Approved \d{4}-\d{2}-\d{2}$/){
        return "01 Current" if $botnum>0;
        return $ondemand ? "02 On demand" : $invalid;
    }
    return "03 In trial" if($status=~/^In trial/);

    # Every remaining status describes a task that must not be active.
    return $invalid if $botnum>0;
    return "04 In development" if($status eq 'Coding' || $status eq 'On hold');
    return "06 Past" if $status=~/^Completed \d{4}-\d{2}-\d{2}$/;

    # The remaining statuses only make sense for tasks with a real BRFA.
    return $invalid if $no_brfa;
    return "05 Awaiting approval" if($status eq 'BRFA');
    return "07 Withdrawn" if $status eq 'Withdrawn';
    return "08 Rejected" if $status=~/^Rejected \d{4}-\d{2}-\d{2}$/;
    return $invalid;
}

1;