User:AnomieBOT/source/tasks/EnDashRedirectCreator.pm

This is an old revision of this page, as edited by AnomieBOT (talk | contribs) at 14:39, 21 December 2016 (Updating published sources: EnDashRedirectCreator: * Apply {{tlu|User:AnomieBOT/Auto-G8}} to created redirects.). The present address (URL) is a permanent link to this revision, which may differ significantly from the current revision.
package tasks::EnDashRedirectCreator;

=pod

=begin metadata

Bot:      AnomieBOT
Task:     EnDashRedirectCreator
BRFA:     Wikipedia:Bots/Requests for approval/AnomieBOT 74
Status:   Approved 2016-03-08
Created:  2016-03-03

Create redirects for articles with titles containing en-dashes from the
corresponding title with ASCII hyphens.

=end metadata

=cut

use utf8;
use strict;

use AnomieBOT::Task;
use Data::Dumper;
use vars qw/@ISA/;
@ISA=qw/AnomieBOT::Task/;

# Namespace numbers excluded from the replica query in run().  Each entry
# below is a subject namespace; the map expands it to the pair
# (subject, subject + 1) so the adjacent odd-numbered namespace is skipped too.
my @skipNs = map { ( $_, $_ + 1 ) } (
    2,    # User, probably not useful in most cases
    14,   # Category, doesn't use normal redirects
    118,  # Draft, probably not useful
    446,  # Education Program, probably not useful
    828,  # Module, doesn't use normal redirects
    2300, # Gadget, probably doesn't use normal redirects
    2302, # Gadget definition, probably doesn't use normal redirects
    2600, # Topic, probably doesn't use normal redirects
);

# Set of target namespace numbers that a mainspace redirect (or, with the low
# bit cleared, a Talk redirect) may point at; consulted by run().
my %crossNsOk = map { $_ => 1 } (
    0,   # Not actually cross
    4,   # Wikipedia, not eligible for CSD:R2
    10,  # Template, not eligible for CSD:R2
    12,  # Help, not eligible for CSD:R2
    14,  # Category, not eligible for CSD:R2
    100, # Portal, not eligible for CSD:R2
);

# Constructor: builds the object via the parent class constructor (called
# with no arguments) and re-blesses it into the invoking class.
sub new {
    my ($class) = @_;
    return bless( $class->SUPER::new(), $class );
}

=pod

=for info
Approved 2016-03-08<br />[[Wikipedia:Bots/Requests for approval/AnomieBOT 74]]

=cut

# Returns the constant 3.
# NOTE(review): the meaning of this value is defined by the AnomieBOT task
# framework, which is not visible in this file -- confirm there before changing.
sub approved {
    my $approval = 3;
    return $approval;
}

# Main task entry point.
#
# Queries the enwiki replica, in batches of 50, for pages whose title contains
# an en-dash and for which no page exists at the same title with ASCII hyphens,
# then creates a redirect from the hyphenated title to the en-dash title
# (bypassing double redirects and skipping unwanted cross-namespace cases).
#
# Parameters:
#   $self - the task object; $self->{'dbcontinue'} holds the SQL continuation
#           clause between calls so a later call resumes after the last batch.
#   $api  - the bot API object used for queries, edits and logging.
#
# Returns a number: 0 when halting or when the 5-minute time budget is used
# up, 60 after a failed API redirect query, 300 after a replica error or when
# the task is shut off, and 21600 once the whole list has been processed.
# NOTE(review): presumably these are seconds until the framework calls run()
# again -- confirm against AnomieBOT::Task.
sub run {
    my ($self, $api) = @_;

    $api->task('EnDashRedirectCreator', 0, 10, qw/d::Redirects d::IWNS/);
    # Appended to every edit summary so humans know where to stop the task.
    my $screwup=' Errors? [[User:'.$api->user.'/shutoff/EnDashRedirectCreator]]';

    my %ns = $api->namespace_map();
    my %rns = $api->namespace_reverse_map();

    my ($dbh, $schema);
    eval {
        ($dbh, $schema) = $api->connectToReplica( 'enwiki' );
    };
    if ( $@ ) {
        $api->warn( "Error connecting to replica: $@\n" );
        return 300;
    }

    # Resume from where the previous call left off, if anywhere.
    my $cont = $self->{'dbcontinue'} // '';
    my $skipNs = join( ',', @skipNs );

    # Spend a max of 5 minutes on this task before restarting
    my $endtime=time()+300;

    while ( 1 ) {
        return 0 if $api->halting;

        # Load the list of redirects needing creation
        my @rows;
        eval {
            @rows = @{ $dbh->selectall_arrayref( qq{
                SELECT p1.page_namespace AS ns, p1.page_title AS title
                FROM page as p1
                    LEFT JOIN page AS p2 ON ( p1.page_namespace = p2.page_namespace AND REPLACE( p1.page_title, '–', '-' ) = p2.page_title )
                WHERE p1.page_title LIKE '%–%' AND p2.page_id IS NULL AND p1.page_namespace NOT IN ($skipNs) $cont
                ORDER BY p1.page_namespace, p1.page_title
                LIMIT 50
            }, { Slice => {} } ) };
        };
        if ( $@ ) {
            $api->warn( "Error fetching page list from replica: $@\n" );
            return 300;
        }
        last unless @rows;

        # Map of hyphenated title => [ original en-dash title, current target ].
        my %redirects = ();
        for my $row (@rows) {
            utf8::decode( $row->{'title'} ); # Data from database is binary
            my $to = ( $row->{'ns'} ? $rns{$row->{'ns'}} . ':' : '' ) . $row->{'title'};
            $to =~ s/_/ /g;
            my $from = $to;
            $from =~ s/–/-/g;
            $redirects{$from} = [ $to, $to ];
        }

        if ( %redirects ) {
            # Bypass double redirects and remove missing target pages
            my $res = $api->query(
                titles => join('|', map( $_->[0], values %redirects)),
                redirects => 1
            );
            if($res->{'code'} ne 'success'){
                $api->warn("Failed to retrieve redirect list: ".$res->{'error'}."\n");
                return 60;
            }
            # Title => title it was normalized or redirected to.
            my %map = ();
            if ( exists($res->{'query'}{'normalized'} ) ) {
                $map{$_->{'from'}} = $_->{'to'} foreach @{$res->{'query'}{'normalized'}};
            }
            if ( exists($res->{'query'}{'redirects'} ) ) {
                $map{$_->{'from'}} = $_->{'to'} foreach @{$res->{'query'}{'redirects'}};
            }
            # Titles of pages that actually exist (have a non-zero pageid).
            my %exists = ();
            if ( exists($res->{'query'}{'pages'} ) ) {
                for my $p (values %{$res->{'query'}{'pages'}}) {
                    $exists{$p->{'title'}} = 1 if $p->{'pageid'}//0;
                }
            }
            # Only the key most recently returned by each() is ever deleted or
            # reassigned here, which Perl documents as safe during iteration.
            while( my ($redir, $targets) = each( %redirects ) ) {
                my ($origtarget, $target) = @$targets;
                my %seen=( $target => 1 );
                while ( exists( $map{$target} ) ) {
                    $target = $map{$target};
                    $redirects{$redir} = [ $origtarget, $target ];
                    if ( exists( $seen{$target} ) ) {
                        $api->warn("Redirect loop involving [[$target]]");
                        delete $redirects{$redir};
                        last;
                    }
                    $seen{$target}=1;
                }
                delete $redirects{$redir} unless exists( $exists{$target} );
            }

            # Now, create the redirects
            while( my ($redir, $targets) = each( %redirects ) ) {
                return 0 if $api->halting;

                my ($origtarget, $target) = @$targets;

                my $tok=$api->edittoken($redir, EditRedir => 1, imageinfo => { prop => '', limit => 1 });
                if($tok->{'code'} eq 'shutoff'){
                    $api->warn("Task disabled: ".$tok->{'content'}."\n");
                    return 300;
                }
                if($tok->{'code'} ne 'success'){
                    $api->warn("Failed to get edit token for $redir: ".$tok->{'error'}."\n");
                    next;
                }
                if ( !exists( $tok->{'missing'} ) ) {
                    $api->log("$redir already exists, skipping");
                    next;
                }
                if ( exists( $tok->{'imagerepository'} ) && $tok->{'imagerepository'} ne '' ) {
                    $api->log("$redir is an existing image (repo=$tok->{imagerepository}), skipping");
                    next;
                }

                # The namespace prefix is the text before the FIRST colon.  A
                # greedy match would capture up to the last colon, so a title
                # such as "File talk:Foo: bar" would fail the %ns lookup and be
                # misclassified as namespace 0, bypassing the checks below.
                my $redirNs = ( $redir =~ /^([^:]+):/ && exists( $ns{$1} ) ? $ns{$1} : 0 );
                my $targetNs = ( $target =~ /^([^:]+):/ && exists( $ns{$1} ) ? $ns{$1} : 0 );

                if ( $redirNs == 0 && !( $crossNsOk{$targetNs} // 0 ) ) {
                    $api->log("$redir to $target would be a cross-namespace redirect, skipping");
                    next;
                }

                # Clearing the low bit maps a talk namespace to its subject namespace.
                if ( $redirNs == 1 && !( $crossNsOk{$targetNs & ~1} // 0 ) ) {
                    $api->log("$redir to $target is the talk page of what would be a cross-namespace redirect, skipping");
                    next;
                }

                if ( $targetNs == 7 ) {
                    # Special rule for File talk: If the corresponding file doesn't exist, forget it.
                    my $n = $target;
                    $n =~ s/^[^:]*/File/;
                    my $res = $api->query( titles => $n );
                    if($res->{'code'} eq 'shutoff'){
                        $api->warn("Task disabled: ".$res->{'content'}."\n");
                        return 300;
                    }
                    if($res->{'code'} ne 'success'){
                        $api->warn("Failed to get status for $n: ".$res->{'error'}."\n");
                        next;
                    }
                    if ( exists( (values %{$res->{'query'}{'pages'}} )[0]{'missing'} ) ) {
                        $api->log("File page redirect [[$redir]] -> [[$target]] has no corresponding target file page, skipping");
                        next;
                    }
                }

                if ( $redir =~ m!^Template:Editnotices/! ) {
                    #$api->log("Redirect [[$redir]] -> [[$target]] is blacklisted by the title blacklist, skipping");
                    next;
                }

                my $txt = "#REDIRECT [[:$target]]\n{{Redirect shell|{{R from modification|p1={{-r|$origtarget}}}}}}\n{{User:AnomieBOT/Auto-G8|target=$origtarget}}";
                my $summary="Redirecting to [[:$origtarget]] because titles with en-dashes are hard to type";
                $summary.=" (and resolving the double redirect to [[:$target]])" if $origtarget ne $target;

                # Create page
                $api->log("$summary in $redir");
                my $r = $api->edit($tok, $txt, "$summary. $screwup", 0, 1);
                if($r->{'code'} ne 'success'){
                    $api->warn("Write failed on $redir: ".$r->{'error'}."\n");
                    next;
                }

                # If we've been at it long enough, let another task have a go.
                return 0 if time()>=$endtime;
            }
        }

        # On the next time around, skip any we've already processed this run
        my ($ns, $title) = @{$rows[$#rows]}{'ns','title'};
        $title = $dbh->quote( $title );
        $cont = " AND (p1.page_namespace > $ns OR p1.page_namespace = $ns AND p1.page_title > $title)";
        $self->{'dbcontinue'} = $cont;

        # If we've been at it long enough, let another task have a go.
        return 0 if time()>=$endtime;
    }

    # The whole list has been processed; start from the beginning next time.
    $self->{'dbcontinue'} = '';

    return 21600;
}

1;