User:AnomieBOT/source/tasks/EnDashRedirectCreator.pm: Difference between revisions

Content deleted Content added
AnomieBOT (talk | contribs)
Updating published sources: EnDashRedirectCreator: * Supplemental BRFA to update old redirects as needed.
AnomieBOT (talk | contribs)
Updating published sources: EnDashRedirectCreator: * Add disabled code for finding redirects missing {{tl|R avoided double redirect}}. * Reorganize queries. * Supplemental BRFA for all this disabled code.
 
(12 intermediate revisions by the same user not shown)
Line 1:
{{ombox|type=notice|text= Approved 2016-03-08<br />[[Wikipedia:Bots/Requests for approval/AnomieBOT 74]]}}
{{ombox|type=notice|text= Supplemental BRFA approved 2020-06-19<br />[[Wikipedia:Bots/Requests for approval/AnomieBOT 80]]}}
{{ombox|type=notice|text= Supplemental BRFA approval requested 2025-08-23<br />[[Wikipedia:Bots/Requests for approval/AnomieBOT 86]]}}
<syntaxhighlight lang="perl">
package tasks::EnDashRedirectCreator;
Line 13 ⟶ 14:
Status: Approved 2016-03-08
+BRFA: Wikipedia:Bots/Requests for approval/AnomieBOT 80
+Status: Approved 2020-06-19
+BRFA: Wikipedia:Bots/Requests for approval/AnomieBOT 86
+Status: BRFA
Created: 2016-03-03
 
Create redirects for articles with titles containing en-dashes from the
corresponding title with ASCII hyphens. Update these redirects later as
targets change.
 
=end metadata
Line 31 ⟶ 35:
use vars qw/@ISA/;
@ISA=qw/AnomieBOT::Task/;
 
my $brfa80 = 0;
 
my @skipNs = (
Line 47 ⟶ 49:
# Titles that the bot can't and shouldn't create redirects for, to avoid logspam.
# Two-level hash: a numeric outer key, then the title (underscore form, with the
# en-dash intact) mapped to a true value.
# NOTE(review): the outer keys look like MediaWiki namespace IDs (0 = main,
# 6 = File, 10 = Template); the lookup site is not visible in this excerpt, so
# confirm before relying on that.
my %skipTitles = (
0 => {
"List_of_''Late_Night_with_Jimmy_Fallon''_episodes_(2013–14)" => 1,
},
6 => {
"Velázquez_–_Bufón_don_Sebastián_de_Morra_(Museo_del_Prado,_c._1645).jpg" => 1,
},
10 => {
"Location_map_Minneapolis–Saint_Paul" => 1,
},
);
 
Line 84 ⟶ 77:
 
=for info
Supplemental BRFA approved 2020-06-19<br />[[Wikipedia:Bots/Requests for approval/AnomieBOT 80]]
 
=for info
Supplemental BRFA approval requested 2025-08-23<br />[[Wikipedia:Bots/Requests for approval/AnomieBOT 86]]
 
=cut
Line 95 ⟶ 91:
my ($self, $api) = @_;
 
$api->task('EnDashRedirectCreator', 0, 10, qw/d::Redirects d::IWNS d::Talk d::Timestamp d::Templates/);
my $screwup=' Errors? [[User:'.$api->user.'/shutoff/EnDashRedirectCreator]]';
 
my $BRFA86 = 0;
 
my %ns = $api->namespace_map();
my %rns = $api->namespace_reverse_map();
my $nsre = $api->namespace_re(qw/! 0/);
 
my ($dbh);
Line 109 ⟶ 108:
return 300;
}
 
# Templates to preserve.
# Wikitext of any template found in this list is later appended after the
# Auto-G8 marker of the rewritten redirect (collected in $extra).
# NOTE(review): redirects_to_resolved() presumably returns the named template
# plus every redirect to it, keyed by title -- confirm against its definition.
my %preserve = $api->redirects_to_resolved(
'Template:NASTRO comment',
);
# An entry under the empty-string key is how the lookup reports failure.
if(exists($preserve{''})){
# A 'shutoff' code means the task was disabled: log the reason and stop.
if($preserve{''}{'code'} eq 'shutoff'){
$api->warn("Task disabled: " . $preserve{''}{'content'} . "\n");
# NOTE(review): presumably the delay in seconds before the next run -- confirm.
return 300;
}
# Any other failure: log the error text and return the shorter value.
$api->warn("Failed to get preserve template redirects: " . $preserve{''}{'error'} . "\n");
return 60;
}
 
# Templates to preserve ahead of the redirect category shell.
# Wikitext of any template found in this list is collected in $extrabefore,
# which is emitted before "{{Redirect category shell|" when the page is rewritten.
# NOTE(review): redirects_to_resolved() presumably returns the named templates
# plus every redirect to them, keyed by title -- confirm against its definition.
my %preserveBefore = $api->redirects_to_resolved(
'Template:Old CfD',
'Template:Old MfD',
'Template:Old prod',
'Template:Old RfD',
'Template:Old TfD',
'Template:Old XfD multi',
'Template:WikiProject banner shell',
);
# An entry under the empty-string key is how the lookup reports failure.
if(exists($preserveBefore{''})){
# A 'shutoff' code means the task was disabled: log the reason and stop.
if($preserveBefore{''}{'code'} eq 'shutoff'){
$api->warn("Task disabled: " . $preserveBefore{''}{'content'} . "\n");
# NOTE(review): presumably the delay in seconds before the next run -- confirm.
return 300;
}
# Any other failure: log the error text and return the shorter value.
$api->warn("Failed to get preserve template redirects: " . $preserveBefore{''}{'error'} . "\n");
return 60;
}
 
# Rcat templates to preserve. This should probably be limited to particularly unprintworthy things, e.g. misspelling but not alternative spelling.
# Wikitext of any template found in this list is collected in $rextra, which is
# emitted inside the {{Redirect category shell}} when the page is rewritten.
# NOTE(review): redirects_to_resolved() presumably returns the named templates
# plus every redirect to them, keyed by title -- confirm against its definition.
my %rpreserve = $api->redirects_to_resolved(
'Template:R from incorrect disambiguation',
'Template:R from incorrect hyphenation',
'Template:R from incorrect name',
'Template:R from miscapitalization',
'Template:R from misquotation',
'Template:R from misspelling',
'Template:R from remote talk page',
'Template:R unprintworthy',
);
# An entry under the empty-string key is how the lookup reports failure.
if(exists($rpreserve{''})){
# A 'shutoff' code means the task was disabled: log the reason and stop.
if($rpreserve{''}{'code'} eq 'shutoff'){
$api->warn("Task disabled: " . $rpreserve{''}{'content'} . "\n");
# NOTE(review): presumably the delay in seconds before the next run -- confirm.
return 300;
}
# Any other failure: log the error text and return the shorter value.
$api->warn("Failed to get r-preserve template redirects: " . $rpreserve{''}{'error'} . "\n");
return 60;
}
 
my $dofixupuntil = $self->{'dofixupuntil'} // 0;
$dofixupuntil = 0 if $dofixupuntil < time();
 
my $cont = $self->{'dbcontinue'} // '';
Line 127 ⟶ 181:
$api->warn( "Error fetching actor ID from replica: $@\n" );
return 300;
}
 
# Look up the linktarget row id for the page 'AnomieBOT/Auto-G8' in namespace 2
# (the template is transcluded elsewhere in this file as {{User:AnomieBOT/Auto-G8}}).
# The id is interpolated into the templatelinks join of the main query.
my $targetid;
# Run the query inside eval so a database error is caught instead of propagating.
eval {
( $targetid ) = $dbh->selectrow_array( "SELECT lt_id FROM linktarget WHERE lt_namespace=2 AND lt_title = 'AnomieBOT/Auto-G8'" );
};
if ( $@ ) {
$api->warn( "Error fetching linktarget ID from replica: $@\n" );
# NOTE(review): presumably the delay in seconds before the next run -- confirm.
return 300;
}
 
# SQL fragments for the optional "fix-up" conditions of the main query.
# Each defaults to '0=1', an always-false predicate, so it selects nothing
# unless it is replaced below.
my $autoG8Mismatch = '0=1';
my $needAvoidedRedir = '0=1';
my $needAutoG8 = '0=1';
# The real predicates are used only when the BRFA 86 flag is on and a fix-up
# window is active ($dofixupuntil is non-zero).
if ( $BRFA86 && $dofixupuntil ) {
# The redirect page has an external link with the 'x-anomiebot-auto-g8-mismatch:endash' URN path.
$autoG8Mismatch = "EXISTS( SELECT 1 FROM externallinks WHERE el_from = p2.page_id AND el_to_domain_index = 'urn:.' and el_to_path = 'x-anomiebot-auto-g8-mismatch:endash' )";
# The redirect page has no templatelinks row from the Auto-G8 join.
$needAutoG8 = "templatelinks.tl_from IS NULL";
# The en-dash title is itself a redirect, but the hyphen redirect is not in
# the 'Avoided_double_redirects' category.
$needAvoidedRedir = "r1.rd_from IS NOT NULL AND NOT EXISTS( SELECT 1 FROM categorylinks WHERE cl_from=p2.page_id AND cl_to='Avoided_double_redirects' )";
}
 
# SQL predicate for "this redirect belongs to the bot": by default, the
# revision join matched (a first revision, rev_parent_id = 0, by one of the
# bot's actor ids).
my $botOwned = "revision.rev_page IS NOT NULL";
# With the BRFA 86 flag on, a page matched by the Auto-G8 templatelinks join
# also counts as bot-owned.
if ( $BRFA86 ) {
$botOwned = "( $botOwned OR templatelinks.tl_from IS NOT NULL )";
}
 
Line 143 ⟶ 220:
LEFT JOIN redirect AS r1 ON(r1.rd_from=p1.page_id)
LEFT JOIN redirect AS r2 ON(r2.rd_from=p2.page_id)
LEFT JOIN revision ON (rev_page = p2.page_id AND rev_actor IN ($actorIds) AND rev_parent_id = 0)
LEFT JOIN templatelinks ON (tl_from=p2.page_id AND tl_target_id=$targetid)
WHERE
($dashcond) AND p1.page_namespace NOT IN ($skipNs)
AND (
p2.page_id IS NULL
} . ( OR $brfa80botOwned ?AND qq{(
OR (
r2.rd_namespace != COALESCE( r1.rd_namespace, p1.page_namespace )
OR r2.rd_title != COALESCE( r1.rd_title, p1.page_title )
OR r2.rd_fragment != r1.rd_fragment
) AND EXISTS( SELECT 1OR FROM revision WHERE rev_page = p2.page_id AND rev_actor IN ($actorIds) AND rev_parent_id = 0 )autoG8Mismatch
} : "" ) . qq{ OR $needAutoG8
OR $needAvoidedRedir
)
)
$cont
Line 228 ⟶ 308:
 
my ($redir, $origtarget, $target, $fragment) = @$targets;
 
my $redirNs = ( $redir =~ /^([^:]+):/ && exists( $ns{$1} ) ? $ns{$1} : 0 );
my $targetNs = ( $target =~ /^([^:]+):/ && exists( $ns{$1} ) ? $ns{$1} : 0 );
my $extrabefore = '';
my $rextra = '';
my $extra = '';
 
my $tok=$api->edittoken($redir, EditRedir => 1, imageinfo => { prop => '', limit => 1 });
Line 239 ⟶ 325:
}
if ( !exists( $tok->{'missing'} ) ) {
if ( !$brfa80 ) {
$api->log("$redir already exists, skipping");
next;
}
my $res = $api->query(
titles => $redir,
Line 257 ⟶ 339:
}
 
myif $txt( =!exists( $tok->{'revisionsredirect'}[0] ) ) {'slots'}{'main'}{'*'};
if ( $txt =~ m!\{\{User:AnomieBOT/Auto$api-G8\|target=>log(.*?)\}\}!"$redir &&was $1created neby $origtargetthe )bot, {but it is not a redirect anymore, skipping");
$api->warn("[[$redir]] apparently exists for both [[$1]] and [[$origtarget]], not updating");
next;
}
 
# Current wikitext of the existing redirect, taken from the token response.
my $txt = $tok->{'revisions'}[0]{'slots'}{'main'}{'*'};
# If the page carries an Auto-G8 marker whose target= differs from the target
# being processed, decide whether to skip it, overwrite it, or take it over.
if ( $txt =~ m!\{\{User:AnomieBOT/Auto-G8\|(?:[^{}|]*\|)?target=(.*?)\}\}! && $1 ne $origtarget ) {
my $oldtarget = $1;
# Replace every character listed in $dashstr (defined outside this excerpt)
# with an ASCII hyphen, to test whether the recorded target really maps to
# this redirect title.
my $tmp = $oldtarget;
$tmp =~ s/[$dashstr]/-/gu;
if ( $tmp eq $redir ) {
# Both the recorded and the new target map to this title. For an odd
# namespace number (treated here as a talk page; the subject namespace is
# derived with "& ~1"), use the corresponding subject pages as a tiebreaker.
if ( $redirNs & 1 ) {
# Prefix of the subject namespace; empty when that namespace is 0.
my $subjNsPrefix = $redirNs > 1 ? "$rns{$redirNs & ~1}:" : '';
my ( $oldtargetSubj, $origtargetSubj );
# Swap the leading namespace prefix matched by $nsre for the subject prefix.
( $origtargetSubj = $origtarget ) =~ s/^$nsre:/$subjNsPrefix/;
( $oldtargetSubj = $oldtarget ) =~ s/^$nsre:/$subjNsPrefix/;

# Resolve all four titles in one call; the result is indexed by input title.
my %tgts = $api->resolve_redirects( $origtargetSubj, $oldtargetSubj, $origtarget, $oldtarget );

my ($oldsubjtgt, $oldtalktgt, $origsubjtgt, $origtalktgt);
$oldsubjtgt = $tgts{$oldtargetSubj};
# Rewrite each resolved talk target into its subject-namespace form so it can
# be compared directly with the resolved subject target.
( $oldtalktgt = $tgts{$oldtarget} ) =~ s/^($nsre):/ $ns{$1} > 1 ? "$rns{$ns{$1} & ~1}:" : '' /e;
$origsubjtgt = $tgts{$origtargetSubj};
( $origtalktgt = $tgts{$origtarget} ) =~ s/^($nsre):/ $ns{$1} > 1 ? "$rns{$ns{$1} & ~1}:" : '' /e;

# Both subject pages resolve to the same place. If only the old talk page
# agrees with it, the existing redirect is the right one: leave it alone.
if ( $oldsubjtgt eq $origsubjtgt && $oldsubjtgt eq $oldtalktgt && $origsubjtgt ne $origtalktgt ) {
$api->log("Skipping [[$origtarget]], [[$redir]] already exists for [[$oldtarget]] and [[$oldtargetSubj]] matches that while [[$origtarget]] does not match [[$origtargetSubj]]");
next;
# If only the new talk page agrees, warn and fall through to update the redirect.
} elsif ( $oldsubjtgt eq $origsubjtgt && $oldsubjtgt ne $oldtalktgt && $origsubjtgt eq $origtalktgt ) {
$api->warn("Updating [[$redir]] to [[$target]]: [[$redir]] already exists for [[$oldtarget]], but that does not match [[$oldtargetSubj]] while [[$origtarget]] does match [[$origtargetSubj]]");
# Otherwise the conflict cannot be settled automatically: warn and skip.
} else {
$api->warn("[[$redir]] apparently exists for both [[$oldtarget]] and [[$origtarget]], not updating");
next;
}
} else {
# Even namespace number: no subject/talk tiebreaker is attempted, so warn and skip.
$api->warn("[[$redir]] apparently exists for both [[$oldtarget]] and [[$origtarget]], not updating");
next;
}
} else {
# The recorded target does not hyphenate to this title, so the marker is
# treated as invalid and processing continues with the update.
$api->warn("[[$redir]] claims to exist for [[$oldtarget]], but that's not valid so overwriting");
}
}
 
# Check for certain extra templates that we should probably preserve when updating.
# The callback reads three arguments per template (name, params, wikitext) and
# files the wikitext into one of three buckets according to which preserve list
# names the template. $params is read but not used.
$api->process_templates( $txt, sub {
my $name = shift;
my $params = shift;
my $wikitext = shift;

# Each list is checked under both "Template:$name" and the bare name. The lists
# are tested in a fixed order, so a template lands in at most one bucket.
if ( exists( $preserveBefore{"Template:$name"} ) || exists( $preserveBefore{$name} ) ) {
# Emitted before the redirect category shell, one template per line.
$extrabefore .= "$wikitext\n";
} elsif ( exists( $rpreserve{"Template:$name"} ) || exists( $rpreserve{$name} ) ) {
# Emitted inside the redirect category shell.
$rextra .= "\n$wikitext";
} elsif ( exists( $preserve{"Template:$name"} ) || exists( $preserve{$name} ) ) {
# Emitted after the Auto-G8 marker.
$extra .= "\n$wikitext";
}

# NOTE(review): presumably undef tells process_templates to leave the template
# unchanged -- confirm against its definition.
return undef;
} );
 
# Also keep any categories from the existing redirect page.
# Cheap pre-check: only do the work if a [[Category:...]] link appears at all.
if ( $txt =~ /\[\[\s*(?i:Category)\s*:/ ) {
my $txt2 = $txt;
# Drop the leading "#REDIRECT [[...]]" so the redirect link is not scanned.
$txt2 =~ s/^#REDIRECT\s*\[\[.*?\]\]//i;
# NOTE(review): strip_nowiki() presumably swaps non-wikitext sections for
# placeholders recorded in $nowiki, which replace_nowiki() restores -- confirm.
my $nowiki = {};
$txt2 = $api->strip_nowiki( $txt2, $nowiki );
# Collect every category link and append them, one per line, after a blank line.
my @cats = $txt2 =~ /\[\[\s*(?i:Category)\s*:.*?\]\]/g;
$extra .= "\n\n" . $api->replace_nowiki( join( "\n", @cats ), $nowiki ) if @cats;
}
}
Line 267 ⟶ 413:
next;
}
 
my $redirNs = ( $redir =~ /^([^:]+):/ && exists( $ns{$1} ) ? $ns{$1} : 0 );
my $targetNs = ( $target =~ /^([^:]+):/ && exists( $ns{$1} ) ? $ns{$1} : 0 );
 
if ( $redirNs == 0 && !( $crossNsOk{$targetNs} // 0 ) ) {
Line 301 ⟶ 444:
 
$target .= '#' . $fragment if defined( $fragment );
my $tosectionrextra = defined( $fragment ) ? '"\n{{R to section}}'$rextra" :if ''defined( $fragment );
my $doublerextra = $origtarget"\n{{R eqfrom ASCII-only}}$targetrextra" ?if ''$redir :=~ "{{R/^[ avoided double redirect|1=-~]+$origtarget}}"/;
my $txtrextra = "#REDIRECT [[:$target]]\n{{Redirect shell|{{R fromavoided modificationdouble redirect|1={{-r|$origtarget}}}}$doublerextra" if $origtarget ne $tosection}}\n{{User:AnomieBOT/Auto-G8|target=$origtarget}}";
my $txt = "#REDIRECT [[:$target]]\n\n${extrabefore}{{Redirect category shell|\n{{R from alternative hyphenation|1={{-r|1=$origtarget}}}}$rextra\n}}\n{{User:AnomieBOT/Auto-G8|endash|target=$origtarget}}$extra";
my $summary;
if ( exists( $tok->{'missing'} ) ) {
Line 324 ⟶ 468:
$api->warn("Write failed on $redir: ".$r->{'error'}."\n");
next;
}
 
# Check for edit warring.
# Runs only when the page already existed before this edit. It fetches the
# page's revisions back to 30 days ago and counts how often the same content
# (by SHA-1) recurs, as a proxy for reverts.
if ( ! exists( $tok->{'missing'} ) ) {
my $res = $api->query(
titles => $redir,
prop => 'revisions',
rvprop => 'user|sha1',
rvlimit => 'max',
rvend => $api->timestamp2ISO( time() - 30 * 86400 ),
formatversion => 2,
);
# Per SHA-1: 0 on first sight, +1 for every later revision with the same hash.
my %shas = ();
# Number of fetched revisions made by the user 'Bot1058'.
my $bot1058 = 0;
for my $rev (@{$res->{'query'}{'pages'}[0]{'revisions'}}) {
$shas{$rev->{'sha1'}} = ( $shas{$rev->{'sha1'}} // -1 ) + 1;
$bot1058++ if $rev->{'user'} eq 'Bot1058';
}
# Total repeated-content revisions across all hashes.
my $ct = 0;
for my $sha (keys %shas) {
$ct += $shas{$sha};
}
# More than two repeats in the window: ask a human to look.
if ( $ct > 2 ) {
# Add a hint about the likely cause when Bot1058 made more than two of the revisions.
my $extranote = $bot1058 > 2 ? "If it's AnomieBOT and Bot1058 fighting, that probably means {{-r|1=$origtarget}} needs to be updated to match {{-r|1={{subst:SUBJECTPAGENAME:$origtarget}}}} or vice versa." : "";
$api->whine( "Possible edit warring on [[:$redir]]", "In the past 30 days, there appear to have been $ct reverts on {{-r|1=$redir}}. This suggests that vandalism or edit warring of some sort may be occurring (there or on {{-r|1=$origtarget}}). A human should look into it. $extranote" );
}
}
 
Line 343 ⟶ 513:
$self->{'dbcontinue'} = '';
 
# The intention is that first we run through all new creations needed, and only after those are done do we run through things needing the Auto-G8 template re-added.
return 21600;
if ( ! $dofixupuntil ) {
$self->{'dofixupuntil'} = time() + 21600;
return 0;
}
 
$self->{'dofixupuntil'} = 0;
return $dofixupuntil - time();
}