# User:AnomieBOT/source/tasks/CommonsFileTemplateFixer.pm
# Approval requested 2025-08-26, in trial. Wikipedia:Bots/Requests for approval/AnomieBOT 87
package tasks::CommonsFileTemplateFixer;
=pod
=begin metadata
Bot: AnomieBOT
Task: CommonsFileTemplateFixer
BRFA: Wikipedia:Bots/Requests for approval/AnomieBOT 87
Status: In trial
Created: 2025-08-25
Manipulate various templates related to copying files to Commons.
* {{tl|Now Commons}}:
** Add missing {{para|date}}.
** Replace with {{tl|Nominated for deletion on Commons}} if the Commons file is nominated for deletion.
** Remove from files tagged with {{tl|Keep local}}.
* {{tl|Nominated for deletion on Commons}}:
** Replace with {{tl|Deleted on Commons}} if the Commons file is deleted.
** Replace with {{tl|Now Commons}} if the Commons file is no longer nominated for deletion (or remove if tagged with {{tl|Keep local}}).
* {{tl|Copy to Wikimedia Commons}}: (checked weekly)
** Remove from [[User:FastilyBot/Task/2/Blacklist|ineligible]] files.
** Replace with {{tl|Now Commons}} for files which are already on Commons (or remove if tagged with {{tl|Keep local}}).
In all cases, it'll try to avoid adding duplicate templates.
=end metadata
=cut
use utf8;
use strict;
use Data::Dumper;
use AnomieBOT::API;
use AnomieBOT::Task qw/:time/;
use vars qw/@ISA/;
@ISA=qw/AnomieBOT::Task/;
my $screwup;
# Constructor: build the object via the parent class, re-bless it into the
# requested class, and start with no saved iterator.
sub new {
    my ($class) = @_;

    my $self = bless $class->SUPER::new(), $class;

    # Iterator over files to process; run() creates it on demand.
    $self->{'iter'} = undef;

    return $self;
}
=pod
=for info
Approval requested 2025-08-26, in trial.<br />[[Wikipedia:Bots/Requests for approval/AnomieBOT 87]]
=cut
# Report this task's approval value.
# NOTE(review): the meaning of the value 6 is defined by the AnomieBOT
# framework, not by this file -- confirm before changing it.
sub approved {
    my $approval_value = 6;
    return $approval_value;
}
# True once none of the trial sub-tasks still needs edits, i.e. when
# check_for_trial() reports false for the full list of tracked task numbers.
sub is_trial_done {
    my ($self, $api) = @_;

    my @tracked_tasks = ( 1, 2, 7, 8, 9, 11, 15 );
    my $still_needed  = $self->check_for_trial( $api, @tracked_tasks );

    return ! $still_needed;
}
# Publish the current trial progress to an on-wiki status page.
#
# Builds a wikitable with one row per tracked trial task (task number, number
# of recorded edits, and diff links for each recorded revision id) from the
# 'trialinfo' entry of the persistent store, and saves it to
# "User:AnomieBOT/CommonsFileTemplateFixer trial info" when the page content
# differs from the generated text.
#
# Parameters:
#   $self - the task object (not otherwise used here).
#   $api  - API handle providing store, edittoken, edit, log and warn.
#
# Returns 1 on success (including "nothing to change"), 0 if fetching the
# edit token or saving the page failed.
sub update_trial_info {
    my ($self, $api) = @_;

    my $trialinfo = $api->store->{'trialinfo'} // {};

    my $txt = "Trial status for [[Wikipedia:Bots/Requests for approval/AnomieBOT 87]].\n\n";
    $txt .= "<onlyinclude>\n";
    $txt .= qq({| class="wikitable"\n);
    $txt .= "|-\n";
    $txt .= "! Task !! Count !! Edits\n";
    foreach my $t (1, 2, 7, 8, 9, 11, 15) {
        my @revids = @{ $trialinfo->{$t} // [] };
        # Row separator, then the row itself.
        $txt .= "|-\n";
        $txt .= "| $t || " . scalar( @revids ) . " || " . join( " ", map { "[[Special:Diff/$_|$_]]" } @revids ) . "\n";
    }
    $txt .= qq(|}\n);
    $txt .= "</onlyinclude>";

    my $title = 'User:AnomieBOT/CommonsFileTemplateFixer trial info';
    my $tok=$api->edittoken( $title, EditRedir => 1, NoShutoff => 1, NoExclusion => 1 );
    if ( $tok->{'code'} ne 'success' ) {
        $api->warn( "Failed to get edit token for $title: " . $tok->{'error'} . "\n" );
        return 0;
    }

    # Compare against the current page text with trailing whitespace removed,
    # since the generated text has none.
    my $intxt = $tok->{'revisions'}[0]{'slots'}{'main'}{'*'} // '';
    $intxt =~ s/\s*$//;
    if ( $intxt ne $txt ) {
        $api->log( "Updating trial info in $title" );
        my $res = $api->edit( $tok, $txt, "Updating bot trial info", 0, 1 );
        if ( $res->{'code'} ne 'success' ) {
            $api->warn( "Save failed for $title: " . $res->{'error'} . "\n" );
            return 0;
        }
    }

    return 1;
}
# Report whether any of the given trial tasks still has fewer than 10
# recorded edits in the store's 'trialinfo' entry.
#
# Returns 1 as soon as one such task is found, 0 otherwise (including when
# no task numbers are passed).
sub check_for_trial {
    my ($self, $api, @tasks) = @_;

    my $recorded = $api->store->{'trialinfo'} // {};

    for my $task (@tasks) {
        my $edits = $recorded->{$task} // [];
        if ( scalar( @$edits ) < 10 ) {
            return 1;
        }
    }

    return 0;
}
# Record a saved revision id against each of the given trial task numbers
# in the store's 'trialinfo' entry, then write the entry back to the store.
sub save_trial_edit {
    my ($self, $api, $revid, @tasks) = @_;

    my $recorded = $api->store->{'trialinfo'} // {};

    for my $task (@tasks) {
        push @{ $recorded->{$task} //= [] }, $revid;
    }

    # Assign back explicitly so the updated structure is what the store holds.
    $api->store->{'trialinfo'} = $recorded;
}
# Main entry point of the task.
#
# Walks the local files (namespace 6) that transclude {{Now Commons}},
# {{Nominated for deletion on Commons}} or {{Copy to Wikimedia Commons}} and
# rewrites those templates according to the state of the corresponding file
# on Commons and of the local page's categories.
#
# Two kinds of pass exist, selected by $self->{'docopybotuntil'}:
#   * "main" run (value 0): all three templates, skipping
#     {{Copy to Wikimedia Commons}} files in the bot-assessed category;
#   * "bot-copy" run (non-zero deadline): only bot-assessed
#     {{Copy to Wikimedia Commons}} files.
# The iterator is kept in $self->{'iter'} so a run can continue across calls;
# each call processes files for at most about five minutes.
#
# Parameters:
#   $self - the task object; reads/writes 'iter', 'broken', 'docopybotuntil'.
#   $api  - API handle for the local wiki.
#
# Returns a number (0, 60, 300 or the remaining time until the bot-copy
# deadline) or the result of $api->trial_complete.
# NOTE(review): the numeric value is presumably the delay in seconds before
# the framework calls run() again -- confirm against AnomieBOT::Task.
sub run {
    my ($self, $api) = @_;

    # Separate API handle pointed at Commons for the Commons-side lookups.
    my $capi = $api->copy( wikibase => 'https://commons.wikimedia.org/w/', assert => 'user' );

    $api->task('CommonsFileTemplateFixer', 0, 0, qw/d::Talk d::Redirects d::Templates d::Trial/);

    $screwup='Report errors at [[User:' . $api->user . '/shutoff/CommonsFileTemplateFixer]]';

    return 60 unless $self->update_trial_info( $api );
    return $api->trial_complete( 'AnomieBOT 87' ) if $self->is_trial_done( $api );

    # An expired bot-copy deadline resets us to a fresh main run.
    my $docopybotuntil = $self->{'docopybotuntil'} // 0;
    if ( $docopybotuntil && $docopybotuntil < time() ) {
        $docopybotuntil = 0;
        $self->{'docopybotuntil'} = 0;
        $self->{'iter'} = undef;
    }

    # ISO year and week number, used to check {{Copy to Wikimedia Commons}} at most once per week per file.
    my $copyCheckTs = strftime( '%G-%V', gmtime );

    # Templates we care about.
    my %r = $api->redirects_to_resolved(
        'Template:Now Commons',
        'Template:Copy to Wikimedia Commons',
        'Template:Nominated for deletion on Commons',
        'Template:Deleted on Commons',
    );
    if ( exists( $r{''} ) ) {
        $api->warn( "Failed to get redirects to our templates: " . $r{''}{'error'} . "\n" );
        return 60;
    }

    # Categories that make a file ineligible for copying to Commons, unless reviewed by a human.
    my %badcopycats = ();
    my $res = $api->query(
        titles => 'User:FastilyBot/Task/2/Blacklist',
        prop => 'links',
        plnamespace => '14',
        pllimit => 'max',
        formatversion => 2,
    );
    if ( $res->{'code'} ne 'success' ) {
        $api->warn( "Failed to fetch links from User:FastilyBot/Task/2/Blacklist: " . $res->{'error'} . "\n" );
        return 60;
    }
    foreach my $cat (@{ $res->{'query'}{'pages'}[0]{'links'} }) {
        $badcopycats{$cat->{'title'}} = 1;
    }

    # Categories that indicate a copy-to-Commons was reviewed by a human.
    # Each category must be its own key: this hash is used as a set (see the
    # `keys` and lookup uses below), so every entry needs an explicit value.
    my %goodcopycats = (
        'Category:Copy to Wikimedia Commons reviewed by a human' => 1,
        'Category:Copy to Wikimedia Commons (inline-identified)' => 1,
    );
    $res = $api->query(
        list => 'categorymembers',
        cmtitle => 'Category:Copy to Wikimedia Commons reviewed by a human',
        cmtype => 'subcat',
        cmlimit => 'max',
        formatversion => 2,
    );
    if ( $res->{'code'} ne 'success' ) {
        $api->warn( "Failed to fetch subcats of Category:Copy to Wikimedia Commons reviewed by a human: " . $res->{'error'} . "\n" );
        return 60;
    }
    foreach my $cat (@{ $res->{'query'}{'categorymembers'} }) {
        $goodcopycats{$cat->{'title'}} = 1;
    }

    # Categories to query along with the edit token.
    my @qcats = (
        'Category:Wikipedia files on Wikimedia Commons for which a local copy has been requested to be kept',
        keys %badcopycats,
        keys %goodcopycats,
    );
    my $qcatlimit = $api->paramLimit( 'query+categories', 'categories' );

    if ( ! $self->{'iter'} ) {
        $api->log( "Beginning " . ( $docopybotuntil ? 'bot-copy' : 'main' ) . " run" );
        $self->{'broken'} = 0;
        $self->{'iter'} = $api->iterator(
            generator => 'embeddedin',
            geititle => $docopybotuntil ? [ $r{'Template:Copy to Wikimedia Commons'} ] : [ $r{'Template:Now Commons'}, $r{'Template:Nominated for deletion on Commons'}, $r{'Template:Copy to Wikimedia Commons'} ],
            geinamespace => '6',
            geilimit => 'max',
            prop => 'categories',
            clcategories => 'Category:Copy to Wikimedia Commons (bot-assessed)',
            cllimit => 'max',
        );
    } else {
        $api->log( "Continuing " . ( $docopybotuntil ? 'bot-copy' : 'main' ) . " run" );
    }

    # Per-call cache of Commons file info, keyed by title under 'info'.
    my %cache = ();

    # Spend a max of 5 minutes on this task before restarting
    my $endtime = time()+300;

    while ( my $file = $self->{'iter'}->next ) {
        return 0 if $api->halting;

        if ( ! $file->{'_ok_'} ) {
            $api->warn("Failed to retrieve transclusions of " . $self->{'iter'}->iterval . ": " . $file->{'error'} . "\n");
            return 60;
        }

        my $title = $file->{'title'};
        my $isBotAssessed = grep { $_->{'title'} eq 'Category:Copy to Wikimedia Commons (bot-assessed)' } @{ $file->{'categories'} // [] };

        # Skip what we're not supposed to be checking right now based on $docopybotuntil.
        next if ( ! $docopybotuntil && $self->{'iter'}->iterval eq $r{'Template:Copy to Wikimedia Commons'} && $isBotAssessed );
        next if ( $docopybotuntil && ! ( $self->{'iter'}->iterval eq $r{'Template:Copy to Wikimedia Commons'} && $isBotAssessed ) );

        # Skip checking {{Copy to Wikimedia Commons}} if we already checked the file this week.
        next if ( $self->{'iter'}->iterval eq $r{'Template:Copy to Wikimedia Commons'} && ( $api->store->{"copyCheckTs:$title"} // 0 ) ge $copyCheckTs );

        # If there are more categories than one request may name, ask for all of the page's categories instead.
        my $tok=$api->edittoken( $title, EditRedir => 1, duplicatefiles => 1, categories => ( @qcats > $qcatlimit ? 1 : { categories => join( '|', @qcats ) } ) );
        if ( $tok->{'code'} eq 'shutoff' ) {
            $api->warn( "Task disabled: " . $tok->{'content'} . "\n" );
            return 300;
        }
        if ( $tok->{'code'} eq 'pageprotected' || $tok->{'code'} eq 'botexcluded' ) {
            # Skip protected and excluded pages
            $api->log( "Skipping $title, $tok->{code}" );
            next;
        }
        if ( $tok->{'code'} ne 'success' ) {
            $api->warn( "Failed to get edit token for $title: " . $tok->{'error'} . "\n" );
            $self->{'broken'} = 1;
            next;
        }
        if ( exists( $tok->{'missing'} ) ) {
            $api->log( "Skipping $title, page does not exist" );
            next;
        }

        my $isKeepLocal = grep { $_->{'title'} eq 'Category:Wikipedia files on Wikimedia Commons for which a local copy has been requested to be kept' } @{ $tok->{'categories'} // [] };
        my $isCopyToCommonsHuman = grep { $goodcopycats{$_->{'title'}} // 0 } @{ $tok->{'categories'} // [] };

        # Which of our templates (and for which Commons titles) the page already carries.
        my %hasNowCommons = ();
        my $hasCopyToCommons = 0;
        my %hasDelNomOnCommons = ();
        my %hasDeletedOnCommons = ();

        my $intxt = $tok->{'revisions'}[0]{'slots'}{'main'}{'*'};

        # Find existing templates on the page, so we can avoid duplicates.
        $api->process_templates( $intxt, sub {
            my $name = shift;
            my $params = shift;

            my %p = ();
            foreach ($api->process_paramlist( @$params )) {
                # Treat blank parameter values as absent.
                $p{$_->{'name'}} = $_->{'value'} =~ /^\s*$/ ? undef : $_->{'value'};
            }

            my $tpl = $r{"Template:$name"} // $r{$name} // "";
            if ( $tpl eq $r{'Template:Now Commons'} ) {
                my $ctitle = normalize_file_title( $p{'filename'} // $p{'1'} // $title );
                $hasNowCommons{$ctitle} = 1;
            } elsif ( $tpl eq $r{'Template:Copy to Wikimedia Commons'} ) {
                $hasCopyToCommons = 1;
            } elsif ( $tpl eq $r{'Template:Nominated for deletion on Commons'} ) {
                my $ctitle = normalize_file_title( $p{'1'} // $title );
                $hasDelNomOnCommons{$ctitle} = 1;
            } elsif ( $tpl eq $r{'Template:Deleted on Commons'} ) {
                my $ctitle = normalize_file_title( $p{'1'} // $title );
                $hasDeletedOnCommons{$ctitle} = 1;
            }
            return undef;
        } );

        my @summary = ();
        # Trial sub-task numbers touched by this edit (keys only).
        my %trialtasks = ();
        # Set when a Commons lookup failed; the page is then left alone.
        my $fail = 0;

        # Second pass: compute the replacement text for each of our templates.
        # The callback returns undef to leave a template unchanged, '' to
        # remove it, or the new wikitext.
        my $outtxt = $api->process_templates( $intxt, sub {
            return undef if $fail;

            my $name = shift;
            my $params = shift;
            shift; # $wikitext
            shift; # $data
            my $oname = shift;

            my $tpl = $r{"Template:$name"} // $r{$name} // "";
            if ( $tpl eq $r{'Template:Now Commons'} ) {
                # Is keep-localed? Remove it.
                if ( $isKeepLocal ) {
                    $trialtasks{15} = 1;
                    push @summary, 'remove {{Now Commons}} as files tagged {{Keep local}} are not eligible for [[WP:CSD#G8]]';
                    return '';
                }

                my %p = ();
                foreach ($api->process_paramlist( @$params )) {
                    $p{$_->{'name'}} = $_->{'value'} =~ /^\s*$/ ? undef : $_->{'value'};
                }

                # If the corresponding file is up for deletion, replace with {{Nominated for deletion on Commons}}.
                my $ctitle = normalize_file_title( $p{'filename'} // $p{'1'} // $title );
                my $cfile;
                if ( defined( $cache{'info'}{$ctitle} ) ) {
                    $cfile = $cache{'info'}{$ctitle};
                } else {
                    $cfile = $self->get_commons_file_info( $capi, $ctitle );
                    if ( ! $cfile ) {
                        $fail = 1;
                        return undef;
                    }
                    $cache{'info'}{$ctitle} = $cfile;
                }
                if ( @{ $cfile->{'templates'} // [] } ) {
                    my $t = $cfile->{'title'};
                    $trialtasks{7} = 1;
                    push @summary, "replace {{Now Commons}} with {{Nominated for deletion on Commons}} as [[c:$t]] is nominated for deletion";
                    $summary[$#summary] .= ' (and dedupe)' if $hasDelNomOnCommons{$t} // 0;
                    return '' if $hasDelNomOnCommons{$t} // 0; # Dedupe
                    $hasDelNomOnCommons{$t} = 1;
                    $t =~ s/^File://;
                    # A value containing "=" must be passed as an explicit positional parameter.
                    $t = "1=$t" if $t =~ /=/;
                    return "{{Nominated for deletion on Commons|$t}}";
                }

                # If it needs a date, add one.
                if ( ! defined( $p{'date'} ) || ! is_valid_date( $p{'date'} ) ) {
                    $trialtasks{11} = 1;
                    push @summary, "date {{Now Commons}}";
                    my $dt = "|date=" . strftime( '%-d %B %Y', gmtime );
                    my $ret = "{{$oname";
                    foreach (@$params) {
                        if ( /^\s*date\s*=/ ) {
                            # Replace the first existing date parameter in place; drop any later ones.
                            $ret .= $dt;
                            $dt = "";
                        } else {
                            $ret .= "|$_";
                        }
                    }
                    # If there was no date parameter at all, append it at the end.
                    $ret .= "$dt}}";
                    return $ret;
                }

                # It's fine.
                return undef;
            } elsif ( $tpl eq $r{'Template:Copy to Wikimedia Commons'} ) {
                # Already on Commons?
                my @dups = map { my $x = 'File:' . $_->{'name'}; $x =~ s/_/ /g; $x; } grep { exists( $_->{'shared'} ) } @{ $tok->{'duplicatefiles'} // [] };
                if ( @dups ) {
                    @dups = ( $title ) if ( grep { $_ eq $title } @dups ); # Prefer matching title.
                    $trialtasks{1} = 1;
                    if ( $isKeepLocal ) {
                        my $d = $dups[0];
                        $trialtasks{15} = 1;
                        push @summary, "remove {{Copy to Wikimedia Commons}}, on Commons as [[c:$d]] but tagged {{Keep local}} here";
                        return '';
                    }

                    push @summary, "replace {{Copy to Wikimedia Commons}} with {{Now Commons}}, on Commons as " . join( " ", map { "[[c:$_]]" } @dups );
                    # Index of the summary entry above; set to -1 once "(and dedupe)" has been appended to it.
                    my $sidx = $#summary;
                    my @ret = ();
                    foreach my $d (@dups) {
                        $summary[$sidx] .= ' (and dedupe)' if $sidx >= 0 && ( $hasNowCommons{$d} // 0 );
                        $sidx = -1 if $hasNowCommons{$d} // 0;
                        next if $hasNowCommons{$d} // 0; # Dedupe
                        $hasNowCommons{$d} = 1;

                        # Check if it's already up for deletion on Commons.
                        my $cfile;
                        if ( defined( $cache{'info'}{$d} ) ) {
                            $cfile = $cache{'info'}{$d};
                        } else {
                            $cfile = $self->get_commons_file_info( $capi, $d );
                            if ( ! $cfile ) {
                                $fail = 1;
                                return undef;
                            }
                            $cache{'info'}{$d} = $cfile;
                        }
                        if ( @{ $cfile->{'templates'} // [] } ) {
                            my $t = $cfile->{'title'};
                            $trialtasks{7} = 1;
                            push @summary, "replace {{Now Commons}} with {{Nominated for deletion on Commons}} as [[c:$t]] is nominated for deletion";
                            $summary[$#summary] .= ' (and dedupe)' if $hasDelNomOnCommons{$t} // 0;
                            next if $hasDelNomOnCommons{$t} // 0; # Dedupe
                            $hasDelNomOnCommons{$t} = 1;
                            $t =~ s/^File://;
                            $t = "1=$t" if $t =~ /=/;
                            push @ret, "{{Nominated for deletion on Commons|$t}}";
                        } else {
                            # Work on a copy so the loop alias (an element of @dups) is not modified.
                            my $arg = $d;
                            $arg = "1=$arg" if $arg =~ /=/;
                            push @ret, "{{Now Commons|$arg|date=" . strftime( '%-d %B %Y', gmtime ) . "|bot=" . $api->user . "}}";
                        }
                    }
                    return join( "\n", @ret );
                }

                # Ineligible?
                if ( ! $isCopyToCommonsHuman ) {
                    foreach my $cat (@{ $tok->{'categories'} // [] }) {
                        if ( defined( $badcopycats{$cat->{'title'}} ) ) {
                            $trialtasks{2} = 1;
                            push @summary, 'remove {{Copy to Wikimedia Commons}}, ineligible due to [[' . $cat->{'title'} . ']] (use `human` parameter to override)';
                            return '';
                        }
                    }
                }

                return undef;
            } elsif ( $tpl eq $r{'Template:Nominated for deletion on Commons'} ) {
                my %p = ();
                foreach ($api->process_paramlist( @$params )) {
                    $p{$_->{'name'}} = $_->{'value'} =~ /^\s*$/ ? undef : $_->{'value'};
                }

                my $ctitle = normalize_file_title( $p{'1'} // $title );
                my $cfile;
                if ( defined( $cache{'info'}{$ctitle} ) ) {
                    $cfile = $cache{'info'}{$ctitle};
                } else {
                    $cfile = $self->get_commons_file_info( $capi, $ctitle );
                    if ( ! $cfile ) {
                        $fail = 1;
                        return undef;
                    }
                    $cache{'info'}{$ctitle} = $cfile;
                }

                # Commons file deleted?
                # Both conditions are required: the page is missing AND Commons
                # has a deletion log entry for it. The parentheses matter
                # because `//` binds more loosely than `&&`.
                if ( ( $cfile->{'missing'} // 0 ) && @{ $cfile->{'logs'} // [] } ) {
                    my $t = $cfile->{'title'};
                    $trialtasks{8} = 1;
                    push @summary, "replace {{Nominated for deletion on Commons}} with {{Deleted on Commons}} as [[c:$t]] was deleted";
                    $summary[$#summary] .= ' (and dedupe)' if $hasDeletedOnCommons{$t} // 0;
                    return '' if $hasDeletedOnCommons{$t} // 0; # Dedupe
                    $hasDeletedOnCommons{$t} = 1;
                    $t = "1=$t" if $t =~ /=/;
                    return "{{Deleted on Commons|$t}}";
                }

                # Commons file no longer tagged?
                if ( ! ( $cfile->{'missing'} // 0 ) && ! @{ $cfile->{'templates'} // [] } ) {
                    my $t = $cfile->{'title'};
                    $trialtasks{9} = 1;
                    if ( $isKeepLocal ) {
                        $trialtasks{15} = 1;
                        push @summary, "remove {{Nominated for deletion on Commons}}, as [[c:$t]] is no longer tagged for deletion and it's tagged {{Keep local}} here";
                        return '';
                    }
                    push @summary, "replace {{Nominated for deletion on Commons}} with {{Now Commons}} as [[c:$t]] is no longer tagged for deletion";
                    $summary[$#summary] .= ' (and dedupe)' if $hasNowCommons{$t} // 0;
                    return '' if $hasNowCommons{$t} // 0; # Dedupe
                    $hasNowCommons{$t} = 1;
                    $t = "1=$t" if $t =~ /=/;
                    return "{{Now Commons|$t|date=" . strftime( '%-d %B %Y', gmtime ) . "|bot=" . $api->user . "}}";
                }

                return undef;
            }

            return undef;
        } );

        if ( $fail ) {
            $self->{'broken'} = 1;
            next;
        }

        if ( $outtxt ne $intxt ) {
            # Only save edits that still count towards an unfinished trial sub-task.
            if ( ! $self->check_for_trial( $api, keys %trialtasks ) ) {
                return 0 if time() > $endtime;
                next;
            }

            my $summary = ucfirst( join( '; ', @summary ) );
            $api->log( "$summary in $title" );
            my $res = $api->edit( $tok, $outtxt, "$summary. $screwup", 0, 1 );
            if($res->{'code'} ne 'success'){
                $api->warn( "Save failed for $title: " . $res->{'error'} . "\n" );
                $self->{'broken'} = 1;
            } elsif ( exists( $res->{'edit'}{'newrevid'} ) ) {
                $self->save_trial_edit( $api, $res->{'edit'}{'newrevid'}, keys %trialtasks );
                return 0 if $self->is_trial_done( $api );
            }
        } else {
            # Nothing to change: remember the week so the file is not rechecked until next week.
            $api->store->{"copyCheckTs:$title"} = $copyCheckTs;
        }

        return 0 if time() > $endtime;
    }

    $api->log( "Finished " . ( $docopybotuntil ? 'bot-copy' : 'main' ) . " run" );
    $self->{'iter'} = undef;

    # The intention is that first we run through all new stuff needed, and only after those are done do we run through bot-flagged {{Copy to Wikimedia Commons}}.
    if ( ! $docopybotuntil ) {
        return 60 if $self->{'broken'};
        # Allow the bot-copy pass to run for the next 28800 seconds (8 hours).
        $self->{'docopybotuntil'} = time() + 28800;
        return 0;
    }
    $self->{'docopybotuntil'} = 0;
    return $docopybotuntil - time();
}

# Get Commons file info
#
# Queries the given API handle for a file page: its image info, whether it
# transcludes Template:Deletion template tag, and its delete/delete log
# entries. A single redirect is followed by re-querying the target title; a
# second redirect is reported as a double redirect and treated as a failure.
#
# Parameters:
#   $self      - the task object (used only for the recursive call).
#   $api       - API handle to query (run() passes its Commons handle).
#   $title     - file title to look up.
#   $fromredir - set only on the recursive call: the title that redirected here.
#
# Returns the page hash from the query result with an added 'logs' key
# holding the deletion log entries (an empty array ref if there are none),
# or undef on query failure or double redirect.
sub get_commons_file_info {
    my ($self, $api, $title, $fromredir) = @_;

    my $res = $api->query(
        titles => $title,
        prop => 'imageinfo|templates',
        tltemplates => 'Template:Deletion template tag',
        tllimit => 'max',
        list => 'logevents',
        letitle => $title,
        leaction => 'delete/delete',
        redirects => 1,
        formatversion => 2,
    );
    if ( $res->{'code'} ne 'success' ) {
        $api->warn( "Failed to fetch file info for c:$title: " . $res->{'error'} . "\n" );
        return undef;
    }

    if ( defined( $res->{'query'}{'redirects'} ) ) {
        if ( defined( $fromredir ) ) {
            $api->warn( "File c:$fromredir is a double redirect" );
            return undef;
        }
        # Re-query under the target title so the log lookup uses that title too.
        my $t = $res->{'query'}{'redirects'}[0]{'to'};
        return $self->get_commons_file_info( $api, $t, $fromredir // $title );
    }

    my $cfile = $res->{'query'}{'pages'}[0];
    # list=logevents results are returned inside the 'query' element.
    $cfile->{'logs'} = $res->{'query'}{'logevents'} // [];
    return $cfile;
}
# Simple title normalization.
#
# Takes a file title as written in a template parameter (with or without a
# "File:"/"Image:" prefix) and returns it in the form "File:Name":
# HTML comments and directional marks are removed, underscores and the
# various Unicode whitespace characters are collapsed to single spaces,
# HTML-encoded apostrophes are decoded, surrounding whitespace is trimmed,
# and the first character of the name is upper-cased.
sub normalize_file_title {
    my $t = shift;

    # /s lets a comment that spans several lines be removed as well.
    $t =~ s/<!--.*?-->//gs;
    $t =~ s/[\x{200e}\x{200f}\x{202a}-\x{202e}]//g; # MediaWiki strips these from titles
    $t =~ s/[\s_\xa0\x{1680}\x{180e}\x{2000}-\x{200a}\x{2028}\x{2029}\x{202f}\x{205f}\x{3000}]+/ /g; # Mediawiki considers all these as whitespace
    # Decode "&#39;" / "&#039;" to a plain apostrophe.
    $t =~ s/&#0?39;/'/g; # Some bots do this.
    $t =~ s/^\s+|\s+$//g;
    $t =~ s/^(?:File|Image)\s*:\s*//ig;
    $t = ucfirst( $t );

    return "File:$t";
}
# Check if a date seems valid.
#
# Regex fragments shared by is_valid_date(): month names (full, and their
# first three or four letters), a day of month 1-31 with optional leading
# zero, and a four-digit year starting with "20".
my @months = qw/January February March April May June July August September October November December/;
my $monthsre = do {
    my @first3 = map { substr( $_, 0, 3 ) } @months;
    my @first4 = map { substr( $_, 0, 4 ) } @months;
    '(?:' . join( '|', @months, @first3, @first4 ) . ')';
};
my $dayre = '(?:0?[1-9]|[12][0-9]|3[01])';
my $yearre = '20[0-9][0-9]';

# Returns 1 if the string (after trimming and collapsing whitespace) looks
# like "D Month YYYY", "Month D, YYYY", "YYYY-MM-DD" or a 14-digit
# YYYYMMDDHHMMSS timestamp; otherwise 0.
sub is_valid_date {
    my ($dt) = @_;

    $dt =~ s/^\s+|\s+$//g;
    $dt =~ s/\s+/ /g;

    # Day-first and month-first textual dates, comma optional.
    if ( $dt =~ /^$dayre $monthsre,? $yearre$/ ) {
        return 1;
    }
    if ( $dt =~ /^$monthsre $dayre,? $yearre$/ ) {
        return 1;
    }

    # The fixed-width pre-checks force two-digit month and day fields, since
    # the day fragment on its own would also accept a single digit.
    if ( $dt =~ /^\d{4}-\d{2}-\d{2}$/ ) {
        return 1 if $dt =~ /^$yearre-(?:0[1-9]|1[0-2])-$dayre$/;
    }
    if ( $dt =~ /^\d{14}$/ ) {
        return 1 if $dt =~ /^$yearre(?:0[1-9]|1[0-2])$dayre(?:[01][0-9]|2[0-3])[0-5][0-9][0-6][0-9]$/;
    }

    return 0;
}
1;