Utente:Wisbot/coordbot.py
<source lang="python">
# -*- coding: utf-8 -*-
""" This bot will make direct text replacements. It will retrieve information on which pages might need changes either from an XML dump or a text file, or only change a single page.
You can run the bot with the following commandline parameters:
-file - Work on all pages given in a local text file.
Will read any wiki link and use these articles. Argument can also be given as "-file:filename".
-cat - Work on all pages which are in a specific category.
Argument can also be given as "-cat:categoryname".
-page - Only edit a specific page.
Argument can also be given as "-page:pagetitle". You can give this parameter multiple times to edit multiple pages.
-ref - Work on all pages that link to a certain page.
Argument can also be given as "-ref:referredpagetitle".
-filelinks - Works on all pages that link to a certain image.
Argument can also be given as "-filelinks:ImageName".
-links - Work on all pages that are linked to from a certain page.
Argument can also be given as "-links:linkingpagetitle".
-start - Work on all pages in the wiki, starting at a given page. Choose
"-start:!" to start at the beginning. NOTE: You are advised to use -xml instead of this option; this is meant for cases where there is no recent XML dump.
-except:XYZ - Ignore pages which contain XYZ. If the -regex argument is given,
XYZ will be regarded as a regular expression.
-summary:XYZ - Set the summary message text for the edit to XYZ, bypassing the
predefined message texts with original and replacements inserted.
-template:XYZ - Accepted on the command line, but the value is currently not used.
-namespace:n - Number of the namespace to process. The parameter can be used
multiple times. It works in combination with all other parameters, except for the -start parameter. If you e.g. want to iterate over all user pages starting at User:M, use -start:User:M.
-always - Don't prompt you for each replacement.
NOTE: Only use either -xml or -file or -page, but don't mix them.
Examples:
"""
# Utente:Wiso 2007
# Distributed under the terms of the GPL licence
from __future__ import generators import sys, re import wikipedia, pagegenerators,catlib, config
__version__='$Id: coordbot.py,v 0.1 $'
# Summary messages in different languages
# NOTE: Predefined replacement tasks might use their own dictionary, see 'fixes'
# below.
# Edit summary. The %s placeholder is filled with the link to the English
# source page right before a page is saved (see CoordRobot.run).
# NOTE(review): "Template:Coord" looks like the residue of a wiki-expanded
# "{{Coord}}" -- confirm against the original script.
msg = u'robot Aggiungo Template:Coord dalla pagina %s'
# Maps the keys 'safe' and 'notsafe' to lists of (pattern, replacement) pairs.
# CoordRobot.compileregex swaps every pattern string for its compiled regex;
# CoordRobot.run only reads the 'safe' list.
# NOTE(review): the replacement strings below appear to be corrupted. Each one
# opens with r" and is followed by lines that read like template-error output
# ("... non è un numero") instead of a "{{Coord|...}}" replacement, which also
# leaves the literal syntactically broken. The intended replacements cannot be
# recovered from this file -- restore them from the original script.
templates = {
'safe': [ (r'\{\{ ?[Cc]oord(.*?)\}\}', r"Template:Coord\1\n"),
(r'{{coor[_ ]title[_ ]d\|([0-9\.-]+)\|([NS])\|([0-9\.-]+)\|([EW])\|?([^}]*?)}}', r"
- d format: latd non è un numero
- d format: latc diverso da N e da S
- d format: longd non è un numero
- d format: longc diverso da E e da W
\n"), (r'{{coor[_ ]title[_ ]dm\|([0-9\.-]+)\|([0-9\.-]+)\|([NS])\|([0-9\.-]+)\|([0-9\.-]+)\|([EW])\|?([^\}]*?)\}\}', r"
- dm format: latd non è un numero
- dm format: latm non è un numero
- dm format: latc diverso da N e da S
- dm format: longd non è un numero
- dm format: longm non è un numero
- dm format: longc diverso da E e da W
\n"), (r'{{coor[_ ]title[_ ]dms\|([0-9\.-]+)\|([0-9\.-]+)\|([0-9\.-]+)\|([NS])\|([0-9\.-]+)\|([0-9\.-]+)\|([0-9\.-]+)\|([EW])\|?([^}]*?)}}', r"
- dms format: latd non è un numero
- dms format: latm non è un numero
- dms format: lats non è un numero
- dms format: latc diverso da N e da S
- dms format: longd non è un numero
- dms format: longm non è un numero
- dms format: longs non è un numero
- dms format: longc diverso da E e da W
\n"), (r'\{\{ ?[Cc]oor[ _]d\|([0-9\.+-]+)\|([0-9\.+-])(\|?[^\|]*)\}\}', r"
- dec format: latd non è un numero
- dec format: longd non è un numero
\n"),
], 'notsafe': [
(r'\{\{ ?[Cc]oord[ _]dm\|([0-9]+)\|([0-9\.]+)\|([NS])\|([0-9\.]+)\|([0-9\.]+)\|([EW])(\|?[^\|]*)\}\}', r"
- dm format: latd non è un numero
- dm format: latm non è un numero
- dm format: latc diverso da N e da S
- dm format: longd non è un numero
- dm format: longm non è un numero
- dm format: longc diverso da E e da W
\n"), (r'\{\{ ?[Cc]oor[ _]dms\|([0-9]+)\|([0-9\.]+)\|([0-9\.]+)\|([NS])\|([0-9\.]+)\|([0-9\.]+)\|([0-9\.]+)\|([EW])(\|?[^\|]*)\}\}', r"
- dms format: latd non è un numero
- dms format: latm non è un numero
- dms format: lats non è un numero
- dms format: latc diverso da N e da S
- dms format: longd non è un numero
- dms format: longm non è un numero
- dms format: longs non è un numero
- dms format: longc diverso da E e da W
\n"), (r'\{\{.*latd *= *([0-9\.]+).*longd ?= ?([0-9\.]+)', r"
- dec format: latd non è un numero
- dec format: longd non è un numero
\n")
] }
# Regex sources searched in the local page text by CoordRobot.checkExceptions;
# a page on which any of them matches is skipped by CoordRobot.run.
# CoordRobot.compileregex replaces each string with its compiled pattern.
exceptions = [
    r'\{\{ *?Geobox',
    r'\{\{ ?[Cc]oord',
    r'\{\{ ?Template:[Cc]oord',
    r'\{\{ ?[mM]ontagna',
    r'\{\{ ?(Template:)?[cC]omune',
    r'\{\{ ?[cC]ittà',
    r'\{\{ ?[mM]unicipalità',
    r'\{\{ ?[aA]eroporto\|',
    r'\{\{ ?[Mm]unicipi',
    r'\{\{ ?[iI]nfobox[ _]Azienda\|',
    r'\{\{ ?[Ss]\|aziende',
    r'\{\{ ?[Dd]isambigua\|',
    r'\{\{ ?[Ff]razione',
    r'\{\{ ?[Ss]quadra',
    r'\{\{ ?[Pp]asso ?(\||\n)',
    r'\{\{ ?[Bb]undesland[ _]tedesco',
]
class CoordRobot:
    """A bot that imports coordinates from another Wikipedia.

    For every page yielded by the generator the bot looks for an interwiki
    link to the 'en' site, searches the English page text with the patterns
    in ``templates['safe']`` and, after interactive confirmation, saves the
    local page with the converted template prepended to its text.
    """

    def __init__(self, generator, autoTitle=False, autoText=False):
        """Store the page generator and compile the module-level patterns.

        Args:
            generator: iterable of pages to process.
            autoTitle: accepted for interface compatibility; not used.
            autoText: accepted for interface compatibility; not used.
        """
        self.generator = generator
        self.compileregex()

    def compileregex(self):
        """Compile the pattern strings in ``templates`` and ``exceptions``.

        Both module-level tables are updated in place. Entries that are
        already compiled are left alone, so creating a second CoordRobot no
        longer fails: ``re.compile`` raises ValueError when it is handed a
        compiled pattern together with a flags argument.
        """
        for pairs in templates.values():
            for index, (old, new) in enumerate(pairs):
                if not hasattr(old, 'search'):
                    pairs[index] = re.compile(old, re.UNICODE), new
        for index, exception in enumerate(exceptions):
            if not hasattr(exception, 'search'):
                exceptions[index] = re.compile(exception)

    def checkExceptions(self, text):
        """Return the first exception match found in ``text``.

        Args:
            text: page text to inspect.

        Returns:
            The matched substring of the first pattern in ``exceptions``
            that matches, or None when no pattern matches.
        """
        for exception in exceptions:
            hit = exception.search(text)
            if hit:
                return hit.group(0)
        return None

    def change(self, page, new_text):
        """Save ``new_text`` to ``page``, reporting the failures handled here.

        Edit conflicts and spam-filter rejections are printed and swallowed
        so that the run continues with the next page; any other exception
        propagates to the caller.
        """
        try:
            page.put(new_text)
        except wikipedia.EditConflict:
            wikipedia.output(u'Skipping %s because of edit conflict'
                             % (page.title()))
        except wikipedia.SpamfilterError as url:
            wikipedia.output(
                u'Cannot change %s because of blacklist entry %s'
                % (page.title(), url))

    def run(self):
        """Process every page of the generator interactively."""
        sen = wikipedia.Site('en')
        for page in self.generator:
            try:
                if not page.canBeEdited():
                    wikipedia.output(u'Skipping locked page %s'
                                     % page.title())
                    continue
                interwiki_list = page.interwiki()
            except wikipedia.NoPage:
                wikipedia.output(u'Page %s not found' % page.title())
                continue
            except wikipedia.IsRedirectPage:
                wikipedia.output(u'Page %s is a redirect, skip'
                                 % page.title())
                continue

            # Pick the interwiki link that points to the English site; the
            # else branch runs only when the loop ends without a break,
            # i.e. when there is no English counterpart.
            for page_en in interwiki_list:
                if page_en.site() == sen:
                    break
            else:
                continue

            wikipedia.output(page.title())
            wikipedia.output(u'en: %s' % page_en.title())
            text_it = page.get()

            # Skip all pages that contain certain texts.
            match = self.checkExceptions(text_it)
            if match:
                colors = ([None] * 9 + [None] * len(page.title())
                          + [None] * 21 + [10] * len(match))
                wikipedia.output(
                    u'Skipping %s because it contains %s'
                    % (page.title(), match), colors=colors)
                continue

            try:
                text_en = page_en.get()
            except wikipedia.NoPage:
                wikipedia.output(u'Page %s not found' % page_en.title())
                continue
            except wikipedia.IsRedirectPage:
                wikipedia.output(u'Page %s is a redirect, follow redirect'
                                 % page_en.title())
                # NOTE(review): this reads the redirect page itself; it
                # presumably does not load the redirect target despite the
                # message above -- confirm against the framework API.
                text_en = page_en.get(get_redirect=True)

            # NOTE(review): every matching pattern is offered separately and
            # each proposal is built from the unmodified local text, so
            # accepting two proposals saves the page twice -- confirm that
            # this is intended.
            for old, new in templates['safe']:
                match = old.search(text_en)
                if not match:
                    continue
                found = match.group(0)
                colors = [None] * 5 + [13] * len(page.title()) + [None] * 4
                wikipedia.output(u'\n>>> %s <<<' % page.title(),
                                 colors=colors)
                wikipedia.output(u'Trovato %s: ' % found)
                template_new = old.sub(new, found)
                wikipedia.output(template_new)
                new_text_it = template_new + text_it
                choice = wikipedia.inputChoice(
                    u'Do you want to accept these changes?',
                    ['Yes', 'No'], ['y', 'N'], 'N')
                if choice in ['y', 'Y']:
                    wikipedia.setAction(msg % page_en.aslink())
                    self.change(page, new_text_it)
def main():
    """Parse the command line, build the page generator and run the bot.

    Options handled here: -autotitle, -autotext, -page[:title],
    -except:text, -template:name, -namespace:n and -summary:text. Every
    other argument is passed to the shared generator factory. When no page
    source results from the arguments, the help text is shown and the
    script exits.

    NOTE(review): CoordRobot.run sets its own edit summary before each
    save, so a summary given with -summary is presumably overwritten --
    confirm.
    """
    gen = None
    # Texts given with -except:. They used to be collected in a local list
    # that shadowed the module-level ``exceptions`` and was never read, so
    # the option had no effect.
    extra_exceptions = []
    # Which namespaces should be processed?
    # Default to [] which means all namespaces will be processed.
    namespaces = []
    PageTitles = []
    autoText = False
    autoTitle = False
    # This factory is responsible for processing command line arguments
    # that are also used by other scripts and that determine on which pages
    # to work on.
    genFactory = pagegenerators.GeneratorFactory()
    # Load default summary message.
    # BUG WARNING: This is probably incompatible with the -lang parameter.
    wikipedia.setAction(msg)

    # Read command line parameters.
    for arg in wikipedia.handleArgs():
        if arg == '-autotitle':
            autoTitle = True
        elif arg == '-autotext':
            autoText = True
        elif arg.startswith('-page'):
            if len(arg) == 5:
                # Bare "-page": ask for the title interactively.
                PageTitles.append(
                    wikipedia.input(u'Which page do you want to chage?'))
            else:
                PageTitles.append(arg[6:])
        elif arg.startswith('-except:'):
            extra_exceptions.append(arg[8:])
        elif arg.startswith('-template:'):
            # Accepted so it is not passed to the generator factory; the
            # value is not used by this bot.
            pass
        elif arg.startswith('-namespace:'):
            namespaces.append(int(arg[11:]))
        elif arg.startswith('-summary:'):
            wikipedia.setAction(arg[9:])
        else:
            generator = genFactory.handleArg(arg)
            if generator:
                gen = generator
    # Single parenthesised argument: same output on Python 2 and Python 3.
    print(namespaces)

    if PageTitles:
        pages = [wikipedia.Page(wikipedia.getSite(), PageTitle)
                 for PageTitle in PageTitles]
        gen = iter(pages)
    if not gen:
        # Syntax error, show help text from the top of this file.
        wikipedia.showHelp('coordbot')
        wikipedia.stopme()
        sys.exit()
    if namespaces:
        gen = pagegenerators.NamespaceFilterPageGenerator(gen, namespaces)
    # Disabled in the original script:
    # gen = pagegenerators.RedirectFilterPageGenerator(gen)

    # The -except texts are plain text (no -regex option exists here), so
    # escape them before adding them to the module-level pattern list that
    # CoordRobot compiles when it is constructed.
    exceptions.extend(re.escape(text) for text in extra_exceptions)

    preloadingGen = pagegenerators.PreloadingGenerator(gen, pageNumber=20)
    bot = CoordRobot(preloadingGen, autoTitle, autoText)
    bot.run()
# Script entry point: run the bot only when the file is executed directly.
if __name__ == "__main__":
    try:
        main()
    finally:
        # Runs whether main() returns, raises or exits.
        wikipedia.stopme()