Modulo:Webarchive

Questo è un modulo scritto in Lua. Le istruzioni che seguono sono contenute nella sottopagina Modulo:Webarchive/man (modifica · cronologia)
Sandbox: Modulo:Webarchive/sandbox (modifica · cronologia) · Sottopagine: lista · Test: Modulo:Webarchive/test (modifica · cronologia · Esegui)
Questo modulo implementa le funzionalità del template {{Webarchive}} e permette ad altri moduli Lua di decifrare la data di alcuni archivi dall'URL.
Ha una sottopagina di configurazione: Modulo:Webarchive/Configurazione.
--[[ ----------------------------------
Lua module implementing the {{webarchive}} template.
A merger of the functionality of three templates: {{wayback}}, {{webcite}} and {{cite archives}}
]]
require('Module:No globals')
local getArgs = require('Module:Arguments').getArgs
-- Export table; it is returned at the end of the module and carries p.webarchive.
local p = {}
-- Tracking categories collected during a render. Keys are full category titles,
-- the value is always the placeholder 1; createTracking() turns the keys into links.
local track = {} -- Associative array to hold tracking categories
-- Highest numbered urlN / dateN / titleN argument that parseExtraArgs will look at.
local maxurls = 10 -- Max number of URLs allowed.
-- Known archive services, scanned in order by serviceName(); the first entry
-- whose signature is found in the URL host wins.
--   signature   : Lua pattern searched for in the host name
--   service     : optional service id ("wayback", "webcite", ...); when absent
--                 serviceName() leaves the service as "altri"
--   tailbracket : format string with two %s placeholders that receive the
--                 wikilink opening and closing brackets (empty when nolink is set)
--   tail        : literal tail text, used when tailbracket is absent
--   tracking    : optional tracking category; a generic one is used otherwise
local servizi = {
    { signature = "archive.org", service = "wayback", tailbracket = " in %sInternet Archive%s", tracking = "Categoria:Template Webarchive - collegamenti all'Internet Archive" },
    { signature = "webcitation.org", service = "webcite", tailbracket = " in %sWebCite%s", tracking = "Categoria:Template Webarchive - collegamenti a WebCite" },
    { signature = "archive.is", service = "archiveis", tailbracket = " in %sArchive.is%s", tracking = "Categoria:Template Webarchive - collegamenti a archive.is" },
    { signature = "archive.fo", service = "archiveis", tailbracket = " in %sArchive.is%s", tracking = "Categoria:Template Webarchive - collegamenti a archive.is" },
    { signature = "archive.today", service = "archiveis", tailbracket = " in %sArchive.is%s", tracking = "Categoria:Template Webarchive - collegamenti a archive.is" },
    { signature = "archive.il", service = "archiveis", tailbracket = " in %sArchive.is%s", tracking = "Categoria:Template Webarchive - collegamenti a archive.is" },
    { signature = "archive.ec", service = "archiveis", tailbracket = " in %sArchive.is%s", tracking = "Categoria:Template Webarchive - collegamenti a archive.is" },
    { signature = "archive[-]it.org", service = "archiveit", tailbracket = " in %sArchive-It%s" },
    { signature = "arquivo.pt", tail = " nel Portuguese Web Archive" },
    { signature = "loc.gov", tailbracket = " nella %sLibrary of Congress%s" },
    { signature = "webharvest.gov", tailbracket = " nel %sNational Archives and Records Administration%s" },
    { signature = "bibalex.org", tail = " nella [[Bibliotheca Alexandrina#Struttura e collezioni|Bibliotheca Alexandrina]]" },
    { signature = "collectionscanada", tail = " nel Canadian Government Web Archive" },
    { signature = "haw.nsk", tail = " nel Croatian Web Archive (HAW)" },
    { signature = "veebiarhiiv.digar.ee", tail = " nell'Estonian Web Archive" },
    -- Fixed: a stray "]]" used to follow the closing placeholder, producing
    -- "]]]]" with links enabled and a dangling "]]" with nolink.
    { signature = "vefsafn.is", tailbracket = " nella %sNational and University Library of Iceland%s" },
    { signature = "proni.gov", tailbracket = " nel %sPublic Record Office of Northern Ireland%s" },
    { signature = "uni[-]lj.si", tail = " nello Slovenian Web Archive" },
    { signature = "stanford.edu", tail = " nello [[Stanford University Libraries|Stanford Web Archive]]" },
    { signature = "nationalarchives.gov.uk", tailbracket = " in %sUK Government Web Archive%s" },
    { signature = "parliament.uk", tailbracket = " in %sUK Parliament's Web Archive%s" },
    { signature = "webarchive.org.uk", tailbracket = " in %sUK Web Archive%s" },
    { signature = "nlb.gov.sg", tail = " in Web Archive Singapore" },
    { signature = "pandora.nla.gov.au", tailbracket = " in %sPandora Archive%s" },
    { signature = "perma.cc", tailbracket = " in %sPerma.cc%s" },
    { signature = "perma-archives.cc", tailbracket = " in %sPerma.cc%s" },
    { signature = "screenshots.com", tail = " in Screenshots" },
    { signature = "wikiwix.com", tail = " in Wikiwix" },
    { signature = "freezepage.com", tail = " in Freezepage" },
    { signature = "webcache.googleusercontent.com", tail = " in Google Cache" }
}
-- Italian month names, indexed by month number (1 = gennaio, 12 = dicembre).
local month_localized = {
    'gennaio',
    'febbraio',
    'marzo',
    'aprile',
    'maggio',
    'giugno',
    'luglio',
    'agosto',
    'settembre',
    'ottobre',
    'novembre',
    'dicembre',
}
--[[--------------------------< inlineError >-----------------------
Build the message used when the template cannot be rendered at all and record
the error tracking category in track[].
  arg : name of the offending template parameter (shown as |arg=)
  msg : text appended after the fixed part of the message
Returns the whole message wrapped in an error-classed <span>.
]]
local function inlineError(arg, msg)
    local opening = '<span style="font-size:100%" class="error citation-comment">Errore nella compilazione del template webarchive: controllare <code style="color:inherit; border:inherit; padding:inherit;">|'
    local middle = '=</code> value. '
    local closing = '</span>'
    track["Categoria:Errori di compilazione del template Webarchive"] = 1
    return opening .. arg .. middle .. msg .. closing
end
--[[--------------------------< inlineRed >-----------------------
Wrap a text fragment (for instance a warning that is part of the final output)
in an error-classed <span>.
  msg      : text to wrap
  trackmsg : "warning" or "error" to record the matching tracking category in
             track[]; any other value (including nil) records nothing
Returns the wrapped text.
]]
local function inlineRed(msg, trackmsg)
    local category_for = {
        warning = "Categoria:Errori di compilazione del template Webarchive - Avvisi",
        error = "Categoria:Errori di compilazione del template Webarchive",
    }
    local category = category_for[trackmsg]
    if category then
        track[category] = 1
    end
    local opening = '<span style="font-size:100%" class="error citation-comment">'
    return opening .. msg .. '</span>'
end
--[[--------------------------< base62 >-----------------------
Convert a base-62 string to a number.
Digit values: "0"-"9" = 0-9, "A"-"Z" = 10-35, "a"-"z" = 36-61.
Returns 1 when the input is empty or contains anything else.
Credit: https://de.wikipedia.org/wiki/Modul:Expr
]]
local function base62( value )
    if not value:match( "^%w+$" ) then
        return 1
    end
    local total = 0
    local weight = 1 -- 62^0, 62^1, ... while walking from the last character to the first
    for pos = #value, 1, -1 do
        local code = value:byte( pos, pos )
        local digit
        if code >= 97 and code <= 122 then
            digit = code - 61
        elseif code >= 65 and code <= 90 then
            digit = code - 55
        elseif code >= 48 and code <= 57 then
            digit = code - 48
        else
            -- %w accepted a character outside the three ASCII ranges
            return 1
        end
        total = total + digit * weight
        weight = weight * 62
    end
    return total
end
--[[--------------------------< tableLength >-----------------------
Count the entries that pairs() visits in table t (array and hash part alike).
]]
local function tableLength(t)
    local total = 0
    for _key, _value in pairs(t) do
        total = total + 1
    end
    return total
end
--[[--------------------------< dateFormat >-----------------------
Classify a date string.
  "iso" : three "-"-separated parts, the first a year, the others numeric
  "dmy" : three space-separated parts, last a year, first numeric
  "mdy" : three space-separated parts, last a year, first not numeric
  "ymd" : three space-separated parts, last numeric but not a year, first a year
A "year" is a number strictly between 1900 and 2200.
Returns nil when the format cannot be determined. A three-part hyphenated
string with a non-numeric first part (e.g. "abc-01-02") now also returns nil;
it used to raise an error by comparing nil with a number.
]]
local function dateFormat(date)
    -- n is the result of tonumber(), so it may be nil
    local function isYear(n)
        return n ~= nil and n > 1900 and n < 2200
    end

    local parts = mw.text.split(date, "-")
    if #parts == 3 then
        if isYear(tonumber(parts[1])) and tonumber(parts[2]) and tonumber(parts[3]) then
            return "iso"
        end
        -- three hyphenated parts that are not ISO: no further guessing
        return nil
    end

    parts = mw.text.split(date, " ")
    if #parts == 3 then
        local first = tonumber(parts[1])
        local last = tonumber(parts[3])
        if last then
            if isYear(last) then
                if first then
                    return "dmy"
                end
                return "mdy"
            elseif isYear(first) then
                return "ymd"
            end
        end
    end
    return nil
end
--[[--------------------------< makeDate >-----------------------
Turn zero-padded year, month and day strings into a display date with the
Italian month name from month_localized.
  year, month, day : digit strings, e.g. "2016", "09", "01"
  df               : accepted for interface compatibility; this function does
                     not read it, the output is always "day month year"
Returns
  nil              when any of year/month/day is missing or empty
  year             when the month is outside 1..12
  "month year"     when the day is outside 1..31
  "day month year" otherwise, with day 1 written as "1º"
]]
local function makeDate(year, month, day, df)
    local function blank(v)
        return not v or v == ""
    end
    if blank(year) or blank(month) or blank(day) then
        return nil
    end

    local month_number = tonumber(month:match("0*(%d+)")) -- leading zeros dropped
    if month_number < 1 or month_number > 12 then
        return year
    end
    local month_name = month_localized[month_number]
    if not month_name then
        return year
    end

    local day_text = day:match("0*(%d+)") -- leading zeros dropped
    local day_number = tonumber(day_text)
    if day_number < 1 or day_number > 31 then
        return mw.ustring.format("%s %s", month_name, year)
    end
    if day_text == "1" then
        day_text = "1º"
    end
    return mw.ustring.format("%s %s %s", day_text, month_name, year)
end
--[[--------------------------< decodeWebciteDate >-----------------------
Given the path part of a WebCite URL (e.g. /67xHmVFWP), decode the date that
is encoded in the snapshot ID.
  path : URI path; the ID is the text between the first and second "/"
  df   : passed through to makeDate
Returns
  "query"         for URL forms that carry no base-62 ID (see list below)
  a date string   built by makeDate on success
  an error <span> (via inlineRed, which also records the error category) when
                  the ID is missing or the decoded date is implausible
The ID is decoded with base62(); the first ten decimal digits of the result
are handed to os.date() as a timestamp in seconds.
]]
local function decodeWebciteDate(path, df)
    local id = mw.text.split(path or "", "/")[2]

    -- A bare host URL has no ID segment at all. That used to crash in
    -- mw.ustring.find, and an empty segment decoded to a bogus 1970 date.
    if not id or id == "" then
        return inlineRed("[Date error] (1)", "error")
    end

    -- valid URL formats that are not base62
    -- http://www.webcitation.org/query?id=1138911916587475
    -- http://www.webcitation.org/query?url=http..&date=2012-06-01+21:40:03
    -- http://www.webcitation.org/1138911916587475
    -- http://www.webcitation.org/cache/73e53dd1f16cf8c5da298418d2a6e452870cf50e
    -- http://www.webcitation.org/getfile.php?fileid=1c46e791d68e89e12d0c2532cc3cf629b8bc8c8e
    if mw.ustring.find(id, "query") or
        mw.ustring.find(id, "cache") or
        mw.ustring.find(id, "getfile") or
        tonumber(id) then
        return "query"
    end

    local seconds = tonumber(string.sub(string.format("%d", base62(id)), 1, 10))
    local fields = mw.text.split(os.date("%Y %m %d", seconds), " ")
    local year, month, day = fields[1], fields[2], fields[3]
    local y, m, d = tonumber(year), tonumber(month), tonumber(day)

    if not y or not m or not d then
        return inlineRed("[Date error] (1)", "error")
    end
    if m > 12 or d > 31 or m < 1 then
        return inlineRed("[Date error] (2)", "error")
    end
    if y > tonumber(os.date("%Y")) or y < 1900 then
        return inlineRed("[Date error] (3)", "error")
    end

    local fulldate = makeDate(year, month, day, df)
    if not fulldate then
        return inlineRed("[Date error] (4)", "error")
    end
    return fulldate
end
--[[--------------------------< decodeWaybackDate >-----------------------
Given the path part of a Wayback URL (e.g. /web/20160901010101/http://example.com)
extract the snapshot timestamp and return it as a display date.
  path : URI path
  df   : passed through to makeDate
Returns
  "index"         when the snapshot segment is "*" (e.g. /web/*/http..)
  a date string   built by makeDate on success
  an error <span> (via inlineRed) when the timestamp is not usable
Non-digit decorations in the snapshot ID such as "re_", "-" and a trailing "*"
are removed before decoding; short timestamps are right-padded with zeros to
14 digits.
]]
local function decodeWaybackDate(path, df)
    -- Extra parentheses keep only the first result of string.gsub (the string).
    local without_prefix = (string.gsub(path, "^/w?e?b?/?", "")) -- Remove leading "/web/" or "/"
    local stamp = mw.text.split(without_prefix, "/")[1]
    if stamp == "*" then -- eg. /web/*/http..
        return "index"
    end

    stamp = (string.gsub(stamp, "[a-z][a-z]_[0-9]?$", "")) -- Remove any trailing "re_" from date
    stamp = (string.gsub(stamp, "[-]", "")) -- Remove dashes from date eg. 2015-01-01
    stamp = (string.gsub(stamp, "[*]$", "")) -- Remove trailing "*"

    if not tonumber(stamp) then
        return inlineRed("[Date error] (2)", "error")
    end
    local length = #stamp
    if length < 4 then
        return inlineRed("[Date error] (3)", "error")
    end

    local padded = stamp
    if length < 14 then
        padded = stamp .. string.rep("0", 14 - length)
    end
    local year = padded:sub(1, 4)
    local month = padded:sub(5, 6)
    local day = padded:sub(7, 8)
    local y, m, d = tonumber(year), tonumber(month), tonumber(day)

    if not (y and m and d) then
        return inlineRed("[Date error] (4)", "error")
    end
    if m > 12 or d > 31 or m < 1 then
        return inlineRed("[Date error] (5)", "error")
    end
    if y > tonumber(os.date("%Y")) or y < 1900 then
        return inlineRed("[Date error] (6)", "error")
    end

    return makeDate(year, month, day, df) or inlineRed("[Date error] (7)", "error")
end
--[[--------------------------< serviceName >-----------------------
Identify the archive service from url_main.host and fill in url_main.
  url_main : table with a host field (e.g. "web.archive.org"); on return it has
             service - id from the matching servizi entry, or "altri"
             tail    - text to append after the link
  nolink   : when truthy, the service name in the tail is not wikilinked
             (only affects entries that use tailbracket)
The first servizi entry whose signature is found in the host is used. When
none matches, the tail names the host followed by an "unknown service" notice.
One tracking category is recorded in track[].
]]
local function serviceName(url_main, nolink)
    local open_link, close_link = "[[", "]]"
    if nolink then
        open_link, close_link = "", ""
    end

    local category = "Categoria:Template Webarchive - altri archivi"
    url_main.service = "altri"

    for _, entry in ipairs(servizi) do
        if mw.ustring.find(url_main.host, entry.signature) then
            if entry.service then
                url_main.service = entry.service
            end
            if entry.tailbracket then
                url_main.tail = mw.ustring.format(entry.tailbracket, open_link, close_link)
            else
                url_main.tail = entry.tail
            end
            if entry.tracking then
                category = entry.tracking
            end
            break
        end
    end

    if url_main.tail == nil then
        url_main.tail = " a " .. url_main.host .. " " .. inlineRed("Errore: URL di servizio di archiviazione sconosciuto")
    end
    track[category] = 1
end
--[[--------------------------< parseExtraArgs >-----------------------
Collect the numbered arguments from 2 to maxurls (url2..urlN with their
dateN/dataN and titleN/titoloN companions) into a table.
For example: {{webarchive |url=.. |url4=.. |url7=..}}
has three url arguments that are not in numeric sequence (1..4..7). Only those
numbered 2 or greater are handled here (4 and 7), and they are stored as
consecutive entries:
  result[1].url = <argument value for url4>
  result[2].url = <argument value for url7>
Each entry has url, date and title fields; a missing date is replaced by a
warning <span> from inlineRed, a missing title stays nil.
Returns the table (empty when there are no extra URLs).
]]
local function parseExtraArgs(args, maxurls)
    local extras = {}
    for n = 2, maxurls do
        local url = args["url" .. n]
        if url then
            extras[#extras + 1] = {
                url = url,
                date = args["date" .. n] or args["data" .. n] or inlineRed("[Data mancante]", "warning"),
                title = args["title" .. n] or args["titolo" .. n],
            }
        end
    end
    return extras
end
--[[--------------------------< comma >-----------------------
Return "," when the date string looks like MDY (it starts with something that
is not a number, i.e. a month name), "" otherwise.
Only the first token is inspected: the text before the first space or hyphen.
  "index"            -> ""
  token starts with a digit (DMY, ISO, YMD) -> ""
  anything else      -> ","
The test is "starts with a digit" rather than tonumber() because makeDate
writes the first day of the month as "1º", which tonumber() rejects; such DMY
dates used to be given the MDY comma.
]]
local function comma(date)
    local first = string.match(date, "^[^ %-]*")
    if first == "index" then
        return ""
    end
    if string.find(first, "^%d") then
        return ""
    end
    return ","
end
--[[--------------------------< createTracking >-----------------------
Turn the categories recorded in track[] into wikitext category links.
Returns '' outside namespace 0, otherwise the concatenation of "[[<key>]]" for
every key in track[] (in pairs() order; empty string when nothing is recorded).
]]
local function createTracking()
    if mw.title.getCurrentTitle().namespace ~= 0 then
        return ''
    end
    local links = {}
    for category in pairs(track) do
        links[#links + 1] = "[[" .. category .. "]]"
    end
    return table.concat(links)
end
--[[--------------------------< createRendering >-----------------------
Build the wikitext for the main archive link and any additional links.
  url_main : table with url, title, date, tail, service and format fields
  ulx      : sequence of additional archives, each with url, date and title
Behaviour by url_main.format:
  "none"         main link followed by its tail and date, then (if ulx is not
                 empty) "Archivi aggiuntivi: " and one link per ulx entry
  "addlarchives" only the list of ulx links, labelled by title or else date
  anything else  only the list of ulx links, labelled by title or else
                 "Pagina N" (N counts from 2), under a header naming url_main.date
Returns the wikitext string.
]]
local function createRendering(url_main, ulx)
    -- For {{cite archives}}: list-only formats.
    -- The list used to be written through two misspelled, undefined variable
    -- names, so these formats raised an error instead of rendering.
    if url_main.format ~= "none" then
        local multiple_services = url_main.format == "addlarchives"
        local header
        if multiple_services then -- Multiple archive services
            header = "Archivi aggiuntivi: "
        else -- Multiple pages from the same archive
            header = mw.ustring.format("Pagine di archivio aggiuntive su %s: ", url_main.date)
        end

        local links = {}
        for indx, urlx in ipairs(ulx) do
            local label = urlx["title"]
            if not label then
                if multiple_services then
                    label = urlx["date"]
                else
                    -- the main URL counts as page 1
                    label = "Pagina " .. indx + 1
                end
            end
            links[#links + 1] = mw.ustring.format("[%s %s]", urlx["url"], label)
        end
        return mw.ustring.format("%s%s.", header, table.concat(links, ", "))
    end

    -- For {{wayback}}, {{webcite}}
    local period1 = "" -- For backwards compat with {{wayback}}
    local period2 = "."
    local indexstr = "archiviato"
    if url_main.date == "index" then
        indexstr = "archivio"
    end

    local sand
    if url_main.title then
        if url_main.date then -- Title. Date.
            sand = mw.ustring.format("[%s %s]%s (%s %s)", url_main.url, url_main.title, url_main.tail, indexstr, url_main.date)
        else -- Title. No date.
            sand = mw.ustring.format("[%s %s]%s", url_main.url, url_main.title, url_main.tail)
        end
    elseif url_main.date then -- No title. Date.
        if url_main.service == "wayback" then
            -- the sentence is closed here, so no extra period before the list below
            period1 = "."
            period2 = ""
        end
        sand = mw.ustring.format("[%s Archiviato] il %s%s%s%s", url_main.url, url_main.date, comma(url_main.date), url_main.tail, period1)
    else -- No title. No date
        sand = mw.ustring.format("[%s Archiviato]%s", url_main.url, url_main.tail)
    end

    if #ulx > 0 then -- For multiple archive URLs
        local links = {}
        for _, urlx in ipairs(ulx) do
            links[#links + 1] = mw.ustring.format("[%s %s]", urlx["url"], urlx["title"] or urlx["date"])
        end
        sand = sand .. period2 .. " Archivi aggiuntivi: " .. table.concat(links, ", ") .. "."
    end
    return sand
end
--[[--------------------------< p.webarchive >-----------------------
Main entry point: implements Template:Webarchive.
  frame : the frame object; arguments are read through Module:Arguments
Reads url/url1, date/data (and date1/data1), title/titolo (and title1/titolo1),
format/formato, nolink and the numbered url2..urlN families.
Returns the rendered wikitext followed by the tracking category links, or an
error message when the url argument is missing or cannot be parsed.
]]
function p.webarchive(frame)
    -- Load the parameters into args; empty values are ignored, except for nolink.
    local args = getArgs(frame, {
        valueFunc = function (key, value)
            if key == 'nolink' then
                -- nolink is a flag: supplied (even empty) means true. It is
                -- reported as set only when a value was actually supplied;
                -- returning true unconditionally would also turn it on for a
                -- nil value. NOTE(review): presumably Module:Arguments calls
                -- valueFunc with nil for absent arguments - confirm.
                if value ~= nil then
                    return true
                end
                return nil
            elseif value then
                value = mw.text.trim(value)
                if value ~= '' then return value end
            end
            return nil
        end
    })
    local tname = "Webarchive" -- name of calling template. Change if template rename.
    local verifydates = "yes" -- See documentation. Set "no" to disable.

    -- URL argument (first)
    -- inlineError records the error tracking category itself.
    local url1 = args.url or args.url1
    if not url1 then
        return inlineError("url", "vuoto.") .. createTracking()
    end
    if mw.ustring.find( url1, "https://web.http") then -- track bug
        return inlineError("url", "https://web.http") .. createTracking()
    end
    if url1 == "https://web.archive.org/http:/" then -- track bug
        return inlineError("url", "Invalid URL") .. createTracking()
    end

    -- Parse in protected mode: a malformed URL must give an inline error, not
    -- a script error, and serviceName() needs a host to match against.
    local parsed, uri1 = pcall(mw.uri.new, url1)
    if not parsed or type(uri1) ~= "table" or not uri1.host then
        return inlineError("url", "Invalid URL") .. createTracking()
    end

    local url_main = {}
    url_main.url = url1
    url_main.host = uri1.host
    serviceName(url_main, args.nolink)
    local service = url_main.service

    -- Date argument
    local date = args.date or args.date1 or args.data or args.data1
    if date == "*" and service == "wayback" then
        date = "index"
    elseif date then
        -- A date was supplied: for wayback/webcite compare it with the one
        -- encoded in the URL, when its format is recognised.
        if verifydates == "yes" and service == "wayback" then
            local ldf = dateFormat(date)
            if ldf then
                local udate = decodeWaybackDate( uri1.path, ldf )
                if udate ~= date then
                    date = udate .. inlineRed("<sup>[Date mismatch]</sup>", "warning")
                end
            end
        elseif verifydates == "yes" and service == "webcite" then
            local ldf = dateFormat(date)
            if ldf then
                local udate = decodeWebciteDate( uri1.path, ldf )
                -- "query" means the URL carries no decodable date: nothing to compare
                if udate ~= "query" and udate ~= date then
                    date = udate .. inlineRed("<sup>[Date mismatch]</sup>", "warning")
                end
            end
        end
    elseif service == "wayback" then
        -- No date supplied: take it from the URL
        date = decodeWaybackDate( uri1.path, "iso" )
        if not date then
            date = inlineRed("[Date error] (1)", "error")
        end
    elseif service == "webcite" then
        date = decodeWebciteDate( uri1.path, "iso" )
        if date == "query" then
            date = inlineRed("[Data mancante]", "warning")
        elseif not date then
            -- message fixed: it used to end with a stray "]"
            date = inlineRed("[Date error] (1)", "error")
        end
    else
        date = inlineRed("[Data mancante]", "warning")
    end
    url_main.date = date

    -- Format argument: only "addlpages" (which needs a date) and
    -- "addlarchives" are recognised, everything else falls back to "none".
    -- The test used to read `not format ~= "none"`, which parses as
    -- (not format) ~= "none" and is always true.
    local format = args.format or args.formato or "none"
    if format ~= "none" then
        if format == "addlpages" then
            if not url_main.date then
                format = "none"
            end
        elseif format ~= "addlarchives" then
            format = "none"
        end
    end
    url_main.format = format

    -- Title argument
    url_main.title = args.title or args.title1 or args.titolo or args.titolo1

    local ulx = parseExtraArgs(args, maxurls)
    --if true then return mw.text.jsonEncode(ulx, mw.text.JSON_PRETTY) end
    local rend = createRendering(url_main, ulx)
    if not rend then
        rend = mw.ustring.format('<span style="font-size:100%" class="error citation-comment">Errori in [[:Template:%s]]: Problema sconosciuto. Si prega di segnalarlo nella [[Discussioni_template:%s|pagina di discussione]] del template.</span>', tname, tname)
        -- NOTE(review): this category title is in English while every other
        -- tracking category in the module is "Categoria:..." - confirm it exists.
        track["Category:Webarchive template errors"] = 1
    end
    return rend .. createTracking()
end
return p