Questo modulo implementa le funzionalità del template {{Webarchive}} e permette ad altri moduli Lua di decifrare la data di alcuni archivi dall'URL.

Ha una sottopagina di configurazione: Modulo:Webarchive/Configurazione.


--[[ ----------------------------------
  Lua module implementing the {{webarchive}} template.

  A merger of the functionality of three templates: {{wayback}}, {{webcite}} and {{cite archives}}

	]]

local p = {}

-- Known archive services, consulted by serviceName().
--
-- serviceName() walks this list in order and stops at the first entry whose
-- `signature` is found in the host name, so more specific signatures must be
-- listed before broader ones. The signature is interpreted as a Lua pattern
-- (the `plain` flag passed to mw.ustring.find is never defined in this file,
-- hence nil): "." matches any character and a literal "-" has to be written
-- as "[-]".
--
-- Fields:
--   signature   pattern searched for in the host name
--   service     optional service id stored in ulx.url1.service
--   tailbracket tail text with two %s placeholders for the opening and
--               closing wikilink brackets (blanked when nolink is set)
--   tail        fixed tail text, used when there is no tailbracket
--   tracking    optional tracking category for this service
local servizi = {
	-- Must come before "archive.org": that pattern also matches
	-- "webarchive.org.uk" and would classify the UK Web Archive as Wayback.
	{ signature = "webarchive.org.uk", tailbracket = " all'%sUK Web Archive%s" },
	{ signature = "archive.org", service = "wayback",  tailbracket = " all'%sInternet Archive%s",  tracking = "Categoria:Template Webarchive - collegamenti all'Internet Archive" },
	{ signature = "webcitation.org", service = "webcite",  tailbracket = " a %sWebCite%s",  tracking = "Categoria:Template Webarchive - collegamenti a WebCite" },
	{ signature = "archive.is", service = "archiveis",  tailbracket = " a %sArchive.is%s",  tracking = "Categoria:Template Webarchive - collegamenti a archive.is" },
	{ signature = "archive.fo", service = "archiveis",  tailbracket = " a %sArchive.is%s",  tracking = "Categoria:Template Webarchive - collegamenti a archive.is" },
	{ signature = "archive.today", service = "archiveis",  tailbracket = " a %sArchive.is%s",  tracking = "Categoria:Template Webarchive - collegamenti a archive.is" },
	{ signature = "archive.il", service = "archiveis",  tailbracket = " a %sArchive.is%s",  tracking = "Categoria:Template Webarchive - collegamenti a archive.is" },
	{ signature = "archive.ec", service = "archiveis",  tailbracket = " a %sArchive.is%s",  tracking = "Categoria:Template Webarchive - collegamenti a archive.is" },
	{ signature = "archive[-]it.org", service = "archiveit",  tailbracket = " a %sArchive-It%s" },
	{ signature = "arquivo.pt", tail = " al Portuguese Web Archive" },
	{ signature = "loc.gov", tailbracket = " al %sLibrary of Congress%s" },
	{ signature = "webharvest.gov", tailbracket = " al %sNational Archives and Records Administration%s" },
	{ signature = "bibalex.org", tail = " alla [[Bibliotheca Alexandrina#Struttura e collezioni|Bibliotheca Alexandrina]]" },
	{ signature = "collectionscanada", tail = " al Canadian Government Web Archive" },
	{ signature = "haw.nsk", tail = " al Croatian Web Archive (HAW)" },
	{ signature = "veebiarhiiv.digar.ee", tail = " all'Estonian Web Archive" },
	-- The closing brackets come from the second %s; a hard-coded "]]" here
	-- used to produce "[[...]]]]".
	{ signature = "vefsafn.is", tailbracket = " al %sNational and University Library of Iceland%s" },
	{ signature = "proni.gov", tailbracket = " al %sPublic Record Office of Northern Ireland%s" },
	{ signature = "uni[-]lj.si", tail = " al Slovenian Web Archive" },
	{ signature = "stanford.edu", tail = " al [[Stanford University Libraries|Stanford Web Archive]]" },
	{ signature = "nationalarchives.gov.uk", tailbracket = " all'%sUK Government Web Archive%s" },
	{ signature = "parliament.uk", tailbracket = " all'%sUK Parliament's Web Archive%s" },
	{ signature = "nlb.gov.sg", tail = " al Web Archive Singapore" },
	{ signature = "pandora.nla.gov.au", tailbracket = " al %sPandora Archive%s" },
	{ signature = "perma.cc", tailbracket = " a %sPerma.cc%s" },
	-- "[-]" keeps the dash literal; a bare "a-" is a lazy quantifier in a Lua
	-- pattern and never matched the real host name.
	{ signature = "perma[-]archives.cc", tailbracket = " a %sPerma.cc%s" },
	{ signature = "screenshots.com", tail = " a Screenshots" },
	{ signature = "wikiwix.com", tail = " a Wikiwix" },
	{ signature = "freezepage.com", tail = " a Freezepage" },
	{ signature = "webcache.googleusercontent.com", tail = " a Google Cache" }
}

-- Italian month names indexed by month number (1 = gennaio ... 12 = dicembre).
local month_localized = {
	'gennaio',
	'febbraio',
	'marzo',
	'aprile',
	'maggio',
	'giugno',
	'luglio',
	'agosto',
	'settembre',
	'ottobre',
	'novembre',
	'dicembre',
}

--[[--------------------------< inlineError >-----------------------

	Build the red inline message used for a critical template error and record
	the error tracking category in the global `track` table.

	arg: name of the offending template parameter (shown as "|arg=")
	msg: text appended after the parameter name

	Returns the HTML fragment as a string.

 ]]

local function inlineError(arg, msg)

	local category = "Categoria:Errori di compilazione del template Webarchive"
	track[category] = 1

	local opening = '<span style="font-size:100%" class="error citation-comment">Errore nella compilazione del template webarchive: controllare '
	local parameter = '<code style="color:inherit; border:inherit; padding:inherit;">&#124;' .. arg .. '=</code>'

	return opening .. parameter .. ' value. ' .. msg .. '</span>'

end

--[[--------------------------< inlineRed >-----------------------

	Wrap a text fragment in the red "error" span so it can be embedded in the
	final output, optionally recording a tracking category in the global
	`track` table.

	msg:      text to wrap
	trackmsg: "warning" or "error" selects the matching tracking category;
	          any other value (including nil) records nothing

 ]]

local function inlineRed(msg, trackmsg)

	local category
	if trackmsg == "error" then
		category = "Categoria:Errori di compilazione del template Webarchive"
	elseif trackmsg == "warning" then
		category = "Categoria:Errori di compilazione del template Webarchive - Avvisi"
	end

	if category then
		track[category] = 1
	end

	local opening = '<span style="font-size:100%" class="error citation-comment">'
	return opening .. msg .. '</span>'

end

--[[--------------------------< trimArg >-----------------------

	Normalise a template argument: nil and the empty string both become nil,
	anything else is returned with surrounding whitespace removed by
	mw.text.trim.

 ]]

local function trimArg(arg)
	if arg ~= nil and arg ~= "" then
		return mw.text.trim(arg)
	end
	return nil
end
-- Variant of trimArg for on/off switches such as nolink=: only nil maps to
-- nil. An empty string is trimmed and returned as "" (which is truthy in Lua),
-- so a parameter that is present but empty still counts as set.
local function trimArg2(arg)
	if arg ~= nil then
		return mw.text.trim(arg)
	end
	return nil
end

--[[--------------------------< base62 >-----------------------

	Convert a base-62 string to a base-10 number.
	Digit values: '0'-'9' = 0-9, 'A'-'Z' = 10-35, 'a'-'z' = 36-61.
	Input that is empty or not purely alphanumeric yields the sentinel value 1.
	Credit: https://de.wikipedia.org/wiki/Modul:Expr

	]]

local function base62( value )

	if not value:match( "^%w+$" ) then
		return 1
	end

	local total = 0
	local weight = 1                       -- 62^(distance from the last character)
	for pos = #value, 1, -1 do             -- least significant digit first
		local code = value:byte( pos )
		local digit
		if code >= 48  and  code <= 57 then           -- '0'..'9'
			digit = code - 48
		elseif code >= 65  and  code <= 90 then       -- 'A'..'Z'
			digit = code - 55
		elseif code >= 97  and  code <= 122 then      -- 'a'..'z'
			digit = code - 61
		else
			-- %w accepted a character outside the three ASCII ranges
			return 1
		end
		total = total + digit * weight
		weight = weight * 62
	end
	return total
end

--[[--------------------------< tableLength >-----------------------

	Count the entries of a table by walking it with pairs(), so keys of any
	kind are counted, not only the array part.

	]]

local function tableLength(T)
	local entries = 0
	for key, _ in pairs(T) do
		entries = entries + 1
	end
	return entries
end


--[[--------------------------< dateFormat >-----------------------

	Classify a date string by its layout.

	Returns:
	  "iso"  three "-"-separated numeric fields, the first a plausible year
	  "dmy"  three space-separated fields, numeric first field, year last
	  "mdy"  three space-separated fields, non-numeric first field, year last
	  "ymd"  three space-separated fields, year first, numeric last field
	  nil    anything else

	A plausible year is a number strictly between 1900 and 2200.

	]]

local function dateFormat(date)

	-- True when text is numeric and lies in the accepted year range.
	-- Checking for nil first avoids comparing nil with a number, which used
	-- to raise an error for input such as "Jan-5-2016".
	local function isYear(text)
		local n = tonumber(text)
		return n ~= nil and n > 1900 and n < 2200
	end

	local parts = mw.text.split(date, "-")
	if #parts == 3 then
		if isYear(parts[1]) and tonumber(parts[2]) and tonumber(parts[3]) then
			return "iso"
		end
		-- Three hyphen-separated fields that are not an ISO date: give up
		-- without trying the space-separated layouts.
		return nil
	end

	parts = mw.text.split(date, " ")
	if #parts == 3 and tonumber(parts[3]) then
		if isYear(parts[3]) then
			if tonumber(parts[1]) then
				return "dmy"
			end
			return "mdy"
		elseif isYear(parts[1]) then
			return "ymd"
		end
	end

	return nil

end

--[[--------------------------< makeDate >-----------------------

	Build a display date from year, month and day given as digit strings
	(leading zeros allowed). The result always uses the localized
	"day month year" layout with the month name taken from month_localized;
	the df argument is accepted but not consulted.

	Returns:
	  nil                 when year, month or day is nil or empty
	  year                when the month is outside 1..12
	  "<month> <year>"    when the day is outside 1..31
	  "<day> <month> <year>" otherwise, with day 1 written as "1º"

 ]]

local function makeDate(year, month, day, df)

	local function missing(value)
		return not value or value == ""
	end

	if missing(year) or missing(month) or missing(day) then
		return nil
	end

	-- strip leading zeros before converting
	local monthnum = tonumber(month:match("0*(%d+)"))
	if monthnum < 1 or monthnum > 12 then
		return year
	end

	local monthname = month_localized[monthnum]
	if not monthname then
		return year
	end

	local daytext = day:match("0*(%d+)")
	local daynum = tonumber(daytext)
	if daynum < 1 or daynum > 31 then
		return mw.ustring.format("%s %s", monthname, year)
	end

	if daytext == "1" then
		daytext = "1º"   -- ordinal form for the first day of the month
	end
	return mw.ustring.format("%s %s %s", daytext, monthname, year)
end


--[[--------------------------< decodeWebciteDate >-----------------------

	Given a URI path to WebCite (eg. /67xHmVFWP), decode the snapshot date
	encoded in the base-62 identifier.

	path: URI path; the identifier is the first segment after the leading "/"
	df:   requested date format, passed through to makeDate

	Returns:
	  "query"              for URL shapes that carry no base-62 identifier
	  a date string        as produced by makeDate
	  an inlineRed() span  "[Date error] (n)" when the date cannot be decoded

	]]
local function decodeWebciteDate(path, df)

	local id = mw.text.split(path or "", "/")[2]

	-- No identifier segment at all (empty path, or no "/" in it).
	if not id then
		return inlineRed("[Date error] (1)", "error")
	end

	-- valid URL formats that are not base62

	-- http://www.webcitation.org/query?id=1138911916587475
	-- http://www.webcitation.org/query?url=http..&date=2012-06-01+21:40:03
	-- http://www.webcitation.org/1138911916587475
	-- http://www.webcitation.org/cache/73e53dd1f16cf8c5da298418d2a6e452870cf50e
	-- http://www.webcitation.org/getfile.php?fileid=1c46e791d68e89e12d0c2532cc3cf629b8bc8c8e

	if mw.ustring.find(id, "query", 1, true) or
		mw.ustring.find(id, "cache", 1, true) or
		mw.ustring.find(id, "getfile", 1, true) or
		tonumber(id) then
		return "query"
	end

	-- base62() answers 1 for anything that is not purely alphanumeric, which
	-- would otherwise be rendered as a date in 1970.
	if not id:match("^%w+$") then
		return inlineRed("[Date error] (1)", "error")
	end

	-- Only the first ten digits of the decoded number are used as the
	-- Unix timestamp in seconds.
	local seconds = tonumber(string.sub(string.format("%d", base62(id)), 1, 10))
	local fields = mw.text.split(os.date("%Y %m %d", seconds), " ")
	local year, month, day = fields[1], fields[2], fields[3]

	if not tonumber(year) or not tonumber(month) or not tonumber(day) then
		return inlineRed("[Date error] (1)", "error")
	end

	if tonumber(month) > 12 or tonumber(day) > 31 or tonumber(month) < 1 then
		return inlineRed("[Date error] (2)", "error")
	end
	if tonumber(year) > tonumber(os.date("%Y")) or tonumber(year) < 1900 then
		return inlineRed("[Date error] (3)", "error")
	end

	-- local: the original assignment created an accidental global
	local fulldate = makeDate(year, month, day, df)
	if not fulldate then
		return inlineRed("[Date error] (4)", "error")
	end
	return fulldate

end

--[[--------------------------< decodeWaybackDate >-----------------------

	Given a URI path to Wayback (eg. /web/20160901010101/http://example.com )
	decode the snapshot timestamp into a display date via makeDate.
	Non-digit decorations in the snapshot ID such as "re_", "-" and a trailing
	"*" are stripped first.

	Returns:
	  "index"              when the snapshot ID is "*" (eg. /web/*/http..)
	  a date string        as produced by makeDate
	  an inlineRed() span  "[Date error] (n)" when the timestamp is unusable

 ]]

local function decodeWaybackDate(path, df)

	-- Remove leading "/web/" or "/" and keep the first remaining segment
	local trimmed = string.gsub(path, "^/w?e?b?/?", "")
	local stamp = mw.text.split(trimmed, "/")[1]
	if stamp == "*" then -- eg. /web/*/http..
		return "index"
	end

	stamp = string.gsub(stamp, "[a-z][a-z]_[0-9]?$", "")   -- trailing "re_" and similar
	stamp = string.gsub(stamp, "[-]", "")                  -- dashes, eg. 2015-01-01
	stamp = string.gsub(stamp, "[*]$", "")                 -- trailing "*"

	if not tonumber(stamp) then
		return inlineRed("[Date error] (2)", "error")
	end

	local length = string.len(stamp)
	if length < 4 then
		return inlineRed("[Date error] (3)", "error")
	end

	-- Right-pad a partial timestamp with zeros up to 14 digits
	local padded = stamp
	if length < 14 then
		padded = stamp .. string.rep("0", 14 - length)
	end

	local year = string.sub(padded, 1, 4)
	local month = string.sub(padded, 5, 6)
	local day = string.sub(padded, 7, 8)
	local y, m, d = tonumber(year), tonumber(month), tonumber(day)

	if not y or not m or not d then
		return inlineRed("[Date error] (4)", "error")
	end
	if m > 12 or d > 31 or m < 1 then
		return inlineRed("[Date error] (5)", "error")
	end
	if y > tonumber(os.date("%Y")) or y < 1900 then
		return inlineRed("[Date error] (6)", "error")
	end

	local fulldate = makeDate(year, month, day, df)
	if fulldate then
		return fulldate
	end
	return inlineRed("[Date error] (7)", "error")

end

--[[--------------------------< serviceName >-----------------------

	Given a host name extracted by mw.uri.new() (eg. web.archive.org), set the
	tail string and service ID in ulx.url1 and record one tracking category.

	host:   host name of the archive URL; nil is treated as an empty string
	nolink: when truthy, the archive name is emitted without wikilink brackets

	Side effects: writes ulx.url1.service and ulx.url1.tail, sets one key in
	the global `track` table.

	]]

local function serviceName(host, nolink)

	-- A URL without a host used to raise an error here (concatenating nil
	-- and searching in nil); fall back to an empty string instead.
	host = host or ""

	local tracking = "Categoria:Template Webarchive - altri archivi"
	local bracketopen = "[["
	local bracketclose = "]]"
	if nolink then
		bracketopen = ""
		bracketclose = ""
	end

	-- Defaults for an unrecognised archive service
	ulx.url1.service = "altri"
	ulx.url1.tail = " a " .. (ulx.url1.host or host) .. " " .. inlineRed("Errore: URL di servizio di archiviazione sconosciuto")

	-- The signatures are Lua patterns (some use "[-]" for a literal dash),
	-- so the search is deliberately not a plain-text search. First match wins.
	for _, servizio in ipairs(servizi) do
		if mw.ustring.find(host, servizio.signature) then
			ulx.url1.service = servizio.service or ulx.url1.service
			if servizio.tailbracket then
				ulx.url1.tail = mw.ustring.format(servizio.tailbracket, bracketopen, bracketclose)
			else
				ulx.url1.tail = servizio.tail or ulx.url1.tail
			end
			tracking = servizio.tracking or tracking
			break
		end
	end

	track[tracking] = 1
end

--[[--------------------------< parseExtraArgs >-----------------------

	Parse numbered arguments starting at 2, such as url2..url10, date2..date10,
	title2..title10 (the aliases dataN and titoloN are accepted too).
	For example: {{webarchive |url=.. |url4=.. |url7=..}}
		Three url arguments not in numeric sequence (1..4..7).
		Function only processes arguments numbered 2 or greater (in this case 4 and 7)
		It creates numeric sequenced table entries like:
			ulx.url2.url = <argument value for url4>
			ulx.url3.url = <argument value for url7>
		The date and title stored next to each URL are the ones carrying the
		same number as the URL argument (date4/title4 for url4).
	Reads the globals `args` and `maxurls`, writes into the global `ulx`.
	Returns the number of URL arguments found numbered 2 or greater (in this case returns 2)

 ]]

local function parseExtraArgs()

	local found = 0

	for i = 2, maxurls do
		local url = trimArg(args["url" .. i])
		if url then
			found = found + 1

			local entry = { url = url }

			-- Look the companions up by the argument number i, not by the
			-- compacted slot number. Each alias is tested separately:
			-- `"date" .. n or "data" .. n` never evaluated its second operand.
			entry.date = trimArg(args["date" .. i])
				or trimArg(args["data" .. i])
				or inlineRed("[Data mancante]", "warning")
			entry.title = trimArg(args["title" .. i]) or trimArg(args["titolo" .. i])

			-- Slot 1 belongs to the main URL, so extras start at url2.
			ulx["url" .. (found + 1)] = entry
		end
	end

	return found

end

--[[--------------------------< comma >-----------------------

	Given a date string, return "," if it's MDY (its first token is not a
	number), otherwise "".

	The first token is the text before the first space, further cut at the
	first "-" so that ISO dates count as numeric. "index" and the localized
	ordinal "1º" used for the first day of a month are not treated as MDY.

	]]

local function comma(date)
	local words = mw.text.split(date, " ")
	local head = mw.text.split(words[1], "-")[1] -- for ISO

	if head == "index" then
		return ""
	end

	-- "1º settembre 2016" is day-first; without dropping the ordinal sign
	-- tonumber() fails and the date was mistaken for MDY.
	head = string.gsub(head, "º$", "")

	if tonumber(head) then
		return ""
	end
	return ","
end

--[[--------------------------< createTracking >-----------------------

	Turn the keys of the global `track` table into category wikilinks.
	Outside namespace 0 nothing is emitted and an empty string is returned.

	]]

local function createTracking()

	-- Tracking categories are only wanted in namespace 0
	if mw.title.getCurrentTitle().namespace ~= 0 then
		return ''
	end

	local links = ""
	for category in pairs(track) do
		links = links .. "[[" .. category .. "]]"
	end
	return links

end

--[[--------------------------< createRendering >-----------------------

	Return a rendering of the data in the global ulx[][] as wikitext.

	ulx.url1.format selects the layout:
	  "none"          single archive link in {{wayback}}/{{webcite}} style,
	                  followed by "Archivi aggiuntivi: ..." when extra URLs exist
	  "addlarchives"  list of links to several archive services
	  anything else   list of additional pages from the same archive,
	                  introduced by the date of the first URL

	]]

local function createRendering()

	local first = ulx.url1
	local links = {}

	-- For {{wayback}}, {{webcite}}
	if first.format == "none" then

		local period1 = ""   -- For backwards compat with {{wayback}}
		local period2 = "."

		local indexstr = "archiviato"
		if first.date == "index" then
			indexstr = "archivio"
		end

		local sand
		if not first.title and not first.date then -- No title. No date
			sand = "[" .. first.url .. " archiviato]" .. first.tail
		elseif not first.title then -- No title. Date.
			if first.service == "wayback" then
				period1 = "."
				period2 = ""
			end
			sand = "[" .. first.url .. " archiviato] il " .. first.date .. comma(first.date) .. first.tail .. period1
		elseif not first.date then -- Title. No date.
			sand = "[" .. first.url .. " " .. first.title .. "]" .. first.tail
		else -- Title. Date.
			sand = "[" .. first.url .. " " .. first.title .. "]" .. first.tail .. "&#32;(" .. indexstr .. " " .. first.date .. ")"
		end

		if first.extraurls > 0 then -- For multiple archive URLs
			for i = 2, first.extraurls + 1 do
				local entry = ulx["url" .. i]
				-- Prefer the title as link text, fall back to the date
				links[#links + 1] = "[" .. entry.url .. " " .. (entry.title or entry.date) .. "]"
			end
			sand = sand .. period2 .. " Archivi aggiuntivi: " .. table.concat(links, ", ") .. "."
		end
		return sand
	end

	-- For {{cite archives}}
	local multiservice = (first.format == "addlarchives")
	local displayheader
	if multiservice then -- Multiple archive services
		displayheader = "Archivi aggiuntivi: "
	else  -- Multiple pages from the same archive
		displayheader = "Pagine di archivio aggiuntive&nbsp;su " .. first.date .. ": "
	end

	for i = 1, 1 + first.extraurls do
		local entry = ulx["url" .. i]
		local label = entry.title
		if not label then
			if multiservice then
				label = entry.date
			else
				label = "Pagina " .. i
			end
		end
		links[#links + 1] = "[" .. entry.url .. " " .. label .. "]"
	end
	return displayheader .. table.concat(links, ", ") .. "."
end

--[[--------------------------< p.webarchive >-----------------------

	Entry point of the module: render one archive link from the template
	arguments and append the tracking categories.

	frame: Scribunto frame. Its own arguments are used when it carries a first
	       positional argument or url=; otherwise the parent frame's arguments
	       are used.

	Returns the wikitext for the citation, or an inline error message when
	the url argument is missing or unusable.

	The helper functions above read the globals args, ulx, track and maxurls,
	which is why they are assigned here without `local`.

 ]]
function p.webarchive(frame)
	args = frame.args
	if (args[1] == nil) and (args["url"] == nil) then -- if no argument provided then check parent template/module args
		local parent = frame:getParent()
		if parent then
			args = parent.args
		end
	end

	local tname = "Webarchive"                              -- name of calling template. Change if template rename.
	ulx = {}                                                -- Associative array to hold template data
	track = {}                                              -- Associative array to hold tracking categories
	maxurls = 10                                            -- Max number of URLs allowed.
	local verifydates = "yes"                               -- See documentation. Set "no" to disable.

	-- URL argument (first)
	-- inlineError() records the error tracking category itself.
	local url1 = trimArg(args.url) or trimArg(args.url1)
	if not url1 then
		return inlineError("url", "Empty.") .. createTracking()
	end
	if mw.ustring.find( url1, "https://web.http", 1, true ) then -- track bug; literal (plain) search
		return inlineError("url", "https://web.http") .. createTracking()
	end
	if url1 == "https://web.archive.org/http:/" then -- track bug
		return inlineError("url", "Invalid URL") .. createTracking()
	end

	ulx.url1 = {}
	ulx.url1.url = url1
	local uri1 = mw.uri.new(ulx.url1.url)
	-- Without a host the service cannot be identified; report it instead of
	-- letting serviceName() fail on a nil host.
	if not uri1.host or uri1.host == "" then
		return inlineError("url", "Invalid URL") .. createTracking()
	end
	ulx.url1.host = uri1.host
	ulx.url1.extraurls = parseExtraArgs()

	-- Nolink argument
	local nolink = trimArg2(args.nolink)

	serviceName(uri1.host, nolink)

	-- Date argument
	local date = trimArg(args.date) or trimArg(args.date1)
	if date == "*" and ulx.url1.service == "wayback" then
		date = "index"
	elseif date and ulx.url1.service == "wayback" and verifydates == "yes" then
		-- Compare the supplied date with the one encoded in the URL
		local ldf = dateFormat(date)
		if ldf then
			local udate = decodeWaybackDate( uri1.path, ldf )
			if udate ~= date then
				date = udate .. inlineRed("<sup>[Date mismatch]</sup>", "warning")
			end
		end
	elseif date and ulx.url1.service == "webcite" and verifydates == "yes" then
		local ldf = dateFormat(date)
		if ldf then
			local udate = decodeWebciteDate( uri1.path, ldf )
			if udate == "query" then -- skip: URL carries no decodable date
			elseif udate ~= date then
				date = udate .. inlineRed("<sup>[Date mismatch]</sup>", "warning")
			end
		end
	elseif not date and ulx.url1.service == "wayback" then
		date = decodeWaybackDate( uri1.path, "iso" )
		if not date then
			date = inlineRed("[Date error] (1)", "error")
		end
	elseif not date and ulx.url1.service == "webcite" then
		date = decodeWebciteDate( uri1.path, "iso" )
		if date == "query" then
			date = inlineRed("[Data mancante]", "warning")
		elseif not date then
			date = inlineRed("[Date error] (1)", "error")
		end
	elseif not date then
		date = inlineRed("[Data mancante]", "warning")
	end
	ulx.url1.date = date

	-- Format argument: only "addlpages" (with a date) and "addlarchives"
	-- are kept, everything else falls back to "none"
	local format = trimArg(args.format) or trimArg(args.formato)
	if format == "addlpages" then
		if not ulx.url1.date then
			format = "none"
		end
	elseif format ~= "addlarchives" then
		format = "none"
	end
	ulx.url1.format = format

	-- Title argument
	local title = trimArg(args.title) or trimArg(args.title1) or trimArg(args.titolo) or trimArg(args.titolo1)
	ulx.url1.title = title

	local rend = createRendering()
	if not rend then
		rend = '<span style="font-size:100%" class="error citation-comment">Errori in [[:Template:' .. tname .. ']]: Problema sconosciuto. Si prega di segnalarlo nella [[Discussioni_template:' .. tname .. '|pagina di discussione]] del template.</span>'
		-- Same error category as the rest of the module
		track["Categoria:Errori di compilazione del template Webarchive"] = 1
	end

	return rend .. createTracking()

end

return p