Module:ISO 3166/sandbox: Difference between revisions

Content deleted Content added
Test using baked data to make fewer calls to p.strip()
Don't display script warnings if nocat is set to true
 
(34 intermediate revisions by 4 users not shown)
Line 8:
local getArgs = require("Module:Arguments").getArgs
local data = mw.loadData("Module:ISO 3166/data/National/sandbox")
 
local outdata = mw.loadData("Module:ISO 3166/data/National")
--[[----------E R R O R S----------]]-- -- Assign error categories and display warnings
--- Builds the tracking category for an unrecognised input and, when a code is supplied,
-- raises a preview warning describing it.
-- Nothing is emitted (no warning, no category) when args.nocat is the string 'true'.
-- @param errType 'country' or 'subdivision'; used in the warning text and the category name
-- @param args    argument table; args[1] / args[2] are the raw country / subdivision inputs
-- @param code    (optional) normalised value that failed to match; a warning is only added when set
-- @param alpha2  (optional) country code, needed for the subdivision warning text
-- @return category wikitext when the current page is in namespace 0, otherwise ''
local function errors(errType, args, code, alpha2)
	if args.nocat == 'true' then
		return ''
	end
	if code then
		-- Everything is passed as a format argument so that a '%' inside code cannot
		-- corrupt the format string, and nil raw inputs are rendered as ''.
		local warningFormat = '%s ("%s") is not a recognized %s in [[ISO 3166-%s]] ([[Module:ISO 3166]]).'
		if errType == 'country' then
			mw.addWarning(string.format(warningFormat, tostring(code), tostring(args[1] or ''), errType, '1'))
		elseif errType == 'subdivision' and alpha2 then
			mw.addWarning(string.format(warningFormat, tostring(code), tostring(args[2] or ''), errType, '2:' .. alpha2))
		end
	end
	if mw.title.getCurrentTitle().namespace == 0 then
		-- The category name ends with errType itself; no extra suffix is appended.
		return '[[Category:Wikipedia articles with obscure ' .. errType .. ']]'
	end
	return ''
end
 
--[[----------F I N D N A M E----------]]-- -- Finds the name in the database
Line 14 ⟶ 29:
local function findname(code,cdata,qry)
local sqry = p.strip(qry)
if cdata["name"] and sqry==p.strip(cdata["name"])
or cdata["isoname"] and sqry==p.strip(cdata["isoname"])
or not cdata["nocode"] and sqry==code
or sqry==cdata["alpha3"] or sqry==cdata["numeric"]
Line 22 ⟶ 37:
end
for _,tname in pairs(cdata["isonames"] or {}) do
if sqry==p.strip(tname) then
return true
end
end
for _,tname in pairs(cdata["altnames"] or {}) do
if sqry==p.strip(tname) then
return true
end
Line 55 ⟶ 70:
 
--- Normalises a name or code so that loosely formatted inputs compare equal.
-- Steps, in order: decode HTML entities, remove nowiki strip markers, upper-case,
-- replace accented Latin capitals with ASCII letters, apply the clean-up patterns,
-- drop filler words, then delete every remaining non-alphanumeric character.
-- @param text string to normalise (may be nil)
-- @return the normalised string, or nil when text is nil
function p.strip(text)
	if not text then return nil end
	local accents = {["À"]="A",["Á"]="A",["Â"]="A",["Ã"]="A",					-- accent list
		["Ä"]="A",["Å"]="A",["Æ"]="AE",["Ç"]="C",["È"]="E",
		["É"]="E",["Ê"]="E",["Ë"]="E",["Ì"]="I",["Í"]="I",
		["Î"]="I",["Ï"]="I",["Ð"]="D",["Ñ"]="N",["Ò"]="O",
		["Ó"]="O",["Ô"]="O",["Õ"]="O",["Ö"]="O",["Ø"]="O",
		["Ù"]="U",["Ú"]="U",["Û"]="U",["Ü"]="U",["Ý"]="Y",
		["Þ"]="TH",
		["Ā"]="A",["Ă"]="A",["Ą"]="A",["Ć"]="C",["Č"]="C",
		["Ĉ"]="C",["Ċ"]="C",["Ď"]="D",["Đ"]="D",["Ĕ"]="E",
		["Ě"]="E",["Ė"]="E",["Ē"]="E",["Ę"]="E",["Ŋ"]="N",
		["Ğ"]="G",["Ģ"]="G",["Ĝ"]="G",["Ġ"]="G",["Ĥ"]="H",
		["Ħ"]="H",["Ĭ"]="I",["İ"]="I",["Ī"]="I",["Į"]="I",
		["Ĩ"]="I",["Ĳ"]="IJ",["Ĵ"]="J",["Ķ"]="K",["Ĺ"]="L",
		["Ľ"]="L",["Ļ"]="L",["Ŀ"]="L",["Ł"]="L",["Ń"]="N",
		["Ň"]="N",["Ņ"]="N",["Ŏ"]="O",["Ő"]="O",["Ō"]="O",
		["Œ"]="OE",["Ŕ"]="R",["Ř"]="R",["Ŗ"]="R",["Ś"]="S",
		["Š"]="S",["Ş"]="S",["Ŝ"]="S",["Ť"]="T",["Ţ"]="T",
		["Ŧ"]="T",["Ŭ"]="U",["Ű"]="U",["Ū"]="U",["Ų"]="U",
		["Ů"]="U",["Ũ"]="U",["Ŵ"]="W",["Ŷ"]="Y",["Ÿ"]="Y",
		["Ź"]="Z",["Ž"]="Z",["Ż"]="Z",
	}
	local remove = {"NATION OF","COUNTRY OF","TERRITORY OF",					-- text to be removed list
		"FLAG OF","FLAG","KINGDOM OF","STATE OF"," STATE ",
		"PROVINCE OF","PROVINCE","TERRITORY","COUNTY","GOVERNORATE"
	}
	-- A sequence, so that ipairs applies the patterns in exactly this order.
	local patterns = {"(%a)%.(%a)%.",".+:","|.+","%(.-%)",						-- patterns to follow (order may matter)
		"%.%S.*","^THE ","%_","%-","%d%d?%d?PX"
	}
	local replacements = {["(%a)%.(%a)%."]="%1%2",								-- Replacements for patterns above (order does not matter)
		[".+:"]="",["|.+"]="",["%(.-%)"]="",
		["%.%S.*"]="",["^THE "]="",["%_"]=" ",["%-"]=" ",
		["%d%d?%d?PX"]="",
	}
	text = mw.text.decode(text)													-- Decode HTML entities (passed from wikidata?)
	text = mw.text.unstripNoWiki(text)											-- Remove <nowiki> tags
	text = mw.ustring.upper(text)												-- Case insensitivity
	-- Characters inside the range that have no table entry are left unchanged by gsub.
	text = mw.ustring.gsub(text,"[À-Ż]",accents)								-- Deaccent
	for _,pattern in ipairs(patterns) do										-- Follow patterns
		local value = replacements[pattern]
		text = mw.ustring.gsub(text,pattern,value)
	end
	for _,words in pairs(remove) do												-- Remove unneeded words
		text = mw.ustring.gsub(text,words,"")
	end
	text = mw.ustring.gsub(text,"%W","")										-- Remove non alpha-numeric
	return text
end
 
Line 110 ⟶ 139:
 
function p.luacode(args)
if (args[1], args[2] ~= nil) then args[1] =or '', p.strip(args[12]) endor ''
iflocal (args[2] ~= nil) then args[2]code1 = p.strip(args[21]) end
local code2 = p.strip(args[2])
local a1, a2 = args[1], args[2]
local hyphenPos = string.find(a1 or '', '-', 1, true)
if not hyphenPos then --No hyphen
return p.luacodeimpl(args)
end
args[1] = string.sub(a1, 1, hyphenPos-1) --Try splitting on hyphen
args[2] = string.sub(a1, hyphenPos+1)
local hyphensplitresult = p.luacodeimpl(args)
if hyphensplitresult ~= nil and hyphensplitresult ~= '' then
return hyphensplitresult
end
args[1], args[2] = a1, a2 --Try again without splitting
return p.luacodeimpl(args) or ''
end
 
if args["codetype"]=="3" then args["codetype"]="alpha3" end
function p.luacodeimpl(args)
local country, subdivision = args[1], args[2]
 
if args["codetype"]=="3" then
args["codetype"]="alpha3"
end
local catnocountryeot = (args.nocaterror and args.nocat == 'true') and ''or ""
or '[[Category:Wikipedia page with obscure country]]'
local catnosubdivision = (args.nocat and args.nocat == 'true') and ''
or '[[Category:Wikipedia page with obscure subdivision]]'
if notcode1 country== "" then
if not (args.nocat and args.nocat == 'true') then
if mw.title.getCurrentTitle().namespace ~= 0 then catnocountry = '' end
mw.addWarning((args[1] ~= "" and ('Invalid parameter "' .. args[1] .. '"') or 'No parameter') ..
return catnocountry, '<span style="font-size:100%" class="error">"No parameter for the country given"</span>'
' for the country given ([[Module:ISO 3166]]).')
end
return errors('country', args), '<span style="font-size:100%" class="error">"No parameter for the country given"</span>'
end
if notcode2 subdivision== "" then --3166-1 code
for alpha2,cdata in pairs(data) do
if findname(alpha2,cdata,countrycode1) then
if args["codetype"]=="numeric" or args["codetype"]=="alpha3" then
return cdata[args["codetype"]]
Line 160 ⟶ 165:
end
end
if string.find(args[1],"%-") then
if mw.title.getCurrentTitle().namespace ~= 0 then catnocountry = '' end
args[1], args[2] = string.match(args[1],"^([^%-]*)%-(.*)$")
return catnocountry
if args[2] then return p.luacode(args) end
end
return errors('country', args, code1)
else --3166-2 code
for alpha2,cdata in pairs(data) do
if findname(alpha2,cdata,countrycode1) then
if mw.ustring.match(alpha2,"GB") then -- For England, Wales etc.
alpha2 = "GB"
end
local sdata = mw.loadData("Module:ISO 3166/data/"..alpha2.."/sandbox")
local empty = true
for scode,scdata in pairs(sdata) do
if type(scdata)=="table" then
empty = false
if findname(scode,scdata,subdivisioncode2) then
return alpha2.."-"..scode
end
end
end
if string.find(args[2] or '',"%-") then
if mw.title.getCurrentTitle().namespace ~= 0 then catnosubdivision = '' end
code1, args[2] = string.match(args[2] or '',"^([^%-]*)%-(.*)$")
return catnosubdivision
args[1] = args[1].."-"..code1
return p.luacode(args)
end
return errors('subdivision', args, code2, alpha2)
end
end
if string.find(args[1],"%-") then
if mw.title.getCurrentTitle().namespace ~= 0 then catnocountry = '' end
args[1] = string.match(args[1],"^([^%-]*)%-(.*)$")
return catnocountry
return p.luacode(args)
end
return errors('country', args, code1)
end
return errors('country', args, code1)
end
 
--[[----------P . C O D E---------]]-- -- Calls p.luacode but using Module:Arguments
 
--- Template-facing entry point: collects the invocation arguments with
-- Module:Arguments' getArgs and passes them to p.luacode.
-- @param frame frame object handed to getArgs
-- @return the first value returned by p.luacode, or "" when that value is nil or false
function p.code(frame)
	-- A single return: Lua only allows `return` as the last statement of a block, so the
	-- two consecutive returns previously present here could not compile.
	return p.luacode(getArgs(frame)) or ""
end
 
--[[----------P . N U M E R I C---------]]-- -- Calls p.luacode but using Module:Arguments and setting it to output a numeric value
 
function p.numeric(frame)
Line 212 ⟶ 229:
local code1 = p.luacode(args)
local code2 = ''
hyphenPos =if string.find(code1, "%-",) 1, true)then
code1, code2 = string.match(code1,"^([^%-]*)%-(.*)$")
if hyphenPos ~= nil then
code2 = string.sub(code1, hyphenPos+1)
code1 = string.sub(code1, 1, hyphenPos-1)
end
if string.lenfind(code1,"^%u%u$") == 2 then
if code2=="" then --3166-1 alpha-2 code
if data[code1] then
return (args.isoname or args.lang) and isoname(outdatadata,code1,args.lang)
or (outdatadata[code1]["displayname"] or outdatadata[code1]["name"])
else
return errors('[[Category:Wikipedia page with obscure country]]', args)
end
else --3166-2 code
local sdata
local outsdata
if data[code1] then
sdata = mw.loadData("Module:ISO 3166/data/"..code1.."/sandbox")
outsdata = mw.loadData("Module:ISO 3166/data/"..code1)
else
return errors('[[Category:Wikipedia page with obscure country]]', args)
end
if sdata[code2] then
return (args.isoname or args.lang) and isoname(outsdatasdata,code2,args.lang)
or (outsdatasdata[code2]["displayname"] or outsdatasdata[code2]["name"])
else
return errors('[[Category:Wikipedia page with obscure country]]', args)
end
end
Line 263 ⟶ 276:
local args = frame.args
local subdivisionqueried = false
local catnocountry = (args.nocat and args.nocat == 'true') and ''
or '[[Category:Wikipedia page with obscure country]]'
local catnosubdivision = (args.nocat and args.nocat == 'true') and ''
or '[[Category:Wikipedia page with obscure subdivision]]' or ''
local tracking = ''
local targs = {}
targs[1] = args[1] or ''
for i, v in pairs(args) do
if i == 'country' and not stringmw.ustring.find(targs[1], 'region:', 1, true) then
local country = v
local k, region = 1, ''
Line 286 ⟶ 295:
if region == '' then
region = p.luacode({country, nocat = 'true'})
if mw.title.getCurrentTitle().namespace ~= 0 then catnocountry, catnosubdivision = '', '' end
if region == '' then
tracking = tracking .. catnocountryerrors('country', args)
elseif subdivisionqueried == true then
tracking = tracking .. catnosubdivisionerrors('subdivision', args)
end
end