Module:ISO 3166/sandbox: Difference between revisions

Content deleted Content added
Try to handle strings more efficiently
Don't display script warnings if nocat is set to true
 
(35 intermediate revisions by 4 users not shown)
Line 7:
local p = {}
local getArgs = require("Module:Arguments").getArgs
local data = mw.loadData("Module:ISO 3166/data/National/sandbox")
 
--[[----------E R R O R S----------]]-- -- Assign error categories and display warnings
--- Emit an editor-facing warning and return the tracking category for an
-- unrecognised country or subdivision.
--
-- @param errType  'country' or 'subdivision'; also used as the final word of
--                 the tracking-category name.
-- @param args     argument table; args.nocat == 'true' suppresses both the
--                 warning and the category. args[1] / args[2] are quoted in
--                 the warning as the raw user input.
-- @param code     (optional) the normalised value that failed to match; no
--                 warning is emitted when it is nil.
-- @param alpha2   (optional) country code shown in the subdivision warning;
--                 a subdivision warning is only emitted when it is given.
-- @return         the category wikitext when the current page is in
--                 namespace 0, otherwise the empty string.
local function errors(errType, args, code, alpha2)
	if args.nocat == 'true' then
		return ''
	end

	if code then
		-- Every variable part goes through a format argument, so a stray '%'
		-- in `code` cannot be misread as a format directive.
		local template = '%s ("%s") is not a recognized %s in [[ISO 3166-%s]] ([[Module:ISO 3166]]).'
		if errType == 'country' then
			mw.addWarning(string.format(template, tostring(code), tostring(args[1] or ''), errType, '1'))
		elseif errType == 'subdivision' and alpha2 then
			mw.addWarning(string.format(template, tostring(code), tostring(args[2] or ''), errType, '2:' .. alpha2))
		end
	end

	if mw.title.getCurrentTitle().namespace == 0 then
		-- Same category names as the literal
		-- '[[Category:Wikipedia page with obscure country]]' and
		-- '[[Category:Wikipedia page with obscure subdivision]]' strings
		-- used elsewhere in this module.
		return '[[Category:Wikipedia page with obscure ' .. errType .. ']]'
	end
	return ''
end
 
--[[----------F I N D N A M E----------]]-- -- Finds the name in the database
Line 54 ⟶ 70:
 
function p.strip(text)
if not text then return nil end
local accents = {["À"]="A",["Á"]="A",["Â"]="A",["Ã"]="A", -- accent list
["Ä"]="A",["Å"]="A",["Ç"]="C",["È"]="E",["É"]="E",
["Ê"] local accents ="E", {["ËÀ"]="EA",["ÌÁ"]="IA",["ÍÂ"]="IA",["ÎÃ"]="IA",
["ÏÄ"]="IA",["ÑÅ"]="NA",["ÒÆ"]="OAE",["ÓÇ"]="OC",["ÔÈ"]="OE",
["ÕÉ"]="OE",["ÖÊ"]="OE",["ØË"]="OE",["ÙÌ"]="UI",["ÚÍ"]="UI",
["ÛÎ"]="UI",["ÜÏ"]="UI",["ÝÐ"]="YD",["Ñ"]="N",["Ò"]="O",
["Ó"]="O",["Ô"]="O",["Õ"]="O",["Ö"]="O",["Ø"]="O",
["Ù"]="U",["Ú"]="U",["Û"]="U",["Ü"]="U",["Ý"]="Y",
["Þ"]="TH",
["Ā"]="A",["Ă"]="A",["Ą"]="A",["Ć"]="C",["Č"]="C",
["Ĉ"]="C",["Ċ"]="C",["Ď"]="D",["Đ"]="D",["Ĕ"]="E",
["Ě"]="E",["Ė"]="E",["Ē"]="E",["Ę"]="E",["Ŋ"]="N",
["Ğ"]="G",["Ģ"]="G",["Ĝ"]="G",["Ġ"]="G",["Ĥ"]="H",
["Ħ"]="H",["Ĭ"]="I",["İ"]="I",["Ī"]="I",["Į"]="I",
["Ĩ"]="I",["IJ"]="IJ",["Ĵ"]="J",["Ķ"]="K",["Ĺ"]="L",
["Ľ"]="L",["Ļ"]="L",["Ŀ"]="L",["Ł"]="L",["Ń"]="N",
["Ň"]="N",["Ņ"]="N",["Ŏ"]="O",["Ő"]="O",["Ō"]="O",
["Œ"]="OE",["Ŕ"]="R",["Ř"]="R",["Ŗ"]="R",["Ś"]="S",
["Š"]="S",["Ş"]="S",["Ŝ"]="S",["Ť"]="T",["Ţ"]="T",
["Ŧ"]="T",["Ŭ"]="U",["Ű"]="U",["Ū"]="U",["Ų"]="U",
["Ů"]="U",["Ũ"]="U",["Ŵ"]="W",["Ŷ"]="Y",["Ÿ"]="Y",
["Ź"]="Z",["Ž"]="Z",["Ż"]="Z",
}
local remove = {"NATION OF","COUNTRY OF","TERRITORY OF", -- text to be removed list
"FLAG OF","FLAG","KINGDOM OF","STATE OF"," STATE ",
"PROVINCE OF","PROVINCE","TERRITORY","COUNTY","GOVERNORATE"
}
local patterns = { "(%a)%.(%a)%.",".+:","|.+","%(.-%)", -- patterns to follow (order may matter)
"%.%S.*","^THE ","%_","%-","%d%d?%d?PX"
[".+:"]="",
}
["|.+"]="",
local replacements = {["(%a)%.(%a)%."]="%1%2", -- Replacements for patterns above (order does not matter)
["%(.-%)"]="",
["%.+:"]="",["|.*+"]="",["%(.-%)"]="",
["%.%S.*"]="",["^THE "]="",["%_"]=" ",["%-"]=" ",
["%_"]=" ",
["%d%d?%d?PX"]="",
}
text = mw.text.decode(text) -- Decode HTML entities (passed from wikidata?)
text = mw.text.unstripNoWiki(text) -- Remove <nowiki> tags
text = mw.ustring.upper(text) -- Case insensitivity
text = mw.ustring.gsub(text,"[À-Ż]",accents) -- Deaccent
 
local capture = string.match(text, '[ -~]*'); -- Keep only ASCII printable characters
if capture ~= text then -- If same there are no Unicode characters
text = mw.ustring.gsub(text,"[À-Ý]",accents) -- Deaccent
end
for pattern_,valuepattern in pairsipairs(patterns) do -- Follow patterns
textlocal value = string.gsub(text,replacements[pattern,value) ]
text = mw.ustring.gsub(text,pattern,value)
end
for _,words in pairs(remove) do -- Remove unneeded words
text = mw.ustring.gsub(text,words,"")
end
text = mw.ustring.gsub(text,"%W","") -- Remove non alpha-numeric
-- Hyphen-sensitive
text = string.gsub(text,"[^%w%-]","") -- Remove non alpha-numeric
return text
end
 
Line 109 ⟶ 139:
 
function p.luacode(args)
if (args[1], args[2] ~= nil) then args[1] =or '', p.strip(args[12]) endor ''
iflocal (args[2] ~= nil) then args[2]code1 = p.strip(args[21]) end
local code2 = p.strip(args[2])
local a1, a2 = args[1], args[2]
local hyphenPos = string.find(a1 or '', '-', 1, true)
if not hyphenPos then --No hyphen
return p.luacodeimpl(args)
end
args[1] = string.sub(a1, 1, hyphenPos-1) --Try splitting on hyphen
args[2] = string.sub(a1, hyphenPos+1)
local hyphensplitresult = p.luacodeimpl(args)
if hyphensplitresult ~= nil and hyphensplitresult ~= '' then
return hyphensplitresult
end
args[1], args[2] = a1, a2 --Try again without splitting
return p.luacodeimpl(args) or ''
end
 
if args["codetype"]=="3" then args["codetype"]="alpha3" end
function p.luacodeimpl(args)
local country, subdivision = args[1], args[2]
 
if args["codetype"]=="3" then
args["codetype"]="alpha3"
end
local catnocountryeot = (args.nocaterror and args.nocat == 'true') and ''or ""
or '[[Category:Wikipedia page with obscure country]]'
local catnosubdivision = (args.nocat and args.nocat == 'true') and ''
or '[[Category:Wikipedia page with obscure subdivision]]'
if notcode1 country== "" then
if not (args.nocat and args.nocat == 'true') then
if mw.title.getCurrentTitle().namespace ~= 0 then catnocountry = '' end
mw.addWarning((args[1] ~= "" and ('Invalid parameter "' .. args[1] .. '"') or 'No parameter') ..
return catnocountry, '<span style="font-size:100%" class="error">"No parameter for the country given"</span>'
' for the country given ([[Module:ISO 3166]]).')
end
return errors('country', args), '<span style="font-size:100%" class="error">"No parameter for the country given"</span>'
end
if notcode2 subdivision== "" then --3166-1 code
for alpha2,cdata in pairs(data) do
if findname(alpha2,cdata,countrycode1) then
if args["codetype"]=="numeric" or args["codetype"]=="alpha3" then
return cdata[args["codetype"]]
Line 158 ⟶ 165:
end
end
if string.find(args[1],"%-") then
if mw.title.getCurrentTitle().namespace ~= 0 then catnocountry = '' end
args[1], args[2] = string.match(args[1],"^([^%-]*)%-(.*)$")
return catnocountry
if args[2] then return p.luacode(args) end
end
return errors('country', args, code1)
else --3166-2 code
for alpha2,cdata in pairs(data) do
if findname(alpha2,cdata,countrycode1) then
if mw.ustring.match(alpha2,"GB") then -- For England, Wales etc.
alpha2 = "GB"
Line 171 ⟶ 181:
if type(scdata)=="table" then
empty = false
if findname(scode,scdata,subdivisioncode2) then
return alpha2.."-"..scode
end
end
end
if string.find(args[2] or '',"%-") then
if mw.title.getCurrentTitle().namespace ~= 0 then catnosubdivision = '' end
code1, args[2] = string.match(args[2] or '',"^([^%-]*)%-(.*)$")
return catnosubdivision
args[1] = args[1].."-"..code1
return p.luacode(args)
end
return errors('subdivision', args, code2, alpha2)
end
end
if string.find(args[1],"%-") then
if mw.title.getCurrentTitle().namespace ~= 0 then catnocountry = '' end
args[1] = string.match(args[1],"^([^%-]*)%-(.*)$")
return catnocountry
return p.luacode(args)
end
return errors('country', args, code1)
end
return errors('country', args, code1)
end
 
--[[----------P . C O D E---------]]--									-- Calls p.luacode but using Module:Arguments

--- Entry point that takes a frame instead of a plain argument table.
-- The frame is converted with getArgs (from Module:Arguments) and passed to
-- p.luacode.
-- @param frame  the frame object supplied by the caller
-- @return       the first value returned by p.luacode, or "" when that value
--               is nil or false
function p.code(frame)
	return p.luacode(getArgs(frame)) or ""
end
 
--[[----------P . N U M E R I C---------]]-- -- Calls P.luacodeLuacode but using Module:Arguments and setting it to output a numeric value
 
function p.numeric(frame)
Line 210 ⟶ 229:
local code1 = p.luacode(args)
local code2 = ''
hyphenPos =if string.find(code1, "%-",) 1, true)then
code1, code2 = string.match(code1,"^([^%-]*)%-(.*)$")
if hyphenPos ~= nil then
code2 = string.sub(code1, hyphenPos+1)
code1 = string.sub(code1, 1, hyphenPos-1)
end
if string.lenfind(code1,"^%u%u$") == 2 then
if code2=="" then --3166-1 alpha-2 code
if data[code1] then
Line 223 ⟶ 240:
or (data[code1]["displayname"] or data[code1]["name"])
else
return errors('[[Category:Wikipedia page with obscure country]]', args)
end
else --3166-2 code
Line 230 ⟶ 247:
sdata = mw.loadData("Module:ISO 3166/data/"..code1)
else
return errors('[[Category:Wikipedia page with obscure country]]', args)
end
if sdata[code2] then
Line 236 ⟶ 253:
or (sdata[code2]["displayname"] or sdata[code2]["name"])
else
return errors('[[Category:Wikipedia page with obscure country]]', args)
end
end
Line 259 ⟶ 276:
local args = frame.args
local subdivisionqueried = false
local catnocountry = (args.nocat and args.nocat == 'true') and ''
or '[[Category:Wikipedia page with obscure country]]'
local catnosubdivision = (args.nocat and args.nocat == 'true') and ''
or '[[Category:Wikipedia page with obscure subdivision]]' or ''
local tracking = ''
local targs = {}
targs[1] = args[1] or ''
for i, v in pairs(args) do
if i == 'country' and not stringmw.ustring.find(targs[1], 'region:', 1, true) then
local country = v
local k, region = 1, ''
Line 282 ⟶ 295:
if region == '' then
region = p.luacode({country, nocat = 'true'})
if mw.title.getCurrentTitle().namespace ~= 0 then catnocountry, catnosubdivision = '', '' end
if region == '' then
tracking = tracking .. catnocountryerrors('country', args)
elseif subdivisionqueried == true then
tracking = tracking .. catnosubdivisionerrors('subdivision', args)
end
end