Module:Lang/data/iana languages/make: Difference between revisions

Content deleted Content added
basic gmatch is safe here
m fix module names;
 
(12 intermediate revisions by 4 users not shown)
Line 1:
require('strict');	-- Scribunto strict library: raise an error when an undeclared global is read or written
local p = {};
 
 
Line 31 ⟶ 30:
before the Comments line.
 
Records that contain Deprecated or Preferred-Value fields are ignored, as are private use subtags.
 
]=]
Line 41 ⟶ 40:
local in_comments = false;
 
if mw.ustringstring.find (record, 'Deprecated', 1, true) or mw.ustringstring.find (record, 'Preferred%-Value'), 1, true)
or mw.ustringstring.find (record, 'Private use', 1, true) then
return 'skip';
end
 
for line in string.gmatch (record, '([^\n]+)\n') do -- get a \n terminated line of text (without the \n)
local label = string.match(line, "(.-):")
if mw.ustring.find (line, 'Subtag: [%a%d]+') then -- if this line is the subtag line
code = mw.ustring.match (line, 'Subtag: ([%a%d]+)'); -- extract and save to subtag's code
elseifif mw.ustringnot label and string.find (line, 'Description:^ .+') and not in_comments then -- if thisa continuation line isbut not a descriptioncomments linecontinuation
local descdescriptions[#descriptions] = mwstring.ustring.matchgsub (linedescriptions[#descriptions], 'Description:\"$', (.+)''); -- extractremove trailing quote mark from theprevious description
descriptions[#descriptions] = descriptions[#descriptions] .. ' ' .. string.match (line, '^ (.+)') .. '\"'; -- extract and save the continuation with new quote mark
desc = mw.ustring.gsub (desc, '"', '\\"'); -- in case description contains quote marks (see 1959acad)
elseif label == 'Subtag' then -- if this line is the subtag line
code = string.match (line, 'Subtag: (%w+)'); -- extract and save to subtag's code
elseif label == 'Description' then -- if this line is a description line
local desc = string.match (line, 'Description: (.+)'); -- extract the description
desc = string.gsub (desc, '"', '\\"'); -- in case description contains quote marks (see 1959acad)
table.insert (descriptions, '\"' .. desc .. '\"'); -- save the description wrapped in quote marks
elseif mw.ustring.findlabel (line,== 'Prefix: .+') then -- if this line is a prefix line
table.insert (prefixes, '\"' .. mw.ustringstring.match (line, 'Prefix: (.+)'):lower() .. '\"'); -- extract and save the prefix wrapped in quote marks
elseif mw.ustring.findlabel (line,== 'Comments: .+') then -- if this line is a comments line
in_comments = true;
elseif mw.ustring.find (line, '^ .+') and not in_comments then -- if a continuation line but not a commnets continuation
descriptions[#descriptions] = mw.ustring.gsub (descriptions[#descriptions], '\"$', ''); -- remove trailing quote mark from previous description
descriptions[#descriptions] = descriptions[#descriptions] .. ' ' .. mw.ustring.match (line, '^ (.+)') .. '\"'; -- extract and save the continuation with new quote mark
end
end
Line 91 ⟶ 93:
before the Comments line.
 
Records with private use subtags are ignored.
 
]=]
Line 97 ⟶ 99:
-- Extract the parts of one language, script, or region record.
--
-- record: text of a single registry record; only lines terminated with \n are read.
--
-- Returns four values:
--	code         - the subtag taken from the 'Subtag:' line; the string 'skip' when the
--	               record mentions 'Private use' (the other three values are then nil)
--	descriptions - every description wrapped in quote marks, joined with ', '
--	suppress     - the value of the 'Suppress-Script:' line, or nil when there is none
--	deprecated   - true when the record has a 'Deprecated:' line, else nil
local function get_lang_script_region_parts (record)
	local code;
	local suppress;																-- Suppress script for this code if specified
	local deprecated;															-- boolean; true when subtag is deprecated
	local descriptions = {};													-- raw description text; quoted and escaped just before return
	local in_comments = false;

	if record:find ('Private use', 1, true) then								-- plain-text search; private use records are not wanted
		return 'skip';
	end

	for line in record:gmatch ('([^\n]+)\n') do									-- get a \n terminated line of text (without the \n)
		local label = line:match ('(.-):');										-- field name is everything before the first colon
		if 'Subtag' == label then												-- if this line is the subtag line
			code = line:match ('Subtag: (%w+)');								-- extract and save the subtag's code
		elseif 'Description' == label then										-- if this line is a description line
			local description = line:match ('Description: (.+)');
			if description then													-- an empty Description field has nothing to save
				table.insert (descriptions, description);
			end
		elseif 'Deprecated' == label then
			deprecated = true;													-- subtag is deprecated; set our flag
		elseif 'Suppress-Script' == label then
			suppress = line:match ('Suppress%-Script: (%S+)');
		elseif 'Comments' == label then											-- if this line is a comments line
			in_comments = true;													-- from here on, continuation lines belong to the comments
		elseif not in_comments and #descriptions > 0 then						-- possible continuation of the previous description
			local continuation = line:match ('^ (.+)');							-- continuation lines begin with a space
			if continuation then
				descriptions[#descriptions] = descriptions[#descriptions] .. ' ' .. continuation;
			end
		end
	end

	for i, description in ipairs (descriptions) do								-- output is Lua source so escape embedded quote marks, then wrap
		descriptions[i] = '"' .. description:gsub ('"', '\\"') .. '"';
	end

	return code, table.concat (descriptions, ', '), suppress, deprecated;
end
 
Line 124 ⟶ 133:
 
read a local copy of the IANA language-subtag-registry file and from it build tables to replace the tables in:
[[Module:Lang/data/iana languages]]
[[Module:Lang/data/iana regions]]
[[Module:Lang/data/iana scripts]]
[[Module:Lang/data/iana suppressed scripts]]
[[Module:Lang/data/iana variants]]
 
current language-subtag-registry file can be found at: http://www.iana.org/assignments/language-subtag-registry
Line 133 ⟶ 144:
]=]
 
local function p.iana_extract (frame)
local page = mw.title.getCurrentTitle(); -- get a page object for this page
local content = page:getContent(); -- get unparsed content
local lang_table = {}; -- languages go here
local lang_dep_table = {}; -- deprecated languages go here
local script_table = {}; -- scripts go here
local region_table = {}; -- regions go here
local variant_table = {}; -- variants go here
local suppress_table = {}; -- here we collect suppressed scripts and associated language codes
local iso_639_1_table = {}; -- ISO 639-1 languages; not used by Module:Lang but included here to ensure Module:Lang/data/ISO_639-1 gets updated
local file_date; -- first line
 
Line 145 ⟶ 159:
local descriptions;
local prefixes; -- used for language variants only
local suppress; -- a code's suppress script
local deprecated; -- boolean: true when subtag is deprecated
 
file_date = content:match ('(File%-Date: %d%d%d%d%-%d%d%-%d%d)'); -- get the file date line from this version of the source file
 
for record in string.gmatch (content, '%%%%([^%%]+)') do -- get a %% delimited 'record' from the file; leave off the delimiters
iflocal mwrecord_type = string.ustring.find match(record, 'Type: language(%w+)') then -- if a language record
if record_type == 'language' then -- if a language record
code, descriptions = get_lang_script_region_parts (record); -- get the code and description(s)
code, descriptions, suppress, deprecated = get_lang_script_region_parts (record); -- get the code, description(s), suppress script, and deprecated flag
if code and ('skip' ~= code) then
if deprecated then
table.insert (lang_table, "[\"" .. code .. "\"] = {" .. descriptions .. "}"); -- make table entries
table.insert (lang_dep_table, "[\"" .. code .. "\"] = {" .. descriptions .. "}"); -- make table entries
else
table.insert (lang_table, "[\"" .. code .. "\"] = {" .. descriptions .. "}"); -- make table entries
if 2 == code:len() then
table.insert (iso_639_1_table, "[\"" .. code .. "\"] = {" .. descriptions .. "}"); -- make table entries
end
end
elseif not code then
table.insert (lang_table, "[\"error\"] = {" .. record .. "}"); -- code should never be nil, but inserting an error entry in the final output can be helpful
end
-- here we collect suppress script tags and their associated language codes;
-- prettifying the data in this table must wait until all language codes have been read
if suppress then -- if this code has a suppressed script
local suppressed_code = table.concat ({'\"', code, '\"'}); -- wrap the code in quotes
if suppress_table[suppress] then -- if there is an entry for this script
table.insert (suppress_table[suppress], suppressed_code); -- insert the new code
else
suppress_table[suppress] = {}; -- add new script and empty table
table.insert (suppress_table[suppress], suppressed_code); -- insert the new code
end
end
 
elseif mw.ustring.findrecord_type (record,== 'Type: script') then -- if a script record
code, descriptions = get_lang_script_region_parts (record); -- get the code and description(s)
Line 167 ⟶ 203:
end
 
elseif mw.ustring.findrecord_type (record,== 'Type: region') then -- if a region record
code, descriptions = get_lang_script_region_parts (record); -- get the code and description(s)
Line 176 ⟶ 212:
end
 
elseif mw.ustring.findrecord_type (record,== 'Type: variant') then -- if a regionvariant record
code, prefixes, descriptions = get_variant_parts (record); -- get the code, prefix(es), and description(s)
 
Line 196 ⟶ 232:
end
end
-- makenow prettyprettify outputthe supressed script table
local pretty_suppressed = {};
return "<br /><pre>-- " .. file_date .. "<br />return {<br />&#9;" .. table.concat (lang_table, ',<br />&#9;') .. "<br />&#9;}<br />-- " ..
file_date .. "<br />return {<br />&#9;" .. table.concat (script_table, ',<br />&#9;') .. "<br />&#9;}<br />-- " ..
for script, code_tbl in pairs (suppress_table) do
file_date .. "<br />return {<br />&#9;" .. table.concat (region_table, ',<br />&#9;') .. "<br />&#9;}<br />-- " ..
local LIMIT = 11; -- max number of subtags on a line before a line break
file_date .. "<br />return {<br />&#9;" .. table.concat (variant_table, ',<br />&#9;') .. "<br />&#9;}<br />" .. "</pre>";
local fragment_tbl = {}; -- groups of LIMIT number of subtags collected here
for i=1, #code_tbl, LIMIT do
local stop = ((i+LIMIT-1) > #code_tbl) and #code_tbl or i+LIMIT-1; -- calculate a table.concat stop position
table.insert (fragment_tbl, table.concat (code_tbl, ', ', i, stop)); -- get the fragment and save it
end
table.insert (pretty_suppressed, -- and make all pretty
table.concat ({'[\"', script, '\"] = {', table.concat (fragment_tbl, ',\n\t\t\t\t'), '}'})
);
end
table.sort (pretty_suppressed);
 
-- make final output pretty
return '<br /><pre>------------------------------< I A N A L A N G U A G E S >--------------------------------------------------<br />--' ..
file_date .. "<br />local active = {<br />&#9;" .. table.concat (lang_table, ',<br />&#9;') .. "<br />&#9;}<br /><br />" ..
"local deprecated = {<br />&#9;" .. table.concat (lang_dep_table, ',<br />&#9;') .. "<br />&#9;}<br /><br />" ..
"return {<br />&#9;active = active,<br />&#9;deprecated = deprecated,<br />&#9;}<br /><br />" ..
'------------------------------< I A N A S C R I P T S >------------------------------------------------------<br />--' ..
file_date .. "<br />return {<br />&#9;" .. table.concat (script_table, ',<br />&#9;') .. "<br />&#9;}<br /><br />" ..
'------------------------------< I A N A R E G I O N S >------------------------------------------------------<br />--' ..
file_date .. "<br />return {<br />&#9;" .. table.concat (region_table, ',<br />&#9;') .. "<br />&#9;}<br /><br />" ..
'------------------------------< I A N A V A R I A N T S >----------------------------------------------------<br />--' ..
file_date .. "<br />return {<br />&#9;" .. table.concat (variant_table, ',<br />&#9;') .. "<br />&#9;}<br /><br />" ..
'------------------------------< I A N A S U P P R E S S E D S C R I P T S >--------------------------------<br />--' ..
file_date .. "<br />return {<br />&#9;" .. table.concat (pretty_suppressed, ',<br />&#9;') .. "<br />&#9;}<br /><br />" ..
'------------------------------< I S O 6 3 9 - 1 >------------------------------------------------------------<br />--' ..
file_date .. "<br />return {<br />&#9;" .. table.concat (iso_639_1_table, ',<br />&#9;') .. "<br />&#9;}<br /><br />" .. "</pre>";
end
 
 
return p;
--[[--------------------------< E X P O R T E D F U N C T I O N >--------------------------------------------
]]
 
return {
iana_extract = iana_extract,
}