Module:Lang/data/iana languages/make: Difference between revisions

Browse history interactively

← Previous edit

Content deleted Content added

Revision as of 21:28, 5 November 2017 edit Erutuon (talk \| contribs) Autopatrolled, Extended confirmed users 32,238 edits basic gmatch is safe here ← Previous edit		Latest revision as of 14:55, 10 July 2024 edit undo Trappist the monk (talk \| contribs) Administrators 494,447 edits m fix module names;
(12 intermediate revisions by 4 users not shown)
Line 1: require('~~Module:No globals~~strict'); ~~local p = {};~~ Line 31 ⟶ 30: before the Comments line. Records with ~~Deprecated~~private ~~dates~~use ~~or Preferred-Value codes~~subtags are ignored ~~as are private use codes~~. ]=] Line 41 ⟶ 40: local in_comments = false; if ~~mw.ustring~~string.find (record, 'Deprecated', 1, true) or ~~mw.ustring~~string.find (record, 'Preferred%-Value'), 1, true) or ~~mw.ustring~~string.find (record, 'Private use', 1, true) then return 'skip'; end for line in string.gmatch (record, '([^\n]+)\n') do -- get a \n terminated line of text (without the \n) local label = string.match(line, "(.-):") ~~if mw.ustring.find (line, 'Subtag: [%a%d]+') then -- if this line is the subtag line~~ ~~code = mw.ustring.match (line, 'Subtag: ([%a%d]+)'); -- extract and save to subtag's code~~ ~~elseif~~if ~~mw.ustring~~not label and string.find (line, '~~Description:~~^ .+') and not in_comments then -- if ~~this~~a continuation line isbut not a ~~description~~comments ~~line~~continuation ~~local desc~~descriptions[#descriptions] = mwstring.~~ustring.match~~gsub (~~line~~descriptions[#descriptions], '~~Description:~~\"$', ~~(.+)~~''); -- ~~extract~~remove trailing quote mark from ~~the~~previous description descriptions[#descriptions] = descriptions[#descriptions] .. ' ' .. string.match (line, '^ (.+)') .. '\"'; -- extract and save the continuation with new quote mark ~~desc = mw.ustring.gsub (desc, '"', '\\"'); -- in case description contains quote marks (see 1959acad)~~ elseif label == 'Subtag' then -- if this line is the subtag line code = string.match (line, 'Subtag: (%w+)'); -- extract and save to subtag's code elseif label == 'Description' then -- if this line is a description line local desc = string.match (line, 'Description: (.+)'); -- extract the description desc = string.gsub (desc, '"', '\\"'); -- in case description contains quote marks (see 1959acad) table.insert (descriptions, '\"' .. desc .. '\"'); -- save the description wrapped in quote marks elseif ~~mw.ustring.find~~label ~~(line,~~== 'Prefix~~: .+~~') then -- if this line is a prefix line table.insert (prefixes, '\"' .. ~~mw.ustring~~string.match (line, 'Prefix: (.+)'):lower() .. '\"'); -- extract and save the prefix wrapped in quote marks elseif ~~mw.ustring.find~~label ~~(line,~~== 'Comments~~: .+~~') then -- if this line is a comments line in_comments = true; ~~elseif mw.ustring.find (line, '^ .+') and not in_comments then -- if a continuation line but not a commnets continuation~~ ~~descriptions[#descriptions] = mw.ustring.gsub (descriptions[#descriptions], '\"$', ''); -- remove trailing quote mark from previous description~~ ~~descriptions[#descriptions] = descriptions[#descriptions] .. ' ' .. mw.ustring.match (line, '^ (.+)') .. '\"'; -- extract and save the continuation with new quote mark~~ end end Line 91 ⟶ 93: before the Comments line. Records with ~~Deprecated~~private ~~dates~~use ~~or Preferred-Value codes~~subtags are ignored ~~as are private use codes~~. ]=] Line 97 ⟶ 99: local function get_lang_script_region_parts (record) local code; local suppress; -- Suppress script for this code if specified local deprecated; -- boolean; true when subtag is deprecated local descriptions = {}; local in_comments = false; if ~~mw.ustring.find (~~record~~, 'Deprecated') or mw.ustring.~~:find (~~record, 'Preferred%-Value') or mw.ustring.find (record,~~ 'Private use') then return 'skip'; end for line in ~~string.~~record:gmatch (~~record,~~ '([^\n]+)\n') do -- get a \n terminated line of text (without the \n) local label = line:match ('(.-):'); ~~if mw.ustring.find (line, 'Subtag: [%a%d]+') then -- if this line is the subtag line~~ ~~code = mw.ustring.match (line,~~if 'Subtag~~: ([%a%d]+)~~'); == label then -- ~~extract~~if ~~and~~this ~~save~~line tois the subtag's ~~code~~line ~~elseif~~ code ~~mw.ustring.find~~= (line,:match ('~~Description~~Subtag: .(%w+)') ~~then~~; -- ifextract ~~this~~and ~~line~~save isto asubtag's ~~description line~~code elseif 'Description' == label then -- if this line is a description line ~~table.insert (descriptions, '\"' .. mw.ustring.match (line, 'Description: (.+)') .. '\"'); -- extract and save the name wrapped in quote marks~~ table.insert (descriptions, '\"' .. line:match ('Description: (.+)') .. '\"'); -- extract and save the name wrapped in quote marks ~~elseif mw.ustring.find (line, 'Comments: .+') then -- if this line is a comments line~~ elseif 'Deprecated' == label then deprecated = true; -- subtag is deprecated; set our flag elseif 'Suppress-Script' == label then suppress = line:match ('Suppress%-Script: (%S+)'); elseif 'Comments' == label then -- if this line is a comments line in_comments = true; elseif ~~mw.ustring.~~line:find (~~line,~~ '^ .+') and not in_comments then -- if a continuation line but not a commnets continuation descriptions[#descriptions] = ~~mw.ustring.gsub (~~descriptions[#descriptions],:gsub ('\"$', ''); -- remove trailing quote mark from previous description descriptions[#descriptions] = descriptions[#descriptions] .. ' ' .. ~~mw.ustring.~~line:match (~~line,~~ '^ (.+)') .. '\"'; -- extract and save the continuation with new quote mark end end return code, table.concat (descriptions, ', '), suppress, deprecated; end Line 124 ⟶ 133: read a local copy of the IANA language-subtag-registry file and from it build tables to replace the tables in: [[Module:~~Language~~Lang/data/iana languages]] [[Module:~~Language~~Lang/data/iana ~~scripts~~regions]] [[Module:~~Language~~Lang/data/iana ~~regions~~scripts]] [[Module:Lang/data/iana supressed cripts]] [[Module:Lang/data/iana variants]] current language-subtag-registry file can be found at: http://www.iana.org/assignments/language-subtag-registry Line 133 ⟶ 144: ]=] local function p.iana_extract (frame) local page = mw.title.getCurrentTitle(); -- get a page object for this page local content = page:getContent(); -- get unparsed content local lang_table = {}; -- languages go here local lang_dep_table = {}; -- deprecated languages go here local script_table = {}; -- scripts go here local region_table = {}; -- regions go here local variant_table = {}; -- variants go here local suppress_table = {}; -- here we collect suppressed scripts and associated language codes local iso_639_1_table = {}; -- ISO 639-1 languages; not used by Module:Lang but included here to ensure Module:Lang/data/ISO_639-1 gets updated local file_date; -- first line Line 145 ⟶ 159: local descriptions; local prefixes; -- used for language variants only local suppress; -- a code's suppress script local deprecated; -- boolean: true when subtag is deprecated file_date = content:match ('(File%-Date: %d%d%d%d%-%d%d%-%d%d)'); -- get the file date line from this version of the source file for record in string.gmatch (content, '%%%%([^%%]+)') do -- get a %% delimited 'record' from the file; leave off the delimiters iflocal mwrecord_type = string.~~ustring.find~~ match(record, 'Type: ~~language~~(%w+)') ~~then -- if a language record~~ if record_type == 'language' then -- if a language record ~~code, descriptions = get_lang_script_region_parts (record); -- get the code and description(s)~~ code, descriptions, suppress, deprecated = get_lang_script_region_parts (record); -- get the code, description(s), suppress script, and deprecated flag if code and ('skip' ~= code) then if deprecated then ~~table.insert (lang_table, "[\"" .. code .. "\"] = {" .. descriptions .. "}"); -- make table entries~~ table.insert (lang_dep_table, "[\"" .. code .. "\"] = {" .. descriptions .. "}"); -- make table entries else table.insert (lang_table, "[\"" .. code .. "\"] = {" .. descriptions .. "}"); -- make table entries if 2 == code:len() then table.insert (iso_639_1_table, "[\"" .. code .. "\"] = {" .. descriptions .. "}"); -- make table entries end end elseif not code then table.insert (lang_table, "[\"error\"] = {" .. record .. "}"); -- code should never be nil, but inserting an error entry in the final output can be helpful end -- here we collect suppress stript tags and their associated language codes; -- prettigying the data in this table must wait until all language codes have been read if suppress then -- if this code has a suppressed script local suppressed_code = table.concat ({'\"', code, '\"'}); -- wrap the code in quotes if suppress_table[suppress] then -- if there is an entry for this script table.insert (suppress_table[suppress], suppressed_code); -- insert the new code else suppress_table[suppress] = {}; -- add new script and empty table table.insert (suppress_table[suppress], suppressed_code); -- insert the new code end end elseif ~~mw.ustring.find~~record_type ~~(record,~~== '~~Type:~~ script') then -- if a script record code, descriptions = get_lang_script_region_parts (record); -- get the code and description(s) Line 167 ⟶ 203: end elseif ~~mw.ustring.find~~record_type ~~(record,~~== '~~Type:~~ region') then -- if a region record code, descriptions = get_lang_script_region_parts (record); -- get the code and description(s) Line 176 ⟶ 212: end elseif ~~mw.ustring.find~~record_type ~~(record,~~== '~~Type:~~ variant') then -- if a ~~region~~variant record code, prefixes, descriptions = get_variant_parts (record); -- get the code, prefix(es), and description(s) Line 196 ⟶ 232: end end -- ~~make~~now ~~pretty~~prettify ~~output~~the supressed script table local pretty_suppressed = {}; ~~return "<br /><pre>-- " .. file_date .. "<br />return {<br /> " .. table.concat (lang_table, ',<br /> ') .. "<br /> }<br />-- " ..~~ ~~file_date .. "<br />return {<br /> " .. table.concat (script_table, ',<br /> ') .. "<br /> }<br />-- " ..~~ for script, code_tbl in pairs (suppress_table) do ~~file_date .. "<br />return {<br /> " .. table.concat (region_table, ',<br /> ') .. "<br /> }<br />-- " ..~~ local LIMIT = 11; -- max number of subtags on a line before a line break ~~file_date .. "<br />return {<br /> " .. table.concat (variant_table, ',<br /> ') .. "<br /> }<br />" .. "</pre>";~~ local fragment_tbl = {}; -- groups of LIMIT number of subtags collected here for i=1, #code_tbl, LIMIT do local stop = ((i+LIMIT-1) > #code_tbl) and #code_tbl or i+LIMIT-1; -- calculate a table.concat stop position table.insert (fragment_tbl, table.concat (code_tbl, ', ', i, stop)); -- get the fragment and save it end table.insert (pretty_suppressed, -- and make all pretty table.concat ({'[\"', script, '\"] = {', table.concat (fragment_tbl, ',\n\t\t\t\t'), '}'}) ); end table.sort (pretty_suppressed); -- make final output pretty return '<br /><pre>------------------------------< I A N A L A N G U A G E S >--------------------------------------------------<br />--' .. file_date .. "<br />local active = {<br /> " .. table.concat (lang_table, ',<br /> ') .. "<br /> }<br /><br />" .. "local deprecated = {<br /> " .. table.concat (lang_dep_table, ',<br /> ') .. "<br /> }<br /><br />" .. "return {<br /> active = active,<br /> deprecated = deprecated,<br /> }<br /><br />" .. '------------------------------< I A N A S C R I P T S >------------------------------------------------------<br />--' .. file_date .. "<br />return {<br /> " .. table.concat (script_table, ',<br /> ') .. "<br /> }<br /><br />" .. '------------------------------< I A N A R E G I O N S >------------------------------------------------------<br />--' .. file_date .. "<br />return {<br /> " .. table.concat (region_table, ',<br /> ') .. "<br /> }<br /><br />" .. '------------------------------< I A N A V A R I A N T S >----------------------------------------------------<br />--' .. file_date .. "<br />return {<br /> " .. table.concat (variant_table, ',<br /> ') .. "<br /> }<br /><br />" .. '------------------------------< I A N A S U P P R E S S E D S C R I P T S >--------------------------------<br />--' .. file_date .. "<br />return {<br /> " .. table.concat (pretty_suppressed, ',<br /> ') .. "<br /> }<br /><br />" .. '------------------------------< I S O 6 3 9 - 1 >------------------------------------------------------------<br />--' .. file_date .. "<br />return {<br /> " .. table.concat (iso_639_1_table, ',<br /> ') .. "<br /> }<br /><br />" .. "</pre>"; end ~~return p;~~ --[[--------------------------< E X P O R T E D F U N C T I O N >-------------------------------------------- ]] return { iana_extract = iana_extract, }