Module:Lang/data/iana languages/make: Difference between revisions

Browse history interactively

← Previous edit

Content deleted Content added

Revision as of 14:19, 21 December 2017 edit Trappist the monk (talk \| contribs) Administrators 494,463 edits +suppressed script extraction; ← Previous edit		Latest revision as of 14:55, 10 July 2024 edit undo Trappist the monk (talk \| contribs) Administrators 494,463 edits m fix module names;
(9 intermediate revisions by 4 users not shown)
Line 1: require('~~Module:No globals~~strict'); ~~local p = {};~~ Line 31 ⟶ 30: before the Comments line. Records with ~~Deprecated~~private ~~dates~~use ~~or Preferred-Value codes~~subtags are ignored ~~as are private use codes~~. ]=] Line 41 ⟶ 40: local in_comments = false; if string.find (record, 'Deprecated', 1, true) or string.find (record, 'Preferred%-Value'), 1, true) or string.find (record, 'Private use', 1, true) then return 'skip'; end for line in string.gmatch (record, '([^\n]+)\n') do -- get a \n terminated line of text (without the \n) ~~suppress~~local label = string.match (line, ~~'Suppress%-Script:~~ "(~~%S+~~.-)':");▼ if string.find (line, 'Subtag: [%a%d]+') then -- if this line is the subtag line▼ code = string.match (line, 'Subtag: ([%a%d]+)'); -- extract and save to subtag's code▼ ~~elseif~~if not label and string.find (line, '~~Description:~~^ .+') and not in_comments then -- if ~~this~~a continuation line isbut not a ~~description~~comments ~~line~~continuation descriptions[#descriptions] = string.gsub (descriptions[#descriptions], '\"$', ''); -- remove trailing quote mark from previous description▼ descriptions[#descriptions] = descriptions[#descriptions] .. ' ' .. string.match (line, '^ (.+)') .. '\"'; -- extract and save the continuation with new quote mark▼ ▲ ifelseif ~~string.find~~label ~~(line,~~== 'Subtag~~: [%a%d]+~~') then -- if this line is the subtag line ▲ code = string.match (line, 'Subtag: ([%~~a%d]~~w+)'); -- extract and save to subtag's code elseif ~~string.find~~label ~~(line,~~== '~~Comments: .+~~Description') then -- if this line is a ~~comments~~description line▼ local desc = string.match (line, 'Description: (.+)'); -- extract the description desc = string.gsub (desc, '"', '\\"'); -- in case description contains quote marks (see 1959acad) table.insert (descriptions, '\"' .. desc .. '\"'); -- save the description wrapped in quote marks elseif ~~string.find~~label ~~(line,~~== 'Prefix~~: .+~~') then -- if this line is a prefix line table.insert (prefixes, '\"' .. string.match (line, 'Prefix: (.+)'):lower() .. '\"'); -- extract and save the prefix wrapped in quote marks elseif ~~string.find~~label ~~(line,~~== 'Comments~~: .+~~') then -- if this line is a comments line in_comments = true; ~~elseif string.find (line, '^ .+') and not in_comments then -- if a continuation line but not a commnets continuation~~ ▲ descriptions[#descriptions] = string.gsub (descriptions[#descriptions], '\"$', ''); -- remove trailing quote mark from previous description ▲ descriptions[#descriptions] = descriptions[#descriptions] .. ' ' .. string.match (line, '^ (.+)') .. '\"'; -- extract and save the continuation with new quote mark end end Line 91 ⟶ 93: before the Comments line. Records with ~~Deprecated~~private ~~dates~~use ~~or Preferred-Value codes~~subtags are ignored ~~as are private use codes~~. ]=] Line 98 ⟶ 100: local code; local suppress; -- Suppress script for this code if specified local deprecated; -- boolean; true when subtag is deprecated local descriptions = {}; local in_comments = false; if ~~string.find (~~record~~, 'Deprecated') or string.~~:find (~~record, 'Preferred%-Value') or string.find (record,~~ 'Private use') then return 'skip'; end for line in ~~string.~~record:gmatch (~~record,~~ '([^\n]+)\n') do -- get a \n terminated line of text (without the \n) local label = line:match ('(.-):'); ~~if string.find (line, 'Subtag: [%a%d]+') then -- if this line is the subtag line~~ ~~code = string.match (line,~~if 'Subtag~~: ([%a%d]+)~~'); == label then -- ~~extract~~if ~~and~~this ~~save~~line tois the subtag's ~~code~~line ~~elseif~~ code ~~string.find~~= (line,:match ('~~Description~~Subtag: .(%w+)') ~~then~~; -- ifextract ~~this~~and ~~line~~save isto asubtag's ~~description line~~code ▲ ~~if string.find (line,~~elseif '~~Subtag: [%a%d]+~~Description') == label then -- if this line is ~~the~~a ~~subtag~~description line table.insert (descriptions, '\"' .. ~~string.~~line:match (~~line,~~ 'Description: (.+)') .. '\"'); -- extract and save the name wrapped in quote marks elseif string.find (line, 'Suppress%-Script: %S+') then▼ elseif 'Deprecated' == label then ▲ suppress = string.match (line, 'Suppress%-Script: (%S+)'); deprecated = true; -- subtag is deprecated; set our flag ▲ elseif string.find (line, 'Comments: .+') then -- if this line is a comments line ▲ elseif ~~string.find (line,~~ 'Suppress%-Script:' ~~%S+')~~== label then suppress = line:match ('Suppress%-Script: (%S+)'); elseif 'Comments' == label then -- if this line is a comments line in_comments = true; elseif ~~string.~~line:find (~~line,~~ '^ .+') and not in_comments then -- if a continuation line but not a commnets continuation descriptions[#descriptions] = ~~string.gsub (~~descriptions[#descriptions],:gsub ('\"$', ''); -- remove trailing quote mark from previous description descriptions[#descriptions] = descriptions[#descriptions] .. ' ' .. ~~string.~~line:match (~~line,~~ '^ (.+)') .. '\"'; -- extract and save the continuation with new quote mark end end return code, table.concat (descriptions, ', '), suppress, deprecated; end Line 127 ⟶ 133: read a local copy of the IANA language-subtag-registry file and from it build tables to replace the tables in: [[Module:~~Language~~Lang/data/iana languages]] [[Module:~~Language~~Lang/data/iana ~~scripts~~regions]] [[Module:~~Language~~Lang/data/iana ~~regions~~scripts]] [[Module:Lang/data/iana supressed cripts]] [[Module:Lang/data/iana variants]] current language-subtag-registry file can be found at: http://www.iana.org/assignments/language-subtag-registry Line 136 ⟶ 144: ]=] local function p.iana_extract (frame) local page = mw.title.getCurrentTitle(); -- get a page object for this page local content = page:getContent(); -- get unparsed content local lang_table = {}; -- languages go here local lang_dep_table = {}; -- deprecated languages go here local script_table = {}; -- scripts go here local region_table = {}; -- regions go here local variant_table = {}; -- variants go here local suppress_table = {}; -- here we collect suppressed scripts and associated language codes local iso_639_1_table = {}; -- ISO 639-1 languages; not used by Module:Lang but included here to ensure Module:Lang/data/ISO_639-1 gets updated local file_date; -- first line Line 150 ⟶ 160: local prefixes; -- used for language variants only local suppress; -- a code's suppress script local deprecated; -- boolean: true when subtag is deprecated file_date = content:match ('(File%-Date: %d%d%d%d%-%d%d%-%d%d)'); -- get the file date line from this version of the source file for record in string.gmatch (content, '%%%%([^%%]+)') do -- get a %% delimited 'record' from the file; leave off the delimiters iflocal record_type = string.~~find~~ match(record, 'Type: ~~language~~(%w+)') ~~then -- if a language record~~ if record_type == 'language' then -- if a language record code, descriptions, suppress, deprecated = get_lang_script_region_parts (record); -- get the code, description(s), ~~and~~ suppress script, and deprecated flag if code and ('skip' ~= code) then if deprecated then table.insert (lang_table, "[\"" .. code .. "\"] = {" .. descriptions .. "}"); -- make table entries▼ table.insert (lang_dep_table, "[\"" .. code .. "\"] = {" .. descriptions .. "}"); -- make table entries else ▲ table.insert (lang_table, "[\"" .. code .. "\"] = {" .. descriptions .. "}"); -- make table entries if 2 == code:len() then table.insert (iso_639_1_table, "[\"" .. code .. "\"] = {" .. descriptions .. "}"); -- make table entries end end elseif not code then table.insert (lang_table, "[\"error\"] = {" .. record .. "}"); -- code should never be nil, but inserting an error entry in the final output can be helpful Line 175 ⟶ 194: end elseif ~~string.find~~record_type ~~(record,~~== '~~Type:~~ script') then -- if a script record code, descriptions = get_lang_script_region_parts (record); -- get the code and description(s) Line 184 ⟶ 203: end elseif ~~string.find~~record_type ~~(record,~~== '~~Type:~~ region') then -- if a region record code, descriptions = get_lang_script_region_parts (record); -- get the code and description(s) Line 193 ⟶ 212: end elseif ~~string.find~~record_type ~~(record,~~== '~~Type:~~ variant') then -- if a ~~region~~variant record code, prefixes, descriptions = get_variant_parts (record); -- get the code, prefix(es), and description(s) Line 217 ⟶ 236: for script, code_tbl in pairs (suppress_table) do local LIMIT = 11; -- max number of subtags on a line before a line break table.insert (pretty_suppressed,▼ local fragment_tbl = {}; -- groups of LIMIT number of subtags collected here table.concat ({'[\"', script, '\"] = {', table.concat (code_tbl, ', '), '}'})▼ for i=1, #code_tbl, LIMIT do local stop = ((i+LIMIT-1) > #code_tbl) and #code_tbl or i+LIMIT-1; -- calculate a table.concat stop position table.insert (fragment_tbl, table.concat (code_tbl, ', ', i, stop)); -- get the fragment and save it end ▲ table.insert (pretty_suppressed, -- and make all pretty ▲ table.concat ({'[\"', script, '\"] = {', table.concat (~~code_tbl~~fragment_tbl, ', \n\t\t\t\t'), '}'}) ); end table.sort (pretty_suppressed); -- make ~~pretty~~final output pretty return "'<br /><pre>------------------------------< "I ..A ~~file_date~~N ..A ~~"<br~~ ~~/>return~~ ~~{<br~~L ~~/> "~~A ..N ~~table.concat~~G ~~(lang_table,~~U ~~',<br~~A ~~/> ')~~G ..E ~~"<br~~S />~~ }~~--------------------------------------------------<br />-- "' .. file_date .. "<br />~~return~~local active = {<br /> " .. table.concat (~~script_table~~lang_table, ',<br /> ') .. "<br /> }<br />--<br />" .. ~~file_date~~"local ..deprecated ~~"<br />return~~= {<br /> " .. table.concat (~~region_table~~lang_dep_table, ',<br /> ') .. "<br /> }<br />--<br />" .. ~~file_date ..~~ "~~<br />~~return {<br /> "active ~~.. table.concat (variant_table,~~= 'active,<br /> ')deprecated ..= "deprecated,<br /> }<br />--<br />" .. '------------------------------< I A N A S C R I P T S >------------------------------------------------------<br />--' .. file_date .. "<br />return {<br /> " .. table.concat (pretty_suppressed, ',<br /> ') .. "<br /> }<br />" .. "</pre>";▼ file_date .. "<br />return {<br /> " .. table.concat (script_table, ',<br /> ') .. "<br /> }<br /><br />" .. '------------------------------< I A N A R E G I O N S >------------------------------------------------------<br />--' .. file_date .. "<br />return {<br /> " .. table.concat (region_table, ',<br /> ') .. "<br /> }<br /><br />" .. '------------------------------< I A N A V A R I A N T S >----------------------------------------------------<br />--' .. file_date .. "<br />return {<br /> " .. table.concat (variant_table, ',<br /> ') .. "<br /> }<br /><br />" .. '------------------------------< I A N A S U P P R E S S E D S C R I P T S >--------------------------------<br />--' .. ▲ file_date .. "<br />return {<br /> " .. table.concat (pretty_suppressed, ',<br /> ') .. "<br /> }<br /><br />" .. ~~"</pre>";~~ '------------------------------< I S O 6 3 9 - 1 >------------------------------------------------------------<br />--' .. file_date .. "<br />return {<br /> " .. table.concat (iso_639_1_table, ',<br /> ') .. "<br /> }<br /><br />" .. "</pre>"; end return p;▼ --[[--------------------------< E X P O R T E D F U N C T I O N >-------------------------------------------- ]] ▲return p;{ iana_extract = iana_extract, }