Module:ISO 639 name/ISO 639 name to code/make: Difference between revisions

Content deleted Content added
create;
 
repoint links;
 
(22 intermediate revisions by 3 users not shown)
Line 1:
require('strict');										-- Scribunto strict mode: reading or writing an undeclared global raises an error

local temp = {};										-- accumulator: lowercase language name -> sequence of rendered '[index]="code"' strings
 
Line 5:
--[[--------------------------< L A N G _ A D D >--------------------------------------------------------------

temp table is a table of tables where the key is the language name and the value is a table listing the ISO 639
codes associated with that language name.

This function adds language name (as index) and its code (as a table element) using an appropriate index number.

<lang> is the language name from the source data
	language names with parenthetical disambiguation are listed twice; with and without the disambiguators
<code> is the associated ISO 639 code from the source data
<part> is 1 for ISO 639-1 language names and codes, 2, 2B, 3, 5. <part> for the override data is prefixed
	with 'O'; <part> for the deprecated data is prefixed with 'D'

TODO: convert characters with diacritics to characters without?

]]
 
--[[
Lookup table that converts the string <part> designator accepted by lang_add() into the numeric index under
which the code is written in the rendered output.  Built once here instead of on every call.
]]
local part_index = {
	['1']=1, ['2']=2, ['2B']=3, ['3']=4, ['5']=5,					-- for the base ISO 639 parts
	['O1']=6, ['O2']=7, ['O2B']=8, ['O3']=9, ['O5']=10,				-- for the override tables
	['D1']=11, ['D2']=12, ['D2B']=13, ['D3']=14, ['D5']=15,			-- for the deprecated tables
	};

--[[
Records one language-name / code pair in the module-level temp table.

<lang> language name from the source data; lowercased before it is used as a key
<code> ISO 639 code associated with <lang>
<part> string part designator; one of the keys of part_index

The pair is stored as the rendered string '[index]="code"' appended to temp[<name>].  When <lang> carries a
parenthetical disambiguator the pair is stored under the full name and again under the name with the
disambiguator removed.  Raises an error when <part> is not a known designator.
]]
local function lang_add (lang, code, part)
	local index = part_index[part];
	if not index then												-- fail loudly instead of rendering a bogus index
		error ('lang_add: unknown part: ' .. tostring (part));
	end

	local entry = string.format ('[%d]="%s"', index, code);			-- rendered form, for example: [4]="abc"

	local function store (name)
		if not temp[name] then										-- when no entry for this language
			temp[name] = {};										-- make a blank entry; must hold only rendered entries because the caller sorts on the bracketed index
		end
		table.insert (temp[name], entry);							-- codes are added in call order; the caller sorts them later
	end

	local name = mw.ustring.lower (lang);							-- convert to lowercase for use as table index
	local bare_name = (name:gsub (' *%b()', ''));					-- remove disambiguation if there is one; parentheses drop gsub's second return value

	store (name);
	if bare_name ~= name and '' ~= bare_name then					-- had a disambiguator: also list the name without it
		store (bare_name);
	end
end
 
 
--[[
Walks one ISO 639 data table and records every language-name / code pair that it holds.

<part_data> table whose keys are codes and whose values are sequences of language names for that code
<part_number> string part designator handed unchanged to lang_add() (for example '3', 'O2B', 'D1')

TODO: better name
]]
local function iterate_table(part_data, part_number)
	for code, names in pairs (part_data) do							-- for each code in the data table
		for _, lang in ipairs (names) do							-- code can have multiple names so for each one
			lang_add (lang, code, part_number);						-- create and / or add this name / code pair to the output
		end
	end
end
 
--[[--------------------------< I S O 6 3 9 _ N A M E _ T O _ C O D E >----------------------------------------
Line 41 ⟶ 48:
 
]]
 
--[[
Builds the text of the name-to-code data table from the ISO 639 data modules.

Loads the base part data (3, 5, 2, 2B, 1), then the override data and the deprecated data for every part, and
feeds each table through iterate_table() so that the module-level temp table is populated.  Each language's
codes are then sorted by their bracketed numeric index, the per-language lines are sorted by language name, and
everything is rendered as a single string: a <pre> block holding a key comment followed by 'return { ... }',
with '<br />' as the line break and '&#9;' as the tab.

Returns the rendered string.
]]
local function iso_639_name_to_code ()
	local out = {};
	local all_parts = {'1', '2', '2B', '3', '5'};					-- suffixes shared by the override and deprecated tables

	for _, part in ipairs ({'3', '5', '2', '2B', '1'}) do			-- start with part 3 because it has the most codes
		iterate_table (mw.loadData ('Module:ISO 639 name/ISO 639-' .. part), part);	-- ISO 639-<part> language codes / names
	end

	local part_data = mw.loadData ('Module:ISO 639 name/ISO 639 override');	-- has override data for all parts
	for _, o_part in ipairs (all_parts) do							-- for each of the override tables
		iterate_table (part_data['override_' .. o_part], 'O' .. o_part);	-- point to override data; prefix the part designator
	end

	part_data = mw.loadData ('Module:ISO 639 name/ISO 639 deprecated');	-- has deprecated data for all parts
	for _, d_part in ipairs (all_parts) do							-- for each of the deprecated tables
		iterate_table (part_data['deprecated_' .. d_part], 'D' .. d_part);	-- point to deprecated data; prefix the part designator
	end

	local function comp (a, b)										-- order rendered entries by the number inside the brackets
		return tonumber (a:match ('(%d+)')) < tonumber (b:match ('(%d+)'));
	end

	for lang, codes in pairs (temp) do
		table.sort (codes, comp);									-- codes are added in the order that lang_add() is called above; sort to make pretty
		table.insert (out, table.concat ({'["', lang, '"] = {', table.concat (codes, ', '), '}'}));	-- reformat
	end

	table.sort (out);												-- sort in language name order

	local key_str = table.concat ({									-- comment block that explains the numeric indexes to readers of the output
		'--[[--------------------------< I S O _ 6 3 9 _ N A M E _ T O _ C O D E >--------------------------------------<br /><br />',
		'Key:<br />&#9;',
		'[1]=ISO 639-1&#9;&#9;[6]=ISO 639-1 override&#9;&#9;[11]=ISO 639-1 deprecated<br />&#9;',
		'[2]=ISO 639-2&#9;&#9;[7]=ISO 639-2 override&#9;&#9;[12]=ISO 639-2 deprecated<br />&#9;',
		'[3]=ISO 639-2B&#9;&#9;[8]=ISO 639-2B override&#9;&#9;[13]=ISO 639-2B deprecated<br />&#9;',
		'[4]=ISO 639-3&#9;&#9;[9]=ISO 639-3 override&#9;&#9;[14]=ISO 639-3 deprecated<br />&#9;',
		'[5]=ISO 639-5&#9;&#9;[10]=ISO 639-5 override&#9;&#9;[15]=ISO 639-5 deprecated',
		'<br />]]<br /><br />'
		})

	return table.concat ({'<pre>', key_str, 'return {<br />&#9;', table.concat (out, ',<br />&#9;'), '<br />&#9;}<br /></pre>'});	-- render
end
 
--[[--------------------------< E X P O R T E D _ F U N C T I O N S >------------------------------------------

The module exports a single function, iso_639_name_to_code, under the key of the same name.

]]
 
return {iso_639_name_to_code = iso_639_name_to_code}