Module:ISO 639 name/ISO 639 name to code/make: Difference between revisions

Content deleted Content added
No edit summary
repoint links;
 
(10 intermediate revisions by 3 users not shown)
Line 1:
require('strict');																-- Scribunto strict library; presumably makes use of undeclared globals an error -- confirm against the Scribunto manual
local temp = {};																-- accumulator filled by lang_add(): lowercase language name -> sequence of '[index]="code"' strings
 
Line 5:
--[[--------------------------< L A N G _ A D D >--------------------------------------------------------------

temp table is a table of tables where the key is the language name and the value is a table listing the ISO 639
codes associated with that language name.

This function adds language name (as index) and its code (as a table element) using an appropriate index number.

<lang> is the language name from the source data
<code> is the associated ISO 639 code from the source data
<part> is one of '1', '2', '2B', '3', '5' for the base ISO 639 data; <part> for the override data is prefixed
	with 'O' ('O1', 'O2', ...) and <part> for the deprecated data is prefixed with 'D' ('D1', 'D2', ...)

<part> is translated to a numeric index: base parts => 1-5, override parts => 6-10, deprecated parts => 11-15.
Each stored element is the text '[<index>]="<code>"' so that the elements can later be joined into Lua source.

This function does not create alias entries in temp table for those language names that use characters with diacritics.
To do so risks conflict between names that do not use diacritics (Bari, code bfa) and names that do (Barí, code mot).

]]

local function lang_add (lang, code, part)
	part = ({																	-- convert string <part> to a numeric index
		['1']=1, ['2']=2, ['2B']=3, ['3']=4, ['5']=5,							-- for the base ISO 639 parts
		['O1']=6, ['O2']=7, ['O2B']=8, ['O3']=9, ['O5']=10,						-- for the override tables
		['D1']=11, ['D2']=12, ['D2B']=13, ['D3']=14, ['D5']=15,					-- for the deprecated tables
		})[part];

	lang = mw.ustring.lower (lang);												-- convert to lowercase for use as table index

	if not temp[lang] then														-- when no entry for this language
		temp[lang] = {};														-- make a blank entry
	end

	table.insert (temp[lang], string.format ('[%s]=\"%s\"', part, code))		-- add the code; codes are added in the order that this function is called
end
 
 
-- TODO: better name
-- Walk one data table and register every language name / code pair that it holds.
--	<part_data> is a table keyed by code; each value is a sequence of language names for that code
--	<part_number> is the part identifier string that is handed unchanged to lang_add() (callers pass
--		values such as '3', 'O1', 'D5')
local function iterate_table(part_data, part_number)
	for code, v in pairs (part_data) do											-- for each code in the data table
		for _, lang in ipairs (v) do											-- code can have multiple names so for each one
			lang_add (lang, code, part_number);									-- create and / or add this name / code pair to the output
		end
	end
end
 
--[[--------------------------< I S O 6 3 9 _ N A M E _ T O _ C O D E >----------------------------------------
Line 47 ⟶ 52:
local out = {};
 
local part_data = mw.loadData ('Module:Language/dataISO 639 name/ISO 639-3'); -- ISO 639-3 language codes / names
for codeiterate_table(part_data, v in pairs (part_data'3') do -- start with part 3 because it has the most codes
 
for _, lang in ipairs (v) do -- code can have multiple names so for each one
part_data = mw.loadData ('Module:Language/dataISO 639 name/ISO 639-25'); -- ISO 639-25 language codes / names
lang_add (lang, code, '3'); -- create and / or add this name / code pair to the output
iterate_table(part_data, '5')
end
 
part_data = mw.loadData ('Module:Language/dataISO 639 name/ISO 639-2B2'); -- ISO 639-2B2 language codes / names
iterate_table(part_data, '2')
 
part_data = mw.loadData ('Module:LanguageISO 639 name/data/ianaISO languages639-2B'); -- used only for ISO 639-12B language codes / names');
iterate_table(part_data, '2B')
 
part_data = mw.loadData ('Module:ISO 639 name/ISO 639-1'); -- ISO 639-1 language codes / names
iterate_table(part_data, '1')
 
part_data = mw.loadData ('Module:ISO 639 name/ISO 639 override'); -- has override data for all parts
for _, o_part in ipairs ({'1', '2', '2B', '3', '5'}) do -- for each of the override tables
local o_part_data = part_data['override_' .. o_part]; -- point to override data
o_part = 'O' .. o_part; -- prefix o_part
iterate_table(o_part_data, o_part) -- for each code in the data table and for each language name associated with that code
end
 
part_data = mw.loadData ('Module:Language/dataISO 639 name/ISO 639-5 deprecated'); -- ISOhas 639-5deprecated languagedata codesfor /all namesparts
for code_, vd_part in pairsipairs (part_data{'1', '2', '2B', '3', '5'}) do -- nowfor parteach 5of the deprecated tables
local d_part_data = part_data['deprecated_' .. d_part]; -- point to deprecated data
for _, lang in ipairs (v) do
d_part = 'D' .. d_part; -- prefix d_part
lang_add (lang, code, '5');
iterate_table(d_part_data, d_part) -- for each code in the data table and for each language name associated with that code
end
end
 
-- Sort comparator for the '[index]="code"' strings that are stored for each language name.
-- Extracts the first run of digits from each string (the bracketed index) and orders numerically
-- so that, for example, '[10]=...' sorts after '[4]=...' (a plain string sort would not do that).
-- Returns true only when <a>'s index is strictly less than <b>'s, as table.sort() requires.
local function comp (a, b)
	return tonumber (a:match ('(%d+)')) < tonumber (b:match ('(%d+)'));
end
 
for lang, codes in pairs (temp) do
table.sort (codes, comp); -- codes are added in the order that lang_add() is called above; sort to make pretty
table.insert (out, table.concat ({'["', lang, '"] = {', table.concat (codes, ', '), '}'})); -- reformat
end
 
table.sort (out); -- sort in language name order
 
local key_str = '--[[Key:<br />&#9;[1]=ISO 639-1<br />&#9;[2]=ISO 639-2<br />&#9;[3]=ISO 639-2B<br />&#9;[4]=ISO 639-3<br />&#9;[5]=ISO 639-5<br />]]<br /><br />'
local key_str = table.concat ({
return table.concat ({'<pre>', key_str, 'return {<br />&#9;', table.concat (out, ',<br />&#9;'), '<br />&#9;}<br /></pre>'}); -- render
'--[[--------------------------< I S O _ 6 3 9 _ N A M E _ T O _ C O D E >--------------------------------------<br /><br />',
'Key:<br />&#9;',
'[1]=ISO 639-1&#9;&#9;[6]=ISO 639-1 override&#9;&#9;[11]=ISO 639-1 deprecated<br />&#9;',
'[2]=ISO 639-2&#9;&#9;[7]=ISO 639-2 override&#9;&#9;[12]=ISO 639-2 deprecated<br />&#9;',
'[3]=ISO 639-2B&#9;&#9;[8]=ISO 639-2B override&#9;&#9;[13]=ISO 639-2B deprecated<br />&#9;',
'[4]=ISO 639-3&#9;&#9;[9]=ISO 639-3 override&#9;&#9;[14]=ISO 639-3 deprecated<br />&#9;',
'[5]=ISO 639-5&#9;&#9;[10]=ISO 639-5 override&#9;&#9;[15]=ISO 639-5 deprecated',
'<br />]]<br /><br />'
})
return table.concat ({'<pre>', key_str, 'return {<br />&#9;', table.concat (out, ',<br />&#9;'), '<br />&#9;}<br /></pre>'}); -- render
end
 
--[[--------------------------< E X P O R T E D _ F U N C T I O N S >------------------------------------------

The module's public interface: a table with the single entry iso_639_name_to_code.

]]

local exports = {};
exports.iso_639_name_to_code = iso_639_name_to_code;

return exports;