Module:ISO 639 name/ISO 639 name to code/make

This is an old revision of this page, as edited by Trappist the monk (talk | contribs) at 15:03, 1 September 2020. The present address (URL) is a permanent link to this revision, which may differ significantly from the current revision.

require('Module:No globals');
local temp = {};																-- module-level accumulator: lowercased language name -> sequence of formatted index/code strings; filled by lang_add(), read by iso_639_name_to_code()


--[[--------------------------< L A N G _ A D D >--------------------------------------------------------------

temp table is a table of tables where the key is the lowercased language name and the value is a sequence of
strings, one string for each ISO 639 code associated with that language name.  Each string has the form:
	[<index>]="<code>"
so that it can be written directly into the Lua table constructor rendered by iso_639_name_to_code().

This function adds language name (as index) and its code (as a sequence element) to the appropriate place in the temp table.

lang is the language name from the source data
code is the associated ISO 639 code from the source data
part is one of the strings '1', '2', '2B', '3', '5' naming the ISO 639 part that code belongs to.  The index written
	to the output is: '1' -> [1], '2' -> [2], '2B' -> [3], '3' -> [4], '5' -> [5]

code is written as a quoted Lua string.  Were it written bare, the rendered table would treat every code as a
variable name, and any code that happens to be a Lua reserved word (for example 'or') would be a syntax error.

Raises an error when code is nil or when part is not one of the recognized part names.

This function does not create alias entries in temp table for those language names that use characters with diacritics.
To do so risks conflict between names that do not use diacritics (Bari, code bfa) and names that do (Barí, code mot).

]]

local function lang_add (lang, code, part)
	local index = ({['1']=1, ['2']=2, ['2B']=3, ['3']=4, ['5']=5})[part];		-- translate part name to output table index; nil when part is not recognized

	lang = mw.ustring.lower (lang);												-- convert to lowercase for use as table index

	if not code then
		error ('nil code')
	end
	if not index then
		error ('nil part')
	end

	if not temp[lang] then														-- when no entry for this language
		temp[lang] = {};														-- make a blank entry to collect this name's codes
	end

	-- add the code; %q emits code as a double-quoted, escaped Lua string literal so the rendered table is valid Lua
	table.insert (temp[lang], string.format ('[%d]=%q', index, code));
end


--[[--------------------------< I S O 6 3 9 _ N A M E _ T O _ C O D E >----------------------------------------

Builds the name-to-code listing from the code-to-name source data modules.

Each source module is read with mw.loadData(); every name listed for every code is handed to lang_add() which
accumulates the name / code pairs in the temp table.  When all sources have been read, the accumulated entries
are formatted as the text of a Lua table constructor, sorted, and returned wrapped in <pre>...</pre> along with
a comment that identifies which ISO 639 part each index refers to.

Takes no arguments; returns the rendered text as a single string.

]]

local function iso_639_name_to_code ()
	local sources = {															-- {data module, part name, true when only 2-character codes are wanted}
		{'Module:Language/data/ISO 639-3', '3'},								-- start with part 3 because it has the most codes
		{'Module:Language/data/ISO 639-5', '5'},
		{'Module:Language/data/ISO 639-2', '2'},
		{'Module:Language/data/ISO 639-2B', '2B'},
		{'Module:Language/data/iana languages', '1', true},						-- used only for ISO 639-1; IANA source data includes a mix of 2- and 3-character codes
	};

	for _, source in ipairs (sources) do
		local page, part, two_char_only = source[1], source[2], source[3];
		for code, names in pairs (mw.loadData (page)) do
			if not two_char_only or 2 == #code then								-- ISO 639-1 is the 2-character variety
				for _, name in ipairs (names) do								-- code can have multiple names so for each one
					lang_add (name, code, part);								-- create and / or add this name / code pair
				end
			end
		end
	end

	local rows = {};
	for lang, codes in pairs (temp) do
		table.sort (codes);														-- entries were appended in source order; sort to make pretty
		rows[#rows + 1] = '["' .. lang .. '"] = {' .. table.concat (codes, ', ') .. '}';	-- reformat as one table-constructor field
	end
	table.sort (rows);															-- pairs() order is unspecified so put the fields in name order

	local key_str = '--[[Key:<br />&#9;[1]=ISO 639-1<br />&#9;[2]=ISO 639-2<br />&#9;[3]=ISO 639-2B<br />&#9;[4]=ISO 639-3<br />&#9;[5]=ISO 639-5<br />]]<br /><br />'

	return '<pre>' .. key_str .. 'return {<br />&#9;' .. table.concat (rows, ',<br />&#9;') .. '<br />&#9;}<br /></pre>';	-- render
end

return {iso_639_name_to_code = iso_639_name_to_code}							-- module exports: the renderer is the only public function