Module:ISO 639 name/ISO 639 name to code/make

This is an old revision of this page, as edited by Trappist the monk (talk | contribs) at 13:22, 25 September 2018. The present address (URL) is a permanent link to this revision, which may differ significantly from the current revision.

require('Module:No globals');
local temp = {};																-- module-level accumulator filled by add_lang() and read by iso_639_name_to_code(); key: lowercase language name, value: 3-element table of double-quoted code strings indexed by ISO 639 part


--[[--------------------------< A D D _ L A N G >--------------------------------------------------------------

adds language and its code to the appropriate place in the temp table.  temp table is a table of tables where the
key is the language name and the value is a 3-element table listing the ISO 639 codes associated with that name.

lang is the language name from the source data
code is the associated ISO 639 code from the source data
part is 1 for ISO 639-1 language names and codes, 2 ..., 3 ...

language names with parenthetical disambiguation are listed twice; with and without the disambiguators

TODO: convert characters with diacritics to characters without?

]]

local function add_lang (lang, code, part)
	local function store (name)													-- record <code> in slot <part> of the entry for <name>
		temp[name] = temp[name] or {'""', '""', '""'};							-- first sighting of this name: start with three empty quoted codes
		temp[name][part] = table.concat ({'"', code, '"'});					-- wrap the code in double quotes; overwrites any code already in this slot
	end

	local full_name = mw.ustring.lower (lang);									-- lowercase form is used as the table index
	local bare_name = full_name:gsub (' *%b()', '');							-- same name with every parenthetical disambiguator (and the spaces before it) removed

	if bare_name ~= full_name then												-- name carries a disambiguator
		store (full_name);														-- so list it under the disambiguated spelling too
	end

	store (bare_name);															-- always list under the undisambiguated spelling
end


--[[--------------------------< I S O 6 3 9 _ N A M E _ T O _ C O D E >----------------------------------------

read code-to-name source tables and convert to a name-to-code table.

]]
local function iso_639_name_to_code ()
	-- load all three code-to-name sources up front; the table index is the ISO 639 part number
	local iana_data = mw.loadData ('Module:Language/data/iana languages');		-- only its 2-character codes are used, as ISO 639-1
	local iso_639_2_data = mw.loadData ('Module:Sandbox/trappist the monk/ISO 639 name/ISO 639-2');	-- ISO 639-2 language codes / names; to be moved to Module:Language/data/ISO 639-2
	local iso_639_3_data = mw.loadData ('Module:Language/data/ISO 639-3');		-- existing data module
	local sources = {iana_data, iso_639_2_data, iso_639_3_data};

	for part = 3, 1, -1 do														-- part 3 first, then part 2, then part 1
		for code, names in pairs (sources[part]) do
			if 1 ~= part or 2 == #code then										-- IANA data mixes 2- and 3-character codes; keep only the 2-character ones for part 1
				for _, name in ipairs (names) do								-- a code can have several names; register each of them
					add_lang (name, code, part);
				end
			end
		end
	end

	local rows = {};
	for name, codes in pairs (temp) do											-- one line of Lua table source per language name
		rows[#rows + 1] = '["' .. name .. '"] = {' .. table.concat (codes, ', ') .. '}';
	end
	table.sort (rows);															-- pairs() order is unspecified, so sort for stable output

	-- wrap the rows as a Lua 'return {...}' table inside <pre> tags, using <br /> and &#9; for line breaks and tabs
	return "<pre>return {<br />&#9;" .. table.concat (rows, ',<br />&#9;') .. "<br />&#9;}<br /></pre>";
end

return {iso_639_name_to_code = iso_639_name_to_code}