Module:Lang/data/iana languages/make

This is an old revision of this page, as edited by Trappist the monk (talk | contribs) at 12:23, 4 November 2017. The present address (URL) is a permanent link to this revision, which may differ significantly from the current revision.

local p = {};																	-- module export table; local so that loading the module does not create a global


--[=[------------------------< G E T _ L A N G _ S C R I P T _ R E G I O N _ P A R T S >----------------------

We get an element (one registry record) that looks more-or-less like this:
	%%\n
	Type: language\n
	Subtag: aa\n
	Description: Afar\n
	Added: 2005-10-16\n

Lines are separated by \n characters.

Type, for our purposes can be 'language', 'script', or 'region'

Subtag is the code of Type

Description associates Subtag with a proper name or names.  There can be more than one Description line and
Description lines can wrap to the next line.  When they do, the first two characters of the continuation line
are spaces.  Other fields can wrap the same way; only continuation lines that directly follow a Description
line (or a previous continuation of it) are folded into that description, all others are ignored.

Returns two values:
	code – the text of the Subtag line, or nil when the element has no Subtag line
	descriptions – a string of double-quoted descriptions separated by ', '; empty string when there are none

]=]

local function get_lang_script_region_parts (element)
	local code;
	local descriptions = {};
	local in_description = false;												-- true while the most recently seen field is a Description

	for line in mw.ustring.gmatch (element, '[^\n]+') do						-- every non-empty line, including a last line without a trailing \n
		local subtag = mw.ustring.match (line, '^Subtag: ([%a%d]+)');
		local description = mw.ustring.match (line, '^Description: (.+)');
		local continuation = mw.ustring.match (line, '^  (.+)');

		if subtag then
			code = subtag;
			in_description = false;
		elseif description then
			table.insert (descriptions, '\"' .. description .. '\"');
			in_description = true;
		elseif continuation then
			if in_description then												-- wrapped Description: splice the continuation inside the closing quote mark
				local last = descriptions[#descriptions];
				descriptions[#descriptions] = (mw.ustring.gsub (last, '\"$', '')) .. ' ' .. continuation .. '\"';
			end																	-- continuation of some other field (Comments etc.): not ours, skip
		else
			in_description = false;												-- any other field ends the current description
		end
	end
	
	return code, table.concat (descriptions, ', ');
end


--[=[------------------------< I A N A _ E X T R A C T _ L A N G >---------------------------------------------

Reads the unparsed content of the current page (the page on which this function is invoked), which is expected
to hold a copy of the IANA language-subtag-registry file, and from it builds the rudiments of three lua tables:
one each for the registry's language, script, and region records.

Registry records are introduced by a '%%' line:
	%%
	Type: language
	Subtag: aa
	Description: Afar

Each record that has a Subtag becomes a table-entry string of the form:
	['aa'] = {"Afar"}

Returns a single <pre>...</pre> string holding the three tables (language, script, region – in that order), each
preceded by the registry's File-Date line as a lua comment.  When no File-Date line is found a placeholder is
used instead.  Raises an error when the current page has no content to read.

frame is not used; it is accepted because the function is called through {{#invoke:}}.

]=]

function p.iana_extract_lang (frame)
	local content = mw.title.getCurrentTitle():getContent();					-- get unparsed content of this page
	if not content then
		error ('iana_extract_lang: current page has no content to read');
	end

	local file_date = content:match ('File%-Date: %d%d%d%d%-%d%d%-%d%d') or 'File-Date: (not found)';

	local record_types = {'language', 'script', 'region'};						-- order here is test order and output order
	local entries = {language = {}, script = {}, region = {}};					-- table-entry strings, keyed by record type

	for element in mw.ustring.gmatch (content, '%%%%[^%%]+') do					-- NOTE: a record is cut short at any '%' character inside it
		for _, record_type in ipairs (record_types) do
			if mw.ustring.find (element, 'Type: ' .. record_type) then
				local code, descriptions = get_lang_script_region_parts (element);

				if code then
					table.insert (entries[record_type], "['" .. code .. "'] = {" .. descriptions .. "}");	-- make table entries
				end
				break;															-- first matching type wins
			end
		end
	end
																				-- make pretty output
	local output = {'<br /><pre>'};
	for _, record_type in ipairs (record_types) do
		table.insert (output, '-- ' .. file_date .. '<br />return {<br />&#9;' .. table.concat (entries[record_type], ',<br />&#9;') .. '<br />&#9;}<br />');
	end
	table.insert (output, '</pre>');

	return table.concat (output);
end

return p;																		-- hand the table of exported functions back to the module loader