Module:Lang/data/iana languages/make

This is an old revision of this page, as edited by Trappist the monk (talk | contribs) at 00:05, 4 November 2017. The present address (URL) is a permanent link to this revision, which may differ significantly from the current revision.

local p = {};																	-- module export table; local so that loading the module does not create a global


--[=[------------------------< G E T _ E L E M E N T _ P A R T S >---------------------------------------------

We get an element that looks more-or-less like this:
	%%\n
	Type: language\n
	Subtag: aa\n
	Description: Afar\n
	Added: 2005-10-16\n

Each line is terminated with a \n character.

Type, for our purposes, can be 'language', 'script', or 'region'.

Subtag is the code assigned to the element within its Type (for example 'aa' for the language Afar).

Description associates Subtag with a proper name or names.  There can be more than one Description line and
Description lines can wrap to the next line.  When they do, the first two characters of the continuation line
are spaces.

]=]

local function get_element_parts (element)
	-- Returns two strings:
	--   1. the subtag code found on the element's 'Subtag: ' line, or the literal 'nil code' when there is none
	--   2. every Description value, each wrapped in double quotes, joined with ', '; the empty string when the
	--      element has no Description line
	local code;																	-- stays nil until a Subtag line is seen
	local descriptions = {};													-- raw description strings in the order they appear
	local in_description = false;												-- true while the most recent field line was a Description

	for line in element:gmatch ('[^\n]+') do									-- every non-empty line, including a last line that lacks a trailing \n
		if line:match ('^  ') then												-- two leading spaces: this line continues the preceding field
			if in_description then												-- continuations of other fields are ignored
				local more = line:match ('^%s+(.-)%s*$');						-- continuation text without the surrounding whitespace
				if '' ~= more then
					descriptions[#descriptions] = descriptions[#descriptions] .. ' ' .. more;
				end
			end
		else
			local subtag = line:match ('^Subtag: ([%a%d]+)');
			local description = line:match ('^Description: (.+)');
			in_description = nil ~= description;								-- any new field line ends a wrapped Description

			if subtag then
				code = subtag;
			elseif description then
				table.insert (descriptions, description);
			end
		end
	end

	for i, description in ipairs (descriptions) do								-- quote each description so it can be pasted into a Lua table
		local escaped = description:gsub ('[\\"]', '\\%0');					-- escape backslashes and double quotes; extra gsub result is dropped
		descriptions[i] = '"' .. escaped .. '"';
	end

	return code or 'nil code', table.concat (descriptions, ', ');				-- table.concat returns '' for an empty table, never nil
end


--[=[------------------------< I A N A _ E X T R A C T _ L A N G >---------------------------------------------

read a local copy of the IANA language-subtag-registry file and from it build the rudiments of a table to replace
the table in [[Module:Language/data/iana languages]].

%%
Type: language
Subtag: aa
Description: Afar



]=]
function p.iana_extract_lang (frame)
	-- Builds a <pre>-wrapped listing of Lua table entries, one per '%%'-delimited element found in the
	-- unparsed content of the current page.  Each entry has the form ['code'] = {"description", ...}.
	-- frame is accepted because this is an #invoke entry point; it is not used.
	local page = mw.title.getCurrentTitle();									-- get a title object for the current page
	local content = page:getContent() or '';									-- unparsed content; treat a missing page as empty instead of erroring in gmatch
	local out_table = {};														-- output goes here

																				-- previous single-pattern loop (disabled):
--	for code, lang in mw.ustring.gmatch (content, '%%%%%s*Type:%s*language%s*Subtag:%s*(%l%l%l?)%s*Description:%s*(.-)%s*Added') do
	for element in mw.ustring.gmatch (content, '%%%%[^%%]+') do					-- an element is '%%' plus everything up to the next '%'
		local code, descriptions = get_element_parts (element);

		table.insert (out_table, "[\'" .. code .. "\'] = {" .. descriptions .. "}");	-- make table entries
	end
																				-- make pretty output
	return "<br /><pre>&#9;&#9;{<br />&#9;&#9;" .. table.concat (out_table, ',<br />&#9;&#9;') .. "<br />&#9;&#9;},<br /></pre>";
end

return p;																		-- hand the module table (with iana_extract_lang) back to the caller