Module:Lang/data/iana languages/make

require('Module:No globals');													-- NOTE(review): presumably turns accidental global variable use into an error; confirm against that module
local p = {};																	-- table of functions exported by this module; returned at the end of the file


--[=[------------------------< G E T _ L A N G _ S C R I P T _ R E G I O N _ P A R T S >-----------------------

We get an element (one registry record with the %% delimiter already removed) that looks more-or-less like this:
	\n
	Type: language\n
	Subtag: aa\n
	Description: Afar\n
	Added: 2005-10-16\n

Lines are separated by \n characters; the final line does not have to be \n terminated.

Type, for our purposes can be 'language', 'script', or 'region'

Subtag is the code of Type

Description associates Subtag with a proper name or names; one name per Description line.  There can be more
than one Description line and Description lines can wrap to the next line.  When they do, the first two
characters of the continuation line are spaces.

Other fields (Comments, for example) can wrap in the same way.  A continuation line is appended to a name only
when the field being continued is a Description; continuations of any other field are ignored.

Returns two values:
	code – the subtag (nil when the element has no Subtag line)
	names – a string holding each name wrapped in double quote marks, names separated by ', ' (empty string when
		there are no Description lines).  Double quote marks and backslashes inside a name are backslash-escaped so
		that the result is always valid inside a lua table constructor.

]=]

local function get_lang_script_region_parts (element)
	local code;
	local descriptions = {};
	local in_description = false;												-- true while the most recently seen field is a Description

	for line in mw.ustring.gmatch (element, '[^\n]+') do						-- get each non-empty line of text (without the \n); last line need not be \n terminated
		local subtag = mw.ustring.match (line, 'Subtag: ([%a%d]+)');
		local description = mw.ustring.match (line, 'Description: (.+)');
		local continuation = mw.ustring.match (line, '^  (.+)');

		if subtag then															-- if this line is the subtag line
			code = subtag;														-- save the subtag's code
			in_description = false;
		elseif description then													-- if this line is a description line
			table.insert (descriptions, description);							-- save the raw name; quote marks are added after all lines have been read
			in_description = true;
		elseif continuation then												-- if a continuation line
			if in_description then												-- only a wrapped Description extends a name; wrapped Comments etc. are skipped
				descriptions[#descriptions] = descriptions[#descriptions] .. ' ' .. continuation;
			end
		else
			in_description = false;												-- some other field; any continuation lines that follow belong to it
		end
	end

	for i, name in ipairs (descriptions) do
		descriptions[i] = '"' .. (name:gsub ('["\\]', '\\%0')) .. '"';			-- escape embedded " and \ then wrap the name in quote marks
	end

	return code, table.concat (descriptions, ', ');
end


--[=[------------------------< I A N A _ E X T R A C T >-------------------------------------------------------

read a local copy of the IANA language-subtag-registry file and from it build tables to replace the tables in:
	[[Module:Language/data/iana languages]]
	[[Module:Language/data/iana scripts]]
	[[Module:Language/data/iana regions]]

current language-subtag-registry file can be found at: http://www.iana.org/assignments/language-subtag-registry
archive.org has copies of previous versions see: https://web.archive.org/web/*/http://www.iana.org/assignments/language-subtag-registry

The registry text is read from the unparsed content of the page on which this function is invoked.  The return
value is a <pre>...</pre> block holding three 'return {...}' tables (languages, scripts, regions, in that order),
each preceded by a comment line carrying the registry's File-Date.

A record of a recognized type that has no Subtag line produces an ["error"] = {} entry in the table for that
record's own type so that the problem is visible in the output.

frame is not used.

]=]

function p.iana_extract (frame)
	local content = mw.title.getCurrentTitle():getContent();					-- get unparsed content of this page
	if not content then															-- no content to read (page does not exist)
		return '<span class="error">iana_extract: unable to read the content of the current page</span>';
	end

	local file_date = content:match ('(File%-Date: %d%d%d%d%-%d%d%-%d%d)')		-- get the file date line from this version of the source file
		or 'File-Date: unknown';												-- keep going when the date line is missing so the tables are still produced

	local record_types = {'language', 'script', 'region'};						-- the order in which types are tested and in which tables are emitted
	local entries = {language = {}, script = {}, region = {}};					-- one list of table-entry strings per record type

	for element in mw.ustring.gmatch (content, '%%%%([^%%]+)') do				-- get a %% delimited 'element' from the file; leave off the delimiters
		for _, record_type in ipairs (record_types) do
			if mw.ustring.find (element, 'Type: ' .. record_type) then			-- if an element of this type
				local code, descriptions = get_lang_script_region_parts (element);	-- get the code and description(s)

				if code then
					table.insert (entries[record_type], '["' .. code .. '"] = {' .. descriptions .. '}');	-- make table entry
				else
					table.insert (entries[record_type], '["error"] = {}');		-- code should never be nil, but an error entry in this type's own table can be helpful
				end
				break;															-- an element has only one type; don't test the remaining types
			end
		end
	end

	local sections = {};														-- make pretty output; one 'return {...}' section per record type
	for _, record_type in ipairs (record_types) do
		table.insert (sections, '-- ' .. file_date .. '<br />return {<br />&#9;' ..
			table.concat (entries[record_type], ',<br />&#9;') .. '<br />&#9;}<br />');
	end

	return '<br /><pre>' .. table.concat (sections) .. '</pre>';
end

return p;
Module:Lang/data/iana languages/make

Usage