Content deleted Content added
m Trappist the monk moved page Module:Sandbox/trappist the monk/iana to Module:Language/name/data/iana data extraction tool without leaving a redirect: to get it out of the sandbox |
No edit summary |
||
Line 1:
require('Module:No globals');
local p = {};
--[=[------------------------< G E T _ V A R I A N T _ P A R T S >---------------------------------------------

Extracts the subtag code, the prefixes, and the descriptions from one 'variant' record of the IANA language
subtag registry.

We get a record that looks more-or-less like this:
	%%\n
	Type: variant\n
	Subtag: bohoric\n
	Description: Slovene in Bohorič alphabet\n
	Added: 2012-06-27\n
	Prefix: sl\n

Each line is normally terminated with a \n character; a final line without one is still read.

Type, for this function, can only be 'variant'
Subtag is the code of Type
Prefix is a language code to which this variant applies; one language code per Prefix line.  There can be
	more than one Prefix line.
Description associates Subtag with a proper name or names; one name per Description line.  There can be more
	than one Description line and Description lines can wrap to the next line.  When they do, the
	continuation line begins with whitespace.

A continuation line is appended (separated by a single space) to the most recent description only when the
field being continued is a Description field.  Continuations of any other field (Comments, for example) are
ignored, so the order in which Description and Comments lines occur in the record does not matter.

Records that contain the text 'Deprecated', 'Preferred-Value', or 'Private use' anywhere are ignored.

Returns:
	'skip' (a single value) when the record is to be ignored; otherwise
	code - the subtag code, or nil when the record has no usable Subtag line
	prefixes - comma-separated list of double-quoted prefixes ('' when there are none)
	descriptions - comma-separated list of double-quoted descriptions ('' when there are none)

Because the returned lists are destined to be pasted into lua source, any double quote or backslash character
inside a prefix or description is escaped with a backslash.

]=]

local function get_variant_parts (record)
	local code;
	local descriptions = {};													-- raw (unquoted) description text; one entry per Description field
	local prefixes = {};														-- raw (unquoted) prefix codes; one entry per Prefix field
	local current_field;														-- name of the field that the most recent non-continuation line belongs to

	local function quote (text)													-- wrap text in double quotes; escape embedded quotes and backslashes so the result is a valid lua string literal
		local escaped = mw.ustring.gsub (text, '(["\\])', '\\%1');				-- local assignment discards gsub's second return value (the replacement count)
		return '"' .. escaped .. '"';
	end

	if mw.ustring.find (record, 'Deprecated', 1, true)							-- plain-text searches; these are literal strings, not patterns
		or mw.ustring.find (record, 'Preferred-Value', 1, true)
		or mw.ustring.find (record, 'Private use', 1, true) then
			return 'skip';
	end

	for line in mw.ustring.gmatch (record, '[^\n]+') do							-- get each non-empty line of text (without the \n)
		local continuation = mw.ustring.match (line, '^%s+(%S.*)');				-- continuation lines begin with whitespace
		if continuation then
			if 'Description' == current_field and descriptions[1] then			-- only description continuations are kept; guard against an empty list
				descriptions[#descriptions] = descriptions[#descriptions] .. ' ' .. continuation;
			end
		else
			local field, value = mw.ustring.match (line, '^([%a%-]+):%s*(.*)$');	-- anchored so that field names inside another field's text do not match
			current_field = field;												-- nil for lines that are not fields (the %% separator for example)

			if 'Subtag' == field then											-- if this line is the subtag line
				code = mw.ustring.match (value, '^[%a%d]+') or code;			-- extract and save the subtag's code
			elseif 'Description' == field and '' ~= value then					-- if this line is a description line
				table.insert (descriptions, value);								-- save the name; quote marks are added once all continuations are known
			elseif 'Prefix' == field and '' ~= value then						-- if this line is a prefix line
				table.insert (prefixes, value);									-- save the prefix
			end
		end
	end

	for i, description in ipairs (descriptions) do								-- now that continuations have been joined, wrap in quote marks
		descriptions[i] = quote (description);
	end
	for i, prefix in ipairs (prefixes) do
		prefixes[i] = quote (prefix);
	end

	return code, table.concat (prefixes, ', '), table.concat (descriptions, ', ');
end
--[=[------------------------< G E T _ L A N G _ S C R I P T _ R E G I O N _ P A R T S >-----------------------
We get
%%\n
Type: language\n
Line 41 ⟶ 102:
end
for line in mw.ustring.gmatch (record, '([^\n]+)\n') do -- get a \n
if mw.ustring.match (line, 'Subtag: [%a%d]+') then -- if this line is the subtag line
code = mw.ustring.match (line, 'Subtag: ([%a%d]+)'); -- extract and save to subtag's code
Line 76 ⟶ 137:
local script_table = {}; -- scripts go here
local region_table = {}; -- regions go here
local variant_table = {}; -- variants go here
local file_date; -- first line
local code;
local descriptions;
local prefixes; -- used for language variants only
file_date = content:match ('(File%-Date: %d%d%d%d%-%d%d%-%d%d)'); -- get the file date line from this version of the source file
Line 99 ⟶ 162:
table.insert (script_table, "[\"" .. code .. "\"] = {" .. descriptions .. "}"); -- make table entries
elseif not code then
table.insert (
end
Line 108 ⟶ 171:
table.insert (region_table, "[\"" .. code .. "\"] = {" .. descriptions .. "}"); -- make table entries
elseif not code then
table.insert (
end
elseif mw.ustring.find (record, 'Type: variant') then -- if a region record
code, prefixes, descriptions = get_variant_parts (record); -- get the code, prefix(es), and description(s)
if code and ('skip' ~= code) then
table.insert (variant_table,
table.concat ({
"[\"",
code,
"\"] = {<br />		[\"descriptions\"] = {",
descriptions,
"},<br />		[\"prefixes\"] = {",
prefixes,
"},<br />		},"
})
);
elseif not code then
table.insert (variant_table, "[\"error\"] = {" .. record .. "}"); -- code should never be nil, but ...
end
end
Line 115 ⟶ 197:
return "<br /><pre>-- " .. file_date .. "<br />return {<br />	" .. table.concat (lang_table, ',<br />	') .. "<br />	}<br />-- " ..
file_date .. "<br />return {<br />	" .. table.concat (script_table, ',<br />	') .. "<br />	}<br />-- " ..
file_date .. "<br />return {<br />	" .. table.concat (region_table, ',<br />	') .. "<br />	}<br />-- " ..
file_date .. "<br />return {<br />	" .. table.concat (variant_table, ',<br />	') .. "<br />	}<br />" .. "</pre>";
end
|