Module:Sandbox/Erutuon: Difference between revisions

Content deleted Content added
matched_count maybe not useful except internally
allow multiple private-use subtags; store them in an array; error-throwing method in metatable (remove metatable and any fields only used internally before returning)
Line 354:
return table.concat(m_table.keysToList(language_codes), ', ')
end
 
-- Metatable for the table of parsed subtags while parsing is in progress.
-- It supplies the "throw" method, which records an error on the table and
-- then strips the table of everything that is only useful internally.
local parsed_subtags_mt = {}
parsed_subtags_mt.__index = parsed_subtags_mt

-- Record a parsing failure on the table of parsed subtags.
--   message: description of the error; stored in the "error" field.
--   index:   position in the "input" array at which the error occurred; the
--            "invalid" field is set to the elements of "input" from this
--            position onwards, joined with hyphens.
-- Afterwards the "input" field and the metatable are removed, so this method
-- can be called at most once per table. Returns nothing; the caller is
-- expected to return the table itself.
function parsed_subtags_mt:throw(message, index)
	-- The parameter is deliberately not called "error", to avoid shadowing
	-- the built-in function of that name.
	self.error = message
	-- "input" may not have been assigned yet if the failure is detected
	-- before the tag has been split; in that case there is nothing to join,
	-- and "invalid" is left for the caller to fill in.
	local input = self.input
	if type(input) == "table" then
		self.invalid = table.concat(input, "-", index)
	end
	-- Only useful internally.
	self.input = nil
	setmetatable(self, nil)
end
 
Line 360 ⟶ 371:
 
-- Based on https://www.w3.org/International/articles/language-tags/.
 
-- Parse a language tag.
-- Returns nil if tag is not a string or empty.
-- Else returns a table mapping subtag type to subtag for all subtags that
-- were parsed. If there was an error, the table also has an "error" field with
-- a description of the error, and an "invalid" field with the suffix of the tag
-- starting at the index where the error occurred.
-- Does not recognize "extension" tags, such as those introduced by "u" and
-- described here, as they are not needed on Wikipedia:
-- https://www.rfc-editor.org/rfc/rfc6067.txt.
function p.parse_IETF(tag)
if not type(tag) or tag =~= "string" or type(tag) ~== "string" then
return nil
end
Line 374 ⟶ 395:
-- hyphen).
local parsed_subtags = {}
setmetatable(parsed_subtags, parsed_subtags_mt)
local matched_count = 0
-- Language tags probably only contain ASCII alphabetic and numerical
-- characters and hyphen-minus, though I am not sure if this is explicitly
-- stated in the spec.
if not tag:find '^[A-Za-z0-9-]+$' then
parsed_subtags.error = :throw("invalid characters", 1)
parsed_subtags.invalid = tag
return parsed_subtags
end
local subtagspotential_subtags = mw.text.split(tag, "-")
parsed_subtags.errorinput = "invalid subtag"potential_subtags
-- An array of patterns for each subtag, and a "type" field for the name
Line 409 ⟶ 430:
local index = 1
local last_matched_subtag_i = 0
for subtag_i, subtag in ipairs(subtagspotential_subtags) do
local type
local matched = false
Line 439 ⟶ 460:
end
if #subtagspotential_subtags > matched_count then
-- Not all potential subtags were matched. The unmatched tail end of the tag
-- (after the subtag at the index last_matched_subtag_i) is a
-- private-use subtag if it starts with "x". Otherwise, the tag is
-- invalid.
-- Remove the "x-"?
local suffix = table.concat(subtags, "-", last_matched_subtag_i + 1)
-- https://tools.ietf.org/html/bcp47#section-2.2.7
if suffix:sub(1, 1) == "x" then
local private_use_tag_start_indices = {}
parsed_subtags.private_use = suffix
for subtag_i = last_matched_subtag_i + 1, #potential_subtags do
matched_count = matched_count + 1 -- not used after this point
if potential_subtags[subtag_i] == "x" then
else
table.insert(private_use_tag_start_indices, subtag_i)
parsed_subtags.invalid = suffix
end
parsed_subtags.error = "invalid subtag"
end
if not private_use_tag_start_indices[1] then
local suffix = table.concat parsed_subtags:throw(subtags,"invalid "-subtag", last_matched_subtag_i + 1)
return parsed_subtags
end
for i, subtag_index in pairs(private_use_tag_start_indices) do
local next_subtag_index = private_use_tag_start_indices[i + 1] or #potential_subtags
-- Private-use subtags consist of x- followed by a sequence of alphanumeric characters.
if next_subtag_index - subtag_index ~= 2 then
parsed_subtags.:throw("invalid =subtag", suffixsubtag_index)
return parsed_subtags
end
-- Generate private_use table only if needed.
parsed_subtags.private_use = parsed_subtags.private_use or {}
table.insert(parsed_subtags.private_use = suffix,
table.concat(potential_subtags, "-",
subtag_index,
private_use_tag_start_indices[i + 1]
and private_use_tag_start_indices[i + 1] - 1))
end
end
if not (parsed_subtags.language or parsed_subtags.private_use) then
parsed_subtags.error = :throw("no language subtag", 1)
end
-- Only useful internally.
parsed_subtags.invalidinput = tagnil
setmetatable(parsed_subtags, nil)
return parsed_subtags