Content deleted Content added
matched_count maybe not useful except internally |
allow multiple private-use subtags; store them in an array; error-throwing method in metatable (remove metatable and any fields only used internally before returning) |
||
Line 354:
return table.concat(m_table.keysToList(language_codes), ', ')
end
-- Metatable for the table of parsed subtags while it is still being built.
-- It only supplies the "throw" helper below and is detached again before the
-- table is handed back to the caller.
local parsed_subtags_mt = {}

-- Record a parse failure on the parsed-subtags table.
-- Despite the name, this does not call the global error() function; it only
-- stores the failure on the table itself:
--   self.error   - the description passed in as "error" (note that this
--                  parameter shadows the global error() inside the method)
--   self.invalid - the elements of self.input from position "index" onward,
--                  joined with "-"
-- Afterwards the internal "input" field is removed and the metatable is
-- detached, so the table no longer has access to this method.
function parsed_subtags_mt:throw(error, index)
	self.error = error
	local unparsed_tail = table.concat(self.input, "-", index)
	self.invalid = unparsed_tail
	-- "input" is bookkeeping for the parser only; do not expose it.
	self.input = nil
	setmetatable(self, nil)
end

-- Method lookups on tables using this metatable fall through to it.
parsed_subtags_mt.__index = parsed_subtags_mt
Line 360 ⟶ 371:
-- Based on https://www.w3.org/International/articles/language-tags/.
-- Parse a language tag.
-- Returns nil if tag is not a string or is an empty string.
-- Else returns a table with a map of subtag type to subtag for all subtags that
-- were parsed. If there was an error, returns an "error" field with a
-- description of the error, and an "invalid" field with the suffix of the tag
-- starting at the index where the error occurred.
-- Does not recognize "extension" tags, such as those introduced by "u"
-- (described in https://www.rfc-editor.org/rfc/rfc6067.txt), as they are not
-- needed on Wikipedia.
function p.parse_IETF(tag)
if
return nil
end
Line 374 ⟶ 395:
-- hyphen).
local parsed_subtags = {}
setmetatable(parsed_subtags, parsed_subtags_mt)
local matched_count = 0
-- Language tags probably only contain ASCII alphabetic and numerical
-- characters and hyphen-minus
if not tag:find '^[A-Za-z0-9-]+$' then
parsed_subtags
parsed_subtags.invalid = tag▼
return parsed_subtags
end
local
-- An array of patterns for each subtag, and a "type" field for the name
Line 409 ⟶ 430:
local index = 1
local last_matched_subtag_i = 0
for subtag_i, subtag in ipairs(
local type
local matched = false
Line 439 ⟶ 460:
end
if #
-- Not all potential subtags were matched. The unmatched tail end of the tag
-- (after the subtag at the index last_matched_subtag_i) is a
-- private-use subtag if it starts with "x". Otherwise, the tag is
-- invalid.
-- Remove the "x-"?
local suffix = table.concat(subtags, "-", last_matched_subtag_i + 1)▼
-- https://tools.ietf.org/html/bcp47#section-2.2.7
local private_use_tag_start_indices = {}
parsed_subtags.private_use = suffix▼
for subtag_i = last_matched_subtag_i + 1, #potential_subtags do
if potential_subtags[subtag_i] == "x" then
table.insert(private_use_tag_start_indices, subtag_i)
parsed_subtags.invalid = suffix▼
end
▲ parsed_subtags.error = "invalid subtag"
end
if not private_use_tag_start_indices[1] then
▲
return parsed_subtags
end
for i, subtag_index in pairs(private_use_tag_start_indices) do
local next_subtag_index = private_use_tag_start_indices[i + 1] or #potential_subtags
-- Private-use subtags consist of x- followed by a sequence of alphanumeric characters.
if next_subtag_index - subtag_index ~= 2 then
return parsed_subtags
end
-- Generate private_use table only if needed.
parsed_subtags.private_use = parsed_subtags.private_use or {}
table.concat(potential_subtags, "-",
subtag_index,
private_use_tag_start_indices[i + 1]
and private_use_tag_start_indices[i + 1] - 1))
end
end
if not (parsed_subtags.language or parsed_subtags.private_use) then
parsed_subtags
end
-- Only useful internally.
setmetatable(parsed_subtags, nil)
return parsed_subtags
|