Module:Sandbox/Erutuon: Difference between revisions

Content deleted Content added
in "invalid characters" error, show first subtag with invalid character
m consistency, rm logging
Line 1:
local p = {}
 
local Unicode_data = require '"Module:Unicode data/sandbox'"
 
local function errorf(level, ...)
Line 36:
 
local Latn_pattern = table.concat {
'"['",
'"\n\32-\127'",
'"\194\160-\194\172'",
'"\195\128-\195\191'",
'"\196\128-\197\191'",
'"\198\128-\201\143'",
'"\225\184\128-\225\187\191'",
'"\226\177\160-\226\177\191'",
'"\234\156\160-\234\159\191'",
'"\234\172\176-\234\173\175'",
'"\239\172\128-\239\172\134'",
'"\239\188\129-\239\188\188'",
'"'",
'"'",
'"«'", '"»'",
'"]'",
};
 
Line 134:
function p.show(frame)
local expanded_pattern = Latn_pattern
:gsub('"%[(.-)%]'", '"%1'")
:gsub( -- Find two UTF-8-encoded characters separated by hyphen-minus.
'"([%z\1-\127\194-\244][\128-\191]*)%-([%z\1-\127\194-\244][\128-\191]*)'",
function (char1, char2)
return expand_range(char1, char2)
Line 143:
return ('* <div style="overflow-wrap: break-word;">%s</div><br>%s')
:format(expanded_pattern
:gsub('"^%s*'", ''""), -- Remove initial '"\n '" to avoid creating unwanted pre element.
show_scripts(mw.ustring.gcodepoint(expanded_pattern)))
end
Line 173:
 
local function link_block_name(block_name)
if block_name:find '" '" then
return ("[[%s]]"):format(block_name)
else
Line 213:
or codepoint <= 0x1F)) then
if self[script_code].n == 0x20 then
local period = ('".'"):byte()
for _ = 1, 3 do
self[script_code].n = self[script_code].n + 1
Line 340:
local language_codes = {}
for lang_template in content:gmatch '"{{lang[^}]+'" do
local template_name = lang_template:match('"{{([^|}]+)'")
local language_code
if template_name == '"lang'" then
language_code = lang_template:match '"{{lang|([^|}]+)'"
elseif template_name:find '"^lang-'" then
language_code = lang_template:match '"{{lang-([^|}]+)'"
end
if language_code then
Line 353:
end
return table.concat(m_table.keysToList(language_codes), '", '")
end
 
Line 421:
-- Language tags probably only contain ASCII alphabetic and numerical
-- characters and hyphen-minus.
if not tag:find '"^[A-Za-z0-9-]+$'" then
mw.log(tag, tag:find '[^A-Za-z0-9-]')
return parsed_subtags:throw(
"invalid characters",
fun.indexOf(
function (tag)
return tag:find '"[^A-Za-z0-9-]'"
end,
potential_subtags))