Module:Sandbox/Erutuon: Difference between revisions

Content deleted Content added
tooltips
for fun, show characters in tooltip of character count
Line 36:
 
-- A single Lua pattern set ("[...]") listing the characters treated as
-- Latin-script text. Each entry is a literal character or a range, written
-- as UTF-8 byte escapes.
-- NOTE(review): the multi-byte ranges only act as codepoint ranges under a
-- UTF-8-aware matcher (presumably mw.ustring) -- confirm at the call sites.
local Latn_pattern = table.concat {
	'[',								-- this is a set so include opening bracket
	'\n\32-\127',						-- newline U+000A plus Basic Latin U+0020–U+007F (20 - 7F); newline and U+007F are for <poem>...</poem> support
	'\194\160-\194\172',				-- C1 Controls and Latin-1 Supplement U+00A0-U+00AC (C2 A0 - C2 AC)
	'\195\128-\195\191',				-- (skip shy) U+00C0–U+00FF (C3 80 - C3 BF)
	'\196\128-\197\191',				-- Latin Extended-A U+0100–U+017F (C4 80 - C5 BF)
	'\198\128-\201\143',				-- Latin Extended-B U+0180–U+024F (C6 80 - C9 8F)
	'\225\184\128-\225\187\191',		-- Latin Extended Additional U+1E00-U+1EFF (E1 B8 80 - E1 BB BF)
	'\226\177\160-\226\177\191',		-- Latin Extended-C U+2C60–U+2C7F (E2 B1 A0 - E2 B1 BF)
	'\234\156\160-\234\159\191',		-- Latin Extended-D U+A720-U+A7FF (EA 9C A0 - EA 9F BF)
	'\234\172\176-\234\173\175',		-- Latin Extended-E U+AB30-U+AB6F (EA AC B0 - EA AD AF)
	'\239\172\128-\239\172\134',		-- Alphabetic Presentation Forms U+FB00-U+FB06 (EF AC 80 - EF AC 86)
	'\239\188\129-\239\188\188',		-- Halfwidth and Fullwidth Forms U+FF01-U+FF3C (EF BC 81 - EF BC BC)
	'–',								-- ndash
	'—',								-- mdash
	'«', '»',							-- guillemets commonly used in several 'Latn' languages
	']',								-- close the set
};
 
Line 188:
local singles = script_data.singles
local ranges = script_data.ranges
-- Shared "clear" method: sets to nil every key of `self` that
-- m_table.keysToList(self, false) returns.
local function clear (self)
	local keys = m_table.keysToList(self, false)
	for index = 1, #keys do
		self[keys[index]] = nil
	end
end
local counts = {}
Line 195 ⟶ 201:
self[script_code] = (self[script_code] or 0) + (amount or 1)
end,
clear = function (self)clear,
}
for _, key in ipairs(m_table.keysToList(self, false)) do
})
self[key] = nil
-- Sample of the codepoints seen for each script, keyed by script code.
-- Each entry is an array of codepoints whose length is tracked in field `n`.
local codepoints_per_script = {}
setmetatable(codepoints_per_script, {
	__index = {
		-- Record `codepoint` in the sample for `script_code`.
		-- Control characters (U+0000–U+001F and U+0080–U+009F) are never recorded.
		-- Once an entry holds 0x20 items, the next recordable codepoint
		-- appends three periods instead; after that nothing more is added.
		add = function(self, script_code, codepoint)
			local sample = self[script_code]
			if not sample then
				sample = { n = 0 }
				self[script_code] = sample
			end

			local is_control = codepoint <= 0x9F
				and (codepoint >= 0x80 or codepoint <= 0x1F)

			if sample.n <= 0x20 and not is_control then
				if sample.n == 0x20 then
					-- Sample is full: mark the truncation with "...".
					local period = ('.'):byte()
					for _ = 1, 3 do
						sample.n = sample.n + 1
						sample[sample.n] = period
					end
				else
					if script_code == "Zinh" then -- probably combining character
						-- Precede it with U+25CC (dotted circle).
						sample.n = sample.n + 1
						sample[sample.n] = 0x25CC
					end
					sample.n = sample.n + 1
					sample[sample.n] = codepoint
				end
			end
		end,
		clear = clear,
	}
})
Line 217 ⟶ 246:
if singles[codepoint] then
counts:increment(singles[codepoint])
codepoints_per_script:add(singles[codepoint], codepoint)
codepoint = codepoint + 1
else
Line 223 ⟶ 253:
local count = 0
-- Walk codepoints while they stay within both range[2] and block[2].
while codepoint <= range[2] and codepoint <= block[2] do
	count = count + 1
	-- Record the codepoint before advancing, so the one just counted is the
	-- one stored; a single increment per iteration visits every codepoint.
	codepoints_per_script:add(range[3], codepoint)
	codepoint = codepoint + 1
end
counts:increment(range[3], count)
Line 254 ⟶ 285:
fun.map(
-- Format one script's entry: the script code in an <abbr> whose tooltip
-- is script_data.aliases[script], followed by the count in a <span> whose
-- tooltip holds the characters recorded for that script (empty string
-- when nothing was recorded).
function (count, script)
	local sample = codepoints_per_script[script]
	-- Pass the characters through mw.text.nowiki before placing them in
	-- the title attribute.
	local characters = sample
		and mw.text.nowiki(mw.ustring.char(unpack(sample)))
		or ""
	return ('<abbr title="%s">%s</abbr> (<span title="%s">%d</span>)')
		:format(script_data.aliases[script], script, characters, count)
end,
Line 267 ⟶ 301:
", "))
end
-- mw.logObject(codepoints_per_script, block[3])
counts:clear()
codepoints_per_script:clear()
end
output:insert "|}"