Content deleted Content added
for fun, show characters in tooltip of character count |
census of blocks with Latin script |
||
Line 183:
-- Set up the scan: an output buffer, the code point range to examine, and the
-- script lookup data.
local output = Output()
-- Both range arguments are parsed as hexadecimal. `start` falls back to 0
-- when the first argument is absent or does not parse.
local start = frame.args[1] and tonumber(frame.args[1], 16) or 0
-- The earlier form, `0x20000 or frame.args[2] and ...`, always evaluated to
-- 0x20000 because a number is truthy in Lua, so the second argument was
-- silently ignored. Honour the argument when it is given, and keep 0x20000 as
-- the fallback, which is the value the old expression always produced.
local ending = frame.args[2] and tonumber(frame.args[2], 16) or 0x20000
local script_data = mw.loadData "Module:Unicode data/scripts"
Line 237:
! block !! codepoints !! scripts
]]
local blocks_containing_Latn = {}
for _, block in pairs(mw.loadData "Module:Unicode data/blocks") do
Line 244 ⟶ 246:
if codepoint >= start then
while codepoint <= block[2] do
local count
counts:increment(singles[codepoint])▼
if script then -- Codepoint is in "singles" map.
codepoints_per_script:add(singles[codepoint], codepoint)▼
codepoint = codepoint + 1
count = 1
else
local range, index = Unicode_data.binary_range_search(codepoint, ranges)
if range then -- Codepoint is in "ranges" array.
script = range[3]
while codepoint <= range[2] and codepoint <= block[2] do
count = count + 1
codepoints_per_script:add(
codepoint = codepoint + 1
end
counts:increment(
else -- Codepoint doesn't have data; it's Zzzz.
-- Get range immediately above codepoint.
Line 264 ⟶ 270:
end
script = "Zzzz"
local range = ranges[index]
while codepoint < range[1] and codepoint <= block[2]
Line 271 ⟶ 278:
codepoint = codepoint + 1
end
counts:increment(
end
end
if script == "Latn" then
blocks_containing_Latn[block] = (blocks_containing_Latn[block] or 0) + count
end
end
Line 307 ⟶ 317:
end
output:insert "|}"
-- Write the per-block tallies held in blocks_containing_Latn to the debug
-- log, one line per entry, joined with newlines.

-- Comparator handed to m_table.sortedPairs: compares the first field of two
-- block records (presumably the block's starting code point -- confirm
-- against Module:Unicode data/blocks).
local function order_by_first_field(block_a, block_b)
	return block_a[1] < block_b[1]
end

-- Formats one entry: field 3 of the block record as text, fields 1 and 2 as
-- the U+XXXX bounds, followed by the tally stored for that block.
local function describe_block(count, block)
	return ("%s (U+%04X-U+%04X): %d"):format(block[3], block[1], block[2], count)
end

-- The sortedPairs call is kept as the final argument of fun.map so that every
-- value it returns is forwarded, exactly as before.
mw.log(table.concat(
	fun.map(
		describe_block,
		m_table.sortedPairs(blocks_containing_Latn, order_by_first_field)),
	"\n"))
return output:join()
|