Module:Sandbox/Erutuon/charinsert names

This is an old revision of this page, as edited by Erutuon (talk | contribs) at 03:46, 2 February 2019 (list words in names). The present address (URL) is a permanent link to this revision, which may differ significantly from the current revision.

-- Export table for this module; functions are attached to it below and it is
-- returned at the end of the file.
local p = {}

-- Adds every code point above 0x7F found in `value` to `codepoint_set`
-- (as `codepoint_set[codepoint] = true`).
-- Strings are scanned directly; tables are walked recursively so that a menu
-- stored as a list of strings does not raise an error. Values of any other
-- type are ignored.
local function collect_codepoints(value, codepoint_set)
	local value_type = type(value)
	if value_type == "string" then
		for codepoint in mw.ustring.gcodepoint(value) do
			if codepoint > 0x7F then
				codepoint_set[codepoint] = true
			end
		end
	elseif value_type == "table" then
		for _, item in pairs(value) do
			collect_codepoints(item, codepoint_set)
		end
	end
end

--- Builds a JSON-like object mapping the non-ASCII code points used in the
-- charinsert gadget to their Unicode names.
--
-- Reads MediaWiki:Gadget-charinsert-core.js, extracts the brace-delimited
-- text following "charinsert: ", converts it to JSON, and collects every code
-- point above 0x7F from the decoded values. As a side effect, the distinct
-- space-separated words of all the names are written to the debug log with
-- mw.log, followed by their count.
--
-- @treturn string The text `{"<decimal code point>":"<name>",...}` with line
--   breaks inserted roughly every 80 bytes, or a plain error message if the
--   gadget page cannot be read, the charinsert object cannot be found, or it
--   cannot be decoded as JSON.
function p.show()
	local gadget_page = "MediaWiki:Gadget-charinsert-core.js"
	
	-- Both the title object and its content can be nil (invalid title or
	-- missing page); report that instead of failing on a nil index.
	local title = mw.title.new(gadget_page)
	local content = title and title:getContent()
	if not content then return "Could not read " .. gadget_page end
	
	local charinsert = content:match("charinsert: (%b{})")
	if not charinsert then return "Could not find charinsert" end
	
	-- Make charinsert object valid JSON: drop every backslash, then turn each
	-- single-quoted string into a double-quoted one, escaping any double
	-- quotes it contains.
	charinsert = charinsert
		:gsub("\\", "")
		:gsub("'(.-)'", function (in_quotes)
			return '"' .. in_quotes:gsub('"', '\\"') .. '"'
		end)
	
	-- Debugging aid: set to true to return the converted text in a
	-- syntaxhighlight tag instead of decoding it.
	local show_converted_text = false
	if show_converted_text then
		return mw.getCurrentFrame():extensionTag{
			name = "syntaxhighlight",
			content = charinsert,
			args = {
				lang = "lua",
			}
		}
	end
	
	-- mw.text.jsonDecode raises an error on malformed input, so it has to be
	-- called through pcall for the message below to be reachable.
	local ok, charinsert_table = pcall(
		mw.text.jsonDecode, charinsert, mw.text.JSON_TRY_FIXING)
	if not ok or type(charinsert_table) ~= "table" then
		return "charinsert object is not valid JSON"
	end
	
	local codepoint_set = {}
	collect_codepoints(charinsert_table, codepoint_set)
	
	local table_module = require "Module:table"
	local Unicode = require "Module:Unicode data"
	
	-- NOTE(review): the output order follows whatever order keysToList
	-- returns; presumably sorted -- confirm against Module:table.
	local codepoint_list = table_module.keysToList(codepoint_set)
	
	local words = {}
	
	local entries = {}
	local len = 0
	for _, codepoint in ipairs(codepoint_list) do
		local name = Unicode.lookup_name(codepoint)
		for word in name:gmatch("[^ ]+") do
			words[word] = true
		end
		local key_and_value = '"' .. codepoint .. '":"' .. name .. '"'
		-- Start a new line once the running length would pass 80 bytes. The
		-- count is approximate: it includes the leading newline and leaves
		-- out the commas added by table.concat below.
		if len + #key_and_value > 80 then
			key_and_value = '\n' .. key_and_value
			len = #key_and_value
		else
			len = len + #key_and_value
		end
		
		entries[#entries + 1] = key_and_value
	end
	
	local word_list = table_module.keysToList(words)
	table.sort(word_list)
	mw.log(table.concat(word_list, ", "), #word_list)
	
	return "{" .. table.concat(entries, ",") .. "}"
end

-- Hand the export table back to whoever loads this module.
return p