Module:Wikt-lang

This is an old revision of this page, as edited by Erutuon (talk | contribs) at 00:03, 7 October 2016 (changing the "script" key to "scripts" and the associated value to an array, and for now, decide italicization based on the first script in that array). The present address (URL) is a permanent link to this revision, which may differ significantly from the current revision.

local U = mw.ustring.char

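--[[ Name is the "canonical name" used on Wiktionary. Article is the Wikipedia article. Scripts is an array of ISO 15924 script codes; the first code in the array decides whether text is italicized. Replacements (optional) maps patterns to the text substituted for them when an entry name is built. ]]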

--[[ Per-language settings, keyed by language code.
	name:         language name.
	article:      title of the article about the language.
	scripts:      array of ISO 15924 script codes; the first one is used to decide italicization.
	replacements: optional map from a pattern to its replacement text, applied to a word
	              to turn it into an entry name.
	Declared local so that the table does not leak into the global environment. ]]
local data = {
	["ar"] = {
		["name"] = "Arabic",
		["article"] = "Arabic language",
		["scripts"] = { "Arab" },
		--[[ Alif waṣla (U+0671) is replaced by plain alif (U+0627);
		fatḥatan, ḍammatan, kasratan, fatḥa, ḍamma, kasra, shadda, sukūn,
		dagger alif (U+0670), and tatweel (U+0640) are removed. ]]
		["replacements"] = {
			[U(0x0671)] = U(0x0627),
			[U(0x064B)] = "",
			[U(0x064C)] = "",
			[U(0x064D)] = "",
			[U(0x064E)] = "",
			[U(0x064F)] = "",
			[U(0x0650)] = "",
			[U(0x0651)] = "",
			[U(0x0652)] = "",
			[U(0x0670)] = "",
			[U(0x0640)] = "",
		},
	},
	["bn"] = {
		["name"] = "Bengali",
		["article"] = "Bengali language",
		["scripts"] = { "Beng" },
	},
	["de"] = {
		["name"] = "German",
		["article"] = "German language",
		["scripts"] = { "Latn" },
		--[[
		["replacements"] = {
			["ae"]    = "ä",
			["oe"]    = "ö",
			["ue"]    = "ü",
			["A[Ee]"] = "Ä",
			["O[Ee]"] = "Ö",
			["U[Ee]"] = "Ü",
		},
		]]
	},
	["en"] = {
		["name"] = "English",
		["article"] = "English language",
		["scripts"] = { "Latn" },
	},
	["es"] = {
		["name"] = "Spanish",
		["article"] = "Spanish language",
		["scripts"] = { "Latn" },
	},
	["fr"] = {
		["name"] = "French",
		["article"] = "French language",
		["scripts"] = { "Latn" },
	},
	["grc"] = {
		["name"] = "Ancient Greek",
		["article"] = "Ancient Greek",
		["scripts"] = { "Grek" },
		["replacements"] = {
			-- Vowels with macrons or breves are replaced with plain letters.
			-- Case is preserved: lowercase maps to lowercase, capital to capital.
			["[ᾱᾰ]"] = "α",
			["[ᾹᾸ]"] = "Α",
			["[ῑῐ]"] = "ι",
			["[ῙῘ]"] = "Ι",
			["[ῡῠ]"] = "υ",
			["[ῩῨ]"] = "Υ",
			-- Variant letterforms are replaced with the ordinary letters.
			["ϑ"]    = "θ",
			["ϱ"]    = "ρ"
		},
	},
	["hi"] = {
		["name"] = "Hindi",
		["article"] = "Hindi",
		["scripts"] = { "Deva" },
	},
	["ja"] = {
		["name"] = "Japanese",
		["article"] = "Japanese language",
		["scripts"] = { "Jpan" },
	},
	["la"] = {
		["name"] = "Latin",
		["article"] = "Latin",
		["scripts"] = { "Latn" },
		["replacements"] = {
			-- Vowels with macrons, breves, or diaereses are replaced with plain letters.
			["[ĀĂ]"]  = "A",
			["[āă]"]  = "a",
			["[ĒĔ]"]  = "E",
			["[ēĕë]"] = "e",
			["[ĪĬÏ]"] = "I",
			["[īĭï]"] = "i",
			["[ŌŎ]"]  = "O",
			["[ōŏ]"]  = "o",
			["[ŪŬÜ]"] = "U",
			["[ūŭü]"] = "u",
			["Ȳ"]     = "Y",
			["ȳ"]     = "y"
		},
	},
	["pt"] = {
		["name"] = "Portuguese",
		["article"] = "Portuguese language",
		["scripts"] = { "Latn" },
	},
	["pa"] = {
		["name"] = "Punjabi",
		["article"] = "Punjabi language",
		["scripts"] = { "Guru", "Arab", }
	},
	["ru"] = {
		["name"] = "Russian",
		["article"] = "Russian language",
		["scripts"] = { "Cyrl" },
		-- Combining acute accent is removed.
		["replacements"] = { [U(0x0301)] = "", }
	},
	["ur"] = {
		["name"] = "Urdu",
		["article"] = "Urdu",
		["scripts"] = { "Arab" },
	},
	["zh"] = {
		["name"] = "Chinese",
		["article"] = "Chinese language",
		["scripts"] = { "Hani" },
	},
}

--[[ Blank templates for adding a language to the data table:

	[""] = {
		["name"] = "",
		["article"] = "",
		["scripts"] = { "" },
	},

	[""] = {
		["name"] = "",
		["article"] = "",
		["scripts"] = { "" },
		["replacements"] = {
		},
	},

]]

-- Table of functions this module exports; it is returned at the end of the file.
-- Local so that it does not leak into the global environment.
local f = {}

-- Shorthand for the mw.ustring version of gsub. The replacement patterns in the
-- language data contain multi-byte characters inside character classes, so the
-- byte-oriented string.gsub would not match them correctly.
local gsub = mw.ustring.gsub

-- Reports whether a value was actually supplied: true for anything except
-- nil and the empty string.
local function checkForString(variable)
	local isMissing = (variable == nil) or (variable == "")
	return not isMissing
end

-- Builds a piped wikilink to the language's article, displayed as the language
-- name and followed by a colon and a space, e.g. "[[Latin|Latin]]: ".
-- Raises an error if languageCode has no entry in data.
local function generatePrefix(languageCode)
	local entry = data[languageCode]
	local target, label = entry["article"], entry["name"]
	local link = "[[" .. target .. "|" .. label .. "]]"
	return link .. ": "
end

--[[ Turns a word as written in running text into the form used as an entry name.
	Bold ('''...''') and italic (''...'') wiki markup is removed, then every
	pattern in the language's "replacements" table (if it has one) is substituted.

	word:         the text to clean up; non-string values are converted with tostring.
	languageCode: key into data; unknown codes only get the markup removal.
	Returns the cleaned string ("" for an empty word).
	Raises an error when word is nil. ]]
local function strip(word, languageCode)
	-- The nil check has to come before tostring: tostring(nil) is the string "nil",
	-- which would otherwise be processed as if it were a real word.
	if word == nil then
		error("The function strip requires a string argument")
	end

	word = tostring(word)
	if word == "" then
		return ""
	end

	-- Remove bold and italics, so that words that contain bolding or emphasis can be linked without piping.
	-- Bold is removed first so that its three apostrophes are not split by the italics pattern.
	word = gsub(word, "'''", "")
	word = gsub(word, "''", "")

	local languageData = data[languageCode]
	local replacements = languageData and languageData["replacements"]
	if replacements then
		for pattern, replacement in pairs(replacements) do
			word = gsub(word, pattern, replacement)
		end
	end
	return word
end

--[[ Wraps text in an HTML element carrying lang and xml:lang attributes.
	If the first script listed for the language in data is "Latn", the element
	is <i>; otherwise, including for codes with no entry in data or entries
	without a scripts list, it is <span>.

	languageCode: language code written into the attributes.
	text:         content placed inside the element.
	Returns the markup as a string. ]]
local function languageSpan(languageCode, text)
	-- These must be locals. As globals, the script found by one call was still
	-- set during the next call, so an unknown language code following a
	-- Latin-script one was wrongly italicized.
	local languageData = data[languageCode]
	local scripts = languageData and languageData["scripts"]
	local languageScript = scripts and scripts[1]

	local tag = "span"
	if languageScript == "Latn" then
		tag = "i"
	end
	return "<" .. tag .. " lang=\"" .. languageCode .. "\" xml:lang=\"" .. languageCode .. "\">" .. text .. "</" .. tag .. ">"
end

-- Exported entry point: tags text with a language.
-- The first frame argument is the language code, the second is the text to wrap.
function f.lang(frame)
	local args = frame.args
	local languageCode, text = args[1], args[2]
	return languageSpan(languageCode, text)
end

--[[ Builds a language-tagged link to a Wiktionary entry, pointing at the
	section named after the language.

	languageCode: language code; its English name is used as the section anchor.
	entry:        title of the Wiktionary entry.
	linkText:     text displayed for the link.
	Either entry or linkText may be omitted, in which case the other is used for
	both, as the error message below promises.
	Returns the link wrapped by languageSpan.
	Raises an error if languageCode is missing, or if both entry and linkText are. ]]
local function wiktionaryLink(languageCode, entry, linkText)
	if not languageCode then
		error("wiktionaryLink needs a language code")
	end
	if not (entry or linkText) then
		error("wiktionaryLink needs a Wiktionary entry or link text, or both")
	end
	entry = entry or linkText
	linkText = linkText or entry

	-- On other languages' wikis, use mw.getContentLanguage():getCode(), or replace 'en' with that wiki's language code.
	local languageName = mw.language.fetchLanguageName(languageCode, 'en')
	return languageSpan(languageCode, "[[wikt:" .. entry .. "#" .. languageName .. "|" .. linkText .. "]]")
end

--[[ Exported entry point: links a word to its Wiktionary entry.
	Frame arguments:
	  1: language code (required, must not be empty).
	  2: the word (required, must not be empty). Its stripped form is the entry title.
	  3: optional display text; when absent or empty, argument 2 is displayed as written.
	Raises an error if the language code or the word is missing or empty. ]]
function f.wikt(frame)
	local args = frame.args
	local languageCode, word1, word2 = args[1], args[2], args[3]

	-- An omitted argument is nil but a blank one is "", so test for both;
	-- a blank code would otherwise produce an element with lang="".
	if not checkForString(languageCode) then
		error("Please provide a language code in the first parameter")
	end
	if not checkForString(word1) then
		error("Please provide a word in the second parameter")
	end

	-- Locals, so that the values do not leak into the global environment.
	local entry = strip(word1, languageCode)
	local linkText = word1
	if checkForString(word2) then
		linkText = word2
	end
	return wiktionaryLink(languageCode, entry, linkText)
end

-- Hand the table of exported functions (lang and wikt) back to whatever loads this module.
return f