-- Module:Wikt-lang
--
-- This is an old revision of this page, as edited by Erutuon (talk | contribs) at 02:36, 2 October 2016 (for generating language name prefix used in templates such as Template:lang-fr). The present address (URL) is a permanent link to this revision, which may differ significantly from the current revision.

local U = mw.ustring.char

--[[ Name is the "canonical name" used on Wiktionary. Article is the Wikipedia article. Script is the ISO 15924 code. ]]

-- Per-language data, keyed by language code. Each entry has "name", "article",
-- and "script"; an optional "replacements" table holds two parallel lists:
-- every pattern in "from" is substituted with the same-index string in "to"
-- (patterns are applied in list order with mw.ustring.gsub).
local data = {
	["ar"] = {
		["name"] = "Arabic",
		["article"] = "Arabic language",
		["script"] = "Arab",
		["replacements"] = {
			--[[ Alif waṣla (U+0671) is replaced by plain alif (U+0627);
			fatḥatan, ḍammatan, kasratan, fatḥa, ḍamma, kasra, shadda, sukūn
			(U+064B–U+0652), dagger alif (U+0670), and taṭwīl (U+0640) are removed. ]]
			["from"] = { U(0x0671), U(0x064B), U(0x064C), U(0x064D), U(0x064E), U(0x064F), U(0x0650), U(0x0651), U(0x0652), U(0x0670), U(0x0640) },
			["to"]   = {U(0x0627), "", "", "", "", "", "", "", "", "", "", }
		},
	},
	["bn"] = {
		["name"] = "Bengali",
		["article"] = "Bengali language",
		["script"] = "Beng",
	},
	["de"] = {
		["name"] = "German",
		["article"] = "German language",
		["script"] = "Latn",
		["replacements"] = {
			-- Digraph spellings are replaced with the corresponding umlaut letters.
			-- NOTE(review): this also rewrites genuine "ae"/"oe"/"ue" sequences
			-- (e.g. the "ue" in "neue") — confirm that this is intended.
			["from"] = { "ae", "oe", "ue", "Ae", "Oe", "Ue", "AE", "OE", "UE", },
			["to"] = { "ä", "ö", "ü", "Ä", "Ö", "Ü", "Ä", "Ö", "Ü", },
		},
	},
	["en"] = {
		["name"] = "English",
		["article"] = "English language",
		["script"] = "Latn",
	},
	["es"] = {
		["name"] = "Spanish",
		["article"] = "Spanish language",
		["script"] = "Latn",
	},
	["fr"] = {
		["name"] = "French",
		["article"] = "French language",
		["script"] = "Latn",
	},
	["grc"] = {
		["name"] = "Ancient Greek",
		["article"] = "Ancient Greek",
		["script"] = "Grek",
		["replacements"] = {
			-- Vowels with macrons or breves are replaced with plain letters of
			-- the same case; the variant forms ϑ and ϱ become θ and ρ.
			["from"] = { "[ᾱᾰ]", "[ᾹᾸ]", "[ῑῐ]", "[ῙῘ]", "[ῡῠ]", "[ῩῨ]", "ϑ", "ϱ", },
			["to"] = { "α", "Α", "ι", "Ι", "υ", "Υ", "θ", "ρ", },
		},
	},
	["hi"] = {
		["name"] = "Hindi",
		["article"] = "Hindi",
		["script"] = "Deva",
	},
	["ja"] = {
		["name"] = "Japanese",
		["article"] = "Japanese language",
		["script"] = "Jpan",
	},
	["la"] = {
		["name"] = "Latin",
		["article"] = "Latin",
		["script"] = "Latn",
		["replacements"] = {
			-- Vowels with macrons, breves, or diaereses are replaced with plain letters.
			["from"] = { "[ĀĂ]", "[āă]", "[ĒĔ]", "[ēĕë]", "[ĪĬÏ]", "[īĭï]", "[ŌŎ]", "[ōŏ]", "[ŪŬÜ]", "[ūŭü]", "Ȳ", "ȳ", },
			["to"] = { "A", "a", "E", "e", "I", "i", "O", "o", "U", "u", "Y", "y", },
		},
	},
	["pt"] = {
		["name"] = "Portuguese",
		["article"] = "Portuguese language",
		["script"] = "Latn",
	},
	["pa"] = {
		["name"] = "Punjabi",
		["article"] = "Punjabi language",
		["script"] = "Guru", -- also "Arab", but multiple scripts have to be allowed
	},
	["ru"] = {
		["name"] = "Russian",
		["article"] = "Russian language",
		["script"] = "Cyrl",
		["replacements"] = {
			-- Combining acute accent is removed.
			["from"] = { U(0x0301), },
			["to"] = { "", },
		},
	},
	["ur"] = {
		["name"] = "Urdu",
		["article"] = "Urdu",
		["script"] = "Arab",
	},
	["zh"] = {
		["name"] = "Chinese",
		["article"] = "Chinese language",
		["script"] = "Hani",
	},
}

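--[[ Templates for adding a language to the data table above: the first is for
a language without replacements, the second for one with replacements.
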
	[""] = {
		["name"] = "",
		["article"] = "",
		["script"] = "",
	},

	[""] = {
		["name"] = "",
		["article"] = "",
		["script"] = "",
		["replacements"] = {
			["from"] = { "", },
			["to"] = { "", },
		},
	},

]]

-- Table of functions exported by this module; declared local so that it does
-- not leak into the global environment.
local f = {}

--- Build the wikilinked language-name prefix, e.g. "[[French language|French]]: ".
-- @param languageCode key of an entry in the `data` table
-- @return wikitext link to the entry's "article", labelled with its "name",
--         followed by ": "
-- Raises an error naming the code when `data` has no entry for it.
local function generatePrefix(languageCode)
	local languageData = data[languageCode]
	if languageData == nil then
		-- Fail with a readable message instead of "attempt to index a nil value".
		error("No language data found for language code \"" .. tostring(languageCode) .. "\"")
	end
	return "[[" .. languageData["article"] .. "|" .. languageData["name"] .. "]]: "
end

--- Apply a language's replacement rules to a word.
-- @param word text to process
-- @param languageCode key of an entry in the `data` table
-- @return the word with every "from" pattern substituted by the same-index
--         "to" string, or the word unchanged when the language is unknown or
--         has no "replacements" table
local function strip(word, languageCode)
	local entry = data[languageCode]
	if entry == nil then
		return word
	end

	local rules = entry["replacements"]
	if rules == nil then
		return word
	end

	local patterns, substitutes = rules["from"], rules["to"]
	local result = word
	-- Rules are applied one after another, so later rules see the output of earlier ones.
	for index = 1, #patterns do
		result = mw.ustring.gsub(result, patterns[index], substitutes[index])
	end
	return result
end

--- Wrap text in an HTML element tagged with the language code.
-- @param languageCode value for the lang and xml:lang attributes; also looked
--        up in the `data` table to find the script
-- @param text content placed inside the element
-- @return an <i> element when the language's script is "Latn", otherwise a
--         <span> element (including when the code is not in `data`)
local function languageSpan(languageCode, text)
	-- Both lookups are local so that a call with an unknown code cannot reuse
	-- the script found by an earlier call.
	local languageData = data[languageCode]
	local languageScript = languageData and languageData["script"]
	local tag = "span"
	if languageScript == "Latn" then
		tag = "i"
	end
	return "<" .. tag .. " lang=\"" .. languageCode .. "\" xml:lang=\"" .. languageCode .. "\">" .. text .. "</" .. tag .. ">"
end

--- Template entry point: wrap text in a language-tagged HTML element.
-- @param frame frame object; frame.args[1] is the language code and
--        frame.args[2] is the text
-- @return the result of languageSpan for those two arguments
-- Raises an error when either argument is missing.
function f.lang(frame)
	local languageCode = frame.args[1]
	local text = frame.args[2]
	if not languageCode then
		error("Please provide a language code in the first parameter")
	end
	if not text then
		-- Without this check the failure would surface as a string-concatenation error.
		error("Please provide text in the second parameter")
	end
	return languageSpan(languageCode, text)
end

--- Build a language-tagged link to a Wiktionary entry.
-- @param languageCode language code; its English name is used as the section
--        anchor of the link
-- @param entry Wiktionary page title; defaults to linkText when omitted
-- @param linkText displayed text; defaults to entry when omitted
-- @return the result of languageSpan wrapping
--         "[[wikt:<entry>#<language name>|<linkText>]]"
-- Raises an error when the language code is missing, or when neither entry
-- nor linkText is given.
local function wiktionaryLink(languageCode, entry, linkText)
	if not languageCode then
		error("wiktionaryLink needs a language code")
	end

	-- Either value may stand in for the other, as the error message below states.
	entry = entry or linkText
	linkText = linkText or entry
	if not entry then
		error("wiktionaryLink needs a Wiktionary entry or link text, or both")
	end

	-- On other languages' wikis, use mw.getContentLanguage():getCode(), or replace with that wiki's language code.
	local languageName = mw.language.fetchLanguageName(languageCode, 'en')
	return languageSpan(languageCode, "[[wikt:" .. entry .. "#" .. languageName .. "|" .. linkText .. "]]")
end

--- Template entry point: link a word to its Wiktionary entry.
-- @param frame frame object; frame.args[1] is the language code,
--        frame.args[2] the word whose stripped form (see strip) is the entry
--        title, and frame.args[3] optional display text (defaults to
--        frame.args[2])
-- @return the result of wiktionaryLink for the code, entry, and display text
-- Raises an error when the language code or the word is missing.
function f.wikt(frame)
	local languageCode = frame.args[1]
	local word1 = frame.args[2]
	local word2 = frame.args[3]

	if not languageCode then
		error("Please provide a language code in the first parameter")
	end
	if not word1 then
		error("Please provide a word in the second parameter")
	end

	-- Locals, so that the values do not leak into the global environment.
	local entry = strip(word1, languageCode)
	local linkText = word2 or word1
	return wiktionaryLink(languageCode, entry, linkText)
end

return f