-- Forbid accidental reads/writes of undeclared globals (Scribunto's bundled
-- "strict" library).
require('strict')

local m_data = mw.loadData("Module:Wikt-lang/data")
local langData = m_data.languages or m_data

local p = {}

--- Normalise an empty template argument to nil.
-- @param value any value, typically a template argument
-- @return nil when value is the empty string, otherwise value unchanged
local function ifNotEmpty(value)
    if value == "" then
        return nil
    else
        return value
    end
end

-- Unicode-aware string helpers from mw.ustring, cached in locals.
local sub = mw.ustring.sub
local gsub = mw.ustring.gsub
local find = mw.ustring.find
local match = mw.ustring.match
local lower = mw.ustring.lower
local upper = mw.ustring.upper
local U = mw.ustring.char
-- Combining diacritics, built from their code points with U().
local grave, acute, double_acute = U(0x300), U(0x301), U(0x30B)
local tilde, macron = U(0x303), U(0x304)
local dgrave, invbreve = U(0x30F), U(0x311)
--[[ Per-language data, keyed by language code.
     name:         the "canonical name" used on Wiktionary.
     article:      the Wikipedia article title (a plain string).
     scripts:      list of ISO 15924 script codes.
     replacements: optional map of pattern -> replacement applied to a word
                   to obtain the Wiktionary entry name.
     Every entry uses the key "scripts" (some previously used "script") and
     "article" is always a string, because makeLinkedName concatenates it. ]]
local languages = {
    ["ang"] = {
        ["name"] = "Old English",
        ["article"] = "Old English",
        ["scripts"] = { "Latn" },
        -- Remove macrons, acutes, and overdots
        ["replacements"] = {
            ["[ĀÁ]"] = "A",
            ["[āá]"] = "a",
            ["[ǢǼ]"] = "Æ",
            ["[ǣǽ]"] = "æ",
            ["Ċ"] = "C",
            ["ċ"] = "c",
            ["[ĒÉ]"] = "E",
            ["[ēé]"] = "e",
            ["Ġ"] = "G",
            ["ġ"] = "g",
            ["[ĪÍ]"] = "I",
            ["[īí]"] = "i",
            ["[ŌÓ]"] = "O",
            ["[ōó]"] = "o",
            ["[ŪÚ]"] = "U",
            ["[ūú]"] = "u",
            ["[ȲÝ]"] = "Y",
            ["[ȳý]"] = "y",
        },
    },
    ["ar"] = {
        ["name"] = "Arabic",
        ["article"] = "Arabic language",
        ["scripts"] = { "Arab" },
        --[[ ālif with wasla is replaced by ālif;
             taṭwīl, fatḥatan, ḍammatan, kasratan,
             fatḥa, ḍamma, kasra,
             shadda, sukūn, and superscript (dagger) ālif are removed. ]]
        ["direction"] = "rtl", -- Should be in the script data module.
        ["replacements"] = {
            [U(0x0671)] = U(0x0627),
            ["["..U(0x0640)..U(0x064B)..U(0x064C)..U(0x064D)
                ..U(0x064E)..U(0x064F)..U(0x0650)
                ..U(0x0651)..U(0x0652)..U(0x0670).."]"] = "",
        },
    },
    ["bn"] = {
        ["name"] = "Bengali",
        ["article"] = "Bengali language",
        ["scripts"] = { "Beng" },
    },
    ["de"] = {
        ["name"] = "German",
        ["article"] = "German language",
        ["scripts"] = { "Latn" },
        --[[
        ["replacements"] = {
            ["ae"] = "ä",
            ["oe"] = "ö",
            ["ue"] = "ü",
            ["A[Ee]"] = "Ä",
            ["O[Ee]"] = "Ö",
            ["U[Ee]"] = "Ü",
        },
        ]]
    },
    ["en"] = {
        ["name"] = "English",
        ["article"] = "English language",
        ["scripts"] = { "Latn" },
    },
    ["es"] = {
        ["name"] = "Spanish",
        ["article"] = "Spanish language",
        ["scripts"] = { "Latn" },
    },
    ["fr"] = {
        ["name"] = "French",
        ["article"] = "French language",
        ["scripts"] = { "Latn" },
    },
    ["frm"] = {
        ["name"] = "Middle French",
        ["article"] = "Middle French",
        ["scripts"] = { "Latn" },
    },
    ["gem-pro"] = {
        ["name"] = "Proto-Germanic",
        ["article"] = "Proto-Germanic language",
        ["scripts"] = { "Latn" },
        ["type"] = "reconstructed",
        ["replacements"] = {},
    },
    ["grc"] = {
        ["name"] = "Ancient Greek",
        ["article"] = "Ancient Greek",
        ["scripts"] = { "Grek" },
        ["replacements"] = {
            -- Vowels with macrons or breves are replaced with plain letters.
            ["[ᾱᾰ]"] = "α",
            ["[ᾹᾸ]"] = "Α",
            ["[ῑῐ]"] = "ι",
            ["[ῙῘ]"] = "Ι",
            ["[ῡῠ]"] = "υ",
            ["[ῩῨ]"] = "Υ",
            ["ϐ"] = "β",
            ["ϵ"] = "ε",
            ["ϑ"] = "θ",
            ["ϰ"] = "κ",
            ["ϱ"] = "ρ",
            ["ϲ"] = "σ",
            ["ϕ"] = "φ",
        },
    },
    ["grk-pro"] = {
        ["name"] = "Proto-Hellenic",
        ["Wikipedia_name"] = "Proto-Greek",
        ["article"] = "Proto-Greek language",
        ["scripts"] = { "Latn" },
        ["type"] = "reconstructed",
        ["replacements"] = {},
    },
    ["hi"] = {
        ["name"] = "Hindi",
        ["article"] = "Hindi",
        ["scripts"] = { "Deva" },
    },
    ["ine-pro"] = {
        ["name"] = "Proto-Indo-European",
        ["article"] = "Proto-Indo-European language",
        ["scripts"] = { "Latn" },
        ["type"] = "reconstructed",
        ["replacements"] = {},
    },
    ["ja"] = {
        ["name"] = "Japanese",
        ["article"] = "Japanese language",
        ["scripts"] = { "Jpan" },
    },
    ["la"] = {
        ["name"] = "Latin",
        ["article"] = "Latin",
        ["scripts"] = { "Latn" },
        ["replacements"] = {
            -- Vowels with macrons, breves, or diaereses are replaced with plain letters.
            ["[ĀĂ]"] = "A",
            ["[āă]"] = "a",
            ["[ĒĔ]"] = "E",
            ["[ēĕë]"] = "e",
            ["[ĪĬÏ]"] = "I",
            ["[īĭï]"] = "i",
            ["[ŌŎ]"] = "O",
            ["[ōŏ]"] = "o",
            ["[ŪŬÜ]"] = "U",
            ["[ūŭü]"] = "u",
            ["Ȳ"] = "Y",
            ["ȳ"] = "y"
        },
    },
    ["mul"] = {
        ["name"] = "Translingual",
        ["article"] = "",
        ["scripts"] = { "" },
    },
    ["orv"] = {
        ["name"] = "Old East Slavic",
        ["article"] = "Old East Slavic",
        ["scripts"] = { "Cyrs" },
        ["replacements"] = {
            [U(0x484)] = "",
        },
    },
    ["pt"] = {
        ["name"] = "Portuguese",
        ["article"] = "Portuguese language",
        ["scripts"] = { "Latn" },
    },
    ["pa"] = {
        ["name"] = "Punjabi",
        ["article"] = "Punjabi language",
        ["scripts"] = { "Guru", "Arab", }
    },
    ["ru"] = {
        ["name"] = "Russian",
        ["article"] = "Russian language",
        ["scripts"] = { "Cyrl" },
        -- Combining acute accent is removed.
        ["replacements"] = { [U(0x0301)] = "", }
    },
    ["sla-pro"] = {
        ["name"] = "Proto-Slavic", -- also Common Slavic
        ["type"] = "reconstructed",
        ["scripts"] = { "Latn" },
        ["replacements"] = {
            ["[ÀÁÃĀȀȂ]"] = "A",
            ["[àáãāȁȃ]"] = "a",
            ["[ÈÉẼĒȄȆ]"] = "E",
            ["[èéẽēȅȇ]"] = "e",
            ["[ÌÍĨĪȈȊ]"] = "I",
            ["[ìíĩīȉȋ]"] = "i",
            ["[ÒÓÕŌȌȎŐ]"] = "O",
            ["[òóõōȍȏő]"] = "o",
            ["[ÙÚŨŪȔȖŰ]"] = "U",
            ["[ùúũūȕȗű]"] = "u",
            ["[ỲÝỸȲ]"] = "Y",
            ["[ỳýỹȳ]"] = "y",
            ["Ǭ"] = "Ǫ",
            ["ǭ"] = "ǫ",
            ["[" .. grave .. acute .. double_acute .. tilde .. macron .. dgrave .. invbreve .. "]"] = "",
        },
    },
    ["ur"] = {
        ["name"] = "Urdu",
        ["article"] = "Urdu",
        ["scripts"] = { "Arab" },
    },
    ["zh"] = {
        ["name"] = "Chinese",
        ["article"] = "Chinese language",
        ["scripts"] = { "Hani" },
    },
    ["xcl"] = {
        ["name"] = "Old Armenian",
        ["article"] = "Classical Armenian",
        ["scripts"] = { "Armn" },
        ["replacements"] = {
            ["[՞՜՛՟]"] = "",
            ["և"] = "եւ",
        },
    },
}
--[[ Templates for new entries (without and with replacements):
[""] = {
["name"] = "",
["article"] = "",
["scripts"] = { "" },
},
[""] = {
["name"] = "",
["article"] = "",
["scripts"] = { "" },
["replacements"] = {
},
},
]]
--- Report whether a value is present, i.e. neither nil nor the empty string.
-- @param variable value to inspect
-- @return true when variable is anything other than nil or ""
local function checkForString(variable)
    if variable == nil or variable == "" then
        return false
    end
    return true
end
--- Build a wikilinked language name followed by ": ".
-- Looks the code up in the local `languages` table and links the language's
-- Wikipedia article, displaying `Wikipedia_name` when present and `name`
-- otherwise.
-- @param languageCode key into `languages`
-- @return "[[article|name]]: ", or "name: " when the entry has no article
-- Raises an error when the code has no entry in `languages`.
local function makeLinkedName(languageCode)
    local data = languages[languageCode]
    if not data then
        error("No language data found for the code " .. tostring(languageCode), 2)
    end
    local article = data["article"]
    -- Tolerate an article given as a one-element list.
    if type(article) == "table" then
        article = article[1]
    end
    local name = data["Wikipedia_name"] or data["name"]
    -- Without an article title there is nothing to link to; "[[|name]]"
    -- would be a broken link.
    if article == nil or article == "" then
        return name .. ": "
    end
    return "[[" .. article .. "|" .. name .. "]]: "
end
--- Convert displayed text into the title of the corresponding Wiktionary entry.
-- Strips wiki bold/italic markup, then applies the language's `replacements`
-- (from `langData`) to remove or normalise diacritics.
-- @param word          text to convert; must not be nil
-- @param languageCode  key into `langData`; may be nil or unknown, in which
--                      case only the markup is stripped
-- @return the entry name ("" for an empty word)
-- Raises an error when word is nil.
local function makeEntryName(word, languageCode)
    -- Check before tostring(): tostring(nil) is the string "nil", so a check
    -- made afterwards could never fire.
    if word == nil then
        error("The function makeEntryName requires a string argument")
    end
    word = tostring(word)
    if word == "" then
        return ""
    end

    -- Remove bold and italics, so that words that contain bolding or
    -- emphasis can be linked without piping.
    word = word:gsub("\'\'\'", "")
    word = word:gsub("\'\'", "")

    local data = languageCode and langData[languageCode]
    local replacements = data and data["replacements"]
    if replacements == nil then
        return word
    end

    local ugsub = mw.ustring.gsub
    if replacements.decompose then
        -- Decompose so that the diacritics of characters such as á can be
        -- removed in one go. No need to compose at the end, because the
        -- MediaWiki software will handle that.
        word = mw.ustring.toNFD(word)
        for i, from in ipairs(replacements.from) do
            word = ugsub(
                word,
                from,
                replacements.to and replacements.to[i] or "")
        end
    else
        for regex, replacement in pairs(replacements) do
            word = ugsub(word, regex, replacement)
        end
    end
    return word
end
--- Put a four-letter script code into canonical capitalisation ("latn" -> "Latn").
-- Designed as a gsub replacement callback for the pattern "(%a)(%a%a%a)".
-- @param firstLetter   first letter of the script code
-- @param threeLetters  remaining three letters
-- @return firstLetter upper-cased followed by threeLetters lower-cased
local function fixScriptCode(firstLetter, threeLetters)
    return string.upper(firstLetter) .. string.lower(threeLetters)
end
--- Split a "language[-Script]" parameter into a language code and a script code.
-- Surrounding whitespace in `codes` is ignored.
-- @param codes  raw first template parameter, e.g. "en", "grc-Grek", "ine-x-proto"
-- @param text   (optional) the text being tagged; used to guess the script
--               when `codes` does not name one
-- @return languageCode  lower-cased or redirected language code, or nil
-- @return scriptCode    script code capitalised like "Latn", or "unknown"
-- @return errorText     "" on success, otherwise a small HTML error notice
local function getCodes(codes, text)
    local languageCode, scriptCode, invalidCode
    local errorText

    if type(codes) == "string" then
        codes = codes:match("^%s*(.-)%s*$")
    end

    if codes == nil or codes == "" then
        errorText = 'no language or script code provided'
    elseif codes:find("^%a%a%a?$") or codes:find("^%a%a%a?%-%a%a%a%a$") then
        -- A two- or three-letter sequence at the beginning of the parameter.
        languageCode = string.lower(codes:match("^%a%a%a?"))
        -- Four letters at the end of the parameter.
        local rawScript = codes:match("%-(%a%a%a%a)$")
        if rawScript then
            -- Parentheses drop gsub's second result (the match count).
            scriptCode = (rawScript:gsub("(%a)(%a%a%a)", fixScriptCode, 1))
        end
    elseif codes:find("^%a%a%a?%-%a%a%a?$")
    or codes:find("^%a%a%a%-%a%a%a%-%a%a%a$") then
        languageCode = codes
    -- Private-use subtag: x followed by one or more sequences of 1-8 lowercase
    -- letters separated by hyphens. This only allows for one sequence, as it is
    -- needed for proto-languages such as ine-x-proto (Proto-Indo-European).
    elseif codes:find("^%a%a%a?%-x%-%a%a?%a?%a?%a?%a?%a?%a?$") then
        languageCode, scriptCode =
            codes:match("^(%a%a%a%-x%-%a%a?%a?%a?%a?%a?%a?%a?)%-?(.*)$")
        if not languageCode then
            errorText = '<code>'..codes..'</code> is not a valid language or script code.'
        elseif scriptCode ~= "" and not scriptCode:find("%a%a%a%a") then
            errorText = '<code>'..scriptCode..'</code> is not a valid script code.'
        else
            scriptCode = (scriptCode:gsub("(%a)(%a%a%a)", fixScriptCode, 1))
        end
    elseif codes:find("^%a%a%a?") then
        languageCode, invalidCode = codes:match("^(%a%a%a?)%-?(.*)")
        languageCode = string.lower(languageCode)
        errorText = '<code>'..invalidCode..'</code> is not a valid script code.'
    elseif codes:find("%-?%a%a%a%a$") then
        invalidCode, scriptCode = codes:match("(.*)%-?(%a%a%a%a)$")
        scriptCode = (scriptCode:gsub("(%a)(%a%a%a)", fixScriptCode, 1))
        errorText = '<code>'..invalidCode..'</code> is not a valid language code.'
    else
        errorText = '<code>'..codes..'</code> is not a valid language or script code.'
    end

    -- The private-use branch can yield an empty script code; treat it like a
    -- missing one.
    if not scriptCode or scriptCode == "" then
        -- NOTE(review): isLatn is only called when text is present, on the
        -- assumption that it does not accept nil -- confirm.
        scriptCode = text ~= nil
            and require("Module:Language/scripts").isLatn(text)
            and "Latn"
            or "unknown"
    end

    -- Always return a string so that callers can concatenate the result.
    if errorText then
        errorText = ' <span style="font-size: smaller;">[' .. errorText .. ']</span>'
    else
        errorText = ""
    end

    local redirects = m_data.redirects
    languageCode = redirects and redirects[languageCode] or languageCode
    return languageCode, scriptCode, errorText
end
--- Wrap text in an HTML element carrying lang / xml:lang attributes.
-- @param text          text (or wikitext) to wrap; "[text?]" is used when nil
-- @param languageCode  language code; when nil no lang attributes are written
-- @param script        script code; appended to the language code as a subtag
--                      unless it is empty or the placeholder "unknown"
-- @param italics       request italics; only honoured when script is "Latn"
-- @return an <i> or <span> element, surrounded by directional marks for
--         languages whose data has direction = "rtl"
local function tag(text, languageCode, script, italics)
    local data = languageCode and langData[languageCode]
    local italicize = script == "Latn" and italics

    -- Use Wikipedia code if it has been given: for instance,
    -- Proto-Indo-European has the Wiktionary code "ine-pro" but the Wikipedia
    -- code "ine-x-proto".
    languageCode = data and data.Wikipedia_code or languageCode
    -- "unknown" is getCodes' placeholder for an undetected script, not a real
    -- subtag, so it is not written into the lang attribute.
    if languageCode and script and script ~= "" and script ~= "unknown" then
        languageCode = languageCode .. "-" .. script
    end

    if not text then text = "[text?]" end

    -- { attribute on the element, text before the element, text after it }
    local textDirectionMarkers = { "", "", "" }
    if data and data["direction"] == "rtl" then
        textDirectionMarkers = { ' dir="rtl"', '&rlm;', '&lrm;' }
    end

    -- Callers may pass a nil language code when the code parameter was
    -- invalid; still emit the text so their error notice can follow it.
    local langAttributes = ""
    if languageCode then
        langAttributes = " lang=\"" .. languageCode .. "\" xml:lang=\"" .. languageCode .. "\""
    end

    local element = italicize and "i" or "span"
    local out = { textDirectionMarkers[2] }
    table.insert(out, "<" .. element .. langAttributes .. textDirectionMarkers[1] .. ">" .. text .. "</" .. element .. ">")
    table.insert(out, textDirectionMarkers[3])
    return table.concat(out)
end
--- Entry point: tag text with its language without linking it.
-- Arguments come from the parent frame when it has a first positional
-- parameter, otherwise from the invoking frame.
--   1: language code, optionally followed by "-" and a script code
--   2: the text to tag (required; an error is raised when missing)
--   italics / i: "n" or "-" turns italics off
function p.lang(frame)
    local parentArgs = frame:getParent().args
    local args = parentArgs[1] and parentArgs or frame.args

    local codes = args[1]
    local text = args[2] or error("Provide text in the second parameter")

    local languageCode, scriptCode, errorText = getCodes(codes, text)

    local italicsArg = args.italics or args.i
    local italics = italicsArg ~= "n" and italicsArg ~= "-"

    return tag(text, languageCode, scriptCode, italics) .. errorText
end
--- Build a wikilink to a Wiktionary entry.
-- @param entry         entry title; a leading "*" marks a reconstructed form
-- @param linkText      text to display for the link
-- @param languageCode  (optional) language code; when given, the link points
--                      at that language's section, and reconstructed or
--                      appendix languages are routed to their namespaces
-- @return wikitext of the form "[[wikt:entry#Language|linkText]]", or
--         "[[wikt:entry|linkText]]" when no language code is given
-- Raises an error when no language name can be determined.
local function linkToWiktionary(entry, linkText, languageCode)
    if not languageCode then
        return "[[wikt:" .. entry .. "|" .. linkText .. "]]"
    end

    local data = langData[languageCode]
    local name
    if data and data.name then
        name = data.name
    else
        -- On other languages' wikis, use mw.getContentLanguage():getCode(),
        -- or replace 'en' with that wiki's language code.
        name = mw.language.fetchLanguageName(languageCode, 'en')
        if name == "" then
            error("Name for the language code " .. ("%q"):format(languageCode)
                .. " could not be retrieved with mw.language.fetchLanguageName, "
                .. "so it should be added to [[Module:Wikt-lang/data]]")
        end
    end

    if entry:sub(1, 1) == "*" then
        if name ~= "" then
            entry = "Reconstruction:" .. name .. "/" .. entry:sub(2)
        else
            error("Language name is empty")
        end
    elseif data and data.type == "reconstructed" then
        mw.log("Reconstructed language without asterisk:", languageCode, name, entry)
        local frame = mw.getCurrentFrame()
        -- Track reconstructed entries with no asterisk by transcluding
        -- a nonexistent template. This technique is used in Wiktionary:
        -- see [[wikt:Module:debug]].
        -- [[Special:WhatLinksHere/tracking/wikt-lang/reconstructed with no asterisk]]
        pcall(frame.expandTemplate, frame,
            { title = 'tracking/wikt-lang/reconstructed with no asterisk' })
        if name ~= "" then
            entry = "Reconstruction:" .. name .. "/" .. entry
        else
            error("Language name is empty")
        end
    elseif data and data.type == "appendix" then
        if name ~= "" then
            entry = "Appendix:" .. name .. "/" .. entry
        else
            error("Language name is empty")
        end
    end

    return "[[wikt:" .. entry .. "#" .. name .. "|" .. linkText .. "]]"
end

--- Entry point: link text to its Wiktionary entry and tag it with its language.
-- Arguments come from the parent frame when it has a first positional
-- parameter, otherwise from the invoking frame.
--   1: language code, optionally followed by "-" and a script code
--   2: the word to link (required)
--   3: (optional) display text; parameter 2 is displayed when it is absent
--   italics / i / italic: "n", "-" or "no" turns italics off
function p.wiktlang(frame)
    local parent = frame:getParent()
    local args = parent.args[1] and parent.args or frame.args

    local codes = args[1] and mw.text.trim(args[1])
    local word1 = ifNotEmpty(args[2])
    local word2 = ifNotEmpty(args[3])

    if not args[2] or '' == args[2] then
        return '<span style="color:#d33">[text?] Parameter 2 is required</span>';
    end

    local languageCode, scriptCode, errorText = getCodes(codes, word1)

    local italics = args.italics or args.i or args.italic
    italics = not (italics == "n" or italics == "-" or italics == "no")

    local entry, linkText
    if word1 then
        entry = makeEntryName(word1, languageCode)
        linkText = word2 or word1
    end

    local out
    if languageCode and entry and linkText then
        out = tag(linkToWiktionary(entry, linkText, languageCode), languageCode, scriptCode, italics)
    elseif entry and linkText then
        out = linkToWiktionary(entry, linkText)
    else
        out = '<span style="font-size: smaller;">[text?]</span>'
    end
    return out .. (errorText or "")
end
--- Entry point: link text to its Wiktionary entry.
-- Arguments come from the parent frame when it has a first positional
-- parameter, otherwise from the invoking frame.
--   1: language code, optionally followed by "-" and a script code
--   2: the word to link
--   3: (optional) display text; parameter 2 is displayed when it is absent
--   italics / i / italic: "n", "-" or "no" turns italics off
-- Returns the link (or a "[text?]" placeholder when parameter 2 is missing)
-- followed by any error notice from getCodes.
function p.wikt(frame)
    local parent = frame:getParent()
    local args = parent.args[1] and parent.args or frame.args

    local codes = args[1]
    local word1 = args[2]
    local word2 = args[3]

    local languageCode, scriptCode, errorText = getCodes(codes, word1)

    local italics = args.italics or args.i or args.italic
    italics = not (italics == "n" or italics == "-" or italics == "no")

    local entry, linkText
    if checkForString(word1) then
        entry = makeEntryName(word1, languageCode)
        linkText = checkForString(word2) and word2 or word1
    end

    local out
    if entry and linkText then
        if languageCode then
            -- Computed only here, where linkText is known to be a non-empty
            -- string, so is_Latin is never handed nil.
            local italicize = italics and (scriptCode == "Latn" or require("Module:Unicode data").is_Latin(linkText))
            out = tag(linkToWiktionary(entry, linkText, languageCode), languageCode, scriptCode, italicize)
        else
            out = linkToWiktionary(entry, linkText)
        end
    else
        out = '<span style="font-size: smaller;">[text?]</span>'
    end

    -- errorText is normally "" on success; guard against nil so the output
    -- is never lost to a concatenation error.
    return out .. (errorText or "")
end