Module:Wikt-lang: Difference between revisions

Content deleted Content added
changing the "script" key to "scripts" and the associated value to an array, and for now, decide italicization based on the first script in that array
sync from sandbox: include explicitly given script subtag in language tagging
 
(97 intermediate revisions by 11 users not shown)
Line 1:
require('strict')
local U = mw.ustring.char
local m_data = mw.loadData("Module:Wikt-lang/data")
local langData = m_data.languages or m_data
 
local p = {}
--[[ Name is the "canonical name" used on Wiktionary. Article is the title of the Wikipedia article. Scripts is an array of ISO 15924 script codes; for now, the first entry decides italicization. ]]
 
local function ifNotEmpty(value)
data = {
if value == "" then
["ar"] = {
return nil
["name"] = "Arabic",
else
["article"] = "Arabic language",
return value
["scripts"] = { "Arab" },
end
--[[ Alif waṣla (U+0671) is replaced by plain alif (U+0627).
The following are removed: fatḥatan, ḍammatan, kasratan, fatḥa, ḍamma,
kasra, shadda, and sukūn (U+064B through U+0652), dagger alif (U+0670),
and tatweel/kashida (U+0640). ]]
["replacements"] = {
[U(0x0671)] = U(0x0627),
[U(0x064B)] = "",
[U(0x064C)] = "",
[U(0x064D)] = "",
[U(0x064E)] = "",
[U(0x064F)] = "",
[U(0x0650)] = "",
[U(0x0651)] = "",
[U(0x0652)] = "",
[U(0x0670)] = "",
[U(0x0640)] = "",
},
},
["bn"] = {
["name"] = "Bengali",
["article"] = "Bengali language",
["scripts"] = { "Beng" },
},
["de"] = {
["name"] = "German",
["article"] = "German language",
["scripts"] = { "Latn" },
--[[
["replacements"] = {
["ae"] = "ä",
["oe"] = "ö",
["ue"] = "ü",
["A[Ee]"] = "Ä",
["O[Ee]"] = "Ö",
["U[Ee]"] = "Ü",
},
]]
},
["en"] = {
["name"] = "English",
["article"] = "English language",
["scripts"] = { "Latn" },
},
["es"] = {
["name"] = "Spanish",
["article"] = "Spanish language",
["scripts"] = { "Latn" },
},
["fr"] = {
["name"] = "French",
["article"] = "French language",
["scripts"] = { "Latn" },
},
["grc"] = {
	["name"] = "Ancient Greek",
	["article"] = "Ancient Greek",
	["scripts"] = { "Grek" },
	["replacements"] = {
		-- Vowels with macrons or breves are replaced with plain letters of
		-- the same case: lowercase stays lowercase, capitals stay capitals.
		["[ᾱᾰ]"] = "α",
		["[ᾹᾸ]"] = "Α",
		["[ῑῐ]"] = "ι",
		["[ῙῘ]"] = "Ι",
		["[ῡῠ]"] = "υ",
		["[ῩῨ]"] = "Υ",
		-- Symbol variants of theta and rho are replaced with the regular letters.
		["ϑ"] = "θ",
		["ϱ"] = "ρ"
	},
},
["hi"] = {
["name"] = "Hindi",
["article"] = "Hindi",
["scripts"] = { "Deva" },
},
["ja"] = {
["name"] = "Japanese",
["article"] = "Japanese language",
["scripts"] = { "Jpan" },
},
["la"] = {
	["name"] = "Latin",
	["article"] = "Latin",
	["scripts"] = { "Latn" },
	["replacements"] = {
		-- Vowels with macrons, breves, or diaereses are replaced with plain
		-- letters of the same case. Capital Ë is handled like lowercase ë,
		-- matching the existing treatment of Ï/ï and Ü/ü.
		["[ĀĂ]"] = "A",
		["[āă]"] = "a",
		["[ĒĔË]"] = "E",
		["[ēĕë]"] = "e",
		["[ĪĬÏ]"] = "I",
		["[īĭï]"] = "i",
		["[ŌŎ]"] = "O",
		["[ōŏ]"] = "o",
		["[ŪŬÜ]"] = "U",
		["[ūŭü]"] = "u",
		["Ȳ"] = "Y",
		["ȳ"] = "y"
	},
},
["pt"] = {
["name"] = "Portuguese",
["article"] = "Portuguese language",
["scripts"] = { "Latn" },
},
["pa"] = {
["name"] = "Punjabi",
["article"] = "Punjabi language",
["scripts"] = { "Guru", "Arab", }
},
["ru"] = {
["name"] = "Russian",
["article"] = "Russian language",
["scripts"] = { "Cyrl" },
-- Combining acute accent is removed.
["replacements"] = { [U(0x0301)] = "", }
},
["ur"] = {
["name"] = "Urdu",
["article"] = "Urdu",
["scripts"] = { "Arab" },
},
["zh"] = {
["name"] = "Chinese",
["article"] = "Chinese language",
["scripts"] = { "Hani" },
},
}
 
--[[
Templates for new language entries (copy one into the table above):

[""] = {
["name"] = "",
["article"] = "",
["scripts"] = { "" },
},

[""] = {
["name"] = "",
["article"] = "",
["scripts"] = { "" },
["replacements"] = {
},
},

]]
 
-- Table that collects this module's functions (e.g. f.lang).
-- Declared local so it does not leak into the shared global environment.
local f = {}

-- Shorthand for the Scribunto mw.ustring version of gsub; kept local for
-- the same reason.
local gsub = mw.ustring.gsub
 
-- Reports whether a value was actually supplied.
-- Returns false for nil and for the empty string, true for anything else.
local function checkForString(variable)
	if variable == nil or variable == "" then
		return false
	end
	return true
end
 
-- Builds the wikitext prefix for a language: a piped link to the language's
-- article, labelled with the language name, followed by ": ".
-- Looks the language up in the `data` table; raises a Lua error if the code
-- has no entry there, because the nil entry is indexed.
local function generatePrefix(languageCode)
	local entry = data[languageCode]
	local target, label = entry["article"], entry["name"]
	return "[[" .. target .. "|" .. label .. "]]: "
end
 
local function stripmakeEntryName(word, languageCode)
local languageDatadata = datalangData[languageCode]
local ugsub = mw.ustring.gsub
word = tostring(word)
if word == nil then
error("The function stripmakeEntryName requires a string argument")
elseif word == "" then
return ""
else
-- Remove bold and italics, so that words that contain bolding or emphasis can be linked without piping.
word = word:gsub(word, "\'\'\'", "")
word = word:gsub(word, "\'\'", "")
if languageDatadata == nil then
return word
else
local replacements = languageDatadata and data["replacements"]
if replacements == nil then
return word
else
-- Decompose so that the diacritics of characters such
for regex, replacement in pairs(replacements) do
-- as á can be removed in one go.
word = gsub(word, regex, replacement)
-- No need to compose at the end, because the MediaWiki software
-- will handle that.
if replacements.decompose then
word = mw.ustring.toNFD(word)
for i, from in ipairs(replacements.from) do
word = ugsub(
word,
from,
replacements.to and replacements.to[i] or "")
end
else
for regex, replacement in pairs(replacements) do
word = ugsub(word, regex, replacement)
end
end
return word
Line 194 ⟶ 55:
end
 
local function languageSpanfixScriptCode(languageCodefirstLetter, textthreeLetters)
return string.upper(firstLetter) .. string.lower(threeLetters)
languageData = data[languageCode]
end
if languageData then
 
languageScript = languageData["scripts"][1]
local function getCodes(codes)
local languageCode, scriptCode, invalidCode
local errorText
if codes == nil or codes == "" then
errorText = 'no language or script code provided'
elseif codes:find("^%a%a%a?$") or codes:find("^%a%a%a?%-%a%a%a%a$") then
-- A three- or two-letter lowercase sequence at beginning of first parameter
languageCode =
codes:find("^%a%a%a?") and (
codes:match("^(%l%l%l?)")
or codes:match("^(%a%a%a?)")
:gsub("(%a%a%a?)", string.lower, 1)
)
-- One uppercase and three lowercase letters at the end of the first parameter
scriptCode =
codes:find("%a%a%a%a$") and (
codes:match("(%u%l%l%l)$")
or gsub(
codes:match("(%a%a%a%a)$"),
"(%a)(%a%a%a)",
fixScriptCode,
1
)
)
elseif codes:find("^%a%a%a?%-%a%a%a?$")
or codes:find("^%a%a%a%-%a%a%a%-%a%a%a$") then
languageCode = codes
-- Private-use subtag: x followed by one or more sequences of 1-8 lowercase
-- letters separated by hyphens. This only allows for one sequence, as it is
-- needed for proto-languages such as ine-x-proto (Proto-Indo-European).
elseif codes:find("^%a%a%a?%-x%-%a%a?%a?%a?%a?%a?%a?%a?$") then
languageCode, scriptCode =
codes:match("^(%a%a%a%-x%-%a%a?%a?%a?%a?%a?%a?%a?)%-?(.*)$")
if not languageCode then
errorText = '<code>'..codes..'</code> is not a valid language or script code.'
elseif scriptCode ~= "" and not scriptCode:find("%a%a%a%a") then
errorText = '<code>'..scriptCode..'</code> is not a valid script code.'
else
scriptCode = scriptCode:gsub(
"(%a)(%a%a%a)",
fixScriptCode,
1
)
end
elseif codes:find("^%a%a%a?") then
languageCode, invalidCode = codes:match("^(%a%a%a?)%-?(.*)")
languageCode = string.lower(languageCode)
errorText = '<code>'..invalidCode..'</code> is not a valid script code.'
elseif codes:find("%-?%a%a%a%a$") then
invalidCode, scriptCode = codes:match("(.*)%-?(%a%a%a%a)$")
scriptCode = gsub(
scriptCode,
"(%a)(%a%a%a)",
fixScriptCode
)
errorText = '<code>'..invalidCode..'</code> is not a valid language code.'
else
errorText = '<code>'..codes..'</code> is not a valid language or script code.'
end
if languageScript == "Latn"errorText then
errorText = ' <span style="font-size: smaller">[' .. errorText .. ']</span>'
return "<i lang=\"" .. languageCode .. "\" xml:lang=\"" .. languageCode .. "\">" .. text .. "</i>"
else
errorText = ""
return "<span lang=\"" .. languageCode .. "\" xml:lang=\"" .. languageCode .. "\">" .. text .. "</span>"
end
languageCode = m_data.redirects[languageCode] or languageCode
return languageCode, scriptCode, errorText
end
 
local function tag(text, languageCode, script, italicize)
function f.lang(frame)
local data = langData[languageCode]
return languageSpan(frame.args[1], frame.args[2])
-- Use Wikipedia code if it has been given: for instance,
-- Proto-Indo-European has the Wiktionary code "ine-pro" but the Wikipedia
-- code "ine-x-proto".
languageCode = data and data.Wikipedia_code or languageCode
if script and script ~= "" then
languageCode = languageCode .. "-" .. script
end
 
if not text then text = "[text?]" end
local textDirectionMarkers = { "", "", "" }
if data and data["direction"] == "rtl" then
textDirectionMarkers = { ' dir="rtl"', '&rlm;', '&lrm;' }
end
local out = { textDirectionMarkers[2] }
if italicize then
table.insert(out, "<i lang=\"" .. languageCode .. "\"" .. textDirectionMarkers[1] .. ">" .. text .. "</i>")
else
table.insert(out, "<span lang=\"" .. languageCode .. "\"" .. textDirectionMarkers[1] .. ">" .. text .. "</span>")
end
table.insert(out, textDirectionMarkers[3])
return table.concat(out)
end
 
local function wiktionaryLinklinkToWiktionary(languageCode, entry, linkText, languageCode)
local data = langData[languageCode]
local languageData, languageName = {}, ""
local name
if languageCode then
if data and data.name then
languageName = mw.language.fetchLanguageName(languageCode, 'en') -- On other languages' wikis, use mw.getContentLanguage():getCode(), or replace with that wiki's language code.
name = data.name
else
-- On other languages' wikis, use mw.getContentLanguage():getCode(),
-- or replace 'en' with that wiki's language code.
name = mw.language.fetchLanguageName(languageCode, 'en')
if name == "" then
error("Name for the language code " .. ("%q"):format(languageCode or nil)
.. " could not be retrieved with mw.language.fetchLanguageName, "
.. "so it should be added to [[Module:Wikt-lang/data]]")
end
end
if entry:sub(1, 1) == "*" then
if name ~= "" then
entry = "Reconstruction:" .. name .. "/" .. entry:sub(2)
else
error("Language name is empty")
end
elseif data and data.type == "reconstructed" then
mw.log("Reconstructed language without asterisk:", languageCode, name, entry)
local frame = mw.getCurrentFrame()
-- Track reconstructed entries with no asterisk by transcluding
-- a nonexistent template. This technique is used in Wiktionary:
-- see [[wikt:Module:debug]].
-- [[Special:WhatLinksHere/tracking/wikt-lang/reconstructed with no asterisk]]
pcall(frame.expandTemplate, frame,
{ title = 'tracking/wikt-lang/reconstructed with no asterisk' })
if name ~= "" then
entry = "Reconstruction:" .. name .. "/" .. entry
else
error("Language name is empty")
end
elseif data and data.type == "appendix" then
if name ~= "" then
entry = "Appendix:" .. name .. "/" .. entry
else
error("Language name is empty")
end
end
if entry and linkText then
return languageSpan(languageCode, "[[wikt:" .. entry .. "#" .. languageNamename .. "|" .. linkText .. "]]")
else
error("wiktionaryLinklinkToWiktionary needs a Wiktionary entry or link text, or both")
end
else
return "[[wikt:" .. entry .. "|" .. linkText .. "]]"
error("wiktionaryLink needs a language code")
end
end
 
function fp.wiktwiktlang(frame)
local languageCodeparent = frame.args[1]:getParent()
local word1args = frameparent.args[21] and parent.args or frame.args
local word2 = frame.args[3]
local codes = args[1] and mw.text.trim(args[1])
if languageCode then
local word1 = ifNotEmpty(args[2])
if checkForString(word2) and checkForString(word1) then
local word2 = ifNotEmpty(args[3])
entry = strip(word1, languageCode)
linkText = word2
if not args[2] or '' == args[2] then
elseif checkForString(word1) then
return '<span style="color:#d33">[text?] Parameter 2 is required</span>';
entry = strip(word1, languageCode)
end
linkText = word1
else
local languageCode, scriptCode, errorText = getCodes(codes)
error("Please provide a word in the second parameter")
end
local italics = args.italics or args.i or args.italic
italics = not (italics == "n" or italics == "-" or italics == "no")
local entry, linkText
if word2 and word1 then
entry = makeEntryName(word1, languageCode)
linkText = word2
elseif word1 then
entry = makeEntryName(word1, languageCode)
linkText = word1
end
 
local italicize = italics and (scriptCode == "Latn" or require("Module:Unicode data").is_Latin(linkText))
 
local out
if languageCode and entry and linkText then
out = tag(linkToWiktionary(entry, linkText, languageCode), languageCode, scriptCode, italicize)
elseif entry and linkText then
out = linkToWiktionary(entry, linkText)
else
out = '<span style="font-size: smaller;">[text?]</span>'
end
if out and errorText then
return out .. errorText
else
return errorText or error("The function wiktlang generated nothing")
error("Please provide a language code in the first parameter")
end
return wiktionaryLink(languageCode, entry, linkText)
end
 
return fp