Module:Wikt-lang: Difference between revisions

Content deleted Content added
mNo edit summary
Tag: Reverted
sync from sandbox: include explicitly given script subtag in language tagging
 
(13 intermediate revisions by 7 users not shown)
Line 1:
-- Turn on strict mode so that reading or writing an undeclared global raises an error.
require('strict')
-- Language data for this module, loaded through mw.loadData.
local m_data = mw.loadData("Module:Wikt-lang/data")
-- Per-language entries; falls back to the whole data table when it has no "languages" field.
local langData = m_data.languages or m_data
 
-- string.format templates for Wiktionary page titles, wikilinks and
-- language-tagged HTML elements.
local strings = {
	RECONSTRUCTION = "Reconstruction:%s/%s",
	APPENDIX = "Appendix:%s/%s",
	LINK = "[[wikt:%s|%s]]",
	PIPED_LINK = "[[wikt:%s#%s|%s]]",
	HTML_ITALIC_LANG = '<i lang="%s" xml:lang="%s"%s>%s</i>',
	HTML_SPAN_LANG = '<span lang="%s" xml:lang="%s"%s>%s</span>',
}
 
-- User-facing error texts, keyed by symbolic name. The last entry is a
-- string.format template that takes the offending language code.
local error_messages = {
	NO_LANGUAGE_CODE = "No language code.",
	NO_WIKTIONARY_ENTRY = "No Wiktionary entry.",
	LANGUAGE_NAME_FOR_CODE_NOT_FOUND = "The language name for the language code <code>%s</code> was not found.",
}
 
-- Category wikitext appended to the output for maintenance tracking,
-- keyed by symbolic name.
local tracking_categories = {
	ERROR_CATEGORY = "[[Category:Language module errors]]",
	RECONSTRUCTED_WITH_NO_ASTERISK = "[[Category:Language module reconstructed with no asterisk]]",
	USING_REDIRECT_CODE = "[[Category:Language module using redirect code]]",
}
 
-- List of tracking-category wikitext strings. Entries are appended with
-- table.insert and the list is concatenated onto the module's output.
local activeTrackingCategories = {}
 
-- Table that the module's public functions (such as p.wiktlang) are attached to.
local p = {}
 
-- Normalise an optional template argument.
-- The empty string is treated as "not supplied" and becomes nil; every other
-- value (including nil itself) is returned unchanged.
local function ifNotEmpty(value)
	if value == "" then
		return nil
	end
	return value
end
 
-- Convert a displayed word into the title of its Wiktionary entry.
--
-- Wiki bold/italic quote markup is stripped, then the language's
-- "replacements" from langData (if any) are applied.
--
-- word:         the text to convert; must not be nil (converted with tostring).
-- languageCode: key into langData; unknown codes only get the markup stripped.
-- Returns the entry name as a string ("" for an empty word).
-- Raises an error when word is nil.
local function makeEntryName(word, languageCode)
	-- Validate before tostring(): tostring(nil) is the string "nil", so a
	-- nil check made after the conversion could never fire.
	if word == nil then
		error("The function makeEntryName requires a string argument")
	end
	word = tostring(word)
	if word == "" then
		return ""
	end

	-- Remove bold and italics, so that words that contain bolding or emphasis
	-- can be linked without piping. Triple quotes must go first.
	word = word:gsub("'''", "")
	word = word:gsub("''", "")

	-- Unknown language, or a language with no replacements: nothing more to do.
	local data = langData[languageCode]
	local replacements = data and data["replacements"]
	if replacements == nil then
		return word
	end

	local ugsub = mw.ustring.gsub
	if replacements.decompose then
		-- Decompose so that the diacritics of characters such as á can be
		-- removed in one go. No need to compose at the end, because the
		-- MediaWiki software will handle that.
		word = mw.ustring.toNFD(word)
		for i, from in ipairs(replacements.from) do
			-- A missing "to" list (or entry) means "delete the match".
			word = ugsub(word, from, replacements.to and replacements.to[i] or "")
		end
	else
		-- Plain pattern -> replacement map.
		for regex, replacement in pairs(replacements) do
			word = ugsub(word, regex, replacement)
		end
	end
	return word
end
 
-- gsub callback that puts a four-letter script code into title case:
-- the first letter is upper-cased and the remaining three are lower-cased.
local function fixScriptCode(firstLetter, threeLetters)
	return string.upper(firstLetter) .. string.lower(threeLetters)
end
 
-- Split the first template parameter into a language code and a script code.
--
-- codes: string such as "en", "sr-Cyrl", "roa-tar" or "ine-x-proto"; may be
--        nil or "".
-- Returns three values:
--   languageCode  lower-cased where it is parsed from a plain prefix, then
--                 mapped through m_data.redirects; nil when none was found
--   scriptCode    title-cased four-letter code, "" or nil
--   errorText     "" on success, otherwise a small bracketed HTML note
local function getCodes(codes)
	local languageCode, scriptCode, invalidCode
	local errorText
	if codes == nil or codes == "" then
		errorText = 'no language or script code provided'
	elseif codes:find("^%a%a%a?$") or codes:find("^%a%a%a?%-%a%a%a%a$") then
		-- A two- or three-letter sequence at the beginning of the parameter,
		-- normalised to lower case.
		languageCode = string.lower(codes:match("^(%a%a%a?)"))
		-- Four letters at the end of the parameter, normalised to one
		-- uppercase and three lowercase letters. The parentheses drop
		-- gsub's second result (the substitution count).
		local rawScript = codes:match("(%a%a%a%a)$")
		scriptCode = rawScript and (rawScript:gsub("(%a)(%a%a%a)", fixScriptCode, 1))
	elseif codes:find("^%a%a%a?%-%a%a%a?$")
		or codes:find("^%a%a%a%-%a%a%a%-%a%a%a$") then
		-- Codes made only of two- or three-letter parts (e.g. "roa-tar")
		-- are taken whole as the language code.
		languageCode = codes
	-- Private-use subtag: x followed by one or more sequences of 1-8 lowercase
	-- letters separated by hyphens. This only allows for one sequence, as it is
	-- needed for proto-languages such as ine-x-proto (Proto-Indo-European).
	elseif codes:find("^%a%a%a?%-x%-%a%a?%a?%a?%a?%a?%a?%a?$") then
		languageCode, scriptCode =
			codes:match("^(%a%a%a%-x%-%a%a?%a?%a?%a?%a?%a?%a?)%-?(.*)$")
		if not languageCode then
			errorText = '<code>'..codes..'</code> is not a valid language or script code.'
		elseif scriptCode ~= "" and not scriptCode:find("%a%a%a%a") then
			errorText = '<code>'..scriptCode..'</code> is not a valid script code.'
		else
			scriptCode = scriptCode:gsub("(%a)(%a%a%a)", fixScriptCode, 1)
		end
	elseif codes:find("^%a%a%a?") then
		-- Usable language prefix followed by something that is not a script code.
		languageCode, invalidCode = codes:match("^(%a%a%a?)%-?(.*)")
		languageCode = string.lower(languageCode)
		errorText = '<code>'..invalidCode..'</code> is not a valid script code.'
	elseif codes:find("%-?%a%a%a%a$") then
		-- Usable script suffix preceded by something that is not a language code.
		invalidCode, scriptCode = codes:match("(.*)%-?(%a%a%a%a)$")
		scriptCode = scriptCode:gsub("(%a)(%a%a%a)", fixScriptCode)
		errorText = '<code>'..invalidCode..'</code> is not a valid language code.'
	else
		errorText = '<code>'..codes..'</code> is not a valid language or script code.'
	end
	if errorText then
		errorText = ' <span style="font-size: smaller">[' .. errorText .. ']</span>'
	else
		errorText = ""
	end
	-- Map redirecting codes to their target; a nil languageCode stays nil.
	languageCode = m_data.redirects[languageCode] or languageCode
	return languageCode, scriptCode, errorText
end
 
-- Wrap text in an <i> or <span> element carrying a lang attribute.
--
-- text:         content to wrap; nil is shown as "[text?]".
-- languageCode: language code; replaced by the entry's Wikipedia_code in
--               langData when one is defined. Must not be nil when script
--               is a non-empty string.
-- script:       optional script subtag; when non-empty it is appended to the
--               language tag as "-<script>".
-- italicize:    truthy to use <i>, otherwise <span>.
-- Returns the HTML string; right-to-left languages also get dir="rtl" and
-- are surrounded by &rlm; / &lrm; marks.
local function tag(text, languageCode, script, italicize)
	local data = langData[languageCode]

	-- Use Wikipedia code if it has been given: for instance,
	-- Proto-Indo-European has the Wiktionary code "ine-pro" but the Wikipedia
	-- code "ine-x-proto".
	languageCode = data and data.Wikipedia_code or languageCode
	if script and script ~= "" then
		languageCode = languageCode .. "-" .. script
	end

	if not text then text = "[text?]" end

	-- { dir attribute, mark before the element, mark after the element }
	local textDirectionMarkers = { "", "", "" }
	if data and data["direction"] == "rtl" then
		textDirectionMarkers = { ' dir="rtl"', '&rlm;', '&lrm;' }
	end

	local element = italicize and "i" or "span"
	return table.concat({
		textDirectionMarkers[2],
		"<" .. element .. " lang=\"" .. languageCode .. "\"" .. textDirectionMarkers[1] .. ">",
		text,
		"</" .. element .. ">",
		textDirectionMarkers[3],
	})
end
 
-- Build a wikilink to a Wiktionary entry.
--
-- entry:        page title on Wiktionary; a leading "*" marks a reconstructed form.
-- linkText:     text displayed for the link.
-- languageCode: optional. When given, the link targets the language's section
--               ("#<language name>"), and reconstructed/appendix languages are
--               routed to the "Reconstruction:" / "Appendix:" namespaces.
-- Returns the wikitext of the link.
-- Raises an error when the language name cannot be determined or is empty
-- where it is needed, or when linkText is missing while languageCode is given.
local function linkToWiktionary(entry, linkText, languageCode)
	-- Without a language there is no section or namespace to resolve.
	if not languageCode then
		return "[[wikt:" .. entry .. "|" .. linkText .. "]]"
	end

	local data = langData[languageCode]
	local name = data and data.name
	if not name then
		-- On other languages' wikis, use mw.getContentLanguage():getCode(),
		-- or replace 'en' with that wiki's language code.
		name = mw.language.fetchLanguageName(languageCode, 'en')
		if name == "" then
			error("Name for the language code " .. ("%q"):format(languageCode)
				.. " could not be retrieved with mw.language.fetchLanguageName, "
				.. "so it should be added to [[Module:Wikt-lang/data]]")
		end
	end

	-- Prefix a title with "<namespace>:<language name>/"; an empty name
	-- would produce a broken title, so refuse it.
	local function inNamespace(namespace, title)
		if name == "" then
			error("Language name is empty")
		end
		return namespace .. ":" .. name .. "/" .. title
	end

	if entry:sub(1, 1) == "*" then
		entry = inNamespace("Reconstruction", entry:sub(2))
	elseif data and data.type == "reconstructed" then
		mw.log("Reconstructed language without asterisk:", languageCode, name, entry)
		local frame = mw.getCurrentFrame()
		-- Track reconstructed entries with no asterisk by transcluding
		-- a nonexistent template. This technique is used in Wiktionary:
		-- see [[wikt:Module:debug]].
		-- [[Special:WhatLinksHere/tracking/wikt-lang/reconstructed with no asterisk]]
		pcall(frame.expandTemplate, frame,
			{ title = 'tracking/wikt-lang/reconstructed with no asterisk' })
		entry = inNamespace("Reconstruction", entry)
	elseif data and data.type == "appendix" then
		entry = inNamespace("Appendix", entry)
	end

	if entry and linkText then
		return "[[wikt:" .. entry .. "#" .. name .. "|" .. linkText .. "]]"
	else
		error("linkToWiktionary needs a Wiktionary entry or link text, or both")
	end
end
 
-- Entry point: link a word to its Wiktionary entry and tag it with its language.
--
-- Arguments are taken from the parent (template) frame when it has a first
-- positional parameter, otherwise from the invoking frame:
--   1                      language code, optionally with a script code
--   2                      word to link to (required)
--   3                      optional display text
--   italics / i / italic   "n", "-" or "no" suppresses italics
-- Returns wikitext: the tagged link, followed by any note produced while
-- parsing the codes.
function p.wiktlang(frame)
	-- getParent() returns nil when there is no parent frame, so guard the lookup.
	local parent = frame:getParent()
	local args = parent and parent.args[1] and parent.args or frame.args

	local codes = args[1] and mw.text.trim(args[1])
	local word1 = ifNotEmpty(args[2])
	local word2 = ifNotEmpty(args[3])
	if not args[2] or '' == args[2] then
		return '<span style="color:#d33">[text?] Parameter 2 is required</span>'
	end

	local languageCode, scriptCode, errorText = getCodes(codes)

	local italics = args.italics or args.i or args.italic
	italics = not (italics == "n" or italics == "-" or italics == "no")

	-- The entry name always comes from parameter 2; parameter 3, when
	-- present, only changes the text that is displayed.
	local entry, linkText
	if word1 then
		entry = makeEntryName(word1, languageCode)
		linkText = word2 or word1
	end

	-- Italicise only Latin-script text: either the script was given
	-- explicitly as "Latn" or the display text is detected as Latin.
	local italicize = italics
		and (scriptCode == "Latn" or require("Module:Unicode data").is_Latin(linkText))

	local out
	if languageCode and entry and linkText then
		out = tag(linkToWiktionary(entry, linkText, languageCode), languageCode, scriptCode, italicize)
	elseif entry and linkText then
		-- No usable language code: plain link without language tagging.
		out = linkToWiktionary(entry, linkText)
	else
		out = '<span style="font-size: smaller;">[text?]</span>'
	end

	return out .. (errorText or "")
end