Content deleted Content added
No edit summary |
No edit summary |
||
(9 intermediate revisions by 3 users not shown) | |||
Line 24:
local cfg = mw.loadData ('Module:Lang/configuration' .. (mw.getCurrentFrame():getTitle():match ('/sandbox') or '')); -- for internationalization
local is_latn_data = mw.loadData ('Module:Lang/data/is latn data');
local sizeof_ranges_t = is_latn_data.sizeof_ranges_t;
local namespace = mw.title.getCurrentTitle().namespace; -- used for categorization
Line 80 ⟶ 83:
local category_link = ((0 == namespace or 10 == namespace) and not args_t.nocat) and substitute ('[[Category:$1]]', {category}) or '';
return substitute ('[$1] <span style="color:#d33">$2:
{
text or cfg.make_error_msg_t.undefined,
Line 143 ⟶ 146:
This function attempts to invert the italic markup of args.text by adding/removing leading/trailing italic markup
in args.text. Like |italic=unset, |italic=invert disables automatic italic markup. Individual leading/trailing
apostrophes are converted to their
bold markup inadvertently.
Line 189 ⟶ 192:
Validates |italic= or |italics= assigned values.
When |italic= is set and has an
for the special case 'default', return nil.
When |italic= is not set, or has an
When both |italic= and |italics= are set, returns nil and a 'conflicting' error message.
The return value nil causes the calling lang, lang_xx, or
defined default ('inherit' for {{lang}}, 'inherit' or 'italic' for {{lang-??}} depending on
the individual template's requirements, 'italic' for {{transliteration}}) or to the value appropriate to |script=, if set ({{lang}}
Line 231 ⟶ 234:
--[=[--------------------------< V A L I D A T E _ C A T _ A R G S >----------------------------------------------------------
Default behavior of the {{lang}} and {{lang-??}} templates is to add categorization when the templates are used in
This default functionality may be suppressed by setting |nocat=yes or |cat=no. This function selects one of these two parameters
to control categorization.
Line 282 ⟶ 285:
--[[--------------------------< F O R M A T _ I E T F _ T A G >------------------------------------------------
Prettify
code: lower case
script: sentence case
Line 349 ⟶ 352:
local function get_ietf_parts (source, args_script, args_region, args_variant)
local code, script, region, variant, private; --
if not is_set (source) then
Line 355 ⟶ 358:
end
local pattern = { -- table of tables holding
{'^(%a%a%a?)%-(%a%a%a%a)%-(%a%a)%-(%d%d%d%d)$', 's', 'r', 'v'}, -- 1 - ll-Ssss-RR-variant (where variant is 4 digits)
{'^(%a%a%a?)%-(%a%a%a%a)%-(%d%d%d)%-(%d%d%d%d)$', 's', 'r', 'v'}, -- 2 - ll-Ssss-DDD-variant (where region is 3 digits; variant is 4 digits)
Line 385 ⟶ 388:
}
local t = {}; -- table of captures; serves as a translator between captured
for i, v in ipairs (pattern) do -- spin through the pattern table looking for a match
Line 399 ⟶ 402:
};
script = t.s or ''; -- translate table contents to named variables;
region = t.r or ''; -- absent table entries are nil so set named
variant= t.v or '';
private = t.p or '';
Line 433 ⟶ 436:
script = script:lower(); -- ensure that we use and return lower case version of this
if not script_table[script] then
return code, nil, nil, nil, nil, substitute (cfg.get_ietf_parts_t.unrecog_scr_code, {script, code}); -- language code
end
end
if suppressed_table[script] then -- ensure that code-script does not use a suppressed script
if in_array (code, suppressed_table[script]) then
return code, nil, nil, nil, nil, substitute (cfg.get_ietf_parts_t.script_code, {script, code}); -- language code
end
end
Line 534 ⟶ 537:
if text:find ('\n+') then -- look for any number of \n characters in text
text = text:gsub ('([^\n])\n([^\n])', '%1 %2'); -- replace single newline characters with a space character which mimics
if 'italic' == style then
text = text:gsub('[^\n]+', '<p><i>%1</i></p>'); -- insert p and italic markup tags at each
else
text = text:gsub ('[^\n]+', '<p>%1</p>'); -- insert p markup at each
text = text:gsub ('\n', ''); -- strip newline characters
end
Line 552 ⟶ 555:
Makes a <span title="<title text>"><content_text></span> or <div title="<title text>"><content_text></div> where
<title text> is in the tool-tip in the wiki's local language and <content_text> is non-local-language text in
<tag> holds a string 'div' or 'span' used to choose the correct wrapping tag.
Line 574 ⟶ 577:
--[[--------------------------< M A K E _ T E X T _ H T M L >--------------------------------------------------
Add the
<div> tags for block content
Line 597 ⟶ 600:
end
if 'span' == tag then -- default
if 'italic' == style then -- but if italic
tag = 'i'; -- change to <i> tags
Line 605 ⟶ 608:
end
table.insert (html_t, table.concat ({'<', tag})); -- open the <i>, <span>, or <div>
code = code:gsub ('%-x%-.*', ''); -- strip private use subtag from code tag because meaningless outside of
table.insert (html_t, table.concat ({' lang="', code, '\"'})); -- add language attribute
Line 629 ⟶ 632:
end
table.insert (html_t, table.concat ({style_added, '>'})); -- close the opening
table.insert (html_t, text); -- insert the text
table.insert (html_t, table.concat ({'</', tag, '>'})); -- close the 'text' <i>, <span>, or <div>
if is_set (language) then -- create a <title_text> string for the title= attribute in a wrapper span or div
Line 701 ⟶ 704:
to be |translit-script= (in this function, tscript).
This function is used by both lang_xx() and
lang_xx() always provides code, language_name, and translit; may provide tscript; never provides style
For {{transliteration}}, style only applies when a language code is provided.
Line 746 ⟶ 749:
if not title_t[std] then return ''; end -- invalid standard, setup for error message
if title_t[std][code] then -- if language code is in the table (
title_text = substitute ('$1$2 ($3 $4) $5', { -- add the appropriate text to the tool tip
title_text,
Line 910 ⟶ 913:
--[[--------------------------< H T M L _ T A G _ S E L E C T >------------------------------------------------
Inspects content of and selectively trims text. Returns text and the name of an appropriate
If text contains:
Line 989 ⟶ 992:
elseif override_table[code] then -- not there so try basic language tag
name = override_table[code];
elseif lang_table[code] then -- shift to
name = lang_table[code];
elseif lang_dep_table[code] then -- try the
name = lang_dep_table[code];
end
Line 1,019 ⟶ 1,022:
]]
local function text_script_match_test (script, is_latn_text, pos, char)
local scripts_t = {['latf'] = true, ['latg'] = true, ['latn'] = true}; -- unicode 'latn' scripts; 'latf' and 'latg' are font variants so there are no Fraktur or Gaelic codepoints
if is_set (script) then -- don't bother with the rest of this if <script> is nil or empty string
Line 1,029 ⟶ 1,032:
else -- when text is not wholly Latn script
if scripts_t[script] then -- but a Latn script is specified
return substitute (cfg.text_script_match_test_t.latn_scr_mismatch, {pos, char}); -- emit an error message with position of first offending character
end
end
Line 1,036 ⟶ 1,039:
--[[--------------------------< B I N A R Y _ S E A R C H >----------------------------------------------------

Conducts a binary search of <ranges_t> for a sub-range that holds <target>.

<target> is a number (the caller passes a codepoint).
<ranges_t> is a sequence of {low, high} pairs.  The search requires the pairs to be sorted in ascending order and
not to overlap.  The number of pairs is read from the module-level <sizeof_ranges_t>, not from <ranges_t> itself.

Returns boolean true if a sub-range holding <target> is found; boolean false else (including when <target> lies
below the first sub-range or above the last sub-range).

]]

local function binary_search (target, ranges_t)
	local idx_bot = 1;															-- initialize to index of first key
	local idx_top = sizeof_ranges_t;											-- initialize to index of last key (number of keys)

	if (target < ranges_t[idx_bot][1]) or (target > ranges_t[idx_top][2]) then	-- target outside of the span covered by <ranges_t>
		return false;															-- cannot be a member of any sub-range; abandon
	end

	while idx_bot <= idx_top do													-- until the search window is empty
		local idx_mid = math.floor ((idx_bot + idx_top) / 2);					-- mid-point of the current search window
		local range_t = ranges_t[idx_mid];

		if target < range_t[1] then												-- <target> less than indexed range low value
			idx_top = idx_mid - 1;												-- continue in the lower half
		elseif target > range_t[2] then											-- <target> greater than indexed range high value
			idx_bot = idx_mid + 1;												-- continue in the upper half
		else																	-- indexed range low value <= target <= indexed range high value
			return true;														-- found the range that holds <target>
		end
	end

	return false;																-- window exhausted; <target> falls in a gap between sub-ranges
end
--[[--------------------------< I S _ L A T I N >--------------------------------------------------------------
compare <text> as codepoints to lists of known codepoints accepted as Latn script
returns boolean true and modified <text> when <text> is wrapped in accept-as-written markup
returns boolean true and <text> when codepoint is known
returns boolean false, <text>, non-Latn codepoint position in <text> (left to right), and the codepoint character
when codepoint is not known
TODO: when text has accept-as-written markup, return a non-boolean value to indicate that <text> is not wholly
latn script? Use that return value to create non-Latn HTML lang= attribute because <text> isn't really
latn so lang=und (undetermined)? or instead, omit the -Latn subtag? (without -Latn need to force |italic=yes)
]]
-- Tests whether every codepoint in <text> is accepted as Latn script.
--
-- <text>: string to test; may be wrapped in accept-as-written markup: ((...))
-- <tag>:  optional key used to look up language-specific codepoints in is_latn_data.specials_t
--
-- Returns:
--   true, <text> with the (( and )) wrapper removed  – when the accept-as-written markup is present (no codepoint test)
--   true, <text>                                     – when every codepoint is known
--   false, <text>, <position>, <character>           – at the first unknown codepoint; <position> counts codepoints
--                                                      from 1, left to right
local function is_latin (text, tag)
	local unwrapped, markup_count = text:gsub ('^%(%((.+)%)%)$', '%1');			-- strip accept-as-written markup if present
	if markup_count > 0 then
		return true, unwrapped;													-- markup present so <text> is accepted without inspection
	end

	local index = 0;															-- codepoint position for error messaging
	for cp in mw.ustring.gcodepoint (unwrapped) do								-- walk <text> one codepoint at a time
		index = index + 1;

		local known = is_latn_data.singles_t[cp]								-- in the singles list?
			or binary_search (cp, is_latn_data.ranges_t);						-- or a member of a listed range?

		if not known and tag then												-- last chance: language-specific codepoints
			local per_tag_t = is_latn_data.specials_t[cp];
			known = per_tag_t and per_tag_t[tag];
		end

		if not known then
			return false, unwrapped, index, mw.ustring.char (cp);				-- report position and character of first offender
		end
	end

	return true, unwrapped;														-- nothing unknown found
end
Line 1,124 ⟶ 1,162:
end
args.text, tag = html_tag_select (args.text); -- inspects text; returns appropriate
args.rtl = args.rtl == cfg.keywords_t.affirmative; -- convert to boolean: 'yes' -> true, other values -> false
Line 1,139 ⟶ 1,177:
end
local is_latn_text, pos
is_latn_text, args.text, pos, char= is_latin (args.text, code); -- make a boolean
msg = text_script_match_test (subtags.script, is_latn_text, pos, char)
if msg then -- if an error detected then there is an error message
return make_error_msg (msg, args, template);
Line 1,169 ⟶ 1,208:
args.code = format_ietf_tag (code, subtags.script, subtags.region, subtags.variant, subtags.private); -- format to recommended subtag styles
language_name = language_name_get (args.code, code, true); -- get language name; try
if cfg.keywords_t.invert == args.italic and 'span' == tag then -- invert only supported for in-line content
Line 1,234 ⟶ 1,273:
if 'none' ~= args_t.label then -- if we want a label
table.insert (translation_t, '<small>'); -- open the <small>
if cfg.keywords_t.negative == args_t.link then
table.insert (translation_t, substitute ('<abbr title="$1">$2</abbr>', {cfg.translation_make_t.lit_xlation, cfg.translation_make_t.lit_abbr})); -- unlinked form
Line 1,240 ⟶ 1,279:
table.insert (translation_t, make_wikilink (cfg.translation_make_t.lit_xlation, cfg.translation_make_t.lit_abbr)); -- linked form
end
table.insert (translation_t, " </small>"); -- close the <small>
end
table.insert (translation_t, table.concat ({''', args_t.translation, '''})); -- use
return table.concat (translation_t); -- make a big string and done
end
Line 1,291 ⟶ 1,330:
]]
local function _lang_xx (args, base_template) -- base_template will be either of '
local out = {};
local language_name; -- used to make display text, article links
Line 1,322 ⟶ 1,361:
end
args.text, tag = html_tag_select (args.text); -- inspects text; returns appropriate
if args[translit_idx] and args.translit then
Line 1,329 ⟶ 1,368:
else
args.translit = args[translit_idx] or args.translit -- prefer positional 'translit' parameter
end
Line 1,364 ⟶ 1,396:
end
if args.translit then
local latn, pos, char;
latn, args.translit, pos, char = is_latin (args.translit, (('' ~= subtags.private) and (code .. '-x-' .. subtags.private)) or code);
if not latn then
return make_error_msg (substitute (cfg.lang_xx_t.translit_nonlatn, {pos, char}), args, template);
end
end
local is_latn_text, text, pos, char = is_latin (args.text, code); -- make a boolean
args.text = text; -- may have been modified (accept-as-written markup removed)
msg = text_script_match_test (subtags.script, is_latn_text, pos, char)
if msg then -- if an error detected then there is an error message
return make_error_msg (msg, args, template);
Line 1,414 ⟶ 1,455:
args.code = format_ietf_tag (code, subtags.script, subtags.region, subtags.variant, subtags.private); -- format to recommended subtag styles
language_name = language_name_get (args.code, code, true); -- get language name; try
category_name = language_name; -- category names retain IANA parenthetical diambiguators (if any)
Line 1,429 ⟶ 1,470:
if mw.ustring.find (language_name, 'languages', 1, true) then
table.insert (out, make_wikilink (language_name)); -- collective language name uses simple wikilink
elseif lang_data.article_name[args.code:lower()] then -- is
table.insert (out, make_wikilink (lang_data.article_name[args.code:lower()], language_name)); -- language name with wikilink from override data
elseif lang_data.article_name[code] then -- is language tag in article override
Line 1,628 ⟶ 1,669:
| 1 | 2 | 3 | 4
{{lang-xx |<text> |<
{{langx |<tag> |<text> |<
The calls to lang_xx_args_get() and _lang_xx() use '
positional parameters.
Line 1,637 ⟶ 1,678:
{{langx}} can't do that. The initial version of {{langx}} relied on a list of language tags (inherit_t in ~/langx)
scraped from those {{lang-??}} templates that call lang_xx_inherit() to render text in upright font.
uses auto-italics code adapted from {{lang}} (doesn't support poem tags).
Line 1,643 ⟶ 1,684:
local function langx (frame)
local args_t = lang_xx_args_get (frame, cfg.templates_t.langx); -- get the arguments; '
return _langx (args_t);
Line 1,674 ⟶ 1,715:
-- Frame-taking wrapper around _is_ietf_tag(): collects the arguments with getArgs() and hands the first
-- positional argument to _is_ietf_tag(); returns whatever _is_ietf_tag() returns.
local function is_ietf_tag (frame)
	local args_t = getArgs (frame);
	return _is_ietf_tag (args_t[1]);											-- only args[1] is passed on
end
Line 1,687 ⟶ 1,728:
-- Same as is_ietf_tag() except that getArgs() is called with the frameOnly option set.
-- NOTE(review): frameOnly presumably limits argument lookup to <frame> itself (no parent frame) – confirm
-- against the getArgs() documentation.
-- Hands the first positional argument to _is_ietf_tag(); returns whatever _is_ietf_tag() returns.
local function is_ietf_tag_frame (frame)
	local args_t = getArgs (frame, {frameOnly = true,});
	return _is_ietf_tag (args_t[1]);											-- only args[1] is passed on
end
Line 1,719 ⟶ 1,760:
raw_code = format_ietf_tag (code, subtags.script, subtags.region, subtags.variant, subtags.private); -- format to recommended subtag styles; private omitted because private
language_name = language_name_get (raw_code, code); -- get language name; try
if 'yes' ~= args.raw then
Line 1,728 ⟶ 1,769:
if mw.ustring.find (language_name, 'languages', 1, true) then
language_name = make_wikilink (language_name, label); -- collective language name uses simple wikilink
elseif lang_data.article_name[raw_code:lower()] then -- is
language_name = make_wikilink (lang_data.article_name[raw_code:lower()], label or language_name); -- language name with wikilink from override data
elseif lang_data.article_name[code] then -- is language tag in article name override?
Line 1,747 ⟶ 1,788:
]]
-- Frame-taking wrapper around _name_from_tag(): the result of getArgs (frame) is handed straight to
-- _name_from_tag() and that function's return is returned unchanged.
local function name_from_tag (frame) -- entry point from an {{#invoke:}}
return _name_from_tag (getArgs(frame)) -- pass-on the args table, nothing else; getArgs() so we also get parent frame
end
Line 1,754 ⟶ 1,795:
--[[--------------------------< _ T A G _ F R O M _ N A M E >--------------------------------------------------
Returns the
according to the spelling in the source tables. When a standard language name has a parenthetical disambiguator,
that disambiguator must be omitted (they are not present in the data name-to-tag tables).
Line 1,794 ⟶ 1,835:
]]
local function tag_from_name (frame) -- entry point from an {{#invoke:
local result, _ = _tag_from_name (getArgs(frame)) -- pass-on the args table, nothing else; getArgs() so we also get parent frame; supress second return used by is_lang_name()
return result;
Line 1,812 ⟶ 1,853:
--[[--------------------------< _
Module entry point from another module.
Line 1,818 ⟶ 1,859:
]]
local function
local title_table = lang_data.translit_title_table; -- table of transliteration standards and the language codes and scripts that apply to those standards
local language_name; -- language name that matches language code; used for tool tip
Line 1,850 ⟶ 1,891:
end
if is_set (args[1]) then -- IANA language code used for
if args[1]:match ('^%a%a%a?%a?$') or args[1]:match ('^%a%a%a?%-x%-') then -- args[1] has correct form?
args.code = args[1]:lower(); -- use the language/script code; only (2, 3, or 4 alpha characters) or private-use; lower case because table indexes are lower case
Line 1,860 ⟶ 1,901:
end
local
is_latn_text, args.text, pos, char= is_latin (args.text, args.code); -- is latn text? strip accept-as-written markup
if not is_latn_text then -- when text is not latn
return make_error_msg (substitute (cfg.lang_xx_t.translit_nonlatn, {pos, char}), args, template); -- abandon with error message
end
Line 1,896 ⟶ 1,938:
--[[--------------------------<
Module entry point from an {{#invoke:}}.
Line 1,902 ⟶ 1,944:
]]
local function
return
end
Line 1,933 ⟶ 1,975:
raw_code = format_ietf_tag (code, subtags.script, subtags.region, subtags.variant, subtags.private); -- format to recommended subtag styles; private omitted because private
category_name = language_name_get (raw_code, code); -- get language name; try
category_name = make_category (code, category_name, nil, true):gsub ('[%[%]]', '');
Line 1,950 ⟶ 1,992:
]]
-- Frame-taking wrapper around _category_from_tag(): the result of getArgs (frame) is handed straight to
-- _category_from_tag() and that function's return is returned unchanged.
local function category_from_tag (frame) -- entry point from an {{#invoke:}}
return _category_from_tag (getArgs (frame)); -- pass-on the args table, nothing else; getArgs() so we also get parent frame
end
Line 1,959 ⟶ 2,001:
return {
category_from_tag = category_from_tag, -- frame entry points when this module is #invoke:ed into templates/wikitext
lang = lang, -- entry point for {{lang}}
langx = langx, -- entry point for {{langx}}
Line 1,967 ⟶ 2,009:
is_ietf_tag_frame = is_ietf_tag_frame,
is_lang_name = is_lang_name,
tag_from_name = tag_from_name, -- returns
name_from_tag = name_from_tag, -- used for template documentation; possible use in ISO 639 name from code templates
_category_from_tag = _category_from_tag, -- API entry points when this module is require()d into other modules
_lang = _lang,
_langx = _langx,
Line 1,980 ⟶ 2,022:
_tag_from_name = _tag_from_name,
_name_from_tag = _name_from_tag,
_translation_make = translation_make,
};
|