Module:Citation/CS1/Utilities/sandbox: Difference between revisions

Content deleted Content added
No edit summary
m Removed protection from "Module:Citation/CS1/Utilities/sandbox"
 
(44 intermediate revisions by 4 users not shown)
Line 1:
--[[
History of changes since last sync: 2024-12-28
 
2020-07-30: unify handling of error and maint messaging; see Help_talk:Citation_Style_1#Some_maintenance_items_to_upgrade_to_errors
 
]]
Line 8 ⟶ 6:
 
-- Module-wide accumulators shared by the functions in this module.  Field names carry the _t suffix used by
-- the functions below (z.prop_cats_t, z.prop_keys_t, z.error_msgs_t, ...).
local z = {
	error_cats_t = {};															-- for categorizing citations that contain errors
	error_ids_t = {};															-- list of error identifiers; used to prevent duplication of certain errors; local to this module
	error_msgs_t = {};															-- sequence table of error messages
	maint_cats_t = {};															-- for categorizing citations that aren't erroneous per se, but could use a little work
	prop_cats_t = {};															-- for categorizing citations based on certain properties, language of source for instance
	prop_keys_t = {};															-- for adding classes to the citation's <cite> tag
};
 
Line 28 ⟶ 27:
]]
 
-- Returns false when <var> is nil or the empty string; true for every other value (including false and 0).
local function is_set (var)
	if var == nil then
		return false;
	end
	return var ~= '';
end
Line 39 ⟶ 38:
]]
 
-- Searches the sequence <haystack> for <needle>.
-- Returns the index of the first element equal to <needle>; returns false when <needle> is nil or is not found.
local function in_array (needle, haystack)
	if needle == nil then
		return false;
	end
	for n, v in ipairs (haystack) do
		if v == needle then
			return n;															-- index of the first match
		end
	end
	return false;
end
 
 
--[[--------------------------< H A S _ A C C E P T _ A S _ W R I T T E N >------------------------------------
 
When <str> is wholly wrapped in accept-as-written markup, return <str> without markup and true; return <str> and false else
 
with allow_empty = false, <str> must have at least one character inside the markup
with allow_empty = true, <str> the markup frame can be empty like (()) to distinguish an empty template parameter from the specific condition "has no applicable value" in citation-context.
 
After further evaluation the two cases might be merged at a later stage, but should be kept separated for now.
 
]]
 
-- Strips accept-as-written markup, ((...)), when it wraps all of <str>.
-- Returns the (possibly unwrapped) string and a boolean that is true only when markup was removed.
-- Only when <allow_empty> is exactly true may the markup be empty, i.e. (()).
local function has_accept_as_written (str, allow_empty)
	if not is_set (str) then
		return str, false;														-- nil or empty string: nothing to unwrap
	end

	local pattern = (true == allow_empty) and '^%(%((.*)%)%)$' or '^%(%((.+)%)%)$';	-- .* allows (()) to be an empty set
	local unwrapped, hits = str:gsub (pattern, '%1');
	return unwrapped, 0 ~= hits;
end
 
Line 54 ⟶ 80:
--[[--------------------------< S U B S T I T U T E >----------------------------------------------------------
 
Populates numbered arguments in a message string using an argument table. <args> may be a single string or a
sequence table of multiple strings.
 
]]
 
-- Returns <msg> unchanged when <args> is nil or false; otherwise builds a raw message from <msg> and <args>
-- with mw.message.newRawMessage() and returns its plain text (falling back to <msg> if that yields nil/false).
local function substitute (msg, args)
	if not args then
		return msg;
	end
	return mw.message.newRawMessage (msg, args):plain() or msg;
end
 
Line 65 ⟶ 92:
--[[--------------------------< E R R O R _ C O M M E N T >----------------------------------------------------
 
Wraps error messages with CSS markup according to the state of hidden. <content> may be a single string or a
sequence table of multiple strings.
 
]]
 
-- Selects cfg.presentation['hidden-error'] when <hidden> is truthy, else cfg.presentation['visible-error'],
-- and fills that template with <content> via substitute().
local function error_comment (content, hidden)
	return substitute (hidden and cfg.presentation['hidden-error'] or cfg.presentation['visible-error'], content);
end
 
 
--[[--------------------------< H Y P H E N _ T O _ D A S H >--------------------------------------------------
 
Converts a hyphen, endash, emdash to endash under certain conditions. The hyphen/en/em must separate
like items; unlike items are returned unmodified. These forms are modified:
letter - letter (A-B)
digit - digit (4-5)
digit separator digit - digit separator digit (4.1-4.5 or 4-1-4-5)
letterdigit - letterdigit (A1-A5) (an optional separator between letter and
digit is supported – a.1-a.5 or a-1-a-5)
digitletter - digitletter (5a-5d) (an optional separator between letter and
digit is supported – 5.a-5.d or 5-a-5-d)
 
any other forms are returned unmodified.
 
str may be a comma- or semicolon-separated list of page ranges with/without single pages
 
]]
 
-- Converts hyphen / endash / emdash separators between like items to endash; see the header comment above
-- for the accepted forms.  Returns exactly one value: the rewritten string (or <str> itself when not set).
local function hyphen_to_dash (str)
	if not is_set (str) then
		return str;
	end

	-- Placeholders that stand in for ',' and ';' inside accept-as-written markup so that the list split below
	-- does not break the marked-up text apart.  Written as UTF-8 decimal byte escapes so the look-alike
	-- characters cannot be silently normalized back to ASCII punctuation (which turns the swap into a no-op).
	local fw_comma = '\239\188\140';											-- U+FF0C FULLWIDTH COMMA
	local fw_semicolon = '\239\188\155';										-- U+FF1B FULLWIDTH SEMICOLON

	local function restore (s)													-- undo the placeholder swap; parentheses discard gsub's count
		return (s:gsub (fw_comma, ','):gsub (fw_semicolon, ';'));
	end

	str = str:gsub ("(%(%(.-%)%))", function (m)								-- hide commas and semicolons that are inside accept-as-written markup
		return (m:gsub (",", fw_comma):gsub (";", fw_semicolon));
	end);
	str = str:gsub ('&[nm]dash;', {['&ndash;'] = '–', ['&mdash;'] = '—'});		-- replace &mdash; and &ndash; entities with their characters; semicolon mucks up the text.split
	str = str:gsub ('&#45;', '-');												-- replace HTML numeric entity with hyphen character
	str = str:gsub ('&nbsp;', ' ');												-- replace &nbsp; entity with generic keyboard space character

	local out = {};
	local list = mw.text.split (str, '%s*[,;]%s*');								-- split str at comma or semicolon separators if there are any
	local accept;																-- boolean

	for _, item in ipairs (list) do												-- for each item in the list
		item, accept = has_accept_as_written (item);							-- remove accept-this-as-written markup when it wraps all of item
		if not accept and mw.ustring.match (item, '^%w*[%.%-]?%w+%s*[—–-]%s*%w*[%.%-]?%w+$') then	-- if a hyphenated range or has endash or emdash separators
			if mw.ustring.match (item, '^%a+[%.%-]?%d+%s*[—–-]%s*%a+[%.%-]?%d+$') or		-- letterdigit hyphen letterdigit (optional separator between letter and digit)
				mw.ustring.match (item, '^%d+[%.%-]?%a+%s*[—–-]%s*%d+[%.%-]?%a+$') or		-- digitletter hyphen digitletter (optional separator between digit and letter)
				mw.ustring.match (item, '^%d+[%.%-]%d+%s*[—–-]%s*%d+[%.%-]%d+$') then		-- digit separator digit hyphen digit separator digit
					item = mw.ustring.gsub (item, '(%w*[%.%-]?%w+)%s*[—–-]%s*(%w*[%.%-]?%w+)', '<span class="nowrap">%1 –</span> <span class="nowrap">%2</span>');	-- replace hyphen/dash, with spaced endash

			elseif mw.ustring.match (item, '^%d+%s*[—–-]%s*%d+$') or			-- digit hyphen digit
				mw.ustring.match (item, '^%a+%s*[—–-]%s*%a+$') then				-- letter hyphen letter
					item = mw.ustring.gsub (item, '(%w+)%s*[—–-]%s*(%w+)', '<span class="nowrap">%1–</span>%2');	-- replace hyphen/emdash with endash, remove extraneous space characters

			else
--				item = mw.ustring.gsub (item, '%s*[—–-]%s*', '–');				-- disabled; here when 'unlike' items so return <item> as is
			end
		end
		table.insert (out, item);												-- add the (possibly modified) item to the output table
	end

	local temp_str;																-- concatenate the output table into a comma separated string
	temp_str, accept = has_accept_as_written (table.concat (out, ', '));		-- remove accept-this-as-written markup when it wraps all of concatenated out
	if accept then
		temp_str = has_accept_as_written (str);									-- when global markup removed, return original str; assignment drops the boolean second return value
	end
	return restore (temp_str);													-- put the real commas and semicolons back
end
 
Line 77 ⟶ 168:
 
Makes a wikilink; when both link and display text are provided, returns a wikilink in the form [[L|D]]; if only
link is provided (or link and display are the same), returns a wikilink in the form [[L]]; if neither are
provided or link is omitted, returns an empty string.
 
]=]
 
-- Returns '' when <link> is not set; [[link|display]] when <display> is set and differs from <link>;
-- [[link]] otherwise.
local function make_wikilink (link, display)
	if not is_set (link) then return '' end

	if is_set (display) and link ~= display then								-- piped form only when the display text adds something
		return table.concat ({'[[', link, '|', display, ']]'});
	else
		return table.concat ({'[[', link, ']]'});
	end
end
Line 97 ⟶ 186:
--[[--------------------------< S E T _ M E S S A G E >----------------------------------------------------------
 
Sets an error message using the ~/Configuration error_conditions{} table along with arguments supplied in the function
call, inserts the resulting message in z.error_msgs_t{} sequence table, and returns the error message.

<error_id> – key value for appropriate error handler in ~/Configuration error_conditions{} table
<arguments> – may be a single string or a sequence table of multiple strings to be substituted into error_conditions[error_id].message
<raw> – boolean
true – causes this function to return the error message not wrapped in visible-error, hidden-error span tag;
returns error_conditions[error_id].hidden as a second return value
does not add message to z.error_msgs_t sequence table
false, nil – adds message wrapped in visible-error, hidden-error span tag to z.error_msgs_t
returns the error message wrapped in visible-error, hidden-error span tag; there is no second return value
<prefix> – string to be prepended to <message> -- TODO: remove support for these unused(?) arguments?
<suffix> – string to be appended to <message>

TODO: change z.error_cats_t and z.maint_cats_t to have the form cat_name = true? this to avoid dups without having to have an extra table
 
]]
 
local added_maint_cats = {}														-- list of maintenance categories that have been added to z.maint_cats_t; TODO: figure out how to delete this table
 
local function set_message (error_id, arguments, raw, prefix, suffix)
local error_state = cfg.error_conditions[error_id];
prefix = prefix or ""'';
suffix = suffix or ""'';
if error_state == nil then
error( (cfg.messages['undefined_error'] .. ': ' .. error_id ); -- because missing error handler in Module:Citation/CS1/Configuration
 
elseif is_set (error_state.category) then
if error_state.message then -- when error_state.message defined, this is an error message
table.insert ( z.error_categorieserror_cats_t, error_state.category );
else
if not added_maint_cats[error_id] then
added_maint_cats[error_id] = true; -- note that we've added this category
table.insert (z.maintenance_catsmaint_cats_t, substitute (error_state.category, arguments)); -- make cat name then add to table
end
return; -- because no message, nothing more to do
Line 127 ⟶ 227:
end
 
local message = substitute ( error_state.message, arguments );
 
message = table.concat (
Line 144 ⟶ 244:
});
 
z.error_idserror_ids_t[error_id] = true;
if z.error_idserror_ids_t['err_citation_missing_title'] and -- if missing-title error already noted
in_array (error_id, {'err_bare_url_missing_title', 'err_trans_missing_title'}) then -- and this error is one of these
return '', false; -- don't bother because one flavor of missing title is sufficient
end
message = table.concat ({ prefix, message, suffix });
 
if rawtrue == trueraw then
return message, error_state.hidden; -- return message not wrapped in visible-error, hidden-error span tag
end
 
returnmessage = error_comment (message, error_state.hidden); -- wrap message in visible-error, hidden-error span tag
table.insert (z.error_msgs_t, message); -- add it to the messages sequence table
return message; -- and done; return value generally not used but is used as a flag in various functions of ~/Identifiers
end
 
Line 187 ⟶ 289:
end
 
if is_set (args[alias]) then -- alias is in the template's argument list
if value ~= nil and selected ~= alias then -- if we have already selected one of the aliases
local skip;
for _, v in ipairs (error_list) do -- spin through the error list to see if we've added this alias
if v == alias then
skip = true;
Line 197 ⟶ 299:
end
if not skip then -- has not been added so
table.insert ( error_list, alias ); -- add error alias to the error list
end
else
Line 210 ⟶ 312:
--[[--------------------------< A D D _ M A I N T _ C A T >------------------------------------------------------
 
Adds a category to z.maint_cats_t using names from the configuration file with additional text if any.
To prevent duplication, the added_maint_cats table lists the categories by key that have been added to z.maint_cats_t.
 
]]
 
--local added_maint_cats = {} -- list of maintenance categories that have been added to z.maintenance_cats
-- Appends the category name built from cfg.maint_cats[key] and <arguments> to z.maint_cats_t, once per <key>;
-- added_maint_cats records the keys already handled.
local function add_maint_cat (key, arguments)
	if not added_maint_cats [key] then
		added_maint_cats [key] = true;											-- note that we've added this category
		table.insert (z.maint_cats_t, substitute (cfg.maint_cats [key], arguments));	-- make name then add to table
	end
end
 
 
--[[--------------------------< A D D _ P R O P _ C A T >--------------------------------------------------------
 
Adds a category to z.prop_cats_t using names from the configuration file with additional text if any.
 
foreign_lang_source and foreign_lang_source_2 keys have a language code appended to them so that multiple languages
may be categorized but multiples of the same language are not categorized.
 
added_prop_cats is a table declared in page scope variables above
 
]]
 
local added_prop_cats = {}; -- list of property categories that have been added to z.prop_cats_t
 
-- Appends the category name built from cfg.prop_cats[key] and <arguments> to z.prop_cats_t and the matching
-- 'cs1-prop-' class to z.prop_keys_t, once per <key> .. <key_modifier> combination.
local function add_prop_cat (key, arguments, key_modifier)
	local dedup_key = key .. (key_modifier or '');								-- <key> alone when no modifier is supplied
	if added_prop_cats [dedup_key] then
		return;																	-- already categorized; nothing to do
	end

	added_prop_cats [dedup_key] = true;											-- note that we've added this category
	table.insert (z.prop_cats_t, substitute (cfg.prop_cats [key], arguments));	-- make name then add to table
	table.insert (z.prop_keys_t, 'cs1-prop-' .. key);							-- class name uses the unmodified <key>
end
Line 234 ⟶ 359:
]]
 
-- Guards a string that is about to be wrapped in italic markup: an empty <span></span> is placed outside a
-- leading and/or trailing apostrophe, and newlines are replaced with spaces.
-- Returns <str> unchanged when it is not set; otherwise returns exactly one value, the adjusted string.
local function safe_for_italics (str)
	if not is_set (str) then return str end

	if str:sub (1, 1) == "'" then str = "<span></span>" .. str; end			-- leading apostrophe
	if str:sub (-1, -1) == "'" then str = str .. "<span></span>"; end			-- trailing apostrophe

	return (str:gsub ('\n', ' '));												-- Remove newlines as they break italics; parentheses drop gsub's count
end
 
Line 256 ⟶ 378:
 
-- Fills the cfg.presentation[key] template with <str>; returns "" when <str> is not set.
-- For the 'italic-title' and 'trans-italic-title' keys <str> is first passed through safe_for_italics().
local function wrap_style (key, str)
	if not is_set (str) then
		return "";
	elseif in_array (key, {'italic-title', 'trans-italic-title'}) then
		str = safe_for_italics (str);
	end

	return substitute (cfg.presentation[key], {str});
end
 
 
--[[--------------------------< M A K E _ S E P _ L I S T >------------------------------------------------------------
 
make a separated list of items using provided separators.
Line 272 ⟶ 394:
<sep_list_pair> - typically '<space>and<space>'
<sep_list_end> - typically '<comma><space>and<space>' or '<comma><space>&<space>'
 
defaults to cfg.presentation['sep_list'], cfg.presentation['sep_list_pair'], and cfg.presentation['sep_list_end']
if <sep_list_end> is specified, <sep_list> and <sep_list_pair> must also be supplied
 
]]
 
-- Joins the first <count> items of the sequence <list_seq> into one string.
--	count <= 2: items are joined with <sep_list_pair> (a single item is returned as is)
--	count > 2: all but the last item are joined with <sep_list>; the last is attached with <sep_list_end>
-- When <sep_list> is nil or false all three separators are taken from cfg.presentation.
local function make_sep_list (count, list_seq, sep_list, sep_list_pair, sep_list_end)
	local list = '';

	if not sep_list then														-- set the defaults
		sep_list = cfg.presentation['sep_list'];
		sep_list_pair = cfg.presentation['sep_list_pair'];
		sep_list_end = cfg.presentation['sep_list_end'];
	end

	if 2 >= count then
		list = table.concat (list_seq, sep_list_pair);							-- insert separator between two items; returns list_seq[1] when only one item
	elseif 2 < count then
		list = table.concat (list_seq, sep_list, 1, count - 1);					-- concatenate all but last item with plain list separator
		list = table.concat ({list, list_seq[count]}, sep_list_end);			-- concatenate last item onto end of <list> with final separator
	end

	return list;
end
 
 
--[[--------------------------< S E L E C T _ O N E >----------------------------------------------------------
Line 307 ⟶ 440:
if index ~= nil then index = tostring(index); end
 
for _, alias in ipairs ( aliases_list ) do -- for each alias in the aliases list
if alias:match ('#') then -- if this alias can be enumerated
if '1' == index then -- when index is 1 test for enumerated and non-enumerated aliases
value, selected = is_alias_used (args, alias, index, false, value, selected, error_list); -- first test for non-enumerated alias
end
value, selected = is_alias_used (args, alias, index, true, value, selected, error_list); -- test for enumerated alias
else
value, selected = is_alias_used (args, alias, index, false, value, selected, error_list); -- test for non-enumerated alias
end
end
 
if #error_list > 0 and 'none' ~= error_condition then -- for cases where this code is used outside of extract_names()
-- local error_str = "";
-- for _, k in ipairs( error_list ) do
-- if error_str ~= "" then error_str = error_str .. cfg.presentation['sep_list'] end
-- error_str = error_str .. wrap_style ('parameter', k);
-- end
-- if #error_list > 1 then
-- error_str = error_str .. cfg.presentation['sep_list_end'];
-- else
-- error_str = error_str .. cfg.presentation['sep_list_pair'];
-- end
 
for i, v in ipairs (error_list) do
error_list[i] = wrap_style ('parameter', v);
end
table.insert (error_list, wrap_style ('parameter', selected));
set_message (error_condition, {make_sep_list (#error_list, error_list)});
 
local error_str = list_make (#error_list, error_list, cfg.presentation['sep_list'], cfg.presentation['sep_list_pair'], cfg.presentation['sep_list_end']);
 
-- error_str = error_str .. wrap_style ('parameter', selected);
table.insert( z.message_tail, { set_message( error_condition, {error_str}, true ) } );
end
Line 358 ⟶ 476:
 
-- Replaces every [[...]] wikilink in <str> with its display text: the text after the first pipe when there is
-- one, else the link text itself, trimmed of surrounding whitespace.  Returns exactly one value.
local function remove_wiki_link (str)
	return (str:gsub ("%[%[([^%[%]]*)%]%]", function (l)
		return l:gsub ("^[^|]*|(.*)$", "%1"):gsub ("^%s*(.-)%s*$", "%1");		-- drop 'target|' then trim; gsub uses only the first value returned
	end));
end
Line 395 ⟶ 513:
D = mw.text.trim (D, '%s|'); -- trim white space and pipe characters
-- L = L and mw.text.trim (L, '%s|');
return wl_type, D, L or '';
end
Line 423 ⟶ 540:
local flag;
while true do
if argument:find ( "'''''", 1, true ) then -- bold italic (5)
argument, flag = argument:gsub ("%'%'%'%'%'", ""); -- remove all instances of it
elseif argument:find ( "''''", 1, true ) then -- italic start and end without content (4)
argument, flag=argument:gsub ("%'%'%'%'", "");
elseif argument:find ( "'''", 1, true ) then -- bold (3)
argument, flag=argument:gsub ("%'%'%'", "");
elseif argument:find ( "''", 1, true ) then -- italic (2)
argument, flag = argument:gsub ("%'%'", "");
else
break;
Line 457 ⟶ 574:
return {
add_maint_cat = add_maint_cat, -- exported functions
add_prop_cat = add_prop_cat,
error_comment = error_comment,
has_accept_as_written = has_accept_as_written,
hyphen_to_dash = hyphen_to_dash,
in_array = in_array,
is_set = is_set,
is_wikilink = is_wikilink,
make_sep_list = make_sep_list,
list_make = list_make,
make_wikilink = make_wikilink,
remove_wiki_link = remove_wiki_link,