Module:Citation/CS1/sandbox: Difference between revisions

Content deleted Content added
m reset changes list;
No edit summary
 
(9 intermediate revisions by 2 users not shown)
Line 1:
--[[
History of changes since last sync: 2025-04-12
 
2025-05-10: maint cat to track {{cite journal}} templates misusing |page= for |article-number=; see Help_talk:Citation_Style_1#|page=_same_value_as_last_n-digits_of_|doi=
2025-07-29: fix url access parameter application on wikipedia library url errors; see Help_talk:Citation_Style_1#Wikipedia_Library_errors_on_a_talk_page
 
]]
Line 2,726 ⟶ 2,729:
utilities.set_message ('maint_publisher_location'); -- set a maint message
return; -- and done
end
end
end
 
 
--[[--------------------------< I S _ P A G E _ A R T _ N U M >------------------------------------------------
 
compare the trailing (rightmost) characters of the |doi= value against the whole value assigned to |page(s)=.
 
return boolean true when:
|page(s)= is exactly 8 digits and, with a dot inserted between the fourth and fifth digits, matches the trailing
9 characters of the |doi= value: |page=12345678 → |page=1234.5678 matches |doi=10.xxxx/yyyy1234.5678
|page(s)= is 5 or more characters and matches the |doi= value's trailing characters
|page(s)= begins with a lowercase 'e' and |page(s)= without the 'e' matches the |doi= value's trailing
characters: |page=e12345 → |page=12345 matches |doi=10.xxxx/yyyy12345
|page(s)= begins with an uppercase 'CD' followed by (typically) six digits and the |doi= value ends with
'CDxxxxxx.pubx' (where 'x' is any single digit)
 
return nil when |page(s)= values:
are ranges separated by an underscore, hyphen, emdash, endash, figure dash, or minus character
are comma- or semicolon-separated lists of pages
have external urls (contain the text 'http')
are digit-only values less than 10000
do not match the |doi= value's trailing characters
 
]]
 
local function is_page_art_num (page, doi)
if not (utilities.is_set (page) and utilities.is_set (doi)) then -- both required
return; -- abandon; nothing to do
end
 
if page:match ('[,;_−–—‒%-]') then -- when |page(s)= might be a page range or a separated list of pages
return; -- abandon
end
 
page = page:lower(); -- because doi names are case insensitive
doi = doi:lower(); -- force these to lowercase for testing
if page:match ('http') then -- when |page(s)= appears to hold a url
return; -- abandon
end
 
if tonumber (page) then -- is |page(s)= digits only
if 10000 > tonumber (page) then -- when |page(s)= less than 10000
return; -- abandon
end
if doi:match (page .. '$') then -- digits only page number match the last digits in |doi=?
return true;
end
 
if 8 == page:len() then -- special case when |page(s)= is exactly 8 digits
local dot_page = page:gsub ('(%d%d%d%d)(%d%d%d%d)', '%1.%2'); -- make a |page=xxxx.yyyy version commonly used in |doi=
if doi:match (dot_page .. '$') then -- 8-digit dotted page number match the last characters in |doi=?
return true;
end
end
else -- here when |page(s)= is alpha-numeric
if 4 < page:len() then -- when |page(s)= is five or more characters
if doi:match (page .. '$') then -- alpha-numeric page match the last characters in |doi=?
return true;
end
local epage = page:match ('^e([%w]+)$'); -- if first character of |page= is 'e', remove it
if epage and doi:match (epage .. '$') then -- page number match the last characters in |doi=?
return true;
end
local cdpage = page:match ('^cd%d+$'); -- if first characters of |page= are 'CD' and last characters are digits (typically 6 digits)
if cdpage and doi:match (cdpage .. '%.pub%d$') then -- page number matches doi 'CDxxxxxx.pubx' where 'x' is a digit
return true;
end
end
end
Line 3,412 ⟶ 3,489:
}
 
ID_list, ID_list_coins = identifiers.identifier_lists_get (args, {DoiBroken = DoiBroken, ASINTLD = A['ASINTLD'], Embargo = Embargo, Class = Class, Year=anchor_year}, ID_support);
DoiBroken = DoiBroken, -- for |doi=
ASINTLD = A['ASINTLD'], -- for |asin=
Embargo = Embargo, -- for |pmc=
Class = Class, -- for |arxiv=
CitationClass = config.CitationClass, -- for |arxiv=
Year=anchor_year, -- for |isbn=
}, ID_support);
 
-- Account for the oddities that are {{cite arxiv}}, {{cite biorxiv}}, {{cite citeseerx}}, {{cite medrxiv}}, {{cite ssrn}}, before generation of COinS data.
Line 3,465 ⟶ 3,549:
Title = ''; -- set title to empty string
utilities.set_message ('maint_untitled'); -- add maint cat
end
 
-- for {{cite journal}}, and for {{citation}} when it has a periodical that came from |journal=, add a maint
-- message when the |page(s)= value appears to have been copied from the tail of the |doi= value
-- NOTE(review): an empty string is truthy in Lua, so if Page can be '' when |page= is omitted then
-- `Page or Pages` never falls through to Pages – confirm how Page and Pages are initialized
if 'journal' == config.CitationClass or ('citation' == config.CitationClass and utilities.is_set (Periodical) and 'journal' == Periodical_origin) then
if is_page_art_num (Page or Pages, ID_list_coins['DOI']) then -- does |page(s)= look like it holds an article number
utilities.set_message ('maint_page_art_num'); -- add maint cat
end
end
 
Line 4,378 ⟶ 4,468:
_, value, _ = utilities.is_wikilink (value); -- extract label portion from wikilink
end
 
return value;
end
Line 4,434 ⟶ 4,525:
--[[--------------------------< H A S _ T W L _ U R L >--------------------------------------------------------
 
look for The Wikipedia Library urls in url-holding parameters. TWL urls are accessible only to readers who are
active extended confirmed Wikipedia editors. This function sets an error message when such urls are discovered
and, when appropriate, sets |<param>-url-access=subscription. returns true when a TWL url is found; nil else.
 
looks for: '.wikipedialibrary.idm.oclc.org'
Line 4,441 ⟶ 4,533:
]]
 
local function has_twl_url (url_params_t, cite_args_t)
	-- url_params_t – k/v table of url-holding parameter names and their (string) values
	-- cite_args_t – k/v table of the citation's parameters; modified in place: for each offending parameter that
	--		has an entry in cfg.url_access_map_t, the mapped access parameter is set to the 'subscription' keyword
	-- returns true when at least one parameter holds a TWL url; nil else

	local twl_params_t = {};													-- sequence of raw names of parameters that hold a TWL url

	for param, value in pairs (url_params_t) do
		if value:find ('%.wikipedialibrary%.idm%.oclc%.org') then				-- has the TWL base url?
			table.insert (twl_params_t, param);									-- keep the raw name; it is the key used to index cfg.url_access_map_t below
		end
	end

	if 0 == #twl_params_t then													-- zero when no TWL urls were found
		return;																	-- nothing to report
	end

	table.sort (twl_params_t);													-- pairs() order is unspecified; sort for stable error messaging

	local url_error_t = {};														-- sequence of prettified parameter names for the error message
	for i, param in ipairs (twl_params_t) do
		local access_param = cfg.url_access_map_t[param];						-- matching -access parameter name; nil when <param> has none
		if access_param then
			cite_args_t[access_param] = cfg.keywords_xlate.subscription;		-- set |<param>-url-access=subscription
		end
		url_error_t[i] = utilities.wrap_style ('parameter', param);				-- make the parameter pretty for error message; wrapped exactly once
	end

	utilities.set_message ('err_param_has_twl_url', {utilities.make_sep_list (#url_error_t, url_error_t)});	-- add this error message
	return true;
end
Line 4,479 ⟶ 4,577:
 
frame – from template call (citation()); may be nil when called from another module
args_t – table of all cs1|2 parameters in the template (the parent frame)
config_t – table of template-supplied parameters (the #invoke frame)
 
]]
 
local function _citation (frame, argsargs_t, configconfig_t) -- save a copy in case we need to display an error message in preview mode
if not frame then
frame = mw.getCurrentFrame(); -- if called from another module, get a frame for frame-provided functions
end
-- i18n: set the name that your wiki uses to identify sandbox subpages from sandbox template invoke (or can be set here)
local sandbox = ((configconfig_t.SandboxPath and '' ~= configconfig_t.SandboxPath) and configconfig_t.SandboxPath) or '/sandbox'; -- sandbox path from {{#invoke:Citation/CS1/sandbox|citation|SandboxPath=/...}}
is_sandbox = nil ~= string.find (frame:getTitle(), sandbox, 1, true); -- is this invoke the sandbox module?
sandbox = is_sandbox and sandbox or ''; -- use i18n sandbox to load sandbox modules when this module is the sandox; live modules else
Line 4,506 ⟶ 4,604:
 
z = utilities.z; -- table of error and category tables in Module:Citation/CS1/Utilities
local cite_args_t = {}; -- because args_t is the parent (template) frame args (which cannot be modified); params and their values will be placed here
 
is_preview_mode = not utilities.is_set (frame:preprocess ('{{REVISIONID}}'));
Line 4,514 ⟶ 4,613:
local capture; -- the single supported capture when matching unknown parameters using patterns
local empty_unknowns = {}; -- sequence table to hold empty unknown params for error message listing
for k, v in pairs ( args args_t) do -- get parameters from the parent (template) frame
v = mw.ustring.gsub (v, '^%s*(.-)%s*$', '%1'); -- trim leading/trailing whitespace; when v is only whitespace, becomes empty string
if v ~= '' then
Line 4,520 ⟶ 4,619:
k = mw.ustring.gsub (k, '%d', cfg.date_names.local_digits); -- for enumerated parameters, translate 'local' digits to Western 0-9
end
if not validate( k, configconfig_t.CitationClass ) then
if type (k) ~= 'string' then -- exclude empty numbered parameters
if v:match("%S+") ~= nil then
error_text = utilities.set_message ('err_text_ignored', {v});
end
elseif validate (k:lower(), configconfig_t.CitationClass) then
error_text = utilities.set_message ('err_parameter_ignored_suggest', {k, k:lower()}); -- suggest the lowercase version of the parameter
else
Line 4,535 ⟶ 4,634:
if capture then -- if the pattern matches
param = utilities.substitute (param, capture); -- add the capture to the suggested parameter (typically the enumerator)
if validate (param, configconfig_t.CitationClass) then -- validate the suggestion to make sure that the suggestion is supported by this template (necessary for limited parameter lists)
error_text = utilities.set_message ('err_parameter_ignored_suggest', {k, param}); -- set the suggestion error message
else
Line 4,544 ⟶ 4,643:
end
if not utilities.is_set (error_text) then -- couldn't match with a pattern, is there an explicit suggestion?
if (suggestions.suggestions[ k:lower() ] ~= nil) and validate (suggestions.suggestions[ k:lower() ], configconfig_t.CitationClass) then
utilities.set_message ('err_parameter_ignored_suggest', {k, suggestions.suggestions[ k:lower() ]});
else
Line 4,554 ⟶ 4,653:
end
 
argscite_args_t[k] = v; -- save this parameter and its value
 
elseif not utilities.is_set (v) then -- for empty parameters
if not validate (k, configconfig_t.CitationClass, true) then -- is this empty parameter a valid parameter
k = ('' == k) and '(empty string)' or k; -- when k is empty string (or was space(s) trimmed to empty string), replace with descriptive text
table.insert (empty_unknowns, utilities.wrap_style ('parameter', k)); -- format for error message and add to the list
end
end
-- crude debug support that allows us to render a citation from module {{#invoke:}} TODO: keep?
-- elseif args[k] ~= nil or (k == 'postscript') then -- when args[k] has a value from {{#invoke}} frame (we don't normally do that)
-- args[k] = v; -- overwrite args[k] with empty string from pframe.args[k] (template frame); v is empty string here
end -- not sure about the postscript bit; that gets handled in parameter validation; historical artifact?
end
 
Line 4,577 ⟶ 4,673:
local url_param_t = {}; -- table of url-holding paramters and their values
 
for k, v in pairs ( args cite_args_t) do
 
if 'string' == type (k) then -- don't evaluate positional parameters
has_invisible_chars (k, v); -- look for invisible characters
Line 4,583 ⟶ 4,680:
has_extraneous_punc (k, v); -- look for extraneous terminal punctuation in parameter values
missing_pipe_check (k, v); -- do we think that there is a parameter that is missing a pipe?
argscite_args_t[k] = inter_wiki_check (k, v); -- when language interwiki-linked parameter missing leading colon replace with wiki-link label
 
if 'string' == type (k) then -- when parameter k is not positional
Line 4,595 ⟶ 4,692:
 
has_extraneous_url (non_url_param_t); -- look for url in parameter values where a url does not belong
if has_twl_url (url_param_t, cite_args_t) then ; -- look for url-holding parameters that hold a The Wikipedia Library url
 
args['url-access'] = 'subscription';
end
return table.concat ({
frame:extensionTag ('templatestyles', '', {src='Module:Citation/CS1' .. sandbox .. '/styles.css'}),
citation0( config(config_t, argscite_args_t)
});
end