Module:Citation/CS1/sandbox: Difference between revisions

Content deleted Content added
No edit summary
No edit summary
 
(25 intermediate revisions by 3 users not shown)
Line 1:
--[[
History of changes since last sync: 2025-04-12
 
2025-05-10: maint cat to track {{cite journal}} templates misusing |page= for |article-number=; see Help_talk:Citation_Style_1#|page=_same_value_as_last_n-digits_of_|doi=
2025-07-29: fix url access parameter application on wikipedia library url errors; see Help_talk:Citation_Style_1#Wikipedia_Library_errors_on_a_talk_page
2025-02-27: emit error message when isbn present for pre-1965 pub date; see Help_talk:Citation_Style_1#isbn_and_pre-isbn_publication_dates
 
]]
Line 2,040 ⟶ 2,039:
 
local function get_v_name_table (vparam, output_table, output_link_table)
local _, accept = utilities.has_accept_as_written (vparam);
if accept then
utilities.add_prop_cat ('vanc-accept'); -- add properties category
end
local name_table = mw.text.split(vparam, "%s*,%s*"); -- names are separated by commas
local wl_type, label, link; -- wl_type not used here; just a placeholder
Line 2,618 ⟶ 2,621:
 
fetch global mode setting from {{cs1 config}} (if present) or from |mode= (if present); global setting
overrides local |mode= parameter value. When both are present, emit maintenance message
 
]]
Line 2,672 ⟶ 2,675:
local quote_prefix = '';
if utilities.is_set (quote_page) then
extra_text_in_page_check (quote_page, 'quote-page'); -- add to maint cat if |quote-page= value begins with what looks like p., pp., etc.
if not nopp then
quote_prefix = utilities.substitute (cfg.messages['p-prefix'], {sepc, quote_page}), '', '', '';
Line 2,702 ⟶ 2,705:
 
return quote, quote_page, quote_pages, postscript;
end
 
 
--[[--------------------------< C H E C K _ P U B L I S H E R _ N A M E >--------------------------------------
 
look for variations of '<text>: <text>' that might be '<___location>: <publisher>' in |publisher= parameter value.
when found, emit a maintenance message; return nil else
 
<publisher> is the value assigned to |publisher= or |institution=
 
]]
 
local function check_publisher_name (publisher)
	-- each pattern describes one flavor of '<location>: <publisher>'; the two halves may or may not be wikilinked
	local location_publisher_patterns_t = {
		'^[%w%s]+%s*:%s*[%w%s]+$',												-- neither half wikilinked
		'^%[+[%w%s:|]+%]+%s*:%s*[%w%s]+$',										-- only the location half wikilinked
		'^[%w%s]+%s*:%s*%[+[%w%s:|]+%]+$',										-- only the publisher half wikilinked
		'^%[+[%w%s:|]+%]+%s*:%s*%[+[%w%s:|]+%]+$',								-- both halves wikilinked
	};

	local looks_like_location = false;											-- flag; true as soon as any pattern matches
	for i = 1, #location_publisher_patterns_t do								-- try the patterns in order
		if mw.ustring.match (publisher, location_publisher_patterns_t[i]) then
			looks_like_location = true;
			break;																-- first match is enough; no need to try the rest
		end
	end

	if looks_like_location then
		utilities.set_message ('maint_publisher_location');					-- one maint message no matter which pattern matched
	end
end
 
 
--[[--------------------------< I S _ P A G E _ A R T _ N U M >------------------------------------------------
 
compare the trailing (rightmost) characters of the |doi= value against the whole value assigned to |page(s)=.
 
return boolean true when:
|page(s)= has exactly 8 digits and, with a dot inserted between the fourth and fifth digits, matches the trailing
9 characters of the |doi= value: |page=12345678 → |page=1234.5678 matches |doi=10.xxxx/yyyy1234.5678
|page(s)= is 5 or more characters and matches the |doi= value's trailing characters
|page(s)= begins with a lowercase 'e' and |page(s)= without the 'e' matches the |doi= value's trailing
characters: |page=e12345 → |page=12345 matches |doi=10.xxxx/yyyy12345
|page(s)= begins with an uppercase 'CD' followed by digits (typically six) and the |doi= value ends with
'CDxxxxxx.pubx' (where 'x' is any single digit)
 
return nil when |page(s)= values:
are ranges separated by underscore, hyphen, emdash, endash, figure dash, or minus character
are comma- or semicolon-separated lists of pages
have external urls (has text 'http')
are digit-only values less than 10000
do not match the |doi= value's trailing characters
 
]]
 
-- <page> is the value assigned to |page(s)=; <doi> is the value assigned to |doi=.  Returns boolean true when
-- <page> looks like an article number because it matches the trailing characters of <doi>; returns nil else.
--
-- <page> is editor-supplied text so it is never used as a lua pattern: characters that are magic in patterns
-- ('.', '[', '(', '%', ...) would either raise a 'malformed pattern' error or match more than their literal selves.
-- All comparisons of <page> against <doi> are literal comparisons.
local function is_page_art_num (page, doi)
	if not (utilities.is_set (page) and utilities.is_set (doi)) then			-- both required
		return;																	-- abandon; nothing to do
	end

	-- range and list separators: comma, semicolon, underscore, minus, endash, emdash, figure dash, hyphen
	-- each is searched for as plain text; the dashes are multi-byte and string library character classes work on
	-- single bytes so a '[...]' set would also match unrelated characters that merely share one of those bytes
	local separators_t = {',', ';', '_', '−', '–', '—', '‒', '-'};
	for _, separator in ipairs (separators_t) do
		if page:find (separator, 1, true) then									-- when |page(s)= might be a page range or a separated list of pages
			return;																-- abandon
		end
	end

	page = page:lower();														-- because doi names are case insensitive
	doi = doi:lower();															-- force these to lowercase for testing
	if page:find ('http', 1, true) then											-- when |page(s)= appears to hold a url
		return;																	-- abandon
	end

	local function doi_ends_with (suffix)										-- literal test: are the last characters of <doi> exactly <suffix>?
		return doi:sub (-#suffix) == suffix;
	end

	if tonumber (page) then														-- is |page(s)= a number
		if 10000 > tonumber (page) then											-- when |page(s)= less than 10000
			return;																-- abandon; too likely to be a real page number
		end
		if doi_ends_with (page) then											-- page number matches the last digits in |doi=?
			return true;
		end

		if 8 == page:len() then													-- special case when |page(s)= is exactly 8 digits
			local dot_page = page:gsub ('(%d%d%d%d)(%d%d%d%d)', '%1.%2');		-- make a |page=xxxx.yyyy version commonly used in |doi=; gsub()'s second return value (the count) is discarded
			if doi_ends_with (dot_page) then									-- 8-digit dotted page number matches the last characters in |doi=? the dot must be a real dot
				return true;
			end
		end
	else																		-- here when |page(s)= is alpha-numeric
		if 4 < page:len() then													-- when |page(s)= is five or more characters
			if doi_ends_with (page) then										-- alpha-numeric page matches the last characters in |doi=?
				return true;
			end
			local epage = page:match ('^e([%w]+)$');							-- if first character of |page= is 'e', remove it
			if epage and doi_ends_with (epage) then								-- page number without the 'e' matches the last characters in |doi=?
				return true;
			end
			local cdpage = page:match ('^cd%d+$');								-- if first characters of |page= are 'CD' and last characters are digits (typically 6 digits)
			if cdpage and doi:match (cdpage .. '%.pub%d$') then					-- <cdpage> is only 'cd' and digits so is safe inside a pattern; matches doi 'CDxxxxxx.pubx' where 'x' is a digit
				return true;
			end
		end
	end
end
 
Line 2,914 ⟶ 3,017:
QuotePages = utilities.hyphen_to_dash (A['QuotePages']);
end
 
local NoPP = is_valid_parameter_value (A['NoPP'], A:ORIGIN('NoPP'), cfg.keywords_lists['yes_true_y'], nil);
local NoPP = is_valid_parameter_value (A['NoPP'], A:ORIGIN('NoPP'), cfg.keywords_lists['yes_true_y'], nil);
 
local Mode = mode_set (A['Mode'], A:ORIGIN('Mode'));
Line 2,920 ⟶ 3,024:
-- separator character and postscript
local sepc, PostScript = set_style (Mode:lower(), A['PostScript'], config.CitationClass);
local Quote;
Quote, QuotePage, QuotePages, PostScript = quote_make (A['Quote'], A['TransQuote'], A['ScriptQuote'], QuotePage, QuotePages, NoPP, sepc, PostScript);
 
local Edition = A['Edition'];
Line 2,950 ⟶ 3,054:
 
PublisherName = nil; -- ensure that this parameter is unset for the time being; will be used again after COinS
end
 
if 'book' == config.CitationClass or 'encyclopaedia' == config.CitationClass or ('citation' == config.CitationClass and not utilities.is_set (Periodical)) then
local accept;
PublisherName, accept = utilities.has_accept_as_written (PublisherName); -- check for and remove accept-as-written markup from |publisher= wrapped
if not accept then -- when no accept-as-written markup
check_publisher_name (PublisherName); -- emit maint message when |publisher= might be prefixed with publisher's ___location
end
end
 
Line 2,994 ⟶ 3,106:
Page, Pages, At, coins_pages = insource_loc_get (Page, A:ORIGIN('Page'), Pages, A:ORIGIN('Pages'), At);
 
-- local NoPP = is_valid_parameter_value (A['NoPP'], A:ORIGIN('NoPP'), cfg.keywords_lists['yes_true_y'], nil);
 
if utilities.is_set (PublicationPlace) and utilities.is_set (Place) then -- both |publication-place= and |place= (|___location=) allowed if different
Line 3,134 ⟶ 3,244:
end
local use_lowercase = ( sepc == ',' ); -- controls capitalization of certain static text
-- CS1/2 mode
 
-- local Mode;
-- if cfg.global_cs1_config_t['Mode'] then -- global setting in {{cs1 config}} overrides local |mode= parameter value; nil when empty or assigned value invalid
-- Mode = is_valid_parameter_value (cfg.global_cs1_config_t['Mode'], 'cs1 config: mode', cfg.keywords_lists['mode'], ''); -- error messaging 'param' here is a hoax
-- else
-- Mode = is_valid_parameter_value (A['Mode'], A:ORIGIN('Mode'), cfg.keywords_lists['mode'], '');
-- end
--
-- if cfg.global_cs1_config_t['Mode'] and utilities.is_set (A['Mode']) then -- when template has |mode=<something> which global setting has overridden
-- utilities.set_message ('maint_overridden_setting'); -- set a maint message
-- end
 
-- -- separator character and postscript
-- local sepc, PostScript = set_style (Mode:lower(), A['PostScript'], config.CitationClass);
-- controls capitalization of certain static text
local use_lowercase = ( sepc == ',' );
-- cite map oddities
Line 3,395 ⟶ 3,489:
}
 
ID_list, ID_list_coins = identifiers.identifier_lists_get (args, {DoiBroken = DoiBroken, ASINTLD = A['ASINTLD'], Embargo = Embargo, Class = Class, Year=anchor_year}, ID_support);
DoiBroken = DoiBroken, -- for |doi=
ASINTLD = A['ASINTLD'], -- for |asin=
Embargo = Embargo, -- for |pmc=
Class = Class, -- for |arxiv=
CitationClass = config.CitationClass, -- for |arxiv=
Year=anchor_year, -- for |isbn=
}, ID_support);
 
-- Account for the oddities that are {{cite arxiv}}, {{cite biorxiv}}, {{cite citeseerx}}, {{cite medrxiv}}, {{cite ssrn}}, before generation of COinS data.
Line 3,448 ⟶ 3,549:
Title = ''; -- set title to empty string
utilities.set_message ('maint_untitled'); -- add maint cat
end
 
if 'journal' == config.CitationClass or ('citation' == config.CitationClass and utilities.is_set (Periodical) and 'journal' == Periodical_origin) then
if is_page_art_num (Page or Pages, ID_list_coins['DOI']) then -- does |page(s)= look like it holds an article number
utilities.set_message ('maint_page_art_num'); -- add maint cat
end
end
 
Line 3,895 ⟶ 4,002:
URL = " " .. external_link( URL, nil, URL_origin, UrlAccess );
end
--[[
local Quote = A['Quote'];
local TransQuote = A['TransQuote'];
local ScriptQuote = A['ScriptQuote'];
if utilities.is_set (Quote) or utilities.is_set (TransQuote) or utilities.is_set (ScriptQuote) then
 
if utilities.is_set (Quote) then
if Quote:sub(1, 1) == '"' and Quote:sub(-1, -1) == '"' then -- if first and last characters of quote are quote marks
Quote = Quote:sub(2, -2); -- strip them off
end
end
Quote = kern_quotes (Quote); -- kern if needed
Quote = utilities.wrap_style ('quoted-text', Quote ); -- wrap in <q>...</q> tags
if utilities.is_set (ScriptQuote) then
Quote = script_concatenate (Quote, ScriptQuote, 'script-quote'); -- <bdi> tags, lang attribute, categorization, etc.; must be done after quote is wrapped
end
 
if utilities.is_set (TransQuote) then
if TransQuote:sub(1, 1) == '"' and TransQuote:sub(-1, -1) == '"' then -- if first and last characters of |trans-quote are quote marks
TransQuote = TransQuote:sub(2, -2); -- strip them off
end
Quote = Quote .. " " .. utilities.wrap_style ('trans-quoted-title', TransQuote );
end
 
if utilities.is_set (QuotePage) or utilities.is_set (QuotePages) then -- add page prefix
local quote_prefix = '';
if utilities.is_set (QuotePage) then
extra_text_in_page_check (QuotePage, 'quote-page'); -- add to maint cat if |quote-page= value begins with what looks like p., pp., etc.
if not NoPP then
quote_prefix = utilities.substitute (cfg.messages['p-prefix'], {sepc, QuotePage}), '', '', '';
else
quote_prefix = utilities.substitute (cfg.messages['nopp'], {sepc, QuotePage}), '', '', '';
end
elseif utilities.is_set (QuotePages) then
extra_text_in_page_check (QuotePages, 'quote-pages'); -- add to maint cat if |quote-pages= value begins with what looks like p., pp., etc.
if tonumber(QuotePages) ~= nil and not NoPP then -- if only digits, assume single page
quote_prefix = utilities.substitute (cfg.messages['p-prefix'], {sepc, QuotePages}), '', '';
elseif not NoPP then
quote_prefix = utilities.substitute (cfg.messages['pp-prefix'], {sepc, QuotePages}), '', '';
else
quote_prefix = utilities.substitute (cfg.messages['nopp'], {sepc, QuotePages}), '', '';
end
end
Quote = quote_prefix .. ": " .. Quote;
else
Quote = sepc .. " " .. Quote;
end
 
PostScript = ""; -- cs1|2 does not supply terminal punctuation when |quote= is set
end
]]
-- We check length of PostScript here because it will have been nuked by
-- the quote parameters. We'd otherwise emit a message even if there wasn't
Line 3,996 ⟶ 4,050:
utilities.set_message ('maint_bot_unknown'); -- and add a category if not already added
else
-- utilities.set_message ('maint_unfit'); -- and add a category if not already added
utilities.add_prop_cat ('unfit'); -- and add a category if not already added
end
Line 4,415 ⟶ 4,468:
_, value, _ = utilities.is_wikilink (value); -- extract label portion from wikilink
end
 
return value;
end
Line 4,471 ⟶ 4,525:
--[[--------------------------< H A S _ T W L _ U R L >--------------------------------------------------------
 
look for The Wikipedia Library urls in url-holding parameters. TWL urls are accessible only to readers who are
active extended confirmed Wikipedia editors. This function sets an error message when such urls are discovered
and, when appropriate, sets |<param>-url-access=subscription. Returns nothing.
 
looks for: '.wikipedialibrary.idm.oclc.org'
Line 4,478 ⟶ 4,533:
]]
 
local function has_twl_url (url_params_t, cite_args_t)
local url_error_t = {}; -- sequence of url-holding parameters that have a TWL url
for param, value in pairs (url_params_t) do
if value:find ('%.wikipedialibrary%.idm%.oclc%.org') then -- has the TWL base url?
table.insert (url_error_t, utilities.wrap_style ('parameter', param)); -- add parameter name to the error list
end
end
if 0 ~= #url_error_t then -- non-zero when there are errors
table.sort (url_error_t); -- sor for error messaging
for i, param in ipairs (url_error_t) do
if cfg.url_access_map_t[param] then -- if <param> has a matching -access parameter
cite_args_t[cfg.url_access_map_t[param]] = cfg.keywords_xlate.subscription; -- set |<param>-url-access=subscription
end
url_error_t[i] = utilities.wrap_style ('parameter', param); -- make the parameter pretty for error message
end
 
utilities.set_message ('err_param_has_twl_url', {utilities.make_sep_list (#url_error_t, url_error_t)}); -- add this error message
end
Line 4,515 ⟶ 4,577:
 
frame – from template call (citation()); may be nil when called from another module
args_t – table of all cs1|2 parameters in the template (the parent frame)
config_t – table of template-supplied parameters (the #invoke frame)
 
]]
 
local function _citation (frame, argsargs_t, configconfig_t) -- save a copy in case we need to display an error message in preview mode
if not frame then
frame = mw.getCurrentFrame(); -- if called from another module, get a frame for frame-provided functions
end
-- i18n: set the name that your wiki uses to identify sandbox subpages from sandbox template invoke (or can be set here)
local sandbox = ((configconfig_t.SandboxPath and '' ~= configconfig_t.SandboxPath) and configconfig_t.SandboxPath) or '/sandbox'; -- sandbox path from {{#invoke:Citation/CS1/sandbox|citation|SandboxPath=/...}}
is_sandbox = nil ~= string.find (frame:getTitle(), sandbox, 1, true); -- is this invoke the sandbox module?
sandbox = is_sandbox and sandbox or ''; -- use i18n sandbox to load sandbox modules when this module is the sandox; live modules else
Line 4,542 ⟶ 4,604:
 
z = utilities.z; -- table of error and category tables in Module:Citation/CS1/Utilities
local cite_args_t = {}; -- because args_t is the parent (template) frame args (which cannot be modified); params and their values will be placed here
 
is_preview_mode = not utilities.is_set (frame:preprocess ('{{REVISIONID}}'));
 
-- table where we store all of the template's arguments
local suggestions = {}; -- table where we store suggestions if we need to loadData them
local error_text; -- used as a flag
Line 4,551 ⟶ 4,613:
local capture; -- the single supported capture when matching unknown parameters using patterns
local empty_unknowns = {}; -- sequence table to hold empty unknown params for error message listing
for k, v in pairs ( args args_t) do -- get parameters from the parent (template) frame
v = mw.ustring.gsub (v, '^%s*(.-)%s*$', '%1'); -- trim leading/trailing whitespace; when v is only whitespace, becomes empty string
if v ~= '' then
Line 4,557 ⟶ 4,619:
k = mw.ustring.gsub (k, '%d', cfg.date_names.local_digits); -- for enumerated parameters, translate 'local' digits to Western 0-9
end
if not validate( k, configconfig_t.CitationClass ) then
if type (k) ~= 'string' then -- exclude empty numbered parameters
if v:match("%S+") ~= nil then
error_text = utilities.set_message ('err_text_ignored', {v});
end
elseif validate (k:lower(), configconfig_t.CitationClass) then
error_text = utilities.set_message ('err_parameter_ignored_suggest', {k, k:lower()}); -- suggest the lowercase version of the parameter
else
Line 4,572 ⟶ 4,634:
if capture then -- if the pattern matches
param = utilities.substitute (param, capture); -- add the capture to the suggested parameter (typically the enumerator)
if validate (param, configconfig_t.CitationClass) then -- validate the suggestion to make sure that the suggestion is supported by this template (necessary for limited parameter lists)
error_text = utilities.set_message ('err_parameter_ignored_suggest', {k, param}); -- set the suggestion error message
else
Line 4,581 ⟶ 4,643:
end
if not utilities.is_set (error_text) then -- couldn't match with a pattern, is there an explicit suggestion?
if (suggestions.suggestions[ k:lower() ] ~= nil) and validate (suggestions.suggestions[ k:lower() ], configconfig_t.CitationClass) then
utilities.set_message ('err_parameter_ignored_suggest', {k, suggestions.suggestions[ k:lower() ]});
else
Line 4,591 ⟶ 4,653:
end
 
argscite_args_t[k] = v; -- save this parameter and its value
 
elseif not utilities.is_set (v) then -- for empty parameters
if not validate (k, configconfig_t.CitationClass, true) then -- is this empty parameter a valid parameter
k = ('' == k) and '(empty string)' or k; -- when k is empty string (or was space(s) trimmed to empty string), replace with descriptive text
table.insert (empty_unknowns, utilities.wrap_style ('parameter', k)); -- format for error message and add to the list
end
end
-- crude debug support that allows us to render a citation from module {{#invoke:}} TODO: keep?
-- elseif args[k] ~= nil or (k == 'postscript') then -- when args[k] has a value from {{#invoke}} frame (we don't normally do that)
-- args[k] = v; -- overwrite args[k] with empty string from pframe.args[k] (template frame); v is empty string here
end -- not sure about the postscript bit; that gets handled in parameter validation; historical artifact?
end
 
Line 4,614 ⟶ 4,673:
local url_param_t = {}; -- table of url-holding paramters and their values
 
for k, v in pairs ( args cite_args_t) do
 
if 'string' == type (k) then -- don't evaluate positional parameters
has_invisible_chars (k, v); -- look for invisible characters
Line 4,620 ⟶ 4,680:
has_extraneous_punc (k, v); -- look for extraneous terminal punctuation in parameter values
missing_pipe_check (k, v); -- do we think that there is a parameter that is missing a pipe?
argscite_args_t[k] = inter_wiki_check (k, v); -- when language interwiki-linked parameter missing leading colon replace with wiki-link label
 
if 'string' == type (k) then -- when parameter k is not positional
Line 4,632 ⟶ 4,692:
 
has_extraneous_url (non_url_param_t); -- look for url in parameter values where a url does not belong
has_twl_url (url_param_t, cite_args_t); -- look for url-holding parameters that hold a The Wikipedia Library url
 
return table.concat ({
frame:extensionTag ('templatestyles', '', {src='Module:Citation/CS1' .. sandbox .. '/styles.css'}),
citation0( config(config_t, argscite_args_t)
});
end
Line 4,663 ⟶ 4,723:
 
return {
citation = citation, -- template entry point
_citation = _citation, -- module entry point
}