Module:Footnotes/anchor id list/sandbox: Difference between revisions

Content deleted Content added
No edit summary
No edit summary
Line 1:
require('Module:No globals');
local data = mw.loadData ('Module:Footnotes/anchor id list/data');
local Lang_obj = mw.language.getContentLanguage(); -- used by template_list_add() to uppercase first letter of template name TODO: better way to do that?
 
local anchor_id_list = {};
 
--local redirect_patterns_anchor = {
-- '{{%s*[Aa]nchor',
-- '{{%s*[Aa]nchor for redirect',
-- '{{%s*[Aa]nchors',
-- '{{%s*[Aa]nchro',
-- '{{%s*[Aa]ncor',
-- }
--local redirect_patterns_harvc = {
-- '{{%s*[Hh]arvc',
-- '{{%s*[Cc]itec',
-- }
-- Lua patterns that match the opening of an article-local whitelist template: '{{', optional
-- whitespace, then the template name with its first letter in either case.
-- article_local_whitelist_make() iterates this list to locate those templates in the article text.
local redirect_patterns_sfn_whitelist = {
'{{%s*[Ss]fn whitelist',
'{{%s*[Hh]arv whitelist',
}
--local redirect_patterns_vcite = {
-- '{{%s*[Vv]cite',
-- '{{%s*[Vv]ancite',
---- '{{%s*[Cc]it ', -- disabled 'cit journal & cit paper' redirect to vcite journal but 'cit book', 'cit new', 'cit web' are cs1 redirects
-- }
-- Set of template names (keys are lowercase; every value is true) intended for lookup by template name.
-- NOTE(review): the only use visible in this view is inside the disabled cs2-detection code in the
-- cs1|2 anchor-id function; confirm whether this table is still needed.
local redirects_citation = {
['citation'] = true,
['cite'] = true,
['cite citation'] = true,
['cite study'] = true,
['cite technical standard'] = true,
}
local redirects_date = {
['date'] = true,
Line 38 ⟶ 10:
['isotomos'] = true,
}
local redirects_patent = { -- special case cs1-like templates because uses different parameters for name and date in anchor ID
--local redirects_harvc = {
-- ['harvc'] = true,
-- ['citec'] = true,
-- }
local redirects_patent = {
['cite patent'] = true,
['citeref patent'] = true,
Line 118 ⟶ 86:
'<source.->.-</source>', -- deprecated alias of syntaxhighlight tag
}
-- Set of template names (keys are lowercase; every value is true).
-- NOTE(review): presumably names listed here are ignored when anchor IDs are collected; the code that
-- consults this table is not visible in this view -- confirm.
-- The table opener appeared twice in a row, which left an unclosed '{'; it is declared once here.
local template_skip = {															-- TODO: necessary to keep this? with known template tables templates not in those tables will be skipped
	['citation-attribution'] = true,
	}
local Article_content;
 
-- Module-level result tables; the module's final return statement hands them to the caller.
-- anchor_id_list and template_list are filled through list_add(), which stores a tally under each key.
-- article_whitelist maps each whitelisted anchor ID to 1.
local anchor_id_list = {}; -- exported tables
local template_list = {};
local article_whitelist = {};
 
 
Line 406 ⟶ 377:
 
local function template_name_get (template)
-- local template_name = template:match ('{{%s*([^/|]+)'); -- get template name; ignore subpages ~/new, ~/sandbox
local template_name = template:match ('{{%s*([^#][^#/|]+)'); -- get template name; ignore subpages ~/new, ~/sandbox; parser functions, magic words don't count as templates
 
Line 433 ⟶ 403:
 
for param, value in template:gmatch ('|%s*([^=]-)%s*=%s*([^|}]+)') do -- build a table of template parameters and their values
if value then -- there must be a value but when
if '' ~= value and not value:match ('^%s$') then -- skip when value is empty string or only whitespace
params[param] = mw.text.trim (value); -- add trimmed value else
Line 449 ⟶ 419:
 
local function anchor_id_make_harvc (template)
local date = date_get (template, alias_patterns_harvc_date); -- get date; done here because might be in {{date}}; return date if valid; empty string else
local anchor_id;
local params = {}; -- table of harvc parameters
Line 465 ⟶ 435:
end
anchor_id = names_get (params, aliases_harvc_author); -- get the harvc contributor names
 
if anchor_id then -- if names were gotten
return 'CITEREF' .. anchor_id .. date;
end
Line 474 ⟶ 444:
 
 
--[[--------------------------< A N C H O R _ I D _ M A K E _ C S 1 2 >------------------------------------------------------
 
for cs1|2 template and cs1-like templates
 
inspect |ref= to decide what to do:
|ref= - empty or missing: get names and date from template parameters because all cs1|2 templates will soon create CITEREF anchor IDs
|ref=harv - get names and date from template parameters
|ref={{SfnRef|name|name|name|name|year}} - assemble an anchor id from {{sfnref}} positional parameters
Line 483 ⟶ 455:
|ref=none - skip; do nothing because an anchor id intentionally suppressed; TODO: keep with a type code of '0'?
|ref=<text> - save param value because may match an anchor id override value in {{harv}} template |ref= parameter or {{harvc}} |id= parameter
 
this no longer applies; all cs1|2 will soon create CITEREF anchor IDs
|ref= - empty or missing
for cs1: skip
if |mode=cs2: spoof |ref=harv
for cs2: get names and date from template parameters
if |mode=cs1: skip
 
]]
 
local function anchor_id_makeanchor_id_make_cs12 (template)
local ref; -- content of |ref=
local template_name; -- name of the template for cs2 detection
local anchor_id; -- the assembled anchor id from this template
local date;
local params = {}; -- table of cs1|2template parameters
template_name = template_name_get (template); -- get lowercase trimmed template name; ignore subpages ~/new, ~/sandbox
Line 506 ⟶ 471:
 
if redirects_patent[template_name] then
date = date_get (template, alias_patterns_patent_date); -- get date; done here because might be in {{date}}
else
date = date_get (template, alias_patterns_date);
Line 522 ⟶ 487:
end
end
 
-- this disabled because all cs1|2 templates will create CITEREF anchor IDs after next cs1|2 module-suite update
-- if not ref then -- here when |ref= missing or empty
-- if redirects_citation[template_name] then -- could be cs2
-- if template:match ('|%s*mode%s*=%s*cs1') then
-- return nil; -- |ref= missing or empty; citation template but |mode=cs1
-- else
-- ref = 'harv'; -- spoof to handle cs2 as if it were cs1 with |ref=harv
-- end
-- else -- |ref= missing or empty; not a cs2 template
-- if template:match ('|%s*mode%s*=%s*cs2') then
-- ref = 'harv'; -- |ref= missing or empty; not a cs2 template; |mode=cs2; spoof as if it were cs1 with |ref=harv
-- end
-- end
-- end
end
 
Line 545 ⟶ 495:
return 'CITEREF' .. (params.last or '') .. (params.year or ''); -- cite LSA always creates an anchor id using only |last= and |year= (no aliases)
end
-- all cs1|2 templates will create CITEREF anchor IDs after next cs1|2 module-suite update so keep going
-- return nil; -- not cite LSA so done
end
 
if 'harv' == ref or not ref then -- |ref=harv specified or |ref= missing or empty (new cs1|2 default is not default for other templates handled here)
if redirects_patent[template_name] then -- if this is a cite patent template
anchor_id = names_get (params, aliases_inventor); -- inventor names only
Line 580 ⟶ 528:
--[[--------------------------< L I S T _ A D D >--------------------------------------------------------------
 
adds an <item> to the <list> table; for anchor IDs, the boolean <encode> argument must be set true; no return value
 
]]
 
local function list_add (item, list, encode)
if item then -- if there was an anchor id extracteditem
if encode then -- for anchor IDs ...
item = mw.uri.anchorEncode (item); -- encode to remove wikimarkup, convert spaces to underscores etc
end
if not list[item] then -- if not already saved
list[item] = 1; -- save it
else -- here when this anchor iditem already saved
list[item] = list[item] + 1; -- to indicate that there are multiple same name/date citationsitems
end
end
Line 607 ⟶ 555:
 
local function anchor_id_make_anchor (template, anchor_id_list)
template = template:gsub ('^{{[^|]+|', ''):gsub ('}}$', '', 1); -- remove outer {{ and }} (anchorand template delimiters)name
 
template = template:gsub ('^[^|]+|', ''); -- remove template name and first pipe
template = wikilink_strip (template); -- strip any wikilink markup (there shouldn't be any but just in case)
Line 614 ⟶ 562:
local anchor_id;
for param in template:gmatch ('%b{}') do -- loop through the template; remove and save templates (presumed to be sfnref or harvid)
table.insert (params, param); -- save it
template = template:gsub ('%b{}', '', 1); -- remove it from source template
Line 621 ⟶ 569:
for _, t in ipairs (params) do -- spin through the templates in params
anchor_id = sfnref_get (t); -- attempt to decode {{sfnref}} and {{harvid}}
if anchor_id then -- nil when not {{sfnref}} andor {{harvid}}
list_add (anchor_id, anchor_id_list, true); -- add anchor ID to the list
end
Line 645 ⟶ 593:
 
]]
 
local Lang_obj = mw.language.getContentLanguage();
 
local function template_list_add (template)
Line 652 ⟶ 598:
if template and not template:match ('^#') then -- found a template or magic word; ignore magic words
template=mw.text.trim (template); -- trim whitespace
template = Lang_obj:ucfirst (template); -- first character in template name must be uppercase (same as canonical template name) TODO: better way to do this?
list_add (template, template_list); -- add to list with (unused) tally
end
 
--mw.logObject (template_list, 'template_list')
-- return template_list;
end
 
 
--[[--------------------------< A N C H O R _ I D _ L I S T _ M A K E >--------------------------------------------
 
makes a list of anchor ids from cs1|2, cs1|2-like, vcite xxx, and harvc, anchor, wikicite templates
 
Because cs1|2 wrapper templates can, and often do, hide |ref=, the author and date parameters inside the wrapper,
these parameters are not available in the article's wikisource so {{harv}}, {{sfn}}, and {{harvc}} templates that
link correctly to those wrapper templates will incorrectly show error messages. Use |ignore-err=yes in the {{harv}},
{{sfn}}, and {{harvc}} templates to suppress the error message.
 
creates a list of templates used in the article for use with the whitelist
 
creates a list of article-local whitelisted anchor IDs from {{sfn whitelist}}
 
]]
Line 689 ⟶ 636:
 
if data.known_templates_cs12 [template_name] then
anchor_id = anchor_id_makeanchor_id_make_cs12 (template); -- extract an anchor id from this template
list_add (anchor_id, anchor_id_list, true)
 
Line 739 ⟶ 686:
elseif data.known_templates_anchor [template_name] then
anchor_id_make_anchor (template, anchor_id_list); -- extract anchor ids from this template if any
end
elseif data.known_templates_sfn_whitelist [template_name] then
end
template = template:gsub ('^{{[^|]+|', ''):gsub ('}}$', '', 1); -- remove outer {{ and }} and template name
template = mw.text.trim (template, '%s|'); -- trim leading trailing white space and pipes
template = mw.text.split (template, '%s*|%s*'); -- make a table of the template's parameters
 
for _, anchor_id in ipairs (template) do -- spin through this template's parameter
mw.logObject (anchor_id_list, 'anchor_id_list')
if '' ~= anchor_id and not article_whitelist[anchor_id] then
mw.logObject (template_list, 'template_list')
article_whitelist[anchor_id] = 1; -- add to the whitelist
 
return anchor_id_list;
end
 
 
--[[--------------------------< T E M P L A T E _ L I S T _ M A K E >------------------------------------------
 
makes a list of templates used in the article.
 
]]
--[[
local Lang_obj = mw.language.getContentLanguage();
 
local function template_list_make ()
article_content_get (); -- attempt to get this article's content
 
if '' == Article_content then -- when there is no article content
return ''; -- no point in continuing
end
 
local template_list = {};
for template in Article_content:gmatch ('{{%s*(.-)[|}]') do
if template and not template:match ('^#') then -- found a template or magic word; ignore magic words
template=mw.text.trim (template); -- trim whitespace
template = Lang_obj:ucfirst (template); -- first character in template name must be uppercase (same as canonical template name)
list_add (template, template_list); -- add to list with (unused) tally
end
end
mw.logObject (template_list, 'template_list')
return template_list;
end
]]
 
--[[--------------------------< A R T I C L E _ L O C A L _ W H I T E L I S T _ M A K E >----------------------
 
makes a list of article-local whitelisted anchor IDs taken from the {{sfn whitelist}} and {{harv whitelist}} templates used in the article.
 
]]
 
-- Builds the article-local whitelist from the whitelist templates found in the article text.
--
-- For each pattern in redirect_patterns_sfn_whitelist, every matching template is located, its
-- opening (braces and name) and closing braces are removed, and what remains is split on pipes.
-- Each non-empty piece becomes a key in the result table with the value 1.
--
-- Returns: the empty string when Article_content is empty; otherwise the whitelist table (which
-- may itself be empty when the article holds no whitelist templates).
--
-- NOTE(review): article_content_get() is defined outside this view; it presumably fills the
-- module-level Article_content -- confirm.
local function article_local_whitelist_make ()
	article_content_get ();														-- attempt to get this article's content

	if '' == Article_content then												-- when there is no article content
		return '';																-- no point in continuing
	end

	local article_whitelist = {};												-- function-local result; shadows any outer variable of the same name
	local tstart, tend;
	local template;

	for _, pattern in ipairs (redirect_patterns_sfn_whitelist) do
		tstart, tend = Article_content:find (pattern);							-- find the first whitelist template
		while tstart do															-- nil when whitelist template not found
			template = Article_content:match ('%b{}', tstart);					-- get the whole template (balanced braces)
			if template then
				template = template:gsub (pattern, ''):gsub ('}}$', '', 1);	-- remove outer {{ and }} and template name
				template = mw.text.trim (template, '%s|');						-- trim leading and trailing white space and pipes
				template = mw.text.split (template, '%s*|%s*');					-- make a table of the template's parameters

				for _, anchor_id in ipairs (template) do						-- spin through this template's parameters
					if '' ~= anchor_id and not article_whitelist[anchor_id] then
						article_whitelist[anchor_id] = 1;						-- add to the whitelist
					end
				end
			end
			tstart, tend = Article_content:find (pattern, tend);				-- resume the search at the end of the previous match
		end
	end

	mw.logObject (article_whitelist, 'article_whitelist');						-- debug log of the finished whitelist
	return article_whitelist;													-- must be the last statement in the block
end
 
Line 828 ⟶ 713:
-- Exported values for this module.
-- anchor_id_list_make() is called here, while the return table is being built.
-- article_whitelist and template_list are the module-level tables declared near the top of the file.
-- NOTE(review): those two tables are presumably filled as a side effect of anchor_id_list_make() -- confirm.
return {
	anchor_id_list = anchor_id_list_make(),										-- table of anchor ids available in this article
	article_whitelist = article_whitelist,										-- table of anchor ids with false-positive error message to be suppressed
--	template_list = template_list_make(),										-- table of templates used in this article
	template_list = template_list,												-- table of templates used in this article
	}