Module:Citation/CS1/Configuration/sandbox: Difference between revisions

Content deleted Content added
No edit summary
+10.'9778'
 
(95 intermediate revisions by 9 users not shown)
Line 1:
--[[
History of changes since last sync: 2025-04-12
 
2025-04-23: add script-lang tkr;
2025-04-27: change maint warning message in preview box from #3a3 to #085 to match cs1|2 maint message color;
2025-05-01: tweak tabular data fetch; see Help_talk:Citation_Style_1#Lua_error%3F
2025-05-10: maint cat to track {{cite journal}} templates misusing |page= for |article-number=; see Help_talk:Citation_Style_1#|page=_same_value_as_last_n-digits_of_|doi=
2025-05-26: add 10.1016/j.patter to free DOI prefix recognition (Patterns)
2025-06-07: maint cat for post 2007 arxiv format without |class=; see Help_talk:Citation_Style_1#Category%3ACS1_maint%3A_missing_class_%3F
2025-06-11: add 'updated' as bad author name; see Help_talk:Citation_Style_1#Author_check
2025-07-29: fix url access parameter application on wikipedia library url errors; see Help_talk:Citation_Style_1#Wikipedia_Library_errors_on_a_talk_page
2025-08-03: Add 10.9778 to free DOI recognition (CMAJ Open)
 
2023-11-27: add doi free registrant 4249 - Scholarpedia
2023-11-30: removed temporary Julian–Gregorian uncertainty categorization; see Help_talk:Citation_Style_1#Category%3ACS1%3A_Julian–Gregorian_uncertainty
2023-12-04: add doi free registrant 22323 - SISSA
2023-12-06: combine extra-text tests for |volume= and |issue=; see Help_talk:Citation_Style_1#Extra_text_in_{{pipe}}volume=_and_{{pipe}}issue=
2023-12-10: add doi free registrant 15347 - Wikijournals; 1074 and 1194 - American Society for Biochemistry and Molecular Biology
2023-12-14: add doi free registrant 5210 - University of Illinois Libraries
2023-12-30: add doi free registrant 7759 - Cureus
2021-01-01: add doi free registrant 14256 - Croatian Association of Civil Engineers
2024-01-09: add |script-encyclopedia= and |trans-encyclopedia=; see Help_talk:Citation_Style_1#support_for_%7Cscript-encyclopedia%3D_and_%7Ctrans-encyclopedia%3D
2024-01-14: add doi free registrant 1045 - D-Lib Magazine
2024-01-14: add doi free registrant 1096 - FASEB
]]
 
 
local lang_obj = mw.language.getContentLanguage(); -- make a language object for the local language; used here for languages and dates
Line 47:
]]
 
local uncategorized_namespaces_t = {[2]=true}; -- init with user namespace id
for k, _ in pairs (mw.site.talkNamespaces) do -- add all talk namespace ids
uncategorized_namespaces_t[k] = true;
Line 158:
['warning_msg_e'] = '<span style="color:#d33">One or more <code style="color: inherit; background: inherit; border: none; padding: inherit;">&#123;{$1}}</code> templates have errors</span>; messages may be hidden ([[Help:CS1_errors#Controlling_error_message_display|help]]).'; -- $1 is template link
['warning_msg_m'] = '<span style="color:#3a3085">One or more <code style="color: inherit; background: inherit; border: none; padding: inherit;">&#123;{$1}}</code> templates have maintenance messages</span>; messages may be hidden ([[Help:CS1_errors#Controlling_error_message_display|help]]).'; -- $1 is template link
}
 
Line 235:
 
['ext-link-access-signal'] = '<span class="$1" title="$2">$3</span>', -- external link with appropriate lock icon
['free'] = {class='cs1id-lock-free', title='Freely accessible'}, -- classes defined in Module:Citation/CS1/styles.css
['registration'] = {class='cs1id-lock-registration', title='Free registration required'},
['limited'] = {class='cs1id-lock-limited', title='Free access subject to limited trial, subscription normally required'},
['subscription'] = {class='cs1id-lock-subscription', title='Paid subscription required'},
 
['interwiki-icon'] = '<span class="$1" title="$2">$3</span>',
Line 479:
local punct_skip = {};
local url_skip = {};
 
 
--[[--------------------------< U R L _ A C C E S S _ M A P >--------------------------------------------------
 
this table is used by the wikipedia library url test, has_twl_url(), which automatically sets a url-access parameter
to 'subscription' when it discovers a wikipedia library url in any of the url-holding parameters used as keys in
this table.
 
translators: if your wiki uses parameter names for these url-holding parameters and their matching -access parameters,
add your wiki's parameters to this list. Leave the English parameters in place.
 
TODO: is there a better way to do this?
 
]]
 
-- Lookup table: url-holding parameter name => name of its matching -access parameter.
-- Several url-holding parameters may share one -access parameter ('mapurl' and 'map-url'
-- both map to 'map-url-access'; 'url' and 'URL' both map to 'url-access').
local url_access_map_t = {
	-- parameters whose -access parameter carries the same prefix as the key
	['chapter-url'] = 'chapter-url-access',
	['contribution-url'] = 'contribution-url-access',
	['entry-url'] = 'entry-url-access',
	['article-url'] = 'article-url-access',
	['section-url'] = 'section-url-access',
	['map-url'] = 'map-url-access',

	-- unhyphenated / alternate-case spellings that reuse an -access parameter listed above
	mapurl = 'map-url-access',
	url = 'url-access',
	URL = 'url-access',
}
 
 
Line 499 ⟶ 525:
 
]]
local is_Latn = 'A-Za-z\195\128-\195\150\195\152-\195\182\195\184-\198\191\199\132-\201\143\225\184\128-\225\187\191';
local special_case_translation = {
['AuthorList'] = 'authors list', -- used to assemble maintenance category names
Line 525 ⟶ 551:
{['en'] = {'^wayback%s+machine$', false}, ['local'] = nil},
{['en'] = {'are you a robot', true}, ['local'] = nil},
{['en'] = {'hugedomains.com', true}, ['local'] = nil},
{['en'] = {'^[%(%[{<]?no +title[>}%]%)]?$', false}, ['local'] = nil},
{['en'] = {'page not found', true}, ['local'] = nil},
Line 532 ⟶ 558:
{['en'] = {'website is for sale', true}, ['local'] = nil},
{['en'] = {'^404', false}, ['local'] = nil},
{['en'] = {'error[ %-]404', false}, ['local'] = nil},
{['en'] = {'internet archive wayback machine', true}, ['local'] = nil},
{['en'] = {'log into facebook', true}, ['local'] = nil},
Line 555 ⟶ 582:
{['en'] = {'allmusic', true}, ['local'] = nil},
{['en'] = {'%f[%a][Aa]uthor%f[%A]', false}, ['local'] = nil},
{['en'] = {'^[Bb]ureau$', false}, ['local'] = nil},
{['en'] = {'business', true}, ['local'] = nil},
{['en'] = {'cnn', true}, ['local'] = nil},
{['en'] = {'collaborator', true}, ['local'] = nil},
{['en'] = {'^[Cc]ompany$', false}, ['local'] = nil},
{['en'] = {'contributor', true}, ['local'] = nil},
{['en'] = {'contact us', true}, ['local'] = nil},
{['en'] = {'correspondent', true}, ['local'] = nil},
{['en'] = {'^[Dd]esk$', false}, ['local'] = nil},
{['en'] = {'directory', true}, ['local'] = nil},
{['en'] = {'%f[%(%[][%(%[]%s*eds?%.?%s*[%)%]]?$', false}, ['local'] = nil},
Line 567 ⟶ 598:
{['en'] = {'%f[%a][Ee]dited%f[%A]', false}, ['local'] = nil},
{['en'] = {'%f[%a][Ee]ditors?%f[%A]', false}, ['local'] = nil},
{['en'] = {'%f[%a]][Ee]mail%f[%A]', false}, ['local'] = nil},
{['en'] = {'facebook', true}, ['local'] = nil},
{['en'] = {'google', true}, ['local'] = nil},
{['en'] = {'^[Gg]roup$', false}, ['local'] = nil},
{['en'] = {'home page', true}, ['local'] = nil},
{['en'] = {'^[Ii]nc%.?$', false}, ['local'] = nil},
{['en'] = {'instagram', true}, ['local'] = nil},
{['en'] = {'interviewer', true}, ['local'] = nil},
{['en'] = {'^[Ll]imited$', false}, ['local'] = nil},
{['en'] = {'linkedIn', true}, ['local'] = nil},
{['en'] = {'^[Nn]ews$', false}, ['local'] = nil},
{['en'] = {'[Nn]ews[ %-]?[Rr]oom', false}, ['local'] = nil},
{['en'] = {'pinterest', true}, ['local'] = nil},
{['en'] = {'policy', true}, ['local'] = nil},
Line 588 ⟶ 622:
{['en'] = {'super.?user', false}, ['local'] = nil},
{['en'] = {'%f['..is_Latn..'][Uu]ser%f[^'..is_Latn..']', false}, ['local'] = nil},
{['en'] = {'updated', true}, ['local'] = nil},
{['en'] = {'verfasser', true}, ['local'] = nil},
}
Line 1,070 ⟶ 1,105:
 
-- list of emoji that use a zwj character (U+200D) to combine with another emoji
-- from: https://unicode.org/Public/emoji/16.0/emoji-zwj-sequences.txt; version: 16.0; 2024-08-14
-- table created by: [[:en:Module:Make emoji zwj table]]
local emoji_t = { -- indexes are decimal forms of the hex values in U+xxxx
Line 1,172 ⟶ 1,207:
 
local script_lang_codes = {
'ab', 'am', 'ar', 'az', 'be', 'bg', 'bn', 'bo', 'bs', 'dv', 'dz', 'el', 'face', 'guchr', 'hecu',
'hidv', 'hydz', 'jael', 'kafa', 'kkgrc', 'kmgu', 'knhe', 'kohi', 'kuhy', 'kyja', 'loka', 'mk', 'ml', 'mnkk',
'mnikm', 'mrkn', 'myko', 'neku', 'orky', 'otalo', 'pamk', 'psml', 'rumn', 'sdmni', 'simr', 'sr', 'syc', 'tamy',
'tene', 'tgor', 'thota', 'tipa', 'ttps', 'ugru', 'uksd', 'ursi', 'uzsr', 'yisyc', 'yueta', 'zhte',
'tg', 'th', 'ti', 'tkr', 'tt', 'ug', 'uk', 'ur', 'uz', 'yi', 'yue', 'zh',
'zgh'
};
 
Line 1,249 ⟶ 1,286:
['foreign-lang-source'] = 'CS1 $1-language sources ($2)', -- |language= categories; $1 is foreign-language name, $2 is ISO639-1 code
['foreign-lang-source-2'] = 'CS1 foreign language sources (ISO 639-2)|$1', -- |language= category; a cat for ISO639-2 languages; $1 is the ISO 639-2 code used as a sort key
['interproj-linked-name'] = 'CS1 interproject-linked names|$1', -- any author, editor, etc that has an interproject link; $1 is interproject tag used as a sort key
['interwiki-linked-name'] = 'CS1 interwiki-linked names|$1', -- any author, editor, etc that has an interwiki link; $1 is interwiki tag used as a sort key; yeilds to interproject
['local-lang-source'] = 'CS1 $1-language sources ($2)', -- |language= categories; $1 is local-language name, $2 is ISO639-1 code; not emitted when local_lang_cat_enable is false
['___location-test'] = 'CS1 ___location test',
['long-vol'] = 'CS1: long volume value', -- probably temporary cat to identify scope of |volume= values longer than 4 characters
['script'] = 'CS1 uses $1-language script ($2)', -- |script-title=xx: has matching category; $1 is language name, $2 is ISO639-1language codetag
['tracked-param'] = 'CS1 tracked parameter: $1', -- $1 is base (enumerators removed) parameter name
['unfit'] = 'CS1: unfit URL', -- |url-status=unfit or |url-status=usurped; used to be a maint cat
['vanc-accept'] = 'CS1:Vancouver names with accept markup', -- for |vauthors=/|veditors= with accept-as-written markup
['year-range-abbreviated'] = 'CS1: abbreviated year range', -- probably temporary cat to identify scope of |date=, |year= values using YYYY–YY form
}
Line 1,292 ⟶ 1,333:
local registrants_t = {};
for _, v in ipairs ({
'1045', '1074', '1096', '1100', '1155', '1186', '1194', '1371', '1629', '1989', '1999', '2147', '2196', '3285', '3389', '3390', '3410',
'3748', '3814', '3847', '3897', '4061', '4089', '4103', '4172', '4175', '42494230', '4236', '4239', '4240', '4249', '4251',
'4252', '4253', '4254', '4291', '4292', '4329', '4330', '4331', '5194', '5210', '5306', '5312', '5313', '5314',
'5315', '5316', '5317', '5318', '5319', '5320', '5321', '5334', '5402', '5409', '5410', '5411', '5412',
'5492', '5493', '5494', '5495', '5496', '5497', '5498', '5499', '5500', '5501', '5527', '5528', '5662',
'6064', '6219', '7167', '7217', '7287', '7482', '7490', '7554', '7717', '7759', '7766', '9778', '11131', '11569', '11647',
'11648', '12688', '12703', '12715', '12942', '12998', '13105', '14256', '14293', '14303', '15215', '15347', '15412', '15560', '16995',
'17645', '18637', '19080', '19173', '20944', '21037', '21468', '21767', '22261', '22323', '22459', '24105', '24196', '24966',
'26775', '30845', '32545', '35711', '35712', '35713', '35995', '36648', '37126', '37532', '37871', '47128',
'47622', '47959', '52437', '52975', '53288', '54081', '54947', '55667', '55914', '57009', '58647', '59081',
Line 1,308 ⟶ 1,349:
return registrants_t;
end
 
-- Table of doi registrants that are only partly free-to-read.  Each key is a registrant
-- code written as a string; each value is a sequence of strings, each the incipit
-- (leading text) of a doi suffix that identifies a free-to-read publication of that registrant.
-- The trailing comment on each line names the publications, in the same order as the incipits.
-- NOTE(review): how the incipits are compared against a doi (case handling, anchoring) is
-- decided by the consumer of this table, which is not shown here.
local extended_registrants_t = { -- known free registrants identifiable by the doi suffix incipit
['1002'] = {'aelm', 'leap'}, -- Advanced Electronic Materials, Learned Publishing
-- NOTE(review): the journal names given for 'j.nlp' (1016) and 'nlp' (1017) look swapped between these two comments — confirm against the publishers
['1016'] = {'j.heliyon', 'j.nlp', 'j.patter', 'j.proche'}, -- Heliyon, Natural Language Processing, Patterns, Procedia Chemistry
['1017'] = {'nlp'}, -- Natural Language Processing Journal
-- NOTE(review): 'j.1365-246x' is lowercase here but 'j.1365-246X' is uppercase under ['1111'] — confirm whether the comparison is case-sensitive
['1046'] = {'j.1365-8711', 'j.1365-246x'}, -- MNRAS, GJI
['1093'] = {'mnras', 'mnrasl', 'gji', 'rasti'}, -- MNRAS, MNRAS Letters, GJI, RASTI
['1099'] = {'acmi', 'mic', '00221287', 'mgen'}, -- Access Microbiology, Microbiology, Journal of General Microbiology, Microbial Genomics
['1111'] = {'j.1365-2966', 'j.1745-3933', 'j.1365-246X'}, -- MNRAS, MNRAS Letters, GJI
['1210'] = {'jendso','jcemcr'}, -- Journal of the Endocrine Society, JCEM Case Reports
['4171'] = {'dm','mag'}, -- Documenta Mathematica, EMS Magazine
['14231'] = {'ag'}, -- Algebraic Geometry
}
 
 
Line 1,737 ⟶ 1,791:
category = 'CS1 errors: generic title',
hidden = false,
},
err_invalid_isbn_date = {
message = 'ISBN / Date incompatibility',
anchor = 'invalid_isbn_date',
category = 'CS1 errors: ISBN date',
hidden = true
},
err_invalid_param_val = {
Line 1,796 ⟶ 1,856:
anchor = 'param_has_ext_link',
category = 'CS1 errors: external links',
hidden = false
},
err_param_has_twl_url = {
message = 'Wikipedia Library link in <code class="cs1-code">$1</code>', -- $1 is parameter name
anchor = 'param_has_twl_url',
category = 'CS1 errors: URL',
hidden = false
},
Line 1,943 ⟶ 2,009:
anchor = 'extra_punct',
category = 'CS1 maint: extra punctuation',
hidden = true,
},
maint_id_limit_load_fail = { -- applies to all cs1|2 templates on a page;
message = nil, -- maint message (category link) never emitted
anchor = 'id_limit_load_fail',
category = 'CS1 maint: ID limit load fail',
hidden = true,
},
Line 1,967 ⟶ 2,039:
anchor = '___location',
category = 'CS1 maint: ___location',
hidden = true,
},
maint_missing_class = {
message = nil,
anchor = 'missing_class',
category = 'CS1 maint: missing class',
hidden = true,
},
Line 2,003 ⟶ 2,081:
anchor = 'overridden',
category = 'CS1 maint: overridden setting',
hidden = true,
},
maint_page_art_num = {
message = nil,
anchor = 'page_art_num',
category = 'CS1 maint: article number as page number',
hidden = true,
},
Line 2,021 ⟶ 2,105:
anchor = 'postscript',
category = 'CS1 maint: postscript',
hidden = true,
},
maint_publisher_location = {
message = nil,
anchor = 'publisher_location',
category = 'CS1 maint: publisher ___location',
hidden = true,
},
Line 2,029 ⟶ 2,119:
hidden = true,
},
maint_unfit = {
message = nil,
anchor = 'unfit',
category = 'CS1 maint: unfit URL',
hidden = true,
},
maint_unknown_lang = {
message = nil,
Line 2,051 ⟶ 2,135:
anchor = 'url_status',
category = 'CS1 maint: url-status',
hidden = true,
},
maint_year= {
message = nil,
anchor = 'year',
category = 'CS1 maint: year',
hidden = true,
},
Line 2,064 ⟶ 2,154:
--[[--------------------------< I D _ L I M I T S _ D A T A _ T >----------------------------------------------
 
fetch id limits for certain identifiers from c:Data:CS1/Identifier limits.tab. This source is a json
tabular data file maintained at wikipedia commons. Convert the json format to a table of k/v pairs.

The values from <id_limits_data_t> are used to set handle.id_limit.

From 2025-02-21, MediaWiki is broken. Use this link to edit the tabular data file:
https://commons.wikimedia.org/w/index.php?title=Data:CS1/Identifier_limits.tab&action=edit
See Phab:T389105
 
]]
 
-- <id_limits_data_t> maps an identifier name ('OCLC', 'PMID', ...) to the upper limit for that identifier.
-- <id_limits_data_load_fail> is a flag: true when the tabular data could not be loaded from commons.
-- The flag is declared at file scope (not inside the else branch below) so that it is still
-- visible where this module builds its export table; a local declared inside the branch goes
-- out of scope at the branch's 'end' and the export would then read an undefined global (nil).
local id_limits_data_t = {};
local id_limits_data_load_fail = false;	-- flag; assume that we will be successful when loading json id limit tabular data

local use_commons_data = true;	-- set to false if your wiki does not have access to mediawiki commons; then,
if false == use_commons_data then	-- update this table from https://commons.wikimedia.org/wiki/Data:CS1/Identifier_limits.tab; last update: 2025-02-21
	id_limits_data_t = {['OCLC'] = 10450000000, ['OSTI'] = 23010000, ['PMC'] = 11900000, ['PMID'] = 40400000, ['RFC'] = 9300, ['SSRN'] = 5200000, ['S2CID'] = 276000000};	-- this table must be maintained locally

else	-- here for wikis that do have access to mediawiki commons
	local load_fail_limit = 99999999999;	-- very high number to avoid error messages on load failure
	id_limits_data_t = {['OCLC'] = load_fail_limit, ['OSTI'] = load_fail_limit, ['PMC'] = load_fail_limit, ['PMID'] = load_fail_limit, ['RFC'] = load_fail_limit, ['SSRN'] = load_fail_limit, ['S2CID'] = load_fail_limit};

	local tab_data_t = mw.ext.data.get ('CS1/Identifier limits.tab');	-- attempt to load the tabular data from commons into <tab_data_t>
	if not tab_data_t or 'table' ~= type (tab_data_t.data) then	-- undocumented 'feature': mw.ext.data.get() sometimes returns false; also guard against nil or a result without a .data sequence
		id_limits_data_load_fail = true;	-- set the flag so that Module:Citation/CS1 can create an unannotated maint category
	else
		for _, limit_t in ipairs (tab_data_t.data) do	-- overwrite default <load_fail_limit> values from the data table in the tabular data
			id_limits_data_t[limit_t[1]] = limit_t[2];	-- <limit_t[1]> is identifier; <limit_t[2]> is upper limit for that identifier
		end
	end
end
 
Line 2,187 ⟶ 2,298:
q = 'Q46339674',
label = 'eISSN',
prefix = 'https://wwwsearch.worldcat.org/issn/',
COinS = 'rft.eissn',
encode = false,
Line 2,230 ⟶ 2,341:
q = 'Q131276',
label = 'ISSN',
prefix = 'https://wwwsearch.worldcat.org/issn/',
COinS = 'rft.issn',
encode = false,
Line 2,298 ⟶ 2,409:
q = 'Q190593',
label = 'OCLC',
prefix = 'https://wwwsearch.worldcat.org/oclc/',
COinS = 'info:oclcnum',
encode = true,
Line 2,444 ⟶ 2,555:
url_skip = build_skip_table (url_skip, url_meta_params),
known_free_doi_registrants_t = build_free_doi_registrants_table(),
id_limits_data_load_fail = id_limits_data_load_fail, -- true when commons tabular identifier-limit data fails to load
 
name_space_sort_keys = name_space_sort_keys,
Line 2,453 ⟶ 2,565:
editor_markup_patterns = editor_markup_patterns,
et_al_patterns = et_al_patterns,
extended_registrants_t = extended_registrants_t,
id_handlers = id_handlers,
keywords_lists = keywords_lists,
Line 2,477 ⟶ 2,590:
vol_iss_pg_patterns = vol_iss_pg_patterns,
single_letter_2nd_lvl_domains_t = single_letter_2nd_lvl_domains_t,
url_access_map_t = url_access_map_t,
inter_wiki_map = inter_wiki_map,