Module:Citation/CS1/Configuration/sandbox: Difference between revisions

Content deleted Content added
No edit summary
+10.'9778'
 
(76 intermediate revisions by 6 users not shown)
Line 1:
--[[
History of changes since last sync: 2025-04-12
 
2025-04-23: add script-lang tkr;
2024-04-15: fix 'email' generic name pattern; see Help_talk:Citation_Style_1#CS1_errors:_generic_name
2025-04-27: change maint warning message in preview box from #3a3 to #085 to match cs1|2 maint message color;
2024-05-05: fix undeclared variable 'uncategorized_namespaces_t'.
2025-05-01: tweak tabular data fetch; see Help_talk:Citation_Style_1#Lua_error%3F
2024-06-21: Add 12942 free registrant, Living Reviews
2025-05-10: maint cat to track {{cite journal}} templates misusing |page= for |article-number=; see Help_talk:Citation_Style_1#|page=_same_value_as_last_n-digits_of_|doi=
2025-05-26: add 10.1016/j.patter to free DOI prefix recognition (Patterns)
2025-06-07: maint cat for post 2007 arxiv format without |class=; see Help_talk:Citation_Style_1#Category%3ACS1_maint%3A_missing_class_%3F
2025-06-11: add 'updated' as bad author name; see Help_talk:Citation_Style_1#Author_check
2025-07-29: fix url access parameter application on wikipedia library url errors; see Help_talk:Citation_Style_1#Wikipedia_Library_errors_on_a_talk_page
2025-08-03: Add 10.9778 to free DOI recognition (CMAJ Open)
 
]]
 
 
local lang_obj = mw.language.getContentLanguage(); -- make a language object for the local language; used here for languages and dates
Line 152 ⟶ 158:
['warning_msg_e'] = '<span style="color:#d33">One or more <code style="color: inherit; background: inherit; border: none; padding: inherit;">&#123;{$1}}</code> templates have errors</span>; messages may be hidden ([[Help:CS1_errors#Controlling_error_message_display|help]]).'; -- $1 is template link
['warning_msg_m'] = '<span style="color:#3a3085">One or more <code style="color: inherit; background: inherit; border: none; padding: inherit;">&#123;{$1}}</code> templates have maintenance messages</span>; messages may be hidden ([[Help:CS1_errors#Controlling_error_message_display|help]]).'; -- $1 is template link
}
 
Line 473 ⟶ 479:
local punct_skip = {};
local url_skip = {};
 
 
--[[--------------------------< U R L _ A C C E S S _ M A P >--------------------------------------------------
 
this table is used by the wikipedia library url test, has_twl_url(), which automatically sets a url-access parameter
to 'subscription' when it discovers a wikipedia library url in any of the url-holding parameters used as keys in
this table.
 
translators: if your wiki uses parameter names for these url-holding parameters and their matching -access parameters,
add your wiki's parameters to this list. Leave the English parameters in place.
 
TODO: is there a better way to do this?
 
]]
 
local url_access_map_t = {														-- key: url-holding parameter name; value: its matching -access parameter name
	-- plain url parameters
	['url'] = 'url-access',
	['URL'] = 'url-access',

	-- in-source ___location url parameters
	['article-url'] = 'article-url-access',
	['chapter-url'] = 'chapter-url-access',
	['contribution-url'] = 'contribution-url-access',
	['entry-url'] = 'entry-url-access',
	['section-url'] = 'section-url-access',

	-- map url parameters; both spellings share one -access parameter
	['map-url'] = 'map-url-access',
	['mapurl'] = 'map-url-access',
}
 
 
Line 519 ⟶ 551:
{['en'] = {'^wayback%s+machine$', false}, ['local'] = nil},
{['en'] = {'are you a robot', true}, ['local'] = nil},
{['en'] = {'hugedomains.com', true}, ['local'] = nil},
{['en'] = {'^[%(%[{<]?no +title[>}%]%)]?$', false}, ['local'] = nil},
{['en'] = {'page not found', true}, ['local'] = nil},
Line 526 ⟶ 558:
{['en'] = {'website is for sale', true}, ['local'] = nil},
{['en'] = {'^404', false}, ['local'] = nil},
{['en'] = {'error[ %-]404', false}, ['local'] = nil},
{['en'] = {'internet archive wayback machine', true}, ['local'] = nil},
{['en'] = {'log into facebook', true}, ['local'] = nil},
Line 549 ⟶ 582:
{['en'] = {'allmusic', true}, ['local'] = nil},
{['en'] = {'%f[%a][Aa]uthor%f[%A]', false}, ['local'] = nil},
{['en'] = {'^[Bb]ureau$', false}, ['local'] = nil},
{['en'] = {'business', true}, ['local'] = nil},
{['en'] = {'cnn', true}, ['local'] = nil},
{['en'] = {'collaborator', true}, ['local'] = nil},
{['en'] = {'^[Cc]ompany$', false}, ['local'] = nil},
{['en'] = {'contributor', true}, ['local'] = nil},
{['en'] = {'contact us', true}, ['local'] = nil},
{['en'] = {'correspondent', true}, ['local'] = nil},
{['en'] = {'^[Dd]esk$', false}, ['local'] = nil},
{['en'] = {'directory', true}, ['local'] = nil},
{['en'] = {'%f[%(%[][%(%[]%s*eds?%.?%s*[%)%]]?$', false}, ['local'] = nil},
Line 564 ⟶ 601:
{['en'] = {'facebook', true}, ['local'] = nil},
{['en'] = {'google', true}, ['local'] = nil},
{['en'] = {'^[Gg]roup$', false}, ['local'] = nil},
{['en'] = {'home page', true}, ['local'] = nil},
{['en'] = {'^[Ii]nc%.?$', false}, ['local'] = nil},
{['en'] = {'instagram', true}, ['local'] = nil},
{['en'] = {'interviewer', true}, ['local'] = nil},
{['en'] = {'^[Ll]imited$', false}, ['local'] = nil},
{['en'] = {'linkedIn', true}, ['local'] = nil},
{['en'] = {'^[Nn]ews$', false}, ['local'] = nil},
{['en'] = {'[Nn]ews[ %-]?[Rr]oom', false}, ['local'] = nil},
{['en'] = {'pinterest', true}, ['local'] = nil},
{['en'] = {'policy', true}, ['local'] = nil},
Line 582 ⟶ 622:
{['en'] = {'super.?user', false}, ['local'] = nil},
{['en'] = {'%f['..is_Latn..'][Uu]ser%f[^'..is_Latn..']', false}, ['local'] = nil},
{['en'] = {'updated', true}, ['local'] = nil},
{['en'] = {'verfasser', true}, ['local'] = nil},
}
Line 1,064 ⟶ 1,105:
 
-- list of emoji that use a zwj character (U+200D) to combine with another emoji
-- from: https://unicode.org/Public/emoji/16.0/emoji-zwj-sequences.txt; version: 16.0; 2024-08-14
-- table created by: [[:en:Module:Make emoji zwj table]]
local emoji_t = { -- indexes are decimal forms of the hex values in U+xxxx
Line 1,166 ⟶ 1,207:
 
-- sequence of language tags; kept in alphabetical order, twelve tags per row.
-- Each entry must be a single, complete tag: fused entries such as 'face' (fa + ce) or 'guchr' (gu + chr)
-- are not valid tags and hide the real ones.
local script_lang_codes = {
	'ab', 'am', 'ar', 'az', 'be', 'bg', 'bn', 'bo', 'bs', 'ce', 'chr', 'cu',
	'dv', 'dz', 'el', 'fa', 'grc', 'gu', 'he', 'hi', 'hy', 'ja', 'ka', 'kk',
	'km', 'kn', 'ko', 'ku', 'ky', 'lo', 'mk', 'ml', 'mn', 'mni', 'mr', 'my',
	'ne', 'or', 'ota', 'pa', 'ps', 'ru', 'sd', 'si', 'sr', 'syc', 'ta', 'te',
	'tg', 'th', 'ti', 'tkr', 'tt', 'ug', 'uk', 'ur', 'uz', 'yi', 'yue', 'zh',
	'zgh'
	};
 
Line 1,248 ⟶ 1,291:
['___location-test'] = 'CS1 ___location test',
['long-vol'] = 'CS1: long volume value', -- probably temporary cat to identify scope of |volume= values longer than 4 characters
['script'] = 'CS1 uses $1-language script ($2)', -- |script-title=xx: has matching category; $1 is language name, $2 is ISO639-1language codetag
['tracked-param'] = 'CS1 tracked parameter: $1', -- $1 is base (enumerators removed) parameter name
['unfit'] = 'CS1: unfit URL', -- |url-status=unfit or |url-status=usurped; used to be a maint cat
['vanc-accept'] = 'CS1:Vancouver names with accept markup', -- for |vauthors=/|veditors= with accept-as-written markup
['year-range-abbreviated'] = 'CS1: abbreviated year range', -- probably temporary cat to identify scope of |date=, |year= values using YYYY–YY form
}
Line 1,289 ⟶ 1,334:
for _, v in ipairs ({
'1045', '1074', '1096', '1100', '1155', '1186', '1194', '1371', '1629', '1989', '1999', '2147', '2196', '3285', '3389', '3390',
'3748', '3814', '3847', '3897', '4061', '4089', '4103', '4172', '4175', '42494230', '4236', '4239', '4240', '4249', '4251',
'4252', '4253', '4254', '4291', '4292', '4329', '4330', '4331', '5194', '5210', '5306', '5312', '5313', '5314',
'5315', '5316', '5317', '5318', '5319', '5320', '5321', '5334', '5402', '5409', '5410', '5411', '5412',
'5492', '5493', '5494', '5495', '5496', '5497', '5498', '5499', '5500', '5501', '5527', '5528', '5662',
'6064', '6219', '7167', '7217', '7287', '7482', '7490', '7554', '7717', '7759', '7766', '9778', '11131', '11569', '11647',
'11648', '12688', '12703', '12715', '12942', '12998', '13105', '14256', '14293', '14303', '15215', '15347', '15412', '15560', '16995',
'17645', '18637', '19080', '19173', '20944', '21037', '21468', '21767', '22261', '22323', '22459', '24105', '24196', '24966',
'26775', '30845', '32545', '35711', '35712', '35713', '35995', '36648', '37126', '37532', '37871', '47128',
'47622', '47959', '52437', '52975', '53288', '54081', '54947', '55667', '55914', '57009', '58647', '59081',
Line 1,305 ⟶ 1,350:
end
 
-- known free registrants identifiable by an early portion of the doi suffix incipit
-- key: registrant code as a string; value: sequence of suffix incipits for that registrant.
-- Each key appears exactly once: a repeated key in a table constructor leaves only one of the
-- competing values in the table, so a stale duplicate is dead text that misleads maintainers.
local extended_registrants_t = {
	['1002'] = {'aelm', 'leap'},												-- Advanced Electronic Materials, Learned Publishing
	['1016'] = {'j.heliyon', 'j.nlp', 'j.patter', 'j.proche'},					-- Heliyon, Natural Language Processing, Patterns, Procedia Chemistry
	['1017'] = {'nlp'},															-- Natural Language Processing Journal
	['1046'] = {'j.1365-8711', 'j.1365-246x'},									-- MNRAS, GJI
	['1093'] = {'mnras', 'mnrasl', 'gji', 'rasti'},								-- MNRAS, MNRAS Letters, GJI, RASTI
	['1099'] = {'acmi', 'mic', '00221287', 'mgen'},								-- Access Microbiology, Microbiology, Journal of General Microbiology, Microbial Genomics
	['1111'] = {'j.1365-2966', 'j.1745-3933', 'j.1365-246X'},					-- MNRAS, MNRAS Letters, GJI
	['1210'] = {'jendso','jcemcr'},												-- Journal of the Endocrine Society, JCEM Case Reports
	['4171'] = {'dm','mag'},													-- Documenta Mathematica, EMS Magazine
	['14231'] = {'ag'},															-- Algebraic Geometry
	}
 
Line 1,739 ⟶ 1,791:
category = 'CS1 errors: generic title',
hidden = false,
},
err_invalid_isbn_date = {
message = 'ISBN / Date incompatibility',
anchor = 'invalid_isbn_date',
category = 'CS1 errors: ISBN date',
hidden = true
},
err_invalid_param_val = {
Line 1,798 ⟶ 1,856:
anchor = 'param_has_ext_link',
category = 'CS1 errors: external links',
hidden = false
},
err_param_has_twl_url = {
message = 'Wikipedia Library link in <code class="cs1-code">$1</code>', -- $1 is parameter name
anchor = 'param_has_twl_url',
category = 'CS1 errors: URL',
hidden = false
},
Line 1,945 ⟶ 2,009:
anchor = 'extra_punct',
category = 'CS1 maint: extra punctuation',
hidden = true,
},
maint_id_limit_load_fail = { -- applies to all cs1|2 templates on a page;
message = nil, -- maint message (category link) never emitted
anchor = 'id_limit_load_fail',
category = 'CS1 maint: ID limit load fail',
hidden = true,
},
Line 1,969 ⟶ 2,039:
anchor = '___location',
category = 'CS1 maint: ___location',
hidden = true,
},
maint_missing_class = {
message = nil,
anchor = 'missing_class',
category = 'CS1 maint: missing class',
hidden = true,
},
Line 2,005 ⟶ 2,081:
anchor = 'overridden',
category = 'CS1 maint: overridden setting',
hidden = true,
},
maint_page_art_num = {
message = nil,
anchor = 'page_art_num',
category = 'CS1 maint: article number as page number',
hidden = true,
},
Line 2,023 ⟶ 2,105:
anchor = 'postscript',
category = 'CS1 maint: postscript',
hidden = true,
},
maint_publisher_location = {
message = nil,
anchor = 'publisher_location',
category = 'CS1 maint: publisher ___location',
hidden = true,
},
Line 2,031 ⟶ 2,119:
hidden = true,
},
maint_unfit = {
message = nil,
anchor = 'unfit',
category = 'CS1 maint: unfit URL',
hidden = true,
},
maint_unknown_lang = {
message = nil,
Line 2,076 ⟶ 2,158:
 
The values from <id_limits_data_t> are used to set handle.id_limit.
 
From 2025-02-21, MediaWiki is broken. Use this link to edit the tabular data file:
https://commons.wikimedia.org/w/index.php?title=Data:CS1/Identifier_limits.tab&action=edit
See Phab:T389105
 
]]
 
local id_limits_data_t = {};													-- k/v table: identifier name => upper limit for that identifier
local id_limits_data_load_fail = false;											-- flag; assume that we will be successful when loading json id limit tabular data
																				-- declared at chunk scope (not inside the else branch below) because the module's
																				-- export table reads it; a branch-local flag would be out of scope there and always nil

local use_commons_data = true;													-- set to false if your wiki does not have access to mediawiki commons; then,
if false == use_commons_data then												-- update this table from https://commons.wikimedia.org/wiki/Data:CS1/Identifier_limits.tab; last update: 2025-02-21
	id_limits_data_t = {['OCLC'] = 10450000000, ['OSTI'] = 23010000, ['PMC'] = 11900000, ['PMID'] = 40400000, ['RFC'] = 9300, ['SSRN'] = 5200000, ['S2CID'] = 276000000};	-- this table must be maintained locally

else																			-- here for wikis that do have access to mediawiki commons
	local load_fail_limit = 99999999999;										-- very high number to avoid error messages on load failure
	id_limits_data_t = {['OCLC'] = load_fail_limit, ['OSTI'] = load_fail_limit, ['PMC'] = load_fail_limit, ['PMID'] = load_fail_limit, ['RFC'] = load_fail_limit, ['SSRN'] = load_fail_limit, ['S2CID'] = load_fail_limit};

	local tab_data_t = mw.ext.data.get ('CS1/Identifier limits.tab');			-- attempt to load the tabular data from commons into <tab_data_t>
	if false == tab_data_t then													-- undocumented 'feature': mw.ext.data.get() sometimes returns false
		id_limits_data_load_fail = true;										-- set the flag so that Module:Citation/CS1 can create an unannotated maint category
	else
		for _, limit_t in ipairs (tab_data_t.data) do							-- overwrite default <load_fail_limit> values from the data table in the tabular data
			id_limits_data_t[limit_t[1]] = limit_t[2];							-- <limit[1]> is identifier; <limit[2]> is upper limit for that identifier
		end
	end
end
 
Line 2,195 ⟶ 2,298:
q = 'Q46339674',
label = 'eISSN',
prefix = 'https://wwwsearch.worldcat.org/issn/',
COinS = 'rft.eissn',
encode = false,
Line 2,238 ⟶ 2,341:
q = 'Q131276',
label = 'ISSN',
prefix = 'https://wwwsearch.worldcat.org/issn/',
COinS = 'rft.issn',
encode = false,
Line 2,306 ⟶ 2,409:
q = 'Q190593',
label = 'OCLC',
prefix = 'https://wwwsearch.worldcat.org/oclc/',
COinS = 'info:oclcnum',
encode = true,
Line 2,452 ⟶ 2,555:
url_skip = build_skip_table (url_skip, url_meta_params),
known_free_doi_registrants_t = build_free_doi_registrants_table(),
id_limits_data_load_fail = id_limits_data_load_fail, -- true when commons tabular identifier-limit data fails to load
 
name_space_sort_keys = name_space_sort_keys,
Line 2,486 ⟶ 2,590:
vol_iss_pg_patterns = vol_iss_pg_patterns,
single_letter_2nd_lvl_domains_t = single_letter_2nd_lvl_domains_t,
url_access_map_t = url_access_map_t,
inter_wiki_map = inter_wiki_map,