Module:Citation/CS1/Configuration: Difference between revisions

Content deleted Content added
m bump pmc;
 
(19 intermediate revisions by one other user not shown)
Line 17:
local date_name_auto_xlate_enable = false; -- when true translates English month-names to the local-wiki's language month names; always false at en.wiki
local date_digit_auto_xlate_enable = false; -- when true translates Western date digit to the local-wiki's language digits (date_names['local_digits']); always false at en.wiki
local enable_sort_keys = true; -- when true module adds namespace sort keys to error and maintenance category links
 
 
Line 30 ⟶ 31:
]]
 
local uncategorized_namespaces_t = {[2]=true}; -- init with user namespace id
for k, _ in pairs (mw.site.talkNamespaces) do -- add all talk namespace ids
uncategorized_namespaces_t[k] = true;
Line 37 ⟶ 38:
local uncategorized_subpages = {'/[Ss]andbox', '/[Tt]estcases', '/[^/]*[Ll]og', '/[Aa]rchive'}; -- list of Lua patterns found in page names of pages we should not categorize
 
--[[
at en.wiki Greek characters are used as sort keys for certain items in a category so that those items are
placed at the end of a category page. See Wikipedia:Categorization#Sort_keys. That works well for en.wiki
because English is written using the Latn script. This may not work well for other languages. At en.wiki it
is desirable to place content from certain namespaces at the end of a category listing so the module adds sort
keys to error and maintenance category links when rendering a cs1|2 template on a page in that namespace.
 
i18n: if this does not work well for your language, set <enable_sort_keys> to false.
]]
 
local name_space_sort_keys = {		-- maps a namespace id to the sort key used for pages in that namespace
	other = 'ο',		-- omicron; fallback for all other non-talk namespaces except main (article)
	[118] = 'Δ',		-- delta; draft namespace
	[10] = 'τ',		-- tau; template namespace
	[4] = 'ω',		-- omega; wikipedia (project) namespace
	};
 
--[[--------------------------< M E S S A G E S >--------------------------------------------------------------
Line 114 ⟶ 131:
-- categories
['cat wikilink'] = '[[Category:$1]]', -- $1 is the category name
['cat wikilink sk'] = '[[Category:$1|$2]]', -- $1 is the category name; $2 is namespace sort key
[':cat wikilink'] = '[[:Category:$1|link]]', -- category name as maintenance message wikilink; $1 is the category name
 
Line 201 ⟶ 219:
 
['ext-link-access-signal'] = '<span class="$1" title="$2">$3</span>', -- external link with appropriate lock icon
['free'] = {class='cs1id-lock-free', title='Freely accessible'}, -- classes defined in Module:Citation/CS1/styles.css
['registration'] = {class='cs1id-lock-registration', title='Free registration required'},
['limited'] = {class='cs1id-lock-limited', title='Free access subject to limited trial, subscription normally required'},
['subscription'] = {class='cs1id-lock-subscription', title='Paid subscription required'},
 
['interwiki-icon'] = '<span class="$1" title="$2">$3</span>',
Line 264 ⟶ 282:
['ASINTLD'] = 'asin-tld',
['At'] = 'at', -- Used by InternetArchiveBot
['Authors'] = {'authors', 'people', 'credits'},
['BookTitle'] = {'book-title', 'booktitle'},
['Cartography'] = 'cartography',
Line 270 ⟶ 288:
['ChapterFormat'] = {'chapter-format', 'contribution-format', 'entry-format',
'article-format', 'section-format'};
['ChapterURL'] = {'chapter-url', 'contribution-url', 'entry-url', 'article-url', 'section-url', 'chapterurl'}, -- Used by InternetArchiveBot
['ChapterUrlAccess'] = {'chapter-url-access', 'contribution-url-access',
'entry-url-access', 'article-url-access', 'section-url-access'}, -- Used by InternetArchiveBot
Line 327 ⟶ 345:
['ScriptChapter'] = {'script-chapter', 'script-contribution', 'script-entry',
'script-article', 'script-section'},
['ScriptEncyclopedia'] = {'script-encyclopedia', 'script-encyclopaedia'}, -- cite encyclopedia only
['ScriptMap'] = 'script-map',
['ScriptPeriodical'] = {'script-journal', 'script-magazine', 'script-newspaper',
Line 351 ⟶ 370:
['TranscriptFormat'] = 'transcript-format',
['TranscriptURL'] = 'transcript-url', -- Used by InternetArchiveBot
['TransEncyclopedia'] = {'trans-encyclopedia', 'trans-encyclopaedia'}, -- cite encyclopedia only
['TransMap'] = 'trans-map', -- cite map only
['TransPeriodical'] = {'trans-journal', 'trans-magazine', 'trans-newspaper',
Line 452 ⟶ 472:
]]
 
local single_letter_2nd_lvl_domains_t = {'cash', 'company', 'foundation', 'media', 'org', 'today'};
 
 
Line 463 ⟶ 483:
 
]]
local is_Latn = 'A-Za-z\195\128-\195\150\195\152-\195\182\195\184-\198\191\199\132-\201\143\225\184\128-\225\187\191';
local special_case_translation = {
['AuthorList'] = 'authors list', -- used to assemble maintenance category names
Line 489 ⟶ 509:
{['en'] = {'^wayback%s+machine$', false}, ['local'] = nil},
{['en'] = {'are you a robot', true}, ['local'] = nil},
{['en'] = {'hugedomains.com', true}, ['local'] = nil},
{['en'] = {'^[%(%[{<]?no +title[>}%]%)]?$', false}, ['local'] = nil},
{['en'] = {'page not found', true}, ['local'] = nil},
Line 496 ⟶ 516:
{['en'] = {'website is for sale', true}, ['local'] = nil},
{['en'] = {'^404', false}, ['local'] = nil},
{['en'] = {'error[ %-]404', false}, ['local'] = nil},
{['en'] = {'internet archive wayback machine', true}, ['local'] = nil},
{['en'] = {'log into facebook', true}, ['local'] = nil},
Line 519 ⟶ 540:
{['en'] = {'allmusic', true}, ['local'] = nil},
{['en'] = {'%f[%a][Aa]uthor%f[%A]', false}, ['local'] = nil},
{['en'] = {'^[Bb]ureau$', false}, ['local'] = nil},
{['en'] = {'business', true}, ['local'] = nil},
{['en'] = {'cnn', true}, ['local'] = nil},
{['en'] = {'collaborator', true}, ['local'] = nil},
{['en'] = {'^[Cc]ompany$', false}, ['local'] = nil},
{['en'] = {'contributor', true}, ['local'] = nil},
{['en'] = {'contact us', true}, ['local'] = nil},
{['en'] = {'correspondent', true}, ['local'] = nil},
{['en'] = {'^[Dd]esk$', false}, ['local'] = nil},
{['en'] = {'directory', true}, ['local'] = nil},
{['en'] = {'%f[%(%[][%(%[]%s*eds?%.?%s*[%)%]]?$', false}, ['local'] = nil},
Line 531 ⟶ 556:
{['en'] = {'%f[%a][Ee]dited%f[%A]', false}, ['local'] = nil},
{['en'] = {'%f[%a][Ee]ditors?%f[%A]', false}, ['local'] = nil},
{['en'] = {'%f[%a][Ee]mail%f[%A]', false}, ['local'] = nil},		-- whole word 'Email' / 'email'; a stray ']' after the first frontier set made the former pattern unmatchable
{['en'] = {'facebook', true}, ['local'] = nil},
{['en'] = {'google', true}, ['local'] = nil},
{['en'] = {'^[Gg]roup$', false}, ['local'] = nil},
{['en'] = {'home page', true}, ['local'] = nil},
{['en'] = {'^[Ii]nc%.?$', false}, ['local'] = nil},
{['en'] = {'instagram', true}, ['local'] = nil},
{['en'] = {'interviewer', true}, ['local'] = nil},
{['en'] = {'^[Ll]imited$', false}, ['local'] = nil},
{['en'] = {'linkedIn', true}, ['local'] = nil},
{['en'] = {'^[Nn]ews$', false}, ['local'] = nil},
{['en'] = {'[Nn]ews[ %-]?[Rr]oom', false}, ['local'] = nil},
{['en'] = {'pinterest', true}, ['local'] = nil},
{['en'] = {'policy', true}, ['local'] = nil},
Line 581 ⟶ 609:
 
local_date_names_from_mediawiki is a boolean. When set to:
true – module will fetch local month names from MediaWiki for both date_names['local']['long'] and date_names['local']['short']; this will unconditionally overwrite manual translations
false – module will *not* fetch local month names from MediaWiki
 
Line 593 ⟶ 621:
]]
 
local local_date_names_from_mediawiki = true; -- when false, manual translation required for date_names['local']['long'] and date_names['local']['short']; overwrites manual translations
-- when true, module fetches long and short month names from MediaWiki
local date_names = {
Line 674 ⟶ 702:
 
local function get_date_format ()
-- if title_object.namespace == 10 then -- not in template space so that unused templates appear in unused-template-reports;
if not content then -- nil content when we're in template
return nil; -- auto-formatting does not work in Template space so don't set global_df
Line 681 ⟶ 708:
local start, _, match = content:find(pattern); -- match is the three letters indicating desired date format
if match then
local use_dates_template = content:match ('%b{}', start); -- get the whole template
if use_dates_template:match ('| *cs1%-dates *= *[lsy][sy]?') then -- look for |cs1-dates=publication date length access-/archive-date length
return match:lower() .. '-' .. use_dates_template:match ('| *cs1%-dates *= *([lsy][sy]?)');
else
Line 737 ⟶ 764:
'^[Pp]gs.?',
},
vpatternsvi_patterns_t = { -- patternscombined to catch volume-like text in |issue= and issue-like text forin |volume=
'^volumes?', -- volume-like text
'^vols?[%.:=]?',
 
},
ipatterns = { '^issues?', --issue-like patterns for |issue=text
'^issues?',
'^iss[%.:=]?',
'^numbers?',
'^nos?%A', -- don't match 'november' or 'nostradamus'
'^nr[%.:=]?',
'^n[%.:= ]' , -- might be a valid issue without separator (space char is sep char here)
'^n°', -- 'n' with degree sign (U+00B0)
}
'^№', -- precomposed unicode numero character (U+2116)
},
}
 
Line 898 ⟶ 926:
 
local function get_cs1_config ()
-- if title_object.namespace == 10 then -- not in template space so that unused templates appear in unused-template-reports;
if not content then -- nil content when we're in template
return nil; -- auto-formatting does not work in Template space so don't set global_df
Line 1,035 ⟶ 1,062:
 
-- list of emoji that use a zwj character (U+200D) to combine with another emoji
-- from: https://unicode.org/Public/emoji/1516.0/emoji-zwj-sequences.txt; version: 1516.0; 20222024-0508-0614
-- table created by: [[:en:Module:Make emoji zwj table]]
local emoji_t = { -- indexes are decimal forms of the hex values in U+xxxx
[8596] = true, -- U+2194 ↔ left right arrow
[8597] = true, -- U+2195 ↕ up down arrow
[9760] = true, -- U+2620 ☠ skull and crossbones
[9792] = true, -- U+2640 ♀ female sign
Line 1,047 ⟶ 1,076:
[10052] = true, -- U+2744 ❄ snowflake
[10084] = true, -- U+2764 ❤ heavy black heart
[10145] = true, -- U+27A1 ➡ black rightwards arrow
[11035] = true, -- U+2B1B ⬛ black large square
[127752] = true, -- U+1F308 🌈 rainbow
Line 1,064 ⟶ 1,094:
[128105] = true, -- U+1F469 👩 woman
[128139] = true, -- U+1F48B 💋 kiss mark
[128165] = true, -- U+1F4A5 💥 collision symbol
[128168] = true, -- U+1F4A8 💨 dash symbol
[128171] = true, -- U+1F4AB 💫 dizzy symbol
Line 1,074 ⟶ 1,105:
[128640] = true, -- U+1F680 🚀 rocket
[128658] = true, -- U+1F692 🚒 fire engine
[129001] = true, -- U+1F7E9 🟩 large green square
[129003] = true, -- U+1F7EB 🟫 large brown square
[129309] = true, -- U+1F91D 🤝 handshake
[129455] = true, -- U+1F9AF 🦯 probing cane
Line 1,084 ⟶ 1,117:
[129469] = true, -- U+1F9BD 🦽 manual wheelchair
[129489] = true, -- U+1F9D1 🧑 adult
[129490] = true, -- U+1F9D2 🧒 child
[129657] = true, -- U+1FA79 🩹 adhesive bandage
[129778] = true, -- U+1FAF2 🫲 leftwards hand
Line 1,130 ⟶ 1,164:
 
local script_lang_codes = {
'ab', 'am', 'ar', 'az', 'be', 'bg', 'bn', 'bo', 'bs', 'dvce', 'dzchr', 'eldv', 'fa', 'gu', 'hedz',
'hiel', 'hyfa', 'jagrc', 'kagu', 'kkhe', 'kmhi', 'knhy', 'koja', 'kuka', 'kykk', 'lokm', 'mk', 'mlkn', 'mnko',
'mniku', 'mrky', 'mylo', 'nemk', 'orml', 'otamn', 'pamni', 'psmr', 'rumy', 'sdne', 'sior', 'sr', 'syc', 'taota',
'tepa', 'tgps', 'thru', 'tisd', 'ttsi', 'ugsr', 'uksyc', 'urta', 'uzte', 'yitg', 'yueth', 'zhti', 'tt',
'ug', 'uk', 'ur', 'uz', 'yi', 'yue', 'zh', 'zgh'
};
 
Line 1,164 ⟶ 1,199:
['ca-valencia'] = 'Valencian', -- IETF variant of Catalan
['fkv'] = 'Kven', -- MediaWiki returns Kvensk
['gsw'] = 'Swiss German',
['ilo'] = 'Ilocano', -- MediaWiki/IANA/ISO 639: Iloko; use en.wiki preferred name
['ksh'] = 'Kölsch', -- MediaWiki: Colognian; use IANA/ISO 639 preferred name
Line 1,169 ⟶ 1,205:
['mis-x-ripuar'] = 'Ripuarian', -- override MediaWiki ksh; no IANA/ISO 639 code for Ripuarian; IETF private code created at Module:Lang/data
['nan-tw'] = 'Taiwanese Hokkien', -- make room for MediaWiki/IANA/ISO 639 nan: Min Nan Chinese and support en.wiki preferred name
['sr-ec'] = 'Serbian (Cyrillic script)', -- MediaWiki returns српски (ћирилица)
['sr-el'] = 'Serbian (Latin script)', -- MediaWiki returns srpski (latinica)
}
 
local lang_name_remap = { -- used for |language=; names require proper capitalization; tags must be lowercase
['alemannic'] = {'Swiss German', 'gsw'}, -- ISO 639-2, -3 alternate for Swiss German; MediaWiki mediawiki returns Alemannic for gsw; en.wiki preferred name
['alemannisch'] = {'Swiss German', 'gsw'}, -- not an ISO or IANA language name; MediaWiki uses 'als' as a subdomain name for Alemannic Wikipedia: als.wikipedia.org
['bangla'] = {'Bengali', 'bn'}, -- MediaWiki returns Bangla (the endonym) but we want Bengali (the exonym); here we remap
Line 1,185 ⟶ 1,224:
['kvensk'] = {'Kven', 'fkv'}, -- ...they say to refer to IANA registry for English names
['ripuarian'] = {'Ripuarian', 'mis-x-ripuar'}, -- group of dialects; no code in MediaWiki or in IANA/ISO 639
['serbian (cyrillic script)'] = {'Serbian (Cyrillic script)', 'sr-cyrl'}, -- special case to get correct tag when |language=sr-ec
['serbian (latin script)'] = {'Serbian (Latin script)', 'sr-latn'}, -- special case to get correct tag when |language=sr-el
['swiss german'] = {'Swiss German', 'gsw'},
['taiwanese hokkien'] = {'Taiwanese Hokkien', 'nan-tw'}, -- make room for MediaWiki/IANA/ISO 639 nan: Min Nan Chinese
['tosk albanian'] = {'Tosk Albanian', 'als'}, -- MediaWiki replaces 'Tosk Albanian' with 'Alemannisch' so 'Tosk Albanian' cannot be found
Line 1,200 ⟶ 1,242:
['foreign-lang-source'] = 'CS1 $1-language sources ($2)', -- |language= categories; $1 is foreign-language name, $2 is ISO639-1 code
['foreign-lang-source-2'] = 'CS1 foreign language sources (ISO 639-2)|$1', -- |language= category; a cat for ISO639-2 languages; $1 is the ISO 639-2 code used as a sort key
['interproj-linked-name'] = 'CS1 interproject-linked names|$1', -- any author, editor, etc that has an interproject link; $1 is interproject tag used as a sort key
['jul-greg-uncertainty'] = 'CS1: Julian–Gregorian uncertainty', -- probably temporary cat to identify scope of template with dates 1 October 1582 – 1 January 1926
['interwiki-linked-name'] = 'CS1 interwiki-linked names|$1', -- any author, editor, etc that has an interwiki link; $1 is interwiki tag used as a sort key; yeilds to interproject
['local-lang-source'] = 'CS1 $1-language sources ($2)', -- |language= categories; $1 is local-language name, $2 is ISO639-1 code; not emitted when local_lang_cat_enable is false
['___location-test'] = 'CS1 ___location test',
['long-vol'] = 'CS1: long volume value', -- probably temporary cat to identify scope of |volume= values longer than 4 characters
['script'] = 'CS1 uses $1-language script ($2)', -- |script-title=xx: has matching category; $1 is language name, $2 is ISO639-1language codetag
['tracked-param'] = 'CS1 tracked parameter: $1', -- $1 is base (enumerators removed) parameter name
['unfit'] = 'CS1: unfit URL', -- |url-status=unfit or |url-status=usurped; used to be a maint cat
['vanc-accept'] = 'CS1:Vancouver names with accept markup', -- for |vauthors=/|veditors= with accept-as-written markup
['year-range-abbreviated'] = 'CS1: abbreviated year range', -- probably temporary cat to identify scope of |date=, |year= values using YYYY–YY form
}
Line 1,228 ⟶ 1,273:
['techreport'] = 'Technical report',
['thesis'] = 'Thesis',
}
 
 
--[[--------------------------< B U I L D _ K N O W N _ F R E E _ D O I _ R E G I S T R A N T S _ T A B L E >--
 
build a table of doi registrants known to be free-to-read.  In a doi, the registrant ID is the series of digits
between the '10.' and the first '/': in doi 10.1100/sommat, 1100 is the registrant ID
 
see §3.2.2 DOI prefix of the Doi Handbook p. 43
https://www.doi.org/doi-handbook/DOI_Handbook_Final.pdf#page=43
 
]]
 
-- Returns a fresh k/v table whose keys are the registrant ID strings listed below and whose values are
-- all boolean true, so that membership can be tested with a single table index.
local function build_free_doi_registrants_table()
	local free_registrant_ids = {											-- registrant IDs, ascending numeric order
		'1045', '1074', '1096', '1100', '1155', '1186', '1194', '1371', '1629', '1989', '1999', '2147', '2196', '3285', '3389', '3390',
		'3748', '3814', '3847', '3897', '4061', '4089', '4103', '4172', '4175', '4230', '4236', '4239', '4240', '4249', '4251',
		'4252', '4253', '4254', '4291', '4292', '4329', '4330', '4331', '5194', '5210', '5306', '5312', '5313', '5314',
		'5315', '5316', '5317', '5318', '5319', '5320', '5321', '5334', '5402', '5409', '5410', '5411', '5412',
		'5492', '5493', '5494', '5495', '5496', '5497', '5498', '5499', '5500', '5501', '5527', '5528', '5662',
		'6064', '6219', '7167', '7217', '7287', '7482', '7490', '7554', '7717', '7759', '7766', '11131', '11569', '11647',
		'11648', '12688', '12703', '12715', '12942', '12998', '13105', '14256', '14293', '14303', '15215', '15347', '15412', '15560', '16995',
		'17645', '18637', '19080', '19173', '20944', '21037', '21468', '21767', '22261', '22323', '22459', '24105', '24196', '24966',
		'26775', '30845', '32545', '35711', '35712', '35713', '35995', '36648', '37126', '37532', '37871', '47128',
		'47622', '47959', '52437', '52975', '53288', '54081', '54947', '55667', '55914', '57009', '58647', '59081',
		};

	local lookup_t = {};
	for i = 1, #free_registrant_ids do										-- turn the sequence into a set keyed by registrant ID
		lookup_t[free_registrant_ids[i]] = true;
	end

	return lookup_t;
end
 
-- Registrants that are not free-to-read as a whole but have individual free-to-read publications.
-- Each key is a doi registrant ID (string); each value is a sequence of strings, each of which is the
-- beginning ('incipit') of a doi suffix used by one of the publications named in the trailing comment.
-- NOTE(review): how the consuming module compares these strings with a doi suffix (case sensitivity,
-- anchoring) is not visible here; note that '1046' lists 'j.1365-246x' while '1111' lists 'j.1365-246X' – confirm intended.
local extended_registrants_t = { -- known free registrants identifiable by the doi suffix incipit
['1002'] = {'aelm', 'leap'}, -- Advanced Electronic Materials, Learned Publishing
['1016'] = {'j.heliyon', 'j.nlp', 'j.proche'}, -- Heliyon, Natural Language Processing, Procedia Chemistry
['1017'] = {'nlp'}, -- Natural Language Processing Journal
['1046'] = {'j.1365-8711', 'j.1365-246x'}, -- MNRAS, GJI
['1093'] = {'mnras', 'mnrasl', 'gji', 'rasti'}, -- MNRAS, MNRAS Letters, GJI, RASTI
['1099'] = {'acmi', 'mic', '00221287', 'mgen'}, -- Access Microbiology, Microbiology, Journal of General Microbiology, Microbial Genomics
['1111'] = {'j.1365-2966', 'j.1745-3933', 'j.1365-246X'}, -- MNRAS, MNRAS Letters, GJI
['1210'] = {'jendso','jcemcr'}, -- Journal of the Endocrine Society, JCEM Case Reports
['4171'] = {'dm','mag'}, -- Documenta Mathematica, EMS Magazine
['14231'] = {'ag'}, -- Algebraic Geometry
}
 
Line 1,306 ⟶ 1,395:
},
err_archive_date_url_ts_mismatch = {
message = '<code class="cs1-code">&#124;archive-date=</code> / <code class="cs1-code">&#124;archive-url=</code> timestamp mismatch; $1 suggested',
anchor = 'archive_date_url_ts_mismatch',
category = 'CS1 errors: archive-url',
Line 1,633 ⟶ 1,722:
anchor = 'extra_text_volume',
category = 'CS1 errors: extra text: volume',
hidden = truefalse,
},
err_first_missing_last = {
Line 1,658 ⟶ 1,747:
category = 'CS1 errors: generic title',
hidden = false,
},
err_invalid_isbn_date = {
message = 'ISBN / Date incompatibility',
anchor = 'invalid_isbn_date',
category = 'CS1 errors: ISBN date',
hidden = true
},
err_invalid_param_val = {
Line 1,687 ⟶ 1,782:
anchor = 'missing_periodical',
category = 'CS1 errors: missing periodical',
hidden = truefalse
},
err_missing_pipe = {
Line 1,700 ⟶ 1,795:
category = 'CS1 errors: missing publisher',
hidden = false
},
err_numeric_names = {
message = '<code class="cs1-code">&#124;$1=</code> has numeric name', -- $1 is parameter name',
anchor = 'numeric_names',
category = 'CS1 errors: numeric name',
hidden = false,
},
err_param_access_requires_param = {
Line 1,711 ⟶ 1,812:
anchor = 'param_has_ext_link',
category = 'CS1 errors: external links',
hidden = false
},
err_param_has_twl_url = {
message = 'Wikipedia Library link in <code class="cs1-code">$1</code>', -- $1 is parameter name
anchor = 'param_has_twl_url',
category = 'CS1 errors: URL',
hidden = false
},
Line 1,792 ⟶ 1,899:
anchor = 'archived_copy',
category = 'CS1 maint: archived copy as title',
hidden = true,
},
maint_authors = {
message = nil,
anchor = 'authors',
category = 'CS1 maint: uses authors parameter',
hidden = true,
},
Line 1,852 ⟶ 1,953:
anchor = 'doi_inactive_dated',
category = 'CS1 maint: DOI inactive as of $2$3$1', -- $1 is year, $2 is month-name or empty string, $3 is space or empty string
hidden = true,
},
maint_doi_unflagged_free = {
message = nil,
anchor = 'doi_unflagged_free',
category = 'CS1 maint: unflagged free DOI',
hidden = true,
},
Line 1,858 ⟶ 1,965:
anchor = 'extra_punct',
category = 'CS1 maint: extra punctuation',
hidden = true,
},
maint_id_limit_load_fail = { -- applies to all cs1|2 templates on a page;
message = nil, -- maint message (category link) never emitted
anchor = 'id_limit_load_fail',
category = 'CS1 maint: ID limit load fail',
hidden = true,
},
Line 1,936 ⟶ 2,049:
anchor = 'postscript',
category = 'CS1 maint: postscript',
hidden = true,
},
maint_publisher_location = {
message = nil,
anchor = 'publisher_location',
category = 'CS1 maint: publisher ___location',
hidden = true,
},
Line 1,944 ⟶ 2,063:
hidden = true,
},
maint_unfit = {
message = nil,
anchor = 'unfit',
category = 'CS1 maint: unfit URL',
hidden = true,
},
maint_unknown_lang = {
message = nil,
Line 1,966 ⟶ 2,079:
anchor = 'url_status',
category = 'CS1 maint: url-status',
hidden = true,
},
maint_year= {
message = nil,
anchor = 'year',
category = 'CS1 maint: year',
hidden = true,
},
Line 1,975 ⟶ 2,094:
},
}
 
 
--[[--------------------------< I D _ L I M I T S _ D A T A _ T >----------------------------------------------
 
fetch id limits for certain identifiers from c:Data:CS1/Identifier limits.tab. This source is a json tabular
data file maintained at wikipedia commons. Convert the json format to a table of k/v pairs.
 
The values from <id_limits_data_t> are used to set handle.id_limit.
 
From 2025-02-21, MediaWiki is broken. Use this link to edit the tabular data file:
https://commons.wikimedia.org/w/index.php?title=Data:CS1/Identifier_limits.tab&action=edit
See Phab:T389105
 
]]
 
local id_limits_data_t = {};												-- k/v table: identifier name → upper limit for that identifier
local id_limits_data_load_fail = false;										-- flag; declared at file scope so that the value exported from this module is the
																			-- one set below; assume that we will be successful when loading json id limit tabular data

local use_commons_data = true;												-- set to false if your wiki does not have access to mediawiki commons; then,
if false == use_commons_data then											-- update this table from https://commons.wikimedia.org/wiki/Data:CS1/Identifier_limits.tab; last update: 2025-02-21
	id_limits_data_t = {['OCLC'] = 10450000000, ['OSTI'] = 23010000, ['PMC'] = 11900000, ['PMID'] = 40400000, ['RFC'] = 9300, ['SSRN'] = 5200000, ['S2CID'] = 276000000};	-- this table must be maintained locally

else																		-- here for wikis that do have access to mediawiki commons
	local load_fail_limit = 99999999999;									-- very high number to avoid error messages on load failure
	id_limits_data_t = {['OCLC'] = load_fail_limit, ['OSTI'] = load_fail_limit, ['PMC'] = load_fail_limit, ['PMID'] = load_fail_limit, ['RFC'] = load_fail_limit, ['SSRN'] = load_fail_limit, ['S2CID'] = load_fail_limit};

	local tab_result = mw.ext.data.get ('CS1/Identifier limits.tab');		-- attempt to load the json limit data from commons
	local tab_data_t = 'table' == type (tab_result) and tab_result.data;	-- undocumented 'feature': mw.ext.data.get() sometimes returns false; don't index a non-table
	if 'table' ~= type (tab_data_t) then									-- load failed or the result has no usable <data> member
		id_limits_data_load_fail = true;									-- set the flag so that Module:Citation/CS1 can create an unannotated maint category
	else
		for _, limit_t in ipairs (tab_data_t) do							-- overwrite default <load_fail_limit> values
			id_limits_data_t[limit_t[1]] = limit_t[2];						-- <limit_t[1]> is identifier; <limit_t[2]> is upper limit for that identifier
		end
	end
end
 
 
Line 2,087 ⟶ 2,242:
q = 'Q46339674',
label = 'eISSN',
prefix = 'https://wwwsearch.worldcat.org/issn/',
COinS = 'rft.eissn',
encode = false,
Line 2,130 ⟶ 2,285:
q = 'Q131276',
label = 'ISSN',
prefix = 'https://wwwsearch.worldcat.org/issn/',
COinS = 'rft.issn',
encode = false,
Line 2,198 ⟶ 2,353:
q = 'Q190593',
label = 'OCLC',
prefix = 'https://wwwsearch.worldcat.org/oclc/',
COinS = 'info:oclcnum',
encode = true,
separator = '&nbsp;',
id_limit = 9999999999, --id_limits_data_t.OCLC 10-digitsor 0,
},
['OL'] = {
Line 2,226 ⟶ 2,381:
encode = true,
separator = '&nbsp;',
id_limit = 23010000id_limits_data_t.OSTI or 0,
custom_access = 'osti-access',
},
Line 2,240 ⟶ 2,395:
encode = true,
separator = '&nbsp;',
id_limit = 10700000id_limits_data_t.PMC or 0,
access = 'free', -- free to read
},
Line 2,253 ⟶ 2,408:
encode = false,
separator = '&nbsp;',
id_limit = 37900000id_limits_data_t.PMID or 0,
},
['RFC'] = {
Line 2,265 ⟶ 2,420:
encode = false,
separator = '&nbsp;',
id_limit = 9300id_limits_data_t.RFC or 0,
access = 'free', -- free to read
},
Line 2,287 ⟶ 2,442:
encode = true,
separator = '&nbsp;',
id_limit = 4600000id_limits_data_t.SSRN or 0,
custom_access = 'ssrn-access',
},
Line 2,300 ⟶ 2,455:
encode = false,
separator = '&nbsp;',
id_limit = 262000000id_limits_data_t.S2CID or 0,
custom_access = 's2cid-access',
},
Line 2,336 ⟶ 2,491:
date_name_auto_xlate_enable = date_name_auto_xlate_enable,
date_digit_auto_xlate_enable = date_digit_auto_xlate_enable,
enable_sort_keys = enable_sort_keys,
-- tables and variables created when this module is loaded
Line 2,342 ⟶ 2,498:
punct_skip = build_skip_table (punct_skip, punct_meta_params),
url_skip = build_skip_table (url_skip, url_meta_params),
known_free_doi_registrants_t = build_free_doi_registrants_table(),
id_limits_data_load_fail = id_limits_data_load_fail, -- true when commons tabular identifier-limit data fails to load
 
name_space_sort_keys = name_space_sort_keys,
aliases = aliases,
special_case_translation = special_case_translation,
Line 2,350 ⟶ 2,509:
editor_markup_patterns = editor_markup_patterns,
et_al_patterns = et_al_patterns,
extended_registrants_t = extended_registrants_t,
id_handlers = id_handlers,
keywords_lists = keywords_lists,