Content deleted Content added
bump pmc; |
change line 1754 parameter to true to hide ISBN/ date compatibility issues. See Help talk:Citation Style 1#ISBN / Date incompatibility and Wikipedia:Administrators' noticeboard/Incidents#Template edit incorrectly creating error cat with 25000+ entries needs reverting |
||
(49 intermediate revisions by 3 users not shown) | |||
Line 1:
local lang_obj = mw.language.getContentLanguage(); -- make a language object for the local language; used here for languages and dates
--[[--------------------------< S E T T I N G S >--------------------------------------------------------------
boolean settings used to control various things. these settings are located here to make them easy to find
]]
-- these settings local to this module only
local local_digits_from_mediawiki = false; -- for i18n; when true, module fills date_names['local_digits'] from MediaWiki; manual fill required else; always false at en.wiki
local local_date_names_from_mediawiki = false; -- for i18n; when true, module fills date_names['local']['long'] and date_names['local']['short'] from MediaWiki;
-- manual translation required else; always false at en.wiki
-- these settings exported to other modules
local use_identifier_redirects = true; -- when true use redirect name for identifier label links; always true at en.wiki
local local_lang_cat_enable = false; -- when true categorizes pages where |language=<local wiki's language>; always false at en.wiki
local date_name_auto_xlate_enable = false; -- when true translates English month-names to the local-wiki's language month names; always false at en.wiki
local date_digit_auto_xlate_enable = false; -- when true translates Western date digit to the local-wiki's language digits (date_names['local_digits']); always false at en.wiki
local enable_sort_keys = true; -- when true module adds namespace sort keys to error and maintenance category links
--[[--------------------------< U N C A T E G O R I Z E D _ N A M E S P A C E S >------------------------------
List of namespace identifiers for namespaces whose pages are not to be categorized.
Same as setting notracking = true by default.
For wikis that have a current version of Module:cs1 documentation support, this #invoke will return an unordered
list of namespace names and their associated identifiers:
{{#invoke:cs1 documentation support|uncategorized_namespace_lister|all=<anything>}}
]]
local uncategorized_namespaces_t = {}; -- set of namespace ids whose pages are not categorized
uncategorized_namespaces_t[2] = true; -- start with the user namespace id
for talk_ns_id in pairs (mw.site.talkNamespaces) do -- then flag the id of every talk namespace
	uncategorized_namespaces_t[talk_ns_id] = true;
end
local uncategorized_subpages = {'/[Ss]andbox', '/[Tt]estcases', '/[^/]*[Ll]og', '/[Aa]rchive'}; -- list of Lua patterns found in page names of pages we should not categorize
--[[
at en.wiki Greek characters are used as sort keys for certain items in a category so that those items are
placed at the end of a category page. See Wikipedia:Categorization#Sort_keys. That works well for en.wiki
because English is written using the Latn script. This may not work well for other languages. At en.wiki it
is desirable to place content from certain namespaces at the end of a category listing so the module adds sort
keys to error and maintenance category links when rendering a cs1|2 template on a page in that namespace.
i18n: if this does not work well for your language, set <enable_sort_keys> to false.
]]
-- maps a namespace id (or the string 'other') to the Greek-letter sort key added to error and maintenance
-- category links when a cs1|2 template is rendered on a page in that namespace
local name_space_sort_keys = { -- sort keys to be used with these namespaces:
[4] = 'ω', -- wikipedia; omega
[10] = 'τ', -- template; tau
[118] = 'Δ', -- draft; delta
['other'] = 'ο', -- all other non-talk namespaces except main (article); omicron
}
--[[--------------------------< M E S S A G E S >--------------------------------------------------------------
Line 30 ⟶ 69:
['archived-dead'] = 'Archived from $1 on $2',
['archived-live'] = '$1 from the original on $2',
['archived-unfit'] = 'Archived from the original on ',
['archived'] = 'Archived',
Line 44 ⟶ 82:
['inset'] = '$1 inset',
['interview'] = 'Interviewed by $1',
['mismatch'] = '<code class="cs1-code">|$1=</code> / <code class="cs1-code">|$2=</code> mismatch', -- $1 is year param name; $2 is date param name
['newsgroup'] = '[[Usenet newsgroup|Newsgroup]]: $1',
Line 62 ⟶ 99:
['vol'] = '$1 Vol. $2', -- $1 is sepc; bold journal style volume is in presentation{}
['vol-no'] = '$1 Vol. $2, no. $3',
['issue'] = '$1 No. $2', -- $1 is sepc
['art'] = '$1 Art. $2', -- $1 is sepc; for {{cite conference}} only
['vol-art'] = '$1 Vol. $2, art. $3', -- sepc, volume, article-number; for {{cite conference}} only
['j-vol'] = '$1 $2', -- sepc, volume; bold journal volume is in presentation{}
['j-issue'] = ' ($1)',
['j-article-num'] = ' $1', -- TODO: any punctuation here? static text?
['nopp'] = '$1 $2'; -- page(s) without prefix; $1 is sepc
Line 90 ⟶ 131:
-- categories
['cat wikilink'] = '[[Category:$1]]', -- $1 is the category name
['cat wikilink sk'] = '[[Category:$1|$2]]', -- $1 is the category name; $2 is namespace sort key
[':cat wikilink'] = '[[:Category:$1|link]]', -- category name as maintenance message wikilink; $1 is the category name
Line 110 ⟶ 152:
used as class attributes in the <cite> tag that encloses the citation so these names may not contain spaces while
the canonical template name may. These names are used in warning_msg_e and warning_msg_m to create links to the
template's documentation when an article is displayed in
Most cs1|2 template |CitationClass= values at en.wiki match their canonical template names so are not listed here.
Line 117 ⟶ 159:
local citation_class_map_t = { -- TODO: if kept, these and all other config.CitationClass 'names' require some sort of i18n
['arxiv'] = 'arXiv',
['audio-visual'] = 'AV media',
['AV-media-notes'] = 'AV media notes',
['biorxiv'] = 'bioRxiv',
['citeseerx'] = 'CiteSeerX',
['encyclopaedia'] = 'encyclopedia',
['mailinglist'] = 'mailing list',
['
['pressrelease'] = 'press release',
['ssrn'] = 'SSRN',
['techreport'] = 'tech report',
}
Line 164 ⟶ 212:
['format'] = ' <span class="cs1-format">($1)</span>', -- for |format=, |chapter-format=, etc.
['interwiki'] = ' <span class="cs1-format">[in $1]</span>', -- for interwiki-language-linked author, editor, etc
['interproj'] = ' <span class="cs1-format">[at $1]</span>', -- for interwiki-project-linked author, editor, etc (:d: and :s: supported; :w: ignored)
-- various access levels, for |access=, |doi-access=, |arxiv=, ...
Line 169 ⟶ 219:
['ext-link-access-signal'] = '<span class="$1" title="$2">$3</span>', -- external link with appropriate lock icon
['free'] = {class='
['registration'] = {class='
['limited'] = {class='
['subscription'] = {class='
['interwiki-icon'] = '<span class="$1" title="$2">$3</span>',
Line 229 ⟶ 279:
['ArchiveFormat'] = 'archive-format',
['ArchiveURL'] = {'archive-url', 'archiveurl'}, -- Used by InternetArchiveBot
['ArticleNumber'] = 'article-number',
['ASINTLD'] = 'asin-tld',
['At'] = 'at', -- Used by InternetArchiveBot
['Authors'] = {
['BookTitle'] = {'book-title', 'booktitle'},
['Cartography'] = 'cartography',
Line 237 ⟶ 288:
['ChapterFormat'] = {'chapter-format', 'contribution-format', 'entry-format',
'article-format', 'section-format'};
['ChapterURL'] = {'chapter-url', 'contribution-url', 'entry-url', 'article-url', 'section-url
['ChapterUrlAccess'] = {'chapter-url-access', 'contribution-url-access',
'entry-url-access', 'article-url-access', 'section-url-access'}, -- Used by InternetArchiveBot
Line 264 ⟶ 315:
['Issue'] = {'issue', 'number'},
['Language'] = {'language', 'lang'},
['MailingList'] = {'mailing-list', 'mailinglist'}, -- cite mailing list only
['Map'] = 'map', -- cite map only
Line 298 ⟶ 345:
['ScriptChapter'] = {'script-chapter', 'script-contribution', 'script-entry',
'script-article', 'script-section'},
['ScriptEncyclopedia'] = {'script-encyclopedia', 'script-encyclopaedia'}, -- cite encyclopedia only
['ScriptMap'] = 'script-map',
['ScriptPeriodical'] = {'script-journal', 'script-magazine', 'script-newspaper',
Line 315 ⟶ 363:
['Title'] = 'title', -- Used by InternetArchiveBot
['TitleLink'] = {'title-link', 'episode-link', 'episodelink'}, -- Used by InternetArchiveBot
['TitleNote'] = {'title-note', 'department'},
['TitleType'] = {'type', 'medium'},
['TransChapter'] = {'trans-article', 'trans-chapter', 'trans-contribution',
Line 321 ⟶ 369:
['Transcript'] = 'transcript',
['TranscriptFormat'] = 'transcript-format',
['TranscriptURL'] =
['TransEncyclopedia'] = {'trans-encyclopedia', 'trans-encyclopaedia'}, -- cite encyclopedia only
['TransMap'] = 'trans-map', -- cite map only
['TransPeriodical'] = {'trans-journal', 'trans-magazine', 'trans-newspaper',
Line 336 ⟶ 385:
['Year'] = 'year',
['AuthorList-First'] = {"first#", "author-first#", "author#-first", "author-given#", "author#-given",
"
"given#"},
"subject-last#", "subject#-last", "subject-surname#", "subject#-surname",
"author#", 'host#', "subject#", "surname#"},
['AuthorList-Link'] = {"author-link#", "author#-link", "subject-link#",
"subject#-link", "authorlink#", "author#link"},
Line 383 ⟶ 434:
'AuthorList-Mask', 'ContributorList-Mask', 'EditorList-Mask', 'InterviewerList-Mask', 'TranslatorList-Mask', -- name-list mask may have name separators
'PostScript', 'Quote', 'ScriptQuote', 'TransQuote', 'Ref', -- miscellaneous
'ArchiveURL', 'ChapterURL', 'ConferenceURL
}
local url_meta_params = { -- table of aliases[] keys (meta parameters); each key has a table of parameter names for a value
'ArchiveURL', 'ChapterURL', 'ConferenceURL', 'ID
'Page', 'Pages', 'At', 'QuotePage', 'QuotePages', -- insource locators allowed to hold urls
}
Line 412 ⟶ 463:
local punct_skip = {};
local url_skip = {};
--[[--------------------------< S I N G L E - L E T T E R S E C O N D - L E V E L D O M A I N S >----------
this is a list of tlds that are known to have single-letter second-level domain names. This list does not include
ccTLDs which are accepted in is_domain_name().
]]
local single_letter_2nd_lvl_domains_t = {'cash', 'company', 'foundation', 'media', 'org', 'today'};
Line 422 ⟶ 483:
]]
local is_Latn = 'A-Za-z\195\128-\195\150\195\152-\195\182\195\184-\198\191\199\132-\201\143\225\184\128-\225\187\191';
local special_case_translation = {
['AuthorList'] = 'authors list', -- used to assemble maintenance category names
['ContributorList'] = 'contributors list', -- translation of these names plus translation of the base
['EditorList'] = 'editors list', -- must match the names of the actual categories
['InterviewerList'] = 'interviewers list', -- this group or translations used by name_has_ed_markup() and name_has_mult_names()
Line 448 ⟶ 509:
{['en'] = {'^wayback%s+machine$', false}, ['local'] = nil},
{['en'] = {'are you a robot', true}, ['local'] = nil},
{['en'] = {'hugedomains
{['en'] = {'^[%(%[{<]?no +title[>}%]%)]?$', false}, ['local'] = nil},
{['en'] = {'page not found', true}, ['local'] = nil},
Line 455 ⟶ 516:
{['en'] = {'website is for sale', true}, ['local'] = nil},
{['en'] = {'^404', false}, ['local'] = nil},
{['en'] = {'error[ %-]404', false}, ['local'] = nil},
{['en'] = {'internet archive wayback machine', true}, ['local'] = nil},
{['en'] = {'log into facebook', true}, ['local'] = nil},
Line 476 ⟶ 538:
{['en'] = {'about us', true}, ['local'] = nil},
{['en'] = {'%f[%a][Aa]dvisor%f[%A]', false}, ['local'] = nil},
{['en'] = {'allmusic', true}, ['local'] = nil},
{['en'] = {'%f[%a][Aa]uthor%f[%A]', false}, ['local'] = nil},
{['en'] = {'^[Bb]ureau$', false}, ['local'] = nil},
{['en'] = {'business', true}, ['local'] = nil},
{['en'] = {'cnn', true}, ['local'] = nil},
{['en'] = {'collaborator', true}, ['local'] = nil},
{['en'] = {'^[Cc]ompany$', false}, ['local'] = nil},
{['en'] = {'contributor', true}, ['local'] = nil},
{['en'] = {'contact us', true}, ['local'] = nil},
{['en'] = {'correspondent', true}, ['local'] = nil},
{['en'] = {'^[Dd]esk$', false}, ['local'] = nil},
{['en'] = {'directory', true}, ['local'] = nil},
{['en'] = {'%f[%(%[][%(%[]%s*eds?%.?%s*[%)%]]?$', false}, ['local'] = nil},
Line 487 ⟶ 556:
{['en'] = {'%f[%a][Ee]dited%f[%A]', false}, ['local'] = nil},
{['en'] = {'%f[%a][Ee]ditors?%f[%A]', false}, ['local'] = nil},
{['en'] = {'%f[%a]
{['en'] = {'facebook', true}, ['local'] = nil},
{['en'] = {'google', true}, ['local'] = nil},
{['en'] = {'^[Gg]roup$', false}, ['local'] = nil},
{['en'] = {'home page', true}, ['local'] = nil},
{['en'] = {'^[Ii]nc%.?$', false}, ['local'] = nil},
{['en'] = {'instagram', true}, ['local'] = nil},
{['en'] = {'interviewer', true}, ['local'] = nil},
{['en'] = {'^[Ll]imited$', false}, ['local'] = nil},
{['en'] = {'linkedIn', true}, ['local'] = nil},
{['en'] = {'^[Nn]ews$', false}, ['local'] = nil},
{['en'] = {'[Nn]ews[ %-]?[Rr]oom', false}, ['local'] = nil},
{['en'] = {'pinterest', true}, ['local'] = nil},
{['en'] = {'policy', true}, ['local'] = nil},
{['en'] = {'privacy', true}, ['local'] = nil},
{['en'] = {'reuters', true}, ['local'] = nil},
{['en'] = {'translator', true}, ['local'] = nil},
{['en'] = {'tumblr', true}, ['local'] = nil},
Line 535 ⟶ 609:
local_date_names_from_mediawiki is a boolean. When set to:
true – module will fetch local month names from MediaWiki for both date_names['local']['long'] and date_names['local']['short']; this will unconditionally overwrite manual translations
false – module will *not* fetch local month names from MediaWiki
Line 547 ⟶ 621:
]]
local local_date_names_from_mediawiki = true; -- when false, manual translation required for date_names['local']['long'] and date_names['local']['short']; overwrites manual translations
-- when true, module fetches long and short month names from MediaWiki
local date_names = {
Line 591 ⟶ 665:
date_names[invert_t[2]][i] = name; -- invert to get [i] = 'name' for conversions from ymd
end
end
if local_digits_from_mediawiki then -- only when the module is configured to fetch local digits from MediaWiki
	local western_from_local_t = {}; -- [local-language digit] = Western digit (as a string)
	for digit = 0, 9 do -- one entry for each of the ten digits
		local local_digit = lang_obj:formatNum (digit); -- the digit as rendered in the local language
		western_from_local_t[local_digit] = tostring (digit);
	end
	date_names['local_digits'] = western_from_local_t;
end
Line 612 ⟶ 694:
'{{ *([Mm]DY) *[|}]', -- 0
}
local title_object = mw.title.getCurrentTitle(); -- title object for the page being rendered
local content; -- stays nil in Template namespace so that unused templates appear in unused-template-reports; self-transcluded makes them look like they are used
if title_object.namespace ~= 10 then -- 10 is the Template namespace; every other namespace gets its content read
	content = title_object:getContent() or ''; -- page wikitext or ''; new pages edited w/ve do not have 'content' until saved; ve does not preview; phab:T221625
end
local function get_date_format ()
if not content then -- nil content when we're in template
return nil; -- auto-formatting does not work in Template space so don't set global_df
end
for _, pattern in ipairs (df_template_patterns) do -- loop through the patterns looking for {{Use dmy dates}} or {{Use mdy dates}} or any of their redirects
local start, _, match = content:find(pattern); -- match is the three letters indicating desired date format
if match then
if
return match:lower() .. '-' ..
else
return match:lower() .. '-all'; -- no |cs1-dates= k/v pair; return value appropriate for use in |df=
Line 632 ⟶ 718:
end
local global_df; -- TODO: add this to <global_cs1_config_t>?
Line 678 ⟶ 764:
'^[Pp]gs.?',
},
'^volumes?', -- volume-like text
'^vols?[%.:=]?',
'^iss[%.:=]?',
'^numbers?',
'^nos?%A', -- don't match 'november' or 'nostradamus'
'^nr[%.:=]?',
'^n[%.:= ]'
'^n°', -- 'n' with degree sign (U+00B0)
'^№', -- precomposed unicode numero character (U+2116)
},
}
Line 822 ⟶ 909:
['id-access'] = make_keywords_list ({keywords.free}),
}
--[[--------------------------< C S 1 _ C O N F I G _ G E T >--------------------------------------------------
fetch and validate values from {{cs1 config}} template to fill <global_cs1_config_t>
no error messages; when errors are detected, the parameter value from {{cs1 config}} is blanked.
Supports all parameters and aliases associated with the metaparameters: DisplayAuthors, DisplayContributors,
DisplayEditors, DisplayInterviewers, DisplayTranslators, NameListStyle, and Mode. The DisplayWhatever metaparameters
accept numeric values only (|display-authors=etal and the like is not supported).
]]
local global_cs1_config_t = {}; -- TODO: add value returned from get_date_format() to this table?
-- Scans <content> (wikitext of the current page) for a {{cs1 config}} template and copies each validated
-- parameter value into <global_cs1_config_t>, indexed by metaparameter name.  Reads the file-level <content>,
-- <aliases>, and <keywords_lists>; writes the file-level <global_cs1_config_t>.  Always returns nil; parameters
-- that are positional, unrecognized, or that hold an invalid value are silently ignored.
local function get_cs1_config ()
	if not content then -- nil content when we're in template namespace
		return nil; -- nothing to scan so leave <global_cs1_config_t> empty
	end

	local start = content:find('{{ *[Cc][Ss]1 config *[|}]'); -- <start> is offset into <content> when {{cs1 config}} found; nil else
	if not start then
		return nil; -- page does not have a {{cs1 config}} template
	end

	local cs1_config_template = content:match ('%b{}', start); -- get the whole template
	if not cs1_config_template then
		return nil; -- unbalanced braces; give up
	end

	local inner = cs1_config_template:gsub ('^{{%s*', ''); -- remove leading '{{'; assignment keeps gsub()'s first return value only
	inner = inner:gsub ('%s*}}$', ''); -- remove trailing '}}'
	local params_t = mw.text.split (inner, '%s*|%s*'); -- make a sequence of parameter/value pairs (split on the pipe)
	table.remove (params_t, 1); -- remove the template name because it isn't a parameter/value pair

	local config_meta_params_t = {'DisplayAuthors', 'DisplayContributors', 'DisplayEditors', 'DisplayInterviewers', 'DisplayTranslators', 'NameListStyle', 'Mode'};

	local meta_param_map_t = {}; -- [template parameter name] = metaparameter name; the parameter names accepted in {{cs1 config}}
	for _, meta_param in ipairs (config_meta_params_t) do -- for i18n, map template parameter names to their metaparameter equivalents
		local alias = aliases[meta_param];
		if 'table' == type (alias) then -- if <alias> is a sequence,
			for _, param in ipairs (alias) do -- extract its contents
				meta_param_map_t[param] = meta_param; -- and add to <meta_param_map_t>
			end
		elseif nil ~= alias then -- not a sequence so just add the single parameter name; skip when no alias is defined (a nil index would raise an error)
			meta_param_map_t[alias] = meta_param;
		end
	end

	local keywords_t = {}; -- [keyword] = metaparameter name; reverse form of <keywords_lists[key]> for these metaparameters
	for _, metaparam_t in ipairs ({{'NameListStyle', 'name-list-style'}, {'Mode', 'mode'}}) do -- only these metaparameter / keywords_lists key pairs
		for _, keyword in ipairs (keywords_lists[metaparam_t[2]]) do -- spin through the list of keywords
			keywords_t[keyword] = metaparam_t[1]; -- add [keyword] = metaparameter to the map
		end
	end

	for _, param in ipairs (params_t) do -- spin through the {{cs1 config}} parameters and fill <global_cs1_config_t>
		local k, v = param:match ('([^=]-)%s*=%s*(.+)'); -- <k> is the parameter name; <v> is parameter's assigned value
		local meta_param = k and meta_param_map_t[k]; -- nil when <param> is positional or when <k> is not a supported parameter name

		if meta_param then -- unsupported names must be skipped: indexing <global_cs1_config_t> with nil raises 'table index is nil'
			if k:find ('^display') then -- if <k> is one of the |display-<namelist>= parameters
				if v:match ('^%d+$') then -- the assigned value must be digits and nothing else; doesn't accept 'etal'
					global_cs1_config_t[meta_param] = v; -- add the display param and its value to globals table
				end
			elseif keywords_t[v] == meta_param then -- <v> must be a keyword that belongs to this same metaparameter
				global_cs1_config_t[meta_param] = v; -- add the parameter and its value to globals table
			end
		end
	end

	return nil;
end
get_cs1_config (); -- fill <global_cs1_config_t>
Line 904 ⟶ 1,061:
local indic_script = '[\224\164\128-\224\181\191\224\163\160-\224\183\191\225\128\128-\225\130\159\234\167\160-\234\167\191\234\169\160-\234\169\191]';
-- list of emoji that use a zwj character (U+200D) to combine with another emoji
-- from: https://unicode.org/Public/emoji/16.0/emoji-zwj-sequences.txt; version: 16.0; 2024-08-14
-- table created by: [[:en:Module:Make emoji zwj table]]
local emoji_t = { -- indexes are decimal forms of the hex values in U+xxxx
[8596] = true, -- U+2194 ↔ left right arrow
[8597] = true, -- U+2195 ↕ up down arrow
[9760] = true, -- U+2620 ☠ skull and crossbones
[9792] = true, -- U+2640 ♀ female sign
[9794] = true, -- U+2642 ♂ male sign
[9877] = true, -- U+2695 ⚕ staff of aesculapius
[9878] = true, -- U+2696 ⚖ scales
[9895] = true, -- U+26A7 ⚧ male with stroke and male and female sign
[9992] = true, -- U+2708 ✈ airplane
[10052] = true, -- U+2744 ❄ snowflake
[10084] = true, -- U+2764 ❤ heavy black heart
[10145] = true, -- U+27A1 ➡ black rightwards arrow
[11035] = true, -- U+2B1B ⬛ black large square
[127752] = true, -- U+1F308 🌈 rainbow
[127787] = true, -- U+1F32B 🌫 fog
[127806] = true, -- U+1F33E 🌾 ear of rice
[127859] = true, -- U+1F373 🍳 cooking
[127868] = true, -- U+1F37C 🍼 baby bottle
[127876] = true, -- U+1F384 🎄 christmas tree
[127891] = true, -- U+1F393 🎓 graduation cap
[127908] = true, -- U+1F3A4 🎤 microphone
Line 919 ⟶ 1,094:
[128105] = true, -- U+1F469 👩 woman
[128139] = true, -- U+1F48B 💋 kiss mark
[128165] = true, -- U+1F4A5 💥 collision symbol
[128168] = true, -- U+1F4A8 💨 dash symbol
[128171] = true, -- U+1F4AB 💫 dizzy symbol
[128187] = true, -- U+1F4BB 💻 personal computer
[128188] = true, -- U+1F4BC 💼 brief case
[128293] = true, -- U+1F525 🔥 fire
[128295] = true, -- U+1F527 🔧 wrench
[128300] = true, -- U+1F52C 🔬 microscope
Line 926 ⟶ 1,105:
[128640] = true, -- U+1F680 🚀 rocket
[128658] = true, -- U+1F692 🚒 fire engine
[129001] = true, -- U+1F7E9 🟩 large green square
[129003] = true, -- U+1F7EB 🟫 large brown square
[129309] = true, -- U+1F91D 🤝 handshake
[129455] = true, -- U+1F9AF 🦯 probing cane
Line 936 ⟶ 1,117:
[129469] = true, -- U+1F9BD 🦽 manual wheelchair
[129489] = true, -- U+1F9D1 🧑 adult
[
[
[
}
Line 955 ⟶ 1,132:
local this_wiki_code = lang_obj:getCode(); -- get this wiki's language code
if string.match (mw.site.server, 'wikidata') then
this_wiki_code = mw.getCurrentFrame():
end
Line 987 ⟶ 1,164:
local script_lang_codes = {
'ab', 'am', 'ar', 'az', 'be', 'bg', 'bn', 'bo', 'bs', '
'
'
'
'ug', 'uk', 'ur', 'uz', 'yi', 'yue', 'zh', 'zgh'
};
Line 998 ⟶ 1,176:
These tables hold language information that is different (correct) from MediaWiki's definitions
For each ['
lang_tag_remap{}:
key is always lowercase ISO 639-1, -2, -3 language
value is properly spelled and capitalized language name associated with
only one language name per
key/value pair must have matching entry in lang_name_remap{}
lang_name_remap{}:
key is always lowercase language name
value is a table the holds correctly spelled and capitalized language name [1] and associated
may have multiple keys referring to a common preferred name and
['kolsch'] and ['kölsch'] both refer to 'Kölsch' and 'ksh'
]]
local
['als'] = 'Tosk Albanian', -- MediaWiki returns Alemannisch
['bh'] = 'Bihari', -- MediaWiki uses 'bh' as a subdomain name for Bhojpuri Wikipedia: bh.wikipedia.org
Line 1,020 ⟶ 1,198:
['bn'] = 'Bengali', -- MediaWiki returns Bangla
['ca-valencia'] = 'Valencian', -- IETF variant of Catalan
['fkv'] = 'Kven', -- MediaWiki returns Kvensk
['gsw'] = 'Swiss German',
['ilo'] = 'Ilocano', -- MediaWiki/IANA/ISO 639: Iloko; use en.wiki preferred name
['ksh'] = 'Kölsch', -- MediaWiki: Colognian; use IANA/ISO 639 preferred name
Line 1,025 ⟶ 1,205:
['mis-x-ripuar'] = 'Ripuarian', -- override MediaWiki ksh; no IANA/ISO 639 code for Ripuarian; IETF private code created at Module:Lang/data
['nan-tw'] = 'Taiwanese Hokkien', -- make room for MediaWiki/IANA/ISO 639 nan: Min Nan Chinese and support en.wiki preferred name
['sr-ec'] = 'Serbian (Cyrillic script)', -- MediaWiki returns српски (ћирилица)
['sr-el'] = 'Serbian (Latin script)', -- MediaWiki returns srpski (latinica)
}
local lang_name_remap = { -- used for |language=; names require proper capitalization; tags must be lowercase
['alemannic'] = {'Swiss German', 'gsw'}, -- ISO 639-2, -3 alternate for Swiss German; MediaWiki mediawiki returns Alemannic for gsw; en.wiki preferred name
['alemannisch'] = {'Swiss German', 'gsw'}, -- not an ISO or IANA language name; MediaWiki uses 'als' as a subdomain name for Alemannic Wikipedia: als.wikipedia.org
['bangla'] = {'Bengali', 'bn'}, -- MediaWiki returns Bangla (the endonym) but we want Bengali (the exonym); here we remap
Line 1,038 ⟶ 1,221:
['kolsch'] = {'Kölsch', 'ksh'}, -- use IANA/ISO 639 preferred name (use non-diacritical o instead of umlaut ö)
['kölsch'] = {'Kölsch', 'ksh'}, -- use IANA/ISO 639 preferred name
['kven'] = {'Kven', 'fkv'}, -- Unicode CLDR have decided not to support English language name for these two...
['kvensk'] = {'Kven', 'fkv'}, -- ...they say to refer to IANA registry for English names
['ripuarian'] = {'Ripuarian', 'mis-x-ripuar'}, -- group of dialects; no code in MediaWiki or in IANA/ISO 639
['serbian (cyrillic script)'] = {'Serbian (Cyrillic script)', 'sr-cyrl'}, -- special case to get correct tag when |language=sr-ec
['serbian (latin script)'] = {'Serbian (Latin script)', 'sr-latn'}, -- special case to get correct tag when |language=sr-el
['swiss german'] = {'Swiss German', 'gsw'},
['taiwanese hokkien'] = {'Taiwanese Hokkien', 'nan-tw'}, -- make room for MediaWiki/IANA/ISO 639 nan: Min Nan Chinese
['tosk albanian'] = {'Tosk Albanian', 'als'}, -- MediaWiki replaces 'Tosk Albanian' with 'Alemannisch' so 'Tosk Albanian' cannot be found
['valencian'] = {'Valencian', 'ca-valencia'},
}
Line 1,054 ⟶ 1,242:
['foreign-lang-source'] = 'CS1 $1-language sources ($2)', -- |language= categories; $1 is foreign-language name, $2 is ISO639-1 code
['foreign-lang-source-2'] = 'CS1 foreign language sources (ISO 639-2)|$1', -- |language= category; a cat for ISO639-2 languages; $1 is the ISO 639-2 code used as a sort key
['interproj-linked-name'] = 'CS1 interproject-linked names|$1', -- any author, editor, etc that has an interproject link; $1 is interproject tag used as a sort key
['interwiki-linked-name'] = 'CS1 interwiki-linked names|$1', -- any author, editor, etc that has an interwiki link; $1 is interwiki tag used as a sort key; yeilds to interproject
['local-lang-source'] = 'CS1 $1-language sources ($2)', -- |language= categories; $1 is local-language name, $2 is ISO639-1 code; not emitted when local_lang_cat_enable is false
['___location-test'] = 'CS1 ___location test',
['long-vol'] = 'CS1: long volume value', -- probably temporary cat to identify scope of |volume= values longer than 4
['script'] = 'CS1 uses $1-language script ($2)', -- |script-title=xx: has matching category; $1 is language name, $2 is
['tracked-param'] = 'CS1 tracked parameter: $1', -- $1 is base (enumerators removed) parameter name
['unfit'] = 'CS1: unfit URL', -- |url-status=unfit or |url-status=usurped; used to be a maint cat
['vanc-accept'] = 'CS1:Vancouver names with accept markup', -- for |vauthors=/|veditors= with accept-as-written markup
['year-range-abbreviated'] = 'CS1: abbreviated year range', -- probably temporary cat to identify scope of |date=, |year= values using YYYY–YY form
}
Line 1,072 ⟶ 1,263:
local title_types = {
['AV-media-notes'] = 'Media notes',
['document'] = 'Document',
['interview'] = 'Interview',
['mailinglist'] = 'Mailing list',
Line 1,081 ⟶ 1,273:
['techreport'] = 'Technical report',
['thesis'] = 'Thesis',
}
--[[--------------------------< B U I L D _ K N O W N _ F R E E _ D O I _ R E G I S T R A N T S _ T A B L E >--
build a table of doi registrants known to be free-to-read. In a doi, the registrant ID is the series of digits
between the '10.' and the first '/': in doi 10.1100/sommat, 1100 is the registrant ID
see §3.2.2 DOI prefix of the Doi Handbook p. 43
https://www.doi.org/doi-handbook/DOI_Handbook_Final.pdf#page=43
]]
-- Returns a set-like table: each key is the registrant ID (a string of digits) of a doi registrant known to
-- be free-to-read; every value is boolean true.
local function build_free_doi_registrants_table()
	local free_registrant_ids = { -- the known free-to-read registrant IDs
		'1045', '1074', '1096', '1100', '1155', '1186', '1194', '1371', '1629', '1989', '1999', '2147', '2196', '3285', '3389', '3390',
		'3748', '3814', '3847', '3897', '4061', '4089', '4103', '4172', '4175', '4230', '4236', '4239', '4240', '4249', '4251',
		'4252', '4253', '4254', '4291', '4292', '4329', '4330', '4331', '5194', '5210', '5306', '5312', '5313', '5314',
		'5315', '5316', '5317', '5318', '5319', '5320', '5321', '5334', '5402', '5409', '5410', '5411', '5412',
		'5492', '5493', '5494', '5495', '5496', '5497', '5498', '5499', '5500', '5501', '5527', '5528', '5662',
		'6064', '6219', '7167', '7217', '7287', '7482', '7490', '7554', '7717', '7759', '7766', '11131', '11569', '11647',
		'11648', '12688', '12703', '12715', '12942', '12998', '13105', '14256', '14293', '14303', '15215', '15347', '15412', '15560', '16995',
		'17645', '18637', '19080', '19173', '20944', '21037', '21468', '21767', '22261', '22323', '22459', '24105', '24196', '24966',
		'26775', '30845', '32545', '35711', '35712', '35713', '35995', '36648', '37126', '37532', '37871', '47128',
		'47622', '47959', '52437', '52975', '53288', '54081', '54947', '55667', '55914', '57009', '58647', '59081',
	};

	local lookup_t = {};
	for i = 1, #free_registrant_ids do -- convert the sequence into a k/v lookup table
		lookup_t[free_registrant_ids[i]] = true;
	end

	return lookup_t;
end
-- keys are doi registrant IDs (strings of digits); each value is a sequence of doi-suffix incipits that
-- identify free-to-read content from that registrant
-- NOTE(review): ['1046'] lists 'j.1365-246x' (lowercase x) while ['1111'] lists 'j.1365-246X' (uppercase X);
-- confirm that the code consuming this table compares suffixes case-insensitively, else normalize the case
local extended_registrants_t = { -- known free registrants identifiable by the doi suffix incipit
['1002'] = {'aelm', 'leap'}, -- Advanced Electronic Materials, Learned Publishing
['1016'] = {'j.heliyon', 'j.nlp', 'j.proche'}, -- Heliyon, Natural Language Processing, Procedia Chemistry
['1017'] = {'nlp'}, -- Natural Language Processing Journal
['1046'] = {'j.1365-8711', 'j.1365-246x'}, -- MNRAS, GJI
['1093'] = {'mnras', 'mnrasl', 'gji', 'rasti'}, -- MNRAS, MNRAS Letters, GJI, RASTI
['1099'] = {'acmi', 'mic', '00221287', 'mgen'}, -- Access Microbiology, Microbiology, Journal of General Microbiology, Microbial Genomics
['1111'] = {'j.1365-2966', 'j.1745-3933', 'j.1365-246X'}, -- MNRAS, MNRAS Letters, GJI
['1210'] = {'jendso','jcemcr'}, -- Journal of the Endocrine Society, JCEM Case Reports
['4171'] = {'dm','mag'}, -- Documenta Mathematica, EMS Magazine
['14231'] = {'ag'}, -- Algebraic Geometry
}
Line 1,152 ⟶ 1,388:
hidden = false
},
err_archive_date_missing_url = {
message = '<code class="cs1-code">|archive-date=</code> requires <code class="cs1-code">|archive-url=</code>',
anchor = 'archive_date_missing_url',
category = 'CS1 errors: archive-url',
hidden = false
},
err_archive_date_url_ts_mismatch = {
message = '<code class="cs1-code">|archive-date=</code> / <code class="cs1-code">|archive-url=</code> timestamp mismatch; $1 suggested',
anchor = 'archive_date_url_ts_mismatch',
category = 'CS1 errors: archive-url',
hidden = false
},
err_archive_missing_date = {
message = '<code class="cs1-code">|archive-url=</code> requires <code class="cs1-code">|archive-date=</code>',
Line 1,270 ⟶ 1,518:
anchor = 'bad_lccn',
category = 'CS1 errors: LCCN',
hidden = false
},
err_bad_medrxiv = {
message = 'Check <code class="cs1-code">|medrxiv=</code> value',
anchor = 'bad_medrxiv',
category = 'CS1 errors: medRxiv',
hidden = false
},
Line 1,468 ⟶ 1,722:
anchor = 'extra_text_volume',
category = 'CS1 errors: extra text: volume',
hidden =
},
err_first_missing_last = {
message = '<code class="cs1-code">|$1=</code> missing <code class="cs1-code">|$2=</code>', -- $1 is first alias, $2 is matching last alias
anchor = 'first_missing_last',
category = 'CS1 errors: missing name',
hidden = false
},
Line 1,493 ⟶ 1,747:
category = 'CS1 errors: generic title',
hidden = false,
},
err_invalid_isbn_date = {
message = 'ISBN / Date incompatibility',
anchor = 'invalid_isbn_date',
category = 'CS1 errors: ISBN date',
hidden = true
},
err_invalid_param_val = {
Line 1,504 ⟶ 1,764:
anchor = 'invisible_char',
category = 'CS1 errors: invisible characters',
hidden = false
},
err_medrxiv_missing = {
message = '<code class="cs1-code">|medrxiv=</code> required',
anchor = 'medrxiv_missing',
category = 'CS1 errors: medRxiv', -- same as bad medRxiv
hidden = false
},
Line 1,516 ⟶ 1,782:
anchor = 'missing_periodical',
category = 'CS1 errors: missing periodical',
hidden =
},
err_missing_pipe = {
Line 1,523 ⟶ 1,789:
category = 'CS1 errors: missing pipe',
hidden = false
},
err_missing_publisher = {
message = 'Cite $1 requires <code class="cs1-code">|$2=</code>', -- $1 is cs1 template name; $2 is canonical publisher parameter name for cite $1
anchor = 'missing_publisher',
category = 'CS1 errors: missing publisher',
hidden = false
},
err_numeric_names = {
message = '<code class="cs1-code">|$1=</code> has numeric name', -- $1 is parameter name',
anchor = 'numeric_names',
category = 'CS1 errors: numeric name',
hidden = false,
},
err_param_access_requires_param = {
Line 1,534 ⟶ 1,812:
anchor = 'param_has_ext_link',
category = 'CS1 errors: external links',
hidden = false
},
err_param_has_twl_url = {
message = 'Wikipedia Library link in <code class="cs1-code">$1</code>', -- $1 is parameter name
anchor = 'param_has_twl_url',
category = 'CS1 errors: URL',
hidden = false
},
Line 1,546 ⟶ 1,830:
anchor = 'parameter_ignored_suggest',
category = 'CS1 errors: unsupported parameter',
hidden = false
},
err_periodical_ignored = {
message = '<code class="cs1-code">|$1=</code> ignored', -- $1 is parameter name
anchor = 'periodical_ignored',
category = 'CS1 errors: periodical ignored',
hidden = false
},
Line 1,563 ⟶ 1,853:
message = '<code class="cs1-code">|ssrn=</code> required',
anchor = 'ssrn_missing',
category = 'CS1 errors: SSRN',
hidden = false
},
Line 1,611 ⟶ 1,901:
hidden = true,
},
message = nil,
anchor = '
category = 'CS1 maint:
hidden = true,
},
maint_location_no_publisher = { -- cite book, conference, encyclopedia; citation as book cite or encyclopedia cite
message = nil, -- no message text is defined for this maintenance condition
anchor = 'location_no_publisher',
category = 'CS1 maint: location missing publisher', -- category name uses the plain word 'location', matching the anchor
hidden = true,
},
Line 1,657 ⟶ 1,953:
anchor = 'doi_inactive_dated',
category = 'CS1 maint: DOI inactive as of $2$3$1', -- $1 is year, $2 is month-name or empty string, $3 is space or empty string
hidden = true,
},
maint_doi_unflagged_free = {
message = nil,
anchor = 'doi_unflagged_free',
category = 'CS1 maint: unflagged free DOI',
hidden = true,
},
Line 1,663 ⟶ 1,965:
anchor = 'extra_punct',
category = 'CS1 maint: extra punctuation',
hidden = true,
},
maint_id_limit_load_fail = { -- applies to all cs1|2 templates on a page;
message = nil, -- maint message (category link) never emitted
anchor = 'id_limit_load_fail',
category = 'CS1 maint: ID limit load fail',
hidden = true,
},
Line 1,688 ⟶ 1,996:
category = 'CS1 maint: ___location',
hidden = true,
maint_mr_format = {
message = nil,
Line 1,694 ⟶ 2,002:
category = 'CS1 maint: MR format',
hidden = true,
maint_mult_names = {
message = nil,
Line 1,718 ⟶ 2,026:
category = 'CS1 maint: others in cite AV media (notes)',
hidden = true,
maint_overridden_setting = {
message = nil,
anchor = 'overridden',
category = 'CS1 maint: overridden setting',
hidden = true,
},
maint_pmc_embargo = {
message = nil,
Line 1,735 ⟶ 2,049:
anchor = 'postscript',
category = 'CS1 maint: postscript',
hidden = true,
},
maint_publisher_location = {
message = nil, -- no message text is defined for this maintenance condition
anchor = 'publisher_location',
category = 'CS1 maint: publisher location', -- category name uses the plain word 'location', matching the anchor
hidden = true,
},
Line 1,743 ⟶ 2,063:
hidden = true,
},
maint_unknown_lang = {
message = nil,
Line 1,765 ⟶ 2,079:
anchor = 'url_status',
category = 'CS1 maint: url-status',
hidden = true,
},
maint_year= {
message = nil,
anchor = 'year',
category = 'CS1 maint: year',
hidden = true,
},
Line 1,774 ⟶ 2,094:
},
}
--[[--------------------------< I D _ L I M I T S _ D A T A _ T >----------------------------------------------
fetch id limits for certain identifiers from c:Data:CS1/Identifier limits.tab. This source is a json tabular
data file maintained at wikipedia commons. Convert the json format to a table of k/v pairs.
The values from <id_limits_data_t> are used to set handle.id_limit.
From 2025-02-21, MediaWiki is broken. Use this link to edit the tabular data file:
https://commons.wikimedia.org/w/index.php?title=Data:CS1/Identifier_limits.tab&action=edit
See Phab:T389105
]]
local id_limits_data_t = {};													-- k/v pairs: identifier name => upper limit for that identifier
local id_limits_data_load_fail = false;											-- flag; assume that we will be successful when loading json id limit tabular data
																				-- declared at file scope (not inside the else branch) so that the
																				-- module's export table can read it; exported so that
																				-- Module:Citation/CS1 can create an unannotated maint category
local use_commons_data = true;													-- set to false if your wiki does not have access to mediawiki commons; then,
if false == use_commons_data then												-- update this table from https://commons.wikimedia.org/wiki/Data:CS1/Identifier_limits.tab; last update: 2025-02-21
	id_limits_data_t = {['OCLC'] = 10450000000, ['OSTI'] = 23010000, ['PMC'] = 11900000, ['PMID'] = 40400000, ['RFC'] = 9300, ['SSRN'] = 5200000, ['S2CID'] = 276000000};	-- this table must be maintained locally
else																			-- here for wikis that do have access to mediawiki commons
	local load_fail_limit = 99999999999;										-- very high number to avoid error messages on load failure
	id_limits_data_t = {['OCLC'] = load_fail_limit, ['OSTI'] = load_fail_limit, ['PMC'] = load_fail_limit, ['PMID'] = load_fail_limit, ['RFC'] = load_fail_limit, ['SSRN'] = load_fail_limit, ['S2CID'] = load_fail_limit};

	local tab_t = mw.ext.data.get ('CS1/Identifier limits.tab');				-- attempt to load the json limit data from commons
	local tab_data_t = ('table' == type (tab_t)) and tab_t.data or nil;			-- mw.ext.data.get() sometimes returns false; do not index the result until we know it is a table

	if 'table' ~= type (tab_data_t) then										-- load failed or returned no usable data rows
		id_limits_data_load_fail = true;										-- set the flag; the <load_fail_limit> defaults stay in place
	else
		for _, limit_t in ipairs (tab_data_t) do								-- overwrite default <load_fail_limit> values
			id_limits_data_t[limit_t[1]] = limit_t[2];							-- <limit_t[1]> is identifier; <limit_t[2]> is upper limit for that identifier
		end
	end
end
Line 1,786 ⟶ 2,142:
redirect: a local redirect to a local Wikipedia article name; at en.wiki, 'ISBN (identifier)' is a redirect to 'International Standard Book Number'
q: Wikidata q number for the identifier
label: the label
redirect from id_handlers['<id>'].redirect when use_identifier_redirects is true
Wikidata-supplied article name for the local wiki from id_handlers['<id>'].q
Line 1,815 ⟶ 2,171:
q = 'Q118398',
label = 'arXiv',
prefix = 'https://arxiv.org/abs/',
encode = false,
COinS = 'info:arxiv',
Line 1,827 ⟶ 2,183:
q = 'Q1753278',
label = 'ASIN',
prefix = 'https://www.amazon.',
COinS = 'url',
separator = ' ',
Line 1,850 ⟶ 2,206:
q = 'Q19835482',
label = 'bioRxiv',
prefix = 'https://doi.org/',
COinS = 'pre', -- use prefix value
access = 'free', -- free to read
Line 1,862 ⟶ 2,218:
q = 'Q2715061',
label = 'CiteSeerX',
prefix = 'https://citeseerx.ist.psu.edu/viewdoc/summary?doi=',
COinS = 'pre', -- use prefix value
access = 'free', -- free to read
Line 1,874 ⟶ 2,230:
q = 'Q25670',
label = 'doi',
prefix = 'https://doi.org/',
COinS = 'info:doi',
separator = ':',
Line 1,886 ⟶ 2,242:
q = 'Q46339674',
label = 'eISSN',
prefix = 'https://
COinS = 'rft.eissn',
encode = false,
Line 1,897 ⟶ 2,253:
q = 'Q3126718',
label = 'hdl',
prefix = 'https://hdl.handle.net/',
COinS = 'info:hdl',
separator = ':',
Line 1,929 ⟶ 2,285:
q = 'Q131276',
label = 'ISSN',
prefix = 'https://
COinS = 'rft.issn',
encode = false,
Line 1,940 ⟶ 2,296:
q = '',
label = 'JFM',
prefix = 'https://zbmath.org/?format=complete&q=an:',
COinS = 'pre', -- use prefix value
encode = true,
Line 1,951 ⟶ 2,307:
q = 'Q1420342',
label = 'JSTOR',
prefix = 'https://www.jstor.org/stable/',
COinS = 'pre', -- use prefix value
encode = false,
Line 1,963 ⟶ 2,319:
q = 'Q620946',
label = 'LCCN',
prefix = 'https://lccn.loc.gov/',
COinS = 'info:lccn',
encode = false,
separator = ' ',
},
['MEDRXIV'] = {
parameters = {'medrxiv'},
link = 'medRxiv',
redirect = 'medRxiv (identifier)',
q = 'Q58465838',
label = 'medRxiv',
prefix = 'https://www.medrxiv.org/content/',
COinS = 'pre', -- use prefix value
access = 'free', -- free to read
encode = false,
separator = ' ',
Line 1,974 ⟶ 2,342:
q = 'Q211172',
label = 'MR',
prefix = 'https://
COinS = 'pre', -- use prefix value
encode = true,
Line 1,985 ⟶ 2,353:
q = 'Q190593',
label = 'OCLC',
prefix = 'https://
COinS = 'info:oclcnum',
encode = true,
separator = ' ',
id_limit =
},
['OL'] = {
Line 1,997 ⟶ 2,365:
q = 'Q1201876',
label = 'OL',
prefix = 'https://openlibrary.org/',
COinS = 'url',
separator = ' ',
Line 2,009 ⟶ 2,377:
q = 'Q2015776',
label = 'OSTI',
prefix = 'https://www.osti.gov/biblio/',
COinS = 'pre', -- use prefix value
encode = true,
separator = ' ',
id_limit =
custom_access = 'osti-access',
},
Line 2,022 ⟶ 2,390:
q = 'Q229883',
label = 'PMC',
prefix = 'https://www.ncbi.nlm.nih.gov/pmc/articles/PMC',
suffix = '',
COinS = 'pre', -- use prefix value
encode = true,
separator = ' ',
id_limit =
access = 'free', -- free to read
},
Line 2,036 ⟶ 2,404:
q = 'Q2082879',
label = 'PMID',
prefix = 'https://pubmed.ncbi.nlm.nih.gov/',
COinS = 'info:pmid',
encode = false,
separator = ' ',
id_limit =
},
['RFC'] = {
Line 2,048 ⟶ 2,416:
q = 'Q212971',
label = 'RFC',
prefix = 'https://tools.ietf.org/html/rfc',
COinS = 'pre', -- use prefix value
encode = false,
separator = ' ',
id_limit =
access = 'free', -- free to read
},
Line 2,070 ⟶ 2,438:
q = 'Q7550801',
label = 'SSRN',
prefix = 'https://papers.ssrn.com/
COinS = 'pre', -- use prefix value
encode = true,
separator = ' ',
id_limit =
custom_access = 'ssrn-access',
},
Line 2,087 ⟶ 2,455:
encode = false,
separator = ' ',
id_limit =
custom_access = 's2cid-access',
},
Line 2,107 ⟶ 2,475:
q = 'Q190269',
label = 'Zbl',
prefix = 'https://zbmath.org/?format=complete&q=an:',
COinS = 'pre', -- use prefix value
encode = true,
Line 2,119 ⟶ 2,487:
return {
use_identifier_redirects =
local_lang_cat_enable = local_lang_cat_enable,
date_name_auto_xlate_enable = date_name_auto_xlate_enable,
date_digit_auto_xlate_enable = date_digit_auto_xlate_enable,
enable_sort_keys = enable_sort_keys,
global_df = get_date_format (), -- this line can be replaced with "global_df = 'dmy-all'," to have all dates auto translated to dmy format.
global_cs1_config_t = global_cs1_config_t, -- global settings from {{cs1 config}}
punct_skip = build_skip_table (punct_skip, punct_meta_params),
url_skip = build_skip_table (url_skip, url_meta_params),
known_free_doi_registrants_t = build_free_doi_registrants_table(),
id_limits_data_load_fail = id_limits_data_load_fail, -- true when commons tabular identifier-limit data fails to load
name_space_sort_keys = name_space_sort_keys,
aliases = aliases,
special_case_translation = special_case_translation,
Line 2,135 ⟶ 2,509:
editor_markup_patterns = editor_markup_patterns,
et_al_patterns = et_al_patterns,
extended_registrants_t = extended_registrants_t,
id_handlers = id_handlers,
keywords_lists = keywords_lists,
keywords_xlate = keywords_xlate,
stripmarkers = stripmarkers,
invisible_chars = invisible_chars,
invisible_defs = invisible_defs,
indic_script = indic_script,
maint_cats = maint_cats,
messages = messages,
Line 2,148 ⟶ 2,523:
prop_cats = prop_cats,
script_lang_codes = script_lang_codes,
lang_tag_remap = lang_tag_remap,
lang_name_remap = lang_name_remap,
this_wiki_code = this_wiki_code,
title_types = title_types,
uncategorized_namespaces =
uncategorized_subpages = uncategorized_subpages,
templates_using_volume = templates_using_volume,
Line 2,158 ⟶ 2,533:
templates_not_using_page = templates_not_using_page,
vol_iss_pg_patterns = vol_iss_pg_patterns,
single_letter_2nd_lvl_domains_t = single_letter_2nd_lvl_domains_t,
inter_wiki_map = inter_wiki_map,
|