Module:Citation/CS1/Identifiers: Difference between revisions

Content deleted Content added
sync from sandbox;
No edit summary
 
(8 intermediate revisions by the same user not shown)
Line 164:
 
 
--[=[-------------------------< I S _ V A L I D _ B I O R X I V _ D A T E >------------------------------------------
 
for biorxiv, returns true if:
2019-12-11T00:00Z <= biorxiv_date < today + 2 days
for medrxiv, returns true if:
2020-01-01T00:00Z <= medrxiv_date < today + 2 days
The dated form of biorxiv identifier has a start date of 2019-12-11. The Unix timestamp for that date is {{#time:U|2019-12-11}} = 1576022400
The medrxiv identifier has a start date of 2020-01-01. The Unix timestamp for that date is {{#time:U|2020-01-01}} = 1577836800
 
<rxiv_date> is the date provided in those |biorxiv= parameter values that are dated and in |medrxiv= parameter values at time 00:00:00 UTC
<today> is the current date at time 00:00:00 UTC plus 48 hours
if today's date is 2023-01-01T00:00:00 then
adding 24 hours gives 2023-01-02T00:00:00 – one second more than today
adding another 24 hours gives 2023-01-03T00:00:00 – one second more than tomorrow
 
inputs:
This function does not work if it is fed month names for languages other than English. Wikimedia #time: parser
<y>, <m>, <d> – year, month, day parts of the date from the biorxiv or medrxiv identifier
apparently doesn't understand non-English date month names. This function will always return false when the date
<select> 'b' for biorxiv, 'm' for medrxiv; defaults to 'b'
contains a non-English month name because good1 is false after the call to lang_object.formatDate(). To get
around that call this function with date parts and create a YYYY-MM-DD format date.
 
]=]
 
local function is_valid_biorxiv_dateis_valid_rxiv_date (y, m, d, select)
localif biorxiv_date0 == table.concattonumber ({y, m,) d},and '-'12 < tonumber (m); then -- make<m> must be a ymdnumber date1–12
return false;
end
if 0 == tonumber (d) and 31 < tonumber (d) then -- <d> must be a number 1–31; TODO: account for month length and leap yer?
return false;
end
local rxiv_date = table.concat ({y, m, d}, '-'); -- make ymd date string
local good1, good2;
local biorxiv_tsrxiv_ts, tomorrow_ts; -- to hold Unix timestamps representing the dates
local lang_object = mw.getContentLanguage();
 
good1, biorxiv_tsrxiv_ts = pcall (lang_object.formatDate, lang_object, 'U', biorxiv_daterxiv_date); -- convert biorxiv_daterxiv_date value to Unix timestamp
good2, tomorrow_ts = pcall (lang_object.formatDate, lang_object, 'U', 'today + 2 days' ); -- today midnight + 2 days is one second more than all day tomorrow
if good1 and good2 then -- lang.formatDate() returns a timestamp in the local script which tonumber() may not understand
biorxiv_tsrxiv_ts = tonumber (biorxiv_tsrxiv_ts) or lang_object:parseFormattedNumber (biorxiv_tsrxiv_ts); -- convert to numbers for the comparison;
tomorrow_ts = tonumber (tomorrow_ts) or lang_object:parseFormattedNumber (tomorrow_ts);
else
Line 200 ⟶ 209:
end
 
local limit_ts = ((select and ('m' == select)) and 1577836800) or 1576022400; -- choose the appropriate limit timesatmp
return ((1576022400 <= biorxiv_ts) and (biorxiv_ts < tomorrow_ts)) -- 2012-12-11T00:00Z <= biorxiv_date < tomorrow's date
 
return ((limit_ts <= rxiv_ts) and (rxiv_ts < tomorrow_ts)) -- limit_ts <= rxiv_date < tomorrow's date
end
 
Line 280 ⟶ 291:
return lccn;
end
 
 
Line 367 ⟶ 378:
if is_set (class) then
if id:match ('^%d+') then
text = table.concat ({text, ' [[https://arxiv.org/archive/', class, ' ', class, ']]'}); -- external link within square brackets, not wikilink
else
set_message ('err_class_ignored');
Line 399 ⟶ 410:
local access = options.access;
local handler = options.handler;
local ignore_invalid = options.accept;
local err_type;
local err_msg = '';
Line 421 ⟶ 433:
if id:find('&%.') then
err_type = cfg.err_msg_supl.journal; -- journal abbreviation must not have '&.' (if it does it's missing a letter)
end
if id:match ('.........%.tmp%.') then -- temporary bibcodes when positions 10–14 are '.tmp.'
set_message ('maint_bibcode');
end
end
end
 
if is_set (err_type) and not ignore_invalid then -- if there was an error detected and accept-as-written markup not used
set_message ('err_bad_bibcode', {err_type});
options.coins_list_t['BIBCODE'] = nil; -- when error, unset so not included in COinS
 
end
 
Line 456 ⟶ 470:
local patterns = {
'^10%.1101/%d%d%d%d%d%d$', -- simple 6-digit identifier (before 2019-12-11)
'^10%.1101/(20[1-9]%d%d)%.([01]%d%d)%.([0-3]%d%d)%.%d%d%d%d%d%dv%d+$', -- y.m.d. date + 6-digit identifier + version (after 2019-12-11)
'^10%.1101/(20[1-9]%d%d)%.([01]%d%d)%.([0-3]%d%d)%.%d%d%d%d%d%d$', -- y.m.d. date + 6-digit identifier (after 2019-12-11)
}
Line 466 ⟶ 480:
 
if m then -- m is nil when id is the six-digit form
if not is_valid_biorxiv_dateis_valid_rxiv_date (y, m, d, 'b') then -- validate the encoded date; TODO: don'tb' ignorefor leap-yearbiorxiv and actual month lengths ({{#time:}} is a poor date validator)limit
break; -- date fail; break out early so we don't unset the error message
end
Line 540 ⟶ 554:
local handler = options.handler;
local err_flag;
 
local function is_extended_free (registrant, suffix)				-- local helper for 'mixed' registrants: is this doi <suffix> one of the registrant's listed free-to-read incipits?
	local incipits_t = cfg.extended_registrants_t[registrant];		-- sequence of incipit patterns for this registrant; nil when the registrant has no entry
	if not incipits_t then
		return;														-- registrant not listed; nothing to test so return nothing (falsy)
	end

	for _, prefix_pattern in ipairs (incipits_t) do					-- try each incipit in turn
		local anchored_pattern = '^' .. prefix_pattern;				-- anchor so that the incipit must begin the suffix
		if mw.ustring.find (suffix, anchored_pattern) then
			return true;											-- suffix begins with a listed incipit
		end
	end
end																	-- no incipit matched; falls off the end and returns nothing (falsy)
 
local text;
if is_set (inactive) then
local inactive_year = inactive:match("%d%d%d%d") or ''; -- try to get the year portion from the inactive date
local inactive_month, good;
 
Line 554 ⟶ 578:
end
end
end -- otherwise, |doi-broken-date= has something but it isn't a date
else
inactive_year = nil; -- |doi-broken-date= has something but it isn't a date
end
if is_set (inactive_year) and is_set (inactive_month) then
Line 568 ⟶ 590:
end
 
local suffix;
local registrant = mw.ustring.match (id, '^10%.([^/]+)/[^%s–]-[^%.,]$'); -- registrant set when DOI has the proper basic form
local registrant, suffix = mw.ustring.match (id, '^10%.([^/]+)/([^%s–]-[^%.,])$'); -- registrant and suffix set when DOI has the proper basic form
 
local registrant_err_patterns = { -- these patterns are for code ranges that are not supported
'^[^1-3]%d%d%d%d%.%d%d*+$', -- 5 digits with subcode (0xxxx, 40000+); accepts: 10000–39999
'^[^1-57]%d%d%d%d$', -- 5 digits without subcode (0xxxx, 60000+); accepts: 10000–5999910000–69999
'^[^1-9]%d%d%d%.%d%d*+$', -- 4 digits with subcode (0xxx); accepts: 1000–9999
'^[^1-9]%d%d%d$', -- 4 digits without subcode (0xxx); accepts: 1000–9999
'^%d%d%d%d%d%d+', -- 6 or more digits
Line 599 ⟶ 622:
if err_flag then
options.coins_list_t['DOI'] = nil; -- when error, unset so not included in COinS
else
if not access and (cfg.known_free_doi_registrants_t[registrant] or is_extended_free (registrant, suffix)) then -- |doi-access=free not set and <registrant> is known to be free
set_message ('maint_doi_unflagged_free'); -- set a maint cat
end
end
Line 683 ⟶ 710:
]]
 
local function isbn (optionsoptions_t)
local isbn_str = optionsoptions_t.id;
local ignore_invalid = optionsoptions_t.accept;
local handler = optionsoptions_t.handler;
local year = options_t.Year; -- when set, valid anchor_year; may have a disambiguator which must be removed
 
local function return_result (check, err_type) -- local function to handle the various returns
Line 695 ⟶ 723:
else -- here when not ignoring
if not check then -- and there is an error
optionsoptions_t.coins_list_t['ISBN'] = nil; -- when error, unset so not included in COinS
set_message ('err_bad_isbn', err_type); -- set an error message
return ISBN; -- return id text
Line 701 ⟶ 729:
end
return ISBN; -- return id text
end
 
if year and not ignore_invalid then --
year = year:match ('%d%d%d%d?'); -- strip disambiguator if present
if year and (1965 > tonumber(year)) then
set_message ('err_invalid_isbn_date'); -- set an error message
return internal_link_id ({link = handler.link, label = handler.label, redirect = handler.redirect,
prefix = handler.prefix, id = isbn_str, separator = handler.separator});
end
end
 
Line 1,012 ⟶ 1,049:
return external_link_id ({link = handler.link, label = handler.label, q = handler.q, redirect = handler.redirect,
prefix = handler.prefix, id = lccn, separator = handler.separator, encode = handler.encode});
end
 
 
--[[--------------------------< M E D R X I V >-----------------------------------------------------------------
 
Format medRxiv ID and do simple error checking. Similar to later bioRxiv IDs, medRxiv IDs are prefixed with a
yyyy.mm.dd. date and suffixed with an optional version identifier. Earliest date accepted is 2020.01.01
 
The medRxiv ID is a date followed by an eight-digit number followed by an optional version indicator 'v' and one or more digits:
https://www.medrxiv.org/content/10.1101/2020.11.16.20232009v2 -> 10.1101/2020.11.16.20232009v2
 
]]
 
local function medrxiv (options)
	-- Formats a medRxiv identifier as an external link and does simple error checking.
	--
	-- inputs (fields of <options> read here):
	--	id – the identifier value to validate and render
	--	handler – identifier handler table; supplies link, label, q, redirect, prefix, separator, encode, access
	--	coins_list_t – table whose 'MEDRXIV' entry is unset when the identifier fails validation
	--
	-- returns the value returned by external_link_id(); the link is rendered whether or not an error was flagged

	local id = options.id;
	local handler = options.handler;
	local err_msg_flag = true;										-- flag; assume that there will be an error

	-- The dated patterns are tried first.  The simple 8-digit pattern is not anchored at its start so it also
	-- matches every un-versioned dated identifier; were it tried first, the un-versioned dated pattern could
	-- never be reached and the encoded date would never be validated.
	local patterns = {
		'^10%.1101/(20%d%d)%.(%d%d)%.(%d%d)%.%d%d%d%d%d%d%d%dv%d+$',	-- y.m.d. date + 8-digit identifier + version (2020-01-01 and later)
		'^10%.1101/(20%d%d)%.(%d%d)%.(%d%d)%.%d%d%d%d%d%d%d%d$',		-- y.m.d. date + 8-digit identifier (2020-01-01 and later)
		'%d%d%d%d%d%d%d%d$',										-- simple 8-digit identifier; these should be relatively rare
		}

	for _, pattern in ipairs (patterns) do							-- spin through the patterns looking for a match
		local y, m, d = id:match (pattern);							-- dated forms return year, month, day; the simple form has no captures so returns the whole match in <y> only
		if y then													-- non-nil when <id> matches <pattern>
			if m then												-- m is nil when id is the 8-digit form
				if not is_valid_rxiv_date (y, m, d, 'm') then		-- validate the encoded date; 'm' selects the medrxiv limit (2020-01-01)
					break;											-- date fail; break out early so we don't unset the error message
				end
			end
			err_msg_flag = nil;										-- we found a match so unset the error message
			break;													-- and done
		end
	end																-- <err_msg_flag> remains set here when no match

	if err_msg_flag then
		options.coins_list_t['MEDRXIV'] = nil;						-- when error, unset so not included in COinS
		set_message ('err_bad_medrxiv');							-- and set the error message
	end

	return external_link_id ({link = handler.link, label = handler.label, q = handler.q, redirect = handler.redirect,
		prefix = handler.prefix, id = id, separator = handler.separator,
		encode = handler.encode, access = handler.access});
end
 
Line 1,073 ⟶ 1,156:
elseif id:match('^%d+$') then -- no prefix
number = id; -- get the number
if 10tonumber < number:len(id) > handler.id_limit then
number = nil; -- constrainunset towhen 1 to 10 digits; change this whenid OCLCvalue issuesexceeds 11-digitthe numberslimit
end
end
Line 1,535 ⟶ 1,618:
['JSTOR'] = jstor,
['LCCN'] = lccn,
['MEDRXIV'] = medrxiv,
['MR'] = mr,
['OCLC'] = oclc,