Module:Citation/CS1/Utilities: Difference between revisions

Content deleted Content added
update per RfC;
sync from sandbox;
 
Line 98:
--[[--------------------------< H Y P H E N _ T O _ D A S H >--------------------------------------------------
 
Converts a hyphen, endash, or emdash to an endash under certain conditions. The hyphen/en/em must separate
like items; unlike items are returned unmodified. These forms are modified:
letter - letter (A - B)
digit - digit (4-5)
digit separator digit - digit separator digit (4.1-4.5 or 4-1-4-5)
letterdigit - letterdigit (A1-A5) (an optional separator between letter and
digit is supported – a.1-a.5 or a-1-a-5)
digitletter - digitletter (5a - 5d) (an optional separator between letter and
digit is supported – 5.a-5.d or 5-a-5-d)
 
any other forms are returned unmodified.
 
str may be a comma- or semicolon-separated list of page ranges with/without single pages
 
]]
Line 118:
return str;
end
 
local accept; -- boolean
 
str = str:gsub ("(%(%(.-%)%))", function(m) return m:gsub(",", ","):gsub(";", ";") end) -- replace commas and semicolons in accept-as-written markup with similar unicode characters so they'll be ignored during the split
str = str:gsub ('&[nm]dash;', {['&ndash;'] = '–', ['&mdash;'] = '—'}); -- replace &mdash; and &ndash; entities with their characters; semicolon mucks up the text.split
str = str:gsub ('&#45;', '-'); -- replace HTML numeric entity with hyphen character
str = str:gsub ('&nbsp;', ' '); -- replace &nbsp; entity with generic keyboard space character
local out = {};
local list = mw.text.split (str, '%s*[,;]%s*'); -- split str at comma or semicolon separators if there are any
 
local accept; -- boolean
 
for _, item in ipairs (list) do -- for each item in the list
item, accept = has_accept_as_written (item); -- remove accept-this-as-written markup when it wraps all of item
if not accept and mw.ustring.match (item, '^%w*[%.%-]?%w+%s*[%—–-–—]%s*%w*[%.%-]?%w+$') then -- if a hyphenated range or has endash or emdash separators
if item:mw.ustring.match (item, '^%a+[%.%-]?%d+%s*%[—–-]%s*%a+[%.%-]?%d+$') or -- letterdigit hyphen letterdigit (optional separator between letter and digit)
item:mw.ustring.match (item, '^%d+[%.%-]?%a+%s*%[—–-]%s*%d+[%.%-]?%a+$') or -- digitletter hyphen digitletter (optional separator between digit and letter)
item:mw.ustring.match (item, '^%d+[%.%-]%d+%s*%[—–-]%s*%d+[%.%-]%d+$') or then -- digit separator digit hyphen digit separator digit
item = mw.ustring.gsub (item, '(%w*[%.%-]?%w+)%s*[—–-]%s*(%w*[%.%-]?%w+)', '<span class="nowrap">%1 –</span> <span class="nowrap">%2</span>'); -- replace hyphen/dash, with spaced endash
item:match ('^%d+%s*%-%s*%d+$') or -- digit hyphen digit
 
item:match ('^%a+%s*%-%s*%a+$') then -- letter hyphen letter
itemelseif =mw.ustring.match (item:gsub, ('(^%w*[%.%-]?%wd+)%s*%[—–-]%s*(%w*[%.%-]?%wd+$')', '%1–%2');or -- replacedigit hyphen, remove extraneous space charactersdigit
item:mw.ustring.match (item, '^%da+%s*%[—–-]%s*%da+$') or then -- digitletter hyphen digitletter
item = mw.ustring.gsub (item, '(%w+)%s*[—–-]%s*(%w+)', '<span class="nowrap">%1–</span>%2'); -- replace hyphen/emdash with endash, remove extraneous space characters
 
else
-- item = mw.ustring.gsub (item, '%s*[–——–-]%s*', '–'); -- disabled; here when 'unlike' items so return <item> as is
end
end