Module:Excerpt/sandbox: Difference between revisions

Content deleted Content added
Change local function names to camelCase except the entry points for backwards compatibility (see talk page technical debt). Also improve a couple comments.
Rename most variables to camelCase for internal consistency, for consistency with general Lua and MediaWiki style, and for better readability of long variable names. Also un-abbreviate a few variable names, again for better readability and hopefully easier onboarding of new developers.
Line 37:
 
-- If template is wanted but produces an unwanted reference then return the string with |shortref or |ref removed
local norefnoRef = mw.ustring.gsub(t, "|%s*shortref%s*%f[|}]", "")
norefnoRef = mw.ustring.gsub(norefnoRef, "|%s*ref%s*%f[|}]", "")
 
-- If a wanted template has unwanted nested templates, purge them too
norefnoRef = mw.ustring.sub(norefnoRef, 1, 2) .. mw.ustring.gsub(mw.ustring.sub(norefnoRef, 3), "%b{}", stripTemplate)
 
-- Replace {{audio}} by its text parameter: {{Audio|Foo.ogg|Bar}} → Bar
norefnoRef = mw.ustring.gsub(norefnoRef, "^{{%s*[Aa]udio.-|.-|(.-)%f[|}].*", "%1")
 
-- Replace {{Nihongo foot}} by its text parameter: {{Nihongo foot|English|英語|eigo}} → English
norefnoRef = mw.ustring.gsub(norefnoRef, "^{{%s*[Nn]ihongo[ _]+foot%s*|(.-)%f[|}].*", "%1")
 
if norefnoRef ~= t then return norefnoRef end
 
return nil -- not an unwanted template: keep
Line 76:
end
 
local descfileDescription, rtitlefileTitle = getContent(page) -- get file description and title after following any redirect
if descfileDescription and descfileDescription ~= "" then -- found description on local wiki
if mw.ustring.match(descfileDescription, "[Nn]on%-free") then return false end
descfileDescription = mw.ustring.gsub(descfileDescription, "%b{}", stripTemplate) -- remove DEFAULTSORT etc. to avoid side effects of frame:preprocess
elseif not rtitlefileTitle then
return false
else
-- try commons
descfileDescription = "{{" .. rtitlefileTitle .. "}}"
end
frame = frame or mw.getCurrentFrame()
descfileDescription = frame:preprocess(descfileDescription)
 
return ( descfileDescription and descfileDescription ~= "" and not mw.ustring.match(descfileDescription, "[Nn]on%-free") ) and true or false -- hide non-free image
end
 
Line 106:
local function parseCaption(caption)
if not caption then return nil end
local lenlength = mw.ustring.len(caption)
local posposition = 1
while posposition <= lenlength do
local linkstartlinkStart, linkendlinkEnd = mw.ustring.find(caption, "%b[]", posposition)
linkstartlinkStart = linkstartlinkStart or lenlength + 1 -- avoid comparison with nil when no link
local templatestarttemplateStart, templateendtemplateEnd = mw.ustring.find(caption, "%b{}", posposition)
templatestarttemplateStart = templatestarttemplateStart or lenlength + 1 -- avoid comparison with nil when no template
local argendargEnd = mw.ustring.find(caption, "[|}]", posposition) or lenlength + 1
if linkstartlinkStart < templatestarttemplateStart and linkstartlinkStart < argendargEnd then
posposition = linkendlinkEnd + 1 -- skip wikilink
elseif templatestarttemplateStart < argendargEnd then
posposition = templateendtemplateEnd + 1 -- skip template
else -- argument ends before the next wikilink or template
return mw.ustring.sub(caption, 1, argendargEnd - 1)
end
end
Line 137:
local hasImages = false
local images = {}
local capture_fromcaptureFrom = 1
while capture_fromcaptureFrom < mw.ustring.len(text) do
local argname, position, image = mw.ustring.match(text, "|%s*([^=|]-[Ii][Mm][Aa][Gg][Ee][^=|]-)%s*=%s*()(.*)", capture_fromcaptureFrom)
if image then -- ImageCaption=, image_size=, image_upright=, etc. do not introduce an image
local lcArgname = mw.ustring.lower(argname)
Line 151:
hasImages = true
images[position] = image
capture_fromcaptureFrom = position
else
capture_fromcaptureFrom = mw.ustring.len(text)
end
end
capture_fromcaptureFrom = 1
while capture_fromcaptureFrom < mw.ustring.len(text) do
local position, image = mw.ustring.match(text, "|%s*[^=|]-[Pp][Hh][Oo][Tt][Oo][^=|]-%s*=%s*()(.*)", capture_fromcaptureFrom)
if image then
hasImages = true
images[position] = image
capture_fromcaptureFrom = position
else
capture_fromcaptureFrom = mw.ustring.len(text)
end
end
capture_fromcaptureFrom = 1
while capture_fromcaptureFrom < mw.ustring.len(text) do
local position, image = mw.ustring.match(text, "|%s*[^=|{}]-%s*=%s*()%[?%[?([^|{}]*%.%a%a%a%a?)%s*%f[|}]", capture_fromcaptureFrom)
if image then
hasImages = true
Line 175:
images[position] = image
end
capture_fromcaptureFrom = position
else
capture_fromcaptureFrom = mw.ustring.len(text)
end
end
Line 185:
-- find all captions
local captions = {}
capture_fromcaptureFrom = 1
while capture_fromcaptureFrom < mw.ustring.len(text) do
local position, caption = matchAny(text, "|%s*", d.captionParams, "%s*=%s*()([^\n]+)", capture_fromcaptureFrom)
if caption then
-- extend caption to parse "| caption = Foo {{Template\n on\n multiple lines}} Bar\n"
Line 207:
end
end
capture_fromcaptureFrom = position
else
capture_fromcaptureFrom = mw.ustring.len(text)
end
end
Line 223:
mw.ustring.match(altText, ".*%[%b[]%]()") or 1)
 
local lenlength = mw.ustring.len(altText)
local aftertext = math.min( -- find position after whichever comes first: end of string, }} or |
mw.ustring.match(altText, "()}}", lookfrom) or lenlength+1,
mw.ustring.match(altText, "()|", lookfrom) or lenlength+1)
altText = mw.ustring.sub(altText, 1, aftertext-1) -- chop off |... or }}... which is not part of [[...]] or {{...}}
 
Line 333:
}
 
local function modifyImage(image, fileargsfileArgs)
if fileargsfileArgs then
for _, filearg in pairs(mw.text.split(fileargsfileArgs, "|")) do -- handle fileargsfileArgs=left|border etc.
local fa = mw.ustring.gsub(filearg, "=.*", "") -- "upright=0.75" → "upright"
local group = {fa} -- group of "border" is ["border"]...
Line 362:
-- @param filesOnly : If set, only return the files and not the prose
local function parse(text, options, filesOnly)
local allparasallParagraphs = true -- keep all paragraphs?
if options.paraflags then
if type(options.paraflags) ~= "table" then options.paraflags = numberFlags(options.paraflags) end
for _, v in pairs(options.paraflags) do
if v then allparasallParagraphs = false end -- if any para specifically requested, don't keep all
end
end
if filesOnly then
allparasallParagraphs = false
options.paraflags = {}
end
Line 382:
end
local fileargsfileArgs = options.fileargs and mw.text.trim(options.fileargs)
if fileargsfileArgs == '' then fileargsfileArgs = nil end
 
local leadstartleadStart = nil -- have we found some text yet?
local t = "" -- the stripped down output text
local filetextfileText = "" -- output text with concatenated [[File:Foo|...]]\n entries
local files = 0 -- how many images so far
local paras = 0 -- how many paragraphs so far
Line 401:
f = "[[File:" .. f .. "]]"
f = modifyImage(f, "thumb")
f = modifyImage(f, fileargsfileArgs)
if checkImage(f) then filetextfileText = filetextfileText .. f .. "\n" end
end
 
repeat -- loop around parsing a template, image or paragraph
local token = mw.ustring.match(text, "^%b{}%s*") or false -- {{Template}} or {| Table |}
if not leadstartleadStart and not token then token = mw.ustring.match(text, "^%b<>%s*%b{}%s*") end -- allow <tag>{{template}} before lead has started
 
local line = mw.ustring.match(text, "[^\n]*")
Line 421:
if token then -- found a template which is not the prefix to a line of text
 
if leadstartleadStart then -- lead has already started, so keep the template within the text, unless it's a whole line (navbox etc.)
if not filesOnly and not startLine then t = t .. token end
 
Line 441:
if options.fileflags and options.fileflags[files] then -- if displaying this image
image = modifyImage(image, "thumb")
image = modifyImage(image, fileargsfileArgs)
filetextfileText = filetextfileText .. image
end
end
Line 454:
if options.fileflags and options.fileflags[files] then
local image = token -- copy token for manipulation by adding |right etc. without changing the original
image = modifyImage(image, fileargsfileArgs)
filetextfileText = filetextfileText .. image
end
end
else -- got a paragraph, which ends at a file, image, blank line or end of text
local afterendafterEnd = mw.ustring.len(text) + 1
local blankposblankPosition = mw.ustring.find(text, "\n%s*\n") or afterendafterEnd -- position of next paragraph delimiter (or end of text)
local endposendPosition = math.min( -- find position of whichever comes first: [[File:, [[Image: or paragraph delimiter
mw.ustring.find(text, "%[%[%s*[Ff]ile%s*:") or afterendafterEnd,
mw.ustring.find(text, "%[%[%s*[Ii]mage%s*:") or afterendafterEnd,
blankposblankPosition)
token = mw.ustring.sub(text, 1, endposendPosition-1)
if blankposblankPosition < afterendafterEnd and blankposblankPosition == endposendPosition then -- paragraph ends with a blank line
token = token .. mw.ustring.match(text, "\n%s*\n", blankposblankPosition)
end
local isHatnote = not(leadstartleadStart) and mw.ustring.sub(token, 1, 1) == ':'
if not isHatnote then
leadstartleadStart = leadstartleadStart or mw.ustring.len(t) + 1 -- we got a paragraph, so mark the start of the lead section
paras = paras + 1
if allparasallParagraphs or (options.paraflags and options.paraflags[paras]) then t = t .. token end -- add if this paragraph wanted
end
end -- of "else got a paragraph"
Line 483:
 
text = mw.ustring.gsub(t, "\n+$", "") -- remove trailing line feeds, so "{{Transclude text excerpt|Foo}} more" flows on one line
return filetextfileText, text
end
 
Line 531:
 
-- Extract the body of a ==Section== from the wikitext of a page
-- @param text : wikitext of the whole page
-- @param section : section title, possibly percent-encoded (e.g. %26 for &)
-- @param mainOnly : If set, stop at the next heading of any level, so subsections are dropped;
--                   otherwise stop at the next heading of the same or a higher level
-- @return the wikitext that follows the section heading, or nil if there is no such section
local function getSection(text, section, mainOnly)
	-- %26 → & etc, then ^ → %^ etc. so that the title is matched literally rather than as a pattern
	-- NOTE(review): mw.uri.decode uses "QUERY" decoding by default, which presumably also turns "+" into a space;
	-- confirm this is intended for section titles such as "C++"
	local escapedSection = mw.ustring.gsub(mw.uri.decode(section), "([%^%$%(%)%%%.%[%]%*%+%-%?])", "%%%1")

	-- The heading pattern begins and ends with a line feed, so pad BOTH ends of the text:
	-- this lets a heading on the very first line, or on a last line without a line feed, be found
	local level, content = mw.ustring.match("\n" .. text .. "\n", "\n(==+)%s*" .. escapedSection .. "%s*==.-\n(.*)")
	if not content then return nil end -- no such section

	local nextSection
	if mainOnly then
		nextSection = "\n==.*" -- Main part of section terminates at any level of header
	else
		nextSection = "\n==" .. mw.ustring.rep("=?", #level - 2) .. "[^=].*" -- "===" → "\n===?[^=].*", matching "==" or "===" but not "===="
	end

	-- remove later sections with headings at this level or higher;
	-- assigning to a single variable discards gsub's second result (the replacement count)
	content = mw.ustring.gsub(content, nextSection, "")
	return content
end
Line 547:
-- Remove unmatched <tag> or </tag> tags
local function fixTags(text, tag)
local startcountstartCount = 0
for i in mw.ustring.gmatch(text, "<%s*" .. tag .. "%f[^%w_].->") do startcountstartCount = startcountstartCount + 1 end
 
local endcountendCount = 0
for i in mw.ustring.gmatch(text, "<%s*/" .. tag .. "%f[^%w_].->") do endcountendCount = endcountendCount + 1 end
 
if startcountstartCount > endcountendCount then -- more <tag> than </tag>: remove the last few <tag>s
local i = 0
text = mw.ustring.gsub(text, "<%s*" .. tag .. "%f[^%w_].->", function(t)
i = i + 1
if i > endcountendCount then return "" else return nil end
end) -- "end" here terminates the anonymous replacement function(t) passed to gsub
elseif endcountendCount > startcountstartCount then -- more </tag> than <tag>: remove the first few </tag>s
text = mw.ustring.gsub(text, "<%s*/" .. tag .. "%f[^%w_].->", "", endcountendCount - startcountstartCount)
end
return text
Line 566:
 
-- Main function returns a string value: text of the lead of a page
local function main(pagenamespageNames, options)
if not pagenamespageNames or #pagenamespageNames < 1 then return err("No page names given") end
local pagenamepageName
local text
local pagecountpageCount = #pagenamespageNames
local firstpagefirstPage = pagenamespageNames[1] or "(nil)" -- save for error message, as it the name will be deleted
local gotoptgotOptions
local pageoptstrpageOptionsString
local section
 
-- read the page, or a random one if multiple pages were provided
if pagecountpageCount > 1 then math.randomseed(os.time()) end
while not text and pagecountpageCount > 0 do
local pagenumpageNumber = 1
if pagecountpageCount > 1 then pagenumpageNumber = math.random(pagecountpageCount) end -- pick a random title
pagenamepageName = pagenamespageNames[pagenumpageNumber]
if pagenamepageName and pagenamepageName ~= "" then
-- We have page or [[page]] or [[page|text]], possibly followed by |opt1|opt2...
local pn
pn, gotoptgotOptions, pageoptstrpageOptionsString = mw.ustring.match(pagenamepageName, "^%s*(%[%b[]%])%s*(|?)(.*)")
if pn then
pagenamepageName = mw.ustring.match(pn, "%[%[([^|%]]*)") -- turn [[page|text]] into page, discarding text
else -- we have page or page|opt...
pagenamepageName, gotoptgotOptions, pageoptstrpageOptionsString = mw.ustring.match(pagenamepageName, "%s*([^|]*[^|%s])%s*(|?)(.*)")
end
 
if pagenamepageName and pagenamepageName ~= "" then
local pn
pn, section = mw.ustring.match(pagenamepageName, "(.-)#(.*)")
pagenamepageName = pn or pagenamepageName
text, normalisedPagenamenormalisedPageName = getContent(pagenamepageName)
if not normalisedPagenamenormalisedPageName then
return err("No title for page name " .. pagenamepageName)
else
pagenamepageName = normalisedPagenamenormalisedPageName
end
if text and options.nostubs then
Line 607:
end
if not section then
section = mw.ustring.match(pagenamepageName, ".-#(.*)") -- parse redirect to Page#Section
end
if text and section and section ~= "" then text = getSection(text, section) end
end
end
if not text then table.remove(pagenamespageNames, pagenumpageNumber) end -- this one didn't work; try another
pagecountpageCount = pagecountpageCount - 1 -- ensure that we exit the loop after at most #pagenamespageNames iterations
end
if not text then return err("Cannot read a valid page: first name is " .. firstpagefirstPage) end
 
text = cleanupText(text, options)
 
local pageoptspageOptions = {} -- pageoptspageOptions (even if value is "") have priority over global options
for k, v in pairs(options) do pageoptspageOptions[k] = v end
if gotoptgotOptions and gotoptgotOptions ~= "" then
for _, t in pairs(mw.text.split(pageoptstrpageOptionsString, "|")) do
local k, v = mw.ustring.match(t, "%s*([^=]-)%s*=(.-)%s*$")
pageoptspageOptions[k] = v
end
pageoptspageOptions.paraflags = numberFlags(pageoptspageOptions["paragraphs"] or "") -- parse paragraphs, e.g. "1,3-5" → {"1","3-5"}
pageoptspageOptions.fileflags = numberFlags(pageoptspageOptions["files"] or "") -- parse file numbers
if pageoptspageOptions.more and pageoptspageOptions.more == "" then pageoptspageOptions.more = "Read more..." end -- more= is short for this default text
end
 
local filetextfileText
filetextfileText, text = parse(text, pageoptspageOptions)
 
-- replace the bold title or synonym near the start of the article by a wikilink to the article
local lang = mw.language.getContentLanguage()
local pos = mw.ustring.find(text, "'''" .. lang:ucfirst(pagenamepageName) .. "'''", 1, true) -- look for "'''Foo''' is..." (uc) or "A '''foo''' is..." (lc)
or mw.ustring.find(text, "'''" .. lang:lcfirst(pagenamepageName) .. "'''", 1, true) -- plain search: special characters in pagenamepageName represent themselves
if pos then
local len = mw.ustring.len(pagenamepageName)
text = mw.ustring.sub(text, 1, pos + 2) .. "[[" .. mw.ustring.sub(text, pos + 3, pos + len + 2) .. "]]" .. mw.ustring.sub(text, pos + len + 3, -1) -- link it
else -- look for anything unlinked in bold, assumed to be a synonym of the title (e.g. a person's birth name)
text = mw.ustring.gsub(text, "()'''(.-'*)'''", function(a, b)
if a < 100 and not mw.ustring.find(b, "%[") then ---if early in article and not wikilinked
return "'''[[" .. pagenamepageName .. "|" .. b .. "]]'''" -- replace '''Foo''' by '''[[pagenamepageName|Foo]]'''
else
return nil -- instruct gsub to make no change
Line 652:
 
-- remove '''bold text''' if requested
if is(pageoptspageOptions.nobold) then text = mw.ustring.gsub(text, "'''", "") end
 
text = filetextfileText .. text
 
-- Seek and destroy unterminated templates and wikilinks
Line 674:
text = fixTags(text, "div")
 
if pageoptspageOptions.more then text = text .. " '''[[" .. pagenamepageName .. "|" .. pageoptspageOptions.more .. "]]'''" end -- wikilink to article for more info
 
if pageoptspageOptions.list and not pageoptspageOptions.showall then -- add a collapsed list of pages which might appear
local listtext = pageoptspageOptions.list
if listtext == "" then listtext = "Other articles" end
text = text .. "{{collapse top|title={{resize|85%|" ..listtext .. "}}|bg=fff}}{{hlist"
for _, p in pairs(pagenamespageNames) do
if mw.ustring.match(p, "%S") then text = text .. "|[[" .. mw.text.trim(p) .. "]]" end
end
Line 702:
end
 
local pagenamespageNames = {}
if func == "lead" then
pagenamespageNames = { args[1] }
elseif func == "linked" or func == "listitem" then
-- Read named page and find its wikilinks
Line 721:
text = mw.ustring.gsub(text, "{{%s*[Aa]nnotated[ _]link%s*|%s*(.-)%s*}}", "[[%1]]")
if func == "linked" then
for p in mw.ustring.gmatch(text, "%[%[%s*([^%]|\n]*)") do table.insert(pagenamespageNames, p) end
else -- listitem: first wikilink on a line beginning *, :#, etc. except in "See also" or later section
text = mw.ustring.gsub(text, "\n== *See also.*", "")
for p in mw.ustring.gmatch(text, "\n:*[%*#][^\n]-%[%[%s*([^%]|\n]*)") do table.insert(pagenamespageNames, p) end
end
elseif func == "random" then
-- accept any number of page names. If more than one, we'll pick one randomly
for i, p in pairs(args) do
if p and type(i) == 'number' then table.insert(pagenamespageNames, p) end
end
elseif func == "selected" then
Line 737:
if articlekey == 0 then articlekey = articlecount end
end
pagenamespageNames = { args[articlekey] }
end
 
Line 748:
if options.showall then
local separator = ""
for _, p in pairs(pagenamespageNames) do
local t = main({ p }, options)
if t ~= "" then
Line 757:
end
else
text = main(pagenamespageNames, options)
end
 
Line 811:
if options.more and options.more == "" then options.more = "Read more..." end -- more= is short for this default text
local pagenamespageNames = { (article .. '#' .. (section or '')) }
local text = main(pagenamespageNames, options)
if text == "" and d.brokenCategory and d.brokenCategory ~= "" and mw.title.getCurrentTitle().isContentPage then
Line 847:
-- Thin wrappers on the export table p: each one forwards its arguments unchanged
-- to the local function and returns whatever that function returns.

-- Forwards to parseImage
function p.parseimage(text, start)
	return parseImage(text, start)
end

-- Forwards to cleanupText
function p.cleanupText(text, options)
	return cleanupText(text, options)
end

-- Forwards to main
function p.main(pageNames, options)
	return main(pageNames, options)
end

-- Forwards to numberFlags
function p.numberflags(str)
	return numberFlags(str)
end