Module:Excerpt: Difference between revisions

Content deleted Content added
Parse templates more carefully to catch unwanted templates with other templates nested inside. Also removing a few redundant % signs.
Use infobox CSS class if it exists (mainly for dark mode compatibility)
 
(149 intermediate revisions by 10 users not shown)
Line 1:
-- Module:Excerpt implements the Excerpt template
local p = {}
-- Documentation and master version: https://en.wikipedia.org/wiki/Module:Excerpt
local mRedirect = require('Module:Redirect')
-- Authors: User:Sophivorus, User:Certes, User:Aidan9382 & others
-- License: CC-BY-SA-3.0
 
local Transcluder = require( 'Module:Transcluder' )
local errors
-- Return blank text, or an error message if requested
local function err(text)
if errors then error(text, 2) end
return ""
end
 
local yesno = require( 'Module:Yesno' )
-- Check image for suitablity
local function checkimage(image)
local page = mw.ustring.match(image, "([Ff]ile%s*:[^|%]]*)") -- File:(name) ...
or mw.ustring.match(image, "([Ii]mage%s*:[^|%]]*)") -- or Image:(name) ...
if not page then return nil end
 
local ok, config = pcall( require, 'Module:Excerpt/config' )
local title = mw.title.new(":" .. page) -- Read description page (for :File:Foo rather than File:Foo)
if not titleok then returnconfig nil= {} end
 
local redirp = mRedirect.getTarget(title){}
if redir then title = mw.title.new(redir) end
 
local frame = mw.getCurrentFrame()
local desc = frame:preprocess("{{" .. title.prefixedText .. "}}")
return desc and desc ~= "" and not mw.ustring.match(desc, "[Nn]on%-free") -- hide non-free image
end
 
-- Helper function to get arguments
-- Attempt to parse [[File:...]] or [[Image:...]], either anywhere or at the start
local args
local function parseimage(text, start)
local startrefunction =getArg( ""key, default )
local value = args[ key ]
if start then startre = "^" end -- a true flag restricts search to start of string
if value and mw.text.trim( value ) ~= '' then
local image = mw.ustring.match(text, startre .. "%[%[%s*[Ff]ile%s*:.*") -- [[File: ...
return value
or mw.ustring.match(text, startre .. "%[%[%s*[Ii]mage%s*:.*") -- or [[Image: ...
if image then
image = mw.ustring.match(image, "%b[]%s*") -- match [[...]] to handle nesting
end
return imagedefault
end
 
-- Helper function to handle errors
-- Attempt to construct a [[File:...]] block from {{infobox ... |image= ...}}
local function argimagegetError(text message, value )
if type( message ) == 'string' then
local token = nil
message = Transcluder.getError( message, value )
if mw.ustring.match(text, "{{%s*[Ii]nfobox") then
local image = mw.ustring.match(text, "|%s*image%s*=%s*([^}|]*)") -- parse image= argument...
or mw.ustring.match(text, "|%s*Cover%s*=%s*([^}|]-)") -- or Cover= from Infobox album
if image then -- add in relevant parameters: caption, alt text and image size
token = "[[" -- Add File: unless name already begins File: or Image:
if not (mw.ustring.match(image, "^[Ff]ile%s*:")
or mw.ustring.match(image, "^[Ii]mage%s*:")) then
token = token .. "File:"
end
token = token .. image
local caption = mw.ustring.match(text, "|%s*[Cc]aption%s*=%s*([^}|]*)")
if caption then token = token .. "|" .. caption end
local alt = mw.ustring.match(text, "|%s*alt%s*=%s*([^}|]*)")
if alt then token = token .. "|alt=" .. alt end
local image_size = mw.ustring.match(text, "|%s*image_size%s*=%s*([^}|]*)")
if image_size then token = token .. "|" .. image_size end
token = mw.ustring.gsub(token, "\n","") .. "]]\n"
end
end
if config.categories and config.categories.errors and mw.title.getCurrentTitle().isContentPage then
 
message:node( '[[Category:' .. config.categories.errors .. ']]' )
return token
end
return message
end
 
-- HelpHelper gsubfunction to removeget unwantedlocalized templatesmessages
local function getMessage( key )
-- If template is unwanted then return "" (replace by nothing) else return nil (keep existing string)
local ok, TNT = pcall( require, 'Module:TNT' )
 
if not ok then return key end
local function striptemplate(t)
return TNT.format( 'I18n/Module:Excerpt.tab', key )
local unwanted = {"[Ee]fn", "[Ee]fn-la", "[Ee]l[mn]", "[Rr]p?", "[Ss]fn[bp]", "[Ss]f[bn]", "NoteTag", "#[Tt]ag:%s*[Rr]ef", "[Rr]efn?",
"[CcDd]n", "[Cc]itation needed", "[Dd]isambiguation needed"}
for _, u in pairs(unwanted) do
if mw.ustring.match(t, "^{{%s*" .. u .. "%s*%f[|}]") then return "" end -- unwanted template: remove
end
return nil -- not an unwanted template: keep
end
 
-- EntryMain entry point for Lua callerstemplates
function p.main( frame )
-- Returns a string value: text of the lead of a page
args = Transcluder.parseArgs( frame )
function p._lead(pagenames, options)
errors = options.errors
 
-- Make sure the requested page exists
if not pagenames or #pagenames < 1 then return err("No page names given") end
local pagenamepage = getArg( 1 )
if not page or page == '{{{1}}}' then return getError( 'no-page' ) end
local text
local pagecounttitle = #pagenamesmw.title.new(page)
if not title then return getError( 'invalid-title', page ) end
local firstpage = pagenames[1] or "(nil)" -- save for error message, as it the name will be deleted
if title.isRedirect then title = title.redirectTarget end
if not title.exists then return getError( 'page-not-found', page ) end
page = title.prefixedText
 
-- Set variables from the template parameters
-- read the page, or a random one if multiple pages were provided
local section = getArg( 2, mw.ustring.match( getArg( 1 ), '[^#]+#(.+)' ) )
if pagecount > 1 then math.randomseed(os.time()) end
local hat = yesno( getArg( 'hat', true ) )
while not text and pagecount > 0 do
local edit = yesno( getArg( 'edit', true ) )
local pagenum = 1
local this = getArg( 'this' )
if pagecount > 1 then pagenum = math.random(pagecount) end -- pick a random title
local only = getArg( 'only' )
pagename = pagenames[pagenum]
local files = getArg( 'files', getArg( 'file', ( only == 'file' and 1 ) ) )
if pagename and pagename ~= "" then
local lists = getArg( 'lists', getArg( 'list', ( only == 'list' and 1 ) ) )
pagename = mw.ustring.match(pagename, "%[%[%s*(.-)[]|#]") or pagename -- "[[Foo|Bar]]" → "Foo"
local tables = getArg( 'tables', getArg( 'table', ( only == 'table' and 1 ) ) )
pagename = mw.ustring.match(pagename, "%S.*%S") -- strip leading and trailing white space
local templates = getArg( 'templates', getArg( 'template', ( only == 'template' and 1 ) ) )
local paragraphs = getArg( 'paragraphs', getArg( 'paragraph', ( only == 'paragraph' and 1 ) ) )
local references = getArg( 'references' )
local subsections = not yesno( getArg( 'subsections' ) )
local noLinks = not yesno( getArg( 'links', true ) )
local noBold = not yesno( getArg( 'bold' ) )
local onlyFreeFiles = yesno( getArg( 'onlyfreefiles', true ) )
local briefDates = yesno( getArg( 'briefdates', false ) )
local inline = yesno( getArg( 'inline' ) )
local quote = yesno( getArg( 'quote' ) )
local more = yesno( getArg( 'more' ) )
local class = getArg( 'class' )
local displaytitle = getArg( 'displaytitle' ) or page
 
-- Build the hatnote
if pagename and pagename ~= "" then
if hat and not inline then
local title = mw.title.new(pagename) -- Find the lead section of the named page
if this then
if not title then return err("No title for page name " .. pagename) end
hat = this
local redir = mRedirect.getTarget(title)
elseif quote then
if redir then title = mw.title.new(redir) end
hat = getMessage( 'this' )
 
elseif only then
text = title:getContent()
hat = getMessage( only )
end
else
hat = getMessage( 'section' )
end
hat = hat .. ' ' .. getMessage( 'excerpt' ) .. ' '
if section then
hat = hat .. '[[:' .. page .. '#' .. mw.uri.anchorEncode( section ) .. '|' .. displaytitle
.. ' § ' .. mw.ustring.gsub( section, '%[%[([^]|]+)|?[^]]*%]%]', '%1' ) .. ']].' -- remove nested links
else
hat = hat .. '[[:' .. page .. '|' .. displaytitle .. ']].'
end
if edit then
hat = hat .. '<span class="mw-editsection-like plainlinks"><span class="mw-editsection-bracket">[</span>['
hat = hat .. title:fullUrl( 'action=edit' ) .. ' ' .. mw.message.new( 'editsection' ):plain()
hat = hat .. ']<span class="mw-editsection-bracket">]</span></span>'
end
if config.hat then
if not text then table.remove(pagenames, pagenum) end -- this one didn't work; try another
hat = config.hat .. hat .. '}}'
pagecount = pagecount - 1 -- ensure that we exit the loop eventually
hat = frame:preprocess( hat )
else
hat = mw.html.create( 'div' ):addClass( 'dablink excerpt-hat' ):wikitext( hat )
end
else
hat = nil
end
if not text then return err("Cannot read a valid page: first name is " .. firstpage) end
 
-- Build the "Read more" link
text = mw.ustring.gsub(text, "<!%-%-.-%-%->","") -- remove HTML comments
if more and not inline then
text = mw.ustring.gsub(text, "%c%s*==.*","") -- remove first heading and everything after it
more = "'''[[" .. page .. '#' .. ( section or '' ) .. "|" .. getMessage( 'more' ) .. "]]'''"
text = mw.ustring.gsub(text, "<noinclude>.-</noinclude>", "") -- remove noinclude bits
more = mw.html.create( 'div' ):addClass( 'noprint excerpt-more' ):wikitext( more )
text = mw.ustring.gsub(text, "<%s*ref[^>]-/%s*>", "") -- remove refs cited elsewhere
else
text = mw.ustring.gsub(text, "<%s*ref.->.-<%s*/%s*ref%s*>", "") -- remove refs
more = nil
text = mw.ustring.gsub(text, "<%s*imagemap.->.-<%s*/%s*imagemap%s*>", "") -- remove imagemaps
end
text = mw.ustring.gsub(text, "%b{}", striptemplate) -- remove unwanted templates such as references
text = mw.ustring.gsub(text, "\n%s*{{%s*[Tt][Oo][Cc].-}}", "\n") -- remove most common tables of contents
 
-- Build the options for Module:Transcluder out of the template parameters and the desired defaults
local allparas = true -- keep all paragraphs?
iflocal options.paraflags then= {
files = files,
for _, v in pairs(options.paraflags) do
lists = lists,
if v then allparas = false end -- if any para specifically requested, don't keep all
tables = tables,
end
paragraphs = paragraphs,
sections = subsections,
categories = 0,
references = references,
only = only and mw.text.trim( only, 's' ) .. 's',
noLinks = noLinks,
noBold = noBold,
noSelfLinks = true,
noNonFreeFiles = onlyFreeFiles,
noBehaviorSwitches = true,
fixReferences = true,
linkBold = true,
}
 
-- Get the excerpt itself
local title = page .. '#' .. ( section or '' )
local ok, excerpt = pcall( Transcluder.get, title, options )
if not ok then return getError( excerpt ) end
if mw.text.trim( excerpt ) == '' and not only then
if section then return getError( 'section-empty', section ) else return getError( 'lead-empty' ) end
end
 
-- Fix birth and death dates, but only in the first paragraph
local maxfile = 0 -- for efficiency, stop checking images after this many have been found
if options.fileflagsbriefDates then
local startpos = 1 -- skip initial templates
for k, v in pairs(options.fileflags) do
local s
if v and k > maxfile then maxfile = k end
local e = 0
repeat
startpos = e + 1
s, e = mw.ustring.find( excerpt, "%s*%b{}%s*", startpos )
until not s or s > startpos
s, e = mw.ustring.find( excerpt, "%b()", startpos ) -- get (...), which may be (year–year)
if s and s < startpos + 100 then -- look only near the start
local year1, conjunction, year2 = mw.ustring.match( mw.ustring.sub( excerpt, s, e ), '(%d%d%d+)(.-)(%d%d%d+)' )
if year1 and year2 and (mw.ustring.match( conjunction, '[%-–—]' ) or mw.ustring.match( conjunction, '{{%s*[sS]nd%s*}}' )) then
local y1 = tonumber(year1)
local y2 = tonumber(year2)
if y2 > y1 and y2 < y1 + 125 and y1 <= tonumber( os.date( "%Y" )) then
excerpt = mw.ustring.sub( excerpt, 1, s ) .. year1 .. "–" .. year2 .. mw.ustring.sub( excerpt, e )
end
end
end
end
 
-- aIf basicno parserfile was found, try to trimget one downfrom the leadinfobox
local fileNamespaces = Transcluder.getNamespaces( 'File' )
local inlead = false -- have we found some text yet?
if ( ( only == 'file' or only == 'files' ) or ( not only and ( files ~= '0' or not files ) ) ) and -- caller asked for files
local t = "" -- the stripped down output text
not Transcluder.matchAny( excerpt, '%[%[', fileNamespaces, ':' ) and -- and there are no files in Transcluder's output
local files = 0 -- how many [[Image: or [[File: so far
config.captions -- and we have the config option required to try finding files in templates
local paras = 0 -- how many paragraphs so far
then
 
-- We cannot distinguish the infobox from the other templates so we search them all
text = mw.ustring.gsub(text,"^%s*","") -- remove initial white space
local infobox = Transcluder.getTemplates( excerpt );
repeat -- loop around parsing a template, image or paragraph
infobox = table.concat( infobox )
local token = mw.ustring.match(text, "^%b{}%s*") or false -- {{Template}}
local parameters = Transcluder.getParameters( infobox )
if token then
local file, captions, caption, cssclasses, cssclass
if inlead then -- keep comments and templates only within text body
for _, pair in pairs( config.captions ) do
t = t .. token
file = pair[1]
elseif files < maxfile then -- look for [[File:... embedded in an infobox etc. in the preamble
file = parameters[file]
local image = parseimage(token, false) or argimage(token)
if file and Transcluder.matchAny( file, '^.*%.', { '[Jj][Pp][Ee]?[Gg]', '[Pp][Nn][Gg]', '[Gg][Ii][Ff]', '[Ss][Vv][Gg]' }, '.*' ) then
if image and checkimage(image) then -- keep comments and templates only within text body
file = mw.ustring.match( file, '%[?%[?.-:([^{|]+)%]?%]?' ) or file -- [[File:Example.jpg{{!}}upright=1.5]] to Example.jpg
files = files + 1
captions = pair[2]
if options.fileflags and options.fileflags[files] then
for _, p in pairs( captions ) do
image = mw.ustring.gsub(image, "|%s*frameless%s*%f[|%]]", "") -- make image a thumbnail, not frameless etc.
if parameters[ p ] then caption = parameters[ p ] break end
image = mw.ustring.gsub(image, "|%s*framed?%s*%f[|%]]", "")
end
if not mw.ustring.match(image, "|%s*thumb%s*%f[|%]]")
and not mw.ustring.match(image, "|%s*thumbnail%s*%f[|%]]") then
-- Check for CSS classes
image = mw.ustring.gsub(image, "(%]%]%s*)$", "|thumb%1")
-- We opt to use skin-invert-image instead of skin-invert
-- in all other cases, the CSS provided in the infobox is used
if pair[3] then
cssclasses = pair[3]
for _, p in pairs(cssclasses) do
if parameters[p] then
cssclass = ((parameters[p] == 'skin-invert') and 'skin-invert-image' or parameters[p])
break
end
if options.fileargs then image = mw.ustring.gsub(image, "(%]%]%s*)$", "|" .. options.fileargs .. "%1") end
t = t .. image
end
end
end
excerpt = '[[File:' .. file ..
else
(cssclass and ('|class=' .. cssclass) or '') ..
token = parseimage(text, true)
'|thumb|' .. (caption or '') .. ']]' .. excerpt
if token then
if files( <onlyFreeFiles maxfile and checkimage(token) then
excerpt = Transcluder.removeNonFreeFiles( excerpt )
files = files + 1
if options.fileflags and options.fileflags[files] then
local image = token
if options.fileargs then image = mw.ustring.gsub(image, "(%]%]%s*)$", "|" .. options.fileargs .. "%1") end
t = t .. image
end
end
break
else -- got a paragraph, which ends at a file, image, blank line or end of text
local afterend = mw.ustring.len(text) + 1
local blankpos = mw.ustring.find(text, "\n%s*\n") or afterend
local endpos = math.min(
mw.ustring.find(text, "%[%[%s*[Ff]ile%s*:") or afterend,
mw.ustring.find(text, "%[%[%s*[Ff]ile%s*:") or afterend,
blankpos)
token = mw.ustring.sub(text, 1, endpos-1)
if blankpos < afterend and blankpos == endpos then -- paragraph ends with a blank line
token = token .. mw.ustring.match(text, "\n%s*\n", blankpos)
end
inlead = true
paras = paras + 1
if allparas or (options.paraflags and options.paraflags[paras]) then t = t .. token end
end
end
end
 
-- Unlike other elements, templates are filtered here
if token then text = mw.ustring.sub(text, mw.ustring.len(token)+1) end
-- because we had to search the infoboxes for files
until not text or text == "" or not token or token == ""
local trash
 
if only and ( only == 'template' or only == 'templates' ) then
text = mw.ustring.gsub(t, "\n+$", "") -- remove trailing line feeds, so "{{Transclude text excerpt|Foo}} more" flows on one line
trash, excerpt = Transcluder.getTemplates( excerpt, templates );
 
else -- Remove blacklisted templates
if options.more then text = text .. " '''[[" .. pagename .. "|" .. options.more .. "]]'''" end
local blacklist = config.blacklist and table.concat( config.blacklist, ',' ) or ''
return text
if templates then
end
if string.sub( templates, 1, 1 ) == '-' then --Unwanted templates. Append to blacklist
 
blacklist = templates .. ',' .. blacklist
-- Convert a comma-separated list of numbers or min-max ranges into a list of booleans, e.g. "1,3-5" → {1=true,2=false,3=true,4=true,5=true}
else --Wanted templates. Replaces blacklist and acts as whitelist
local function numberflags(str)
blacklist = templates
local ranges = mw.text.split(str, ",") -- parse ranges, e.g. "1,3-5" → {"1","3-5"}
end
local flags = {}
else
for _, r in pairs(ranges) do
blacklist = '-' .. blacklist
local min, max = mw.ustring.match(r, "^%s*(%d+)%s*%-%s*(%d+)%s*$") -- "3-5" → min=3 max=5
if not max then min, max = mw.ustring.match(r, "^%s*((%d+))%s*$") end -- "1" → min=1 max=1
if max then
for p = min, max do flags[p] = true end
end
trash, excerpt = Transcluder.getTemplates( excerpt, blacklist );
end
return flags
end
 
-- Remove extra line breaks but leave one before and after so the parser interprets lists, tables, etc. correctly
-- Shared template invocation code for lead and random functions
excerpt = mw.text.trim( excerpt )
local function leadrandom(frame, israndom)
excerpt = string.gsub( excerpt, '\n\n\n+', '\n\n' )
-- args = { 1,2,... = page names, paragraphs = list e.g. "1,3-5", files = list, more = text}
excerpt = '\n' .. excerpt .. '\n'
local args = frame.args -- from calling module
 
local pargs = frame:getParent().args -- from template
-- Remove nested categories
excerpt = frame:preprocess( excerpt )
local categories, excerpt = Transcluder.getCategories( excerpt, options.categories )
 
-- Add tracking categories
local pagenames = { args[1] or pargs[1] } -- For lead, ignore all but the first unnamed argument
if israndomconfig.categories then
local contentCategory = config.categories.content
-- For random, accept any number of page names. If more than one, we'll pick one randomly
if contentCategory and mw.title.getCurrentTitle().isContentPage then
for i, p in pairs(args) do
excerpt = excerpt .. '[[Category:' .. contentCategory .. ']]'
if p and type(i) == 'number' and i > 1 then table.insert(pagenames, p) end
end
local namespaceCategory = config.categories[ mw.title.getCurrentTitle().namespace ]
for i, p in pairs(pargs) do
if namespaceCategory then
if p and type(i) == 'number' and i > 1 and not args[i] then table.insert(pagenames, p) end
excerpt = excerpt .. '[[Category:' .. namespaceCategory .. ']]'
end
end
 
-- Load the styles
local options = {}
local styles
options.paraflags = numberflags(args["paragraphs"] or pargs["paragraphs"] or "") -- parse paragraphs, e.g. "1,3-5" → {"1","3-5"}
if config.styles then
options.fileflags = numberflags(args["files"] or pargs["files"] or "") -- parse file numbers
styles = frame:extensionTag( 'templatestyles', '', { src = config.styles } )
options.fileargs = args["fileargs"] or pargs["fileargs"]
end
options.more = args["more"] or pargs["more"]
if options.more and options.more == "" then options.more = "Read more..." end -- more= is short for this default text
options.errors = args["errors"] or pargs["errors"]
 
-- Combine and return the elements
local text = p._lead(pagenames, options)
if inline then
return frame:preprocess(text)
return mw.text.trim( excerpt )
end
local tag = 'div'
if quote then
tag = 'blockquote'
end
excerpt = mw.html.create( 'div' ):addClass( 'excerpt' ):wikitext( excerpt )
local block = mw.html.create( tag ):addClass( 'excerpt-block' ):addClass( class )
return block:node( styles ):node( hat ):node( excerpt ):node( more )
end
 
-- Entry points for templatebackwards callers using #invoke:compatibility
function p.lead( frame ) return leadrandomp.main( frame, false) end
function p.randomexcerpt( frame ) return leadrandomp.main( frame, true) end
 
return p