-- Module:Excerpt implements the Excerpt template
-- Local aliases of the file namespace
-- Documentation and master version: https://en.wikipedia.org/wiki/Module:Excerpt
-- Lua patterns for the file namespace prefixes ("File" and "Image").
-- Only the first letter of each prefix is matched case-insensitively.
local fileNamespaces = {
-- Authors: User:Sophivorus, User:Certes, User:Aidan9382 & others
"[Ff]ile",
-- License: CC-BY-SA-3.0
"[Ii]mage"
}
local parser = require( 'Module:WikitextParser' )
local captionParams = {
local yesno = require( 'Module:Yesno' )
"[^=|]*[Cc]aption[^=|]*",
"[^=|]*[Ll]egend[^=|]*"
}
local ok, config = pcall( require, 'Module:Excerpt/config' )
-- Local category to track content pages with broken excerpts (may be empty, don't include the "Category:" prefix)
if not ok then config = {} end
local brokenCategory = "Articles with broken excerpts"
local Excerpt = {}
-- The module keeps all inline templates except these ones
-- Each entry is a Lua pattern for a template name (or pseudo-template such as #tag:ref or DEFAULTSORT).
-- The patterns are matched right after the opening "{{" of a template call, so they must not be anchored themselves.
-- Inside a character set a bare "-" between two characters denotes a range; write "%-" for a literal hyphen.
local unwantedInlineTemplates = {"[Ee]fn", "[Ee]fn%-[lu][arg]", "[Ee]l[mn]", "[Rr]p?", "[Ss]fn[bmp]", "[Ss]f[bn]", "[Nn]ote[Tt]ag", "#[Tt]ag:%s*[Rr]ef", "[Rr]efn?",
"[CcDd]n", "[Cc]itation[%- _]needed", "[Dd]isambiguation needed", "[Ff]eatured article", "[Gg]ood article",
"[Dd]ISPLAYTITLE", "[Ss]hort[ _]+description", "[Cc]itation", "[Cc]ite[%- _]+[%w_%s]-", "[Cc]oor[%w_%s]-",
-- "[ %-_]" (escaped hyphen) matches only space, hyphen or underscore; the unescaped "[ -_]" was the range 0x20-0x5F
"[Uu]?n?[Rr]eliable source[%?%w_%s]-", "[Rr]s%??", "[Vv]c", "[Vv]erify credibility", "[Bb]y[ _]*[Ww]ho[m]*%??", "[Ww]ikisource[ %-_]*multi", "[Ii]nflation[ _/-]*[Ff]n",
"[Bb]iblesource",
-- aliases for Clarification needed
"[Cc]f[ny]", "[Cc]larification[ _]+inline", "[Cc]larification[%- _]*needed", "[Cc]larification", "[Cc]larify%-inline", "[Cc]larify%-?me",
"[Cc]larify[ _]+inline", "[Cc]larify", "[Cc]LARIFY", "[Cc]onfusing%-inline", "[Cc]onfusing%-short", "[Ee]xplainme", "[Hh]uh[ _]*%??", "[Ww]hat%?",
"[Ii]nline[ _]+[Uu]nclear", "[Ii]n[ _]+what[ _]+sense", "[Oo]bscure", "[Pp]lease[ _]+clarify", "[Uu]nclear[ _]+inline", "[Ww]hat's[ _]+this%?",
"[Gg]eoQuelle", "[Nn]eed[s]+[%- _]+[Ii][Pp][Aa]", "[Ii]PA needed",
-- aliases for Citation needed (lead)
"[Cc]itation needed %(?lea?de?%)?", "[Cc]nl", "[Ff]act %(?lea?de?%)?", "[Ll]ead citation needed", "[Nn]ot in body", "[Nn]ot verified in body",
-- Primary source etc.
"[Pp]s[ci]", "[Nn]psn", "[Nn]on%-primary[ _]+source[ _]+needed", "[Ss]elf%-published[%w_%s]-", "[Uu]ser%-generated[%w_%s]-",
"[Pp]rimary source[%w_%s]-", "[Ss]econdary source[%w_%s]-", "[Tt]ertiary source[%w_%s]-", "[Tt]hird%-party[%w_%s]-",
-- aliases for Disambiguation (page) and similar
"[Bb]egriffsklärung", "[Dd][Aa][Bb]", "[Dd]big", "[%w_%s]-%f[%w][Dd]isam[%w_%s]-", "[Hh][Nn][Dd][Ii][Ss]",
-- aliases for Failed verification
"[Bb]adref", "[Ff]aile?[ds] ?[rv][%w_%s]-", "[Ff][Vv]", "[Nn][Ii]?[Cc][Gg]", "[Nn]ot ?in ?[crs][%w_%s]-", "[Nn]ot specifically in source",
"[Vv]erification[%- _]failed",
-- aliases for When
"[Aa]s[ _]+of[ _]+when%??", "[Aa]s[ _%-]+of%??", "[Cc]larify date", "[Dd]ate[ _]*needed", "[Nn]eeds?[ _]+date", "[Rr]ecently", "[Ss]ince[ _]+when%??",
"[Ww]HEN", "[Ww]hen%??",
-- aliases for Update
"[Nn]ot[ _]*up[ _]*to[ _]*date","[Oo]u?[Tt][Dd]","[Oo]ut[%- _]*o?f?[%- _]*dated?", "[Uu]pdate", "[Uu]pdate[ _]+sect", "[Uu]pdate[ _]+Watch",
-- aliases for Pronunciation needed
"[Pp]ronunciation%??[%- _]*n?e?e?d?e?d?", "[Pp]ronounce", "[Rr]equested[%- _]*pronunciation", "[Rr]e?q?pron", "[Nn]eeds[%- _]*pronunciation",
-- Chart, including Chart/start etc.
"[Cc]hart", "[Cc]hart/[%w_%s]-",
-- Cref and others
"[Cc]ref2?", "[Cc]note",
-- Explain and others
"[Ee]xplain", "[Ff]urther[ ]*explanation[ ]*needed", "[Ee]laboration[ ]*needed", "[Ee]xplanation[ ]*needed",
-- TOC templates
"[Cc][Oo][Mm][Pp][Aa][Cc][Tt][ _]*[Tt][Oo][Cc][8]*[5]*", "[Tt][Oo][Cc]", "09[Aa][Zz]", "[Tt][Oo][Cc][ ]*[Cc][Oo][Mm][Pp][Aa][Cc][Tt]", "[Tt][Oo][Cc][ ]*[Ss][Mm][Aa][Ll][Ll]", "[Cc][Oo][Mm][Pp][Aa][Cc][Tt][ _]*[Aa][Ll][Pp][Hh][Aa][Bb][Ee][Tt][Ii][Cc][ _]*[Tt][Oo][Cc]",
"DEFAULTSORT:.-",
"[Oo]ne[ _]+source"
}
-- Main entry point for templates
-- The module removes all block templates except these ones
function Excerpt.main( frame )
local wantedBlockTemplates = {
"[Hh]istorical populations"
}
-- Make sure the requested page exists and get the wikitext
local p = {}
local page = Excerpt.getArg( 1 )
if not page or page == '{{{1}}}' then return Excerpt.getError( 'no-page' ) end
local title = mw.title.new( page )
if not title then return Excerpt.getError( 'invalid-title', page ) end
local fragment = title.fragment -- save for later
if title.isRedirect then
title = title.redirectTarget
if fragment == "" then
fragment = title.fragment -- page merge potential
end
end
if not title.exists then return Excerpt.getError( 'page-not-found', page ) end
page = title.prefixedText
local wikitext = title:getContent()
-- Get the template params and process them
local errors
local params = {
-- Return blank text, or an error message if requested
hat = yesno( Excerpt.getArg( 'hat', true ) ),
local function err(text)
this = Excerpt.getArg( 'this' ),
if errors then error(text, 2) end
only = Excerpt.getArg( 'only' ),
return ""
files = Excerpt.getArg( 'files', Excerpt.getArg( 'file' ) ),
end
lists = Excerpt.getArg( 'lists', Excerpt.getArg( 'list' ) ),
tables = Excerpt.getArg( 'tables', Excerpt.getArg( 'table' ) ),
templates = Excerpt.getArg( 'templates', Excerpt.getArg( 'template' ) ),
paragraphs = Excerpt.getArg( 'paragraphs', Excerpt.getArg( 'paragraph' ) ),
references = yesno( Excerpt.getArg( 'references', true ) ),
subsections = yesno( Excerpt.getArg( 'subsections', false ) ),
links = yesno( Excerpt.getArg( 'links', true ) ),
bold = yesno( Excerpt.getArg( 'bold', false ) ),
briefDates = yesno( Excerpt.getArg( 'briefdates', false ) ),
inline = yesno( Excerpt.getArg( 'inline' ) ),
quote = yesno( Excerpt.getArg( 'quote' ) ),
more = yesno( Excerpt.getArg( 'more' ) ),
class = Excerpt.getArg( 'class' ),
displayTitle = Excerpt.getArg( 'displaytitle', page ),
}
-- Make sure the requested section exists and get the excerpt
-- Helper function to test for falsy values
local excerpt
local function falsy( value )
local section = Excerpt.getArg( 2, fragment )
if not value or value == "" or value == "0" or value == "false" or value == "no" then
section = mw.text.trim( section )
return true
if section == '' then section = nil end
if section then
excerpt = parser.getSectionTag( wikitext, section )
if not excerpt then
if params.subsections then
excerpt = parser.getSection( wikitext, section )
else
local sections = parser.getSections( wikitext )
excerpt = sections[ section ]
end
end
if not excerpt then return Excerpt.getError( 'section-not-found', section ) end
if excerpt == '' then return Excerpt.getError( 'section-empty', section ) end
else
excerpt = parser.getLead( wikitext )
if excerpt == '' then return Excerpt.getError( 'lead-empty' ) end
end
return false
end
-- Remove noinclude bits
-- In text, match pre..list[1]..post or pre..list[2]..post or ...
excerpt = excerpt:gsub( '<[Nn][Oo][Ii][Nn][Cc][Ll][Uu][Dd][Ee]>.-</[Nn][Oo][Ii][Nn][Cc][Ll][Uu][Dd][Ee]>', '' )
local function matchany(text, pre, list, post, init)
local match = {}
for i = 1, #list do
match = { mw.ustring.match(text, pre .. list[i] .. post, init) }
if match[1] then return unpack(match) end
end
return nil
end
-- Filter various elements from the excerpt
-- Help gsub to remove unwanted templates and pseudo-templates such as #tag:ref and DEFAULTSORT
excerpt = Excerpt.filterFiles( excerpt, params.files )
local function striptemplate(t)
excerpt = Excerpt.filterLists( excerpt, params.lists )
-- If template is unwanted then return "" (gsub will replace by nothing), else return nil (gsub will keep existing string)
excerpt = Excerpt.filterTables( excerpt, params.tables )
if matchany(t, "^{{%s*", unwantedInlineTemplates, "%s*%f[|}]") then return "" end
excerpt = Excerpt.filterParagraphs( excerpt, params.paragraphs )
-- If no file is found, try to get one from the infobox
-- If template is wanted but produces an unwanted reference then return the string with |shortref or |ref removed
if ( params.only == 'file' or params.only == 'files' or not params.only and ( not params.files or params.files ~= '0' ) ) -- caller asked for files
local noref = mw.ustring.gsub(t, "|%s*shortref%s*%f[|}]", "")
and not section -- and we're in the lead section
noref = mw.ustring.gsub(noref, "|%s*ref%s*%f[|}]", "")
and config.captions -- and we have the config option required to try finding files in infoboxes
and #parser.getFiles( excerpt ) == 0 -- and there're no files in the excerpt
then
excerpt = Excerpt.addInfoboxFile( excerpt )
end
-- Filter the templates by appending the blacklist to the templates filter
-- If a wanted template has unwanted nested templates, purge them too
if config.blacklist then
noref = mw.ustring.sub(noref, 1, 2) .. mw.ustring.gsub(mw.ustring.sub(noref, 3), "%b{}", striptemplate)
local blacklist = table.concat( config.blacklist, ',' )
if params.templates then
if string.sub( params.templates, 1, 1 ) == '-' then
params.templates = params.templates .. ',' .. blacklist
end
else
params.templates = '-' .. blacklist
end
end
excerpt = Excerpt.filterTemplates( excerpt, params.templates )
-- Leave only the requested elements
-- Replace {{audio}} by its text parameter: {{Audio|Foo.ogg|Bar}} → Bar
if params.only == 'file' or params.only == 'files' then
noref = mw.ustring.gsub(noref, "^{{%s*[Aa]udio.-|.-|(.-)%f[|}].*", "%1")
local files = parser.getFiles( excerpt )
excerpt = params.only == 'file' and files[1] or table.concat( files, '\n\n' )
end
if params.only == 'list' or params.only == 'lists' then
local lists = parser.getLists( excerpt )
excerpt = params.only == 'list' and lists[1] or table.concat( lists, '\n\n' )
end
if params.only == 'table' or params.only == 'tables' then
local tables = parser.getTables( excerpt )
excerpt = params.only == 'table' and tables[1] or table.concat( tables, '\n\n' )
end
if params.only == 'paragraph' or params.only == 'paragraphs' then
local paragraphs = parser.getParagraphs( excerpt )
excerpt = params.only == 'paragraph' and paragraphs[1] or table.concat( paragraphs, '\n\n' )
end
if params.only == 'template' or params.only == 'templates' then
local templates = parser.getTemplates( excerpt )
excerpt = params.only == 'template' and templates[1] or table.concat( templates, '\n\n' )
end
-- @todo Make more robust and move downwards
-- Replace {{Nihongo foot}} by its text parameter: {{Nihongo foot|English|英語|eigo}} → English
if params.briefDates then
noref = mw.ustring.gsub(noref, "^{{%s*[Nn]ihongo[ _]+foot%s*|(.-)%f[|}].*", "%1")
excerpt = Excerpt.fixDates( excerpt )
end
-- Remove unwanted elements
if noref ~= t then return noref end
excerpt = Excerpt.removeComments( excerpt )
excerpt = Excerpt.removeSelfLinks( excerpt )
excerpt = Excerpt.removeNonFreeFiles( excerpt )
excerpt = Excerpt.removeBehaviorSwitches( excerpt )
-- Fix or remove the references
return nil -- not an unwanted template: keep
if params.references then
end
excerpt = Excerpt.fixReferences( excerpt, page, wikitext )
else
excerpt = Excerpt.removeReferences( excerpt )
end
-- Remove wikilinks
-- Get a page's content, following redirects, and processing file description pages for files.
if not params.links then
-- Also returns the page name, or the target page name if a redirect was followed, or false if no page found
excerpt = Excerpt.removeLinks( excerpt )
local function getContent(page, frame)
end
local title = mw.title.new(page) -- Read description page (for :File:Foo rather than File:Foo)
if not title then return false, false end
-- Link the bold text near the start of most leads and then remove it
local target = title.redirectTarget
if targetnot section then title = target end
excerpt = Excerpt.linkBold( excerpt, page )
end
if not params.bold then
excerpt = Excerpt.removeBold( excerpt )
end
-- Remove extra line breaks but leave one before and after so the parser interprets lists, tables, etc. correctly
return title:getContent(), title.prefixedText
excerpt = excerpt:gsub( '\n\n\n+', '\n\n' )
end
excerpt = mw.text.trim( excerpt )
excerpt = '\n' .. excerpt .. '\n'
-- Remove nested categories
-- Check image for suitability
excerpt = frame:preprocess( excerpt )
local function checkimage(image)
excerpt = Excerpt.removeCategories( excerpt )
local page = matchany(image, "", fileNamespaces, "%s*:[^|%]]*") -- match File:(name) or Image:(name)
if not page then return false end
-- Add tracking categories
-- Limit to image types: .gif, .jpg, .jpeg, .png, .svg, .tiff, .xcf (exclude .ogg audio etc.)
if config.categories then
if not matchany(page, "%.", {"[Gg][Ii][Ff]", "[Jj][Pp][Ee]?[Gg]", "[Pp][Nn][Gg]", "[Ss][Vv][Gg]", "[Tt][Ii][Ff][Ff]", "[Xx][Cc][Ff]"}, "%s*$") then
excerpt = Excerpt.addTrackingCategories( excerpt )
return false
end
-- Build the final output
local desc, rtitle = getContent(page) -- get file description and title after following any redirect
if params.inline then
if desc and desc ~= "" then -- found description on local wiki
return mw.text.trim( excerpt )
if mw.ustring.match(desc, "[Nn]on%-free") then return false end
desc = mw.ustring.gsub(desc, "%b{}", striptemplate) -- remove DEFAULTSORT etc. to avoid side effects of frame:preprocess
elseif not rtitle then
return false
else
-- try commons
desc = "{{" .. rtitle .. "}}"
end
frame = frame or mw.getCurrentFrame()
desc = frame:preprocess(desc)
local tag = params.quote and 'blockquote' or 'div'
return ( desc and desc ~= "" and not mw.ustring.match(desc, "[Nn]on%-free") ) and true or false -- hide non-free image
local block = mw.html.create( tag ):addClass( 'excerpt-block' ):addClass( params.class )
end
if config.styles then
-- Attempt to parse [[File:...]] or [[Image:...]], either anywhere (start=false) or at the start only (start=true)
local styles = frame:extensionTag( 'templatestyles', '', { src = config.styles } )
local function parseimage(text, start)
block:node( styles )
local startre = ""
if start then startre = "^" end -- a true flag restricts search to start of string
local image = matchany(text, startre .. "%[%[%s*", fileNamespaces, "%s*:.*") -- [[File: or [[Image: ...
if image then
image = mw.ustring.match(image, "%b[]%s*") -- matching [[...]] to handle wikilinks nested in caption
end
return image
end
if params.hat then
-- Parse a caption, which ends at a | (end of parameter) or } (end of infobox) but may contain nested [..] and {..}
local hat = Excerpt.getHat( page, section, params )
local function parsecaption(caption)
block:node( hat )
if not caption then return nil end
local len = mw.ustring.len(caption)
local pos = 1
while pos <= len do
local linkstart, linkend = mw.ustring.find(caption, "%b[]", pos)
linkstart = linkstart or len + 1 -- avoid comparison with nil when no link
local templatestart, templateend = mw.ustring.find(caption, "%b{}", pos)
templatestart = templatestart or len + 1 -- avoid comparison with nil when no template
local argend = mw.ustring.find(caption, "[|}]", pos) or len + 1
if linkstart < templatestart and linkstart < argend then
pos = linkend + 1 -- skip wikilink
elseif templatestart < argend then
pos = templateend + 1 -- skip template
else -- argument ends before the next wikilink or template
return mw.ustring.sub(caption, 1, argend - 1)
end
end
return caption -- No terminator found: return entire caption
end
excerpt = mw.html.create( 'div' ):addClass( 'excerpt' ):wikitext( excerpt )
-- Attempt to construct a [[File:...]] block from {{infobox ... |image= ...}}
block:node( excerpt )
local function argimage(text)
local token = nil
local hasNamedArgs = mw.ustring.find(text, "|") and mw.ustring.find(text, "=")
if not hasNamedArgs then return nil end -- filter out any template that obviously doesn't contain an image
if params.more then
-- ensure image map is captured
local more = Excerpt.getReadMore( page, section )
text = mw.ustring.gsub(text, '<!%-%-imagemap%-%->', '|imagemap=')
block:node( more )
-- find all images
local hasImages = false
local images = {}
local capture_from = 1
while capture_from < mw.ustring.len(text) do
local argname, position, image = mw.ustring.match(text, "|%s*([^=|]-[Ii][Mm][Aa][Gg][Ee][^=|]-)%s*=%s*()(.*)", capture_from)
if image then -- ImageCaption=, image_size=, image_upright=, etc. do not introduce an image
local lcArgname = mw.ustring.lower(argname)
if mw.ustring.find(lcArgname, "caption")
or mw.ustring.find(lcArgname, "size")
or mw.ustring.find(lcArgname, "upright") then
image = nil
end
end
if image then
hasImages = true
images[position] = image
capture_from = position
else
capture_from = mw.ustring.len(text)
end
end
capture_from = 1
while capture_from < mw.ustring.len(text) do
local position, image = mw.ustring.match(text, "|%s*[^=|]-[Pp][Hh][Oo][Tt][Oo][^=|]-%s*=%s*()(.*)", capture_from)
if image then
hasImages = true
images[position] = image
capture_from = position
else
capture_from = mw.ustring.len(text)
end
end
capture_from = 1
while capture_from < mw.ustring.len(text) do
local position, image = mw.ustring.match(text, "|%s*[^=|{}]-%s*=%s*()%[?%[?([^|{}]*%.%a%a%a%a?)%s*%f[|}]", capture_from)
if image then
hasImages = true
if not images[position] then
images[position] = image
end
capture_from = position
else
capture_from = mw.ustring.len(text)
end
end
if not hasImages then return nil endblock
end
-- Filter the files in the given wikitext against the given filter
-- find all captions
function Excerpt.filterFiles( wikitext, filter )
local captions = {}
if not filter then return wikitext end
capture_from = 1
local filters, isBlacklist = Excerpt.parseFilter( filter )
while capture_from < mw.ustring.len(text) do
local files = parser.getFiles( wikitext )
local position, caption = matchany(text, "|%s*", captionParams, "%s*=%s*()([^\n]+)", capture_from)
for index, file in pairs( files ) do
if caption then
local name = parser.getFileName( file )
-- extend caption to parse "| caption = Foo {{Template\n on\n multiple lines}} Bar\n"
if isBlacklist and ( Excerpt.matchFilter( index, filters ) or Excerpt.matchFilter( name, filters ) )
local bracedCaption = mw.ustring.match(text, "^[^\n]-%b{}[^\n]+", position)
or not isBlacklist and ( not Excerpt.matchFilter( index, filters ) and not Excerpt.matchFilter( name, filters ) ) then
if bracedCaption and bracedCaption ~= "" then caption = bracedCaption end
wikitext = Excerpt.removeString( wikitext, file )
caption = mw.text.trim(caption)
local captionStart = mw.ustring.sub(caption, 1, 1)
if captionStart == '|' or captionStart == '}' then caption = nil end
end
if caption then
-- find nearest image, and use same index for captions table
local i = position
while i > 0 and not images[i] do
i = i - 1
if images[i] then
if not captions[i] then
captions[i] = parsecaption(caption)
end
end
end
capture_from = position
else
capture_from = mw.ustring.len(text)
end
end
return wikitext
end
-- Filter the lists in the given wikitext against the given filter
-- find all alt text
function Excerpt.filterLists( wikitext, filter )
local altTexts = {}
if not filter then return wikitext end
for position, altText in mw.ustring.gmatch(text, "|%s*[Aa][Ll][Tt]%s*=%s*()([^\n]*)") do
local filters, isBlacklist = Excerpt.parseFilter( filter )
if altText then
local lists = parser.getLists( wikitext )
for index, list in pairs( lists ) do
-- altText is terminated by }} or |, but first skip any matched [[...]] and {{...}}
if isBlacklist and Excerpt.matchFilter( index, filters )
local lookfrom = math.max( -- find position after whichever comes last: start of string, end of last ]] or end of last }}
or not isBlacklist and not Excerpt.matchFilter( index, filters ) then
mw.ustring.match(altText, ".*{%b{}}()") or 1, -- if multiple {{...}}, .* consumes all but one, leaving the last for %b
wikitext = Excerpt.removeString( wikitext, list )
mw.ustring.match(altText, ".*%[%b[]%]()") or 1)
local len = mw.ustring.len(altText)
local aftertext = math.min( -- find position after whichever comes first: end of string, }} or |
mw.ustring.match(altText, "()}}", lookfrom) or len+1,
mw.ustring.match(altText, "()|", lookfrom) or len+1)
altText = mw.ustring.sub(altText, 1, aftertext-1) -- chop off |... or }}... which is not part of [[...]] or {{...}}
altText = mw.text.trim(altText)
local altTextStart = mw.ustring.sub(altText, 1, 1)
if altTextStart == '|' or altTextStart == '}' then altText = nil end
end
if altText then
-- find nearest image, and use same index for altTexts table
local i = position
while i > 0 and not images[i] do
i = i - 1
if images[i] then
if not altTexts[i] then
altTexts[i] = altText
end
end
end
end
end
return wikitext
end
-- Filter the tables in the given wikitext against the given filter
-- find all image sizes
function Excerpt.filterTables( wikitext, filter )
local imageSizes = {}
if not filter then return wikitext end
for position, imageSizeMatch in mw.ustring.gmatch(text, "|%s*[Ii][Mm][Aa][Gg][Ee][ _]?[Ss][Ii][Zz][Ee]%s*=%s*()([^}|\n]*)") do
local filters, isBlacklist = Excerpt.parseFilter( filter )
local imageSize = mw.ustring.match(imageSizeMatch, "=%s*([^}|\n]*)")
local tables = parser.getTables( wikitext )
if imageSize then
for index, t in pairs( tables ) do
imageSize = mw.text.trim(imageSize )
local id = string.match( t, '{|[^\n]-id%s*=%s*["\']?([^"\'\n]+)["\']?[^\n]*\n' )
local imageSizeStart = mw.ustring.sub(imageSize, 1, 1)
if isBlacklist and ( Excerpt.matchFilter( index, filters ) or Excerpt.matchFilter( id, filters ) )
if imageSizeStart == '|' or imageSizeStart == '}' then imageSize = nil end
or not isBlacklist and ( not Excerpt.matchFilter( index, filters ) and not Excerpt.matchFilter( id, filters ) ) then
end
wikitext = Excerpt.removeString( wikitext, t )
if imageSize then
-- find nearest image, and use same index for imageSizes table
local i = position
while i > 0 and not images[i] do
i = i - 1
if images[i] then
if not imageSizes[i] then
imageSizes[i] = imageSize
end
end
end
end
end
return wikitext
end
-- Filter the paragraphs in the given wikitext against the given filter
-- sort the keys of the images table (in a table sequence), so that images can be iterated over in order
function Excerpt.filterParagraphs( wikitext, filter )
local keys = {}
if not filter then return wikitext end
for key, val in pairs(images) do
local filters, isBlacklist = Excerpt.parseFilter( filter )
table.insert(keys, key)
local paragraphs = parser.getParagraphs( wikitext )
end
for index, paragraph in pairs( paragraphs ) do
table.sort(keys)
if isBlacklist and Excerpt.matchFilter( index, filters )
or not isBlacklist and not Excerpt.matchFilter( index, filters ) then
-- add in relevant optional parameters for each image: caption, alt text and image size
wikitext = Excerpt.removeString( wikitext, paragraph )
local imageTokens = {}
for _, index in ipairs(keys) do
local image = images[index]
local token = parseimage(image, true) -- look for image=[[File:...]] etc.
if not token then
image = mw.ustring.match(image, "^[^}|\n]*") -- remove later arguments
token = "[[" -- Add File: unless name already begins File: or Image:
if not matchany(image, "^", fileNamespaces, "%s*:") then
token = token .. "File:"
end
token = token .. image
local caption = captions[index]
if caption and mw.ustring.match(caption, "%S") then token = token .. "|" .. caption end
local alt = altTexts[index]
if alt then token = token .. "|alt=" .. alt end
local image_size = imageSizes[index]
if image_size and mw.ustring.match(image_size, "%S") then token = token .. "|" .. image_size end
token = token .. "]]"
end
token = mw.ustring.gsub(token, "\n","") .. "\n"
table.insert(imageTokens, token)
end
return imageTokenswikitext
end
-- Filter the templates in the given wikitext against the given filter
-- Help gsub convert imagemaps into standard images
function Excerpt.filterTemplates( wikitext, filter )
local function convertImagemap(imagemap)
if not filter then return wikitext end
local image = matchany(imagemap, "[>\n]%s*", fileNamespaces, "[^\n]*")
local filters, isBlacklist = Excerpt.parseFilter( filter )
if image then
local templates = parser.getTemplates( wikitext )
return "<!--imagemap-->[[" .. mw.ustring.gsub(image, "[>\n]%s*", "", 1) .. "]]"
for index, template in pairs( templates ) do
else
local name = parser.getTemplateName( template )
return "" -- remove entire block if image can't be extracted
if isBlacklist and ( Excerpt.matchFilter( index, filters ) or Excerpt.matchFilter( name, filters ) )
or not isBlacklist and ( not Excerpt.matchFilter( index, filters ) and not Excerpt.matchFilter( name, filters ) ) then
wikitext = Excerpt.removeString( wikitext, template )
end
end
return wikitext
end
function Excerpt.addInfoboxFile( excerpt )
-- Convert a comma-separated list of numbers or min-max ranges into a list of booleans, e.g. "1,3-5" → {1=true,2=false,3=true,4=true,5=true}
-- We cannot distinguish the infobox from the other templates, so we search them all
local function numberflags(str)
local templates = parser.getTemplates( excerpt )
local ranges = mw.text.split(str, ",") -- parse ranges, e.g. "1,3-5" → {"1","3-5"}
for _, template in pairs( templates ) do
local flags = {}
local parameters = parser.getTemplateParameters( template )
for _, r in pairs(ranges) do
local file, captions, caption, cssClasses, cssClass
local min, max = mw.ustring.match(r, "^%s*(%d+)%s*%-%s*(%d+)%s*$") -- "3-5" → min=3 max=5
for _, pair in pairs( config.captions ) do
if not max then min, max = mw.ustring.match(r, "^%s*((%d+))%s*$") end -- "1" → min=1 max=1
if file max= thenpair[1]
file = parameters[file]
for p = min, max do flags[p] = true end
if file and Excerpt.matchAny( file, '^.*%.', { '[Jj][Pp][Ee]?[Gg]', '[Pp][Nn][Gg]', '[Gg][Ii][Ff]', '[Ss][Vv][Gg]' }, '.*' ) then
file = string.match( file, '%[?%[?.-:([^{|]+)%]?%]?' ) or file -- [[File:Example.jpg{{!}}upright=1.5]] to Example.jpg
captions = pair[2]
for _, p in pairs( captions ) do
if parameters[ p ] then caption = parameters[ p ] break end
end
-- Check for CSS classes
-- We opt to use skin-invert-image instead of skin-invert
-- in all other cases, the CSS provided in the infobox is used
if pair[3] then
cssClasses = pair[3]
for _, p in pairs( cssClasses ) do
if parameters[ p ] then
cssClass = ( parameters[ p ] == 'skin-invert' ) and 'skin-invert-image' or parameters[ p ]
break
end
end
end
local class = cssClass and ( '|class=' .. cssClass ) or ''
return '[[File:' .. file .. class .. '|thumb|' .. ( caption or '' ) .. ']]' .. excerpt
end
end
end
return flagsexcerpt
end
function Excerpt.removeNonFreeFiles( wikitext )
local imageArgGroups = {
local files = parser.getFiles( wikitext )
{"thumb", "thumbnail", "frame", "framed", "frameless"},
for _, file in pairs( files ) do
{"right", "left", "center", "none"},
local fileName = 'File:' .. parser.getFileName( file )
{"baseline", "middle", "sub", "super", "text-top", "text-bottom", "top", "bottom"}
local fileTitle = mw.title.new( fileName )
}
if fileTitle then
local fileDescription = fileTitle:getContent()
local function modifyImage(image, fileargs)
if not fileDescription or fileDescription == '' then
if fileargs then
local frame = mw.getCurrentFrame()
for _, filearg in pairs(mw.text.split(fileargs, "|")) do -- handle fileargs=left|border etc.
fileDescription = frame:preprocess( '{{' .. fileName .. '}}' ) -- try Commons
local fa = mw.ustring.gsub(filearg, "=.*", "") -- "upright=0.75" → "upright"
local group = {fa} -- group of "border" is ["border"]...
for _, g in pairs(imageArgGroups) do
for _, a in pairs(g) do
if fa == a then group = g end -- ...but group of "left" is ["right", "left", "center", "none"]
end
end
if fileDescription and string.match( fileDescription, '[Nn]on%-free' ) then
for _, a in pairs(group) do
wikitext = Excerpt.removeString( wikitext, file )
image = mw.ustring.gsub(image, "|%s*" .. a .. "%f[%A]%s*=[^|%]]*", "") -- remove "|upright=0.75" etc.
image = mw.ustring.gsub(image, "|%s*" .. a .. "%s*([|%]])", "%1") -- replace "|left|" by "|" etc.
end
image = mw.ustring.gsub(image, "([|%]])", "|" .. filearg .. "%1", 1) -- replace "|" by "|left|" etc.
end
end
return imagewikitext
end
function Excerpt.getHat( page, section, params )
-- a basic parser to trim down extracted wikitext
local hat
-- @param text : Wikitext to be processed
-- @param options : A table of options...
-- Build the text
-- options.paraflags : Which number paragraphs to keep, as either a string (e.g. `1,3-5`) or a table (e.g. `{1=true,2=false,3=true,4=true,5=true}`. If not present, all paragraphs will be kept.
if params.this then
-- options.fileflags : table of which files to keep, as either a string (e.g. `1,3-5`) or a table (e.g. `{1=true,2=false,3=true,4=true,5=true}`
hat = params.this
-- options.fileargs : args for the [[File:]] syntax, such as `left`
elseif params.quote then
-- @param filesOnly : If set, only return the files and not the prose
hat = Excerpt.getMessage( 'this' )
local function parse(text, options, filesOnly)
elseif params.only then
local allparas = true -- keep all paragraphs?
hat = Excerpt.getMessage( params.only )
if options.paraflags then
else
if type(options.paraflags) ~= "table" then options.paraflags = numberflags(options.paraflags) end
hat = Excerpt.getMessage( 'section' )
for _, v in pairs(options.paraflags) do
if v then allparas = false end -- if any para specifically requested, don't keep all
end
end
if filesOnly then
allparas = false
options.paraflags = {}
end
hat = hat .. ' ' .. Excerpt.getMessage( 'excerpt' )
-- Build the link
local maxfile = 0 -- for efficiency, stop checking images after this many have been found
if options.fileflagssection then
hat = hat .. ' [[:' .. page .. '#' .. mw.uri.anchorEncode( section ) .. '|' .. params.displayTitle
if type(options.fileflags) ~= "table" then options.fileflags = numberflags(options.fileflags) end
.. ' § ' .. section:gsub( '%[%[([^]|]+)|?[^]]*%]%]', '%1' ) .. ']].' -- remove nested links
for k, v in pairs(options.fileflags) do
else
if v and k > maxfile then maxfile = k end -- set maxfile = highest key in fileflags
hat = hat .. ' [[:' .. page .. '|' .. params.displayTitle .. ']].'
end
end
local fileargs = options.fileargs and mw.text.trim(options.fileargs)
if fileargs == '' then fileargs = nil end
-- Build the edit link
local leadstart = nil -- have we found some text yet?
local title = mw.title.new( page )
local t = "" -- the stripped down output text
local editUrl = title:fullUrl( 'action=edit' )
local filetext = "" -- output text with concatenated [[File:Foo|...]]\n entries
hat = hat .. '<span class="mw-editsection-like plainlinks"><span class="mw-editsection-bracket">[</span>['
local files = 0 -- how many images so far
hat = hat .. editUrl .. ' ' .. mw.message.new( 'editsection' ):plain()
local paras = 0 -- how many paragraphs so far
hat = hat .. ']<span class="mw-editsection-bracket">]</span></span>'
local startLine = true -- at the start of a line (no non-spaces found since last \n)?
if config.hat then
text = mw.ustring.gsub(text,"^%s*","") -- remove initial white space
local frame = mw.getCurrentFrame()
hat = config.hat .. hat .. '}}'
-- Add named files
hat = frame:preprocess( hat )
local f = options.files
else
if f and mw.ustring.match(f, "[^%d%s%-,]") then -- filename rather than number list
hat = mw.html.create( 'div' ):addClass( 'dablink excerpt-hat' ):wikitext( hat )
f = mw.ustring.gsub(f, "^%s*File%s*:%s*", "", 1)
f = mw.ustring.gsub(f, "^%s*Image%s*:%s*", "", 1)
f = "[[File:" .. f .. "]]"
f = modifyImage(f, "thumb")
f = modifyImage(f, fileargs)
if checkimage(f) then filetext = filetext .. f .. "\n" end
end
return hat
repeat -- loop around parsing a template, image or paragraph
end
local token = mw.ustring.match(text, "^%b{}%s*") or false -- {{Template}} or {| Table |}
if not leadstart and not token then token = mw.ustring.match(text, "^%b<>%s*%b{}%s*") end -- allow <tag>{{template}} before lead has started
-- Build the "read more" link for an excerpt
-- @param page : Name of the excerpted page, used as the wikilink target
-- @param section : Optional section name; when given, "#section" is appended to the target
-- @return An mw.html div (classes "noprint excerpt-more") wrapping the bold wikilink
function Excerpt.getReadMore( page, section )
	-- Opening of the bold wikilink: '''[[Page or '''[[Page#Section
	local opening = "'''[[" .. page
	if section then opening = opening .. '#' .. section end
	-- The link label comes from the localised 'more' message
	local label = Excerpt.getMessage( 'more' )
	local wikilink = opening .. '|' .. label .. "]]'''"
	return mw.html.create( 'div' ):addClass( 'noprint excerpt-more' ):wikitext( wikilink )
end
-- Fix birth and death dates, but only in the first paragraph
local line = mw.ustring.match(text, "[^\n]*")
-- @todo Use parser.getParagraphs() to get the first paragraph
if token and line and mw.ustring.len(token) < mw.ustring.len(line) then -- template is followed by text (but it may just be other templates)
function Excerpt.fixDates( excerpt )
line = mw.ustring.gsub(line, "%b{}", "") -- remove all templates from this line
local start = 1 -- skip initial templates
line = mw.ustring.gsub(line, "%b<>", "") -- remove all HTML tags from this line
local s
-- if anything is left, other than an incomplete further template or an image, keep the template: it counts as part of the line
local e = 0
if mw.ustring.find(line, "%S") and not matchany(line, "^%s*", { "{{", "%[%[%s*[Ff]ile:", "%[%[%s*[Ii]mage:" }, "") then
repeat
token = nil
start = e + 1
s, e = mw.ustring.find( excerpt, '%s*%b{}%s*', start )
until not s or s > start
s, e = mw.ustring.find( excerpt, '%b()', start ) -- get (...), which may be (year–year)
if s and s < start + 100 then -- look only near the start
local excerptStart = mw.ustring.sub( excerpt, s, e )
local year1, conjunction, year2 = string.match( excerptStart, '(%d%d%d+)(.-)(%d%d%d+)' )
if year1 and year2 and ( string.match( conjunction, '[%-–—]' ) or string.match( conjunction, '{{%s*[sS]nd%s*}}' ) ) then
local y1 = tonumber( year1 )
local y2 = tonumber( year2 )
if y2 > y1 and y2 < y1 + 125 and y1 <= tonumber( os.date( '%Y' ) ) then
excerpt = mw.ustring.sub( excerpt, 1, s ) .. year1 .. '–' .. year2 .. mw.ustring.sub( excerpt, e )
end
end
end
return excerpt
end
-- Replace the first call to each reference defined outside of the excerpt for the full reference, to prevent undefined references
if token then -- found a template which is not the prefix to a line of text
-- Then prefix the page title to the reference names to prevent conflicts
if leadstart then -- lead has already started, so keep the template within the text, unless it's a whole line (navbox etc.)
-- that is, replace <ref name="Foo"> for <ref name="Title of the article Foo">
if not filesOnly and not startLine then t = t .. token end
-- and also <ref name="Foo" /> for <ref name="Title of the article Foo" />
-- also remove reference groups: <ref name="Foo" group="Bar"> for <ref name="Title of the article Foo">
elseif matchany(token, "{{%s*", wantedBlockTemplates, "%s*%f[|}]") then
-- and <ref group="Bar"> for <ref>
t = t .. token -- keep wanted block templates
-- @todo The current regex may fail in cases with both kinds of quotes, like <ref name="Darwin's book">
function Excerpt.fixReferences( excerpt, page, wikitext )
elseif not falsy(options.keepTables) and mw.ustring.sub(token, 1, 2) == '{|' then
local references = parser.getReferences( excerpt )
t = t .. token -- keep tables
local fixed = {}
for _, reference in pairs( references ) do
elseif files < maxfile then -- discard template, but if we are still collecting images...
local imagesname = argimageparser.getTagAttribute(token) orreference, {}'name' )
if not imagesfixed[ name ] then -- fix each reference only once
local content = parser.getTagContent( reference )
local image = parseimage(token, false) -- look for embedded [[File:...]], |image=, etc.
if imagenot content then table.insert(images,-- reference image)is endself-closing
local full = parser.getReference( excerpt, name )
end
if not full then -- the reference is not defined in the excerpt
for _, image in ipairs(images) do
full = parser.getReference( wikitext, name )
if files < maxfile and checkimage(image) then -- if image is found and qualifies (not a sound file, non-free, etc.)
if full then
files = files + 1 -- count the file, whether displaying it or not
excerpt = excerpt:gsub( Excerpt.escapeString( reference ), Excerpt.escapeString( full ), 1 )
if options.fileflags and options.fileflags[files] then -- if displaying this image
image = modifyImage(image, "thumb")
image = modifyImage(image, fileargs)
filetext = filetext .. image
end
end
table.insert( fixed, name )
end
end
end
else -- the next token in text is not a template
end
token = parseimage(text, true)
-- Prepend the page title to the reference names to prevent conflicts with other references in the transcluding page
if token then -- the next token in text looks like an image
excerpt = excerpt:gsub( '< *[Rr][Ee][Ff][^>]*name *= *["\']?([^"\'>/]+)["\']?[^>/]*(/?) *>', '<ref name="' .. page:gsub( '"', '' ) .. ' %1"%2>' )
if files < maxfile and checkimage(token) then -- if more images are wanted and this is a wanted image
-- Remove reference groups because they don't apply to the transcluding page
files = files + 1
excerpt = excerpt:gsub( '< *[Rr][Ee][Ff] *group *= *["\']?[^"\'>/]+["\'] *>', '<ref>' )
if options.fileflags and options.fileflags[files] then
return excerpt
local image = token -- copy token for manipulation by adding |right etc. without changing the original
image = modifyImage(image, fileargs)
filetext = filetext .. image
end
end
else -- got a paragraph, which ends at a file, image, blank line or end of text
local afterend = mw.ustring.len(text) + 1
local blankpos = mw.ustring.find(text, "\n%s*\n") or afterend -- position of next paragraph delimiter (or end of text)
local endpos = math.min( -- find position of whichever comes first: [[File:, [[Image: or paragraph delimiter
mw.ustring.find(text, "%[%[%s*[Ff]ile%s*:") or afterend,
mw.ustring.find(text, "%[%[%s*[Ii]mage%s*:") or afterend,
blankpos)
token = mw.ustring.sub(text, 1, endpos-1)
if blankpos < afterend and blankpos == endpos then -- paragraph ends with a blank line
token = token .. mw.ustring.match(text, "\n%s*\n", blankpos)
end
local isHatnote = not(leadstart) and mw.ustring.sub(token, 1, 1) == ':'
if not isHatnote then
leadstart = leadstart or mw.ustring.len(t) + 1 -- we got a paragraph, so mark the start of the lead section
paras = paras + 1
if allparas or (options.paraflags and options.paraflags[paras]) then t = t .. token end -- add if this paragraph wanted
end
end -- of "else got a paragraph"
end -- of "else not a template"
if token then text = mw.ustring.sub(text, mw.ustring.len(token)+1) end -- remove parsed token from remaining text
startLine = mw.ustring.find(token, "\n%s*$") -- will the next token be the first non-space on a line?
until not text or text == "" or not token or token == "" -- loop until all text parsed
text = mw.ustring.gsub(t, "\n+$", "") -- remove trailing line feeds, so "{{Transclude text excerpt|Foo}} more" flows on one line
return filetext, text
end
function Excerpt.removeReferences( excerpt )
local function cleanupText(text, keepSubsections, keepRefs)
local references = parser.getReferences( excerpt )
text = mw.ustring.gsub(text, "<!%-%-.-%-%->","") -- remove HTML comments
for _, reference in pairs( references ) do
if falsy(keepSubsections) then
excerpt = Excerpt.removeString( excerpt, reference )
text = mw.ustring.gsub(text, "\n==.*","") -- remove first ==Heading== and everything after it
text = mw.ustring.gsub(text, "^==.*","") -- ...even if the lead is empty
end
return excerpt
text = mw.ustring.gsub(text, "<[Nn][Oo][Ii][Nn][Cc][Ll][Uu][Dd][Ee]>.-</[Nn][Oo][Ii][Nn][Cc][Ll][Uu][Dd][Ee]>", "") -- remove noinclude bits
if mw.ustring.find(text, "[Oo][Nn][Ll][Yy][Ii][Nn][Cc][Ll][Uu][Dd][Ee]") then -- avoid expensive search if possible
text = mw.ustring.gsub(text, "</[Oo][Nn][Ll][Yy][Ii][Nn][Cc][Ll][Uu][Dd][Ee]>.-<[Oo][Nn][Ll][Yy][Ii][Nn][Cc][Ll][Uu][Dd][Ee]>", "") -- remove text between onlyinclude sections
text = mw.ustring.gsub(text, "^.-<[Oo][Nn][Ll][Yy][Ii][Nn][Cc][Ll][Uu][Dd][Ee]>", "") -- remove text before first onlyinclude section
text = mw.ustring.gsub(text, "</[Oo][Nn][Ll][Yy][Ii][Nn][Cc][Ll][Uu][Dd][Ee]>.*", "") -- remove text after last onlyinclude section
end
if falsy(keepRefs) then
text = mw.ustring.gsub(text, "<%s*[Rr][Ee][Ff][^>]-/%s*>", "") -- remove refs cited elsewhere
text = mw.ustring.gsub(text, "<%s*[Rr][Ee][Ff].->.-<%s*/%s*[Rr][Ee][Ff]%s*>", "") -- remove refs
text = mw.ustring.gsub(text, "%b{}", striptemplate) -- remove unwanted templates such as references
end
text = mw.ustring.gsub(text, "<%s*[Ss][Cc][Oo][Rr][Ee].->.-<%s*/%s*[Ss][Cc][Oo][Rr][Ee]%s*>", "") -- remove musical scores
text = mw.ustring.gsub(text, "<%s*[Ii][Mm][Aa][Gg][Ee][Mm][Aa][Pp].->.-<%s*/%s*[Ii][Mm][Aa][Gg][Ee][Mm][Aa][Pp]%s*>", convertImagemap) -- convert imagemaps into standard images
text = mw.ustring.gsub(text, "%s*{{%s*[Tt][Oo][Cc].-}}", "") -- remove most common tables of contents
text = mw.ustring.gsub(text, "%s*__[A-Z]*TOC__", "") -- remove TOC behavior switches
text = mw.ustring.gsub(text, "\n%s*{{%s*[Pp]p%-.-}}", "\n") -- remove protection templates
text = mw.ustring.gsub(text, "%s*{{[^{|}]*[Ss]idebar%s*}}", "") -- remove most sidebars
text = mw.ustring.gsub(text, "%s*{{[^{|}]*%-[Ss]tub%s*}}", "") -- remove most stub templates
text = mw.ustring.gsub(text, "%s*%[%[%s*:?[Cc]ategory:.-%]%]", "") -- remove categories
text = mw.ustring.gsub(text, "^:[^\n]+\n","") -- remove DIY hatnote indented with a colon
return text
end
function Excerpt.removeCategories( excerpt )
-- Parse a ==Section== from a page
local categories = parser.getCategories( excerpt )
local function getsection(text, section, mainonly)
for _, category in pairs( categories ) do
local escapedSection = mw.ustring.gsub(mw.uri.decode(section), "([%^%$%(%)%%%.%[%]%*%+%-%?])", "%%%1") -- %26 → & etc, then ^ → %^ etc.
excerpt = Excerpt.removeString( excerpt, category )
local level, content = mw.ustring.match(text .. "\n", "\n(==+)%s*" .. escapedSection .. "%s*==.-\n(.*)")
if not content then return nil end -- no such section
local nextsection
if mainonly then
nextsection = "\n==.*" -- Main part of section terminates at any level of header
else
nextsection = "\n==" .. mw.ustring.rep("=?", #level - 2) .. "[^=].*" -- "===" → "\n===?[^=].*", matching "==" or "===" but not "===="
end
return excerpt
content = mw.ustring.gsub(content, nextsection, "") -- remove later sections with headings at this level or higher
return content
end
function Excerpt.removeBehaviorSwitches( excerpt )
-- Remove unmatched <tag> or </tag> tags
return excerpt:gsub( '__[A-Z]+__', '' )
local function fixtags(text, tag)
end
local startcount = 0
for i in mw.ustring.gmatch(text, "<%s*" .. tag .. "%f[^%w_].->") do startcount = startcount + 1 end
function Excerpt.removeComments( excerpt )
local endcount = 0
return excerpt:gsub( '<!%-%-.-%-%->', '' )
for i in mw.ustring.gmatch(text, "<%s*/" .. tag .. "%f[^%w_].->") do endcount = endcount + 1 end
end
function Excerpt.removeBold( excerpt )
if startcount > endcount then -- more <tag> than </tag>: remove the last few <tag>s
return excerpt:gsub( "'''", '' )
local i = 0
text = mw.ustring.gsub(text, "<%s*" .. tag .. "%f[^%w_].->", function(t)
i = i + 1
if i > endcount then return "" else return nil end
end) -- "end" here terminates the anonymous replacement function(t) passed to gsub
elseif endcount > startcount then -- more </tag> than <tag>: remove the first few </tag>s
text = mw.ustring.gsub(text, "<%s*/" .. tag .. "%f[^%w_].->", "", endcount - startcount)
end
return text
end
function Excerpt.removeLinks( excerpt )
-- Main function returns a string value: text of the lead of a page
local links = parser.getLinks( excerpt )
local function main(pagenames, options)
for _, link in pairs( links ) do
if not pagenames or #pagenames < 1 then return err("No page names given") end
excerpt = Excerpt.removeString( excerpt, link )
local pagename
local text
local pagecount = #pagenames
local firstpage = pagenames[1] or "(nil)" -- save for error message, as it the name will be deleted
local gotopt
local pageoptstr
local section
-- read the page, or a random one if multiple pages were provided
if pagecount > 1 then math.randomseed(os.time()) end
while not text and pagecount > 0 do
local pagenum = 1
if pagecount > 1 then pagenum = math.random(pagecount) end -- pick a random title
pagename = pagenames[pagenum]
if pagename and pagename ~= "" then
-- We have page or [[page]] or [[page|text]], possibly followed by |opt1|opt2...
local pn
pn, gotopt, pageoptstr = mw.ustring.match(pagename, "^%s*(%[%b[]%])%s*(|?)(.*)")
if pn then
pagename = mw.ustring.match(pn, "%[%[([^|%]]*)") -- turn [[page|text]] into page, discarding text
else -- we have page or page|opt...
pagename, gotopt, pageoptstr = mw.ustring.match(pagename, "%s*([^|]*[^|%s])%s*(|?)(.*)")
end
if pagename and pagename ~= "" then
local pn
pn, section = mw.ustring.match(pagename, "(.-)#(.*)")
pagename = pn or pagename
text, normalisedPagename = getContent(pagename)
if not normalisedPagename then
return err("No title for page name " .. pagename)
else
pagename = normalisedPagename
end
if text and options.nostubs then
local isStub = mw.ustring.find(text, "%s*{{[^{|}]*%-[Ss]tub%s*}}")
if isStub then text = nil end
end
if not section then
section = mw.ustring.match(pagename, ".-#(.*)") -- parse redirect to Page#Section
end
if text and section and section ~= "" then text = getsection(text, section) end
end
end
if not text then table.remove(pagenames, pagenum) end -- this one didn't work; try another
pagecount = pagecount - 1 -- ensure that we exit the loop after at most #pagenames iterations
end
return excerpt
if not text then return err("Cannot read a valid page: first name is " .. firstpage) end
end
-- @todo Use parser.getLinks
local keepRefs = options.keepRefs
function Excerpt.removeSelfLinks( excerpt, page )
local keepSubsections = options.keepSubsections
local lang = mw.language.getContentLanguage()
text = cleanupText(text, keepSubsections, keepRefs)
local page = Excerpt.escapeString( mw.title.getCurrentTitle().prefixedText )
local ucpage = lang:ucfirst( page )
local pageopts = {} -- pageopts (even if value is "") have priority over global options
local lcpage = lang:lcfirst( page )
for k, v in pairs(options) do pageopts[k] = v end
excerpt = excerpt
if gotopt and gotopt ~= "" then
:gsub( '%[%[(' .. ucpage .. ')%]%]', '%1' )
for _, t in pairs(mw.text.split(pageoptstr, "|")) do
:gsub( '%[%[(' .. lcpage .. ')%]%]', '%1' )
local k, v = mw.ustring.match(t, "%s*([^=]-)%s*=(.-)%s*$")
:gsub( '%[%[' .. ucpage .. '|([^]]+)%]%]', '%1' )
pageopts[k] = v
:gsub( '%[%[' .. lcpage .. '|([^]]+)%]%]', '%1' )
end
return excerpt
pageopts.paraflags = numberflags(pageopts["paragraphs"] or "") -- parse paragraphs, e.g. "1,3-5" → {"1","3-5"}
end
pageopts.fileflags = numberflags(pageopts["files"] or "") -- parse file numbers
if pageopts.more and pageopts.more == "" then pageopts.more = "Read more..." end -- more= is short for this default text
end
local filetext
filetext, text = parse(text, pageopts)
-- Replace the bold title or synonym near the start of the page by a link to the page
-- @param excerpt Wikitext of the excerpt
-- @param page Title of the page the excerpt was taken from
-- @return The excerpt with the first matching bold text turned into a link to the page
function Excerpt.linkBold( excerpt, page )
	local lang = mw.language.getContentLanguage()
	local position = mw.ustring.find( excerpt, "'''" .. lang:ucfirst( page ) .. "'''", 1, true ) -- look for "'''Foo''' is..." (uc) or "A '''foo''' is..." (lc)
		or mw.ustring.find( excerpt, "'''" .. lang:lcfirst( page ) .. "'''", 1, true ) -- plain search: special characters in page represent themselves
	if position then
		local length = mw.ustring.len( page )
		-- position points at the first apostrophe, so the title itself spans position + 3 .. position + length + 2
		excerpt = mw.ustring.sub( excerpt, 1, position + 2 ) .. '[[' .. mw.ustring.sub( excerpt, position + 3, position + length + 2 ) .. ']]' .. mw.ustring.sub( excerpt, position + length + 3, -1 ) -- link it
	else -- look for anything unlinked in bold, assumed to be a synonym of the title (e.g. a person's birth name)
		excerpt = mw.ustring.gsub( excerpt, "()'''(.-'*)'''", function ( a, b )
			if not mw.ustring.find( b, '%[' ) and not mw.ustring.find( b, '%{' ) then -- if not wikilinked or some weird template
				return "'''[[" .. page .. '|' .. b .. "]]'''" -- replace '''Foo''' by '''[[page|Foo]]'''
			else
				return nil -- instruct gsub to make no change
			end
		end, 1 ) -- "end" here terminates the anonymous replacement function(a, b) passed to gsub
	end
	return excerpt
end
function Excerpt.addTrackingCategories( excerpt )
-- remove '''bold text''' if requested
local currentTitle = mw.title.getCurrentTitle()
if not falsy(pageopts.nobold) then text = mw.ustring.gsub(text, "'''", "") end
local contentCategory = config.categories.content
if contentCategory and currentTitle.isContentPage then
excerpt = excerpt .. '[[Category:' .. contentCategory .. ']]'
end
local namespaceCategory = config.categories[ currentTitle.namespace ]
if namespaceCategory then
excerpt = excerpt .. '[[Category:' .. namespaceCategory .. ']]'
end
return excerpt
end
-- Helper method to match from a list of regular expressions
text = filetext .. text
-- Like so: match pre..list[1]..post or pre..list[2]..post or ...
-- Try each pattern fragment of a list in turn and return the captures of the first one that matches
-- @param text Text to search
-- @param pre Pattern prepended to every fragment
-- @param list Sequence of pattern fragments, tried in order
-- @param post Pattern appended to every fragment
-- @param init Optional start position, passed through to mw.ustring.match
-- @return Everything mw.ustring.match returned for the first matching pattern, or nil if none matched
function Excerpt.matchAny( text, pre, list, post, init )
	for index = 1, #list do
		local pattern = pre .. list[ index ] .. post
		-- collect every return value so that multiple captures are passed on to the caller
		local captures = { mw.ustring.match( text, pattern, init ) }
		if captures[1] then
			return unpack( captures )
		end
	end
	return nil
end
-- Helper function to get arguments
-- Seek and destroy unterminated templates and wikilinks
-- args from Lua calls have priority over parent args from template
repeat -- hide matched {{template}}s including nested templates
function Excerpt.getArg( key, default )
local t = text
local frame = mw.getCurrentFrame()
text = mw.ustring.gsub(text, "{(%b{})}", "\27{\27%1\27}\27") -- {{sometemplate}} → E{Esometemplate}E}E where E represents escape
for k, value in pairs( frame:getParent().args ) do
text = mw.ustring.gsub(text, "(< *math[^>]*>[^<]-)}}(.-< */math *>)", "%1}\27}\27%2") -- <math>\{sqrt\{hat{x}}</math> → <math>\{sqrt\{hat{x}E}E</math>
if k == key and mw.text.trim( value ) ~= '' then
until text == t
return value
repeat -- do similar for [[wikilink]]s
end
local t = text
end
text = mw.ustring.gsub(text, "%[(%b[])%]", "\27[\27%1\27]\27")
for k, value in pairs( frame.args ) do
until text == t
if k == key and mw.text.trim( value ) ~= '' then
return value
end
end
return default
end
-- Helper method to get an error message
text = text.gsub(text, "([{}%[%]])%1[^\27].*", "") -- remove unmatched {{, }}, [[ or ]] and everything thereafter, avoiding ]E]E etc.
-- This method also categorizes the current page in one of the configured error categories
text = text.gsub(text, "([{}%[%]])%1$", "") -- remove unmatched {{, }}, [[ or ]] at end of text
function Excerpt.getError( key, value )
text = mw.ustring.gsub(text, "\27", "") -- unhide matched pairs: E{E{ → {{, ]E]E → ]], etc.
local message = Excerpt.getMessage( 'error-' .. key, value )
local markup = mw.html.create( 'div' ):addClass( 'error' ):wikitext( message )
if config.categories and config.categories.errors and mw.title.getCurrentTitle().isContentPage then
markup:node( '[[Category:' .. config.categories.errors .. ']]' )
end
return markup
end
-- Helper method to get a localized message
-- Ensure div tags match
-- This method uses Module:TNT to get localized messages from https://commons.wikimedia.org/wiki/Data:I18n/Module:Excerpt.tab
text = fixtags(text, "div")
-- If Module:TNT is not available or the localized message does not exist, the key is returned instead
-- Look up a message in the 'I18n/Module:Excerpt.tab' dataset through Module:TNT
-- @param key Message key
-- @param value Optional value handed to TNT.format along with the key
-- @return The formatted message, or the key itself when Module:TNT cannot be loaded or formatting fails
function Excerpt.getMessage( key, value )
	local loaded, TNT = pcall( require, 'Module:TNT' )
	if loaded then
		local formatted, message = pcall( TNT.format, 'I18n/Module:Excerpt.tab', key, value )
		if formatted then
			return message
		end
	end
	return key -- fall back to the raw key
end
-- Helper method to escape a string for use in regexes
if pageopts.more then text = text .. " '''[[" .. pagename .. "|" .. pageopts.more .. "]]'''" end -- wikilink to article for more info
-- Prefix every Lua pattern magic character in a string with '%' so the string matches itself literally
-- @param str String to escape
-- @return The escaped string (a single value)
function Excerpt.escapeString( str )
	-- gsub also returns the number of substitutions; keep only the string so that the count
	-- cannot leak into a caller's argument list when this call is the last argument
	local escaped = str:gsub( '[%^%$%(%)%.%[%]%*%+%-%?%%]', '%%%0' )
	return escaped
end
-- Helper method to remove a string from a text
if pageopts.list and not pageopts.showall then -- add a collapsed list of pages which might appear
-- @param text Text from where to remove the string
local listtext = pageopts.list
-- @param str String to remove
if listtext == "" then listtext = "Other articles" end
-- @return The given text with the string removed
text = text .. "{{collapse top|title={{resize|85%|" ..listtext .. "}}|bg=fff}}{{hlist"
function Excerpt.removeString( text, str )
for _, p in pairs(pagenames) do
local pattern = Excerpt.escapeString( str )
if mw.ustring.match(p, "%S") then text = text .. "|[[" .. mw.text.trim(p) .. "]]" end
if #pattern > 9999 then -- strings longer than 10000 bytes can't be put into regexes
end
pattern = Excerpt.escapeString( mw.ustring.sub( str, 1, 999 ) ) .. '.-' .. Excerpt.escapeString( mw.ustring.sub( str, -999 ) )
text = text .. "}}\n{{collapse bottom}}"
end
return text:gsub( pattern, '' )
return text
end
-- Helper method to convert a comma-separated list of numbers or min-max ranges into a list of booleans
-- Shared template invocation code for lead and random functions
-- @param filter Required. Comma-separated list of numbers or min-max ranges, for example '1,3-5'
local function invoke(frame, func)
-- @return Map from integers to booleans, for example {1=true,2=false,3=true,4=true,5=true}
-- args = { 1,2,... = page names, paragraphs = list e.g. "1,3-5", files = list, more = text}
-- @return Boolean indicating whether the filters should be treated as a blacklist or not
local args = {} -- args[k] = frame.args[k] or frame:getParent().args[k] for all k in either (numeric or not)
-- @note Merging this into matchFilter is possible, but way too inefficient
for k, v in pairs(frame:getParent().args) do args[k] = v end
function Excerpt.parseFilter( filter )
for k, v in pairs(frame.args) do args[k] = v end -- args from a Lua call have priority over parent args from template
local filters = {}
errors = args["errors"] -- set the module level boolean used in local function err
local isBlacklist = false
if string.sub( filter, 1, 1 ) == '-' then
local articlecount = #args -- must be 1 except with selected=Foo and Foo=Somepage
isBlacklist = true
if articlecount < 1 and not (func == "selected" and args[func] and args[args[func]]) then
filter = string.sub( filter, 2 )
return err("No articles provided")
end
local values = mw.text.split( filter, ',' ) -- split values: '1,3-5' to {'1','3-5'}
for _, value in pairs( values ) do
local pagenames = {}
value = mw.text.trim( value )
if func == "lead" then
local min, max = mw.ustring.match( value, '^(%d+)%s*[-–—]%s*(%d+)$' ) -- '3-5' to min=3 max=5
pagenames = { args[1] }
if not max then min, max = string.match( value, '^((%d+))$' ) end -- '1' to min=1 max=1
elseif func == "linked" or func == "listitem" then
if max then
-- Read named page and find its wikilinks
for i = min, max do filters[ i ] = true end
local page = args[1]
else
local text, title = getContent(page)
filters[ value ] = true -- if we reach this point, the string had the form 'a,b,c' rather than '1,2,3'
if not title then
return err("No title for page name " .. page)
elseif not text then
return err("No content for page name " .. page)
end
if args["section"] then -- check relevant section only
text = getsection(text, args["section"], args["sectiononly"])
if not text then return err("No section " .. args["section"] .. " in page " .. page) end
end
-- replace annotated links with real links
text = mw.ustring.gsub(text, "{{%s*[Aa]nnotated[ _]link%s*|%s*(.-)%s*}}", "[[%1]]")
if func == "linked" then
for p in mw.ustring.gmatch(text, "%[%[%s*([^%]|\n]*)") do table.insert(pagenames, p) end
else -- listitem: first wikilink on a line beginning *, :#, etc. except in "See also" or later section
text = mw.ustring.gsub(text, "\n== *See also.*", "")
for p in mw.ustring.gmatch(text, "\n:*[%*#][^\n]-%[%[%s*([^%]|\n]*)") do table.insert(pagenames, p) end
end
elseif func == "random" then
-- accept any number of page names. If more than one, we'll pick one randomly
for i, p in pairs(args) do
if p and type(i) == 'number' then table.insert(pagenames, p) end
end
elseif func == "selected" then
local articlekey = args[func]
if tonumber(articlekey) then -- normalise article number into the range 1..#args
articlekey = articlekey % articlecount
if articlekey == 0 then articlekey = articlecount end
end
pagenames = { args[articlekey] }
end
local filter = {cache = {}, terms = filters}
return filter, isBlacklist
end
-- Helper function to see if a value matches any of the given filters
local options = args -- pick up miscellaneous options: more, errors, fileargs
function Excerpt.matchFilter( value, filter )
options.paraflags = numberflags(args["paragraphs"] or "") -- parse paragraphs, e.g. "1,3-5" → {"1","3-5"}
if type(value) == "number" then
options.fileflags = numberflags(args["files"] or "") -- parse file numbers
return filter.terms[value]
if options.more and options.more == "" then options.more = "Read more..." end -- more= is short for this default text
else
local textcached = ""filter.cache[value]
if options.showallcached ~= nil then
return cached
local separator = ""
end
for _, p in pairs(pagenames) do
local tlang = mainmw.language.getContentLanguage({ p }, options)
local lcvalue = lang:lcfirst(value)
if t ~= "" then
local ucvalue = lang:ucfirst(value)
text = text .. separator .. t
for term in pairs( filter.terms ) do
separator = options.showall
if value == tostring(term)
if separator == "" then separator = "{{clear}}{{hr}}" end
or type(term) == "string" and (
lcvalue == term
or ucvalue == term
or mw.ustring.match( value, term )
) then
filter.cache[value] = true
return true
end
end
filter.cache[value] = false
else
text = main(pagenames, options)
end
if text == "" and brokenCategory and brokenCategory ~= "" and mw.title.getCurrentTitle().isContentPage then
return "[[Category:" .. brokenCategory .. "]]"
else
return frame:preprocess(text)
end
end
return Excerpt
-- Entry points for template callers using #invoke:
-- {{Transclude lead excerpt}} reads the first and only article
function p.lead(frame)
	return invoke(frame, "lead")
end

-- {{Transclude linked excerpt}} reads a randomly selected article linked from the given page
function p.linked(frame)
	return invoke(frame, "linked")
end

-- {{Transclude list item excerpt}} reads a randomly selected article listed on the given page
function p.listitem(frame)
	return invoke(frame, "listitem")
end

-- {{Transclude random excerpt}} reads any article (default for invoke with one argument)
function p.random(frame)
	return invoke(frame, "random")
end

-- {{Transclude selected excerpt}} reads the article whose key is in the selected= parameter
function p.selected(frame)
	return invoke(frame, "selected")
end
-- Entry points for other Lua modules
-- Thin wrappers exposing the module's local functions to other Lua modules;
-- each one forwards its arguments unchanged to the local function of the same name

function p.getContent(page, frame)
	return getContent(page, frame)
end

-- mainonly is optional and is forwarded so that Lua callers can use the same
-- third argument that getsection accepts; omitting it behaves as before
function p.getsection(text, section, mainonly)
	return getsection(text, section, mainonly)
end

function p.parse(text, options, filesOnly)
	return parse(text, options, filesOnly)
end

function p.argimage(text)
	return argimage(text)
end

function p.checkimage(image)
	return checkimage(image)
end

function p.parseimage(text, start)
	return parseimage(text, start)
end

function p.cleanupText(text, keepSubsections, keepRefs)
	return cleanupText(text, keepSubsections, keepRefs)
end

function p.main(pagenames, options)
	return main(pagenames, options)
end

function p.numberflags(str)
	return numberflags(str)
end
return p
|