-- Module:Excerpt implements the Excerpt template
-- Get localized data
-- Documentation and master version: https://en.wikipedia.org/wiki/Module:Excerpt
local d = require("Module:Excerpt/i18n")
-- Authors: User:Sophivorus, User:Certes, User:Aidan9382 & others
-- License: CC-BY-SA-3.0
local parser = require( 'Module:WikitextParser' )
local p = {}
local yesno = require( 'Module:Yesno' )
local ok, config = pcall( require, 'Module:Excerpt/config' )
-- Helper function to debug
if not ok then config = {} end
-- Returns blank text or an error message if requested
local errors
-- Report a problem with the given message.
-- When the enclosing `errors` variable is truthy, raises `text` as a Lua error
-- attributed to the caller of this function (level 2).
-- Otherwise the message is discarded and the empty string is returned.
-- @param text the error message
-- @return "" when errors are not being raised
local function err(text)
	if not errors then
		return ""
	end
	error(text, 2)
end
local Excerpt = {}
-- Helper function to test for truthy and falsy values.
-- nil, false, the empty string and the strings "0", "false" and "no" count as
-- falsy; any other value (including the number 0) counts as truthy.
-- @param value any value, typically a template argument string
-- @return boolean
local function is(value)
	if not value then
		return false
	end
	local falsyStrings = { [""] = true, ["0"] = true, ["false"] = true, ["no"] = true }
	return not falsyStrings[value]
end
-- Main entry point for templates
-- Helper function to match from a list of regular expressions
function Excerpt.main( frame )
-- Like so: match pre..list[1]..post or pre..list[2]..post or ...
local function matchAny(text, pre, list, post, init)
-- Make sure the requested page exists and get the wikitext
local match = {}
local page = Excerpt.getArg( 1 )
for i = 1, #list do
if not page or page == '{{{1}}}' then return Excerpt.getError( 'no-page' ) end
match = { mw.ustring.match(text, pre .. list[i] .. post, init) }
local title = mw.title.new( page )
if match[1] then return unpack(match) end
if not title then return Excerpt.getError( 'invalid-title', page ) end
local fragment = title.fragment -- save for later
if title.isRedirect then
title = title.redirectTarget
if fragment == "" then
fragment = title.fragment -- page merge potential
end
end
if not title.exists then return Excerpt.getError( 'page-not-found', page ) end
return nil
page = title.prefixedText
end
local wikitext = title:getContent()
-- Get the template params and process them
-- Help gsub to remove unwanted templates and pseudo-templates such as #tag:ref and DEFAULTSORT
local functionparams stripTemplate(t)= {
hat = yesno( Excerpt.getArg( 'hat', true ) ),
-- If template is unwanted then return "" (gsub will replace by nothing), else return nil (gsub will keep existing string)
this = Excerpt.getArg( 'this' ),
if matchAny(t, "^{{%s*", d.unwantedInlineTemplates, "%s*%f[|}]") then return "" end
only = Excerpt.getArg( 'only' ),
files = Excerpt.getArg( 'files', Excerpt.getArg( 'file' ) ),
lists = Excerpt.getArg( 'lists', Excerpt.getArg( 'list' ) ),
tables = Excerpt.getArg( 'tables', Excerpt.getArg( 'table' ) ),
templates = Excerpt.getArg( 'templates', Excerpt.getArg( 'template' ) ),
paragraphs = Excerpt.getArg( 'paragraphs', Excerpt.getArg( 'paragraph' ) ),
references = yesno( Excerpt.getArg( 'references', true ) ),
subsections = yesno( Excerpt.getArg( 'subsections', false ) ),
links = yesno( Excerpt.getArg( 'links', true ) ),
bold = yesno( Excerpt.getArg( 'bold', false ) ),
briefDates = yesno( Excerpt.getArg( 'briefdates', false ) ),
inline = yesno( Excerpt.getArg( 'inline' ) ),
quote = yesno( Excerpt.getArg( 'quote' ) ),
more = yesno( Excerpt.getArg( 'more' ) ),
class = Excerpt.getArg( 'class' ),
displayTitle = Excerpt.getArg( 'displaytitle', page ),
}
-- Make sure the requested section exists and get the excerpt
-- If template is wanted but produces an unwanted reference then return the string with |shortref or |ref removed
local excerpt
local noref = mw.ustring.gsub(t, "|%s*shortref%s*%f[|}]", "")
local section = Excerpt.getArg( 2, fragment )
noref = mw.ustring.gsub(noref, "|%s*ref%s*%f[|}]", "")
section = mw.text.trim( section )
if section == '' then section = nil end
if section then
excerpt = parser.getSectionTag( wikitext, section )
if not excerpt then
if params.subsections then
excerpt = parser.getSection( wikitext, section )
else
local sections = parser.getSections( wikitext )
excerpt = sections[ section ]
end
end
if not excerpt then return Excerpt.getError( 'section-not-found', section ) end
if excerpt == '' then return Excerpt.getError( 'section-empty', section ) end
else
excerpt = parser.getLead( wikitext )
if excerpt == '' then return Excerpt.getError( 'lead-empty' ) end
end
-- Remove noinclude bits
-- If a wanted template has unwanted nested templates, purge them too
excerpt = excerpt:gsub( '<[Nn][Oo][Ii][Nn][Cc][Ll][Uu][Dd][Ee]>.-</[Nn][Oo][Ii][Nn][Cc][Ll][Uu][Dd][Ee]>', '' )
noref = mw.ustring.sub(noref, 1, 2) .. mw.ustring.gsub(mw.ustring.sub(noref, 3), "%b{}", stripTemplate)
-- Filter various elements from the excerpt
-- Replace {{audio}} by its text parameter: {{Audio|Foo.ogg|Bar}} → Bar
excerpt = Excerpt.filterFiles( excerpt, params.files )
noref = mw.ustring.gsub(noref, "^{{%s*[Aa]udio.-|.-|(.-)%f[|}].*", "%1")
excerpt = Excerpt.filterLists( excerpt, params.lists )
excerpt = Excerpt.filterTables( excerpt, params.tables )
excerpt = Excerpt.filterParagraphs( excerpt, params.paragraphs )
-- If no file is found, try to get one from the infobox
-- Replace {{Nihongo foot}} by its text parameter: {{Nihongo foot|English|英語|eigo}} → English
if ( params.only == 'file' or params.only == 'files' or not params.only and ( not params.files or params.files ~= '0' ) ) -- caller asked for files
noref = mw.ustring.gsub(noref, "^{{%s*[Nn]ihongo[ _]+foot%s*|(.-)%f[|}].*", "%1")
and not section -- and we're in the lead section
and config.captions -- and we have the config option required to try finding files in infoboxes
and #parser.getFiles( excerpt ) == 0 -- and there're no files in the excerpt
then
excerpt = Excerpt.addInfoboxFile( excerpt )
end
-- Filter the templates by appending the templates blacklist to the templates filter
if noref ~= t then return noref end
if config.blacklist then
local blacklist = table.concat( config.blacklist, ',' )
if params.templates then
if string.sub( params.templates, 1, 1 ) == '-' then
params.templates = params.templates .. ',' .. blacklist
end
else
params.templates = '-' .. blacklist
end
end
excerpt = Excerpt.filterTemplates( excerpt, params.templates )
-- Leave only the requested elements
return nil -- not an unwanted template: keep
if params.only == 'file' or params.only == 'files' then
end
local files = parser.getFiles( excerpt )
excerpt = params.only == 'file' and files[1] or table.concat( files, '\n\n' )
end
if params.only == 'list' or params.only == 'lists' then
local lists = parser.getLists( excerpt )
excerpt = params.only == 'list' and lists[1] or table.concat( lists, '\n\n' )
end
if params.only == 'table' or params.only == 'tables' then
local tables = parser.getTables( excerpt )
excerpt = params.only == 'table' and tables[1] or table.concat( tables, '\n\n' )
end
if params.only == 'paragraph' or params.only == 'paragraphs' then
local paragraphs = parser.getParagraphs( excerpt )
excerpt = params.only == 'paragraph' and paragraphs[1] or table.concat( paragraphs, '\n\n' )
end
if params.only == 'template' or params.only == 'templates' then
local templates = parser.getTemplates( excerpt )
excerpt = params.only == 'template' and templates[1] or table.concat( templates, '\n\n' )
end
-- @todo Make more robust and move downwards
-- Get a page's content, following redirects, and processing file description pages for files.
if params.briefDates then
-- Also returns the page name, or the target page name if a redirect was followed, or false if no page found
excerpt = Excerpt.fixDates( excerpt )
local function getContent(page, frame)
end
local title = mw.title.new(page) -- Read description page (for :File:Foo rather than File:Foo)
if not title then return false, false end
-- Remove unwanted elements
local target = title.redirectTarget
excerpt = Excerpt.removeComments( excerpt )
if target then title = target end
excerpt = Excerpt.removeSelfLinks( excerpt )
excerpt = Excerpt.removeNonFreeFiles( excerpt )
excerpt = Excerpt.removeBehaviorSwitches( excerpt )
-- Fix or remove the references
return title:getContent(), title.prefixedText
if params.references then
end
excerpt = Excerpt.fixReferences( excerpt, page, wikitext )
else
-- Check image for suitability
excerpt = Excerpt.removeReferences( excerpt )
local function checkImage(image)
end
local page = matchAny(image, "", d.fileNamespaces, "%s*:[^|%]]*") -- match File:(name) or Image:(name)
if not page then return false end
-- Remove wikilinks
-- Limit to image types: .gif, .jpg, .jpeg, .png, .svg, .tiff, .xcf (exclude .ogg audio etc.)
if not params.links then
if not matchAny(page, "%.", {"[Gg][Ii][Ff]", "[Jj][Pp][Ee]?[Gg]", "[Pp][Nn][Gg]", "[Ss][Vv][Gg]", "[Tt][Ii][Ff][Ff]", "[Xx][Cc][Ff]"}, "%s*$") then
excerpt = Excerpt.removeLinks( excerpt )
return false
end
-- Link the bold text near the start of most leads and then remove it
local desc, rtitle = getContent(page) -- get file description and title after following any redirect
if not section then
if desc and desc ~= "" then -- found description on local wiki
excerpt = Excerpt.linkBold( excerpt, page )
if mw.ustring.match(desc, "[Nn]on%-free") then return false end
end
desc = mw.ustring.gsub(desc, "%b{}", stripTemplate) -- remove DEFAULTSORT etc. to avoid side effects of frame:preprocess
elseifif not rtitleparams.bold then
excerpt = Excerpt.removeBold( excerpt )
return false
else
-- try commons
desc = "{{" .. rtitle .. "}}"
end
frame = frame or mw.getCurrentFrame()
desc = frame:preprocess(desc)
-- Remove extra line breaks but leave one before and after so the parser interprets lists, tables, etc. correctly
return ( desc and desc ~= "" and not mw.ustring.match(desc, "[Nn]on%-free") ) and true or false -- hide non-free image
excerpt = excerpt:gsub( '\n\n\n+', '\n\n' )
end
excerpt = mw.text.trim( excerpt )
excerpt = '\n' .. excerpt .. '\n'
-- Remove nested categories
-- Attempt to parse [[File:...]] or [[Image:...]], either anywhere (start=false) or at the start only (start=true)
excerpt = frame:preprocess( excerpt )
local function parseImage(text, start)
excerpt = Excerpt.removeCategories( excerpt )
local startre = ""
if start then startre = "^" end -- a true flag restricts search to start of string
-- Add tracking categories
local image = matchAny(text, startre .. "%[%[%s*", d.fileNamespaces, "%s*:.*") -- [[File: or [[Image: ...
if imageconfig.categories then
excerpt = Excerpt.addTrackingCategories( excerpt )
image = mw.ustring.match(image, "%b[]%s*") -- matching [[...]] to handle wikilinks nested in caption
end
return image
end
-- Build the final output
-- Parse a caption, which ends at a | (end of parameter) or } (end of infobox) but may contain nested [..] and {..}
if params.inline then
-- Returns the leading part of `caption` up to (not including) the first "|" or "}"
-- that is not inside a balanced [...] or {...} span.
-- @param caption text starting at the caption value; may be nil
-- @return nil when caption is nil, the text before the terminator, or the whole
--         caption when no terminator is found
local function parseCaption(caption)
-- NOTE(review): the next line references `excerpt`, which is neither a parameter nor a
-- local of this function, and a `return` followed by further statements is not valid
-- Lua. It looks like a stray line from another definition — confirm against the master version.
return mw.text.trim( excerpt )
if not caption then return nil end
local len = mw.ustring.len(caption)
-- pos is the position from which the next link, template or terminator is searched
local pos = 1
while pos <= len do
-- next balanced [...] span at or after pos
local linkstart, linkend = mw.ustring.find(caption, "%b[]", pos)
linkstart = linkstart or len + 1 -- avoid comparison with nil when no link
-- next balanced {...} span at or after pos
local templatestart, templateend = mw.ustring.find(caption, "%b{}", pos)
templatestart = templatestart or len + 1 -- avoid comparison with nil when no template
-- next candidate terminator; len + 1 means "none before the end of the string"
local argend = mw.ustring.find(caption, "[|}]", pos) or len + 1
-- whichever of the three starts first decides what happens next
if linkstart < templatestart and linkstart < argend then
pos = linkend + 1 -- skip wikilink
elseif templatestart < argend then
pos = templateend + 1 -- skip template
else -- argument ends before the next wikilink or template
return mw.ustring.sub(caption, 1, argend - 1)
end
end
return caption -- No terminator found: return entire caption
end
local tag = params.quote and 'blockquote' or 'div'
-- Attempt to construct a [[File:...]] block from {{infobox ... |image= ...}}
local block = mw.html.create( tag ):addClass( 'excerpt-block' ):addClass( params.class )
local function argImage(text)
local token = nil
local hasNamedArgs = mw.ustring.find(text, "|") and mw.ustring.find(text, "=")
if not hasNamedArgs then return nil end -- filter out any template that obviously doesn't contain an image
if config.styles then
-- ensure image map is captured
local styles = frame:extensionTag( 'templatestyles', '', { src = config.styles } )
text = mw.ustring.gsub(text, '<!%-%-imagemap%-%->', '|imagemap=')
block:node( styles )
end
if params.hat then
-- find all images
local hat = Excerpt.getHat( page, section, params )
local hasImages = false
block:node( hat )
local images = {}
local capture_from = 1
while capture_from < mw.ustring.len(text) do
local argname, position, image = mw.ustring.match(text, "|%s*([^=|]-[Ii][Mm][Aa][Gg][Ee][^=|]-)%s*=%s*()(.*)", capture_from)
if image then -- ImageCaption=, image_size=, image_upright=, etc. do not introduce an image
local lcArgname = mw.ustring.lower(argname)
if mw.ustring.find(lcArgname, "caption")
or mw.ustring.find(lcArgname, "size")
or mw.ustring.find(lcArgname, "upright") then
image = nil
end
end
if image then
hasImages = true
images[position] = image
capture_from = position
else
capture_from = mw.ustring.len(text)
end
end
capture_from = 1
while capture_from < mw.ustring.len(text) do
local position, image = mw.ustring.match(text, "|%s*[^=|]-[Pp][Hh][Oo][Tt][Oo][^=|]-%s*=%s*()(.*)", capture_from)
if image then
hasImages = true
images[position] = image
capture_from = position
else
capture_from = mw.ustring.len(text)
end
end
capture_from = 1
while capture_from < mw.ustring.len(text) do
local position, image = mw.ustring.match(text, "|%s*[^=|{}]-%s*=%s*()%[?%[?([^|{}]*%.%a%a%a%a?)%s*%f[|}]", capture_from)
if image then
hasImages = true
if not images[position] then
images[position] = image
end
capture_from = position
else
capture_from = mw.ustring.len(text)
end
end
excerpt = mw.html.create( 'div' ):addClass( 'excerpt' ):wikitext( excerpt )
if not hasImages then return nil end
block:node( excerpt )
if params.more then
-- find all captions
local more = Excerpt.getReadMore( page, section )
local captions = {}
block:node( more )
capture_from = 1
while capture_from < mw.ustring.len(text) do
local position, caption = matchAny(text, "|%s*", d.captionParams, "%s*=%s*()([^\n]+)", capture_from)
if caption then
-- extend caption to parse "| caption = Foo {{Template\n on\n multiple lines}} Bar\n"
local bracedCaption = mw.ustring.match(text, "^[^\n]-%b{}[^\n]+", position)
if bracedCaption and bracedCaption ~= "" then caption = bracedCaption end
caption = mw.text.trim(caption)
local captionStart = mw.ustring.sub(caption, 1, 1)
if captionStart == '|' or captionStart == '}' then caption = nil end
end
if caption then
-- find nearest image, and use same index for captions table
local i = position
while i > 0 and not images[i] do
i = i - 1
if images[i] then
if not captions[i] then
captions[i] = parseCaption(caption)
end
end
end
capture_from = position
else
capture_from = mw.ustring.len(text)
end
end
return block
-- find all alt text
end
local altTexts = {}
for position, altText in mw.ustring.gmatch(text, "|%s*[Aa][Ll][Tt]%s*=%s*()([^\n]*)") do
if altText then
-- Filter the files in the given wikitext against the given filter
-- altText is terminated by }} or |, but first skip any matched [[...]] and {{...}}
function Excerpt.filterFiles( wikitext, filter )
local lookfrom = math.max( -- find position after whichever comes last: start of string, end of last ]] or end of last }}
if not filter then return wikitext end
mw.ustring.match(altText, ".*{%b{}}()") or 1, -- if multiple {{...}}, .* consumes all but one, leaving the last for %b
local filters, isBlacklist = Excerpt.parseFilter( filter )
mw.ustring.match(altText, ".*%[%b[]%]()") or 1)
local files = parser.getFiles( wikitext )
for index, file in pairs( files ) do
local len = mw.ustring.len(altText)
local name = parser.getFileName( file )
local aftertext = math.min( -- find position after whichever comes first: end of string, }} or |
if isBlacklist and ( Excerpt.matchFilter( index, filters ) or Excerpt.matchFilter( name, filters ) )
mw.ustring.match(altText, "()}}", lookfrom) or len+1,
or not isBlacklist and ( not Excerpt.matchFilter( index, filters ) and not Excerpt.matchFilter( name, filters ) ) then
mw.ustring.match(altText, "()|", lookfrom) or len+1)
wikitext = Excerpt.removeString( wikitext, file )
altText = mw.ustring.sub(altText, 1, aftertext-1) -- chop off |... or }}... which is not part of [[...]] or {{...}}
altText = mw.text.trim(altText)
local altTextStart = mw.ustring.sub(altText, 1, 1)
if altTextStart == '|' or altTextStart == '}' then altText = nil end
end
if altText then
-- find nearest image, and use same index for altTexts table
local i = position
while i > 0 and not images[i] do
i = i - 1
if images[i] then
if not altTexts[i] then
altTexts[i] = altText
end
end
end
end
end
return wikitext
end
-- Filter the lists in the given wikitext against the given filter
-- find all image sizes
function Excerpt.filterLists( wikitext, filter )
local imageSizes = {}
if not filter then return wikitext end
for position, imageSizeMatch in mw.ustring.gmatch(text, "|%s*[Ii][Mm][Aa][Gg][Ee][ _]?[Ss][Ii][Zz][Ee]%s*=%s*()([^}|\n]*)") do
local filters, isBlacklist = Excerpt.parseFilter( filter )
local imageSize = mw.ustring.match(imageSizeMatch, "=%s*([^}|\n]*)")
local lists = parser.getLists( wikitext )
if imageSize then
for index, list in pairs( lists ) do
imageSize = mw.text.trim(imageSize )
if isBlacklist and Excerpt.matchFilter( index, filters )
local imageSizeStart = mw.ustring.sub(imageSize, 1, 1)
or not isBlacklist and not Excerpt.matchFilter( index, filters ) then
if imageSizeStart == '|' or imageSizeStart == '}' then imageSize = nil end
wikitext = Excerpt.removeString( wikitext, list )
end
if imageSize then
-- find nearest image, and use same index for imageSizes table
local i = position
while i > 0 and not images[i] do
i = i - 1
if images[i] then
if not imageSizes[i] then
imageSizes[i] = imageSize
end
end
end
end
end
return wikitext
end
-- Filter the tables in the given wikitext against the given filter
-- sort the keys of the images table (in a table sequence), so that images can be iterated over in order
function Excerpt.filterTables( wikitext, filter )
local keys = {}
if not filter then return wikitext end
for key, val in pairs(images) do
local filters, isBlacklist = Excerpt.parseFilter( filter )
table.insert(keys, key)
local tables = parser.getTables( wikitext )
for index, t in pairs( tables ) do
local id = string.match( t, '{|[^\n]-id%s*=%s*["\']?([^"\'\n]+)["\']?[^\n]*\n' )
if isBlacklist and ( Excerpt.matchFilter( index, filters ) or Excerpt.matchFilter( id, filters ) )
or not isBlacklist and ( not Excerpt.matchFilter( index, filters ) and not Excerpt.matchFilter( id, filters ) ) then
wikitext = Excerpt.removeString( wikitext, t )
end
end
return wikitext
table.sort(keys)
end
-- Filter the paragraphs in the given wikitext against the given filter
-- add in relevant optional parameters for each image: caption, alt text and image size
function Excerpt.filterParagraphs( wikitext, filter )
local imageTokens = {}
if not filter then return wikitext end
for _, index in ipairs(keys) do
local filters, isBlacklist = Excerpt.parseFilter( filter )
local image = images[index]
local paragraphs = parser.getParagraphs( wikitext )
local token = parseImage(image, true) -- look for image=[[File:...]] etc.
for index, paragraph in pairs( paragraphs ) do
if not token then
if isBlacklist and Excerpt.matchFilter( index, filters )
image = mw.ustring.match(image, "^[^}|\n]*") -- remove later arguments
or not isBlacklist and not Excerpt.matchFilter( index, filters ) then
token = "[[" -- Add File: unless name already begins File: or Image:
wikitext = Excerpt.removeString( wikitext, paragraph )
if not matchAny(image, "^", d.fileNamespaces, "%s*:") then
token = token .. "File:"
end
token = token .. image
local caption = captions[index]
if caption and mw.ustring.match(caption, "%S") then token = token .. "|" .. caption end
local alt = altTexts[index]
if alt then token = token .. "|alt=" .. alt end
local image_size = imageSizes[index]
if image_size and mw.ustring.match(image_size, "%S") then token = token .. "|" .. image_size end
token = token .. "]]"
end
token = mw.ustring.gsub(token, "\n","") .. "\n"
table.insert(imageTokens, token)
end
return imageTokenswikitext
end
-- Filter the templates in the given wikitext against the given filter
-- Help gsub convert imagemaps into standard images
function Excerpt.filterTemplates( wikitext, filter )
local function convertImageMap(imagemap)
if not filter then return wikitext end
local image = matchAny(imagemap, "[>\n]%s*", d.fileNamespaces, "[^\n]*")
local filters, isBlacklist = Excerpt.parseFilter( filter )
if image then
local templates = parser.getTemplates( wikitext )
return "<!--imagemap-->[[" .. mw.ustring.gsub(image, "[>\n]%s*", "", 1) .. "]]"
for index, template in pairs( templates ) do
else
local name = parser.getTemplateName( template )
return "" -- remove entire block if image can't be extracted
if isBlacklist and ( Excerpt.matchFilter( index, filters ) or Excerpt.matchFilter( name, filters ) )
or not isBlacklist and ( not Excerpt.matchFilter( index, filters ) and not Excerpt.matchFilter( name, filters ) ) then
wikitext = Excerpt.removeString( wikitext, template )
end
end
return wikitext
end
function Excerpt.addInfoboxFile( excerpt )
-- Convert a comma-separated list of numbers or min-max ranges into a list of booleans, e.g. "1,3-5" → {1=true,2=false,3=true,4=true,5=true}
-- We cannot distinguish the infobox from the other templates, so we search them all
local function numberFlags(str)
local templates = parser.getTemplates( excerpt )
local ranges = mw.text.split(str, ",") -- parse ranges, e.g. "1,3-5" → {"1","3-5"}
for _, template in pairs( templates ) do
local flags = {}
local parameters = parser.getTemplateParameters( template )
for _, r in pairs(ranges) do
local file, captions, caption, cssClasses, cssClass
local min, max = mw.ustring.match(r, "^%s*(%d+)%s*%-%s*(%d+)%s*$") -- "3-5" → min=3 max=5
for _, pair in pairs( config.captions ) do
if not max then min, max = mw.ustring.match(r, "^%s*((%d+))%s*$") end -- "1" → min=1 max=1
if file max= thenpair[1]
file = parameters[file]
for p = min, max do flags[p] = true end
if file and Excerpt.matchAny( file, '^.*%.', { '[Jj][Pp][Ee]?[Gg]', '[Pp][Nn][Gg]', '[Gg][Ii][Ff]', '[Ss][Vv][Gg]' }, '.*' ) then
file = string.match( file, '%[?%[?.-:([^{|]+)%]?%]?' ) or file -- [[File:Example.jpg{{!}}upright=1.5]] to Example.jpg
captions = pair[2]
for _, p in pairs( captions ) do
if parameters[ p ] then caption = parameters[ p ] break end
end
-- Check for CSS classes
-- We opt to use skin-invert-image instead of skin-invert
-- in all other cases, the CSS provided in the infobox is used
if pair[3] then
cssClasses = pair[3]
for _, p in pairs( cssClasses ) do
if parameters[ p ] then
cssClass = ( parameters[ p ] == 'skin-invert' ) and 'skin-invert-image' or parameters[ p ]
break
end
end
end
local class = cssClass and ( '|class=' .. cssClass ) or ''
return '[[File:' .. file .. class .. '|thumb|' .. ( caption or '' ) .. ']]' .. excerpt
end
end
end
return flagsexcerpt
end
function Excerpt.removeNonFreeFiles( wikitext )
local imageArgGroups = {
local files = parser.getFiles( wikitext )
{"thumb", "thumbnail", "frame", "framed", "frameless"},
for _, file in pairs( files ) do
{"right", "left", "center", "none"},
local fileName = 'File:' .. parser.getFileName( file )
{"baseline", "middle", "sub", "super", "text-top", "text-bottom", "top", "bottom"}
local fileTitle = mw.title.new( fileName )
}
if fileTitle then
local fileDescription = fileTitle:getContent()
local function modifyImage(image, fileargs)
if not fileDescription or fileDescription == '' then
if fileargs then
local frame = mw.getCurrentFrame()
for _, filearg in pairs(mw.text.split(fileargs, "|")) do -- handle fileargs=left|border etc.
fileDescription = frame:preprocess( '{{' .. fileName .. '}}' ) -- try Commons
local fa = mw.ustring.gsub(filearg, "=.*", "") -- "upright=0.75" → "upright"
local group = {fa} -- group of "border" is ["border"]...
for _, g in pairs(imageArgGroups) do
for _, a in pairs(g) do
if fa == a then group = g end -- ...but group of "left" is ["right", "left", "center", "none"]
end
end
if fileDescription and string.match( fileDescription, '[Nn]on%-free' ) then
for _, a in pairs(group) do
wikitext = Excerpt.removeString( wikitext, file )
image = mw.ustring.gsub(image, "|%s*" .. a .. "%f[%A]%s*=[^|%]]*", "") -- remove "|upright=0.75" etc.
image = mw.ustring.gsub(image, "|%s*" .. a .. "%s*([|%]])", "%1") -- replace "|left|" by "|" etc.
end
image = mw.ustring.gsub(image, "([|%]])", "|" .. filearg .. "%1", 1) -- replace "|" by "|left|" etc.
end
end
return imagewikitext
end
function Excerpt.getHat( page, section, params )
-- a basic parser to trim down extracted wikitext
local hat
-- @param text : Wikitext to be processed
-- @param options : A table of options...
-- Build the text
-- options.paraflags : Which numbered paragraphs to keep, as either a string (e.g. `1,3-5`) or a table (e.g. `{1=true,2=false,3=true,4=true,5=true}`). If not present, all paragraphs will be kept.
if params.this then
-- options.fileflags : Which numbered files to keep, as either a string (e.g. `1,3-5`) or a table (e.g. `{1=true,2=false,3=true,4=true,5=true}`)
hat = params.this
-- options.fileargs : args for the [[File:]] syntax, such as `left`
elseif params.quote then
-- @param filesOnly : If set, only return the files and not the prose
hat = Excerpt.getMessage( 'this' )
local function parse(text, options, filesOnly)
elseif params.only then
local allparas = true -- keep all paragraphs?
hat = Excerpt.getMessage( params.only )
if options.paraflags then
else
if type(options.paraflags) ~= "table" then options.paraflags = numberFlags(options.paraflags) end
hat = Excerpt.getMessage( 'section' )
for _, v in pairs(options.paraflags) do
if v then allparas = false end -- if any para specifically requested, don't keep all
end
end
hat = hat .. ' ' .. Excerpt.getMessage( 'excerpt' )
if filesOnly then
allparas = false
-- Build the link
options.paraflags = {}
if section then
hat = hat .. ' [[:' .. page .. '#' .. mw.uri.anchorEncode( section ) .. '|' .. params.displayTitle
.. ' § ' .. section:gsub( '%[%[([^]|]+)|?[^]]*%]%]', '%1' ) .. ']].' -- remove nested links
else
hat = hat .. ' [[:' .. page .. '|' .. params.displayTitle .. ']].'
end
-- Build the edit link
local maxfile = 0 -- for efficiency, stop checking images after this many have been found
local title = mw.title.new( page )
if options.fileflags then
local editUrl = title:fullUrl( 'action=edit' )
if type(options.fileflags) ~= "table" then options.fileflags = numberFlags(options.fileflags) end
hat = hat .. '<span class="mw-editsection-like plainlinks"><span class="mw-editsection-bracket">[</span>['
for k, v in pairs(options.fileflags) do
hat = hat .. editUrl .. ' ' .. mw.message.new( 'editsection' ):plain()
if v and k > maxfile then maxfile = k end -- set maxfile = highest key in fileflags
hat = hat .. ']<span class="mw-editsection-bracket">]</span></span>'
end
if config.hat then
local frame = mw.getCurrentFrame()
hat = config.hat .. hat .. '}}'
hat = frame:preprocess( hat )
else
hat = mw.html.create( 'div' ):addClass( 'dablink excerpt-hat' ):wikitext( hat )
end
local fileargs = options.fileargs and mw.text.trim(options.fileargs)
if fileargs == '' then fileargs = nil end
return hat
local leadstart = nil -- have we found some text yet?
end
local t = "" -- the stripped down output text
local filetext = "" -- output text with concatenated [[File:Foo|...]]\n entries
local files = 0 -- how many images so far
local paras = 0 -- how many paragraphs so far
local startLine = true -- at the start of a line (no non-spaces found since last \n)?
function Excerpt.getReadMore( page, section )
text = mw.ustring.gsub(text,"^%s*","") -- remove initial white space
local link = "'''[[" .. page
if section then
-- Add named files
link = link .. '#' .. section
local f = options.files
if f and mw.ustring.match(f, "[^%d%s%-,]") then -- filename rather than number list
f = mw.ustring.gsub(f, "^%s*File%s*:%s*", "", 1)
f = mw.ustring.gsub(f, "^%s*Image%s*:%s*", "", 1)
f = "[[File:" .. f .. "]]"
f = modifyImage(f, "thumb")
f = modifyImage(f, fileargs)
if checkImage(f) then filetext = filetext .. f .. "\n" end
end
local text = Excerpt.getMessage( 'more' )
link = link .. '|' .. text .. "]]'''"
link = mw.html.create( 'div' ):addClass( 'noprint excerpt-more' ):wikitext( link )
return link
end
-- Fix birth and death dates, but only in the first paragraph
repeat -- loop around parsing a template, image or paragraph
-- @todo Use parser.getParagraphs() to get the first paragraph
local token = mw.ustring.match(text, "^%b{}%s*") or false -- {{Template}} or {| Table |}
function Excerpt.fixDates( excerpt )
if not leadstart and not token then token = mw.ustring.match(text, "^%b<>%s*%b{}%s*") end -- allow <tag>{{template}} before lead has started
local start = 1 -- skip initial templates
local s
local line = mw.ustring.match(text, "[^\n]*")
local e = 0
if token and line and mw.ustring.len(token) < mw.ustring.len(line) then -- template is followed by text (but it may just be other templates)
repeat
line = mw.ustring.gsub(line, "%b{}", "") -- remove all templates from this line
start = e + 1
line = mw.ustring.gsub(line, "%b<>", "") -- remove all HTML tags from this line
s, e = mw.ustring.find( excerpt, '%s*%b{}%s*', start )
-- if anything is left, other than an incomplete further template or an image, keep the template: it counts as part of the line
until not s or s > start
if mw.ustring.find(line, "%S") and not matchAny(line, "^%s*", { "{{", "%[%[%s*[Ff]ile:", "%[%[%s*[Ii]mage:" }, "") then
s, e = mw.ustring.find( excerpt, '%b()', start ) -- get (...), which may be (year–year)
token = nil
if s and s < start + 100 then -- look only near the start
local excerptStart = mw.ustring.sub( excerpt, s, e )
local year1, conjunction, year2 = string.match( excerptStart, '(%d%d%d+)(.-)(%d%d%d+)' )
if year1 and year2 and ( string.match( conjunction, '[%-–—]' ) or string.match( conjunction, '{{%s*[sS]nd%s*}}' ) ) then
local y1 = tonumber( year1 )
local y2 = tonumber( year2 )
if y2 > y1 and y2 < y1 + 125 and y1 <= tonumber( os.date( '%Y' ) ) then
excerpt = mw.ustring.sub( excerpt, 1, s ) .. year1 .. '–' .. year2 .. mw.ustring.sub( excerpt, e )
end
end
end
return excerpt
end
-- Replace the first call to each reference defined outside of the excerpt for the full reference, to prevent undefined references
if token then -- found a template which is not the prefix to a line of text
-- Then prefix the page title to the reference names to prevent conflicts
-- that is, replace <ref name="Foo"> for <ref name="Title of the article Foo">
if leadstart then -- lead has already started, so keep the template within the text, unless it's a whole line (navbox etc.)
-- and also <ref name="Foo" /> for <ref name="Title of the article Foo" />
if not filesOnly and not startLine then t = t .. token end
-- also remove reference groups: <ref name="Foo" group="Bar"> for <ref name="Title of the article Foo">
-- and <ref group="Bar"> for <ref>
elseif matchAny(token, "{{%s*", d.wantedBlockTemplates, "%s*%f[|}]") then
-- @todo The current regex may fail in cases with both kinds of quotes, like <ref name="Darwin's book">
t = t .. token -- keep wanted block templates
function Excerpt.fixReferences( excerpt, page, wikitext )
local references = parser.getReferences( excerpt )
elseif is(options.keepTables) and mw.ustring.sub(token, 1, 2) == '{|' then
local fixed = {}
t = t .. token -- keep tables
for _, reference in pairs( references ) do
local name = parser.getTagAttribute( reference, 'name' )
elseif files < maxfile then -- discard template, but if we are still collecting images...
if not fixed[ name ] then -- fix each reference only once
local images = argImage(token) or {}
local content = parser.getTagContent( reference )
if not images then
if not content then -- reference is self-closing
local image = parseImage(token, false) -- look for embedded [[File:...]], |image=, etc.
local full = parser.getReference( excerpt, name )
if image then table.insert(images, image) end
if not full then -- the reference is not defined in the excerpt
end
full = parser.getReference( wikitext, name )
for _, image in ipairs(images) do
if full then
if files < maxfile and checkImage(image) then -- if image is found and qualifies (not a sound file, non-free, etc.)
excerpt = excerpt:gsub( Excerpt.escapeString( reference ), Excerpt.escapeString( full ), 1 )
files = files + 1 -- count the file, whether displaying it or not
if options.fileflags and options.fileflags[files] then -- if displaying this image
image = modifyImage(image, "thumb")
image = modifyImage(image, fileargs)
filetext = filetext .. image
end
end
table.insert( fixed, name )
end
end
else -- the next token in text is not a template
token = parseImage(text, true)
if token then -- the next token in text looks like an image
if files < maxfile and checkImage(token) then -- if more images are wanted and this is a wanted image
files = files + 1
if options.fileflags and options.fileflags[files] then
local image = token -- copy token for manipulation by adding |right etc. without changing the original
image = modifyImage(image, fileargs)
filetext = filetext .. image
end
end
else -- got a paragraph, which ends at a file, image, blank line or end of text
local afterend = mw.ustring.len(text) + 1
local blankpos = mw.ustring.find(text, "\n%s*\n") or afterend -- position of next paragraph delimiter (or end of text)
local endpos = math.min( -- find position of whichever comes first: [[File:, [[Image: or paragraph delimiter
mw.ustring.find(text, "%[%[%s*[Ff]ile%s*:") or afterend,
mw.ustring.find(text, "%[%[%s*[Ii]mage%s*:") or afterend,
blankpos)
token = mw.ustring.sub(text, 1, endpos-1)
if blankpos < afterend and blankpos == endpos then -- paragraph ends with a blank line
token = token .. mw.ustring.match(text, "\n%s*\n", blankpos)
end
local isHatnote = not(leadstart) and mw.ustring.sub(token, 1, 1) == ':'
if not isHatnote then
leadstart = leadstart or mw.ustring.len(t) + 1 -- we got a paragraph, so mark the start of the lead section
paras = paras + 1
if allparas or (options.paraflags and options.paraflags[paras]) then t = t .. token end -- add if this paragraph wanted
end
end -- of "else got a paragraph"
end -- of "else not a template"
if token then text = mw.ustring.sub(text, mw.ustring.len(token)+1) end -- remove parsed token from remaining text
startLine = mw.ustring.find(token, "\n%s*$") -- will the next token be the first non-space on a line?
until not text or text == "" or not token or token == "" -- loop until all text parsed
text = mw.ustring.gsub(t, "\n+$", "") -- remove trailing line feeds, so "{{Transclude text excerpt|Foo}} more" flows on one line
return filetext, text
end
local function cleanupText(text, options)
text = mw.ustring.gsub(text, "<!%-%-.-%-%->","") -- remove HTML comments
text = mw.ustring.gsub(text, "<[Nn][Oo][Ii][Nn][Cc][Ll][Uu][Dd][Ee]>.-</[Nn][Oo][Ii][Nn][Cc][Ll][Uu][Dd][Ee]>", "") -- remove noinclude bits
if mw.ustring.find(text, "[Oo][Nn][Ll][Yy][Ii][Nn][Cc][Ll][Uu][Dd][Ee]") then -- avoid expensive search if possible
text = mw.ustring.gsub(text, "</[Oo][Nn][Ll][Yy][Ii][Nn][Cc][Ll][Uu][Dd][Ee]>.-<[Oo][Nn][Ll][Yy][Ii][Nn][Cc][Ll][Uu][Dd][Ee]>", "") -- remove text between onlyinclude sections
text = mw.ustring.gsub(text, "^.-<[Oo][Nn][Ll][Yy][Ii][Nn][Cc][Ll][Uu][Dd][Ee]>", "") -- remove text before first onlyinclude section
text = mw.ustring.gsub(text, "</[Oo][Nn][Ll][Yy][Ii][Nn][Cc][Ll][Uu][Dd][Ee]>.*", "") -- remove text after last onlyinclude section
end
if is(options.fragment) then
local escapedFragment = mw.ustring.gsub( options.fragment, "[%^%$%(%)%%%.%[%]%*%+%-%?]", "%%%0" )
local fragments = ""
local fragment = ""
local position = 1
while position < mw.ustring.len( text ) do
fragment, position = mw.ustring.match(text, "<%s*[Ss]ection%s+begin%s*=%s*[\"\']?%s*" .. escapedFragment .. "%s*[\"\']?%s*/>(.-)<%s*[Ss]ection%s+end=%s*[\"\']?%s*" .. escapedFragment .. "%s*[\"\']?%s*/>()", position )
if fragment and position then
fragments = fragments .. fragment
else
position = mw.ustring.len( text )
end
end
text = fragments
end
-- Prepend the page title to the reference names to prevent conflicts with other references in the transcluding page
if not is(options.keepSubsections) then
excerpt = excerpt:gsub( '< *[Rr][Ee][Ff][^>]*name *= *["\']?([^"\'>/]+)["\']?[^>/]*(/?) *>', '<ref name="' .. page:gsub( '"', '' ) .. ' %1"%2>' )
text = mw.ustring.gsub(text, "\n==.*","") -- remove first ==Heading== and everything after it
-- Remove reference groups because they don't apply to the transcluding page
text = mw.ustring.gsub(text, "^==.*","") -- ...even if the lead is empty
excerpt = excerpt:gsub( '< *[Rr][Ee][Ff] *group *= *["\']?[^"\'>/]+["\'] *>', '<ref>' )
end
return excerpt
if not is(options.keepRefs) then
text = mw.ustring.gsub(text, "<%s*[Rr][Ee][Ff][^>]-/%s*>", "") -- remove refs cited elsewhere
text = mw.ustring.gsub(text, "<%s*[Rr][Ee][Ff].->.-<%s*/%s*[Rr][Ee][Ff]%s*>", "") -- remove refs
text = mw.ustring.gsub(text, "%b{}", stripTemplate) -- remove unwanted templates such as references
end
text = mw.ustring.gsub(text, "<%s*[Ss][Cc][Oo][Rr][Ee].->.-<%s*/%s*[Ss][Cc][Oo][Rr][Ee]%s*>", "") -- remove musical scores
text = mw.ustring.gsub(text, "<%s*[Ii][Mm][Aa][Gg][Ee][Mm][Aa][Pp].->.-<%s*/%s*[Ii][Mm][Aa][Gg][Ee][Mm][Aa][Pp]%s*>", convertImageMap) -- convert imagemaps into standard images
text = mw.ustring.gsub(text, "%s*{{%s*[Tt][Oo][Cc].-}}", "") -- remove most common tables of contents
text = mw.ustring.gsub(text, "%s*__[A-Z]*TOC__", "") -- remove TOC behavior switches
text = mw.ustring.gsub(text, "\n%s*{{%s*[Pp]p%-.-}}", "\n") -- remove protection templates
text = mw.ustring.gsub(text, "%s*{{[^{|}]*[Ss]idebar%s*}}", "") -- remove most sidebars
text = mw.ustring.gsub(text, "%s*{{[^{|}]*%-[Ss]tub%s*}}", "") -- remove most stub templates
text = mw.ustring.gsub(text, "%s*%[%[%s*:?[Cc]ategory:.-%]%]", "") -- remove categories
text = mw.ustring.gsub(text, "^:[^\n]+\n","") -- remove DIY hatnote indented with a colon
return text
end
function Excerpt.removeReferences( excerpt )
-- Parse a ==Section== from a page
local references = parser.getReferences( excerpt )
local function getSection(text, section, mainonly)
for _, reference in pairs( references ) do
local escapedSection = mw.ustring.gsub(mw.uri.decode(section), "([%^%$%(%)%%%.%[%]%*%+%-%?])", "%%%1") -- %26 → & etc, then ^ → %^ etc.
excerpt = Excerpt.removeString( excerpt, reference )
local level, content = mw.ustring.match(text .. "\n", "\n(==+)%s*" .. escapedSection .. "%s*==.-\n(.*)")
if not content then return nil end -- no such section
local nextsection
if mainonly then
nextsection = "\n==.*" -- Main part of section terminates at any level of header
else
nextsection = "\n==" .. mw.ustring.rep("=?", #level - 2) .. "[^=].*" -- "===" → "\n===?[^=].*", matching "==" or "===" but not "===="
end
return excerpt
content = mw.ustring.gsub(content, nextsection, "") -- remove later sections with headings at this level or higher
return content
end
function Excerpt.removeCategories( excerpt )
-- Remove unmatched <tag> or </tag> tags
local categories = parser.getCategories( excerpt )
local function fixTags(text, tag)
for _, category in pairs( categories ) do
local startcount = 0
excerpt = Excerpt.removeString( excerpt, category )
for i in mw.ustring.gmatch(text, "<%s*" .. tag .. "%f[^%w_].->") do startcount = startcount + 1 end
local endcount = 0
for i in mw.ustring.gmatch(text, "<%s*/" .. tag .. "%f[^%w_].->") do endcount = endcount + 1 end
if startcount > endcount then -- more <tag> than </tag>: remove the last few <tag>s
local i = 0
text = mw.ustring.gsub(text, "<%s*" .. tag .. "%f[^%w_].->", function(t)
i = i + 1
if i > endcount then return "" else return nil end
end) -- "end" here terminates the anonymous replacement function(t) passed to gsub
elseif endcount > startcount then -- more </tag> than <tag>: remove the first few </tag>s
text = mw.ustring.gsub(text, "<%s*/" .. tag .. "%f[^%w_].->", "", endcount - startcount)
end
return textexcerpt
end
function Excerpt.removeBehaviorSwitches( excerpt )
-- Main function returns a string value: text of the lead of a page
return excerpt:gsub( '__[A-Z]+__', '' )
local function main(pagenames, options)
end
if not pagenames or #pagenames < 1 then return err("No page names given") end
local pagename
local text
local pagecount = #pagenames
local firstpage = pagenames[1] or "(nil)" -- save for error message, as it the name will be deleted
local gotopt
local pageoptstr
local section
function Excerpt.removeComments( excerpt )
-- read the page, or a random one if multiple pages were provided
return excerpt:gsub( '<!%-%-.-%-%->', '' )
if pagecount > 1 then math.randomseed(os.time()) end
end
while not text and pagecount > 0 do
local pagenum = 1
if pagecount > 1 then pagenum = math.random(pagecount) end -- pick a random title
pagename = pagenames[pagenum]
if pagename and pagename ~= "" then
-- We have page or [[page]] or [[page|text]], possibly followed by |opt1|opt2...
local pn
pn, gotopt, pageoptstr = mw.ustring.match(pagename, "^%s*(%[%b[]%])%s*(|?)(.*)")
if pn then
pagename = mw.ustring.match(pn, "%[%[([^|%]]*)") -- turn [[page|text]] into page, discarding text
else -- we have page or page|opt...
pagename, gotopt, pageoptstr = mw.ustring.match(pagename, "%s*([^|]*[^|%s])%s*(|?)(.*)")
end
function Excerpt.removeBold( excerpt )
if pagename and pagename ~= "" then
return excerpt:gsub( "'''", '' )
local pn
end
pn, section = mw.ustring.match(pagename, "(.-)#(.*)")
pagename = pn or pagename
text, normalisedPagename = getContent(pagename)
if not normalisedPagename then
return err("No title for page name " .. pagename)
else
pagename = normalisedPagename
end
if text and options.nostubs then
local isStub = mw.ustring.find(text, "%s*{{[^{|}]*%-[Ss]tub%s*}}")
if isStub then text = nil end
end
if not section then
section = mw.ustring.match(pagename, ".-#(.*)") -- parse redirect to Page#Section
end
if text and section and section ~= "" then text = getSection(text, section) end
end
end
if not text then table.remove(pagenames, pagenum) end -- this one didn't work; try another
pagecount = pagecount - 1 -- ensure that we exit the loop after at most #pagenames iterations
end
if not text then return err("Cannot read a valid page: first name is " .. firstpage) end
function Excerpt.removeLinks( excerpt )
text = cleanupText(text, options)
local links = parser.getLinks( excerpt )
for _, link in pairs( links ) do
local pageopts = {} -- pageopts (even if value is "") have priority over global options
excerpt = Excerpt.removeString( excerpt, link )
for k, v in pairs(options) do pageopts[k] = v end
if gotopt and gotopt ~= "" then
for _, t in pairs(mw.text.split(pageoptstr, "|")) do
local k, v = mw.ustring.match(t, "%s*([^=]-)%s*=(.-)%s*$")
pageopts[k] = v
end
pageopts.paraflags = numberFlags(pageopts["paragraphs"] or "") -- parse paragraphs, e.g. "1,3-5" → {"1","3-5"}
pageopts.fileflags = numberFlags(pageopts["files"] or "") -- parse file numbers
if pageopts.more and pageopts.more == "" then pageopts.more = "Read more..." end -- more= is short for this default text
end
return excerpt
end
-- @todo Use parser.getLinks
local filetext
function Excerpt.removeSelfLinks( excerpt, page )
filetext, text = parse(text, pageopts)
local lang = mw.language.getContentLanguage()
local page = Excerpt.escapeString( mw.title.getCurrentTitle().prefixedText )
local ucpage = lang:ucfirst( page )
local lcpage = lang:lcfirst( page )
excerpt = excerpt
:gsub( '%[%[(' .. ucpage .. ')%]%]', '%1' )
:gsub( '%[%[(' .. lcpage .. ')%]%]', '%1' )
:gsub( '%[%[' .. ucpage .. '|([^]]+)%]%]', '%1' )
:gsub( '%[%[' .. lcpage .. '|([^]]+)%]%]', '%1' )
return excerpt
end
-- Replace the bold title or synonym near the start of the page by a link to the page
-- @param excerpt Wikitext in which to look for the bold title
-- @param page Title of the page the excerpt comes from; used as the link target
-- @return The excerpt with at most one bold span turned into a link to the page
function Excerpt.linkBold( excerpt, page )
	local lang = mw.language.getContentLanguage()
	-- Look for "'''Foo''' is..." (uc) or "A '''foo''' is..." (lc).
	-- The fourth argument makes this a plain search, so special characters in page represent themselves.
	local position = mw.ustring.find( excerpt, "'''" .. lang:ucfirst( page ) .. "'''", 1, true )
		or mw.ustring.find( excerpt, "'''" .. lang:lcfirst( page ) .. "'''", 1, true )
	if position then
		-- position points at the first apostrophe, so the title itself spans position + 3 .. position + length + 2
		local length = mw.ustring.len( page )
		excerpt = mw.ustring.sub( excerpt, 1, position + 2 )
			.. "[[" .. mw.ustring.sub( excerpt, position + 3, position + length + 2 ) .. "]]"
			.. mw.ustring.sub( excerpt, position + length + 3, -1 ) -- link it
	else -- look for anything unlinked in bold, assumed to be a synonym of the title (e.g. a person's birth name)
		excerpt = mw.ustring.gsub( excerpt, "()'''(.-'*)'''", function ( a, b )
			if not mw.ustring.find( b, "%[" ) and not mw.ustring.find( b, "%{" ) then -- if not wikilinked or some weird template
				return "'''[[" .. page .. "|" .. b .. "]]'''" -- replace '''Foo''' by '''[[page|Foo]]'''
			else
				return nil -- instruct gsub to make no change
			end
		end, 1 ) -- the trailing 1 limits gsub to the first bold span only
	end
	return excerpt
end
function Excerpt.addTrackingCategories( excerpt )
-- remove '''bold text''' if requested
local currentTitle = mw.title.getCurrentTitle()
if is(pageopts.nobold) then text = mw.ustring.gsub(text, "'''", "") end
local contentCategory = config.categories.content
if contentCategory and currentTitle.isContentPage then
text = filetext .. text
excerpt = excerpt .. '[[Category:' .. contentCategory .. ']]'
-- Seek and destroy unterminated templates and wikilinks
repeat -- hide matched {{template}}s including nested templates
local t = text
text = mw.ustring.gsub(text, "{(%b{})}", "\27{\27%1\27}\27") -- {{sometemplate}} → E{Esometemplate}E}E where E represents escape
text = mw.ustring.gsub(text, "(< *math[^>]*>[^<]-)}}(.-< */math *>)", "%1}\27}\27%2") -- <math>\{sqrt\{hat{x}}</math> → <math>\{sqrt\{hat{x}E}E</math>
until text == t
repeat -- do similar for [[wikilink]]s
local t = text
text = mw.ustring.gsub(text, "%[(%b[])%]", "\27[\27%1\27]\27")
until text == t
text = text.gsub(text, "([{}%[%]])%1[^\27].*", "") -- remove unmatched {{, }}, [[ or ]] and everything thereafter, avoiding ]E]E etc.
text = text.gsub(text, "([{}%[%]])%1$", "") -- remove unmatched {{, }}, [[ or ]] at end of text
text = mw.ustring.gsub(text, "\27", "") -- unhide matched pairs: E{E{ → {{, ]E]E → ]], etc.
-- Ensure div tags match
text = fixTags(text, "div")
if pageopts.more then text = text .. " '''[[" .. pagename .. "|" .. pageopts.more .. "]]'''" end -- wikilink to article for more info
if pageopts.list and not pageopts.showall then -- add a collapsed list of pages which might appear
local listtext = pageopts.list
if listtext == "" then listtext = "Other articles" end
text = text .. "{{collapse top|title={{resize|85%|" ..listtext .. "}}|bg=fff}}{{hlist"
for _, p in pairs(pagenames) do
if mw.ustring.match(p, "%S") then text = text .. "|[[" .. mw.text.trim(p) .. "]]" end
end
text = text .. "}}\n{{collapse bottom}}"
end
local namespaceCategory = config.categories[ currentTitle.namespace ]
if namespaceCategory then
return text
excerpt = excerpt .. '[[Category:' .. namespaceCategory .. ']]'
end
return excerpt
end
-- Helper method to match from a list of regular expressions
-- Shared template invocation code for lead and random functions
-- Like so: match pre..list[1]..post or pre..list[2]..post or ...
local function invoke(frame, func)
function Excerpt.matchAny( text, pre, list, post, init )
-- args = { 1,2,... = page names, paragraphs = list e.g. "1,3-5", files = list, more = text}
local match = {}
local args = {} -- args[k] = frame.args[k] or frame:getParent().args[k] for all k in either (numeric or not)
for i = 1, #list do
for k, v in pairs(frame:getParent().args) do args[k] = v end
match = { mw.ustring.match( text, pre .. list[ i ] .. post, init ) }
for k, v in pairs(frame.args) do args[k] = v end -- args from a Lua call have priority over parent args from template
if match[1] then return unpack( match ) end
errors = args["errors"] -- set the module level boolean used in local function err
local articlecount = #args -- must be 1 except with selected=Foo and Foo=Somepage
if articlecount < 1 and not (func == "selected" and args[func] and args[args[func]]) then
return err("No articles provided")
end
return nil
end
-- Helper function to get arguments
local pagenames = {}
-- args from Lua calls have priority over parent args from template
if func == "lead" then
function Excerpt.getArg( key, default )
pagenames = { args[1] }
local frame = mw.getCurrentFrame()
elseif func == "linked" or func == "listitem" then
for k, value in pairs( frame:getParent().args ) do
-- Read named page and find its wikilinks
if k == key and mw.text.trim( value ) ~= '' then
local page = args[1]
return value
local text, title = getContent(page)
if not title then
return err("No title for page name " .. page)
elseif not text then
return err("No content for page name " .. page)
end
if args["section"] then -- check relevant section only
text = getSection(text, args["section"], args["sectiononly"])
if not text then return err("No section " .. args["section"] .. " in page " .. page) end
end
-- replace annotated links with real links
text = mw.ustring.gsub(text, "{{%s*[Aa]nnotated[ _]link%s*|%s*(.-)%s*}}", "[[%1]]")
if func == "linked" then
for p in mw.ustring.gmatch(text, "%[%[%s*([^%]|\n]*)") do table.insert(pagenames, p) end
else -- listitem: first wikilink on a line beginning *, :#, etc. except in "See also" or later section
text = mw.ustring.gsub(text, "\n== *See also.*", "")
for p in mw.ustring.gmatch(text, "\n:*[%*#][^\n]-%[%[%s*([^%]|\n]*)") do table.insert(pagenames, p) end
end
elseif func == "random" then
-- accept any number of page names. If more than one, we'll pick one randomly
for i, p in pairs(args) do
if p and type(i) == 'number' then table.insert(pagenames, p) end
end
elseif func == "selected" then
local articlekey = args[func]
if tonumber(articlekey) then -- normalise article number into the range 1..#args
articlekey = articlekey % articlecount
if articlekey == 0 then articlekey = articlecount end
end
pagenames = { args[articlekey] }
end
for k, value in pairs( frame.args ) do
if k == key and mw.text.trim( value ) ~= '' then
local options = args -- pick up miscellaneous options: more, errors, fileargs
return value
options.paraflags = numberFlags(args["paragraphs"] or "") -- parse paragraphs, e.g. "1,3-5" → {"1","3-5"}
options.fileflags = numberFlags(args["files"] or "") -- parse file numbers
if options.more and options.more == "" then options.more = "Read more..." end -- more= is short for this default text
local text = ""
if options.showall then
local separator = ""
for _, p in pairs(pagenames) do
local t = main({ p }, options)
if t ~= "" then
text = text .. separator .. t
separator = options.showall
if separator == "" then separator = "{{clear}}{{hr}}" end
end
end
else
text = main(pagenames, options)
end
return default
end
-- Helper method to get an error message
if text == "" and d.brokenCategory and d.brokenCategory ~= "" and mw.title.getCurrentTitle().isContentPage then
-- This method also categorizes the current page in one of the configured error categories
return "[[Category:" .. d.brokenCategory .. "]]"
function Excerpt.getError( key, value )
else
local message = Excerpt.getMessage( 'error-' .. key, value )
return frame:preprocess(text)
local markup = mw.html.create( 'div' ):addClass( 'error' ):wikitext( message )
if config.categories and config.categories.errors and mw.title.getCurrentTitle().isContentPage then
markup:node( '[[Category:' .. config.categories.errors .. ']]' )
end
return markup
end
-- Helper method to get a localized message
local function excerpt(frame) -- Replicate {{Excerpt}} entirely in Lua for reduced Post-expand include size
-- This method uses Module:TNT to get localized messages from https://commons.wikimedia.org/wiki/Data:I18n/Module:Excerpt.tab
local args = {} -- args[k] = frame.args[k] or frame:getParent().args[k] for all k in either (numeric or not)
-- If Module:TNT is not available or the localized message does not exist, the key is returned instead
for k, v in pairs(frame:getParent().args) do args[k] = v end
function Excerpt.getMessage( key, value )
for k, v in pairs(frame.args) do args[k] = v end -- args from a Lua call have priority over parent args from template
local ok, TNT = pcall( require, 'Module:TNT' )
if not ok then return key end
local ok2, message = pcall( TNT.format, 'I18n/Module:Excerpt.tab', key, value )
if not ok2 then return key end
return message
end
-- Helper method to escape a string for use in regexes
local tag = is(args.tag) and args.tag or 'div'
function Excerpt.escapeString( str )
local article = is(args.article) and args.article or args[1] or '{{{1}}}'
return str:gsub( '[%^%$%(%)%.%[%]%*%+%-%?%%]', '%%%0' )
local section = is(args.section) and args.section or args[2]
end
-- Helper method to remove a string from a text
local output = {}
-- @param text Text from where to remove the string
output[1] = frame:extensionTag{ name = 'templatestyles', args = {src='Excerpt/styles.css'} }
-- @param str String to remove
output[2] = '<' .. tag .. ' class="excerpt-block">'
-- @return The given text with the string removed
output[3] = is(args.indicator) and ('<' .. tag .. ' class="excerpt-indicator">') or ''
function Excerpt.removeString( text, str )
if is(args.nohat) then
local pattern = Excerpt.escapeString( str )
output[4] = ''
if #pattern > 9999 then -- strings longer than 10000 bytes can't be put into regexes
else
pattern = Excerpt.escapeString( mw.ustring.sub( str, 1, 999 ) ) .. '.-' .. Excerpt.escapeString( mw.ustring.sub( str, -999 ) )
local hatnote = {}
end
hatnote[1] = 'This' .. (is(args.indicator) and '' or ' section') .. ' is an excerpt from '
return text:gsub( pattern, '' )
hatnote[2] = '[['
hatnote[3] = article .. (is(section) and ('#' .. frame:callParserFunction( 'urlencode', section, 'WIKI' )) or '')
hatnote[4] = '|'
hatnote[5] = article .. (is(section) and (frame:callParserFunction( '#tag:nowiki', ' § ' ) .. section) or '')
hatnote[6] = ']]'
hatnote[7] = "''" .. '<span class="mw-editsection-like plainlinks"><span>[ </span>['
local title = mw.title.new(article) or mw.title.getCurrentTitle()
hatnote[8] = title:fullUrl('action=edit') .. ' edit'
hatnote[9] = ']<span> ]</span></span>' .. "''"
output[4] = require('Module:Hatnote')._hatnote(table.concat(hatnote), {selfref=true}) or err("Error generating hatnote")
end
output[5] = '<' .. tag .. ' class="excerpt">\n'
if article ~= '{{{1}}}' then
if is(args.fragment) then
output[6] = frame:callParserFunction( '#lst', article, args.fragment) or err("Error transcluding text")
else
local options = args -- pick up miscellaneous options: more, errors, fileargs
options.paraflags = numberFlags(args.paragraphs or "") -- parse paragraphs, e.g. "1,3-5" → {"1","3-5"}
options.fileflags = numberFlags(args.files or "1") -- parse file numbers
options.nobold=1
options.keepTables = is(args.tables) and args.tables or 1
options.keepRefs = is(args.references) and args.references or 1
options.keepSubsections = is(args.subsections) and args.subsections or ""
if options.more and options.more == "" then options.more = "Read more..." end -- more= is short for this default text
local pagenames = { (article .. '#' .. (section or '')) }
local text = main(pagenames, options)
if text == "" and d.brokenCategory and d.brokenCategory ~= "" and mw.title.getCurrentTitle().isContentPage then
output[6] = "[[Category:" .. d.brokenCategory .. "]]"
else
output[6] = frame:preprocess(text) or err("Error processing text")
end
end
else
output[6] = err("No article provided")
end
output[7] = '</' .. tag .. '>'
output[8] = is(args.indicator) and ('</' .. tag .. '>') or ''
output[9] = '</' .. tag .. '>'
output[10] = mw.title.getCurrentTitle().isContentPage and '[[Category:Articles with excerpts]]' or ''
return table.concat(output)
end
-- Helper method to convert a comma-separated list of numbers or min-max ranges into a list of booleans
-- Entry points for template callers using #invoke:
-- @param filter Required. Comma-separated list of numbers or min-max ranges, for example '1,3-5'
function p.lead(frame) return invoke(frame, "lead") end -- {{Transclude lead excerpt}} reads the first and only article
-- @return Map from integers to booleans, for example {1=true,2=false,3=true,4=true,5=true}
function p.linked(frame) return invoke(frame, "linked") end -- {{Transclude linked excerpt}} reads a randomly selected article linked from the given page
-- @return Boolean indicating whether the filters should be treated as a blacklist or not
function p.listitem(frame) return invoke(frame, "listitem") end -- {{Transclude list item excerpt}} reads a randomly selected article listed on the given page
-- @note Merging this into matchFilter is possible, but way too inefficient
function p.random(frame) return invoke(frame, "random") end -- {{Transclude random excerpt}} reads any article (default for invoke with one argument)
function Excerpt.parseFilter( filter )
function p.selected(frame) return invoke(frame, "selected") end -- {{Transclude selected excerpt}} reads the article whose key is in the selected= parameter
local filters = {}
function p.excerpt(frame) return excerpt(frame) end -- {{Excerpt}} transcludes part of an article into another article
local isBlacklist = false
if string.sub( filter, 1, 1 ) == '-' then
isBlacklist = true
filter = string.sub( filter, 2 )
end
local values = mw.text.split( filter, ',' ) -- split values: '1,3-5' to {'1','3-5'}
for _, value in pairs( values ) do
value = mw.text.trim( value )
local min, max = mw.ustring.match( value, '^(%d+)%s*[-–—]%s*(%d+)$' ) -- '3-5' to min=3 max=5
if not max then min, max = string.match( value, '^((%d+))$' ) end -- '1' to min=1 max=1
if max then
for i = min, max do filters[ i ] = true end
else
filters[ value ] = true -- if we reach this point, the string had the form 'a,b,c' rather than '1,2,3'
end
end
local filter = {cache = {}, terms = filters}
return filter, isBlacklist
end
-- Helper function to see if a value matches any of the given filters
-- Entry points for other Lua modules
function Excerpt.matchFilter( value, filter )
function p.getContent(page, frame) return getContent(page, frame) end
if type(value) == "number" then
function p.getsection(text, section) return getSection(text, section) end
return filter.terms[value]
function p.parse(text, options, filesOnly) return parse(text, options, filesOnly) end
else
function p.argimage(text) return argImage(text) end
local cached = filter.cache[value]
function p.checkimage(image) return checkImage(image) end
if cached ~= nil then
function p.parseimage(text, start) return parseImage(text, start) end
return cached
function p.cleanupText(text, options) return cleanupText(text, options) end
end
function p.main(pagenames, options) return main(pagenames, options) end
local lang = mw.language.getContentLanguage()
function p.numberflags(str) return numberFlags(str) end
local lcvalue = lang:lcfirst(value)
local ucvalue = lang:ucfirst(value)
for term in pairs( filter.terms ) do
if value == tostring(term)
or type(term) == "string" and (
lcvalue == term
or ucvalue == term
or mw.ustring.match( value, term )
) then
filter.cache[value] = true
return true
end
end
filter.cache[value] = false
end
end
return pExcerpt
|