Module:Excerpt/sandbox: Difference between revisions

Browse history interactively

← Previous edit

Content deleted Content added

Revision as of 23:34, 5 April 2020 edit Sophivorus (talk \| contribs) Extended confirmed users, Template editors 5,444 edits Rename truthy() for falsy(), as the function actually checked for falsy values ← Previous edit		Latest revision as of 21:21, 26 May 2025 edit undo Aidan9382 (talk \| contribs) Extended confirmed users, Page movers, Template editors 15,961 edits Try to automatically account for page merge-like redirects
(232 intermediate revisions by 12 users not shown)
Line 1: -- Module:Excerpt implements the Excerpt template ~~-- Local aliases of the file namespace~~ -- Documentation and master version: https://en.wikipedia.org/wiki/Module:Excerpt ~~local fileNamespaces = {~~ -- Authors: User:Sophivorus, User:Certes, User:Aidan9382 & others ~~"[Ff]ile",~~ -- License: CC-BY-SA-3.0 ~~"[Ii]mage"~~ } local parser = require( 'Module:WikitextParser' ) ~~local captionParams = {~~ local yesno = require( 'Module:Yesno' ) ~~"[^=\|][Cc]aption[^=\|]",~~ ~~"[^=\|][Ll]egend[^=\|]"~~ } local ok, config = pcall( require, 'Module:Excerpt/config' ) ~~-- Local category to track content pages with broken excerpts (may be empty, don't include the "Category:" prefix)~~ if not ok then config = {} end ~~local brokenCategory = "Articles with broken excerpts"~~ local Excerpt = {} ~~-- The module keeps all inline templates except these ones~~ ~~local unwantedInlineTemplates = {"[Ee]fn", "[Ee]fn%-[lu][arg]", "[Ee]l[mn]", "[Rr]p?", "[Ss]fn[bmp]", "[Ss]f[bn]", "[Nn]ote[Tt]ag", "#[Tt]ag:%s[Rr]ef", "[Rr]efn?",~~ ~~"[CcDd]n", "[Cc]itation[%- _]needed", "[Dd]isambiguation needed", "[Ff]eatured article", "[Gg]ood article",~~ ~~"[Dd]ISPLAYTITLE", "[Ss]hort[ _]+description", "[Cc]itation", "[Cc]ite[%- _]+[%w_%s]-", "[Cc]oor[%w_%s]-",~~ ~~"[Uu]?n?[Rr]eliable source[%?%w_%s]-", "[Rr]s%??", "[Vv]c", "[Vv]erify credibility", "[Bb]y[ _][Ww]ho[m]%??", "[Ww]ikisource[ -_]multi", "[Ii]nflation[ _/-][Ff]n",~~ ~~"[Bb]iblesource",~~ ~~-- aliases for Clarification needed~~ ~~"[Cc]f[ny]", "[Cc]larification[ _]+inline", "[Cc]larification[%- _]needed", "[Cc]larification", "[Cc]larify%-inline", "[Cc]larify%-?me",~~ ~~"[Cc]larify[ _]+inline", "[Cc]larify", "[Cc]LARIFY", "[Cc]onfusing%-inline", "[Cc]onfusing%-short", "[Ee]xplainme", "[Hh]uh[ _]%??", "[Ww]hat%?",~~ ~~"[Ii]nline[ _]+[Uu]nclear", "[Ii]n[ _]+what[ _]+sense", "[Oo]bscure", "[Pp]lease[ _]+clarify", "[Uu]nclear[ _]+inline", "[Ww]hat's[ _]+this%?",~~ ~~"[Gg]eoQuelle", "[Nn]eed[s]+[%- _]+[Ii][Pp][Aa]", "[Ii]PA needed",~~ ~~-- aliases for Clarification needed lead~~ ~~"[Cc]itation needed %(?lea?de?%)?", "[Cc]nl", "[Ff]act %(?lea?de?%)?", "[Ll]ead citation needed", "[Nn]ot in body", "[Nn]ot verified in body",~~ ~~-- Primary source etc.~~ ~~"[Pp]s[ci]", "[Nn]psn", "[Nn]on%-primary[ _]+source[ _]+needed", "[Ss]elf%-published[%w_%s]-", "[Uu]ser%-generated[%w_%s]-",~~ ~~"[Pp]rimary source[%w_%s]-", "[Ss]econdary source[%w_%s]-", "[Tt]ertiary source[%w_%s]-", "[Tt]hird%-party[%w_%s]-",~~ ~~-- aliases for Disambiguation (page) and similar~~ ~~"[Bb]egriffsklärung", "[Dd][Aa][Bb]", "[Dd]big", "[%w_%s]-%f[%w][Dd]isam[%w_%s]-", "[Hh][Nn][Dd][Ii][Ss]",~~ ~~-- aliases for Failed verification~~ ~~"[Bb]adref", "[Ff]aile?[ds] ?[rv][%w_%s]-", "[Ff][Vv]", "[Nn][Ii]?[Cc][Gg]", "[Nn]ot ?in ?[crs][%w_%s]-", "[Nn]ot specifically in source",~~ ~~"[Vv]erification[%- _]failed",~~ ~~-- aliases for When~~ ~~"[Aa]s[ _]+of[ _]+when%??", "[Aa]s[ _%-]+of%??", "[Cc]larify date", "[Dd]ate[ _]needed", "[Nn]eeds?[ _]+date", "[Rr]ecently", "[Ss]ince[ _]+when%??",~~ ~~"[Ww]HEN", "[Ww]hen%??",~~ ~~-- aliases for Update~~ ~~"[Nn]ot[ _]up[ _]to[ _]date","[Oo]u?[Tt][Dd]","[Oo]ut[%- _]o?f?[%- _]dated?", "[Uu]pdate", "[Uu]pdate[ _]+sect", "[Uu]pdate[ _]+Watch",~~ ~~-- aliases for Pronunciation needed~~ ~~"[Pp]ronunciation%??[%- _]n?e?e?d?e?d?", "[Pp]ronounce", "[Rr]equested[%- _]pronunciation", "[Rr]e?q?pron", "[Nn]eeds[%- _]pronunciation",~~ ~~-- Chart, including Chart/start etc.~~ ~~"[Cc]hart", "[Cc]hart/[%w_%s]-",~~ ~~-- Cref and others~~ ~~"[Cc]ref2?", "[Cc]note",~~ ~~-- Explain and others~~ ~~"[Ee]xplain", "[Ff]urther[ ]explanation[ ]needed", "[Ee]laboration[ ]needed", "[Ee]xplanation[ ]needed",~~ ~~-- TOC templates~~ "[Cc][Oo][Mm][Pp][Aa][Cc][Tt][ _][Tt][Oo][Cc][8][5]", "[Tt][Oo][Cc]", "09[Aa][Zz]", "[Tt][Oo][Cc][ ][Cc][Oo][Mm][Pp][Aa][Cc][Tt]", "[Tt][Oo][Cc][ ][Ss][Mm][Aa][Ll][Ll]", "[Cc][Oo][Mm][Pp][Aa][Cc][Tt][ _][Aa][Ll][Pp][Hh][Aa][Bb][Ee][Tt][Ii][Cc][ _][Tt][Oo][Cc]", ~~"DEFAULTSORT:.-",~~ ~~"[Oo]ne[ _]+source"~~ } -- ~~The~~Main ~~module~~entry ~~removes~~point ~~all block~~for templates ~~except these ones~~ function Excerpt.main( frame ) ~~local wantedBlockTemplates = {~~ ~~"[Hh]istorical populations"~~ } -- Make sure the requested page exists and get the wikitext ~~local p = {}~~ local page = Excerpt.getArg( 1 ) if not page or page == '{{{1}}}' then return Excerpt.getError( 'no-page' ) end local title = mw.title.new( page ) if not title then return Excerpt.getError( 'invalid-title', page ) end local fragment = title.fragment -- save for later if title.isRedirect then title = title.redirectTarget if fragment == "" then fragment = title.fragment -- page merge potential end end if not title.exists then return Excerpt.getError( 'page-not-found', page ) end page = title.prefixedText local wikitext = title:getContent() -- Get the template params and process them ~~local errors~~ local params = { ~~-- Return blank text, or an error message if requested~~ hat = yesno( Excerpt.getArg( 'hat', true ) ), ~~local function err(text)~~ this = Excerpt.getArg( 'this' ), ~~if errors then error(text, 2) end~~ only = Excerpt.getArg( 'only' ), ~~return ""~~ files = Excerpt.getArg( 'files', Excerpt.getArg( 'file' ) ), ~~end~~ lists = Excerpt.getArg( 'lists', Excerpt.getArg( 'list' ) ), tables = Excerpt.getArg( 'tables', Excerpt.getArg( 'table' ) ), templates = Excerpt.getArg( 'templates', Excerpt.getArg( 'template' ) ), paragraphs = Excerpt.getArg( 'paragraphs', Excerpt.getArg( 'paragraph' ) ), references = yesno( Excerpt.getArg( 'references', true ) ), subsections = yesno( Excerpt.getArg( 'subsections', false ) ), links = yesno( Excerpt.getArg( 'links', true ) ), bold = yesno( Excerpt.getArg( 'bold', false ) ), briefDates = yesno( Excerpt.getArg( 'briefdates', false ) ), inline = yesno( Excerpt.getArg( 'inline' ) ), quote = yesno( Excerpt.getArg( 'quote' ) ), more = yesno( Excerpt.getArg( 'more' ) ), class = Excerpt.getArg( 'class' ), displayTitle = Excerpt.getArg( 'displaytitle', page ), } -- Make sure the requested section exists and get the excerpt ~~-- Helper function to test for falsy values~~ local excerpt ~~local function falsy( value )~~ local section = Excerpt.getArg( 2, fragment ) ~~if not value or value == "" or value == "0" or value == "false" or value == "no" then~~ section = mw.text.trim( section ) ~~return true~~ if section == '' then section = nil end if section then excerpt = parser.getSectionTag( wikitext, section ) if not excerpt then if params.subsections then excerpt = parser.getSection( wikitext, section ) else local sections = parser.getSections( wikitext ) excerpt = sections[ section ] end end if not excerpt then return Excerpt.getError( 'section-not-found', section ) end if excerpt == '' then return Excerpt.getError( 'section-empty', section ) end else excerpt = parser.getLead( wikitext ) if excerpt == '' then return Excerpt.getError( 'lead-empty' ) end end ~~return false~~ ~~end~~ -- Remove noinclude bits ~~-- In text, match pre..list[1]..post or pre..list[2]..post or ...~~ excerpt = excerpt:gsub( '<[Nn][Oo][Ii][Nn][Cc][Ll][Uu][Dd][Ee]>.-</[Nn][Oo][Ii][Nn][Cc][Ll][Uu][Dd][Ee]>', '' ) ~~local function matchany(text, pre, list, post, init)~~ ~~local match = {}~~ ~~for i = 1, #list do~~ ~~match = { mw.ustring.match(text, pre .. list[i] .. post, init) }~~ ~~if match[1] then return unpack(match) end~~ ~~end~~ ~~return nil~~ ~~end~~ -- Filter various elements from the excerpt ~~-- Help gsub to remove unwanted templates and pseudo-templates such as #tag:ref and DEFAULTSORT~~ excerpt = Excerpt.filterFiles( excerpt, params.files ) ~~local function striptemplate(t)~~ excerpt = Excerpt.filterLists( excerpt, params.lists ) ~~-- If template is unwanted then return "" (gsub will replace by nothing), else return nil (gsub will keep existing string)~~ excerpt = Excerpt.filterTables( excerpt, params.tables ) ~~if matchany(t, "^{{%s", unwantedInlineTemplates, "%s%f[\|}]") then return "" end~~ excerpt = Excerpt.filterParagraphs( excerpt, params.paragraphs ) -- If no file is found, try to get one from the infobox ~~-- If template is wanted but produces an unwanted reference then return the string with \|shortref or \|ref removed~~ if ( params.only == 'file' or params.only == 'files' or not params.only and ( not params.files or params.files ~= '0' ) ) -- caller asked for files ~~local noref = mw.ustring.gsub(t, "\|%sshortref%s%f[\|}]", "")~~ and not section -- and we're in the lead section ~~noref = mw.ustring.gsub(noref, "\|%sref%s%f[\|}]", "")~~ and config.captions -- and we have the config option required to try finding files in infoboxes and #parser.getFiles( excerpt ) == 0 -- and there're no files in the excerpt then excerpt = Excerpt.addInfoboxFile( excerpt ) end -- IfFilter athe ~~wanted~~templates ~~template~~by ~~has~~appending ~~unwanted nested~~the templates, ~~purge~~blacklist ~~them~~to the templates ~~too~~filter if config.blacklist then ~~noref = mw.ustring.sub(noref, 1, 2) .. mw.ustring.gsub(mw.ustring.sub(noref, 3), "%b{}", striptemplate)~~ local blacklist = table.concat( config.blacklist, ',' ) if params.templates then if string.sub( params.templates, 1, 1 ) == '-' then params.templates = params.templates .. ',' .. blacklist end else params.templates = '-' .. blacklist end end excerpt = Excerpt.filterTemplates( excerpt, params.templates ) -- Leave only the requested elements ~~-- Replace {{audio}} by its text parameter: {{Audio\|Foo.ogg\|Bar}} → Bar~~ if params.only == 'file' or params.only == 'files' then ~~noref = mw.ustring.gsub(noref, "^{{%s[Aa]udio.-\|.-\|(.-)%f[\|}].", "%1")~~ local files = parser.getFiles( excerpt ) excerpt = params.only == 'file' and files[1] or table.concat( files, '\n\n' ) end if params.only == 'list' or params.only == 'lists' then local lists = parser.getLists( excerpt ) excerpt = params.only == 'list' and lists[1] or table.concat( lists, '\n\n' ) end if params.only == 'table' or params.only == 'tables' then local tables = parser.getTables( excerpt ) excerpt = params.only == 'table' and tables[1] or table.concat( tables, '\n\n' ) end if params.only == 'paragraph' or params.only == 'paragraphs' then local paragraphs = parser.getParagraphs( excerpt ) excerpt = params.only == 'paragraph' and paragraphs[1] or table.concat( paragraphs, '\n\n' ) end if params.only == 'template' or params.only == 'templates' then local templates = parser.getTemplates( excerpt ) excerpt = params.only == 'template' and templates[1] or table.concat( templates, '\n\n' ) end -- @todo Make more robust and move downwards ~~-- Replace {{Nihongo foot}} by its text parameter: {{Nihongo foot\|English\|英語\|eigo}} → English~~ if params.briefDates then ~~noref = mw.ustring.gsub(noref, "^{{%s[Nn]ihongo[ _]+foot%s\|(.-)%f[\|}].", "%1")~~ excerpt = Excerpt.fixDates( excerpt ) end -- Remove unwanted elements ~~if noref ~= t then return noref end~~ excerpt = Excerpt.removeComments( excerpt ) excerpt = Excerpt.removeSelfLinks( excerpt ) excerpt = Excerpt.removeNonFreeFiles( excerpt ) excerpt = Excerpt.removeBehaviorSwitches( excerpt ) -- Fix or remove the references ~~return nil -- not an unwanted template: keep~~ if params.references then ~~end~~ excerpt = Excerpt.fixReferences( excerpt, page, wikitext ) else excerpt = Excerpt.removeReferences( excerpt ) end -- Remove wikilinks ~~-- Get a page's content, following redirects, and processing file description pages for files.~~ if not params.links then ~~-- Also returns the page name, or the target page name if a redirect was followed, or false if no page found~~ excerpt = Excerpt.removeLinks( excerpt ) ~~local function getContent(page, frame)~~ end ~~local title = mw.title.new(page) -- Read description page (for :File:Foo rather than File:Foo)~~ ~~if not title then return false, false end~~ -- Link the bold text near the start of most leads and then remove it ~~local target = title.redirectTarget~~ if ~~target~~not section then ~~title = target end~~ excerpt = Excerpt.linkBold( excerpt, page ) end if not params.bold then excerpt = Excerpt.removeBold( excerpt ) end -- Remove extra line breaks but leave one before and after so the parser interprets lists, tables, etc. correctly ~~return title:getContent(), title.prefixedText~~ excerpt = excerpt:gsub( '\n\n\n+', '\n\n' ) ~~end~~ excerpt = mw.text.trim( excerpt ) excerpt = '\n' .. excerpt .. '\n' -- Remove nested categories ~~-- Check image for suitability~~ excerpt = frame:preprocess( excerpt ) ~~local function checkimage(image)~~ excerpt = Excerpt.removeCategories( excerpt ) ~~local page = matchany(image, "", fileNamespaces, "%s:[^\|%]]") -- match File:(name) or Image:(name)~~ ~~if not page then return false end~~ -- Add tracking categories ~~-- Limit to image types: .gif, .jpg, .jpeg, .png, .svg, .tiff, .xcf (exclude .ogg audio etc.)~~ if config.categories then ~~if not matchany(page, "%.", {"[Gg][Ii][Ff]", "[Jj][Pp][Ee]?[Gg]", "[Pp][Nn][Gg]", "[Ss][Vv][Gg]", "[Tt][Ii][Ff][Ff]", "[Xx][Cc][Ff]"}, "%s$") then~~ excerpt = Excerpt.addTrackingCategories( excerpt ) ~~return false~~ end -- Build the final output ~~local desc, rtitle = getContent(page) -- get file description and title after following any redirect~~ if params.inline then ~~if desc and desc ~= "" then -- found description on local wiki~~ return mw.text.trim( excerpt ) ~~if mw.ustring.match(desc, "[Nn]on%-free") then return false end~~ ~~desc = mw.ustring.gsub(desc, "%b{}", striptemplate) -- remove DEFAULTSORT etc. to avoid side effects of frame:preprocess~~ ~~elseif not rtitle then~~ ~~return false~~ ~~else~~ ~~-- try commons~~ ~~desc = "{{" .. rtitle .. "}}"~~ end ~~frame = frame or mw.getCurrentFrame()~~ ~~desc = frame:preprocess(desc)~~ local tag = params.quote and 'blockquote' or 'div' ~~return ( desc and desc ~= "" and not mw.ustring.match(desc, "[Nn]on%-free") ) and true or false -- hide non-free image~~ local block = mw.html.create( tag ):addClass( 'excerpt-block' ):addClass( params.class ) ~~end~~ if config.styles then ~~-- Attempt to parse [[File:...]] or [[Image:...]], either anywhere (start=false) or at the start only (start=true)~~ local styles = frame:extensionTag( 'templatestyles', '', { src = config.styles } ) ~~local function parseimage(text, start)~~ block:node( styles ) ~~local startre = ""~~ ~~if start then startre = "^" end -- a true flag restricts search to start of string~~ ~~local image = matchany(text, startre .. "%[%[%s", fileNamespaces, "%s:.") -- [[File: or [[Image: ...~~ ~~if image then~~ ~~image = mw.ustring.match(image, "%b[]%s") -- matching [[...]] to handle wikilinks nested in caption~~ end ~~return image~~ ~~end~~ if params.hat then ~~-- Parse a caption, which ends at a \| (end of parameter) or } (end of infobox) but may contain nested [..] and {..}~~ local hat = Excerpt.getHat( page, section, params ) ~~local function parsecaption(caption)~~ block:node( hat ) ~~if not caption then return nil end~~ ~~local len = mw.ustring.len(caption)~~ ~~local pos = 1~~ ~~while pos <= len do~~ ~~local linkstart, linkend = mw.ustring.find(caption, "%b[]", pos)~~ ~~linkstart = linkstart or len + 1 -- avoid comparison with nil when no link~~ ~~local templatestart, templateend = mw.ustring.find(caption, "%b{}", pos)~~ ~~templatestart = templatestart or len + 1 -- avoid comparison with nil when no template~~ ~~local argend = mw.ustring.find(caption, "[\|}]", pos) or len + 1~~ ~~if linkstart < templatestart and linkstart < argend then~~ ~~pos = linkend + 1 -- skip wikilink~~ ~~elseif templatestart < argend then~~ ~~pos = templateend + 1 -- skip template~~ ~~else -- argument ends before the next wikilink or template~~ ~~return mw.ustring.sub(caption, 1, argend - 1)~~ ~~end~~ end ~~return caption -- No terminator found: return entire caption~~ ~~end~~ excerpt = mw.html.create( 'div' ):addClass( 'excerpt' ):wikitext( excerpt ) ~~-- Attempt to construct a [[File:...]] block from {{infobox ... \|image= ...}}~~ block:node( excerpt ) ~~local function argimage(text)~~ ~~local token = nil~~ ~~local hasNamedArgs = mw.ustring.find(text, "\|") and mw.ustring.find(text, "=")~~ ~~if not hasNamedArgs then return nil end -- filter out any template that obviously doesn't contain an image~~ if params.more then ~~-- ensure image map is captured~~ local more = Excerpt.getReadMore( page, section ) ~~text = mw.ustring.gsub(text, '<!%-%-imagemap%-%->', '\|imagemap=')~~ block:node( more ) ~~-- find all images~~ ~~local hasImages = false~~ ~~local images = {}~~ ~~local capture_from = 1~~ ~~while capture_from < mw.ustring.len(text) do~~ ~~local argname, position, image = mw.ustring.match(text, "\|%s([^=\|]-[Ii][Mm][Aa][Gg][Ee][^=\|]-)%s=%s()(.)", capture_from)~~ ~~if image then -- ImageCaption=, image_size=, image_upright=, etc. do not introduce an image~~ ~~local lcArgname = mw.ustring.lower(argname)~~ ~~if mw.ustring.find(lcArgname, "caption")~~ ~~or mw.ustring.find(lcArgname, "size")~~ ~~or mw.ustring.find(lcArgname, "upright") then~~ ~~image = nil~~ ~~end~~ ~~end~~ ~~if image then~~ ~~hasImages = true~~ ~~images[position] = image~~ ~~capture_from = position~~ ~~else~~ ~~capture_from = mw.ustring.len(text)~~ ~~end~~ ~~end~~ ~~capture_from = 1~~ ~~while capture_from < mw.ustring.len(text) do~~ ~~local position, image = mw.ustring.match(text, "\|%s[^=\|]-[Pp][Hh][Oo][Tt][Oo][^=\|]-%s=%s()(.)", capture_from)~~ ~~if image then~~ ~~hasImages = true~~ ~~images[position] = image~~ ~~capture_from = position~~ ~~else~~ ~~capture_from = mw.ustring.len(text)~~ ~~end~~ ~~end~~ ~~capture_from = 1~~ ~~while capture_from < mw.ustring.len(text) do~~ ~~local position, image = mw.ustring.match(text, "\|%s[^=\|{}]-%s=%s()%[?%[?([^\|{}]%.%a%a%a%a?)%s%f[\|}]", capture_from)~~ ~~if image then~~ ~~hasImages = true~~ ~~if not images[position] then~~ ~~images[position] = image~~ ~~end~~ ~~capture_from = position~~ ~~else~~ ~~capture_from = mw.ustring.len(text)~~ ~~end~~ end ~~if not hasImages then~~ return ~~nil end~~block end -- Filter the files in the given wikitext against the given filter ~~-- find all captions~~ function Excerpt.filterFiles( wikitext, filter ) ~~local captions = {}~~ if not filter then return wikitext end ~~capture_from = 1~~ local filters, isBlacklist = Excerpt.parseFilter( filter ) ~~while capture_from < mw.ustring.len(text) do~~ local files = parser.getFiles( wikitext ) ~~local position, caption = matchany(text, "\|%s", captionParams, "%s=%s()([^\n]+)", capture_from)~~ for index, file in pairs( files ) do ~~if caption then~~ local name = parser.getFileName( file ) ~~-- extend caption to parse "\| caption = Foo {{Template\n on\n multiple lines}} Bar\n"~~ if isBlacklist and ( Excerpt.matchFilter( index, filters ) or Excerpt.matchFilter( name, filters ) ) ~~local bracedCaption = mw.ustring.match(text, "^[^\n]-%b{}[^\n]+", position)~~ or not isBlacklist and ( not Excerpt.matchFilter( index, filters ) and not Excerpt.matchFilter( name, filters ) ) then ~~if bracedCaption and bracedCaption ~= "" then caption = bracedCaption end~~ wikitext = Excerpt.removeString( wikitext, file ) ~~caption = mw.text.trim(caption)~~ ~~local captionStart = mw.ustring.sub(caption, 1, 1)~~ ~~if captionStart == '\|' or captionStart == '}' then caption = nil end~~ ~~end~~ ~~if caption then~~ ~~-- find nearest image, and use same index for captions table~~ ~~local i = position~~ ~~while i > 0 and not images[i] do~~ ~~i = i - 1~~ ~~if images[i] then~~ ~~if not captions[i] then~~ ~~captions[i] = parsecaption(caption)~~ ~~end~~ ~~end~~ ~~end~~ ~~capture_from = position~~ ~~else~~ ~~capture_from = mw.ustring.len(text)~~ end end return wikitext end -- Filter the lists in the given wikitext against the given filter ~~-- find all alt text~~ function Excerpt.filterLists( wikitext, filter ) ~~local altTexts = {}~~ if not filter then return wikitext end ~~for position, altText in mw.ustring.gmatch(text, "\|%s[Aa][Ll][Tt]%s=%s()([^\n])") do~~ local filters, isBlacklist = Excerpt.parseFilter( filter ) ~~if altText then~~ local lists = parser.getLists( wikitext ) for index, list in pairs( lists ) do ~~-- altText is terminated by }} or \|, but first skip any matched [[...]] and {{...}}~~ if isBlacklist and Excerpt.matchFilter( index, filters ) ~~local lookfrom = math.max( -- find position after whichever comes last: start of string, end of last ]] or end of last }}~~ or not isBlacklist and not Excerpt.matchFilter( index, filters ) then ~~mw.ustring.match(altText, ".{%b{}}()") or 1, -- if multiple {{...}}, .* consumes all but one, leaving the last for %b~~ wikitext = Excerpt.removeString( wikitext, list ) ~~mw.ustring.match(altText, ".%[%b[]%]()") or 1)~~ ~~local len = mw.ustring.len(altText)~~ ~~local aftertext = math.min( -- find position after whichever comes first: end of string, }} or \|~~ ~~mw.ustring.match(altText, "()}}", lookfrom) or len+1,~~ ~~mw.ustring.match(altText, "()\|", lookfrom) or len+1)~~ ~~altText = mw.ustring.sub(altText, 1, aftertext-1) -- chop off \|... or }}... which is not part of [[...]] or {{...}}~~ ~~altText = mw.text.trim(altText)~~ ~~local altTextStart = mw.ustring.sub(altText, 1, 1)~~ ~~if altTextStart == '\|' or altTextStart == '}' then altText = nil end~~ ~~end~~ ~~if altText then~~ ~~-- find nearest image, and use same index for altTexts table~~ ~~local i = position~~ ~~while i > 0 and not images[i] do~~ ~~i = i - 1~~ ~~if images[i] then~~ ~~if not altTexts[i] then~~ ~~altTexts[i] = altText~~ ~~end~~ ~~end~~ ~~end~~ end end return wikitext end -- Filter the tables in the given wikitext against the given filter ~~-- find all image sizes~~ function Excerpt.filterTables( wikitext, filter ) ~~local imageSizes = {}~~ if not filter then return wikitext end ~~for position, imageSizeMatch in mw.ustring.gmatch(text, "\|%s[Ii][Mm][Aa][Gg][Ee][ _]?[Ss][Ii][Zz][Ee]%s=%s()([^}\|\n])") do~~ local filters, isBlacklist = Excerpt.parseFilter( filter ) ~~local imageSize = mw.ustring.match(imageSizeMatch, "=%s([^}\|\n])")~~ local tables = parser.getTables( wikitext ) ~~if imageSize then~~ for index, t in pairs( tables ) do ~~imageSize = mw.text.trim(imageSize )~~ local id = string.match( t, '{\|[^\n]-id%s=%s["\']?([^"\'\n]+)["\']?[^\n]\n' ) ~~local imageSizeStart = mw.ustring.sub(imageSize, 1, 1)~~ if isBlacklist and ( Excerpt.matchFilter( index, filters ) or Excerpt.matchFilter( id, filters ) ) ~~if imageSizeStart == '\|' or imageSizeStart == '}' then imageSize = nil end~~ or not isBlacklist and ( not Excerpt.matchFilter( index, filters ) and not Excerpt.matchFilter( id, filters ) ) then ~~end~~ wikitext = Excerpt.removeString( wikitext, t ) ~~if imageSize then~~ ~~-- find nearest image, and use same index for imageSizes table~~ ~~local i = position~~ ~~while i > 0 and not images[i] do~~ ~~i = i - 1~~ ~~if images[i] then~~ ~~if not imageSizes[i] then~~ ~~imageSizes[i] = imageSize~~ ~~end~~ ~~end~~ ~~end~~ end end return wikitext end -- Filter the paragraphs in the given wikitext against the given filter ~~-- sort the keys of the images table (in a table sequence), so that images can be iterated over in order~~ function Excerpt.filterParagraphs( wikitext, filter ) ~~local keys = {}~~ if not filter then return wikitext end ~~for key, val in pairs(images) do~~ local filters, isBlacklist = Excerpt.parseFilter( filter ) ~~table.insert(keys, key)~~ local paragraphs = parser.getParagraphs( wikitext ) ~~end~~ for index, paragraph in pairs( paragraphs ) do ~~table.sort(keys)~~ if isBlacklist and Excerpt.matchFilter( index, filters ) or not isBlacklist and not Excerpt.matchFilter( index, filters ) then ~~-- add in relevant optional parameters for each image: caption, alt text and image size~~ wikitext = Excerpt.removeString( wikitext, paragraph ) ~~local imageTokens = {}~~ ~~for _, index in ipairs(keys) do~~ ~~local image = images[index]~~ ~~local token = parseimage(image, true) -- look for image=[[File:...]] etc.~~ ~~if not token then~~ ~~image = mw.ustring.match(image, "^[^}\|\n]") -- remove later arguments~~ ~~token = "[[" -- Add File: unless name already begins File: or Image:~~ ~~if not matchany(image, "^", fileNamespaces, "%s:") then~~ ~~token = token .. "File:"~~ ~~end~~ ~~token = token .. image~~ ~~local caption = captions[index]~~ ~~if caption and mw.ustring.match(caption, "%S") then token = token .. "\|" .. caption end~~ ~~local alt = altTexts[index]~~ ~~if alt then token = token .. "\|alt=" .. alt end~~ ~~local image_size = imageSizes[index]~~ ~~if image_size and mw.ustring.match(image_size, "%S") then token = token .. "\|" .. image_size end~~ ~~token = token .. "]]"~~ end ~~token = mw.ustring.gsub(token, "\n","") .. "\n"~~ ~~table.insert(imageTokens, token)~~ end return ~~imageTokens~~wikitext end -- Filter the templates in the given wikitext against the given filter ~~-- Help gsub convert imagemaps into standard images~~ function Excerpt.filterTemplates( wikitext, filter ) ~~local function convertImagemap(imagemap)~~ if not filter then return wikitext end ~~local image = matchany(imagemap, "[>\n]%s", fileNamespaces, "[^\n]")~~ local filters, isBlacklist = Excerpt.parseFilter( filter ) ~~if image then~~ local templates = parser.getTemplates( wikitext ) ~~return "<!--imagemap-->[[" .. mw.ustring.gsub(image, "[>\n]%s", "", 1) .. "]]"~~ for index, template in pairs( templates ) do ~~else~~ local name = parser.getTemplateName( template ) ~~return "" -- remove entire block if image can't be extracted~~ if isBlacklist and ( Excerpt.matchFilter( index, filters ) or Excerpt.matchFilter( name, filters ) ) or not isBlacklist and ( not Excerpt.matchFilter( index, filters ) and not Excerpt.matchFilter( name, filters ) ) then wikitext = Excerpt.removeString( wikitext, template ) end end return wikitext end function Excerpt.addInfoboxFile( excerpt ) ~~-- Convert a comma-separated list of numbers or min-max ranges into a list of booleans, e.g. "1,3-5" → {1=true,2=false,3=true,4=true,5=true}~~ -- We cannot distinguish the infobox from the other templates, so we search them all ~~local function numberflags(str)~~ local templates = parser.getTemplates( excerpt ) ~~local ranges = mw.text.split(str, ",") -- parse ranges, e.g. "1,3-5" → {"1","3-5"}~~ for _, template in pairs( templates ) do ~~local flags = {}~~ local parameters = parser.getTemplateParameters( template ) ~~for _, r in pairs(ranges) do~~ local file, captions, caption, cssClasses, cssClass ~~local min, max = mw.ustring.match(r, "^%s(%d+)%s%-%s(%d+)%s$") -- "3-5" → min=3 max=5~~ for _, pair in pairs( config.captions ) do ~~if not max then min, max = mw.ustring.match(r, "^%s((%d+))%s$") end -- "1" → min=1 max=1~~ if file ~~max~~= ~~then~~pair[1] file = parameters[file] ~~for p = min, max do flags[p] = true end~~ if file and Excerpt.matchAny( file, '^.%.', { '[Jj][Pp][Ee]?[Gg]', '[Pp][Nn][Gg]', '[Gg][Ii][Ff]', '[Ss][Vv][Gg]' }, '.' ) then file = string.match( file, '%[?%[?.-:([^{\|]+)%]?%]?' ) or file -- [[File:Example.jpg{{!}}upright=1.5]] to Example.jpg captions = pair[2] for _, p in pairs( captions ) do if parameters[ p ] then caption = parameters[ p ] break end end -- Check for CSS classes -- We opt to use skin-invert-image instead of skin-invert -- in all other cases, the CSS provided in the infobox is used if pair[3] then cssClasses = pair[3] for _, p in pairs( cssClasses ) do if parameters[ p ] then cssClass = ( parameters[ p ] == 'skin-invert' ) and 'skin-invert-image' or parameters[ p ] break end end end local class = cssClass and ( '\|class=' .. cssClass ) or '' return '[[File:' .. file .. class .. '\|thumb\|' .. ( caption or '' ) .. ']]' .. excerpt end end end return ~~flags~~excerpt end function Excerpt.removeNonFreeFiles( wikitext ) ~~local imageArgGroups = {~~ local files = parser.getFiles( wikitext ) ~~{"thumb", "thumbnail", "frame", "framed", "frameless"},~~ for _, file in pairs( files ) do ~~{"right", "left", "center", "none"},~~ local fileName = 'File:' .. parser.getFileName( file ) ~~{"baseline", "middle", "sub", "super", "text-top", "text-bottom", "top", "bottom"}~~ local fileTitle = mw.title.new( fileName ) } if fileTitle then local fileDescription = fileTitle:getContent() ~~local function modifyImage(image, fileargs)~~ if not fileDescription or fileDescription == '' then ~~if fileargs then~~ local frame = mw.getCurrentFrame() ~~for _, filearg in pairs(mw.text.split(fileargs, "\|")) do -- handle fileargs=left\|border etc.~~ fileDescription = frame:preprocess( '{{' .. fileName .. '}}' ) -- try Commons ~~local fa = mw.ustring.gsub(filearg, "=.", "") -- "upright=0.75" → "upright"~~ ~~local group = {fa} -- group of "border" is ["border"]...~~ ~~for _, g in pairs(imageArgGroups) do~~ ~~for _, a in pairs(g) do~~ ~~if fa == a then group = g end -- ...but group of "left" is ["right", "left", "center", "none"]~~ ~~end~~ end if fileDescription and string.match( fileDescription, '[Nn]on%-free' ) then ~~for _, a in pairs(group) do~~ wikitext = Excerpt.removeString( wikitext, file ) ~~image = mw.ustring.gsub(image, "\|%s" .. a .. "%f[%A]%s=[^\|%]]", "") -- remove "\|upright=0.75" etc.~~ ~~image = mw.ustring.gsub(image, "\|%s" .. a .. "%s([\|%]])", "%1") -- replace "\|left\|" by "\|" etc.~~ end ~~image = mw.ustring.gsub(image, "([\|%]])", "\|" .. filearg .. "%1", 1) -- replace "\|" by "\|left\|" etc.~~ end end return ~~image~~wikitext end function Excerpt.getHat( page, section, params ) ~~-- a basic parser to trim down extracted wikitext~~ local hat ~~-- @param text : Wikitext to be processed~~ ~~-- @param options : A table of options...~~ -- Build the text -- options.paraflags : Which number paragraphs to keep, as either a string (e.g. `1,3-5`) or a table (e.g. `{1=true,2=false,3=true,4=true,5=true}`. If not present, all paragraphs will be kept. if params.this then ~~-- options.fileflags : table of which files to keep, as either a string (e.g. `1,3-5`) or a table (e.g. `{1=true,2=false,3=true,4=true,5=true}`~~ hat = params.this ~~-- options.fileargs : args for the [[File:]] syntax, such as `left`~~ elseif params.quote then ~~-- @param filesOnly : If set, only return the files and not the prose~~ hat = Excerpt.getMessage( 'this' ) ~~local function parse(text, options, filesOnly)~~ elseif params.only then ~~local allparas = true -- keep all paragraphs?~~ hat = Excerpt.getMessage( params.only ) ~~if options.paraflags then~~ else ~~if type(options.paraflags) ~= "table" then options.paraflags = numberflags(options.paraflags) end~~ hat = Excerpt.getMessage( 'section' ) ~~for _, v in pairs(options.paraflags) do~~ ~~if v then allparas = false end -- if any para specifically requested, don't keep all~~ ~~end~~ ~~end~~ ~~if filesOnly then~~ ~~allparas = false~~ ~~options.paraflags = {}~~ end hat = hat .. ' ' .. Excerpt.getMessage( 'excerpt' ) -- Build the link ~~local maxfile = 0 -- for efficiency, stop checking images after this many have been found~~ if ~~options.fileflags~~section then hat = hat .. ' [[:' .. page .. '#' .. mw.uri.anchorEncode( section ) .. '\|' .. params.displayTitle ~~if type(options.fileflags) ~= "table" then options.fileflags = numberflags(options.fileflags) end~~ .. ' § ' .. section:gsub( '%[%[([^]\|]+)\|?[^]]%]%]', '%1' ) .. ']].' -- remove nested links ~~for k, v in pairs(options.fileflags) do~~ else ~~if v and k > maxfile then maxfile = k end -- set maxfile = highest key in fileflags~~ hat = hat .. ' [[:' .. page .. '\|' .. params.displayTitle .. ']].' ~~end~~ end ~~local fileargs = options.fileargs and mw.text.trim(options.fileargs)~~ ~~if fileargs == '' then fileargs = nil end~~ -- Build the edit link ~~local leadstart = nil -- have we found some text yet?~~ local title = mw.title.new( page ) ~~local t = "" -- the stripped down output text~~ local editUrl = title:fullUrl( 'action=edit' ) ~~local filetext = "" -- output text with concatenated [[File:Foo\|...]]\n entries~~ hat = hat .. '<span class="mw-editsection-like plainlinks"><span class="mw-editsection-bracket">[</span>[' ~~local files = 0 -- how many images so far~~ hat = hat .. editUrl .. ' ' .. mw.message.new( 'editsection' ):plain() ~~local paras = 0 -- how many paragraphs so far~~ hat = hat .. ']<span class="mw-editsection-bracket">]</span></span>' ~~local startLine = true -- at the start of a line (no non-spaces found since last \n)?~~ if config.hat then ~~text = mw.ustring.gsub(text,"^%s","") -- remove initial white space~~ local frame = mw.getCurrentFrame() hat = config.hat .. hat .. '}}' ~~-- Add named files~~ hat = frame:preprocess( hat ) ~~local f = options.files~~ else ~~if f and mw.ustring.match(f, "[^%d%s%-,]") then -- filename rather than number list~~ hat = mw.html.create( 'div' ):addClass( 'dablink excerpt-hat' ):wikitext( hat ) ~~f = mw.ustring.gsub(f, "^%sFile%s:%s", "", 1)~~ ~~f = mw.ustring.gsub(f, "^%sImage%s:%s", "", 1)~~ ~~f = "[[File:" .. f .. "]]"~~ ~~f = modifyImage(f, "thumb")~~ ~~f = modifyImage(f, fileargs)~~ ~~if checkimage(f) then filetext = filetext .. f .. "\n" end~~ end return hat ~~repeat -- loop around parsing a template, image or paragraph~~ end ~~local token = mw.ustring.match(text, "^%b{}%s") or false -- {{Template}} or {\| Table \|}~~ ~~if not leadstart and not token then token = mw.ustring.match(text, "^%b<>%s%b{}%s") end -- allow <tag>{{template}} before lead has started~~ function Excerpt.getReadMore( page, section ) local link = "'''[[" .. page if section then link = link .. '#' .. section end local text = Excerpt.getMessage( 'more' ) link = link .. '\|' .. text .. "]]'''" link = mw.html.create( 'div' ):addClass( 'noprint excerpt-more' ):wikitext( link ) return link end -- Fix birth and death dates, but only in the first paragraph ~~local line = mw.ustring.match(text, "[^\n]")~~ -- @todo Use parser.getParagraphs() to get the first paragraph ~~if token and line and mw.ustring.len(token) < mw.ustring.len(line) then -- template is followed by text (but it may just be other templates)~~ function Excerpt.fixDates( excerpt ) ~~line = mw.ustring.gsub(line, "%b{}", "") -- remove all templates from this line~~ local start = 1 -- skip initial templates ~~line = mw.ustring.gsub(line, "%b<>", "") -- remove all HTML tags from this line~~ local s ~~-- if anything is left, other than an incomplete further template or an image, keep the template: it counts as part of the line~~ local e = 0 ~~if mw.ustring.find(line, "%S") and not matchany(line, "^%s", { "{{", "%[%[%s[Ff]ile:", "%[%[%s[Ii]mage:" }, "") then~~ repeat ~~token = nil~~ start = e + 1 s, e = mw.ustring.find( excerpt, '%s%b{}%s', start ) until not s or s > start s, e = mw.ustring.find( excerpt, '%b()', start ) -- get (...), which may be (year–year) if s and s < start + 100 then -- look only near the start local excerptStart = mw.ustring.sub( excerpt, s, e ) local year1, conjunction, year2 = string.match( excerptStart, '(%d%d%d+)(.-)(%d%d%d+)' ) if year1 and year2 and ( string.match( conjunction, '[%-–—]' ) or string.match( conjunction, '{{%s[sS]nd%s}}' ) ) then local y1 = tonumber( year1 ) local y2 = tonumber( year2 ) if y2 > y1 and y2 < y1 + 125 and y1 <= tonumber( os.date( '%Y' ) ) then excerpt = mw.ustring.sub( excerpt, 1, s ) .. year1 .. '–' .. year2 .. mw.ustring.sub( excerpt, e ) end end end return excerpt end -- Replace the first call to each reference defined outside of the excerpt for the full reference, to prevent undefined references ~~if token then -- found a template which is not the prefix to a line of text~~ -- Then prefix the page title to the reference names to prevent conflicts ~~if leadstart then -- lead has already started, so keep the template within the text, unless it's a whole line (navbox etc.)~~ -- that is, replace <ref name="Foo"> for <ref name="Title of the article Foo"> ~~if not filesOnly and not startLine then t = t .. token end~~ -- and also <ref name="Foo" /> for <ref name="Title of the article Foo" /> -- also remove reference groups: <ref name="Foo" group="Bar"> for <ref name="Title of the article Foo"> ~~elseif matchany(token, "{{%s", wantedBlockTemplates, "%s%f[\|}]") then~~ -- and <ref group="Bar"> for <ref> ~~t = t .. token -- keep wanted block templates~~ -- @todo The current regex may fail in cases with both kinds of quotes, like <ref name="Darwin's book"> function Excerpt.fixReferences( excerpt, page, wikitext ) ~~elseif not falsy(options.keepTables) and mw.ustring.sub(token, 1, 2) == '{\|' then~~ local references = parser.getReferences( excerpt ) ~~t = t .. token -- keep tables~~ local fixed = {} for _, reference in pairs( references ) do ~~elseif files < maxfile then -- discard template, but if we are still collecting images...~~ local ~~images~~name = ~~argimage~~parser.getTagAttribute(~~token)~~ orreference, {}'name' ) if not ~~images~~fixed[ name ] then -- fix each reference only once local content = parser.getTagContent( reference ) ~~local image = parseimage(token, false) -- look for embedded [[File:...]], \|image=, etc.~~ if ~~image~~not content then ~~table.insert(images,~~-- reference ~~image)~~is ~~end~~self-closing local full = parser.getReference( excerpt, name ) ~~end~~ if not full then -- the reference is not defined in the excerpt ~~for _, image in ipairs(images) do~~ full = parser.getReference( wikitext, name ) ~~if files < maxfile and checkimage(image) then -- if image is found and qualifies (not a sound file, non-free, etc.)~~ if full then ~~files = files + 1 -- count the file, whether displaying it or not~~ excerpt = excerpt:gsub( Excerpt.escapeString( reference ), Excerpt.escapeString( full ), 1 ) ~~if options.fileflags and options.fileflags[files] then -- if displaying this image~~ ~~image = modifyImage(image, "thumb")~~ ~~image = modifyImage(image, fileargs)~~ ~~filetext = filetext .. image~~ ~~end~~ end table.insert( fixed, name ) end end end ~~else -- the next token in text is not a template~~ end ~~token = parseimage(text, true)~~ -- Prepend the page title to the reference names to prevent conflicts with other references in the transcluding page ~~if token then -- the next token in text looks like an image~~ excerpt = excerpt:gsub( '< [Rr][Ee][Ff][^>]name = ["\']?([^"\'>/]+)["\']?[^>/](/?) >', '<ref name="' .. page:gsub( '"', '' ) .. ' %1"%2>' ) ~~if files < maxfile and checkimage(token) then -- if more images are wanted and this is a wanted image~~ -- Remove reference groups because they don't apply to the transcluding page ~~files = files + 1~~ excerpt = excerpt:gsub( '< [Rr][Ee][Ff] group = ["\']?[^"\'>/]+["\'] >', '<ref>' ) ~~if options.fileflags and options.fileflags[files] then~~ return excerpt ~~local image = token -- copy token for manipulation by adding \|right etc. without changing the original~~ ~~image = modifyImage(image, fileargs)~~ ~~filetext = filetext .. image~~ ~~end~~ ~~end~~ ~~else -- got a paragraph, which ends at a file, image, blank line or end of text~~ ~~local afterend = mw.ustring.len(text) + 1~~ ~~local blankpos = mw.ustring.find(text, "\n%s\n") or afterend -- position of next paragraph delimiter (or end of text)~~ ~~local endpos = math.min( -- find position of whichever comes first: [[File:, [[Image: or paragraph delimiter~~ ~~mw.ustring.find(text, "%[%[%s[Ff]ile%s:") or afterend,~~ ~~mw.ustring.find(text, "%[%[%s[Ii]mage%s:") or afterend,~~ ~~blankpos)~~ ~~token = mw.ustring.sub(text, 1, endpos-1)~~ ~~if blankpos < afterend and blankpos == endpos then -- paragraph ends with a blank line~~ ~~token = token .. mw.ustring.match(text, "\n%s\n", blankpos)~~ ~~end~~ ~~local isHatnote = not(leadstart) and mw.ustring.sub(token, 1, 1) == ':'~~ ~~if not isHatnote then~~ ~~leadstart = leadstart or mw.ustring.len(t) + 1 -- we got a paragraph, so mark the start of the lead section~~ ~~paras = paras + 1~~ ~~if allparas or (options.paraflags and options.paraflags[paras]) then t = t .. token end -- add if this paragraph wanted~~ ~~end~~ ~~end -- of "else got a paragraph"~~ ~~end -- of "else not a template"~~ ~~if token then text = mw.ustring.sub(text, mw.ustring.len(token)+1) end -- remove parsed token from remaining text~~ ~~startLine = mw.ustring.find(token, "\n%s$") -- will the next token be the first non-space on a line?~~ ~~until not text or text == "" or not token or token == "" -- loop until all text parsed~~ ~~text = mw.ustring.gsub(t, "\n+$", "") -- remove trailing line feeds, so "{{Transclude text excerpt\|Foo}} more" flows on one line~~ ~~return filetext, text~~ end function Excerpt.removeReferences( excerpt ) ~~local function cleanupText(text, keepSubsections, keepRefs)~~ local references = parser.getReferences( excerpt ) ~~text = mw.ustring.gsub(text, "<!%-%-.-%-%->","") -- remove HTML comments~~ for _, reference in pairs( references ) do ~~if falsy(keepSubsections) then~~ excerpt = Excerpt.removeString( excerpt, reference ) ~~text = mw.ustring.gsub(text, "\n==.","") -- remove first ==Heading== and everything after it~~ ~~text = mw.ustring.gsub(text, "^==.","") -- ...even if the lead is empty~~ end return excerpt ~~text = mw.ustring.gsub(text, "<[Nn][Oo][Ii][Nn][Cc][Ll][Uu][Dd][Ee]>.-</[Nn][Oo][Ii][Nn][Cc][Ll][Uu][Dd][Ee]>", "") -- remove noinclude bits~~ ~~if mw.ustring.find(text, "[Oo][Nn][Ll][Yy][Ii][Nn][Cc][Ll][Uu][Dd][Ee]") then -- avoid expensive search if possible~~ ~~text = mw.ustring.gsub(text, "</[Oo][Nn][Ll][Yy][Ii][Nn][Cc][Ll][Uu][Dd][Ee]>.-<[Oo][Nn][Ll][Yy][Ii][Nn][Cc][Ll][Uu][Dd][Ee]>", "") -- remove text between onlyinclude sections~~ ~~text = mw.ustring.gsub(text, "^.-<[Oo][Nn][Ll][Yy][Ii][Nn][Cc][Ll][Uu][Dd][Ee]>", "") -- remove text before first onlyinclude section~~ ~~text = mw.ustring.gsub(text, "</[Oo][Nn][Ll][Yy][Ii][Nn][Cc][Ll][Uu][Dd][Ee]>.", "") -- remove text after last onlyinclude section~~ ~~end~~ ~~if falsy(keepRefs) then~~ ~~text = mw.ustring.gsub(text, "<%s[Rr][Ee][Ff][^>]-/%s>", "") -- remove refs cited elsewhere~~ ~~text = mw.ustring.gsub(text, "<%s[Rr][Ee][Ff].->.-<%s/%s[Rr][Ee][Ff]%s>", "") -- remove refs~~ ~~text = mw.ustring.gsub(text, "%b{}", striptemplate) -- remove unwanted templates such as references~~ ~~end~~ ~~text = mw.ustring.gsub(text, "<%s[Ss][Cc][Oo][Rr][Ee].->.-<%s/%s[Ss][Cc][Oo][Rr][Ee]%s>", "") -- remove musical scores~~ ~~text = mw.ustring.gsub(text, "<%s[Ii][Mm][Aa][Gg][Ee][Mm][Aa][Pp].->.-<%s/%s[Ii][Mm][Aa][Gg][Ee][Mm][Aa][Pp]%s>", convertImagemap) -- convert imagemaps into standard images~~ ~~text = mw.ustring.gsub(text, "%s{{%s[Tt][Oo][Cc].-}}", "") -- remove most common tables of contents~~ ~~text = mw.ustring.gsub(text, "%s__[A-Z]TOC__", "") -- remove TOC behavior switches~~ ~~text = mw.ustring.gsub(text, "\n%s{{%s[Pp]p%-.-}}", "\n") -- remove protection templates~~ ~~text = mw.ustring.gsub(text, "%s{{[^{\|}][Ss]idebar%s}}", "") -- remove most sidebars~~ ~~text = mw.ustring.gsub(text, "%s{{[^{\|}]%-[Ss]tub%s}}", "") -- remove most stub templates~~ ~~text = mw.ustring.gsub(text, "%s%[%[%s:?[Cc]ategory:.-%]%]", "") -- remove categories~~ ~~text = mw.ustring.gsub(text, "^:[^\n]+\n","") -- remove DIY hatnote indented with a colon~~ ~~return text~~ end function Excerpt.removeCategories( excerpt ) ~~-- Parse a ==Section== from a page~~ local categories = parser.getCategories( excerpt ) ~~local function getsection(text, section, mainonly)~~ for _, category in pairs( categories ) do ~~local escapedSection = mw.ustring.gsub(mw.uri.decode(section), "([%^%$%(%)%%%.%[%]%%+%-%?])", "%%%1") -- %26 → & etc, then ^ → %^ etc.~~ excerpt = Excerpt.removeString( excerpt, category ) ~~local level, content = mw.ustring.match(text .. "\n", "\n(==+)%s" .. escapedSection .. "%s==.-\n(.)")~~ ~~if not content then return nil end -- no such section~~ ~~local nextsection~~ ~~if mainonly then~~ ~~nextsection = "\n==." -- Main part of section terminates at any level of header~~ ~~else~~ ~~nextsection = "\n==" .. mw.ustring.rep("=?", #level - 2) .. "[^=]." -- "===" → "\n===?[^=].", matching "==" or "===" but not "===="~~ end return excerpt ~~content = mw.ustring.gsub(content, nextsection, "") -- remove later sections with headings at this level or higher~~ ~~return content~~ end function Excerpt.removeBehaviorSwitches( excerpt ) ~~-- Remove unmatched <tag> or </tag> tags~~ return excerpt:gsub( '__[A-Z]+__', '' ) ~~local function fixtags(text, tag)~~ end ~~local startcount = 0~~ ~~for i in mw.ustring.gmatch(text, "<%s" .. tag .. "%f[^%w_].->") do startcount = startcount + 1 end~~ function Excerpt.removeComments( excerpt ) ~~local endcount = 0~~ return excerpt:gsub( '<!%-%-.-%-%->', '' ) ~~for i in mw.ustring.gmatch(text, "<%s/" .. tag .. "%f[^%w_].->") do endcount = endcount + 1 end~~ end function Excerpt.removeBold( excerpt ) ~~if startcount > endcount then -- more <tag> than </tag>: remove the last few <tag>s~~ return excerpt:gsub( "'''", '' ) ~~local i = 0~~ ~~text = mw.ustring.gsub(text, "<%s" .. tag .. "%f[^%w_].->", function(t)~~ ~~i = i + 1~~ ~~if i > endcount then return "" else return nil end~~ ~~end) -- "end" here terminates the anonymous replacement function(t) passed to gsub~~ ~~elseif endcount > startcount then -- more </tag> than <tag>: remove the first few </tag>s~~ ~~text = mw.ustring.gsub(text, "<%s/" .. tag .. "%f[^%w_].->", "", endcount - startcount)~~ ~~end~~ ~~return text~~ end function Excerpt.removeLinks( excerpt ) ~~-- Main function returns a string value: text of the lead of a page~~ local links = parser.getLinks( excerpt ) ~~local function main(pagenames, options)~~ for _, link in pairs( links ) do ~~if not pagenames or #pagenames < 1 then return err("No page names given") end~~ excerpt = Excerpt.removeString( excerpt, link ) ~~local pagename~~ ~~local text~~ ~~local pagecount = #pagenames~~ ~~local firstpage = pagenames[1] or "(nil)" -- save for error message, as it the name will be deleted~~ ~~local gotopt~~ ~~local pageoptstr~~ ~~local section~~ ~~-- read the page, or a random one if multiple pages were provided~~ ~~if pagecount > 1 then math.randomseed(os.time()) end~~ ~~while not text and pagecount > 0 do~~ ~~local pagenum = 1~~ ~~if pagecount > 1 then pagenum = math.random(pagecount) end -- pick a random title~~ ~~pagename = pagenames[pagenum]~~ ~~if pagename and pagename ~= "" then~~ ~~-- We have page or [[page]] or [[page\|text]], possibly followed by \|opt1\|opt2...~~ ~~local pn~~ ~~pn, gotopt, pageoptstr = mw.ustring.match(pagename, "^%s(%[%b[]%])%s(\|?)(.)")~~ ~~if pn then~~ ~~pagename = mw.ustring.match(pn, "%[%[([^\|%]])") -- turn [[page\|text]] into page, discarding text~~ ~~else -- we have page or page\|opt...~~ ~~pagename, gotopt, pageoptstr = mw.ustring.match(pagename, "%s([^\|][^\|%s])%s(\|?)(.)")~~ ~~end~~ ~~if pagename and pagename ~= "" then~~ ~~local pn~~ ~~pn, section = mw.ustring.match(pagename, "(.-)#(.)")~~ ~~pagename = pn or pagename~~ ~~text, normalisedPagename = getContent(pagename)~~ ~~if not normalisedPagename then~~ ~~return err("No title for page name " .. pagename)~~ ~~else~~ ~~pagename = normalisedPagename~~ ~~end~~ ~~if text and options.nostubs then~~ ~~local isStub = mw.ustring.find(text, "%s{{[^{\|}]%-[Ss]tub%s}}")~~ ~~if isStub then text = nil end~~ ~~end~~ ~~if not section then~~ ~~section = mw.ustring.match(pagename, ".-#(.)") -- parse redirect to Page#Section~~ ~~end~~ ~~if text and section and section ~= "" then text = getsection(text, section) end~~ ~~end~~ ~~end~~ ~~if not text then table.remove(pagenames, pagenum) end -- this one didn't work; try another~~ ~~pagecount = pagecount - 1 -- ensure that we exit the loop after at most #pagenames iterations~~ end return excerpt ~~if not text then return err("Cannot read a valid page: first name is " .. firstpage) end~~ end -- @todo Use parser.getLinks ~~local keepRefs = options.keepRefs~~ function Excerpt.removeSelfLinks( excerpt, page ) ~~local keepSubsections = options.keepSubsections~~ local lang = mw.language.getContentLanguage() ~~text = cleanupText(text, keepSubsections, keepRefs)~~ local page = Excerpt.escapeString( mw.title.getCurrentTitle().prefixedText ) local ucpage = lang:ucfirst( page ) ~~local pageopts = {} -- pageopts (even if value is "") have priority over global options~~ local lcpage = lang:lcfirst( page ) ~~for k, v in pairs(options) do pageopts[k] = v end~~ excerpt = excerpt ~~if gotopt and gotopt ~= "" then~~ :gsub( '%[%[(' .. ucpage .. ')%]%]', '%1' ) ~~for _, t in pairs(mw.text.split(pageoptstr, "\|")) do~~ :gsub( '%[%[(' .. lcpage .. ')%]%]', '%1' ) ~~local k, v = mw.ustring.match(t, "%s([^=]-)%s=(.-)%s$")~~ :gsub( '%[%[' .. ucpage .. '\|([^]]+)%]%]', '%1' ) ~~pageopts[k] = v~~ :gsub( '%[%[' .. lcpage .. '\|([^]]+)%]%]', '%1' ) ~~end~~ return excerpt ~~pageopts.paraflags = numberflags(pageopts["paragraphs"] or "") -- parse paragraphs, e.g. "1,3-5" → {"1","3-5"}~~ end ~~pageopts.fileflags = numberflags(pageopts["files"] or "") -- parse file numbers~~ ~~if pageopts.more and pageopts.more == "" then pageopts.more = "Read more..." end -- more= is short for this default text~~ ~~end~~ ~~local filetext~~ ~~filetext, text = parse(text, pageopts)~~ -- ~~replace~~Replace the bold title or synonym near the start of the ~~article~~page by a ~~wikilink~~link to the ~~article~~page function Excerpt.linkBold( excerpt, page ) local lang = mw.language.getContentLanguage() local ~~pos~~position = mw.ustring.find(~~text~~ excerpt, "'''" .. lang:ucfirst(~~pagename~~ page ) .. "'''", 1, true ) -- look for "'''Foo''' is..." (uc) or "A '''foo''' is..." (lc) or mw.ustring.find(~~text~~ excerpt, "'''" .. lang:lcfirst(~~pagename~~ page ) .. "'''", 1, true ) -- plain search: special characters in ~~pagename~~page represent themselves if ~~pos~~position then local ~~len~~length = mw.ustring.len(~~pagename~~ page ) ~~text~~excerpt = mw.ustring.sub(~~text~~ excerpt, 1, ~~pos~~position + 2 ) .. "'[["' .. mw.ustring.sub(~~text~~ excerpt, ~~pos~~position + 3, ~~pos~~position + ~~len~~length + 2 ) .. "']]"' .. mw.ustring.sub(~~text~~ excerpt, ~~pos~~position + ~~len~~length + 3, -1 ) -- link it else -- look for anything unlinked in bold, assumed to be a synonym of the title (e.g. a person's birth name) ~~text~~excerpt = mw.ustring.gsub(~~text~~ excerpt, "()'''(.-')'''", function ( a, b ) if anot <mw.ustring.find( ~~100~~b, '%[' ) and not mw.ustring.find( b, "'%["{' ) then --- if ~~early~~not inwikilinked ~~article~~or ~~and~~some ~~not~~weird ~~wikilinked~~template return "'''[[" .. ~~pagename~~page .. "'\|"' .. b .. "]]'''" -- replace '''Foo''' by '''[[~~pagename~~page\|Foo]]''' else return nil -- instruct gsub to make no change end end, 1 ) -- ~~"end" here~~ terminates the anonymous replacement function~~(a, b)~~ passed to gsub end return excerpt end function Excerpt.addTrackingCategories( excerpt ) ~~-- remove '''bold text''' if requested~~ local currentTitle = mw.title.getCurrentTitle() ~~if not falsy(pageopts.nobold) then text = mw.ustring.gsub(text, "'''", "") end~~ local contentCategory = config.categories.content if contentCategory and currentTitle.isContentPage then excerpt = excerpt .. '[[Category:' .. contentCategory .. ']]' end local namespaceCategory = config.categories[ currentTitle.namespace ] if namespaceCategory then excerpt = excerpt .. '[[Category:' .. namespaceCategory .. ']]' end return excerpt end -- Helper method to match from a list of regular expressions ~~text = filetext .. text~~ -- Like so: match pre..list[1]..post or pre..list[2]..post or ... function Excerpt.matchAny( text, pre, list, post, init ) local match = {} for i = 1, #list do match = { mw.ustring.match( text, pre .. list[ i ] .. post, init ) } if match[1] then return unpack( match ) end end return nil end -- Helper function to get arguments ~~-- Seek and destroy unterminated templates and wikilinks~~ -- args from Lua calls have priority over parent args from template ~~repeat -- hide matched {{template}}s including nested templates~~ function Excerpt.getArg( key, default ) ~~local t = text~~ local frame = mw.getCurrentFrame() ~~text = mw.ustring.gsub(text, "{(%b{})}", "\27{\27%1\27}\27") -- {{sometemplate}} → E{Esometemplate}E}E where E represents escape~~ for k, value in pairs( frame:getParent().args ) do ~~text = mw.ustring.gsub(text, "(< math[^>]>[^<]-)}}(.-< /math >)", "%1}\27}\27%2") -- <math>\{sqrt\{hat{x}}</math> → <math>\{sqrt\{hat{x}E}E</math>~~ if k == key and mw.text.trim( value ) ~= '' then ~~until text == t~~ return value ~~repeat -- do similar for [[wikilink]]s~~ end ~~local t = text~~ end ~~text = mw.ustring.gsub(text, "%[(%b[])%]", "\27[\27%1\27]\27")~~ for k, value in pairs( frame.args ) do ~~until text == t~~ if k == key and mw.text.trim( value ) ~= '' then return value end end return default end -- Helper method to get an error message ~~text = text.gsub(text, "([{}%[%]])%1[^\27].", "") -- remove unmatched {{, }}, [[ or ]] and everything thereafter, avoiding ]E]E etc.~~ -- This method also categorizes the current page in one of the configured error categories ~~text = text.gsub(text, "([{}%[%]])%1$", "") -- remove unmatched {{, }}, [[ or ]] at end of text~~ function Excerpt.getError( key, value ) ~~text = mw.ustring.gsub(text, "\27", "") -- unhide matched pairs: E{E{ → {{, ]E]E → ]], etc.~~ local message = Excerpt.getMessage( 'error-' .. key, value ) local markup = mw.html.create( 'div' ):addClass( 'error' ):wikitext( message ) if config.categories and config.categories.errors and mw.title.getCurrentTitle().isContentPage then markup:node( '[[Category:' .. config.categories.errors .. ']]' ) end return markup end -- Helper method to get a localized message ~~-- Ensure div tags match~~ -- This method uses Module:TNT to get localized messages from https://commons.wikimedia.org/wiki/Data:I18n/Module:Excerpt.tab ~~text = fixtags(text, "div")~~ -- If Module:TNT is not available or the localized message does not exist, the key is returned instead function Excerpt.getMessage( key, value ) local ok, TNT = pcall( require, 'Module:TNT' ) if not ok then return key end local ok2, message = pcall( TNT.format, 'I18n/Module:Excerpt.tab', key, value ) if not ok2 then return key end return message end -- Helper method to escape a string for use in regexes ~~if pageopts.more then text = text .. " '''[[" .. pagename .. "\|" .. pageopts.more .. "]]'''" end -- wikilink to article for more info~~ function Excerpt.escapeString( str ) return str:gsub( '[%^%$%(%)%.%[%]%%+%-%?%%]', '%%%0' ) end -- Helper method to remove a string from a text ~~if pageopts.list and not pageopts.showall then -- add a collapsed list of pages which might appear~~ -- @param text Text from where to remove the string ~~local listtext = pageopts.list~~ -- @param str String to remove ~~if listtext == "" then listtext = "Other articles" end~~ -- @return The given text with the string removed ~~text = text .. "{{collapse top\|title={{resize\|85%\|" ..listtext .. "}}\|bg=fff}}{{hlist"~~ function Excerpt.removeString( text, str ) ~~for _, p in pairs(pagenames) do~~ local pattern = Excerpt.escapeString( str ) ~~if mw.ustring.match(p, "%S") then text = text .. "\|[[" .. mw.text.trim(p) .. "]]" end~~ if #pattern > 9999 then -- strings longer than 10000 bytes can't be put into regexes ~~end~~ pattern = Excerpt.escapeString( mw.ustring.sub( str, 1, 999 ) ) .. '.-' .. Excerpt.escapeString( mw.ustring.sub( str, -999 ) ) ~~text = text .. "}}\n{{collapse bottom}}"~~ end return text:gsub( pattern, '' ) ~~return text~~ end -- Helper method to convert a comma-separated list of numbers or min-max ranges into a list of booleans ~~-- Shared template invocation code for lead and random functions~~ -- @param filter Required. Comma-separated list of numbers or min-max ranges, for example '1,3-5' ~~local function invoke(frame, func)~~ -- @return Map from integers to booleans, for example {1=true,2=false,3=true,4=true,5=true} ~~-- args = { 1,2,... = page names, paragraphs = list e.g. "1,3-5", files = list, more = text}~~ -- @return Boolean indicating whether the filters should be treated as a blacklist or not ~~local args = {} -- args[k] = frame.args[k] or frame:getParent().args[k] for all k in either (numeric or not)~~ -- @note Merging this into matchFilter is possible, but way too inefficient ~~for k, v in pairs(frame:getParent().args) do args[k] = v end~~ function Excerpt.parseFilter( filter ) ~~for k, v in pairs(frame.args) do args[k] = v end -- args from a Lua call have priority over parent args from template~~ local filters = {} ~~errors = args["errors"] -- set the module level boolean used in local function err~~ local isBlacklist = false if string.sub( filter, 1, 1 ) == '-' then ~~local articlecount = #args -- must be 1 except with selected=Foo and Foo=Somepage~~ isBlacklist = true ~~if articlecount < 1 and not (func == "selected" and args[func] and args[args[func]]) then~~ filter = string.sub( filter, 2 ) ~~return err("No articles provided")~~ end local values = mw.text.split( filter, ',' ) -- split values: '1,3-5' to {'1','3-5'} for _, value in pairs( values ) do ~~local pagenames = {}~~ value = mw.text.trim( value ) ~~if func == "lead" then~~ local min, max = mw.ustring.match( value, '^(%d+)%s[-–—]%s(%d+)$' ) -- '3-5' to min=3 max=5 ~~pagenames = { args[1] }~~ if not max then min, max = string.match( value, '^((%d+))$' ) end -- '1' to min=1 max=1 ~~elseif func == "linked" or func == "listitem" then~~ if max then ~~-- Read named page and find its wikilinks~~ for i = min, max do filters[ i ] = true end ~~local page = args[1]~~ else ~~local text, title = getContent(page)~~ filters[ value ] = true -- if we reach this point, the string had the form 'a,b,c' rather than '1,2,3' ~~if not title then~~ ~~return err("No title for page name " .. page)~~ ~~elseif not text then~~ ~~return err("No content for page name " .. page)~~ ~~end~~ ~~if args["section"] then -- check relevant section only~~ ~~text = getsection(text, args["section"], args["sectiononly"])~~ ~~if not text then return err("No section " .. args["section"] .. " in page " .. page) end~~ ~~end~~ ~~-- replace annotated links with real links~~ ~~text = mw.ustring.gsub(text, "{{%s[Aa]nnotated[ _]link%s\|%s(.-)%s}}", "[[%1]]")~~ ~~if func == "linked" then~~ ~~for p in mw.ustring.gmatch(text, "%[%[%s([^%]\|\n])") do table.insert(pagenames, p) end~~ ~~else -- listitem: first wikilink on a line beginning , :#, etc. except in "See also" or later section~~ ~~text = mw.ustring.gsub(text, "\n== See also.", "")~~ ~~for p in mw.ustring.gmatch(text, "\n:[%#][^\n]-%[%[%s([^%]\|\n])") do table.insert(pagenames, p) end~~ ~~end~~ ~~elseif func == "random" then~~ ~~-- accept any number of page names. If more than one, we'll pick one randomly~~ ~~for i, p in pairs(args) do~~ ~~if p and type(i) == 'number' then table.insert(pagenames, p) end~~ ~~end~~ ~~elseif func == "selected" then~~ ~~local articlekey = args[func]~~ ~~if tonumber(articlekey) then -- normalise article number into the range 1..#args~~ ~~articlekey = articlekey % articlecount~~ ~~if articlekey == 0 then articlekey = articlecount end~~ end ~~pagenames = { args[articlekey] }~~ end local filter = {cache = {}, terms = filters} return filter, isBlacklist end -- Helper function to see if a value matches any of the given filters ~~local options = args -- pick up miscellaneous options: more, errors, fileargs~~ function Excerpt.matchFilter( value, filter ) ~~options.paraflags = numberflags(args["paragraphs"] or "") -- parse paragraphs, e.g. "1,3-5" → {"1","3-5"}~~ if type(value) == "number" then ~~options.fileflags = numberflags(args["files"] or "") -- parse file numbers~~ return filter.terms[value] ~~if options.more and options.more == "" then options.more = "Read more..." end -- more= is short for this default text~~ else local ~~text~~cached = ""filter.cache[value] if ~~options.showall~~cached ~= nil then return cached ~~local separator = ""~~ end ~~for _, p in pairs(pagenames) do~~ local tlang = ~~main~~mw.language.getContentLanguage(~~{ p }, options~~) local lcvalue = lang:lcfirst(value) ~~if t ~= "" then~~ local ucvalue = lang:ucfirst(value) ~~text = text .. separator .. t~~ for term in pairs( filter.terms ) do ~~separator = options.showall~~ if value == tostring(term) ~~if separator == "" then separator = "{{clear}}{{hr}}" end~~ or type(term) == "string" and ( lcvalue == term or ucvalue == term or mw.ustring.match( value, term ) ) then filter.cache[value] = true return true end end filter.cache[value] = false ~~else~~ ~~text = main(pagenames, options)~~ ~~end~~ ~~if text == "" and brokenCategory and brokenCategory ~= "" and mw.title.getCurrentTitle().isContentPage then~~ ~~return "[[Category:" .. brokenCategory .. "]]"~~ ~~else~~ ~~return frame:preprocess(text)~~ end end return Excerpt ~~-- Entry points for template callers using #invoke:~~ ~~function p.lead(frame) return invoke(frame, "lead") end -- {{Transclude lead excerpt}} reads the first and only article~~ ~~function p.linked(frame) return invoke(frame, "linked") end -- {{Transclude linked excerpt}} reads a randomly selected article linked from the given page~~ ~~function p.listitem(frame) return invoke(frame, "listitem") end -- {{Transclude list item excerpt}} reads a randomly selected article listed on the given page~~ ~~function p.random(frame) return invoke(frame, "random") end -- {{Transclude random excerpt}} reads any article (default for invoke with one argument)~~ ~~function p.selected(frame) return invoke(frame, "selected") end -- {{Transclude selected excerpt}} reads the article whose key is in the selected= parameter~~ ~~-- Entry points for other Lua modules~~ ~~function p.getContent(page, frame) return getContent(page, frame) end~~ ~~function p.getsection(text, section) return getsection(text, section) end~~ ~~function p.parse(text, options, filesOnly) return parse(text, options, filesOnly) end~~ ~~function p.argimage(text) return argimage(text) end~~ ~~function p.checkimage(image) return checkimage(image) end~~ ~~function p.parseimage(text, start) return parseimage(text, start) end~~ ~~function p.cleanupText(text, keepSubsections, keepRefs) return cleanupText(text, keepSubsections, keepRefs) end~~ ~~function p.main(pagenames, options) return main(pagenames, options) end~~ ~~function p.numberflags(str) return numberflags(str) end~~ ~~return p~~