Module:Excerpt/sandbox: Difference between revisions

Browse history interactively

← Previous edit

Content deleted Content added

Revision as of 18:02, 29 April 2020 edit Ahecht (talk \| contribs) Edit filter managers, Autopatrolled, Administrators 72,334 edits allow err to read i18n ← Previous edit		Latest revision as of 21:21, 26 May 2025 edit undo Aidan9382 (talk \| contribs) Extended confirmed users, Page movers, Template editors 15,961 edits Try to automatically account for page merge-like redirects
(209 intermediate revisions by 12 users not shown)
Line 1: -- Module:Excerpt implements the Excerpt template ~~-- Get localized data~~ -- Documentation and master version: https://en.wikipedia.org/wiki/Module:Excerpt ~~local d = require("Module:Excerpt/i18n")~~ -- Authors: User:Sophivorus, User:Certes, User:Aidan9382 & others -- License: CC-BY-SA-3.0 local parser = require( 'Module:WikitextParser' ) ~~local p = {}~~ local yesno = require( 'Module:Yesno' ) local ok, config = pcall( require, 'Module:Excerpt/config' ) ~~-- Helper function to debug~~ if not ok then config = {} end ~~-- Returns blank text or an error message if requested~~ ~~local errors~~ ~~local function err(msg,a,b)~~ ~~local text = mw.ustring.format(d.error[msg] or msg or '',a,b)~~ ~~if errors then error(text, 2) end~~ ~~return ""~~ ~~end~~ local Excerpt = {} ~~-- Helper function to test for truthy and falsy values~~ ~~local function is(value)~~ ~~if not value or value == "" or value == "0" or value == "false" or value == "no" then~~ ~~return false~~ ~~end~~ ~~return true~~ ~~end~~ -- Main entry point for templates ~~-- Helper function to match from a list regular expressions~~ function Excerpt.main( frame ) ~~-- Like so: match pre..list[1]..post or pre..list[2]..post or ...~~ ~~local function matchAny(text, pre, list, post, init)~~ -- Make sure the requested page exists and get the wikitext ~~local match = {}~~ local page = Excerpt.getArg( 1 ) ~~for i = 1, #list do~~ if not page or page == '{{{1}}}' then return Excerpt.getError( 'no-page' ) end ~~match = { mw.ustring.match(text, pre .. list[i] .. post, init) }~~ local title = mw.title.new( page ) ~~if match[1] then return unpack(match) end~~ if not title then return Excerpt.getError( 'invalid-title', page ) end local fragment = title.fragment -- save for later if title.isRedirect then title = title.redirectTarget if fragment == "" then fragment = title.fragment -- page merge potential end end if not title.exists then return Excerpt.getError( 'page-not-found', page ) end ~~return nil~~ page = title.prefixedText ~~end~~ local wikitext = title:getContent() -- Get the template params and process them ~~-- Help gsub to remove unwanted templates and pseudo-templates such as #tag:ref and DEFAULTSORT~~ local ~~function~~params ~~stripTemplate(t)~~= { hat = yesno( Excerpt.getArg( 'hat', true ) ), ~~-- If template is unwanted then return "" (gsub will replace by nothing), else return nil (gsub will keep existing string)~~ this = Excerpt.getArg( 'this' ), ~~if matchAny(t, "^{{%s", d.unwantedInlineTemplates, "%s%f[\|}]") then return "" end~~ only = Excerpt.getArg( 'only' ), files = Excerpt.getArg( 'files', Excerpt.getArg( 'file' ) ), lists = Excerpt.getArg( 'lists', Excerpt.getArg( 'list' ) ), tables = Excerpt.getArg( 'tables', Excerpt.getArg( 'table' ) ), templates = Excerpt.getArg( 'templates', Excerpt.getArg( 'template' ) ), paragraphs = Excerpt.getArg( 'paragraphs', Excerpt.getArg( 'paragraph' ) ), references = yesno( Excerpt.getArg( 'references', true ) ), subsections = yesno( Excerpt.getArg( 'subsections', false ) ), links = yesno( Excerpt.getArg( 'links', true ) ), bold = yesno( Excerpt.getArg( 'bold', false ) ), briefDates = yesno( Excerpt.getArg( 'briefdates', false ) ), inline = yesno( Excerpt.getArg( 'inline' ) ), quote = yesno( Excerpt.getArg( 'quote' ) ), more = yesno( Excerpt.getArg( 'more' ) ), class = Excerpt.getArg( 'class' ), displayTitle = Excerpt.getArg( 'displaytitle', page ), } -- Make sure the requested section exists and get the excerpt ~~-- If template is wanted but produces an unwanted reference then return the string with \|shortref or \|ref removed~~ local excerpt ~~local noRef = mw.ustring.gsub(t, "\|%sshortref%s%f[\|}]", "")~~ local section = Excerpt.getArg( 2, fragment ) ~~noRef = mw.ustring.gsub(noRef, "\|%sref%s%f[\|}]", "")~~ section = mw.text.trim( section ) if section == '' then section = nil end if section then excerpt = parser.getSectionTag( wikitext, section ) if not excerpt then if params.subsections then excerpt = parser.getSection( wikitext, section ) else local sections = parser.getSections( wikitext ) excerpt = sections[ section ] end end if not excerpt then return Excerpt.getError( 'section-not-found', section ) end if excerpt == '' then return Excerpt.getError( 'section-empty', section ) end else excerpt = parser.getLead( wikitext ) if excerpt == '' then return Excerpt.getError( 'lead-empty' ) end end -- Remove noinclude bits ~~-- If a wanted template has unwanted nested templates, purge them too~~ excerpt = excerpt:gsub( '<[Nn][Oo][Ii][Nn][Cc][Ll][Uu][Dd][Ee]>.-</[Nn][Oo][Ii][Nn][Cc][Ll][Uu][Dd][Ee]>', '' ) ~~noRef = mw.ustring.sub(noRef, 1, 2) .. mw.ustring.gsub(mw.ustring.sub(noRef, 3), "%b{}", stripTemplate)~~ -- Filter various elements from the excerpt ~~-- Replace {{audio}} by its text parameter: {{Audio\|Foo.ogg\|Bar}} → Bar~~ excerpt = Excerpt.filterFiles( excerpt, params.files ) ~~noRef = mw.ustring.gsub(noRef, "^{{%s[Aa]udio.-\|.-\|(.-)%f[\|}].", "%1")~~ excerpt = Excerpt.filterLists( excerpt, params.lists ) excerpt = Excerpt.filterTables( excerpt, params.tables ) excerpt = Excerpt.filterParagraphs( excerpt, params.paragraphs ) -- If no file is found, try to get one from the infobox ~~-- Replace {{Nihongo foot}} by its text parameter: {{Nihongo foot\|English\|英語\|eigo}} → English~~ if ( params.only == 'file' or params.only == 'files' or not params.only and ( not params.files or params.files ~= '0' ) ) -- caller asked for files ~~noRef = mw.ustring.gsub(noRef, "^{{%s[Nn]ihongo[ _]+foot%s\|(.-)%f[\|}].", "%1")~~ and not section -- and we're in the lead section and config.captions -- and we have the config option required to try finding files in infoboxes and #parser.getFiles( excerpt ) == 0 -- and there're no files in the excerpt then excerpt = Excerpt.addInfoboxFile( excerpt ) end -- Filter the templates by appending the templates blacklist to the templates filter ~~if noRef ~= t then return noRef end~~ if config.blacklist then local blacklist = table.concat( config.blacklist, ',' ) if params.templates then if string.sub( params.templates, 1, 1 ) == '-' then params.templates = params.templates .. ',' .. blacklist end else params.templates = '-' .. blacklist end end excerpt = Excerpt.filterTemplates( excerpt, params.templates ) -- Leave only the requested elements ~~return nil -- not an unwanted template: keep~~ if params.only == 'file' or params.only == 'files' then ~~end~~ local files = parser.getFiles( excerpt ) excerpt = params.only == 'file' and files[1] or table.concat( files, '\n\n' ) end if params.only == 'list' or params.only == 'lists' then local lists = parser.getLists( excerpt ) excerpt = params.only == 'list' and lists[1] or table.concat( lists, '\n\n' ) end if params.only == 'table' or params.only == 'tables' then local tables = parser.getTables( excerpt ) excerpt = params.only == 'table' and tables[1] or table.concat( tables, '\n\n' ) end if params.only == 'paragraph' or params.only == 'paragraphs' then local paragraphs = parser.getParagraphs( excerpt ) excerpt = params.only == 'paragraph' and paragraphs[1] or table.concat( paragraphs, '\n\n' ) end if params.only == 'template' or params.only == 'templates' then local templates = parser.getTemplates( excerpt ) excerpt = params.only == 'template' and templates[1] or table.concat( templates, '\n\n' ) end -- @todo Make more robust and move downwards ~~-- Get a page's content, following redirects, and processing file description pages for files.~~ if params.briefDates then ~~-- Also returns the page name, or the target page name if a redirect was followed, or false if no page found~~ excerpt = Excerpt.fixDates( excerpt ) ~~local function getContent(page, frame)~~ end ~~local title = mw.title.new(page) -- Read description page (for :File:Foo rather than File:Foo)~~ ~~if not title then return false, false end~~ -- Remove unwanted elements ~~local target = title.redirectTarget~~ excerpt = Excerpt.removeComments( excerpt ) ~~if target then title = target end~~ excerpt = Excerpt.removeSelfLinks( excerpt ) excerpt = Excerpt.removeNonFreeFiles( excerpt ) excerpt = Excerpt.removeBehaviorSwitches( excerpt ) -- Fix or remove the references ~~return title:getContent(), title.prefixedText~~ if params.references then ~~end~~ excerpt = Excerpt.fixReferences( excerpt, page, wikitext ) else ~~-- Check image for suitability~~ excerpt = Excerpt.removeReferences( excerpt ) ~~local function checkImage(image)~~ end ~~local page = matchAny(image, "", d.fileNamespaces, "%s:[^\|%]]") -- match File:(name) or Image:(name)~~ ~~if not page then return false end~~ -- Remove wikilinks ~~-- Limit to image types: .gif, .jpg, .jpeg, .png, .svg, .tiff, .xcf (exclude .ogg audio etc.)~~ if not params.links then ~~if not matchAny(page, "%.", {"[Gg][Ii][Ff]", "[Jj][Pp][Ee]?[Gg]", "[Pp][Nn][Gg]", "[Ss][Vv][Gg]", "[Tt][Ii][Ff][Ff]", "[Xx][Cc][Ff]"}, "%s$") then~~ excerpt = Excerpt.removeLinks( excerpt ) ~~return false~~ end -- Link the bold text near the start of most leads and then remove it ~~local fileDescription, fileTitle = getContent(page) -- get file description and title after following any redirect~~ if not section then ~~if fileDescription and fileDescription ~= "" then -- found description on local wiki~~ excerpt = Excerpt.linkBold( excerpt, page ) ~~if mw.ustring.match(fileDescription, "[Nn]on%-free") then return false end~~ end ~~fileDescription = mw.ustring.gsub(fileDescription, "%b{}", stripTemplate) -- remove DEFAULTSORT etc. to avoid side effects of frame:preprocess~~ ~~elseif~~if not ~~fileTitle~~params.bold then excerpt = Excerpt.removeBold( excerpt ) ~~return false~~ ~~else~~ ~~-- try commons~~ ~~fileDescription = "{{" .. fileTitle .. "}}"~~ end ~~frame = frame or mw.getCurrentFrame()~~ ~~fileDescription = frame:preprocess(fileDescription)~~ -- Remove extra line breaks but leave one before and after so the parser interprets lists, tables, etc. correctly ~~return ( fileDescription and fileDescription ~= "" and not mw.ustring.match(fileDescription, "[Nn]on%-free") ) and true or false -- hide non-free image~~ excerpt = excerpt:gsub( '\n\n\n+', '\n\n' ) ~~end~~ excerpt = mw.text.trim( excerpt ) excerpt = '\n' .. excerpt .. '\n' -- Remove nested categories ~~-- Attempt to parse [[File:...]] or [[Image:...]], either anywhere (start=false) or at the start only (start=true)~~ excerpt = frame:preprocess( excerpt ) ~~local function parseImage(text, start)~~ excerpt = Excerpt.removeCategories( excerpt ) ~~local startre = ""~~ ~~if start then startre = "^" end -- a true flag restricts search to start of string~~ -- Add tracking categories ~~local image = matchAny(text, startre .. "%[%[%s", d.fileNamespaces, "%s:.") -- [[File: or [[Image: ...~~ if ~~image~~config.categories then excerpt = Excerpt.addTrackingCategories( excerpt ) ~~image = mw.ustring.match(image, "%b[]%s") -- matching [[...]] to handle wikilinks nested in caption~~ end ~~return image~~ ~~end~~ -- Build the final output ~~-- Parse a caption, which ends at a \| (end of parameter) or } (end of infobox) but may contain nested [..] and {..}~~ if params.inline then ~~local function parseCaption(caption)~~ return mw.text.trim( excerpt ) ~~if not caption then return nil end~~ ~~local length = mw.ustring.len(caption)~~ ~~local position = 1~~ ~~while position <= length do~~ ~~local linkStart, linkEnd = mw.ustring.find(caption, "%b[]", position)~~ ~~linkStart = linkStart or length + 1 -- avoid comparison with nil when no link~~ ~~local templateStart, templateEnd = mw.ustring.find(caption, "%b{}", position)~~ ~~templateStart = templateStart or length + 1 -- avoid comparison with nil when no template~~ ~~local argEnd = mw.ustring.find(caption, "[\|}]", position) or length + 1~~ ~~if linkStart < templateStart and linkStart < argEnd then~~ ~~position = linkEnd + 1 -- skip wikilink~~ ~~elseif templateStart < argEnd then~~ ~~position = templateEnd + 1 -- skip template~~ ~~else -- argument ends before the next wikilink or template~~ ~~return mw.ustring.sub(caption, 1, argEnd - 1)~~ ~~end~~ end ~~return caption -- No terminator found: return entire caption~~ ~~end~~ local tag = params.quote and 'blockquote' or 'div' ~~-- Attempt to construct a [[File:...]] block from {{infobox ... \|image= ...}}~~ local block = mw.html.create( tag ):addClass( 'excerpt-block' ):addClass( params.class ) ~~local function argImage(text)~~ ~~local token = nil~~ ~~local hasNamedArgs = mw.ustring.find(text, "\|") and mw.ustring.find(text, "=")~~ ~~if not hasNamedArgs then return nil end -- filter out any template that obviously doesn't contain an image~~ if config.styles then ~~-- ensure image map is captured~~ local styles = frame:extensionTag( 'templatestyles', '', { src = config.styles } ) ~~text = mw.ustring.gsub(text, '<!%-%-imagemap%-%->', '\|imagemap=')~~ block:node( styles ) end if params.hat then ~~-- find all images~~ local hat = Excerpt.getHat( page, section, params ) ~~local hasImages = false~~ block:node( hat ) ~~local images = {}~~ ~~local captureFrom = 1~~ ~~while captureFrom < mw.ustring.len(text) do~~ ~~local argname, position, image = mw.ustring.match(text, "\|%s([^=\|]-[Ii][Mm][Aa][Gg][Ee][^=\|]-)%s=%s()(.)", captureFrom)~~ ~~if image then -- ImageCaption=, image_size=, image_upright=, etc. do not introduce an image~~ ~~local lcArgName = mw.ustring.lower(argname)~~ ~~if mw.ustring.find(lcArgName, "caption")~~ ~~or mw.ustring.find(lcArgName, "size")~~ ~~or mw.ustring.find(lcArgName, "upright") then~~ ~~image = nil~~ ~~end~~ ~~end~~ ~~if image then~~ ~~hasImages = true~~ ~~images[position] = image~~ ~~captureFrom = position~~ ~~else~~ ~~captureFrom = mw.ustring.len(text)~~ ~~end~~ ~~end~~ ~~captureFrom = 1~~ ~~while captureFrom < mw.ustring.len(text) do~~ ~~local position, image = mw.ustring.match(text, "\|%s[^=\|]-[Pp][Hh][Oo][Tt][Oo][^=\|]-%s=%s()(.)", captureFrom)~~ ~~if image then~~ ~~hasImages = true~~ ~~images[position] = image~~ ~~captureFrom = position~~ ~~else~~ ~~captureFrom = mw.ustring.len(text)~~ ~~end~~ ~~end~~ ~~captureFrom = 1~~ ~~while captureFrom < mw.ustring.len(text) do~~ ~~local position, image = mw.ustring.match(text, "\|%s[^=\|{}]-%s=%s()%[?%[?([^\|{}]%.%a%a%a%a?)%s%f[\|}]", captureFrom)~~ ~~if image then~~ ~~hasImages = true~~ ~~if not images[position] then~~ ~~images[position] = image~~ ~~end~~ ~~captureFrom = position~~ ~~else~~ ~~captureFrom = mw.ustring.len(text)~~ ~~end~~ end excerpt = mw.html.create( 'div' ):addClass( 'excerpt' ):wikitext( excerpt ) ~~if not hasImages then return nil end~~ block:node( excerpt ) if params.more then ~~-- find all captions~~ local more = Excerpt.getReadMore( page, section ) ~~local captions = {}~~ block:node( more ) ~~captureFrom = 1~~ ~~while captureFrom < mw.ustring.len(text) do~~ ~~local position, caption = matchAny(text, "\|%s", d.captionParams, "%s=%s()([^\n]+)", captureFrom)~~ ~~if caption then~~ ~~-- extend caption to parse "\| caption = Foo {{Template\n on\n multiple lines}} Bar\n"~~ ~~local bracedCaption = mw.ustring.match(text, "^[^\n]-%b{}[^\n]+", position)~~ ~~if bracedCaption and bracedCaption ~= "" then caption = bracedCaption end~~ ~~caption = mw.text.trim(caption)~~ ~~local captionStart = mw.ustring.sub(caption, 1, 1)~~ ~~if captionStart == '\|' or captionStart == '}' then caption = nil end~~ ~~end~~ ~~if caption then~~ ~~-- find nearest image, and use same index for captions table~~ ~~local i = position~~ ~~while i > 0 and not images[i] do~~ ~~i = i - 1~~ ~~if images[i] then~~ ~~if not captions[i] then~~ ~~captions[i] = parseCaption(caption)~~ ~~end~~ ~~end~~ ~~end~~ ~~captureFrom = position~~ ~~else~~ ~~captureFrom = mw.ustring.len(text)~~ ~~end~~ end return block ~~-- find all alt text~~ end ~~local altTexts = {}~~ ~~for position, altText in mw.ustring.gmatch(text, "\|%s[Aa][Ll][Tt]%s=%s()([^\n])") do~~ ~~if altText then~~ -- Filter the files in the given wikitext against the given filter ~~-- altText is terminated by }} or \|, but first skip any matched [[...]] and {{...}}~~ function Excerpt.filterFiles( wikitext, filter ) ~~local lookFrom = math.max( -- find position after whichever comes last: start of string, end of last ]] or end of last }}~~ if not filter then return wikitext end ~~mw.ustring.match(altText, ".{%b{}}()") or 1, -- if multiple {{...}}, . consumes all but one, leaving the last for %b~~ local filters, isBlacklist = Excerpt.parseFilter( filter ) ~~mw.ustring.match(altText, ".%[%b[]%]()") or 1)~~ local files = parser.getFiles( wikitext ) for index, file in pairs( files ) do ~~local length = mw.ustring.len(altText)~~ local name = parser.getFileName( file ) ~~local afterText = math.min( -- find position after whichever comes first: end of string, }} or \|~~ if isBlacklist and ( Excerpt.matchFilter( index, filters ) or Excerpt.matchFilter( name, filters ) ) ~~mw.ustring.match(altText, "()}}", lookFrom) or length+1,~~ or not isBlacklist and ( not Excerpt.matchFilter( index, filters ) and not Excerpt.matchFilter( name, filters ) ) then ~~mw.ustring.match(altText, "()\|", lookFrom) or length+1)~~ wikitext = Excerpt.removeString( wikitext, file ) ~~altText = mw.ustring.sub(altText, 1, afterText-1) -- chop off \|... or }}... which is not part of [[...]] or {{...}}~~ ~~altText = mw.text.trim(altText)~~ ~~local altTextStart = mw.ustring.sub(altText, 1, 1)~~ ~~if altTextStart == '\|' or altTextStart == '}' then altText = nil end~~ ~~end~~ ~~if altText then~~ ~~-- find nearest image, and use same index for altTexts table~~ ~~local i = position~~ ~~while i > 0 and not images[i] do~~ ~~i = i - 1~~ ~~if images[i] then~~ ~~if not altTexts[i] then~~ ~~altTexts[i] = altText~~ ~~end~~ ~~end~~ ~~end~~ end end return wikitext end -- Filter the lists in the given wikitext against the given filter ~~-- find all image sizes~~ function Excerpt.filterLists( wikitext, filter ) ~~local imageSizes = {}~~ if not filter then return wikitext end ~~for position, imageSizeMatch in mw.ustring.gmatch(text, "\|%s[Ii][Mm][Aa][Gg][Ee][ _]?[Ss][Ii][Zz][Ee]%s=%s()([^}\|\n])") do~~ local filters, isBlacklist = Excerpt.parseFilter( filter ) ~~local imageSize = mw.ustring.match(imageSizeMatch, "=%s([^}\|\n])")~~ local lists = parser.getLists( wikitext ) ~~if imageSize then~~ for index, list in pairs( lists ) do ~~imageSize = mw.text.trim(imageSize )~~ if isBlacklist and Excerpt.matchFilter( index, filters ) ~~local imageSizeStart = mw.ustring.sub(imageSize, 1, 1)~~ or not isBlacklist and not Excerpt.matchFilter( index, filters ) then ~~if imageSizeStart == '\|' or imageSizeStart == '}' then imageSize = nil end~~ wikitext = Excerpt.removeString( wikitext, list ) ~~end~~ ~~if imageSize then~~ ~~-- find nearest image, and use same index for imageSizes table~~ ~~local i = position~~ ~~while i > 0 and not images[i] do~~ ~~i = i - 1~~ ~~if images[i] then~~ ~~if not imageSizes[i] then~~ ~~imageSizes[i] = imageSize~~ ~~end~~ ~~end~~ ~~end~~ end end return wikitext end -- Filter the tables in the given wikitext against the given filter ~~-- sort the keys of the images table (in a table sequence), so that images can be iterated over in order~~ function Excerpt.filterTables( wikitext, filter ) ~~local keys = {}~~ if not filter then return wikitext end ~~for key, val in pairs(images) do~~ local filters, isBlacklist = Excerpt.parseFilter( filter ) ~~table.insert(keys, key)~~ local tables = parser.getTables( wikitext ) for index, t in pairs( tables ) do local id = string.match( t, '{\|[^\n]-id%s=%s["\']?([^"\'\n]+)["\']?[^\n]\n' ) if isBlacklist and ( Excerpt.matchFilter( index, filters ) or Excerpt.matchFilter( id, filters ) ) or not isBlacklist and ( not Excerpt.matchFilter( index, filters ) and not Excerpt.matchFilter( id, filters ) ) then wikitext = Excerpt.removeString( wikitext, t ) end end return wikitext ~~table.sort(keys)~~ end -- Filter the paragraphs in the given wikitext against the given filter ~~-- add in relevant optional parameters for each image: caption, alt text and image size~~ function Excerpt.filterParagraphs( wikitext, filter ) ~~local imageTokens = {}~~ if not filter then return wikitext end ~~for _, index in ipairs(keys) do~~ local filters, isBlacklist = Excerpt.parseFilter( filter ) ~~local image = images[index]~~ local paragraphs = parser.getParagraphs( wikitext ) ~~local token = parseImage(image, true) -- look for image=[[File:...]] etc.~~ for index, paragraph in pairs( paragraphs ) do ~~if not token then~~ if isBlacklist and Excerpt.matchFilter( index, filters ) ~~image = mw.ustring.match(image, "^[^}\|\n]") -- remove later arguments~~ or not isBlacklist and not Excerpt.matchFilter( index, filters ) then ~~token = "[[" -- Add File: unless name already begins File: or Image:~~ wikitext = Excerpt.removeString( wikitext, paragraph ) ~~if not matchAny(image, "^", d.fileNamespaces, "%s:") then~~ ~~token = token .. "File:"~~ ~~end~~ ~~token = token .. image~~ ~~local caption = captions[index]~~ ~~if caption and mw.ustring.match(caption, "%S") then token = token .. "\|" .. caption end~~ ~~local alt = altTexts[index]~~ ~~if alt then token = token .. "\|alt=" .. alt end~~ ~~local image_size = imageSizes[index]~~ ~~if image_size and mw.ustring.match(image_size, "%S") then token = token .. "\|" .. image_size end~~ ~~token = token .. "]]"~~ end ~~token = mw.ustring.gsub(token, "\n","") .. "\n"~~ ~~table.insert(imageTokens, token)~~ end return ~~imageTokens~~wikitext end -- Filter the templates in the given wikitext against the given filter ~~-- Help gsub convert imagemaps into standard images~~ function Excerpt.filterTemplates( wikitext, filter ) ~~local function convertImageMap(imagemap)~~ if not filter then return wikitext end ~~local image = matchAny(imagemap, "[>\n]%s", d.fileNamespaces, "[^\n]")~~ local filters, isBlacklist = Excerpt.parseFilter( filter ) ~~if image then~~ local templates = parser.getTemplates( wikitext ) ~~return "<!--imagemap-->[[" .. mw.ustring.gsub(image, "[>\n]%s", "", 1) .. "]]"~~ for index, template in pairs( templates ) do ~~else~~ local name = parser.getTemplateName( template ) ~~return "" -- remove entire block if image can't be extracted~~ if isBlacklist and ( Excerpt.matchFilter( index, filters ) or Excerpt.matchFilter( name, filters ) ) or not isBlacklist and ( not Excerpt.matchFilter( index, filters ) and not Excerpt.matchFilter( name, filters ) ) then wikitext = Excerpt.removeString( wikitext, template ) end end return wikitext end function Excerpt.addInfoboxFile( excerpt ) ~~-- Convert a comma-separated list of numbers or min-max ranges into a list of booleans, e.g. "1,3-5" → {1=true,2=false,3=true,4=true,5=true}~~ -- We cannot distinguish the infobox from the other templates, so we search them all ~~local function numberFlags(str)~~ local templates = parser.getTemplates( excerpt ) ~~local ranges = mw.text.split(str, ",") -- parse ranges, e.g. "1,3-5" → {"1","3-5"}~~ for _, template in pairs( templates ) do ~~local flags = {}~~ local parameters = parser.getTemplateParameters( template ) ~~for _, r in pairs(ranges) do~~ local file, captions, caption, cssClasses, cssClass ~~local min, max = mw.ustring.match(r, "^%s(%d+)%s%-%s(%d+)%s$") -- "3-5" → min=3 max=5~~ for _, pair in pairs( config.captions ) do ~~if not max then min, max = mw.ustring.match(r, "^%s((%d+))%s$") end -- "1" → min=1 max=1~~ if file ~~max~~= ~~then~~pair[1] file = parameters[file] ~~for p = min, max do flags[p] = true end~~ if file and Excerpt.matchAny( file, '^.%.', { '[Jj][Pp][Ee]?[Gg]', '[Pp][Nn][Gg]', '[Gg][Ii][Ff]', '[Ss][Vv][Gg]' }, '.' ) then file = string.match( file, '%[?%[?.-:([^{\|]+)%]?%]?' ) or file -- [[File:Example.jpg{{!}}upright=1.5]] to Example.jpg captions = pair[2] for _, p in pairs( captions ) do if parameters[ p ] then caption = parameters[ p ] break end end -- Check for CSS classes -- We opt to use skin-invert-image instead of skin-invert -- in all other cases, the CSS provided in the infobox is used if pair[3] then cssClasses = pair[3] for _, p in pairs( cssClasses ) do if parameters[ p ] then cssClass = ( parameters[ p ] == 'skin-invert' ) and 'skin-invert-image' or parameters[ p ] break end end end local class = cssClass and ( '\|class=' .. cssClass ) or '' return '[[File:' .. file .. class .. '\|thumb\|' .. ( caption or '' ) .. ']]' .. excerpt end end end return ~~flags~~excerpt end function Excerpt.removeNonFreeFiles( wikitext ) ~~local imageArgGroups = {~~ local files = parser.getFiles( wikitext ) ~~{"thumb", "thumbnail", "frame", "framed", "frameless"},~~ for _, file in pairs( files ) do ~~{"right", "left", "center", "none"},~~ local fileName = 'File:' .. parser.getFileName( file ) ~~{"baseline", "middle", "sub", "super", "text-top", "text-bottom", "top", "bottom"}~~ local fileTitle = mw.title.new( fileName ) } if fileTitle then local fileDescription = fileTitle:getContent() ~~local function modifyImage(image, fileArgs)~~ if not fileDescription or fileDescription == '' then ~~if fileArgs then~~ local frame = mw.getCurrentFrame() ~~for _, filearg in pairs(mw.text.split(fileArgs, "\|")) do -- handle fileArgs=left\|border etc.~~ fileDescription = frame:preprocess( '{{' .. fileName .. '}}' ) -- try Commons ~~local fa = mw.ustring.gsub(filearg, "=.", "") -- "upright=0.75" → "upright"~~ ~~local group = {fa} -- group of "border" is ["border"]...~~ ~~for _, g in pairs(imageArgGroups) do~~ ~~for _, a in pairs(g) do~~ ~~if fa == a then group = g end -- ...but group of "left" is ["right", "left", "center", "none"]~~ ~~end~~ end if fileDescription and string.match( fileDescription, '[Nn]on%-free' ) then ~~for _, a in pairs(group) do~~ wikitext = Excerpt.removeString( wikitext, file ) ~~image = mw.ustring.gsub(image, "\|%s" .. a .. "%f[%A]%s=[^\|%]]", "") -- remove "\|upright=0.75" etc.~~ ~~image = mw.ustring.gsub(image, "\|%s" .. a .. "%s([\|%]])", "%1") -- replace "\|left\|" by "\|" etc.~~ end ~~image = mw.ustring.gsub(image, "([\|%]])", "\|" .. filearg .. "%1", 1) -- replace "\|" by "\|left\|" etc.~~ end end return ~~image~~wikitext end function Excerpt.getHat( page, section, params ) ~~-- a basic parser to trim down extracted wikitext~~ local hat ~~-- @param text : Wikitext to be processed~~ ~~-- @param options : A table of options...~~ -- Build the text -- options.paraflags : Which number paragraphs to keep, as either a string (e.g. `1,3-5`) or a table (e.g. `{1=true,2=false,3=true,4=true,5=true}`. If not present, all paragraphs will be kept. if params.this then ~~-- options.fileflags : table of which files to keep, as either a string (e.g. `1,3-5`) or a table (e.g. `{1=true,2=false,3=true,4=true,5=true}`~~ hat = params.this ~~-- options.fileargs : args for the [[File:]] syntax, such as `left`~~ elseif params.quote then ~~-- @param filesOnly : If set, only return the files and not the prose~~ hat = Excerpt.getMessage( 'this' ) ~~local function parse(text, options, filesOnly)~~ elseif params.only then ~~local allParagraphs = true -- keep all paragraphs?~~ hat = Excerpt.getMessage( params.only ) ~~if options.paraflags then~~ else ~~if type(options.paraflags) ~= "table" then options.paraflags = numberFlags(options.paraflags) end~~ hat = Excerpt.getMessage( 'section' ) ~~for _, v in pairs(options.paraflags) do~~ ~~if v then allParagraphs = false end -- if any para specifically requested, don't keep all~~ ~~end~~ end hat = hat .. ' ' .. Excerpt.getMessage( 'excerpt' ) ~~if filesOnly then~~ ~~allParagraphs = false~~ -- Build the link ~~options.paraflags = {}~~ if section then hat = hat .. ' [[:' .. page .. '#' .. mw.uri.anchorEncode( section ) .. '\|' .. params.displayTitle .. ' § ' .. section:gsub( '%[%[([^]\|]+)\|?[^]]%]%]', '%1' ) .. ']].' -- remove nested links else hat = hat .. ' [[:' .. page .. '\|' .. params.displayTitle .. ']].' end -- Build the edit link ~~local maxfile = 0 -- for efficiency, stop checking images after this many have been found~~ local title = mw.title.new( page ) ~~if options.fileflags then~~ local editUrl = title:fullUrl( 'action=edit' ) ~~if type(options.fileflags) ~= "table" then options.fileflags = numberFlags(options.fileflags) end~~ hat = hat .. '<span class="mw-editsection-like plainlinks"><span class="mw-editsection-bracket">[</span>[' ~~for k, v in pairs(options.fileflags) do~~ hat = hat .. editUrl .. ' ' .. mw.message.new( 'editsection' ):plain() ~~if v and k > maxfile then maxfile = k end -- set maxfile = highest key in fileflags~~ hat = hat .. ']<span class="mw-editsection-bracket">]</span></span>' ~~end~~ if config.hat then local frame = mw.getCurrentFrame() hat = config.hat .. hat .. '}}' hat = frame:preprocess( hat ) else hat = mw.html.create( 'div' ):addClass( 'dablink excerpt-hat' ):wikitext( hat ) end ~~local fileArgs = options.fileargs and mw.text.trim(options.fileargs)~~ ~~if fileArgs == '' then fileArgs = nil end~~ return hat ~~local leadStart = nil -- have we found some text yet?~~ end ~~local t = "" -- the stripped down output text~~ ~~local fileText = "" -- output text with concatenated [[File:Foo\|...]]\n entries~~ ~~local files = 0 -- how many images so far~~ ~~local paras = 0 -- how many paragraphs so far~~ ~~local startLine = true -- at the start of a line (no non-spaces found since last \n)?~~ ~~text = mw.ustring.gsub(text,"^%s","") -- remove initial white space~~ function Excerpt.getReadMore( page, section ) ~~-- Add named files~~ local flink = ~~options~~"'''[[" .~~files~~. page if section then ~~if f and mw.ustring.match(f, "[^%d%s%-,]") then -- filename rather than number list~~ link = link .. '#' .. section ~~f = mw.ustring.gsub(f, "^%sFile%s:%s", "", 1)~~ ~~f = mw.ustring.gsub(f, "^%sImage%s:%s", "", 1)~~ ~~f = "[[File:" .. f .. "]]"~~ ~~f = modifyImage(f, "thumb")~~ ~~f = modifyImage(f, fileArgs)~~ ~~if checkImage(f) then fileText = fileText .. f .. "\n" end~~ end local text = Excerpt.getMessage( 'more' ) link = link .. '\|' .. text .. "]]'''" link = mw.html.create( 'div' ):addClass( 'noprint excerpt-more' ):wikitext( link ) return link end ~~repeat~~ -- ~~loop~~Fix ~~around~~birth ~~parsing~~and adeath ~~template~~dates, ~~image~~but only in the orfirst paragraph -- @todo Use parser.getParagraphs() to get the first paragraph ~~local token = mw.ustring.match(text, "^%b{}%s") or false -- {{Template}} or {\| Table \|}~~ function Excerpt.fixDates( excerpt ) ~~if not leadStart and not token then token = mw.ustring.match(text, "^%b<>%s%b{}%s") end -- allow <tag>{{template}} before lead has started~~ local start = 1 -- skip initial templates local s ~~local line = mw.ustring.match(text, "[^\n]")~~ local e = 0 ~~if token and line and mw.ustring.len(token) < mw.ustring.len(line) then -- template is followed by text (but it may just be other templates)~~ repeat ~~line = mw.ustring.gsub(line, "%b{}", "") -- remove all templates from this line~~ start = e + 1 ~~line = mw.ustring.gsub(line, "%b<>", "") -- remove all HTML tags from this line~~ s, e = mw.ustring.find( excerpt, '%s%b{}%s', start ) ~~-- if anything is left, other than an incomplete further template or an image, keep the template: it counts as part of the line~~ until not s or s > start ~~if mw.ustring.find(line, "%S") and not matchAny(line, "^%s", { "{{", "%[%[%s[Ff]ile:", "%[%[%s[Ii]mage:" }, "") then~~ s, e = mw.ustring.find( excerpt, '%b()', start ) -- get (...), which may be (year–year) ~~token = nil~~ if s and s < start + 100 then -- look only near the start local excerptStart = mw.ustring.sub( excerpt, s, e ) local year1, conjunction, year2 = string.match( excerptStart, '(%d%d%d+)(.-)(%d%d%d+)' ) if year1 and year2 and ( string.match( conjunction, '[%-–—]' ) or string.match( conjunction, '{{%s[sS]nd%s}}' ) ) then local y1 = tonumber( year1 ) local y2 = tonumber( year2 ) if y2 > y1 and y2 < y1 + 125 and y1 <= tonumber( os.date( '%Y' ) ) then excerpt = mw.ustring.sub( excerpt, 1, s ) .. year1 .. '–' .. year2 .. mw.ustring.sub( excerpt, e ) end end end return excerpt end -- Replace the first call to each reference defined outside of the excerpt for the full reference, to prevent undefined references ~~if token then -- found a template which is not the prefix to a line of text~~ -- Then prefix the page title to the reference names to prevent conflicts -- that is, replace <ref name="Foo"> for <ref name="Title of the article Foo"> ~~if leadStart then -- lead has already started, so keep the template within the text, unless it's a whole line (navbox etc.)~~ -- and also <ref name="Foo" /> for <ref name="Title of the article Foo" /> ~~if not filesOnly and not startLine then t = t .. token end~~ -- also remove reference groups: <ref name="Foo" group="Bar"> for <ref name="Title of the article Foo"> -- and <ref group="Bar"> for <ref> ~~elseif matchAny(token, "{{%s", d.wantedBlockTemplates, "%s%f[\|}]") then~~ -- @todo The current regex may fail in cases with both kinds of quotes, like <ref name="Darwin's book"> ~~t = t .. token -- keep wanted block templates~~ function Excerpt.fixReferences( excerpt, page, wikitext ) local references = parser.getReferences( excerpt ) ~~elseif is(options.keepTables) and mw.ustring.sub(token, 1, 2) == '{\|' then~~ local fixed = {} ~~t = t .. token -- keep tables~~ for _, reference in pairs( references ) do local name = parser.getTagAttribute( reference, 'name' ) ~~elseif files < maxfile then -- discard template, but if we are still collecting images...~~ if not fixed[ name ] then -- fix each reference only once ~~local images = argImage(token) or {}~~ local content = parser.getTagContent( reference ) ~~if not images then~~ if not content then -- reference is self-closing ~~local image = parseImage(token, false) -- look for embedded [[File:...]], \|image=, etc.~~ local full = parser.getReference( excerpt, name ) ~~if image then table.insert(images, image) end~~ if not full then -- the reference is not defined in the excerpt ~~end~~ full = parser.getReference( wikitext, name ) ~~for _, image in ipairs(images) do~~ if full then ~~if files < maxfile and checkImage(image) then -- if image is found and qualifies (not a sound file, non-free, etc.)~~ excerpt = excerpt:gsub( Excerpt.escapeString( reference ), Excerpt.escapeString( full ), 1 ) ~~files = files + 1 -- count the file, whether displaying it or not~~ ~~if options.fileflags and options.fileflags[files] then -- if displaying this image~~ ~~image = modifyImage(image, "thumb")~~ ~~image = modifyImage(image, fileArgs)~~ ~~fileText = fileText .. image~~ ~~end~~ end table.insert( fixed, name ) end end end ~~else -- the next token in text is not a template~~ end ~~token = parseImage(text, true)~~ -- Prepend the page title to the reference names to prevent conflicts with other references in the transcluding page ~~if token then -- the next token in text looks like an image~~ excerpt = excerpt:gsub( '< [Rr][Ee][Ff][^>]name = ["\']?([^"\'>/]+)["\']?[^>/](/?) >', '<ref name="' .. page:gsub( '"', '' ) .. ' %1"%2>' ) ~~if files < maxfile and checkImage(token) then -- if more images are wanted and this is a wanted image~~ -- Remove reference groups because they don't apply to the transcluding page ~~files = files + 1~~ excerpt = excerpt:gsub( '< [Rr][Ee][Ff] group = ["\']?[^"\'>/]+["\'] >', '<ref>' ) ~~if options.fileflags and options.fileflags[files] then~~ return excerpt ~~local image = token -- copy token for manipulation by adding \|right etc. without changing the original~~ ~~image = modifyImage(image, fileArgs)~~ ~~fileText = fileText .. image~~ ~~end~~ ~~end~~ ~~else -- got a paragraph, which ends at a file, image, blank line or end of text~~ ~~local afterEnd = mw.ustring.len(text) + 1~~ ~~local blankPosition = mw.ustring.find(text, "\n%s\n") or afterEnd -- position of next paragraph delimiter (or end of text)~~ ~~local endPosition = math.min( -- find position of whichever comes first: [[File:, [[Image: or paragraph delimiter~~ ~~mw.ustring.find(text, "%[%[%s[Ff]ile%s:") or afterEnd,~~ ~~mw.ustring.find(text, "%[%[%s[Ii]mage%s:") or afterEnd,~~ ~~blankPosition)~~ ~~token = mw.ustring.sub(text, 1, endPosition-1)~~ ~~if blankPosition < afterEnd and blankPosition == endPosition then -- paragraph ends with a blank line~~ ~~token = token .. mw.ustring.match(text, "\n%s\n", blankPosition)~~ ~~end~~ ~~local isHatnote = not(leadStart) and mw.ustring.sub(token, 1, 1) == ':'~~ ~~if not isHatnote then~~ ~~leadStart = leadStart or mw.ustring.len(t) + 1 -- we got a paragraph, so mark the start of the lead section~~ ~~paras = paras + 1~~ ~~if allParagraphs or (options.paraflags and options.paraflags[paras]) then t = t .. token end -- add if this paragraph wanted~~ ~~end~~ ~~end -- of "else got a paragraph"~~ ~~end -- of "else not a template"~~ ~~if token then text = mw.ustring.sub(text, mw.ustring.len(token)+1) end -- remove parsed token from remaining text~~ ~~startLine = mw.ustring.find(token, "\n%s$") -- will the next token be the first non-space on a line?~~ ~~until not text or text == "" or not token or token == "" -- loop until all text parsed~~ ~~text = mw.ustring.gsub(t, "\n+$", "") -- remove trailing line feeds, so "{{Transclude text excerpt\|Foo}} more" flows on one line~~ ~~return fileText, text~~ end ~~local~~ function ~~cleanupText~~Excerpt.removeReferences(~~text,~~ ~~options~~excerpt ) local references = parser.getReferences( excerpt ) ~~text = mw.ustring.gsub(text, "<!%-%-.-%-%->","") -- remove HTML comments~~ for _, reference in pairs( references ) do ~~text = mw.ustring.gsub(text, "<[Nn][Oo][Ii][Nn][Cc][Ll][Uu][Dd][Ee]>.-</[Nn][Oo][Ii][Nn][Cc][Ll][Uu][Dd][Ee]>", "") -- remove noinclude bits~~ excerpt = Excerpt.removeString( excerpt, reference ) ~~if mw.ustring.find(text, "[Oo][Nn][Ll][Yy][Ii][Nn][Cc][Ll][Uu][Dd][Ee]") then -- avoid expensive search if possible~~ ~~text = mw.ustring.gsub(text, "</[Oo][Nn][Ll][Yy][Ii][Nn][Cc][Ll][Uu][Dd][Ee]>.-<[Oo][Nn][Ll][Yy][Ii][Nn][Cc][Ll][Uu][Dd][Ee]>", "") -- remove text between onlyinclude sections~~ ~~text = mw.ustring.gsub(text, "^.-<[Oo][Nn][Ll][Yy][Ii][Nn][Cc][Ll][Uu][Dd][Ee]>", "") -- remove text before first onlyinclude section~~ ~~text = mw.ustring.gsub(text, "</[Oo][Nn][Ll][Yy][Ii][Nn][Cc][Ll][Uu][Dd][Ee]>.", "") -- remove text after last onlyinclude section~~ end return excerpt ~~if not is(options.keepSubsections) then~~ ~~text = mw.ustring.gsub(text, "\n==.","") -- remove first ==Heading== and everything after it~~ ~~text = mw.ustring.gsub(text, "^==.","") -- ...even if the lead is empty~~ ~~end~~ ~~if not is(options.keepRefs) then~~ ~~text = mw.ustring.gsub(text, "<%s[Rr][Ee][Ff][^>]-/%s>", "") -- remove refs cited elsewhere~~ ~~text = mw.ustring.gsub(text, "<%s[Rr][Ee][Ff].->.-<%s/%s[Rr][Ee][Ff]%s>", "") -- remove refs~~ ~~text = mw.ustring.gsub(text, "%b{}", stripTemplate) -- remove unwanted templates such as references~~ ~~end~~ ~~text = mw.ustring.gsub(text, "<%s[Ss][Cc][Oo][Rr][Ee].->.-<%s/%s[Ss][Cc][Oo][Rr][Ee]%s>", "") -- remove musical scores~~ ~~text = mw.ustring.gsub(text, "<%s[Ii][Mm][Aa][Gg][Ee][Mm][Aa][Pp].->.-<%s/%s[Ii][Mm][Aa][Gg][Ee][Mm][Aa][Pp]%s>", convertImageMap) -- convert imagemaps into standard images~~ ~~text = mw.ustring.gsub(text, "%s{{%s[Tt][Oo][Cc].-}}", "") -- remove most common tables of contents~~ ~~text = mw.ustring.gsub(text, "%s__[A-Z]TOC__", "") -- remove TOC behavior switches~~ ~~text = mw.ustring.gsub(text, "\n%s{{%s[Pp]p%-.-}}", "\n") -- remove protection templates~~ ~~text = mw.ustring.gsub(text, "%s{{[^{\|}][Ss]idebar%s}}", "") -- remove most sidebars~~ ~~text = mw.ustring.gsub(text, "%s{{[^{\|}]%-[Ss]tub%s}}", "") -- remove most stub templates~~ ~~text = mw.ustring.gsub(text, "%s%[%[%s:?[Cc]ategory:.-%]%]", "") -- remove categories~~ ~~text = mw.ustring.gsub(text, "^:[^\n]+\n","") -- remove DIY hatnote indented with a colon~~ ~~return text~~ end function Excerpt.removeCategories( excerpt ) ~~-- Parse a ==Section== from a page~~ local categories = parser.getCategories( excerpt ) ~~local function getSection(text, section, mainOnly)~~ for _, category in pairs( categories ) do ~~local escapedSection = mw.ustring.gsub(mw.uri.decode(section), "([%^%$%(%)%%%.%[%]%%+%-%?])", "%%%1") -- %26 → & etc, then ^ → %^ etc.~~ excerpt = Excerpt.removeString( excerpt, category ) ~~local level, content = mw.ustring.match(text .. "\n", "\n(==+)%s" .. escapedSection .. "%s==.-\n(.)")~~ ~~if not content then return nil end -- no such section~~ ~~local nextSection~~ ~~if mainOnly then~~ ~~nextSection = "\n==." -- Main part of section terminates at any level of header~~ ~~else~~ ~~nextSection = "\n==" .. mw.ustring.rep("=?", #level - 2) .. "[^=]." -- "===" → "\n===?[^=].", matching "==" or "===" but not "===="~~ end return excerpt ~~content = mw.ustring.gsub(content, nextSection, "") -- remove later sections with headings at this level or higher~~ ~~return content~~ end function Excerpt.removeBehaviorSwitches( excerpt ) ~~-- Remove unmatched <tag> or </tag> tags~~ return excerpt:gsub( '__[A-Z]+__', '' ) ~~local function fixTags(text, tag)~~ end ~~local startCount = 0~~ ~~for i in mw.ustring.gmatch(text, "<%s" .. tag .. "%f[^%w_].->") do startCount = startCount + 1 end~~ function Excerpt.removeComments( excerpt ) ~~local endCount = 0~~ return excerpt:gsub( '<!%-%-.-%-%->', '' ) ~~for i in mw.ustring.gmatch(text, "<%s/" .. tag .. "%f[^%w_].->") do endCount = endCount + 1 end~~ end function Excerpt.removeBold( excerpt ) ~~if startCount > endCount then -- more <tag> than </tag>: remove the last few <tag>s~~ return excerpt:gsub( "'''", '' ) ~~local i = 0~~ ~~text = mw.ustring.gsub(text, "<%s" .. tag .. "%f[^%w_].->", function(t)~~ ~~i = i + 1~~ ~~if i > endCount then return "" else return nil end~~ ~~end) -- "end" here terminates the anonymous replacement function(t) passed to gsub~~ ~~elseif endCount > startCount then -- more </tag> than <tag>: remove the first few </tag>s~~ ~~text = mw.ustring.gsub(text, "<%s/" .. tag .. "%f[^%w_].->", "", endCount - startCount)~~ ~~end~~ ~~return text~~ end function Excerpt.removeLinks( excerpt ) ~~-- Main function returns a string value: text of the lead of a page~~ local links = parser.getLinks( excerpt ) ~~local function main(pageNames, options)~~ for _, link in pairs( links ) do ~~if not pageNames or #pageNames < 1 then return err("No page names given") end~~ excerpt = Excerpt.removeString( excerpt, link ) ~~local pageName~~ ~~local text~~ ~~local pageCount = #pageNames~~ ~~local firstPage = pageNames[1] or "(nil)" -- save for error message, as it the name will be deleted~~ ~~local gotOptions~~ ~~local pageOptionsString~~ ~~local section~~ ~~-- read the page, or a random one if multiple pages were provided~~ ~~if pageCount > 1 then math.randomseed(os.time()) end~~ ~~while not text and pageCount > 0 do~~ ~~local pageNumber = 1~~ ~~if pageCount > 1 then pageNumber = math.random(pageCount) end -- pick a random title~~ ~~pageName = pageNames[pageNumber]~~ ~~if pageName and pageName ~= "" then~~ ~~-- We have page or [[page]] or [[page\|text]], possibly followed by \|opt1\|opt2...~~ ~~local pn~~ ~~pn, gotOptions, pageOptionsString = mw.ustring.match(pageName, "^%s(%[%b[]%])%s(\|?)(.)")~~ ~~if pn then~~ ~~pageName = mw.ustring.match(pn, "%[%[([^\|%]])") -- turn [[page\|text]] into page, discarding text~~ ~~else -- we have page or page\|opt...~~ ~~pageName, gotOptions, pageOptionsString = mw.ustring.match(pageName, "%s([^\|][^\|%s])%s(\|?)(.)")~~ ~~end~~ ~~if pageName and pageName ~= "" then~~ ~~local pn~~ ~~pn, section = mw.ustring.match(pageName, "(.-)#(.)")~~ ~~pageName = pn or pageName~~ ~~text, normalisedPageName = getContent(pageName)~~ ~~if is(options.fragment) then~~ ~~local frame = mw.getCurrentFrame()~~ ~~text = frame:callParserFunction('#lst', normalisedPageName, options.fragment)~~ ~~end~~ ~~if not normalisedPageName then~~ ~~return err("No title for page name " .. pageName)~~ ~~else~~ ~~pageName = normalisedPageName~~ ~~end~~ ~~if text and options.nostubs then~~ ~~local isStub = mw.ustring.find(text, "%s{{[^{\|}]%-[Ss]tub%s}}")~~ ~~if isStub then text = nil end~~ ~~end~~ ~~if not section then~~ ~~section = mw.ustring.match(pageName, ".-#(.)") -- parse redirect to Page#Section~~ ~~end~~ ~~if text and section and section ~= "" then text = getSection(text, section) end~~ ~~end~~ ~~end~~ ~~if not text then table.remove(pageNames, pageNumber) end -- this one didn't work; try another~~ ~~pageCount = pageCount - 1 -- ensure that we exit the loop after at most #pageNames iterations~~ end return excerpt ~~if not text then return err("Cannot read a valid page: first name is " .. firstPage) end~~ end -- @todo Use parser.getLinks ~~text = cleanupText(text, options)~~ function Excerpt.removeSelfLinks( excerpt, page ) local lang = mw.language.getContentLanguage() ~~local pageOptions = {} -- pageOptions (even if value is "") have priority over global options~~ local page = Excerpt.escapeString( mw.title.getCurrentTitle().prefixedText ) ~~for k, v in pairs(options) do pageOptions[k] = v end~~ local ucpage = lang:ucfirst( page ) ~~if gotOptions and gotOptions ~= "" then~~ local lcpage = lang:lcfirst( page ) ~~for _, t in pairs(mw.text.split(pageOptionsString, "\|")) do~~ excerpt = excerpt ~~local k, v = mw.ustring.match(t, "%s([^=]-)%s=(.-)%s$")~~ :gsub( '%[%[(' .. ucpage .. ')%]%]', '%1' ) ~~pageOptions[k] = v~~ :gsub( '%[%[(' .. lcpage .. ')%]%]', '%1' ) ~~end~~ :gsub( '%[%[' .. ucpage .. '\|([^]]+)%]%]', '%1' ) ~~pageOptions.paraflags = numberFlags(pageOptions["paragraphs"] or "") -- parse paragraphs, e.g. "1,3-5" → {"1","3-5"}~~ :gsub( '%[%[' .. lcpage .. '\|([^]]+)%]%]', '%1' ) ~~pageOptions.fileflags = numberFlags(pageOptions["files"] or "") -- parse file numbers~~ return excerpt ~~if pageOptions.more and pageOptions.more == "" then pageOptions.more = "Read more..." end -- more= is short for this default text~~ end ~~local fileText~~ ~~fileText, text = parse(text, pageOptions)~~ -- ~~replace~~Replace the bold title or synonym near the start of the ~~article~~page by a ~~wikilink~~link to the ~~article~~page function Excerpt.linkBold( excerpt, page ) local lang = mw.language.getContentLanguage() local ~~pos~~position = mw.ustring.find(~~text~~ excerpt, "'''" .. lang:ucfirst(~~pageName~~ page ) .. "'''", 1, true ) -- look for "'''Foo''' is..." (uc) or "A '''foo''' is..." (lc) or mw.ustring.find(~~text~~ excerpt, "'''" .. lang:lcfirst(~~pageName~~ page ) .. "'''", 1, true ) -- plain search: special characters in ~~pageName~~page represent themselves if ~~pos~~position then local ~~len~~length = mw.ustring.len(~~pageName~~ page ) ~~text~~excerpt = mw.ustring.sub(~~text~~ excerpt, 1, ~~pos~~position + 2 ) .. "'[["' .. mw.ustring.sub(~~text~~ excerpt, ~~pos~~position + 3, ~~pos~~position + ~~len~~length + 2 ) .. "']]"' .. mw.ustring.sub(~~text~~ excerpt, ~~pos~~position + ~~len~~length + 3, -1 ) -- link it else -- look for anything unlinked in bold, assumed to be a synonym of the title (e.g. a person's birth name) ~~text~~excerpt = mw.ustring.gsub(~~text~~ excerpt, "()'''(.-')'''", function ( a, b ) if anot <mw.ustring.find( ~~100~~b, '%[' ) and not mw.ustring.find( b, "'%["{' ) then --- if ~~early~~not inwikilinked ~~article~~or ~~and~~some ~~not~~weird ~~wikilinked~~template return "'''[[" .. ~~pageName~~page .. "'\|"' .. b .. "]]'''" -- replace '''Foo''' by '''[[~~pageName~~page\|Foo]]''' else return nil -- instruct gsub to make no change end end, 1 ) -- ~~"end" here~~ terminates the anonymous replacement function~~(a, b)~~ passed to gsub end return excerpt end function Excerpt.addTrackingCategories( excerpt ) ~~-- remove '''bold text''' if requested~~ local currentTitle = mw.title.getCurrentTitle() ~~if is(pageOptions.nobold) then text = mw.ustring.gsub(text, "'''", "") end~~ local contentCategory = config.categories.content if contentCategory and currentTitle.isContentPage then ~~text = fileText .. text~~ excerpt = excerpt .. '[[Category:' .. contentCategory .. ']]' ~~-- Seek and destroy unterminated templates and wikilinks~~ ~~repeat -- hide matched {{template}}s including nested templates~~ ~~local t = text~~ ~~text = mw.ustring.gsub(text, "{(%b{})}", "\27{\27%1\27}\27") -- {{sometemplate}} → E{Esometemplate}E}E where E represents escape~~ ~~text = mw.ustring.gsub(text, "(< math[^>]>[^<]-)}}(.-< /math >)", "%1}\27}\27%2") -- <math>\{sqrt\{hat{x}}</math> → <math>\{sqrt\{hat{x}E}E</math>~~ ~~until text == t~~ ~~repeat -- do similar for [[wikilink]]s~~ ~~local t = text~~ ~~text = mw.ustring.gsub(text, "%[(%b[])%]", "\27[\27%1\27]\27")~~ ~~until text == t~~ ~~text = text.gsub(text, "([{}%[%]])%1[^\27].", "") -- remove unmatched {{, }}, [[ or ]] and everything thereafter, avoiding ]E]E etc.~~ ~~text = text.gsub(text, "([{}%[%]])%1$", "") -- remove unmatched {{, }}, [[ or ]] at end of text~~ ~~text = mw.ustring.gsub(text, "\27", "") -- unhide matched pairs: E{E{ → {{, ]E]E → ]], etc.~~ ~~-- Ensure div tags match~~ ~~text = fixTags(text, "div")~~ ~~if pageOptions.more then text = text .. " '''[[" .. pageName .. "\|" .. pageOptions.more .. "]]'''" end -- wikilink to article for more info~~ ~~if pageOptions.list and not pageOptions.showall then -- add a collapsed list of pages which might appear~~ ~~local listtext = pageOptions.list~~ ~~if listtext == "" then listtext = "Other articles" end~~ ~~text = text .. "{{collapse top\|title={{resize\|85%\|" ..listtext .. "}}\|bg=fff}}{{hlist"~~ ~~for _, p in pairs(pageNames) do~~ ~~if mw.ustring.match(p, "%S") then text = text .. "\|[[" .. mw.text.trim(p) .. "]]" end~~ ~~end~~ ~~text = text .. "}}\n{{collapse bottom}}"~~ end local namespaceCategory = config.categories[ currentTitle.namespace ] if namespaceCategory then ~~return text~~ excerpt = excerpt .. '[[Category:' .. namespaceCategory .. ']]' end return excerpt end -- Helper method to match from a list of regular expressions ~~-- Shared template invocation code for lead and random functions~~ -- Like so: match pre..list[1]..post or pre..list[2]..post or ... ~~local function invoke(frame, template)~~ function Excerpt.matchAny( text, pre, list, post, init ) ~~-- args = { 1,2,... = page names, paragraphs = list e.g. "1,3-5", files = list, more = text}~~ local match = {} ~~local args = {} -- args[k] = frame.args[k] or frame:getParent().args[k] for all k in either (numeric or not)~~ for i = 1, #list do ~~for k, v in pairs(frame:getParent().args) do args[k] = v end~~ match = { mw.ustring.match( text, pre .. list[ i ] .. post, init ) } ~~for k, v in pairs(frame.args) do args[k] = v end -- args from a Lua call have priority over parent args from template~~ if match[1] then return unpack( match ) end ~~errors = args["errors"] -- set the module level boolean used in local function err~~ ~~local articleCount = #args -- must be 1 except with selected=Foo and Foo=Somepage~~ ~~if articleCount < 1 and not (template == "selected" and args[template] and args[args[template]]) then~~ ~~return err("No articles provided")~~ end return nil end -- Helper function to get arguments ~~local pageNames = {}~~ -- args from Lua calls have priority over parent args from template ~~if template == "lead" then~~ function Excerpt.getArg( key, default ) ~~pageNames = { args[1] }~~ local frame = mw.getCurrentFrame() ~~elseif template == "linked" or template == "listitem" then~~ for k, value in pairs( frame:getParent().args ) do ~~-- Read named page and find its wikilinks~~ if k == key and mw.text.trim( value ) ~= '' then ~~local page = args[1]~~ return value ~~local text, title = getContent(page)~~ ~~if not title then~~ ~~return err("No title for page name " .. page)~~ ~~elseif not text then~~ ~~return err("No content for page name " .. page)~~ end ~~if args["section"] then -- check relevant section only~~ ~~text = getSection(text, args["section"], args["sectiononly"])~~ ~~if not text then return err("No section " .. args["section"] .. " in page " .. page) end~~ ~~end~~ ~~-- replace annotated links with real links~~ ~~text = mw.ustring.gsub(text, "{{%s[Aa]nnotated[ _]link%s\|%s(.-)%s}}", "[[%1]]")~~ ~~if template == "linked" then~~ ~~for p in mw.ustring.gmatch(text, "%[%[%s([^%]\|\n])") do table.insert(pageNames, p) end~~ ~~else -- listitem: first wikilink on a line beginning , :#, etc. except in "See also" or later section~~ ~~text = mw.ustring.gsub(text, "\n== See also.", "")~~ ~~for p in mw.ustring.gmatch(text, "\n:[%#][^\n]-%[%[%s([^%]\|\n])") do table.insert(pageNames, p) end~~ ~~end~~ ~~elseif template == "random" then~~ ~~-- accept any number of page names. If more than one, we'll pick one randomly~~ ~~for i, p in pairs(args) do~~ ~~if p and type(i) == 'number' then table.insert(pageNames, p) end~~ ~~end~~ ~~elseif template == "selected" then~~ ~~local articleKey = args[template]~~ ~~if tonumber(articleKey) then -- normalise article number into the range 1..#args~~ ~~articleKey = articleKey % articleCount~~ ~~if articleKey == 0 then articleKey = articleCount end~~ ~~end~~ ~~pageNames = { args[articleKey] }~~ end for k, value in pairs( frame.args ) do if k == key and mw.text.trim( value ) ~= '' then ~~local options = args -- pick up miscellaneous options: more, errors, fileargs~~ return value ~~options.paraflags = numberFlags(args["paragraphs"] or "") -- parse paragraphs, e.g. "1,3-5" → {"1","3-5"}~~ ~~options.fileflags = numberFlags(args["files"] or "") -- parse file numbers~~ ~~if options.more and options.more == "" then options.more = "Read more..." end -- more= is short for this default text~~ ~~local text = ""~~ ~~if options.showall then~~ ~~local separator = ""~~ ~~for _, p in pairs(pageNames) do~~ ~~local t = main({ p }, options)~~ ~~if t ~= "" then~~ ~~text = text .. separator .. t~~ ~~separator = options.showall~~ ~~if separator == "" then separator = "{{clear}}{{hr}}" end~~ ~~end~~ end ~~else~~ ~~text = main(pageNames, options)~~ end return default end -- Helper method to get an error message ~~if text == "" and d.brokenCategory and d.brokenCategory ~= "" and mw.title.getCurrentTitle().isContentPage then~~ -- This method also categorizes the current page in one of the configured error categories ~~return "[[Category:" .. d.brokenCategory .. "]]"~~ function Excerpt.getError( key, value ) ~~else~~ local message = Excerpt.getMessage( 'error-' .. key, value ) ~~return frame:preprocess(text)~~ local markup = mw.html.create( 'div' ):addClass( 'error' ):wikitext( message ) if config.categories and config.categories.errors and mw.title.getCurrentTitle().isContentPage then markup:node( '[[Category:' .. config.categories.errors .. ']]' ) end return markup end -- Helper method to get a localized message ~~-- Replicate {{Excerpt}} entirely in Lua for reduced Post-expand include size~~ -- This method uses Module:TNT to get localized messages from https://commons.wikimedia.org/wiki/Data:I18n/Module:Excerpt.tab ~~local function excerpt(frame)~~ -- If Module:TNT is not available or the localized message does not exist, the key is returned instead ~~local args = {} -- args[k] = frame.args[k] or frame:getParent().args[k] for all k in either (numeric or not)~~ function Excerpt.getMessage( key, value ) ~~for k, v in pairs(frame:getParent().args) do args[k] = v end~~ local ok, TNT = pcall( require, 'Module:TNT' ) ~~for k, v in pairs(frame.args) do args[k] = v end -- args from a Lua call have priority over parent args from template~~ if not ok then return key end local ok2, message = pcall( TNT.format, 'I18n/Module:Excerpt.tab', key, value ) if not ok2 then return key end return message end -- Helper method to escape a string for use in regexes ~~local tag = is(args.tag) and args.tag or 'div'~~ function Excerpt.escapeString( str ) ~~local article = is(args.article) and args.article or args[1] or '{{{1}}}'~~ return str:gsub( '[%^%$%(%)%.%[%]%%+%-%?%%]', '%%%0' ) ~~local section = is(args.section) and args.section or args[2]~~ end -- Helper method to remove a string from a text ~~local output = {}~~ -- @param text Text from where to remove the string ~~output[1] = frame:extensionTag{ name = 'templatestyles', args = {src='Excerpt/styles.css'} }~~ -- @param str String to remove ~~output[2] = '<' .. tag .. ' class="excerpt-block">'~~ -- @return The given text with the string removed ~~output[3] = is(args.indicator) and ('<' .. tag .. ' class="excerpt-indicator">') or ''~~ function Excerpt.removeString( text, str ) ~~if is(args.nohat) then~~ local pattern = Excerpt.escapeString( str ) ~~output[4] = ''~~ if #pattern > 9999 then -- strings longer than 10000 bytes can't be put into regexes ~~else~~ pattern = Excerpt.escapeString( mw.ustring.sub( str, 1, 999 ) ) .. '.-' .. Excerpt.escapeString( mw.ustring.sub( str, -999 ) ) ~~local hatnote = {}~~ end ~~hatnote[1] = 'This' .. (is(args.indicator) and '' or ' section') .. ' is an excerpt from '~~ return text:gsub( pattern, '' ) ~~hatnote[2] = '[['~~ end ~~hatnote[3] = article .. (is(section) and ('#' .. frame:callParserFunction( 'urlencode', section, 'WIKI' )) or '')~~ ~~hatnote[4] = '\|'~~ ~~hatnote[5] = article .. (is(section) and (frame:callParserFunction( '#tag:nowiki', ' § ' ) .. section) or '')~~ ~~hatnote[6] = ']]'~~ ~~hatnote[7] = "''" .. '<span class="mw-editsection-like plainlinks"><span>[ </span>['~~ ~~local title = mw.title.new(article) or mw.title.getCurrentTitle()~~ ~~hatnote[8] = title:fullUrl('action=edit') .. ' edit'~~ ~~hatnote[9] = ']<span> ]</span></span>' .. "''"~~ ~~output[4] = require('Module:Hatnote')._hatnote(table.concat(hatnote), {selfref=true}) or err("Error generating hatnote")~~ ~~end~~ ~~output[5] = '<' .. tag .. ' class="excerpt">\n'~~ ~~if article ~= '{{{1}}}' then~~ ~~local options = args -- turn template arguments into module options~~ ~~options.paraflags = args.paragraphs~~ ~~options.fileflags = args.files or 1~~ ~~options.nobold = 1~~ ~~options.fragment = args.fragment~~ ~~options.keepTables = args.tables or 1~~ ~~options.keepRefs = args.references or 1~~ ~~options.keepSubsections = args.subsections~~ ~~local pageNames = { (article .. '#' .. (section or '')) }~~ ~~local text = main(pageNames, options)~~ -- Helper method to convert a comma-separated list of numbers or min-max ranges into a list of booleans ~~if text == "" and d.brokenCategory and d.brokenCategory ~= "" and mw.title.getCurrentTitle().isContentPage then~~ -- @param filter Required. Comma-separated list of numbers or min-max ranges, for example '1,3-5' ~~output[6] = "[[Category:" .. d.brokenCategory .. "]]"~~ -- @return Map from integers to booleans, for example {1=true,2=false,3=true,4=true,5=true} ~~else~~ -- @return Boolean indicating whether the filters should be treated as a blacklist or not ~~output[6] = frame:preprocess(text) or err("Error processing text")~~ -- @note Merging this into matchFilter is possible, but way too inefficient ~~end~~ function Excerpt.parseFilter( filter ) ~~else~~ local filters = {} ~~output[6] = err("No article provided")~~ local isBlacklist = false ~~end~~ if string.sub( filter, 1, 1 ) == '-' then ~~output[7] = '</' .. tag .. '>'~~ isBlacklist = true ~~output[8] = is(args.indicator) and ('</' .. tag .. '>') or ''~~ filter = string.sub( filter, 2 ) ~~output[9] = '</' .. tag .. '>'~~ end ~~output[10] = mw.title.getCurrentTitle().isContentPage and '[[Category:Articles with excerpts]]' or ''~~ local values = mw.text.split( filter, ',' ) -- split values: '1,3-5' to {'1','3-5'} for _, value in pairs( values ) do ~~return table.concat(output)~~ value = mw.text.trim( value ) local min, max = mw.ustring.match( value, '^(%d+)%s[-–—]%s(%d+)$' ) -- '3-5' to min=3 max=5 if not max then min, max = string.match( value, '^((%d+))$' ) end -- '1' to min=1 max=1 if max then for i = min, max do filters[ i ] = true end else filters[ value ] = true -- if we reach this point, the string had the form 'a,b,c' rather than '1,2,3' end end local filter = {cache = {}, terms = filters} return filter, isBlacklist end -- Helper function to see if a value matches any of the given filters ~~-- Entry points for template callers using #invoke:~~ function Excerpt.matchFilter( value, filter ) ~~function p.lead(frame) return invoke(frame, "lead") end -- {{Transclude lead excerpt}} reads the first and only article~~ if type(value) == "number" then ~~function p.linked(frame) return invoke(frame, "linked") end -- {{Transclude linked excerpt}} reads a randomly selected article linked from the given page~~ return filter.terms[value] ~~function p.listitem(frame) return invoke(frame, "listitem") end -- {{Transclude list item excerpt}} reads a randomly selected article listed on the given page~~ else ~~function p.random(frame) return invoke(frame, "random") end -- {{Transclude random excerpt}} reads any article (default for invoke with one argument)~~ local cached = filter.cache[value] ~~function p.selected(frame) return invoke(frame, "selected") end -- {{Transclude selected excerpt}} reads the article whose key is in the selected= parameter~~ if cached ~= nil then ~~function p.excerpt(frame) return excerpt(frame) end -- {{Excerpt}} transcludes part of an article into another article~~ return cached end ~~-- Entry points for other Lua modules~~ local lang = mw.language.getContentLanguage() ~~function p.getContent(page, frame) return getContent(page, frame) end~~ local lcvalue = lang:lcfirst(value) ~~function p.getsection(text, section) return getSection(text, section) end~~ local ucvalue = lang:ucfirst(value) ~~function p.parse(text, options, filesOnly) return parse(text, options, filesOnly) end~~ for term in pairs( filter.terms ) do ~~function p.argimage(text) return argImage(text) end~~ if value == tostring(term) ~~function p.checkimage(image) return checkImage(image) end~~ or type(term) == "string" and ( ~~function p.parseimage(text, start) return parseImage(text, start) end~~ lcvalue == term ~~function p.cleanupText(text, options) return cleanupText(text, options) end~~ or ucvalue == term ~~function p.main(pageNames, options) return main(pageNames, options) end~~ or mw.ustring.match( value, term ) ~~function p.numberflags(str) return numberFlags(str) end~~ ) then filter.cache[value] = true return true end end filter.cache[value] = false end end return pExcerpt