Module:Excerpt/sandbox: Difference between revisions

Browse history interactively

← Previous edit

Content deleted Content added

Revision as of 22:53, 28 April 2020 edit Sophivorus (talk \| contribs) Extended confirmed users, Template editors 5,449 edits Rename most variables to camelCase for inner consistency and with general Lua and MediaWiki style and better readability on long variable names. Also un-abbreviated a few variable names, again for better readability and hopefully easier onboarding of new developers. ← Previous edit		Latest revision as of 21:21, 26 May 2025 edit undo Aidan9382 (talk \| contribs) Extended confirmed users, Page movers, Template editors 15,961 edits Try to automatically account for page merge-like redirects
(215 intermediate revisions by 12 users not shown)
Line 1: -- Module:Excerpt implements the Excerpt template ~~-- Get localized data~~ -- Documentation and master version: https://en.wikipedia.org/wiki/Module:Excerpt ~~local d = require("Module:Excerpt/i18n")~~ -- Authors: User:Sophivorus, User:Certes, User:Aidan9382 & others -- License: CC-BY-SA-3.0 local parser = require( 'Module:WikitextParser' ) ~~local p = {}~~ local yesno = require( 'Module:Yesno' ) local ok, config = pcall( require, 'Module:Excerpt/config' ) ~~-- Helper function to debug~~ if not ok then config = {} end ~~-- Returns blank text or an error message if requested~~ ~~local errors~~ ~~local function err(text)~~ ~~if errors then error(text, 2) end~~ ~~return ""~~ ~~end~~ local Excerpt = {} ~~-- Helper function to test for truthy and falsy values~~ ~~local function is(value)~~ ~~if not value or value == "" or value == "0" or value == "false" or value == "no" then~~ ~~return false~~ ~~end~~ ~~return true~~ ~~end~~ -- Main entry point for templates ~~-- Helper function to match from a list regular expressions~~ function Excerpt.main( frame ) ~~-- Like so: match pre..list[1]..post or pre..list[2]..post or ...~~ ~~local function matchAny(text, pre, list, post, init)~~ -- Make sure the requested page exists and get the wikitext ~~local match = {}~~ local page = Excerpt.getArg( 1 ) ~~for i = 1, #list do~~ if not page or page == '{{{1}}}' then return Excerpt.getError( 'no-page' ) end ~~match = { mw.ustring.match(text, pre .. list[i] .. post, init) }~~ local title = mw.title.new( page ) ~~if match[1] then return unpack(match) end~~ if not title then return Excerpt.getError( 'invalid-title', page ) end local fragment = title.fragment -- save for later if title.isRedirect then title = title.redirectTarget if fragment == "" then fragment = title.fragment -- page merge potential end end if not title.exists then return Excerpt.getError( 'page-not-found', page ) end ~~return nil~~ page = title.prefixedText ~~end~~ local wikitext = title:getContent() -- Get the template params and process them ~~-- Help gsub to remove unwanted templates and pseudo-templates such as #tag:ref and DEFAULTSORT~~ local ~~function~~params ~~stripTemplate(t)~~= { hat = yesno( Excerpt.getArg( 'hat', true ) ), ~~-- If template is unwanted then return "" (gsub will replace by nothing), else return nil (gsub will keep existing string)~~ this = Excerpt.getArg( 'this' ), ~~if matchAny(t, "^{{%s", d.unwantedInlineTemplates, "%s%f[\|}]") then return "" end~~ only = Excerpt.getArg( 'only' ), files = Excerpt.getArg( 'files', Excerpt.getArg( 'file' ) ), lists = Excerpt.getArg( 'lists', Excerpt.getArg( 'list' ) ), tables = Excerpt.getArg( 'tables', Excerpt.getArg( 'table' ) ), templates = Excerpt.getArg( 'templates', Excerpt.getArg( 'template' ) ), paragraphs = Excerpt.getArg( 'paragraphs', Excerpt.getArg( 'paragraph' ) ), references = yesno( Excerpt.getArg( 'references', true ) ), subsections = yesno( Excerpt.getArg( 'subsections', false ) ), links = yesno( Excerpt.getArg( 'links', true ) ), bold = yesno( Excerpt.getArg( 'bold', false ) ), briefDates = yesno( Excerpt.getArg( 'briefdates', false ) ), inline = yesno( Excerpt.getArg( 'inline' ) ), quote = yesno( Excerpt.getArg( 'quote' ) ), more = yesno( Excerpt.getArg( 'more' ) ), class = Excerpt.getArg( 'class' ), displayTitle = Excerpt.getArg( 'displaytitle', page ), } -- Make sure the requested section exists and get the excerpt ~~-- If template is wanted but produces an unwanted reference then return the string with \|shortref or \|ref removed~~ local excerpt ~~local noRef = mw.ustring.gsub(t, "\|%sshortref%s%f[\|}]", "")~~ local section = Excerpt.getArg( 2, fragment ) ~~noRef = mw.ustring.gsub(noRef, "\|%sref%s%f[\|}]", "")~~ section = mw.text.trim( section ) if section == '' then section = nil end if section then excerpt = parser.getSectionTag( wikitext, section ) if not excerpt then if params.subsections then excerpt = parser.getSection( wikitext, section ) else local sections = parser.getSections( wikitext ) excerpt = sections[ section ] end end if not excerpt then return Excerpt.getError( 'section-not-found', section ) end if excerpt == '' then return Excerpt.getError( 'section-empty', section ) end else excerpt = parser.getLead( wikitext ) if excerpt == '' then return Excerpt.getError( 'lead-empty' ) end end -- Remove noinclude bits ~~-- If a wanted template has unwanted nested templates, purge them too~~ excerpt = excerpt:gsub( '<[Nn][Oo][Ii][Nn][Cc][Ll][Uu][Dd][Ee]>.-</[Nn][Oo][Ii][Nn][Cc][Ll][Uu][Dd][Ee]>', '' ) ~~noRef = mw.ustring.sub(noRef, 1, 2) .. mw.ustring.gsub(mw.ustring.sub(noRef, 3), "%b{}", stripTemplate)~~ -- Filter various elements from the excerpt ~~-- Replace {{audio}} by its text parameter: {{Audio\|Foo.ogg\|Bar}} → Bar~~ excerpt = Excerpt.filterFiles( excerpt, params.files ) ~~noRef = mw.ustring.gsub(noRef, "^{{%s[Aa]udio.-\|.-\|(.-)%f[\|}].", "%1")~~ excerpt = Excerpt.filterLists( excerpt, params.lists ) excerpt = Excerpt.filterTables( excerpt, params.tables ) excerpt = Excerpt.filterParagraphs( excerpt, params.paragraphs ) -- If no file is found, try to get one from the infobox ~~-- Replace {{Nihongo foot}} by its text parameter: {{Nihongo foot\|English\|英語\|eigo}} → English~~ if ( params.only == 'file' or params.only == 'files' or not params.only and ( not params.files or params.files ~= '0' ) ) -- caller asked for files ~~noRef = mw.ustring.gsub(noRef, "^{{%s[Nn]ihongo[ _]+foot%s\|(.-)%f[\|}].", "%1")~~ and not section -- and we're in the lead section and config.captions -- and we have the config option required to try finding files in infoboxes and #parser.getFiles( excerpt ) == 0 -- and there're no files in the excerpt then excerpt = Excerpt.addInfoboxFile( excerpt ) end -- Filter the templates by appending the templates blacklist to the templates filter ~~if noRef ~= t then return noRef end~~ if config.blacklist then local blacklist = table.concat( config.blacklist, ',' ) if params.templates then if string.sub( params.templates, 1, 1 ) == '-' then params.templates = params.templates .. ',' .. blacklist end else params.templates = '-' .. blacklist end end excerpt = Excerpt.filterTemplates( excerpt, params.templates ) -- Leave only the requested elements ~~return nil -- not an unwanted template: keep~~ if params.only == 'file' or params.only == 'files' then ~~end~~ local files = parser.getFiles( excerpt ) excerpt = params.only == 'file' and files[1] or table.concat( files, '\n\n' ) end if params.only == 'list' or params.only == 'lists' then local lists = parser.getLists( excerpt ) excerpt = params.only == 'list' and lists[1] or table.concat( lists, '\n\n' ) end if params.only == 'table' or params.only == 'tables' then local tables = parser.getTables( excerpt ) excerpt = params.only == 'table' and tables[1] or table.concat( tables, '\n\n' ) end if params.only == 'paragraph' or params.only == 'paragraphs' then local paragraphs = parser.getParagraphs( excerpt ) excerpt = params.only == 'paragraph' and paragraphs[1] or table.concat( paragraphs, '\n\n' ) end if params.only == 'template' or params.only == 'templates' then local templates = parser.getTemplates( excerpt ) excerpt = params.only == 'template' and templates[1] or table.concat( templates, '\n\n' ) end -- @todo Make more robust and move downwards ~~-- Get a page's content, following redirects, and processing file description pages for files.~~ if params.briefDates then ~~-- Also returns the page name, or the target page name if a redirect was followed, or false if no page found~~ excerpt = Excerpt.fixDates( excerpt ) ~~local function getContent(page, frame)~~ end ~~local title = mw.title.new(page) -- Read description page (for :File:Foo rather than File:Foo)~~ ~~if not title then return false, false end~~ -- Remove unwanted elements ~~local target = title.redirectTarget~~ excerpt = Excerpt.removeComments( excerpt ) ~~if target then title = target end~~ excerpt = Excerpt.removeSelfLinks( excerpt ) excerpt = Excerpt.removeNonFreeFiles( excerpt ) excerpt = Excerpt.removeBehaviorSwitches( excerpt ) -- Fix or remove the references ~~return title:getContent(), title.prefixedText~~ if params.references then ~~end~~ excerpt = Excerpt.fixReferences( excerpt, page, wikitext ) else ~~-- Check image for suitability~~ excerpt = Excerpt.removeReferences( excerpt ) ~~local function checkImage(image)~~ end ~~local page = matchAny(image, "", d.fileNamespaces, "%s:[^\|%]]") -- match File:(name) or Image:(name)~~ ~~if not page then return false end~~ -- Remove wikilinks ~~-- Limit to image types: .gif, .jpg, .jpeg, .png, .svg, .tiff, .xcf (exclude .ogg audio etc.)~~ if not params.links then ~~if not matchAny(page, "%.", {"[Gg][Ii][Ff]", "[Jj][Pp][Ee]?[Gg]", "[Pp][Nn][Gg]", "[Ss][Vv][Gg]", "[Tt][Ii][Ff][Ff]", "[Xx][Cc][Ff]"}, "%s$") then~~ excerpt = Excerpt.removeLinks( excerpt ) ~~return false~~ end -- Link the bold text near the start of most leads and then remove it ~~local fileDescription, fileTitle = getContent(page) -- get file description and title after following any redirect~~ if not section then ~~if fileDescription and fileDescription ~= "" then -- found description on local wiki~~ excerpt = Excerpt.linkBold( excerpt, page ) ~~if mw.ustring.match(fileDescription, "[Nn]on%-free") then return false end~~ end ~~fileDescription = mw.ustring.gsub(fileDescription, "%b{}", stripTemplate) -- remove DEFAULTSORT etc. to avoid side effects of frame:preprocess~~ ~~elseif~~if not ~~fileTitle~~params.bold then excerpt = Excerpt.removeBold( excerpt ) ~~return false~~ ~~else~~ ~~-- try commons~~ ~~fileDescription = "{{" .. fileTitle .. "}}"~~ end ~~frame = frame or mw.getCurrentFrame()~~ ~~fileDescription = frame:preprocess(fileDescription)~~ -- Remove extra line breaks but leave one before and after so the parser interprets lists, tables, etc. correctly ~~return ( fileDescription and fileDescription ~= "" and not mw.ustring.match(fileDescription, "[Nn]on%-free") ) and true or false -- hide non-free image~~ excerpt = excerpt:gsub( '\n\n\n+', '\n\n' ) ~~end~~ excerpt = mw.text.trim( excerpt ) excerpt = '\n' .. excerpt .. '\n' -- Remove nested categories ~~-- Attempt to parse [[File:...]] or [[Image:...]], either anywhere (start=false) or at the start only (start=true)~~ excerpt = frame:preprocess( excerpt ) ~~local function parseImage(text, start)~~ excerpt = Excerpt.removeCategories( excerpt ) ~~local startre = ""~~ ~~if start then startre = "^" end -- a true flag restricts search to start of string~~ -- Add tracking categories ~~local image = matchAny(text, startre .. "%[%[%s", d.fileNamespaces, "%s:.") -- [[File: or [[Image: ...~~ if ~~image~~config.categories then excerpt = Excerpt.addTrackingCategories( excerpt ) ~~image = mw.ustring.match(image, "%b[]%s") -- matching [[...]] to handle wikilinks nested in caption~~ end ~~return image~~ ~~end~~ -- Build the final output ~~-- Parse a caption, which ends at a \| (end of parameter) or } (end of infobox) but may contain nested [..] and {..}~~ if params.inline then ~~local function parseCaption(caption)~~ return mw.text.trim( excerpt ) ~~if not caption then return nil end~~ ~~local length = mw.ustring.len(caption)~~ ~~local position = 1~~ ~~while position <= length do~~ ~~local linkStart, linkEnd = mw.ustring.find(caption, "%b[]", position)~~ ~~linkStart = linkStart or length + 1 -- avoid comparison with nil when no link~~ ~~local templateStart, templateEnd = mw.ustring.find(caption, "%b{}", position)~~ ~~templateStart = templateStart or length + 1 -- avoid comparison with nil when no template~~ ~~local argEnd = mw.ustring.find(caption, "[\|}]", position) or length + 1~~ ~~if linkStart < templateStart and linkStart < argEnd then~~ ~~position = linkEnd + 1 -- skip wikilink~~ ~~elseif templateStart < argEnd then~~ ~~position = templateEnd + 1 -- skip template~~ ~~else -- argument ends before the next wikilink or template~~ ~~return mw.ustring.sub(caption, 1, argEnd - 1)~~ ~~end~~ end ~~return caption -- No terminator found: return entire caption~~ ~~end~~ local tag = params.quote and 'blockquote' or 'div' ~~-- Attempt to construct a [[File:...]] block from {{infobox ... \|image= ...}}~~ local block = mw.html.create( tag ):addClass( 'excerpt-block' ):addClass( params.class ) ~~local function argImage(text)~~ ~~local token = nil~~ ~~local hasNamedArgs = mw.ustring.find(text, "\|") and mw.ustring.find(text, "=")~~ ~~if not hasNamedArgs then return nil end -- filter out any template that obviously doesn't contain an image~~ if config.styles then ~~-- ensure image map is captured~~ local styles = frame:extensionTag( 'templatestyles', '', { src = config.styles } ) ~~text = mw.ustring.gsub(text, '<!%-%-imagemap%-%->', '\|imagemap=')~~ block:node( styles ) end if params.hat then ~~-- find all images~~ local hat = Excerpt.getHat( page, section, params ) ~~local hasImages = false~~ block:node( hat ) ~~local images = {}~~ ~~local captureFrom = 1~~ ~~while captureFrom < mw.ustring.len(text) do~~ ~~local argname, position, image = mw.ustring.match(text, "\|%s([^=\|]-[Ii][Mm][Aa][Gg][Ee][^=\|]-)%s=%s()(.)", captureFrom)~~ ~~if image then -- ImageCaption=, image_size=, image_upright=, etc. do not introduce an image~~ ~~local lcArgname = mw.ustring.lower(argname)~~ ~~if mw.ustring.find(lcArgname, "caption")~~ ~~or mw.ustring.find(lcArgname, "size")~~ ~~or mw.ustring.find(lcArgname, "upright") then~~ ~~image = nil~~ ~~end~~ ~~end~~ ~~if image then~~ ~~hasImages = true~~ ~~images[position] = image~~ ~~captureFrom = position~~ ~~else~~ ~~captureFrom = mw.ustring.len(text)~~ ~~end~~ ~~end~~ ~~captureFrom = 1~~ ~~while captureFrom < mw.ustring.len(text) do~~ ~~local position, image = mw.ustring.match(text, "\|%s[^=\|]-[Pp][Hh][Oo][Tt][Oo][^=\|]-%s=%s()(.)", captureFrom)~~ ~~if image then~~ ~~hasImages = true~~ ~~images[position] = image~~ ~~captureFrom = position~~ ~~else~~ ~~captureFrom = mw.ustring.len(text)~~ ~~end~~ ~~end~~ ~~captureFrom = 1~~ ~~while captureFrom < mw.ustring.len(text) do~~ ~~local position, image = mw.ustring.match(text, "\|%s[^=\|{}]-%s=%s()%[?%[?([^\|{}]%.%a%a%a%a?)%s%f[\|}]", captureFrom)~~ ~~if image then~~ ~~hasImages = true~~ ~~if not images[position] then~~ ~~images[position] = image~~ ~~end~~ ~~captureFrom = position~~ ~~else~~ ~~captureFrom = mw.ustring.len(text)~~ ~~end~~ end excerpt = mw.html.create( 'div' ):addClass( 'excerpt' ):wikitext( excerpt ) ~~if not hasImages then return nil end~~ block:node( excerpt ) if params.more then ~~-- find all captions~~ local more = Excerpt.getReadMore( page, section ) ~~local captions = {}~~ block:node( more ) ~~captureFrom = 1~~ ~~while captureFrom < mw.ustring.len(text) do~~ ~~local position, caption = matchAny(text, "\|%s", d.captionParams, "%s=%s()([^\n]+)", captureFrom)~~ ~~if caption then~~ ~~-- extend caption to parse "\| caption = Foo {{Template\n on\n multiple lines}} Bar\n"~~ ~~local bracedCaption = mw.ustring.match(text, "^[^\n]-%b{}[^\n]+", position)~~ ~~if bracedCaption and bracedCaption ~= "" then caption = bracedCaption end~~ ~~caption = mw.text.trim(caption)~~ ~~local captionStart = mw.ustring.sub(caption, 1, 1)~~ ~~if captionStart == '\|' or captionStart == '}' then caption = nil end~~ ~~end~~ ~~if caption then~~ ~~-- find nearest image, and use same index for captions table~~ ~~local i = position~~ ~~while i > 0 and not images[i] do~~ ~~i = i - 1~~ ~~if images[i] then~~ ~~if not captions[i] then~~ ~~captions[i] = parseCaption(caption)~~ ~~end~~ ~~end~~ ~~end~~ ~~captureFrom = position~~ ~~else~~ ~~captureFrom = mw.ustring.len(text)~~ ~~end~~ end return block ~~-- find all alt text~~ end ~~local altTexts = {}~~ ~~for position, altText in mw.ustring.gmatch(text, "\|%s[Aa][Ll][Tt]%s=%s()([^\n])") do~~ ~~if altText then~~ -- Filter the files in the given wikitext against the given filter ~~-- altText is terminated by }} or \|, but first skip any matched [[...]] and {{...}}~~ function Excerpt.filterFiles( wikitext, filter ) ~~local lookfrom = math.max( -- find position after whichever comes last: start of string, end of last ]] or end of last }}~~ if not filter then return wikitext end ~~mw.ustring.match(altText, ".{%b{}}()") or 1, -- if multiple {{...}}, . consumes all but one, leaving the last for %b~~ local filters, isBlacklist = Excerpt.parseFilter( filter ) ~~mw.ustring.match(altText, ".%[%b[]%]()") or 1)~~ local files = parser.getFiles( wikitext ) for index, file in pairs( files ) do ~~local length = mw.ustring.len(altText)~~ local name = parser.getFileName( file ) ~~local aftertext = math.min( -- find position after whichever comes first: end of string, }} or \|~~ if isBlacklist and ( Excerpt.matchFilter( index, filters ) or Excerpt.matchFilter( name, filters ) ) ~~mw.ustring.match(altText, "()}}", lookfrom) or length+1,~~ or not isBlacklist and ( not Excerpt.matchFilter( index, filters ) and not Excerpt.matchFilter( name, filters ) ) then ~~mw.ustring.match(altText, "()\|", lookfrom) or length+1)~~ wikitext = Excerpt.removeString( wikitext, file ) ~~altText = mw.ustring.sub(altText, 1, aftertext-1) -- chop off \|... or }}... which is not part of [[...]] or {{...}}~~ ~~altText = mw.text.trim(altText)~~ ~~local altTextStart = mw.ustring.sub(altText, 1, 1)~~ ~~if altTextStart == '\|' or altTextStart == '}' then altText = nil end~~ ~~end~~ ~~if altText then~~ ~~-- find nearest image, and use same index for altTexts table~~ ~~local i = position~~ ~~while i > 0 and not images[i] do~~ ~~i = i - 1~~ ~~if images[i] then~~ ~~if not altTexts[i] then~~ ~~altTexts[i] = altText~~ ~~end~~ ~~end~~ ~~end~~ end end return wikitext end -- Filter the lists in the given wikitext against the given filter ~~-- find all image sizes~~ function Excerpt.filterLists( wikitext, filter ) ~~local imageSizes = {}~~ if not filter then return wikitext end ~~for position, imageSizeMatch in mw.ustring.gmatch(text, "\|%s[Ii][Mm][Aa][Gg][Ee][ _]?[Ss][Ii][Zz][Ee]%s=%s()([^}\|\n])") do~~ local filters, isBlacklist = Excerpt.parseFilter( filter ) ~~local imageSize = mw.ustring.match(imageSizeMatch, "=%s([^}\|\n])")~~ local lists = parser.getLists( wikitext ) ~~if imageSize then~~ for index, list in pairs( lists ) do ~~imageSize = mw.text.trim(imageSize )~~ if isBlacklist and Excerpt.matchFilter( index, filters ) ~~local imageSizeStart = mw.ustring.sub(imageSize, 1, 1)~~ or not isBlacklist and not Excerpt.matchFilter( index, filters ) then ~~if imageSizeStart == '\|' or imageSizeStart == '}' then imageSize = nil end~~ wikitext = Excerpt.removeString( wikitext, list ) ~~end~~ ~~if imageSize then~~ ~~-- find nearest image, and use same index for imageSizes table~~ ~~local i = position~~ ~~while i > 0 and not images[i] do~~ ~~i = i - 1~~ ~~if images[i] then~~ ~~if not imageSizes[i] then~~ ~~imageSizes[i] = imageSize~~ ~~end~~ ~~end~~ ~~end~~ end end return wikitext end -- Filter the tables in the given wikitext against the given filter ~~-- sort the keys of the images table (in a table sequence), so that images can be iterated over in order~~ function Excerpt.filterTables( wikitext, filter ) ~~local keys = {}~~ if not filter then return wikitext end ~~for key, val in pairs(images) do~~ local filters, isBlacklist = Excerpt.parseFilter( filter ) ~~table.insert(keys, key)~~ local tables = parser.getTables( wikitext ) for index, t in pairs( tables ) do local id = string.match( t, '{\|[^\n]-id%s=%s["\']?([^"\'\n]+)["\']?[^\n]\n' ) if isBlacklist and ( Excerpt.matchFilter( index, filters ) or Excerpt.matchFilter( id, filters ) ) or not isBlacklist and ( not Excerpt.matchFilter( index, filters ) and not Excerpt.matchFilter( id, filters ) ) then wikitext = Excerpt.removeString( wikitext, t ) end end return wikitext ~~table.sort(keys)~~ end -- Filter the paragraphs in the given wikitext against the given filter ~~-- add in relevant optional parameters for each image: caption, alt text and image size~~ function Excerpt.filterParagraphs( wikitext, filter ) ~~local imageTokens = {}~~ if not filter then return wikitext end ~~for _, index in ipairs(keys) do~~ local filters, isBlacklist = Excerpt.parseFilter( filter ) ~~local image = images[index]~~ local paragraphs = parser.getParagraphs( wikitext ) ~~local token = parseImage(image, true) -- look for image=[[File:...]] etc.~~ for index, paragraph in pairs( paragraphs ) do ~~if not token then~~ if isBlacklist and Excerpt.matchFilter( index, filters ) ~~image = mw.ustring.match(image, "^[^}\|\n]") -- remove later arguments~~ or not isBlacklist and not Excerpt.matchFilter( index, filters ) then ~~token = "[[" -- Add File: unless name already begins File: or Image:~~ wikitext = Excerpt.removeString( wikitext, paragraph ) ~~if not matchAny(image, "^", d.fileNamespaces, "%s:") then~~ ~~token = token .. "File:"~~ ~~end~~ ~~token = token .. image~~ ~~local caption = captions[index]~~ ~~if caption and mw.ustring.match(caption, "%S") then token = token .. "\|" .. caption end~~ ~~local alt = altTexts[index]~~ ~~if alt then token = token .. "\|alt=" .. alt end~~ ~~local image_size = imageSizes[index]~~ ~~if image_size and mw.ustring.match(image_size, "%S") then token = token .. "\|" .. image_size end~~ ~~token = token .. "]]"~~ end ~~token = mw.ustring.gsub(token, "\n","") .. "\n"~~ ~~table.insert(imageTokens, token)~~ end return ~~imageTokens~~wikitext end -- Filter the templates in the given wikitext against the given filter ~~-- Help gsub convert imagemaps into standard images~~ function Excerpt.filterTemplates( wikitext, filter ) ~~local function convertImageMap(imagemap)~~ if not filter then return wikitext end ~~local image = matchAny(imagemap, "[>\n]%s", d.fileNamespaces, "[^\n]")~~ local filters, isBlacklist = Excerpt.parseFilter( filter ) ~~if image then~~ local templates = parser.getTemplates( wikitext ) ~~return "<!--imagemap-->[[" .. mw.ustring.gsub(image, "[>\n]%s", "", 1) .. "]]"~~ for index, template in pairs( templates ) do ~~else~~ local name = parser.getTemplateName( template ) ~~return "" -- remove entire block if image can't be extracted~~ if isBlacklist and ( Excerpt.matchFilter( index, filters ) or Excerpt.matchFilter( name, filters ) ) or not isBlacklist and ( not Excerpt.matchFilter( index, filters ) and not Excerpt.matchFilter( name, filters ) ) then wikitext = Excerpt.removeString( wikitext, template ) end end return wikitext end function Excerpt.addInfoboxFile( excerpt ) ~~-- Convert a comma-separated list of numbers or min-max ranges into a list of booleans, e.g. "1,3-5" → {1=true,2=false,3=true,4=true,5=true}~~ -- We cannot distinguish the infobox from the other templates, so we search them all ~~local function numberFlags(str)~~ local templates = parser.getTemplates( excerpt ) ~~local ranges = mw.text.split(str, ",") -- parse ranges, e.g. "1,3-5" → {"1","3-5"}~~ for _, template in pairs( templates ) do ~~local flags = {}~~ local parameters = parser.getTemplateParameters( template ) ~~for _, r in pairs(ranges) do~~ local file, captions, caption, cssClasses, cssClass ~~local min, max = mw.ustring.match(r, "^%s(%d+)%s%-%s(%d+)%s$") -- "3-5" → min=3 max=5~~ for _, pair in pairs( config.captions ) do ~~if not max then min, max = mw.ustring.match(r, "^%s((%d+))%s$") end -- "1" → min=1 max=1~~ if file ~~max~~= ~~then~~pair[1] file = parameters[file] ~~for p = min, max do flags[p] = true end~~ if file and Excerpt.matchAny( file, '^.%.', { '[Jj][Pp][Ee]?[Gg]', '[Pp][Nn][Gg]', '[Gg][Ii][Ff]', '[Ss][Vv][Gg]' }, '.' ) then file = string.match( file, '%[?%[?.-:([^{\|]+)%]?%]?' ) or file -- [[File:Example.jpg{{!}}upright=1.5]] to Example.jpg captions = pair[2] for _, p in pairs( captions ) do if parameters[ p ] then caption = parameters[ p ] break end end -- Check for CSS classes -- We opt to use skin-invert-image instead of skin-invert -- in all other cases, the CSS provided in the infobox is used if pair[3] then cssClasses = pair[3] for _, p in pairs( cssClasses ) do if parameters[ p ] then cssClass = ( parameters[ p ] == 'skin-invert' ) and 'skin-invert-image' or parameters[ p ] break end end end local class = cssClass and ( '\|class=' .. cssClass ) or '' return '[[File:' .. file .. class .. '\|thumb\|' .. ( caption or '' ) .. ']]' .. excerpt end end end return ~~flags~~excerpt end function Excerpt.removeNonFreeFiles( wikitext ) ~~local imageArgGroups = {~~ local files = parser.getFiles( wikitext ) ~~{"thumb", "thumbnail", "frame", "framed", "frameless"},~~ for _, file in pairs( files ) do ~~{"right", "left", "center", "none"},~~ local fileName = 'File:' .. parser.getFileName( file ) ~~{"baseline", "middle", "sub", "super", "text-top", "text-bottom", "top", "bottom"}~~ local fileTitle = mw.title.new( fileName ) } if fileTitle then local fileDescription = fileTitle:getContent() ~~local function modifyImage(image, fileArgs)~~ if not fileDescription or fileDescription == '' then ~~if fileArgs then~~ local frame = mw.getCurrentFrame() ~~for _, filearg in pairs(mw.text.split(fileArgs, "\|")) do -- handle fileArgs=left\|border etc.~~ fileDescription = frame:preprocess( '{{' .. fileName .. '}}' ) -- try Commons ~~local fa = mw.ustring.gsub(filearg, "=.", "") -- "upright=0.75" → "upright"~~ ~~local group = {fa} -- group of "border" is ["border"]...~~ ~~for _, g in pairs(imageArgGroups) do~~ ~~for _, a in pairs(g) do~~ ~~if fa == a then group = g end -- ...but group of "left" is ["right", "left", "center", "none"]~~ ~~end~~ end if fileDescription and string.match( fileDescription, '[Nn]on%-free' ) then ~~for _, a in pairs(group) do~~ wikitext = Excerpt.removeString( wikitext, file ) ~~image = mw.ustring.gsub(image, "\|%s" .. a .. "%f[%A]%s=[^\|%]]", "") -- remove "\|upright=0.75" etc.~~ ~~image = mw.ustring.gsub(image, "\|%s" .. a .. "%s([\|%]])", "%1") -- replace "\|left\|" by "\|" etc.~~ end ~~image = mw.ustring.gsub(image, "([\|%]])", "\|" .. filearg .. "%1", 1) -- replace "\|" by "\|left\|" etc.~~ end end return ~~image~~wikitext end function Excerpt.getHat( page, section, params ) ~~-- a basic parser to trim down extracted wikitext~~ local hat ~~-- @param text : Wikitext to be processed~~ ~~-- @param options : A table of options...~~ -- Build the text -- options.paraflags : Which number paragraphs to keep, as either a string (e.g. `1,3-5`) or a table (e.g. `{1=true,2=false,3=true,4=true,5=true}`. If not present, all paragraphs will be kept. if params.this then ~~-- options.fileflags : table of which files to keep, as either a string (e.g. `1,3-5`) or a table (e.g. `{1=true,2=false,3=true,4=true,5=true}`~~ hat = params.this ~~-- options.fileargs : args for the [[File:]] syntax, such as `left`~~ elseif params.quote then ~~-- @param filesOnly : If set, only return the files and not the prose~~ hat = Excerpt.getMessage( 'this' ) ~~local function parse(text, options, filesOnly)~~ elseif params.only then ~~local allParagraphs = true -- keep all paragraphs?~~ hat = Excerpt.getMessage( params.only ) ~~if options.paraflags then~~ else ~~if type(options.paraflags) ~= "table" then options.paraflags = numberFlags(options.paraflags) end~~ hat = Excerpt.getMessage( 'section' ) ~~for _, v in pairs(options.paraflags) do~~ ~~if v then allParagraphs = false end -- if any para specifically requested, don't keep all~~ ~~end~~ end hat = hat .. ' ' .. Excerpt.getMessage( 'excerpt' ) ~~if filesOnly then~~ ~~allParagraphs = false~~ -- Build the link ~~options.paraflags = {}~~ if section then hat = hat .. ' [[:' .. page .. '#' .. mw.uri.anchorEncode( section ) .. '\|' .. params.displayTitle .. ' § ' .. section:gsub( '%[%[([^]\|]+)\|?[^]]%]%]', '%1' ) .. ']].' -- remove nested links else hat = hat .. ' [[:' .. page .. '\|' .. params.displayTitle .. ']].' end -- Build the edit link ~~local maxfile = 0 -- for efficiency, stop checking images after this many have been found~~ local title = mw.title.new( page ) ~~if options.fileflags then~~ local editUrl = title:fullUrl( 'action=edit' ) ~~if type(options.fileflags) ~= "table" then options.fileflags = numberFlags(options.fileflags) end~~ hat = hat .. '<span class="mw-editsection-like plainlinks"><span class="mw-editsection-bracket">[</span>[' ~~for k, v in pairs(options.fileflags) do~~ hat = hat .. editUrl .. ' ' .. mw.message.new( 'editsection' ):plain() ~~if v and k > maxfile then maxfile = k end -- set maxfile = highest key in fileflags~~ hat = hat .. ']<span class="mw-editsection-bracket">]</span></span>' ~~end~~ if config.hat then local frame = mw.getCurrentFrame() hat = config.hat .. hat .. '}}' hat = frame:preprocess( hat ) else hat = mw.html.create( 'div' ):addClass( 'dablink excerpt-hat' ):wikitext( hat ) end ~~local fileArgs = options.fileargs and mw.text.trim(options.fileargs)~~ ~~if fileArgs == '' then fileArgs = nil end~~ return hat ~~local leadStart = nil -- have we found some text yet?~~ end ~~local t = "" -- the stripped down output text~~ ~~local fileText = "" -- output text with concatenated [[File:Foo\|...]]\n entries~~ ~~local files = 0 -- how many images so far~~ ~~local paras = 0 -- how many paragraphs so far~~ ~~local startLine = true -- at the start of a line (no non-spaces found since last \n)?~~ function Excerpt.getReadMore( page, section ) ~~text = mw.ustring.gsub(text,"^%s","") -- remove initial white space~~ local link = "'''[[" .. page if section then ~~-- Add named files~~ link = link .. '#' .. section ~~local f = options.files~~ ~~if f and mw.ustring.match(f, "[^%d%s%-,]") then -- filename rather than number list~~ ~~f = mw.ustring.gsub(f, "^%sFile%s:%s", "", 1)~~ ~~f = mw.ustring.gsub(f, "^%sImage%s:%s", "", 1)~~ ~~f = "[[File:" .. f .. "]]"~~ ~~f = modifyImage(f, "thumb")~~ ~~f = modifyImage(f, fileArgs)~~ ~~if checkImage(f) then fileText = fileText .. f .. "\n" end~~ end local text = Excerpt.getMessage( 'more' ) link = link .. '\|' .. text .. "]]'''" link = mw.html.create( 'div' ):addClass( 'noprint excerpt-more' ):wikitext( link ) return link end ~~repeat~~ -- ~~loop~~Fix ~~around~~birth ~~parsing~~and adeath ~~template~~dates, ~~image~~but only in the orfirst paragraph -- @todo Use parser.getParagraphs() to get the first paragraph ~~local token = mw.ustring.match(text, "^%b{}%s") or false -- {{Template}} or {\| Table \|}~~ function Excerpt.fixDates( excerpt ) ~~if not leadStart and not token then token = mw.ustring.match(text, "^%b<>%s%b{}%s") end -- allow <tag>{{template}} before lead has started~~ local start = 1 -- skip initial templates local s ~~local line = mw.ustring.match(text, "[^\n]")~~ local e = 0 ~~if token and line and mw.ustring.len(token) < mw.ustring.len(line) then -- template is followed by text (but it may just be other templates)~~ repeat ~~line = mw.ustring.gsub(line, "%b{}", "") -- remove all templates from this line~~ start = e + 1 ~~line = mw.ustring.gsub(line, "%b<>", "") -- remove all HTML tags from this line~~ s, e = mw.ustring.find( excerpt, '%s%b{}%s', start ) ~~-- if anything is left, other than an incomplete further template or an image, keep the template: it counts as part of the line~~ until not s or s > start ~~if mw.ustring.find(line, "%S") and not matchAny(line, "^%s", { "{{", "%[%[%s[Ff]ile:", "%[%[%s[Ii]mage:" }, "") then~~ s, e = mw.ustring.find( excerpt, '%b()', start ) -- get (...), which may be (year–year) ~~token = nil~~ if s and s < start + 100 then -- look only near the start local excerptStart = mw.ustring.sub( excerpt, s, e ) local year1, conjunction, year2 = string.match( excerptStart, '(%d%d%d+)(.-)(%d%d%d+)' ) if year1 and year2 and ( string.match( conjunction, '[%-–—]' ) or string.match( conjunction, '{{%s[sS]nd%s}}' ) ) then local y1 = tonumber( year1 ) local y2 = tonumber( year2 ) if y2 > y1 and y2 < y1 + 125 and y1 <= tonumber( os.date( '%Y' ) ) then excerpt = mw.ustring.sub( excerpt, 1, s ) .. year1 .. '–' .. year2 .. mw.ustring.sub( excerpt, e ) end end end return excerpt end -- Replace the first call to each reference defined outside of the excerpt for the full reference, to prevent undefined references ~~if token then -- found a template which is not the prefix to a line of text~~ -- Then prefix the page title to the reference names to prevent conflicts -- that is, replace <ref name="Foo"> for <ref name="Title of the article Foo"> ~~if leadStart then -- lead has already started, so keep the template within the text, unless it's a whole line (navbox etc.)~~ -- and also <ref name="Foo" /> for <ref name="Title of the article Foo" /> ~~if not filesOnly and not startLine then t = t .. token end~~ -- also remove reference groups: <ref name="Foo" group="Bar"> for <ref name="Title of the article Foo"> -- and <ref group="Bar"> for <ref> ~~elseif matchAny(token, "{{%s", d.wantedBlockTemplates, "%s%f[\|}]") then~~ -- @todo The current regex may fail in cases with both kinds of quotes, like <ref name="Darwin's book"> ~~t = t .. token -- keep wanted block templates~~ function Excerpt.fixReferences( excerpt, page, wikitext ) local references = parser.getReferences( excerpt ) ~~elseif is(options.keepTables) and mw.ustring.sub(token, 1, 2) == '{\|' then~~ local fixed = {} ~~t = t .. token -- keep tables~~ for _, reference in pairs( references ) do local name = parser.getTagAttribute( reference, 'name' ) ~~elseif files < maxfile then -- discard template, but if we are still collecting images...~~ if not fixed[ name ] then -- fix each reference only once ~~local images = argImage(token) or {}~~ local content = parser.getTagContent( reference ) ~~if not images then~~ if not content then -- reference is self-closing ~~local image = parseImage(token, false) -- look for embedded [[File:...]], \|image=, etc.~~ local full = parser.getReference( excerpt, name ) ~~if image then table.insert(images, image) end~~ if not full then -- the reference is not defined in the excerpt ~~end~~ full = parser.getReference( wikitext, name ) ~~for _, image in ipairs(images) do~~ if full then ~~if files < maxfile and checkImage(image) then -- if image is found and qualifies (not a sound file, non-free, etc.)~~ excerpt = excerpt:gsub( Excerpt.escapeString( reference ), Excerpt.escapeString( full ), 1 ) ~~files = files + 1 -- count the file, whether displaying it or not~~ ~~if options.fileflags and options.fileflags[files] then -- if displaying this image~~ ~~image = modifyImage(image, "thumb")~~ ~~image = modifyImage(image, fileArgs)~~ ~~fileText = fileText .. image~~ ~~end~~ end table.insert( fixed, name ) end ~~end~~ ~~else -- the next token in text is not a template~~ ~~token = parseImage(text, true)~~ ~~if token then -- the next token in text looks like an image~~ ~~if files < maxfile and checkImage(token) then -- if more images are wanted and this is a wanted image~~ ~~files = files + 1~~ ~~if options.fileflags and options.fileflags[files] then~~ ~~local image = token -- copy token for manipulation by adding \|right etc. without changing the original~~ ~~image = modifyImage(image, fileArgs)~~ ~~fileText = fileText .. image~~ ~~end~~ ~~end~~ ~~else -- got a paragraph, which ends at a file, image, blank line or end of text~~ ~~local afterEnd = mw.ustring.len(text) + 1~~ ~~local blankPosition = mw.ustring.find(text, "\n%s\n") or afterEnd -- position of next paragraph delimiter (or end of text)~~ ~~local endPosition = math.min( -- find position of whichever comes first: [[File:, [[Image: or paragraph delimiter~~ ~~mw.ustring.find(text, "%[%[%s[Ff]ile%s:") or afterEnd,~~ ~~mw.ustring.find(text, "%[%[%s[Ii]mage%s:") or afterEnd,~~ ~~blankPosition)~~ ~~token = mw.ustring.sub(text, 1, endPosition-1)~~ ~~if blankPosition < afterEnd and blankPosition == endPosition then -- paragraph ends with a blank line~~ ~~token = token .. mw.ustring.match(text, "\n%s\n", blankPosition)~~ ~~end~~ ~~local isHatnote = not(leadStart) and mw.ustring.sub(token, 1, 1) == ':'~~ ~~if not isHatnote then~~ ~~leadStart = leadStart or mw.ustring.len(t) + 1 -- we got a paragraph, so mark the start of the lead section~~ ~~paras = paras + 1~~ ~~if allParagraphs or (options.paraflags and options.paraflags[paras]) then t = t .. token end -- add if this paragraph wanted~~ ~~end~~ ~~end -- of "else got a paragraph"~~ ~~end -- of "else not a template"~~ ~~if token then text = mw.ustring.sub(text, mw.ustring.len(token)+1) end -- remove parsed token from remaining text~~ ~~startLine = mw.ustring.find(token, "\n%s$") -- will the next token be the first non-space on a line?~~ ~~until not text or text == "" or not token or token == "" -- loop until all text parsed~~ ~~text = mw.ustring.gsub(t, "\n+$", "") -- remove trailing line feeds, so "{{Transclude text excerpt\|Foo}} more" flows on one line~~ ~~return fileText, text~~ ~~end~~ ~~local function cleanupText(text, options)~~ ~~text = mw.ustring.gsub(text, "<!%-%-.-%-%->","") -- remove HTML comments~~ ~~text = mw.ustring.gsub(text, "<[Nn][Oo][Ii][Nn][Cc][Ll][Uu][Dd][Ee]>.-</[Nn][Oo][Ii][Nn][Cc][Ll][Uu][Dd][Ee]>", "") -- remove noinclude bits~~ ~~if mw.ustring.find(text, "[Oo][Nn][Ll][Yy][Ii][Nn][Cc][Ll][Uu][Dd][Ee]") then -- avoid expensive search if possible~~ ~~text = mw.ustring.gsub(text, "</[Oo][Nn][Ll][Yy][Ii][Nn][Cc][Ll][Uu][Dd][Ee]>.-<[Oo][Nn][Ll][Yy][Ii][Nn][Cc][Ll][Uu][Dd][Ee]>", "") -- remove text between onlyinclude sections~~ ~~text = mw.ustring.gsub(text, "^.-<[Oo][Nn][Ll][Yy][Ii][Nn][Cc][Ll][Uu][Dd][Ee]>", "") -- remove text before first onlyinclude section~~ ~~text = mw.ustring.gsub(text, "</[Oo][Nn][Ll][Yy][Ii][Nn][Cc][Ll][Uu][Dd][Ee]>.", "") -- remove text after last onlyinclude section~~ ~~end~~ ~~if is(options.fragment) then~~ ~~local escapedFragment = mw.ustring.gsub( options.fragment, "[%^%$%(%)%%%.%[%]%%+%-%?]", "%%%0" )~~ ~~local fragments = ""~~ ~~local fragment = ""~~ ~~local position = 1~~ ~~while position < mw.ustring.len( text ) do~~ fragment, position = mw.ustring.match(text, "<%s[Ss]ection%s+begin%s=%s[\"\']?%s" .. escapedFragment .. "%s[\"\']?%s/>(.-)<%s[Ss]ection%s+end=%s[\"\']?%s" .. escapedFragment .. "%s[\"\']?%s/>()", position ) ~~if fragment and position then~~ ~~fragments = fragments .. fragment~~ ~~else~~ ~~position = mw.ustring.len( text )~~ end end ~~text = fragments~~ end -- Prepend the page title to the reference names to prevent conflicts with other references in the transcluding page ~~if not is(options.keepSubsections) then~~ excerpt = excerpt:gsub( '< [Rr][Ee][Ff][^>]name = ["\']?([^"\'>/]+)["\']?[^>/](/?) >', '<ref name="' .. page:gsub( '"', '' ) .. ' %1"%2>' ) ~~text = mw.ustring.gsub(text, "\n==.","") -- remove first ==Heading== and everything after it~~ -- Remove reference groups because they don't apply to the transcluding page ~~text = mw.ustring.gsub(text, "^==.","") -- ...even if the lead is empty~~ excerpt = excerpt:gsub( '< [Rr][Ee][Ff] group = ["\']?[^"\'>/]+["\'] >', '<ref>' ) ~~end~~ return excerpt ~~if not is(options.keepRefs) then~~ ~~text = mw.ustring.gsub(text, "<%s[Rr][Ee][Ff][^>]-/%s>", "") -- remove refs cited elsewhere~~ ~~text = mw.ustring.gsub(text, "<%s[Rr][Ee][Ff].->.-<%s/%s[Rr][Ee][Ff]%s>", "") -- remove refs~~ ~~text = mw.ustring.gsub(text, "%b{}", stripTemplate) -- remove unwanted templates such as references~~ ~~end~~ ~~text = mw.ustring.gsub(text, "<%s[Ss][Cc][Oo][Rr][Ee].->.-<%s/%s[Ss][Cc][Oo][Rr][Ee]%s>", "") -- remove musical scores~~ ~~text = mw.ustring.gsub(text, "<%s[Ii][Mm][Aa][Gg][Ee][Mm][Aa][Pp].->.-<%s/%s[Ii][Mm][Aa][Gg][Ee][Mm][Aa][Pp]%s>", convertImageMap) -- convert imagemaps into standard images~~ ~~text = mw.ustring.gsub(text, "%s{{%s[Tt][Oo][Cc].-}}", "") -- remove most common tables of contents~~ ~~text = mw.ustring.gsub(text, "%s__[A-Z]TOC__", "") -- remove TOC behavior switches~~ ~~text = mw.ustring.gsub(text, "\n%s{{%s[Pp]p%-.-}}", "\n") -- remove protection templates~~ ~~text = mw.ustring.gsub(text, "%s{{[^{\|}][Ss]idebar%s}}", "") -- remove most sidebars~~ ~~text = mw.ustring.gsub(text, "%s{{[^{\|}]%-[Ss]tub%s}}", "") -- remove most stub templates~~ ~~text = mw.ustring.gsub(text, "%s%[%[%s:?[Cc]ategory:.-%]%]", "") -- remove categories~~ ~~text = mw.ustring.gsub(text, "^:[^\n]+\n","") -- remove DIY hatnote indented with a colon~~ ~~return text~~ end function Excerpt.removeReferences( excerpt ) ~~-- Parse a ==Section== from a page~~ local references = parser.getReferences( excerpt ) ~~local function getSection(text, section, mainOnly)~~ for _, reference in pairs( references ) do ~~local escapedSection = mw.ustring.gsub(mw.uri.decode(section), "([%^%$%(%)%%%.%[%]%%+%-%?])", "%%%1") -- %26 → & etc, then ^ → %^ etc.~~ excerpt = Excerpt.removeString( excerpt, reference ) ~~local level, content = mw.ustring.match(text .. "\n", "\n(==+)%s" .. escapedSection .. "%s==.-\n(.)")~~ ~~if not content then return nil end -- no such section~~ ~~local nextSection~~ ~~if mainOnly then~~ ~~nextSection = "\n==." -- Main part of section terminates at any level of header~~ ~~else~~ ~~nextSection = "\n==" .. mw.ustring.rep("=?", #level - 2) .. "[^=]." -- "===" → "\n===?[^=].", matching "==" or "===" but not "===="~~ end return excerpt ~~content = mw.ustring.gsub(content, nextSection, "") -- remove later sections with headings at this level or higher~~ ~~return content~~ end function Excerpt.removeCategories( excerpt ) ~~-- Remove unmatched <tag> or </tag> tags~~ local categories = parser.getCategories( excerpt ) ~~local function fixTags(text, tag)~~ for _, category in pairs( categories ) do ~~local startCount = 0~~ excerpt = Excerpt.removeString( excerpt, category ) ~~for i in mw.ustring.gmatch(text, "<%s" .. tag .. "%f[^%w_].->") do startCount = startCount + 1 end~~ ~~local endCount = 0~~ ~~for i in mw.ustring.gmatch(text, "<%s/" .. tag .. "%f[^%w_].->") do endCount = endCount + 1 end~~ ~~if startCount > endCount then -- more <tag> than </tag>: remove the last few <tag>s~~ ~~local i = 0~~ ~~text = mw.ustring.gsub(text, "<%s" .. tag .. "%f[^%w_].->", function(t)~~ ~~i = i + 1~~ ~~if i > endCount then return "" else return nil end~~ ~~end) -- "end" here terminates the anonymous replacement function(t) passed to gsub~~ ~~elseif endCount > startCount then -- more </tag> than <tag>: remove the first few </tag>s~~ ~~text = mw.ustring.gsub(text, "<%s/" .. tag .. "%f[^%w_].->", "", endCount - startCount)~~ end return ~~text~~excerpt end function Excerpt.removeBehaviorSwitches( excerpt ) ~~-- Main function returns a string value: text of the lead of a page~~ return excerpt:gsub( '__[A-Z]+__', '' ) ~~local function main(pageNames, options)~~ end ~~if not pageNames or #pageNames < 1 then return err("No page names given") end~~ ~~local pageName~~ ~~local text~~ ~~local pageCount = #pageNames~~ ~~local firstPage = pageNames[1] or "(nil)" -- save for error message, as it the name will be deleted~~ ~~local gotOptions~~ ~~local pageOptionsString~~ ~~local section~~ function Excerpt.removeComments( excerpt ) ~~-- read the page, or a random one if multiple pages were provided~~ return excerpt:gsub( '<!%-%-.-%-%->', '' ) ~~if pageCount > 1 then math.randomseed(os.time()) end~~ end ~~while not text and pageCount > 0 do~~ ~~local pageNumber = 1~~ ~~if pageCount > 1 then pageNumber = math.random(pageCount) end -- pick a random title~~ ~~pageName = pageNames[pageNumber]~~ ~~if pageName and pageName ~= "" then~~ ~~-- We have page or [[page]] or [[page\|text]], possibly followed by \|opt1\|opt2...~~ ~~local pn~~ ~~pn, gotOptions, pageOptionsString = mw.ustring.match(pageName, "^%s(%[%b[]%])%s(\|?)(.)")~~ ~~if pn then~~ ~~pageName = mw.ustring.match(pn, "%[%[([^\|%]])") -- turn [[page\|text]] into page, discarding text~~ ~~else -- we have page or page\|opt...~~ ~~pageName, gotOptions, pageOptionsString = mw.ustring.match(pageName, "%s([^\|][^\|%s])%s(\|?)(.)")~~ ~~end~~ function Excerpt.removeBold( excerpt ) ~~if pageName and pageName ~= "" then~~ return excerpt:gsub( "'''", '' ) ~~local pn~~ end ~~pn, section = mw.ustring.match(pageName, "(.-)#(.)")~~ ~~pageName = pn or pageName~~ ~~text, normalisedPageName = getContent(pageName)~~ ~~if not normalisedPageName then~~ ~~return err("No title for page name " .. pageName)~~ ~~else~~ ~~pageName = normalisedPageName~~ ~~end~~ ~~if text and options.nostubs then~~ ~~local isStub = mw.ustring.find(text, "%s{{[^{\|}]%-[Ss]tub%s}}")~~ ~~if isStub then text = nil end~~ ~~end~~ ~~if not section then~~ ~~section = mw.ustring.match(pageName, ".-#(.)") -- parse redirect to Page#Section~~ ~~end~~ ~~if text and section and section ~= "" then text = getSection(text, section) end~~ ~~end~~ ~~end~~ ~~if not text then table.remove(pageNames, pageNumber) end -- this one didn't work; try another~~ ~~pageCount = pageCount - 1 -- ensure that we exit the loop after at most #pageNames iterations~~ ~~end~~ ~~if not text then return err("Cannot read a valid page: first name is " .. firstPage) end~~ function Excerpt.removeLinks( excerpt ) ~~text = cleanupText(text, options)~~ local links = parser.getLinks( excerpt ) for _, link in pairs( links ) do ~~local pageOptions = {} -- pageOptions (even if value is "") have priority over global options~~ excerpt = Excerpt.removeString( excerpt, link ) ~~for k, v in pairs(options) do pageOptions[k] = v end~~ ~~if gotOptions and gotOptions ~= "" then~~ ~~for _, t in pairs(mw.text.split(pageOptionsString, "\|")) do~~ ~~local k, v = mw.ustring.match(t, "%s([^=]-)%s=(.-)%s$")~~ ~~pageOptions[k] = v~~ ~~end~~ ~~pageOptions.paraflags = numberFlags(pageOptions["paragraphs"] or "") -- parse paragraphs, e.g. "1,3-5" → {"1","3-5"}~~ ~~pageOptions.fileflags = numberFlags(pageOptions["files"] or "") -- parse file numbers~~ ~~if pageOptions.more and pageOptions.more == "" then pageOptions.more = "Read more..." end -- more= is short for this default text~~ end return excerpt end -- @todo Use parser.getLinks ~~local fileText~~ function Excerpt.removeSelfLinks( excerpt, page ) ~~fileText, text = parse(text, pageOptions)~~ local lang = mw.language.getContentLanguage() local page = Excerpt.escapeString( mw.title.getCurrentTitle().prefixedText ) local ucpage = lang:ucfirst( page ) local lcpage = lang:lcfirst( page ) excerpt = excerpt :gsub( '%[%[(' .. ucpage .. ')%]%]', '%1' ) :gsub( '%[%[(' .. lcpage .. ')%]%]', '%1' ) :gsub( '%[%[' .. ucpage .. '\|([^]]+)%]%]', '%1' ) :gsub( '%[%[' .. lcpage .. '\|([^]]+)%]%]', '%1' ) return excerpt end -- ~~replace~~Replace the bold title or synonym near the start of the ~~article~~page by a ~~wikilink~~link to the ~~article~~page function Excerpt.linkBold( excerpt, page ) local lang = mw.language.getContentLanguage() local ~~pos~~position = mw.ustring.find(~~text~~ excerpt, "'''" .. lang:ucfirst(~~pageName~~ page ) .. "'''", 1, true ) -- look for "'''Foo''' is..." (uc) or "A '''foo''' is..." (lc) or mw.ustring.find(~~text~~ excerpt, "'''" .. lang:lcfirst(~~pageName~~ page ) .. "'''", 1, true ) -- plain search: special characters in ~~pageName~~page represent themselves if ~~pos~~position then local ~~len~~length = mw.ustring.len(~~pageName~~ page ) ~~text~~excerpt = mw.ustring.sub(~~text~~ excerpt, 1, ~~pos~~position + 2 ) .. "'[["' .. mw.ustring.sub(~~text~~ excerpt, ~~pos~~position + 3, ~~pos~~position + ~~len~~length + 2 ) .. "']]"' .. mw.ustring.sub(~~text~~ excerpt, ~~pos~~position + ~~len~~length + 3, -1 ) -- link it else -- look for anything unlinked in bold, assumed to be a synonym of the title (e.g. a person's birth name) ~~text~~excerpt = mw.ustring.gsub(~~text~~ excerpt, "()'''(.-')'''", function ( a, b ) if anot <mw.ustring.find( ~~100~~b, '%[' ) and not mw.ustring.find( b, "'%["{' ) then --- if ~~early~~not inwikilinked ~~article~~or ~~and~~some ~~not~~weird ~~wikilinked~~template return "'''[[" .. ~~pageName~~page .. "'\|"' .. b .. "]]'''" -- replace '''Foo''' by '''[[~~pageName~~page\|Foo]]''' else return nil -- instruct gsub to make no change end end, 1 ) -- ~~"end" here~~ terminates the anonymous replacement function~~(a, b)~~ passed to gsub end return excerpt end function Excerpt.addTrackingCategories( excerpt ) ~~-- remove '''bold text''' if requested~~ local currentTitle = mw.title.getCurrentTitle() ~~if is(pageOptions.nobold) then text = mw.ustring.gsub(text, "'''", "") end~~ local contentCategory = config.categories.content if contentCategory and currentTitle.isContentPage then ~~text = fileText .. text~~ excerpt = excerpt .. '[[Category:' .. contentCategory .. ']]' ~~-- Seek and destroy unterminated templates and wikilinks~~ ~~repeat -- hide matched {{template}}s including nested templates~~ ~~local t = text~~ ~~text = mw.ustring.gsub(text, "{(%b{})}", "\27{\27%1\27}\27") -- {{sometemplate}} → E{Esometemplate}E}E where E represents escape~~ ~~text = mw.ustring.gsub(text, "(< math[^>]>[^<]-)}}(.-< /math >)", "%1}\27}\27%2") -- <math>\{sqrt\{hat{x}}</math> → <math>\{sqrt\{hat{x}E}E</math>~~ ~~until text == t~~ ~~repeat -- do similar for [[wikilink]]s~~ ~~local t = text~~ ~~text = mw.ustring.gsub(text, "%[(%b[])%]", "\27[\27%1\27]\27")~~ ~~until text == t~~ ~~text = text.gsub(text, "([{}%[%]])%1[^\27].", "") -- remove unmatched {{, }}, [[ or ]] and everything thereafter, avoiding ]E]E etc.~~ ~~text = text.gsub(text, "([{}%[%]])%1$", "") -- remove unmatched {{, }}, [[ or ]] at end of text~~ ~~text = mw.ustring.gsub(text, "\27", "") -- unhide matched pairs: E{E{ → {{, ]E]E → ]], etc.~~ ~~-- Ensure div tags match~~ ~~text = fixTags(text, "div")~~ ~~if pageOptions.more then text = text .. " '''[[" .. pageName .. "\|" .. pageOptions.more .. "]]'''" end -- wikilink to article for more info~~ ~~if pageOptions.list and not pageOptions.showall then -- add a collapsed list of pages which might appear~~ ~~local listtext = pageOptions.list~~ ~~if listtext == "" then listtext = "Other articles" end~~ ~~text = text .. "{{collapse top\|title={{resize\|85%\|" ..listtext .. "}}\|bg=fff}}{{hlist"~~ ~~for _, p in pairs(pageNames) do~~ ~~if mw.ustring.match(p, "%S") then text = text .. "\|[[" .. mw.text.trim(p) .. "]]" end~~ ~~end~~ ~~text = text .. "}}\n{{collapse bottom}}"~~ end local namespaceCategory = config.categories[ currentTitle.namespace ] if namespaceCategory then ~~return text~~ excerpt = excerpt .. '[[Category:' .. namespaceCategory .. ']]' end return excerpt end -- Helper method to match from a list of regular expressions ~~-- Shared template invocation code for lead and random functions~~ -- Like so: match pre..list[1]..post or pre..list[2]..post or ... ~~local function invoke(frame, func)~~ function Excerpt.matchAny( text, pre, list, post, init ) ~~-- args = { 1,2,... = page names, paragraphs = list e.g. "1,3-5", files = list, more = text}~~ local match = {} ~~local args = {} -- args[k] = frame.args[k] or frame:getParent().args[k] for all k in either (numeric or not)~~ for i = 1, #list do ~~for k, v in pairs(frame:getParent().args) do args[k] = v end~~ match = { mw.ustring.match( text, pre .. list[ i ] .. post, init ) } ~~for k, v in pairs(frame.args) do args[k] = v end -- args from a Lua call have priority over parent args from template~~ if match[1] then return unpack( match ) end ~~errors = args["errors"] -- set the module level boolean used in local function err~~ ~~local articlecount = #args -- must be 1 except with selected=Foo and Foo=Somepage~~ ~~if articlecount < 1 and not (func == "selected" and args[func] and args[args[func]]) then~~ ~~return err("No articles provided")~~ end return nil end -- Helper function to get arguments ~~local pageNames = {}~~ -- args from Lua calls have priority over parent args from template ~~if func == "lead" then~~ function Excerpt.getArg( key, default ) ~~pageNames = { args[1] }~~ local frame = mw.getCurrentFrame() ~~elseif func == "linked" or func == "listitem" then~~ for k, value in pairs( frame:getParent().args ) do ~~-- Read named page and find its wikilinks~~ if k == key and mw.text.trim( value ) ~= '' then ~~local page = args[1]~~ return value ~~local text, title = getContent(page)~~ ~~if not title then~~ ~~return err("No title for page name " .. page)~~ ~~elseif not text then~~ ~~return err("No content for page name " .. page)~~ end ~~if args["section"] then -- check relevant section only~~ ~~text = getSection(text, args["section"], args["sectiononly"])~~ ~~if not text then return err("No section " .. args["section"] .. " in page " .. page) end~~ ~~end~~ ~~-- replace annotated links with real links~~ ~~text = mw.ustring.gsub(text, "{{%s[Aa]nnotated[ _]link%s\|%s(.-)%s}}", "[[%1]]")~~ ~~if func == "linked" then~~ ~~for p in mw.ustring.gmatch(text, "%[%[%s([^%]\|\n])") do table.insert(pageNames, p) end~~ ~~else -- listitem: first wikilink on a line beginning , :#, etc. except in "See also" or later section~~ ~~text = mw.ustring.gsub(text, "\n== See also.", "")~~ ~~for p in mw.ustring.gmatch(text, "\n:[%#][^\n]-%[%[%s([^%]\|\n])") do table.insert(pageNames, p) end~~ ~~end~~ ~~elseif func == "random" then~~ ~~-- accept any number of page names. If more than one, we'll pick one randomly~~ ~~for i, p in pairs(args) do~~ ~~if p and type(i) == 'number' then table.insert(pageNames, p) end~~ ~~end~~ ~~elseif func == "selected" then~~ ~~local articlekey = args[func]~~ ~~if tonumber(articlekey) then -- normalise article number into the range 1..#args~~ ~~articlekey = articlekey % articlecount~~ ~~if articlekey == 0 then articlekey = articlecount end~~ ~~end~~ ~~pageNames = { args[articlekey] }~~ end for k, value in pairs( frame.args ) do if k == key and mw.text.trim( value ) ~= '' then ~~local options = args -- pick up miscellaneous options: more, errors, fileargs~~ return value ~~options.paraflags = numberFlags(args["paragraphs"] or "") -- parse paragraphs, e.g. "1,3-5" → {"1","3-5"}~~ ~~options.fileflags = numberFlags(args["files"] or "") -- parse file numbers~~ ~~if options.more and options.more == "" then options.more = "Read more..." end -- more= is short for this default text~~ ~~local text = ""~~ ~~if options.showall then~~ ~~local separator = ""~~ ~~for _, p in pairs(pageNames) do~~ ~~local t = main({ p }, options)~~ ~~if t ~= "" then~~ ~~text = text .. separator .. t~~ ~~separator = options.showall~~ ~~if separator == "" then separator = "{{clear}}{{hr}}" end~~ ~~end~~ end ~~else~~ ~~text = main(pageNames, options)~~ end return default end -- Helper method to get an error message ~~if text == "" and d.brokenCategory and d.brokenCategory ~= "" and mw.title.getCurrentTitle().isContentPage then~~ -- This method also categorizes the current page in one of the configured error categories ~~return "[[Category:" .. d.brokenCategory .. "]]"~~ function Excerpt.getError( key, value ) ~~else~~ local message = Excerpt.getMessage( 'error-' .. key, value ) ~~return frame:preprocess(text)~~ local markup = mw.html.create( 'div' ):addClass( 'error' ):wikitext( message ) if config.categories and config.categories.errors and mw.title.getCurrentTitle().isContentPage then markup:node( '[[Category:' .. config.categories.errors .. ']]' ) end return markup end -- Helper method to get a localized message ~~local function excerpt(frame) -- Replicate {{Excerpt}} entirely in Lua for reduced Post-expand include size~~ -- This method uses Module:TNT to get localized messages from https://commons.wikimedia.org/wiki/Data:I18n/Module:Excerpt.tab ~~local args = {} -- args[k] = frame.args[k] or frame:getParent().args[k] for all k in either (numeric or not)~~ -- If Module:TNT is not available or the localized message does not exist, the key is returned instead ~~for k, v in pairs(frame:getParent().args) do args[k] = v end~~ function Excerpt.getMessage( key, value ) ~~for k, v in pairs(frame.args) do args[k] = v end -- args from a Lua call have priority over parent args from template~~ local ok, TNT = pcall( require, 'Module:TNT' ) if not ok then return key end local ok2, message = pcall( TNT.format, 'I18n/Module:Excerpt.tab', key, value ) if not ok2 then return key end return message end -- Helper method to escape a string for use in regexes ~~local tag = is(args.tag) and args.tag or 'div'~~ function Excerpt.escapeString( str ) ~~local article = is(args.article) and args.article or args[1] or '{{{1}}}'~~ return str:gsub( '[%^%$%(%)%.%[%]%%+%-%?%%]', '%%%0' ) ~~local section = is(args.section) and args.section or args[2]~~ end -- Helper method to remove a string from a text ~~local output = {}~~ -- @param text Text from where to remove the string ~~output[1] = frame:extensionTag{ name = 'templatestyles', args = {src='Excerpt/styles.css'} }~~ -- @param str String to remove ~~output[2] = '<' .. tag .. ' class="excerpt-block">'~~ -- @return The given text with the string removed ~~output[3] = is(args.indicator) and ('<' .. tag .. ' class="excerpt-indicator">') or ''~~ function Excerpt.removeString( text, str ) ~~if is(args.nohat) then~~ local pattern = Excerpt.escapeString( str ) ~~output[4] = ''~~ if #pattern > 9999 then -- strings longer than 10000 bytes can't be put into regexes ~~else~~ pattern = Excerpt.escapeString( mw.ustring.sub( str, 1, 999 ) ) .. '.-' .. Excerpt.escapeString( mw.ustring.sub( str, -999 ) ) ~~local hatnote = {}~~ end ~~hatnote[1] = 'This' .. (is(args.indicator) and '' or ' section') .. ' is an excerpt from '~~ return text:gsub( pattern, '' ) ~~hatnote[2] = '[['~~ ~~hatnote[3] = article .. (is(section) and ('#' .. frame:callParserFunction( 'urlencode', section, 'WIKI' )) or '')~~ ~~hatnote[4] = '\|'~~ ~~hatnote[5] = article .. (is(section) and (frame:callParserFunction( '#tag:nowiki', ' § ' ) .. section) or '')~~ ~~hatnote[6] = ']]'~~ ~~hatnote[7] = "''" .. '<span class="mw-editsection-like plainlinks"><span>[ </span>['~~ ~~local title = mw.title.new(article) or mw.title.getCurrentTitle()~~ ~~hatnote[8] = title:fullUrl('action=edit') .. ' edit'~~ ~~hatnote[9] = ']<span> ]</span></span>' .. "''"~~ ~~output[4] = require('Module:Hatnote')._hatnote(table.concat(hatnote), {selfref=true}) or err("Error generating hatnote")~~ ~~end~~ ~~output[5] = '<' .. tag .. ' class="excerpt">\n'~~ ~~if article ~= '{{{1}}}' then~~ ~~if is(args.fragment) then~~ ~~output[6] = frame:callParserFunction( '#lst', article, args.fragment) or err("Error transcluding text")~~ ~~else~~ ~~local options = args -- pick up miscellaneous options: more, errors, fileargs~~ ~~options.paraflags = numberFlags(args.paragraphs or "") -- parse paragraphs, e.g. "1,3-5" → {"1","3-5"}~~ ~~options.fileflags = numberFlags(args.files or "1") -- parse file numbers~~ ~~options.nobold=1~~ ~~options.keepTables = is(args.tables) and args.tables or 1~~ ~~options.keepRefs = is(args.references) and args.references or 1~~ ~~options.keepSubsections = is(args.subsections) and args.subsections or ""~~ ~~if options.more and options.more == "" then options.more = "Read more..." end -- more= is short for this default text~~ ~~local pageNames = { (article .. '#' .. (section or '')) }~~ ~~local text = main(pageNames, options)~~ ~~if text == "" and d.brokenCategory and d.brokenCategory ~= "" and mw.title.getCurrentTitle().isContentPage then~~ ~~output[6] = "[[Category:" .. d.brokenCategory .. "]]"~~ ~~else~~ ~~output[6] = frame:preprocess(text) or err("Error processing text")~~ ~~end~~ ~~end~~ ~~else~~ ~~output[6] = err("No article provided")~~ ~~end~~ ~~output[7] = '</' .. tag .. '>'~~ ~~output[8] = is(args.indicator) and ('</' .. tag .. '>') or ''~~ ~~output[9] = '</' .. tag .. '>'~~ ~~output[10] = mw.title.getCurrentTitle().isContentPage and '[[Category:Articles with excerpts]]' or ''~~ ~~return table.concat(output)~~ end -- Helper method to convert a comma-separated list of numbers or min-max ranges into a list of booleans ~~-- Entry points for template callers using #invoke:~~ -- @param filter Required. Comma-separated list of numbers or min-max ranges, for example '1,3-5' ~~function p.lead(frame) return invoke(frame, "lead") end -- {{Transclude lead excerpt}} reads the first and only article~~ -- @return Map from integers to booleans, for example {1=true,2=false,3=true,4=true,5=true} ~~function p.linked(frame) return invoke(frame, "linked") end -- {{Transclude linked excerpt}} reads a randomly selected article linked from the given page~~ -- @return Boolean indicating whether the filters should be treated as a blacklist or not ~~function p.listitem(frame) return invoke(frame, "listitem") end -- {{Transclude list item excerpt}} reads a randomly selected article listed on the given page~~ -- @note Merging this into matchFilter is possible, but way too inefficient ~~function p.random(frame) return invoke(frame, "random") end -- {{Transclude random excerpt}} reads any article (default for invoke with one argument)~~ function Excerpt.parseFilter( filter ) ~~function p.selected(frame) return invoke(frame, "selected") end -- {{Transclude selected excerpt}} reads the article whose key is in the selected= parameter~~ local filters = {} ~~function p.excerpt(frame) return excerpt(frame) end -- {{Excerpt}} transcludes part of an article into another article~~ local isBlacklist = false if string.sub( filter, 1, 1 ) == '-' then isBlacklist = true filter = string.sub( filter, 2 ) end local values = mw.text.split( filter, ',' ) -- split values: '1,3-5' to {'1','3-5'} for _, value in pairs( values ) do value = mw.text.trim( value ) local min, max = mw.ustring.match( value, '^(%d+)%s[-–—]%s(%d+)$' ) -- '3-5' to min=3 max=5 if not max then min, max = string.match( value, '^((%d+))$' ) end -- '1' to min=1 max=1 if max then for i = min, max do filters[ i ] = true end else filters[ value ] = true -- if we reach this point, the string had the form 'a,b,c' rather than '1,2,3' end end local filter = {cache = {}, terms = filters} return filter, isBlacklist end -- Helper function to see if a value matches any of the given filters ~~-- Entry points for other Lua modules~~ function Excerpt.matchFilter( value, filter ) ~~function p.getContent(page, frame) return getContent(page, frame) end~~ if type(value) == "number" then ~~function p.getsection(text, section) return getSection(text, section) end~~ return filter.terms[value] ~~function p.parse(text, options, filesOnly) return parse(text, options, filesOnly) end~~ else ~~function p.argimage(text) return argImage(text) end~~ local cached = filter.cache[value] ~~function p.checkimage(image) return checkImage(image) end~~ if cached ~= nil then ~~function p.parseimage(text, start) return parseImage(text, start) end~~ return cached ~~function p.cleanupText(text, options) return cleanupText(text, options) end~~ end ~~function p.main(pageNames, options) return main(pageNames, options) end~~ local lang = mw.language.getContentLanguage() ~~function p.numberflags(str) return numberFlags(str) end~~ local lcvalue = lang:lcfirst(value) local ucvalue = lang:ucfirst(value) for term in pairs( filter.terms ) do if value == tostring(term) or type(term) == "string" and ( lcvalue == term or ucvalue == term or mw.ustring.match( value, term ) ) then filter.cache[value] = true return true end end filter.cache[value] = false end end return pExcerpt