Module:Excerpt/sandbox: Difference between revisions

Content deleted Content added
Recover green status of most test cases
Try to automatically account for page merge-like redirects
 
(25 intermediate revisions by 2 users not shown)
Line 7:
local yesno = require( 'Module:Yesno' )
 
local ok, config = pcall( require, 'Module:Excerpt/config/sandbox' )
if not ok then config = {} end
 
Line 15:
function Excerpt.main( frame )
 
-- Make sure the requested page exists and get the wikitext
local page = Excerpt.getArg( 1 )
if not page or page == '{{{1}}}' then return Excerpt.getError( 'no-page' ) end
local title = mw.title.new( page )
if not title then return Excerpt.getError( 'invalid-title', page ) end
local fragment = title.fragment -- save for later
if title.isRedirect then title = title.redirectTarget end
if title.isRedirect then
title = title.redirectTarget
if fragment == "" then
fragment = title.fragment -- page merge potential
end
end
if not title.exists then return Excerpt.getError( 'page-not-found', page ) end
page = title.prefixedText
 
-- Set variables from the template parameters
local hash = string.match( Excerpt.getArg( 1 ), '[^#]+#(.+)' )
local section = Excerpt.getArg( 2, hash )
local hat = yesno( Excerpt.getArg( 'hat', true ) )
local edit = yesno( Excerpt.getArg( 'edit', true ) )
local editIntro = Excerpt.getArg( 'editintro' )
local this = Excerpt.getArg( 'this' )
local only = Excerpt.getArg( 'only' )
local files = Excerpt.getArg( 'files', Excerpt.getArg( 'file', ( only == 'file' and 1 ) ) )
local lists = Excerpt.getArg( 'lists', Excerpt.getArg( 'list', ( only == 'list' and 1 ) ) )
local tables = Excerpt.getArg( 'tables', Excerpt.getArg( 'table', ( only == 'table' and 1 ) ) )
local templates = Excerpt.getArg( 'templates', Excerpt.getArg( 'template', ( only == 'template' and 1 ) ) )
local paragraphs = Excerpt.getArg( 'paragraphs', Excerpt.getArg( 'paragraph', ( only == 'paragraph' and 1 ) ) )
local references = yesno( Excerpt.getArg( 'references', true ) )
local subsections = yesno( Excerpt.getArg( 'subsections', false ) )
local links = yesno( Excerpt.getArg( 'links', true ) )
local bold = yesno( Excerpt.getArg( 'bold', false ) )
local briefDates = yesno( Excerpt.getArg( 'briefdates', false ) )
local inline = yesno( Excerpt.getArg( 'inline' ) )
local quote = yesno( Excerpt.getArg( 'quote' ) )
local more = yesno( Excerpt.getArg( 'more' ) )
local class = Excerpt.getArg( 'class' )
local displayTitle = Excerpt.getArg( 'displaytitle', page )
 
-- Get the full wikitext
local wikitext = title:getContent()
 
-- Get the template params and process them
local params = {
hat = yesno( Excerpt.getArg( 'hat', true ) ),
this = Excerpt.getArg( 'this' ),
only = Excerpt.getArg( 'only' ),
files = Excerpt.getArg( 'files', Excerpt.getArg( 'file' ) ),
lists = Excerpt.getArg( 'lists', Excerpt.getArg( 'list' ) ),
tables = Excerpt.getArg( 'tables', Excerpt.getArg( 'table' ) ),
templates = Excerpt.getArg( 'templates', Excerpt.getArg( 'template' ) ),
paragraphs = Excerpt.getArg( 'paragraphs', Excerpt.getArg( 'paragraph' ) ),
references = yesno( Excerpt.getArg( 'references', true ) ),
subsections = yesno( Excerpt.getArg( 'subsections', false ) ),
links = yesno( Excerpt.getArg( 'links', true ) ),
bold = yesno( Excerpt.getArg( 'bold', false ) ),
briefDates = yesno( Excerpt.getArg( 'briefdates', false ) ),
inline = yesno( Excerpt.getArg( 'inline' ) ),
quote = yesno( Excerpt.getArg( 'quote' ) ),
more = yesno( Excerpt.getArg( 'more' ) ),
class = Excerpt.getArg( 'class' ),
displayTitle = Excerpt.getArg( 'displaytitle', page ),
}
 
-- Make sure the requested section exists and get the excerpt
local excerpt
local section = Excerpt.getArg( 2, fragment )
section = mw.text.trim( section )
if section == '' then section = nil end
if section then
excerpt = parser.getSectionTag( wikitext, section )
if not excerpt then
if params.subsections then
excerpt = parser.getSection( wikitext, section )
else
Line 63 ⟶ 69:
end
if not excerpt then return Excerpt.getError( 'section-not-found', section ) end
if excerpt == '' and not only then return Excerpt.getError( 'section-empty', section ) end
else
excerpt = parser.getLead( wikitext )
if excerpt == '' and not only then return Excerpt.getError( 'lead-empty' ) end
end
 
-- Remove noinclude bits
-- Leave only the requested elements
excerpt = excerpt:gsub( '<[Nn][Oo][Ii][Nn][Cc][Ll][Uu][Dd][Ee]>.-</[Nn][Oo][Ii][Nn][Cc][Ll][Uu][Dd][Ee]>', '' )
if only then
if only == 'table' then
local tables = parser.getTables( excerpt )
excerpt = tables[1]
end
if only == 'tables' then
local tables = parser.getTables( excerpt )
excerpt = table.concat( tables, '\n' )
end
end
 
-- Filter various elements from the excerpt
if briefDates then
excerpt = Excerpt.fixDatesfilterFiles( excerpt, params.files )
excerpt = Excerpt.filterLists( excerpt, params.lists )
end
excerpt = Excerpt.filterTables( excerpt, params.tables )
excerpt = Excerpt.filterParagraphs( excerpt, params.paragraphs )
 
-- If no file is found, try to get one from the infobox
if ( params.only == 'file' or params.only == 'files' ) or ( not params.only and ( not params.files or params.files ~= '0' or not files ) ) -- caller asked for files
and not section -- and we're in the lead section
and config.captions -- and we have the config option required to try finding files in infoboxes
and #parser.getFiles( excerpt ) == 0 -- and there are're no files in the excerpt
then
excerpt = Excerpt.addInfoboxFile( excerpt )
end
 
-- Filter the templates by appending the templates blacklist to the templates filter
if config.blacklist then
local blacklist = table.concat( config.blacklist, ',' )
if params.templates then
if string.sub( params.templates, 1, 1 ) == '-' then
params.templates = params.templates .. ',' .. blacklist
end
else
params.templates = '-' .. blacklist
end
end
excerpt = Excerpt.filterTemplates( excerpt, params.templates )
 
-- Leave only the requested elements
if params.only == 'file' or params.only == 'files' then
local files = parser.getFiles( excerpt )
excerpt = params.only == 'file' and files[1] or table.concat( files, '\n\n' )
end
if params.only == 'list' or params.only == 'lists' then
local lists = parser.getLists( excerpt )
excerpt = params.only == 'list' and lists[1] or table.concat( lists, '\n\n' )
end
if params.only == 'table' or params.only == 'tables' then
local tables = parser.getTables( excerpt )
excerpt = params.only == 'table' and tables[1] or table.concat( tables, '\n\n' )
end
if params.only == 'paragraph' or params.only == 'paragraphs' then
local paragraphs = parser.getParagraphs( excerpt )
excerpt = params.only == 'paragraph' and paragraphs[1] or table.concat( paragraphs, '\n\n' )
end
if params.only == 'template' or params.only == 'templates' then
local templates = parser.getTemplates( excerpt )
excerpt = params.only == 'template' and templates[1] or table.concat( templates, '\n\n' )
end
 
-- @todo Make more robust and move downwards
if params.briefDates then
excerpt = Excerpt.fixDates( excerpt )
end
 
-- Remove unwanted elements
excerpt = Excerpt.removeComments( excerpt )
excerpt = Excerpt.removeBlacklist( excerpt )
excerpt = Excerpt.removeSelfLinks( excerpt )
excerpt = Excerpt.removeNonFreeFiles( excerpt )
excerpt = Excerpt.removeBehaviorSwitches( excerpt )
 
-- Fix or remove the references
if params.references then
excerpt = Excerpt.fixReferences( excerpt, page, wikitext )
else
excerpt = Excerpt.removeReferences( excerpt )
end
 
-- Remove wikilinks
if not params.links then
excerpt = Excerpt.removeLinks( excerpt )
end
 
-- Link the bold text near the start of most leads and then remove it
if not section then
excerpt = Excerpt.linkBold( excerpt, page )
excerpt = Excerpt.linkBold( excerpt, page )
if not bold then
end
if not params.bold then
excerpt = Excerpt.removeBold( excerpt )
end
 
if references then
excerpt = Excerpt.fixReferences( excerpt, page, wikitext )
else
excerpt = Excerpt.removeReferences( excerpt )
end
 
-- Remove extra line breaks but leave one before and after so the parser interprets lists, tables, etc. correctly
excerpt = excerpt:gsub( '\n\n\n+', '\n\n' )
excerpt = mw.text.trim( excerpt )
excerpt = string.gsub( excerpt, '\n\n\n+', '\n\n' )
excerpt = '\n' .. excerpt .. '\n'
 
Line 128 ⟶ 170:
-- Add tracking categories
if config.categories then
excerpt = Excerpt.addTrackingCategories( excerpt )
local currentTitle = mw.title.getCurrentTitle()
local contentCategory = config.categories.content
if contentCategory and currentTitle.isContentPage then
excerpt = excerpt .. '[[Category:' .. contentCategory .. ']]'
end
local namespaceCategory = config.categories[ currentTitle.namespace ]
if namespaceCategory then
excerpt = excerpt .. '[[Category:' .. namespaceCategory .. ']]'
end
end
 
-- Build the final output
if params.inline then
local styles
return mw.text.trim( excerpt )
end
 
local tag = params.quote and 'blockquote' or 'div'
local block = mw.html.create( tag ):addClass( 'excerpt-block' ):addClass( params.class )
 
if config.styles then
local styles = frame:extensionTag( 'templatestyles', '', { src = config.styles } )
block:node( styles )
end
 
if params.hat then
-- Combine and return the elements
local hat = Excerpt.getHat( page, section, params )
if inline then
block:node( hat )
return mw.text.trim( excerpt )
end
local tag = 'div'
if quote then
tag = 'blockquote'
end
 
excerpt = mw.html.create( 'div' ):addClass( 'excerpt' ):wikitext( excerpt )
block:node( excerpt )
 
if hat and not inlineparams.more then
hatlocal more = Excerpt.getHatgetReadMore( page, section, displayTitle, this, quote, only, edit, editIntro )
block:node( more )
end
 
return block
if more and not inline then
end
more = Excerpt.getReadMore( page, section, more )
 
-- Filter the files in the given wikitext against the given filter
-- @param wikitext Wikitext to filter
-- @param filter Optional filter string, parsed with Excerpt.parseFilter
-- @return The wikitext without the files rejected by the filter
function Excerpt.filterFiles( wikitext, filter )
	if not filter then
		return wikitext
	end
	local terms, isBlacklist = Excerpt.parseFilter( filter )
	for position, fileWikitext in pairs( parser.getFiles( wikitext ) ) do
		local fileName = parser.getFileName( fileWikitext )
		-- A file is selected when either its position or its name matches
		local selected = Excerpt.matchFilter( position, terms ) or Excerpt.matchFilter( fileName, terms )
		-- Blacklists drop the selected files, whitelists drop all the others
		local drop
		if isBlacklist then
			drop = selected
		else
			drop = not selected
		end
		if drop then
			wikitext = Excerpt.removeString( wikitext, fileWikitext )
		end
	end
	return wikitext
end
 
-- Filter the lists in the given wikitext against the given filter
-- @param wikitext Wikitext to filter
-- @param filter Optional filter string, parsed with Excerpt.parseFilter
-- @return The wikitext without the lists rejected by the filter
function Excerpt.filterLists( wikitext, filter )
	if not filter then return wikitext end
	local filters, isBlacklist = Excerpt.parseFilter( filter )
	local lists = parser.getLists( wikitext )
	for index, list in pairs( lists ) do
		-- Lists are matched by position only
		-- Blacklists drop the matched lists, whitelists drop all the others
		if isBlacklist and Excerpt.matchFilter( index, filters )
		or not isBlacklist and not Excerpt.matchFilter( index, filters ) then
			wikitext = Excerpt.removeString( wikitext, list )
		end
	end
	return wikitext
end
 
-- Filter the tables in the given wikitext against the given filter
-- @param wikitext Wikitext to filter
-- @param filter Optional filter string, parsed with Excerpt.parseFilter
-- @return The wikitext without the tables rejected by the filter
function Excerpt.filterTables( wikitext, filter )
	if not filter then return wikitext end
	local filters, isBlacklist = Excerpt.parseFilter( filter )
	local tables = parser.getTables( wikitext )
	for index, t in pairs( tables ) do
		-- Read the id attribute from the opening line of the table, if there is one
		local id = string.match( t, '{|[^\n]-id%s*=%s*["\']?([^"\'\n]+)["\']?[^\n]*\n' )
		-- A table is selected when either its position or its id matches
		-- Tables without an id can only match by position, so never hand a nil id to matchFilter
		local matched = Excerpt.matchFilter( index, filters )
			or ( id ~= nil and Excerpt.matchFilter( id, filters ) )
		-- Blacklists drop the selected tables, whitelists drop all the others
		if isBlacklist and matched
		or not isBlacklist and not matched then
			wikitext = Excerpt.removeString( wikitext, t )
		end
	end
	return wikitext
end
 
-- Filter the paragraphs in the given wikitext against the given filter
-- @param wikitext Wikitext to filter
-- @param filter Optional filter string, parsed with Excerpt.parseFilter
-- @return The wikitext without the paragraphs rejected by the filter
function Excerpt.filterParagraphs( wikitext, filter )
	if not filter then
		return wikitext
	end
	local terms, isBlacklist = Excerpt.parseFilter( filter )
	for position, text in pairs( parser.getParagraphs( wikitext ) ) do
		-- Paragraphs are matched by position only
		local selected = Excerpt.matchFilter( position, terms )
		local drop = not selected
		if isBlacklist then
			drop = selected
		end
		if drop then
			wikitext = Excerpt.removeString( wikitext, text )
		end
	end
	return wikitext
end
 
-- Filter the templates in the given wikitext against the given filter
-- @param wikitext Wikitext to filter
-- @param filter Optional filter string, parsed with Excerpt.parseFilter
-- @return The wikitext without the templates rejected by the filter
function Excerpt.filterTemplates( wikitext, filter )
	if not filter then
		return wikitext
	end
	local terms, isBlacklist = Excerpt.parseFilter( filter )
	local found = parser.getTemplates( wikitext )
	for position, call in pairs( found ) do
		local templateName = parser.getTemplateName( call )
		-- A template is selected when either its position or its name matches
		local selected = Excerpt.matchFilter( position, terms ) or Excerpt.matchFilter( templateName, terms )
		if isBlacklist then
			-- Blacklist: drop what was selected
			if selected then
				wikitext = Excerpt.removeString( wikitext, call )
			end
		elseif not selected then
			-- Whitelist: drop everything that was not selected
			wikitext = Excerpt.removeString( wikitext, call )
		end
	end
	return wikitext
end
 
Line 177 ⟶ 285:
file = parameters[file]
if file and Excerpt.matchAny( file, '^.*%.', { '[Jj][Pp][Ee]?[Gg]', '[Pp][Nn][Gg]', '[Gg][Ii][Ff]', '[Ss][Vv][Gg]' }, '.*' ) then
file = mw.ustringstring.match( file, '%[?%[?.-:([^{|]+)%]?%]?' ) or file -- [[File:Example.jpg{{!}}upright=1.5]] to Example.jpg
captions = pair[2]
for _, p in pairs( captions ) do
Line 207 ⟶ 315:
local fileName = 'File:' .. parser.getFileName( file )
local fileTitle = mw.title.new( fileName )
if fileTitle then
local fileDescription = fileTitle:getContent()
if not fileDescription or local fileDescription == '' thenfileTitle:getContent()
if not fileDescription or fileDescription == '' then
local frame = mw.getCurrentFrame()
local frame = mw.getCurrentFrame()
fileDescription = frame:preprocess( '{{' .. fileName .. '}}' ) -- try Commons
end
if fileDescription and string.match( fileDescription, '[Nn]on%-free' ) then
wikitext = Excerpt.removeString( wikitext, file )
end
end
end
Line 219 ⟶ 329:
end
 
function Excerpt.getHat( page, section, displayTitle, this, quote, only, edit, editIntroparams )
local hat
 
-- Build the text
if params.this then
hat = params.this
elseif params.quote then
hat = Excerpt.getMessage( 'this' )
elseif params.only then
hat = Excerpt.getMessage( params.only )
else
hat = Excerpt.getMessage( 'section' )
Line 234 ⟶ 344:
hat = hat .. ' ' .. Excerpt.getMessage( 'excerpt' )
 
-- Build the section link
if section then
hat = hat .. ' [[:' .. page .. '#' .. mw.uri.anchorEncode( section ) .. '|' .. params.displayTitle
.. ' § ' .. mw.ustring.section:gsub( section, '%[%[([^]|]+)|?[^]]*%]%]', '%1' ) .. ']].' -- remove nested links
else
hat = hat .. ' [[:' .. page .. '|' .. params.displayTitle .. ']].'
end
 
-- Build the edit link
local title = mw.title.new( page )
if edit then
local titleeditUrl = mw.title.new:fullUrl( page'action=edit' )
hat = hat .. '<span class="mw-editsection-like plainlinks"><span class="mw-editsection-bracket">[</span>['
local editUrl = title:fullUrl( 'action=edit' )
hat = hat .. editUrl .. ' ' .. mw.message.new( 'editsection' ):plain()
if editIntro then
hat = hat .. ']<span class="mw-editsection-bracket">]</span></span>'
editUrl = title:fullUrl( 'action=edit&editintro=' .. editIntro )
end
hat = hat .. '<span class="mw-editsection-like plainlinks"><span class="mw-editsection-bracket">[</span>['
hat = hat .. editUrl .. ' ' .. mw.message.new( 'editsection' ):plain()
hat = hat .. ']<span class="mw-editsection-bracket">]</span></span>'
end
 
if config.hat then
Line 265 ⟶ 370:
end
 
function Excerpt.getReadMore( page, section, more )
local link = "'''[[" .. page
if section then
link = link .. '#' .. section
end
linklocal text = '|' .Excerpt.getMessage( 'more' .. "]]'''")
link = link .. '|' .. text .. "]]'''"
link = mw.html.create( 'div' ):addClass( 'noprint excerpt-more' ):wikitext( link )
return link
Line 278 ⟶ 384:
-- @todo Use parser.getParagraphs() to get the first paragraph
function Excerpt.fixDates( excerpt )
local startposstart = 1 -- skip initial templates
local s
local e = 0
repeat
startposstart = e + 1
s, e = mw.ustring.find( excerpt, "'%s*%b{}%s*"', startposstart )
until not s or s > startposstart
s, e = mw.ustring.find( excerpt, "'%b()"', startposstart ) -- get (...), which may be (year–year)
if s and s < startposstart + 100 then -- look only near the start
local year1, conjunction, year2excerptStart = mw.ustring.match( mw.ustring.sub( excerpt, s, e ), '(%d%d%d+)(.-)(%d%d%d+)' )
iflocal year1, andconjunction, year2 and= ( mw.ustringstring.match( conjunctionexcerptStart, '[(%d%d%d+)(.-–—]' ) or mw.ustring.match( conjunction, '{{%s*[sS]ndd%d%s*}}d+)' ) ) then
if year1 and year2 and ( string.match( conjunction, '[%-–—]' ) or string.match( conjunction, '{{%s*[sS]nd%s*}}' ) ) then
local y1 = tonumber( year1 )
local y2 = tonumber( year2 )
if y2 > y1 and y2 < y1 + 125 and y1 <= tonumber( os.date( "'%Y"' ) ) then
excerpt = mw.ustring.sub( excerpt, 1, s ) .. year1 .. "'"' .. year2 .. mw.ustring.sub( excerpt, e )
end
end
Line 299 ⟶ 406:
end
 
-- Replace the first call to each reference defined outside of the excerpt for the full reference, to prevent undefined references
-- Then prefix the page title to the reference names to prevent conflicts
-- that is, replace <ref name="Foo"> for <ref name="Title of the article Foo">
Line 307 ⟶ 414:
-- @todo The current regex may fail in cases with both kinds of quotes, like <ref name="Darwin's book">
-- @param excerpt Wikitext of the excerpt
-- @param page Title of the page the excerpt comes from, used to prefix the reference names
-- @param wikitext Full wikitext of the page, searched for definitions missing from the excerpt
-- @return The excerpt with its references fixed
function Excerpt.fixReferences( excerpt, page, wikitext )
	local references = parser.getReferences( excerpt )
	local fixed = {} -- set of the reference names already handled
	for _, reference in pairs( references ) do
		local name = parser.getTagAttribute( reference, 'name' )
		if name and not fixed[ name ] then -- fix each reference only once
			fixed[ name ] = true -- keyed by name, so the check above can actually find it
			local content = parser.getTagContent( reference )
			if not content then -- reference is self-closing
				local full = parser.getReference( excerpt, name )
				if not full then -- the reference is not defined in the excerpt
					full = parser.getReference( wikitext, name )
					if full then
						-- A function replacement inserts the full reference verbatim,
						-- so any % in it is not read as a capture reference
						excerpt = excerpt:gsub( Excerpt.escapeString( reference ), function () return full end, 1 )
					end
				end
			end
		end
	end
	-- Prepend the page title to the reference names to prevent conflicts with other references in the transcluding page
	-- Quotation marks would break the name attribute, and % must be doubled to stay literal in a replacement string
	local prefix = page:gsub( '"', '' ):gsub( '%%', '%%%%' )
	excerpt = excerpt:gsub( '< *[Rr][Ee][Ff][^>]*name *= *["\']?([^"\'>/]+)["\']?[^>/]*(/?) *>', '<ref name="' .. prefix .. ' %1"%2>' )
	-- Remove reference groups because they don't apply to the transcluding page
	excerpt = excerpt:gsub( '< *[Rr][Ee][Ff] *group *= *["\']?[^"\'>/]+["\'] *>', '<ref>' )
	return excerpt
end
 
-- Remove blacklisted templates
-- @param excerpt Wikitext of the excerpt
-- @return The excerpt without the templates named in config.blacklist
function Excerpt.removeBlacklist( excerpt )
	-- Without a blacklist there is nothing to remove
	-- An empty filter would also become the pattern '', which matches every template name
	if not config.blacklist or #config.blacklist == 0 then
		return excerpt
	end
	local blacklist = table.concat( config.blacklist, ',' )
	local filters = Excerpt.parseFilter( blacklist )
	for _, template in pairs( parser.getTemplates( excerpt ) ) do
		local templateName = parser.getTemplateName( template )
		if Excerpt.matchFilter( templateName, filters ) then
			excerpt = Excerpt.removeString( excerpt, template )
		end
	end
	return excerpt
end
Line 363 ⟶ 453:
end
return excerpt
end
 
-- Strip the bold markup (three consecutive apostrophes) from the excerpt
-- @param excerpt Wikitext of the excerpt
-- @return The excerpt without bold markup, followed by the number of removals made
function Excerpt.removeBold( excerpt )
	local stripped, removals = string.gsub( excerpt, "'''", '' )
	return stripped, removals
end
 
-- Remove behavior switches such as __NOTOC__ (upper-case letters between double underscores)
-- @param excerpt Wikitext of the excerpt
-- @return The excerpt without behavior switches, followed by the number of removals made
function Excerpt.removeBehaviorSwitches( excerpt )
	return excerpt:gsub( '__[A-Z]+__', '' )
end
 
-- Remove HTML comments; the lazy match stops at the first closing marker
-- @param excerpt Wikitext of the excerpt
-- @return The excerpt without comments, followed by the number of removals made
function Excerpt.removeComments( excerpt )
	return excerpt:gsub( '<!%-%-.-%-%->', '' )
end
 
-- Remove the bold markup (three consecutive apostrophes)
-- @param excerpt Wikitext of the excerpt
-- @return The excerpt without bold markup, followed by the number of removals made
function Excerpt.removeBold( excerpt )
	return excerpt:gsub( "'''", '' )
end
 
Line 418 ⟶ 504:
return nil -- instruct gsub to make no change
end
end, 1 ) -- "end" here terminates the anonymous replacement function(a, b) passed to gsub
end
return excerpt
end
 
-- Append the tracking categories set in config.categories to the excerpt
-- Callers must make sure config.categories is set before calling
-- @param excerpt Wikitext of the excerpt
-- @return The excerpt followed by the category links that apply to the current page
function Excerpt.addTrackingCategories( excerpt )
	local currentTitle = mw.title.getCurrentTitle()
	local contentCategory = config.categories.content
	if contentCategory and currentTitle.isContentPage then
		excerpt = excerpt .. '[[Category:' .. contentCategory .. ']]'
	end
	-- Categories may also be configured per namespace number
	local namespaceCategory = config.categories[ currentTitle.namespace ]
	if namespaceCategory then
		excerpt = excerpt .. '[[Category:' .. namespaceCategory .. ']]'
	end
	return excerpt
end

-- Helper method to get the local name of a namespace and all its aliases
-- @param name Canonical name of the namespace, for example 'File'
-- @return Local name of the namespace and all aliases, for example {'File','Image','Archivo','Imagen'}
function Excerpt.getNamespaces( name )
	local namespaces = mw.clone( mw.site.namespaces[ name ].aliases ) -- Clone because https://en.wikipedia.org/w/index.php?diff=1056921358
	table.insert( namespaces, mw.site.namespaces[ name ].name )
	table.insert( namespaces, mw.site.namespaces[ name ].canonicalName )
	return namespaces
end
 
Line 489 ⟶ 578:
 
-- Helper method to remove a string from a text
-- @param text Text from where to remove the string
-- @param str String to remove
-- @return The given text with the string removed
Line 495 ⟶ 584:
local pattern = Excerpt.escapeString( str )
if #pattern > 9999 then -- strings longer than 10000 bytes can't be put into regexes
pattern = Excerpt.escapeString( mw.ustring.sub( str, 1, 999 ) ) .. '.-' .. Excerpt.escapeString( mw.ustring.sub( str, -999 ) )
end
return string.text:gsub( text, pattern, '' )
end
 
-- Helper method to convert a comma-separated list of numbers, min-max ranges or names into a filter
-- @param filter Required. Comma-separated list of numbers, min-max ranges or names, for example '1,3-5'
--        A leading '-' marks the list as a blacklist
-- @return Table with two fields: 'terms', a set of the parsed numbers and names,
--         for example {[1]=true,[3]=true,[4]=true,[5]=true}, and 'cache', an initially empty
--         table where matchFilter stores its results
-- @return Boolean indicating whether the filters should be treated as a blacklist or not
-- @note Merging this into matchFilter is possible, but way too inefficient
function Excerpt.parseFilter( filter )
	local terms = {}
	local isBlacklist = false
	if string.sub( filter, 1, 1 ) == '-' then
		isBlacklist = true
		filter = string.sub( filter, 2 )
	end
	local values = mw.text.split( filter, ',' ) -- split values: '1,3-5' to {'1','3-5'}
	for _, value in pairs( values ) do
		value = mw.text.trim( value )
		local min, max = mw.ustring.match( value, '^(%d+)%s*[-–—]%s*(%d+)$' ) -- '3-5' to min=3 max=5
		if not max then min, max = string.match( value, '^((%d+))$' ) end -- '1' to min=1 max=1
		if max then
			-- Store numeric keys, because matchFilter looks positions up by number
			for i = tonumber( min ), tonumber( max ) do terms[ i ] = true end
		elseif value ~= '' then
			-- If we reach this point, the string had the form 'a,b,c' rather than '1,2,3'
			-- Empty values are skipped because matchFilter would treat '' as a pattern that matches anything
			terms[ value ] = true
		end
	end
	return { cache = {}, terms = terms }, isBlacklist
end
 
-- Helper function to see if a value matches any of the given filters
-- @param value Number (position of an element) or string (name or id of an element) to test
-- @param filter Filter table with the 'terms' and 'cache' fields, as built by Excerpt.parseFilter
-- @return Truthy if the value matches the filter, false or nil otherwise
function Excerpt.matchFilter( value, filter )
	-- Elements without a name or id can't match anything, and nil can't be used as a cache key
	if value == nil then
		return false
	end
	-- Positions are looked up directly in the set of terms
	if type( value ) == "number" then
		return filter.terms[ value ]
	end
	-- Names are compared against every term, so remember the result for each name
	local cached = filter.cache[ value ]
	if cached ~= nil then
		return cached
	end
	local lang = mw.language.getContentLanguage()
	local lcvalue = lang:lcfirst( value )
	local ucvalue = lang:ucfirst( value )
	for term in pairs( filter.terms ) do
		if value == tostring( term )
		or type( term ) == "string" and (
			lcvalue == term -- ignore the case of the first letter
			or ucvalue == term
			or mw.ustring.match( value, term ) -- string terms are also tried as patterns
		) then
			filter.cache[ value ] = true
			return true
		end
	end
	filter.cache[ value ] = false
	return false
end
 
-- Entry points for backwards compatibility
-- Both simply forward the frame to Excerpt.main and return its result
-- @todo Verify that no one uses them and remove them
function Excerpt.lead( frame )
	return Excerpt.main( frame )
end

function Excerpt.excerpt( frame )
	return Excerpt.main( frame )
end
 
return Excerpt