Module:Excerpt/sandbox: Difference between revisions

Content deleted Content added
Change l10n for i18n, as discussed
Change local function names to camelCase except the entry points for backwards compatibility (see talk page technical debt). Also improve a couple comments.
Line 1:
-- Get localized data
local d = require("Module:Excerpt/i18n")
 
local p = {}
 
-- Helper function to debug
-- Returns blank text, or an error message if requested
local errors
-- Return blank text, or an error message if requested
local function err(text)
if errors then error(text, 2) end
Line 19 ⟶ 20:
end
 
-- Helper function to match from a list of regular expressions
-- Like so: match pre..list[1]..post or pre..list[2]..post or ...
local function matchAny(text, pre, list, post, init)
local match = {}
for i = 1, #list do
Line 30 ⟶ 32:
 
-- Help gsub to remove unwanted templates and pseudo-templates such as #tag:ref and DEFAULTSORT
local function stripTemplate(t)
-- If template is unwanted then return "" (gsub will replace by nothing), else return nil (gsub will keep existing string)
if matchAny(t, "^{{%s*", d.unwantedInlineTemplates, "%s*%f[|}]") then return "" end
 
-- If template is wanted but produces an unwanted reference then return the string with |shortref or |ref removed
Line 39 ⟶ 41:
 
-- If a wanted template has unwanted nested templates, purge them too
noref = mw.ustring.sub(noref, 1, 2) .. mw.ustring.gsub(mw.ustring.sub(noref, 3), "%b{}", stripTemplate)
 
-- Replace {{audio}} by its text parameter: {{Audio|Foo.ogg|Bar}} → Bar
Line 65 ⟶ 67:
 
-- Check image for suitability
local function checkImage(image)
local page = matchAny(image, "", d.fileNamespaces, "%s*:[^|%]]*") -- match File:(name) or Image:(name)
if not page then return false end
 
-- Limit to image types: .gif, .jpg, .jpeg, .png, .svg, .tiff, .xcf (exclude .ogg audio etc.)
if not matchAny(page, "%.", {"[Gg][Ii][Ff]", "[Jj][Pp][Ee]?[Gg]", "[Pp][Nn][Gg]", "[Ss][Vv][Gg]", "[Tt][Ii][Ff][Ff]", "[Xx][Cc][Ff]"}, "%s*$") then
return false
end
Line 77 ⟶ 79:
if desc and desc ~= "" then -- found description on local wiki
if mw.ustring.match(desc, "[Nn]on%-free") then return false end
desc = mw.ustring.gsub(desc, "%b{}", stripTemplate) -- remove DEFAULTSORT etc. to avoid side effects of frame:preprocess
elseif not rtitle then
return false
Line 91 ⟶ 93:
 
-- Attempt to parse [[File:...]] or [[Image:...]], either anywhere (start=false) or at the start only (start=true)
local function parseImage(text, start)
local startre = ""
if start then startre = "^" end -- a true flag restricts search to start of string
local image = matchAny(text, startre .. "%[%[%s*", d.fileNamespaces, "%s*:.*") -- [[File: or [[Image: ...
if image then
image = mw.ustring.match(image, "%b[]%s*") -- matching [[...]] to handle wikilinks nested in caption
Line 102 ⟶ 104:
 
-- Parse a caption, which ends at a | (end of parameter) or } (end of infobox) but may contain nested [..] and {..}
local function parseCaption(caption)
if not caption then return nil end
local len = mw.ustring.len(caption)
Line 124 ⟶ 126:
 
-- Attempt to construct a [[File:...]] block from {{infobox ... |image= ...}}
local function argImage(text)
local token = nil
local hasNamedArgs = mw.ustring.find(text, "|") and mw.ustring.find(text, "=")
Line 185 ⟶ 187:
capture_from = 1
while capture_from < mw.ustring.len(text) do
local position, caption = matchAny(text, "|%s*", d.captionParams, "%s*=%s*()([^\n]+)", capture_from)
if caption then
-- extend caption to parse "| caption = Foo {{Template\n on\n multiple lines}} Bar\n"
Line 201 ⟶ 203:
if images[i] then
if not captions[i] then
captions[i] = parseCaption(caption)
end
end
Line 279 ⟶ 281:
for _, index in ipairs(keys) do
local image = images[index]
local token = parseImage(image, true) -- look for image=[[File:...]] etc.
if not token then
image = mw.ustring.match(image, "^[^}|\n]*") -- remove later arguments
token = "[[" -- Add File: unless name already begins File: or Image:
if not matchAny(image, "^", d.fileNamespaces, "%s*:") then
token = token .. "File:"
end
Line 302 ⟶ 304:
 
-- Help gsub convert imagemaps into standard images
local function convertImageMap(imagemap)
local image = matchAny(imagemap, "[>\n]%s*", d.fileNamespaces, "[^\n]*")
if image then
return "<!--imagemap-->[[" .. mw.ustring.gsub(image, "[>\n]%s*", "", 1) .. "]]"
Line 312 ⟶ 314:
 
-- Convert a comma-separated list of numbers or min-max ranges into a list of booleans, e.g. "1,3-5" → {1=true,2=false,3=true,4=true,5=true}
local function numberFlags(str)
local ranges = mw.text.split(str, ",") -- parse ranges, e.g. "1,3-5" → {"1","3-5"}
local flags = {}
Line 362 ⟶ 364:
local allparas = true -- keep all paragraphs?
if options.paraflags then
if type(options.paraflags) ~= "table" then options.paraflags = numberFlags(options.paraflags) end
for _, v in pairs(options.paraflags) do
if v then allparas = false end -- if any para specifically requested, don't keep all
Line 374 ⟶ 376:
local maxfile = 0 -- for efficiency, stop checking images after this many have been found
if options.fileflags then
if type(options.fileflags) ~= "table" then options.fileflags = numberFlags(options.fileflags) end
for k, v in pairs(options.fileflags) do
if v and k > maxfile then maxfile = k end -- set maxfile = highest key in fileflags
Line 400 ⟶ 402:
f = modifyImage(f, "thumb")
f = modifyImage(f, fileargs)
if checkImage(f) then filetext = filetext .. f .. "\n" end
end
 
Line 412 ⟶ 414:
line = mw.ustring.gsub(line, "%b<>", "") -- remove all HTML tags from this line
-- if anything is left, other than an incomplete further template or an image, keep the template: it counts as part of the line
if mw.ustring.find(line, "%S") and not matchAny(line, "^%s*", { "{{", "%[%[%s*[Ff]ile:", "%[%[%s*[Ii]mage:" }, "") then
token = nil
end
Line 422 ⟶ 424:
if not filesOnly and not startLine then t = t .. token end
 
elseif matchAny(token, "{{%s*", d.wantedBlockTemplates, "%s*%f[|}]") then
t = t .. token -- keep wanted block templates
 
Line 429 ⟶ 431:
 
elseif files < maxfile then -- discard template, but if we are still collecting images...
local images = argImage(token) or {}
if not images then
local image = parseImage(token, false) -- look for embedded [[File:...]], |image=, etc.
if image then table.insert(images, image) end
end
for _, image in ipairs(images) do
if files < maxfile and checkImage(image) then -- if image is found and qualifies (not a sound file, non-free, etc.)
files = files + 1 -- count the file, whether displaying it or not
if options.fileflags and options.fileflags[files] then -- if displaying this image
Line 446 ⟶ 448:
end
else -- the next token in text is not a template
token = parseImage(text, true)
if token then -- the next token in text looks like an image
if files < maxfile and checkImage(token) then -- if more images are wanted and this is a wanted image
files = files + 1
if options.fileflags and options.fileflags[files] then
Line 514 ⟶ 516:
text = mw.ustring.gsub(text, "<%s*[Rr][Ee][Ff][^>]-/%s*>", "") -- remove refs cited elsewhere
text = mw.ustring.gsub(text, "<%s*[Rr][Ee][Ff].->.-<%s*/%s*[Rr][Ee][Ff]%s*>", "") -- remove refs
text = mw.ustring.gsub(text, "%b{}", stripTemplate) -- remove unwanted templates such as references
end
text = mw.ustring.gsub(text, "<%s*[Ss][Cc][Oo][Rr][Ee].->.-<%s*/%s*[Ss][Cc][Oo][Rr][Ee]%s*>", "") -- remove musical scores
text = mw.ustring.gsub(text, "<%s*[Ii][Mm][Aa][Gg][Ee][Mm][Aa][Pp].->.-<%s*/%s*[Ii][Mm][Aa][Gg][Ee][Mm][Aa][Pp]%s*>", convertImageMap) -- convert imagemaps into standard images
text = mw.ustring.gsub(text, "%s*{{%s*[Tt][Oo][Cc].-}}", "") -- remove most common tables of contents
text = mw.ustring.gsub(text, "%s*__[A-Z]*TOC__", "") -- remove TOC behavior switches
Line 529 ⟶ 531:
 
-- Parse a ==Section== from a page
local function getSection(text, section, mainonly)
local escapedSection = mw.ustring.gsub(mw.uri.decode(section), "([%^%$%(%)%%%.%[%]%*%+%-%?])", "%%%1") -- %26 → & etc, then ^ → %^ etc.
local level, content = mw.ustring.match(text .. "\n", "\n(==+)%s*" .. escapedSection .. "%s*==.-\n(.*)")
Line 544 ⟶ 546:
 
-- Remove unmatched <tag> or </tag> tags
local function fixTags(text, tag)
local startcount = 0
for i in mw.ustring.gmatch(text, "<%s*" .. tag .. "%f[^%w_].->") do startcount = startcount + 1 end
Line 607 ⟶ 609:
section = mw.ustring.match(pagename, ".-#(.*)") -- parse redirect to Page#Section
end
if text and section and section ~= "" then text = getSection(text, section) end
end
end
Line 624 ⟶ 626:
pageopts[k] = v
end
pageopts.paraflags = numberFlags(pageopts["paragraphs"] or "") -- parse paragraphs, e.g. "1,3-5" → {"1","3-5"}
pageopts.fileflags = numberFlags(pageopts["files"] or "") -- parse file numbers
if pageopts.more and pageopts.more == "" then pageopts.more = "Read more..." end -- more= is short for this default text
end
Line 670 ⟶ 672:
 
-- Ensure div tags match
text = fixTags(text, "div")
 
if pageopts.more then text = text .. " '''[[" .. pagename .. "|" .. pageopts.more .. "]]'''" end -- wikilink to article for more info
Line 713 ⟶ 715:
end
if args["section"] then -- check relevant section only
text = getSection(text, args["section"], args["sectiononly"])
if not text then return err("No section " .. args["section"] .. " in page " .. page) end
end
Line 739 ⟶ 741:
 
local options = args -- pick up miscellaneous options: more, errors, fileargs
options.paraflags = numberFlags(args["paragraphs"] or "") -- parse paragraphs, e.g. "1,3-5" → {"1","3-5"}
options.fileflags = numberFlags(args["files"] or "") -- parse file numbers
if options.more and options.more == "" then options.more = "Read more..." end -- more= is short for this default text
 
Line 801 ⟶ 803:
else
local options = args -- pick up miscellaneous options: more, errors, fileargs
options.paraflags = numberFlags(args.paragraphs or "") -- parse paragraphs, e.g. "1,3-5" → {"1","3-5"}
options.fileflags = numberFlags(args.files or "1") -- parse file numbers
options.nobold=1
options.keepTables = is(args.tables) and args.tables or 1
Line 839 ⟶ 841:
-- Entry points for other Lua modules.
-- The exported (public) names keep their original lowercase spelling for
-- backwards compatibility; each wrapper simply forwards its arguments to the
-- corresponding camelCase local function of this module.
function p.getContent(page, frame) return getContent(page, frame) end
-- Note: only text and section are forwarded to getSection.
function p.getsection(text, section) return getSection(text, section) end
function p.parse(text, options, filesOnly) return parse(text, options, filesOnly) end
function p.argimage(text) return argImage(text) end
function p.checkimage(image) return checkImage(image) end
function p.parseimage(text, start) return parseImage(text, start) end
function p.cleanupText(text, options) return cleanupText(text, options) end
function p.main(pagenames, options) return main(pagenames, options) end
function p.numberflags(str) return numberFlags(str) end
 
return p