Module:Excerpt: Difference between revisions

Content deleted Content added
Limit File: to images to exclude audio files and other detritus. Parse image_flag= (Infobox country) and PD_image=.
Improve comments; minor bug fix (parsing Image:... within paragraph)
Line 9:
end
 
-- Check image for suitability
local function checkimage(image)
local page = mw.ustring.match(image, "([Ff]ile%s*:[^|%]]*)") -- File:(name) ...
Line 15:
if not page then return false end
 
-- Limit to image types: .gif, .jpg, .jpeg, .png, .svg, .tiff, .xcf (exclude .ogg audio etc.)
if not mw.ustring.match(page, "%.[Gg][Ii][Ff]%s*$")
and not mw.ustring.match(page, "%.[Jj][Pp][Ee]?[Gg]%s*$")
Line 34:
end
 
-- Attempt to parse [[File:...]] or [[Image:...]], either anywhere (start=false) or at the start only (start=true)
local function parseimage(text, start)
local startre = ""
Line 41:
or mw.ustring.match(text, startre .. "%[%[%s*[Ii]mage%s*:.*") -- or [[Image: ...
if image then
image = mw.ustring.match(image, "%b[]%s*") -- matching [[...]] to handle wikilinks nested in caption
end
return image
Line 51:
if mw.ustring.match(text, "{{%s*[Ii]nfobox") then
local image = mw.ustring.match(text, "|%s*image%s*=%s*([^}|]*)") -- parse image= argument...
or mw.ustring.match(text, "|%s*PD_image%s*=%s*([^}|]-)") -- or its known alternatives such as...
or mw.ustring.match(text, "|%s*image_flag%s*=%s*([^}|]-)") -- image_flag= from Infobox country
or mw.ustring.match(text, "|%s*Cover%s*=%s*([^}|]-)") -- or Cover= from Infobox album
if image then -- add in relevant optional parameters: caption, alt text and image size
token = "[[" -- Add File: unless name already begins File: or Image:
if not (mw.ustring.match(image, "^[Ff]ile%s*:")
Line 75:
 
-- Help gsub to remove unwanted templates
-- If template is unwanted then return "" (gsub will replace by nothing), else return nil (gsub will keep existing string)
 
local function striptemplate(t)
local unwanted = {"[Ee]fn", "[Ee]fn%-[lu]a", "[Ee]l[mn]", "[Rr]p?", "[Ss]fn[bmp]", "[Ss]f[bn]", "NoteTag", "#[Tt]ag:%s*[Rr]ef", "[Rr]efn?",
Line 86 ⟶ 85:
end
 
-- Entry point for Lua callers
-- Returns a string value: text of the lead of a page
local function main(pagenames, options)
errors = options.errors -- set the module level boolean used in local function err
 
if not pagenames or #pagenames < 1 then return err("No page names given") end
Line 118 ⟶ 116:
end
if not text then table.remove(pagenames, pagenum) end -- this one didn't work; try another
pagecount = pagecount - 1 -- ensure that we exit the loop after at most #pagenames iterations
end
if not text then return err("Cannot read a valid page: first name is " .. firstpage) end
 
text = mw.ustring.gsub(text, "<!%-%-.-%-%->","") -- remove HTML comments
text = mw.ustring.gsub(text, "%c%s*==.*","") -- remove first ==Heading== and everything after it
text = mw.ustring.gsub(text, "<noinclude>.-</noinclude>", "") -- remove noinclude bits
text = mw.ustring.gsub(text, "<%s*ref[^>]-/%s*>", "") -- remove refs cited elsewhere
Line 141 ⟶ 139:
if options.fileflags then
for k, v in pairs(options.fileflags) do
if v and k > maxfile then maxfile = k end -- set maxfile = highest key in fileflags
end
end
Line 148 ⟶ 146:
local inlead = false -- have we found some text yet?
local t = "" -- the stripped down output text
local files = 0 -- how many images so far
local paras = 0 -- how many paragraphs so far
 
Line 154 ⟶ 152:
repeat -- loop around parsing a template, image or paragraph
local token = mw.ustring.match(text, "^%b{}%s*") or false -- {{Template}}
if token then -- found a template
if inlead then -- lead has already started, so keep the template within the text
t = t .. token
elseif files < maxfile then -- discard template, but if we are still collecting images in the preamble...
local image = parseimage(token, false) or argimage(token) -- look for embedded [[File:...]], |image=, etc.
if image and checkimage(image) then -- if image is found and qualifies (not a sound file, non-free, etc.)
files = files + 1 -- count the file, whether displaying it or not
if options.fileflags and options.fileflags[files] then -- if displaying this image
image = mw.ustring.gsub(image, "|%s*frameless%s*%f[|%]]", "") -- make image a thumbnail, not frameless etc.
image = mw.ustring.gsub(image, "|%s*framed?%s*%f[|%]]", "")
Line 173 ⟶ 171:
end
end
else -- the next token in text is not a template
else
token = parseimage(text, true)
if token then -- the next token in text looks like an image
if files < maxfile and checkimage(token) then -- if more images are wanted and this is a wanted image
files = files + 1
if options.fileflags and options.fileflags[files] then
local image = token -- copy token for manipulation by adding |right etc. without changing the original
if options.fileargs then image = mw.ustring.gsub(image, "(%]%]%s*)$", "|" .. options.fileargs .. "%1") end
t = t .. image
Line 186 ⟶ 184:
else -- got a paragraph, which ends at a file, image, blank line or end of text
local afterend = mw.ustring.len(text) + 1
local blankpos = mw.ustring.find(text, "\n%s*\n") or afterend -- position of next paragraph delimiter (or end of text)
local endpos = math.min( -- find position of whichever comes first: [[File:, [[Image: or paragraph delimiter
mw.ustring.find(text, "%[%[%s*[Ff]ile%s*:") or afterend,
mw.ustring.find(text, "%[%[%s*[Ff]ile%s*:") or afterend,
mw.ustring.find(text, "%[%[%s*[FfIi]ilemage%s*:") or afterend,
blankpos)
token = mw.ustring.sub(text, 1, endpos-1)
Line 195 ⟶ 193:
token = token .. mw.ustring.match(text, "\n%s*\n", blankpos)
end
inlead = true -- we got a paragraph, so we are inside the lead section
paras = paras + 1
if allparas or (options.paraflags and options.paraflags[paras]) then t = t .. token end -- add if this paragraph wanted
end -- of "else got a paragraph"
end
end -- of "else not a template"
end
 
if token then text = mw.ustring.sub(text, mw.ustring.len(token)+1) end -- remove parsed token from remaining text
until not text or text == "" or not token or token == "" -- loop until all text parsed
 
text = mw.ustring.gsub(t, "\n+$", "") -- remove trailing line feeds, so "{{Transclude text excerpt|Foo}} more" flows on one line
 
if options.more then text = text .. " '''[[" .. pagename .. "|" .. options.more .. "]]'''" end -- wikilink to article for more info
return text
end
Line 229 ⟶ 227:
local args = {} -- args[k] = frame.args[k] or frame:getParent().args[k] for all k in either (numeric or not)
for k, v in pairs(frame:getParent().args) do args[k] = v end
for k, v in pairs(frame.args) do args[k] = v end -- args from a Lua call have priority over parent args from template
 
local pagenames = {}
Line 260 ⟶ 258:
 
-- Entry points for template callers using #invoke:
-- {{Transclude lead article}} entry point: reads the first and only article.
-- Delegates to invoke with 1 as its second argument.
p.lead = function(frame)
	return invoke(frame, 1)
end
-- {{Transclude random article}} entry point: reads any article.
-- Calls invoke with the frame only (the default for invoke with one argument).
p.random = function(frame)
	return invoke(frame)
end
-- {{Transclude selected article}} entry point: reads the article whose key
-- is in the selected= parameter.
-- Delegates to invoke with "selected" as its second argument.
p.selected = function(frame)
	return invoke(frame, "selected")
end
 
return p