Module:Excerpt

This is an old revision of this page, as edited by Certes (talk | contribs) at 21:37, 20 May 2018 (Make the bold title near the start of the article into a wikilink to the article). The present address (URL) is a permanent link to this revision, which may differ significantly from the current revision.

-- Export table: the functions attached to p at the end of the file are the #invoke entry points
local p = {}
-- Helper module; its getTarget function is used below to resolve redirects
local mRedirect = require('Module:Redirect')

local errors -- module-level switch, assigned by main from options.errors

-- Report a problem: when errors are enabled, raise text as an error attributed to the caller;
-- otherwise hand back an empty string so that nothing is displayed
local function err(text)
	if not errors then
		return ""
	end
	error(text, 2)
end

-- Check image for suitability
-- image: wikitext containing a File: or Image: name, e.g. "[[File:Foo.jpg|thumb|Caption]]"
-- Returns true only if the name has an accepted image extension, its description page expands to
-- some text, and that text does not mention "non-free"; returns false in every other case
local function checkimage(image)
	local page = mw.ustring.match(image, "([Ff]ile%s*:[^|%]]*)") -- File:(name) ...
	 or mw.ustring.match(image, "([Ii]mage%s*:[^|%]]*)") -- or Image:(name) ...
	if not page then return false end

	-- Limit to image types: .gif, .jpg, .jpeg, .png, .svg, .tiff, .xcf (exclude .ogg audio etc.)
	if not mw.ustring.match(page, "%.[Gg][Ii][Ff]%s*$")
	 and not mw.ustring.match(page, "%.[Jj][Pp][Ee]?[Gg]%s*$")
	 and not mw.ustring.match(page, "%.[Pp][Nn][Gg]%s*$")
	 and not mw.ustring.match(page, "%.[Ss][Vv][Gg]%s*$")
	 and not mw.ustring.match(page, "%.[Tt][Ii][Ff][Ff]%s*$")
	 and not mw.ustring.match(page, "%.[Xx][Cc][Ff]%s*$") then return false end

	local title = mw.title.new(":" .. page) -- Read description page (for :File:Foo rather than File:Foo)
	if not title then return false end

	local redir = mRedirect.getTarget(title)
	if redir then
		title = mw.title.new(redir)
		if not title then return false end -- redirect target is not a usable title: reject rather than crash
	end

	local frame = mw.getCurrentFrame()
	local desc = frame:preprocess("{{" .. title.prefixedText .. "}}") -- expand the description page
	if not desc or desc == "" then return false end -- nothing to inspect: treat as unsuitable
	return not mw.ustring.match(desc, "[Nn]on%-free") -- hide non-free image
end

-- Look for a [[File:...]] or [[Image:...]] link in text and return it together with any white space
-- that follows; returns nil when there is none.
-- start=true only accepts a link at the very beginning of text; start=false accepts one anywhere.
-- A File: link is preferred over an Image: link when both are present.
local function parseimage(text, start)
	local anchor = start and "^" or "" -- "^" pins the search to the first character of text
	local found = mw.ustring.match(text, anchor .. "%[%[%s*[Ff]ile%s*:.*") -- [[File: ... to end of text
	if not found then
		found = mw.ustring.match(text, anchor .. "%[%[%s*[Ii]mage%s*:.*") -- or [[Image: ... to end of text
	end
	if not found then
		return nil
	end
	-- balanced brackets, so that wikilinks nested in the caption stay inside the result
	return (mw.ustring.match(found, "%b[]%s*"))
end

-- Attempt to construct a [[File:...]] block from {{infobox ... |image= ...}}
-- text: wikitext of one template call
-- Returns "[[File:name|caption|alt=...|size]]\n" assembled from the infobox arguments (line feeds removed),
-- or nil if text is not an infobox call or none of the recognised image arguments has a value
local function argimage(text)
	if not mw.ustring.match(text, "{{%s*[Ii]nfobox") then return nil end

	-- Return the value captured by the first pattern that yields a non-blank value, or nil if none does.
	-- The capture must be greedy ([^}|]*): a lazy capture at the end of a pattern always matches "".
	local function firstvalue(patterns)
		for _, pattern in ipairs(patterns) do
			local value = mw.ustring.match(text, pattern)
			if value and mw.ustring.match(value, "%S") then return value end
		end
		return nil
	end

	local image = firstvalue({
		"|%s*image%s*=%s*([^}|]*)", -- parse image= argument...
		"|%s*PD_image%s*=%s*([^}|]*)", -- or its known alternatives such as...
		"|%s*image_flag%s*=%s*([^}|]*)", -- image_flag= from Infobox country
		"|%s*Cover%s*=%s*([^}|]*)", -- or Cover= from Infobox album
	})
	if not image then return nil end

	local token = "[[" -- Add File: unless name already begins File: or Image:
	if not (mw.ustring.match(image, "^[Ff]ile%s*:")
	 or mw.ustring.match(image, "^[Ii]mage%s*:")) then
		token = token .. "File:"
	end
	token = token .. image

	-- add in relevant optional parameters: caption, alt text and image size
	-- (blank values are skipped so that no empty parameter is added to the file link)
	local caption = firstvalue({"|%s*[Cc]aption%s*=%s*([^}|]*)"})
	if caption then token = token .. "|" .. caption end
	local alt = firstvalue({"|%s*alt%s*=%s*([^}|]*)"})
	if alt then token = token .. "|alt=" .. alt end
	local image_size = firstvalue({"|%s*image_size%s*=%s*([^}|]*)"})
	if image_size then token = token .. "|" .. image_size end

	return mw.ustring.gsub(token, "\n", "") .. "]]\n" -- argument values may span lines: flatten them
end

-- Callback for gsub over template calls: decides whether one call should be dropped
-- t is the whole call including its braces, e.g. "{{efn|Some note}}"
-- Returns "" (gsub deletes the call) when the template name is in the list of footnote, citation and
-- maintenance-tag templates below, or nil (gsub leaves the call as it is) for anything else
local function striptemplate(t)
	local unwanted = {"[Ee]fn", "[Ee]fn%-[lu]a", "[Ee]l[mn]", "[Rr]p?", "[Ss]fn[bmp]", "[Ss]f[bn]", "NoteTag", "#[Tt]ag:%s*[Rr]ef", "[Rr]efn?",
	 "[CcDd]n", "[Cc]itation needed", "[Dd]isambiguation needed"}
	for i = 1, #unwanted do
		-- the frontier %f[|}] demands that the name is followed directly by | or }, i.e. is complete
		local hit = mw.ustring.match(t, "^{{%s*" .. unwanted[i] .. "%s*%f[|}]")
		if hit then
			return ""
		end
	end
	return nil
end

-- Main function returns a string value: text of the lead of a page
-- pagenames: sequence of page names or wikilinks; one is read, chosen at random if several are given.
--            Names which cannot be read are removed from this table.
-- options:   table of settings; the fields read here are errors, paraflags, fileflags, fileargs and more
-- Returns the trimmed lead wikitext, or the result of err() if no page name was given or none could be read
local function main(pagenames, options)
	errors = options.errors -- set the module level flag used in local function err

	if not pagenames or #pagenames < 1 then return err("No page names given") end
	local pagename
	local text
	local pagecount = #pagenames
	local firstpage = pagenames[1] or "(nil)" -- save for error message, as the name may be removed from the list below

	-- read the page, or a random one if multiple pages were provided
	if pagecount > 1 then math.randomseed(os.time()) end
	while not text and pagecount > 0 do
		local pagenum = 1
		if pagecount > 1 then pagenum = math.random(pagecount) end -- pick a random title
		pagename = pagenames[pagenum]
		if pagename and pagename ~= "" then
			pagename = mw.ustring.match(pagename, "%[%[%s*(.-)[]|#]") or pagename -- "[[Foo|Bar]]" → "Foo"
			pagename = mw.ustring.gsub(pagename, "^%s+", "") -- strip leading ...
			pagename = mw.ustring.gsub(pagename, "%s+$", "") -- ...and trailing white space

			if pagename and pagename ~= "" then
				local title = mw.title.new(pagename) -- Find the lead section of the named page
				if not title then return err("No title for page name " .. pagename) end
				local redir = mRedirect.getTarget(title)
				if redir then
					title = mw.title.new(redir) -- nil if the redirect target is not a usable title
					pagename = redir
				end

				if title then text = title:getContent() end -- an unusable target counts as an unreadable page
			end
		end
		if not text then table.remove(pagenames, pagenum) end -- this one didn't work; try another
		pagecount = pagecount - 1 -- ensure that we exit the loop after at most #pagenames iterations
	end
	if not text then return err("Cannot read a valid page: first name is " .. firstpage) end

	text = mw.ustring.gsub(text, "<!%-%-.-%-%->","") -- remove HTML comments
	text = mw.ustring.gsub(text, "%c%s*==.*","") -- remove first ==Heading== and everything after it
	text = mw.ustring.gsub(text, "<noinclude>.-</noinclude>", "") -- remove noinclude bits
	text = mw.ustring.gsub(text, "<%s*ref[^>]-/%s*>", "") -- remove refs cited elsewhere
	text = mw.ustring.gsub(text, "<%s*ref.->.-<%s*/%s*ref%s*>", "") -- remove refs
	text = mw.ustring.gsub(text, "<%s*imagemap.->.-<%s*/%s*imagemap%s*>", "") -- remove imagemaps
	text = mw.ustring.gsub(text, "%b{}", striptemplate) -- remove unwanted templates such as references
	text = mw.ustring.gsub(text, "\n%s*{{%s*[Tt][Oo][Cc].-}}", "\n") -- remove most common tables of contents

	local allparas = true -- keep all paragraphs?
	if options.paraflags then
		for _, v in pairs(options.paraflags) do
			if v then allparas = false end -- if any para specifically requested, don't keep all
		end
	end

	local maxfile = 0 -- for efficiency, stop checking images after this many have been found
	if options.fileflags then
		for k, v in pairs(options.fileflags) do
			if v and k > maxfile then maxfile = k end -- set maxfile = highest key in fileflags
		end
	end

	-- extra image parameters are spliced into a gsub replacement string below, where % introduces a
	-- capture reference; double any % so that it is inserted literally instead of raising an error
	local fileargs = options.fileargs and mw.ustring.gsub(options.fileargs, "%%", "%%%%")

	-- a basic parser to trim down the lead
	local inlead = false -- have we found some text yet?
	local t = "" -- the stripped down output text
	local files = 0 -- how many images so far
	local paras = 0 -- how many paragraphs so far

	text = mw.ustring.gsub(text,"^%s*","") -- remove initial white space
	repeat -- loop around parsing a template, image or paragraph
		local token = mw.ustring.match(text, "^%b{}%s*") or false -- {{Template}}
		if token then -- found a template
			if inlead then -- lead has already started, so keep the template within the text
				t = t .. token
			elseif files < maxfile then -- discard template, but if we are still collecting images...
				local image = parseimage(token, false) or argimage(token) -- look for embedded [[File:...]], |image=, etc.
				if image and checkimage(image) then -- if image is found and qualifies (not a sound file, non-free, etc.)
					files = files + 1 -- count the file, whether displaying it or not
					if options.fileflags and options.fileflags[files] then -- if displaying this image
						image = mw.ustring.gsub(image, "|%s*frameless%s*%f[|%]]", "") -- make image a thumbnail, not frameless etc.
						image = mw.ustring.gsub(image, "|%s*framed?%s*%f[|%]]", "")
						if not mw.ustring.match(image, "|%s*thumb%s*%f[|%]]")
						 and not mw.ustring.match(image, "|%s*thumbnail%s*%f[|%]]") then
							image = mw.ustring.gsub(image, "(%]%]%s*)$", "|thumb%1")
						end
						if fileargs then image = mw.ustring.gsub(image, "(%]%]%s*)$", "|" .. fileargs .. "%1") end
						t = t .. image
					end
				end
			end
		else -- the next token in text is not a template
			token = parseimage(text, true)
			if token then -- the next token in text looks like an image
				if files < maxfile and checkimage(token) then -- if more images are wanted and this is a wanted image
					files = files + 1
					if options.fileflags and options.fileflags[files] then
						local image = token -- copy token for manipulation by adding |right etc. without changing the original
						if fileargs then image = mw.ustring.gsub(image, "(%]%]%s*)$", "|" .. fileargs .. "%1") end
						t = t .. image
					end
				end
			else -- got a paragraph, which ends at a file, image, blank line or end of text
				local afterend = mw.ustring.len(text) + 1
				local blankpos = mw.ustring.find(text, "\n%s*\n") or afterend -- position of next paragraph delimiter (or end of text)
				local endpos = math.min( -- find position of whichever comes first: [[File:, [[Image: or paragraph delimiter
				 mw.ustring.find(text, "%[%[%s*[Ff]ile%s*:") or afterend,
				 mw.ustring.find(text, "%[%[%s*[Ii]mage%s*:") or afterend,
				 blankpos)
				token = mw.ustring.sub(text, 1, endpos-1)
				if blankpos < afterend and blankpos == endpos then -- paragraph ends with a blank line
					token = token .. mw.ustring.match(text, "\n%s*\n", blankpos)
				end
				inlead = true -- we got a paragraph, so we are inside the lead section
				paras = paras + 1
				if allparas or (options.paraflags and options.paraflags[paras]) then t = t .. token end -- add if this paragraph wanted
			end -- of "else got a paragraph"
		end -- of "else not a template"

		if token then text = mw.ustring.sub(text, mw.ustring.len(token)+1) end -- remove parsed token from remaining text
	until not text or text == "" or not token or token == "" -- loop until all text parsed

	text = mw.ustring.gsub(t, "\n+$", "") -- remove trailing line feeds, so "{{Transclude text excerpt|Foo}} more" flows on one line

	-- replace the bold title or synonym near the start of the article by a wikilink to the article
	local lang = mw.language.getContentLanguage()
	local pos = mw.ustring.find(text, "'''" .. lang:ucfirst(pagename) .. "'''", 1, true) -- look for "'''Foo''' is..." (uc) or "A '''foo''' is..." (lc)
	 or mw.ustring.find(text, "'''" .. lang:lcfirst(pagename) .. "'''", 1, true) -- plain search: special characters in pagename represent themselves
	if pos then
		local len = mw.ustring.len(pagename)
		text = mw.ustring.sub(text, 1, pos + 2) .. "[[" .. mw.ustring.sub(text, pos + 3, pos + len + 2) .. "]]" .. mw.ustring.sub(text, pos + len + 3, -1) -- link it
	else -- look for anything unlinked in bold, assumed to be a synonym of the title (e.g. a person's birth name)
		text = mw.ustring.gsub(text, "(.-'''+)(.-)'''", function(a, b) -- replace '''Foo''' by '''[[pagename|Foo]]''' if early in article and not wikilinked
			if mw.ustring.len(a) < 100 and not mw.ustring.find(b, "%[") then return a .. "[[" .. pagename .. "|" .. b .. "]]'''" else return nil end
		 end, 1)
	end

	if options.more then text = text .. " '''[[" .. pagename .. "|" .. options.more .. "]]'''" end -- wikilink to article for more info

	return text
end

-- Turn a comma-separated list of numbers and min-max ranges into a set of flags keyed by number,
-- e.g. "1,3-5" → {[1]=true, [3]=true, [4]=true, [5]=true}; numbers not listed have no entry.
-- Items that are neither a number nor a range are ignored.
local function numberflags(str)
	local flags = {}
	for _, item in ipairs(mw.text.split(str, ",")) do -- "1,3-5" → "1" then "3-5"
		local first, last = mw.ustring.match(item, "^%s*(%d+)%s*%-%s*(%d+)%s*$") -- "3-5" → first=3 last=5
		if not last then
			-- nested captures return the same digits twice, so a lone "1" gives first=1 last=1
			first, last = mw.ustring.match(item, "^%s*((%d+))%s*$")
		end
		if last then
			for n = tonumber(first), tonumber(last) do
				flags[n] = true
			end
		end
	end
	return flags
end

-- Shared template invocation code for the lead, random and selected entry points
-- frame:      the #invoke frame; its args take priority over those of the parent (template) frame
-- articlekey: nil to choose at random among all numbered args; a number to pick that numbered arg
--             (wrapped into the range 1..#args); or the name of the arg holding the number or key to pick
-- Returns the excerpt after frame:preprocess, or the result of err() if no article was supplied
local function invoke(frame, articlekey)
	-- args = { 1,2,... = page names, paragraphs = list e.g. "1,3-5", files = list, more = text}
	local args = {} -- args[k] = frame.args[k] or frame:getParent().args[k] for all k in either (numeric or not)
	local parent = frame:getParent()
	if parent then -- guard: do not index a missing parent frame
		for k, v in pairs(parent.args) do args[k] = v end
	end
	for k, v in pairs(frame.args) do args[k] = v end -- args from a Lua call have priority over parent args from template

	errors = args.errors -- main sets this too, but err may be called below before main runs

	local pagenames = {}
	local articlecount = #args
	if articlekey then -- 1 for lead template; "selected" for selected template
		articlekey = tonumber(articlekey) or args[articlekey]
		local index = tonumber(articlekey)
		if index then
			-- normalise article number into the range 1..#args
			if articlecount < 1 then return err("No articles provided") end -- also avoids index % 0
			index = index % articlecount
			if index == 0 then index = articlecount end
			articlekey = index
		end
		pagenames = { args[articlekey] }
	else
		-- For random, accept any number of page names.  If more than one, we'll pick one randomly
		for i, p in pairs(args) do
			if p and type(i) == 'number' then table.insert(pagenames, p) end
		end
		if #pagenames < 1 then return err("No articles provided") end
	end

	local options = args -- pick up miscellaneous options: more, errors, fileargs
	options.paraflags = numberflags(args["paragraphs"] or "") -- parse paragraph numbers, e.g. "1,3-5" → flags for 1, 3, 4 and 5
	options.fileflags = numberflags(args["files"] or "") -- parse file numbers
	if options.more and options.more == "" then options.more = "Read more..." end -- more= is short for this default text

	local text = main(pagenames, options)
	return frame:preprocess(text)
end

-- Entry points for template callers using #invoke:

-- {{Transclude lead article}}: always asks for article number 1
p.lead = function(frame)
	return invoke(frame, 1)
end

-- {{Transclude random article}}: passes no article key, so invoke chooses among all the page names given
p.random = function(frame)
	return invoke(frame)
end

-- {{Transclude selected article}}: the selected= parameter holds the key of the article to read
p.selected = function(frame)
	return invoke(frame, "selected")
end

return p