Module:Excerpt

This is an old revision of this page, as edited by Certes (talk | contribs) at 14:32, 28 April 2018 (Strip leading and trailing white space from article names). The present address (URL) is a permanent link to this revision, which may differ significantly from the current revision.

local p = {}
local mRedirect = require('Module:Redirect')

-- Entry point for Lua callers
-- Returns a string value: text of the lead of a page
--
-- pagenames: sequence of page names; each may be a plain title or a wikilink such as "[[Foo|Bar]]".
--            If more than one is supplied, one is picked at random.
-- options:   optional table; recognised fields (all optional):
--   paraflags  table indexed by paragraph number; if any value is truthy,
--              only paragraphs whose flag is truthy are kept (otherwise all are kept)
--   fileflags  table indexed by file number; only [[File:...]] / [[Image:...]] links
--              whose flag is truthy are kept
--   more       link text; when set, a bold link to the page is appended to the output
-- Returns "" when no page name is supplied, the name is blank, or mw.title.new rejects it.
function p._lead(pagenames, options)
	if not pagenames or #pagenames < 1 then return "" end -- Return blank text rather than an error splurge
	options = options or {} -- Lua callers may omit the options table

	local pagename = pagenames[1]
	if #pagenames > 1 then -- we could do this even with one page, but it would be inefficient
		math.randomseed(os.time())
		pagename = pagenames[math.random(#pagenames)] -- pick a random title
	end
	pagename = mw.ustring.match(pagename, "%[%[%s*(.-)[]|#]") or pagename -- "[[Foo|Bar]]" → "Foo"
	-- Strip leading and trailing white space with an anchored lazy capture: it always matches,
	-- so one-character names such as "A" are preserved and a blank name becomes ""
	pagename = mw.ustring.match(pagename, "^%s*(.-)%s*$")
	if not pagename or pagename == "" then return "" end

	local title = mw.title.new(pagename) -- Find the lead section of the named page
	if not title then return "" end
	-- NOTE(review): presumably the redirect target's name, or nil/false for a non-redirect;
	-- Module:Redirect is not visible from here
	local redir = mRedirect.getTarget(title)
	if redir then
		title = mw.title.new(redir)
		if not title then return "" end -- unusable target: blank text, as for an unusable page name
	end

	local text = title:getContent() or ""
	text = mw.ustring.gsub(text, "%c%s*==.*","") -- remove first heading and everything after it
	text = mw.ustring.gsub(text, "<noinclude>.-</noinclude>", "") -- remove noinclude bits
	text = mw.ustring.gsub(text, "<%s*ref[^>]-/%s*>", "") -- remove refs cited elsewhere
	text = mw.ustring.gsub(text, "<%s*ref.->.-<%s*/%s*ref%s*>", "") -- remove refs
	for _, tpl in ipairs {"[Ee]fn", "[Ee]fn-la", "[Ee]l[mn]", "[Rr]", "[Ss]fn[bp]", "[Ss]f[bn]", "NoteTag"} do
		text = mw.ustring.gsub(text, "{{%s*" .. tpl .. "%s*|.-}}", "") -- remove ref and footnote templates
	end
	text = mw.ustring.gsub(text, "\n%s*{{%s*[Tt][Oo][Cc].-}}", "\n") -- remove most common tables of contents

	local allparas = true -- keep all paragraphs?
	if options.paraflags then
		for _, v in pairs(options.paraflags) do
			if v then allparas = false end -- if any para specifically requested, don't keep all
		end
	end

	-- a basic parser to trim down the lead
	local FILE_LINK = "%[%[%s*[Ff]ile%s*:" -- start of [[File: ...
	local IMAGE_LINK = "%[%[%s*[Ii]mage%s*:" -- start of [[Image: ...
	local inlead = false -- have we found some text yet?
	local kept = {} -- pieces of the stripped down output text, joined at the end
	local files = 0 -- how many [[Image: or [[File: so far
	local paras = 0 -- how many paragraphs so far

	text = mw.ustring.gsub(text,"^%s*","") -- remove initial white space
	repeat -- loop around parsing a comment, template, image or paragraph
		local token = mw.ustring.match(text, "^%s*<!%-%-.-%-%->%s*") -- <!--HTML comment-->
			or mw.ustring.match(text, "^%b{}%s*") -- or {{Template}}
			or false
		if token then
			if inlead then kept[#kept + 1] = token end -- keep comments and templates only within text body
		else
			token = mw.ustring.match(text, "^" .. FILE_LINK) or mw.ustring.match(text, "^" .. IMAGE_LINK)
			if token then
				-- match [[...]] to handle nesting; nil when the brackets are unbalanced,
				-- in which case nothing is kept and the loop condition below stops the parse
				token = mw.ustring.match(text, "^%b[]%s*")
				files = files + 1
				if token and options.fileflags and options.fileflags[files] then
					kept[#kept + 1] = token
				end
			else -- got a paragraph, which ends at a file, image, blank line or end of text
				local afterend = mw.ustring.len(text) + 1
				local blankpos = mw.ustring.find(text, "\n%s*\n") or afterend
				local endpos = math.min(
					mw.ustring.find(text, FILE_LINK) or afterend,
					mw.ustring.find(text, IMAGE_LINK) or afterend,
					blankpos)
				token = mw.ustring.sub(text, 1, endpos-1)
				if blankpos < afterend and blankpos == endpos then -- paragraph ends with a blank line
					token = token .. mw.ustring.match(text, "\n%s*\n", blankpos)
				end
				inlead = true
				paras = paras + 1
				if allparas or (options.paraflags and options.paraflags[paras]) then
					kept[#kept + 1] = token
				end
			end
		end

		-- consume the token just parsed, whether or not it was kept
		if token then text = mw.ustring.sub(text, mw.ustring.len(token)+1) end
	until not text or text == "" or not token or token == ""

	-- remove trailing line feeds, so "{{Transclude text excerpt|Foo}} more" flows on one line
	text = mw.ustring.gsub(table.concat(kept), "\n+$", "")

	if options.more then text = text .. " '''[[" .. pagename .. "|" .. options.more .. "]]'''" end
	return text
end

-- Convert a comma-separated list of numbers or min-max ranges into a table of flags,
-- e.g. "1,3-5" → {[1]=true, [3]=true, [4]=true, [5]=true}
-- Numbers that are not listed get no entry at all (they read back as nil, not false).
-- Items that are neither "N" nor "N-M" are ignored; a nil str yields an empty table.
function p.numberflags(str)
	local flags = {}
	if not str then return flags end
	local ranges = mw.text.split(str, ",") -- parse ranges, e.g. "1,3-5" → {"1","3-5"}
	for _, r in pairs(ranges) do
		local min, max = mw.ustring.match(r, "^%s*(%d+)%s*%-%s*(%d+)%s*$") -- "3-5" → min="3" max="5"
		if not max then min, max = mw.ustring.match(r, "^%s*((%d+))%s*$") end -- "1" → min="1" max="1"
		-- Convert explicitly: mw.ustring's %d also accepts non-ASCII decimal digits, which
		-- tonumber cannot read; such items are skipped instead of breaking the numeric for loop
		min, max = tonumber(min), tonumber(max)
		if min and max then
			for n = min, max do flags[n] = true end
		end
	end
	return flags
end

-- Entry point for template callers using #invoke:
-- Collects page names and options from the #invoke arguments and from the calling template's
-- arguments, builds the excerpt with p._lead, and returns it expanded by frame:preprocess.
function p.lead(frame)
	-- args = { 1, 2, ... = page names, paragraphs = list e.g. "1,3-5", files = list, more = text}
	local args = frame.args -- from calling module
	local parent = frame:getParent()
	local pargs = parent and parent.args or {} -- from template; empty when there is no parent frame

	-- Accept any number of page names.  If more than one, we'll pick one randomly.
	-- Blank (empty or white-space-only) names are skipped so they can never be the one picked.
	local pagenames = {}
	for i, name in pairs(args) do
		if type(i) == 'number' and mw.ustring.match(name, "%S") then
			table.insert(pagenames, name)
		end
	end
	for i, name in pairs(pargs) do
		-- a positional argument given directly to #invoke takes precedence over the template's
		if type(i) == 'number' and not args[i] and mw.ustring.match(name, "%S") then
			table.insert(pagenames, name)
		end
	end

	local options = {}
	options.paraflags = p.numberflags(args["paragraphs"] or pargs["paragraphs"] or "") -- paragraph numbers to keep, e.g. "1,3-5"
	options.fileflags = p.numberflags(args["files"] or pargs["files"] or "") -- file numbers to keep
	options.more = args["more"] or pargs["more"]
	if options.more and options.more == "" then options.more = "Read more..." end -- more= is short for this default text

	local text = p._lead(pagenames, options)
	return frame:preprocess(text)
end

-- Alias entry point: picking at random from a list is exactly what p.lead already does
-- (a list of one article being the trivial case). It is published under its own name so
-- that the two entry points can diverge later without changing callers.
p.random = function(frame)
	return p.lead(frame)
end

return p