Module:Excerpt

This is an old revision of this page, as edited by Certes (talk | contribs) at 16:58, 26 April 2018 (Remove HTML comments from between initial templates. Remove refs earlier to cope with incomplete citation templates nested in infoboxes.). The present address (URL) is a permanent link to this revision, which may differ significantly from the current revision.

-- Table of functions exported by this module; it is returned at the end of the file
local p = {}

-- Entry point for Lua callers
-- Returns a string value: text of the lead of a page
-- pagename: name of the page to excerpt; "" is returned if it is nil or if
--           mw.title.new cannot make a title from it
-- options:  optional table with optional fields
--   paragraphs = list of paragraph numbers to keep, e.g. {1, 3, 4, 5}
--                (absent or empty keeps every paragraph)
--   files      = list of image numbers to keep, e.g. {1, 2}
--                (absent or empty removes every image)
function p._lead(pagename, options)
	if not pagename then return "" end -- Return blank text rather than an error splurge
	options = options or {} -- Lua callers may omit options altogether
	local title = mw.title.new(pagename) -- Find the lead section of the named page
	if not title then return "" end
	local text = title:getContent() or ""

	-- Each gsub returns (string, count); assigning to one variable keeps only the string
	text = mw.ustring.gsub(text, "%c%s*==.*","") -- remove first heading and everything after it
	text = mw.ustring.gsub(text, "<noinclude>.-</noinclude>", "") -- remove noinclude bits
	text = mw.ustring.gsub(text, "<%s*ref[^>]-/%s*>", "") -- remove refs cited elsewhere
	text = mw.ustring.gsub(text, "<%s*ref.->.-<%s*/%s*ref%s*>", "") -- remove refs

	repeat -- remove initial HTML comments and templates such as hatnotes
		local oldtext = text
		text = mw.ustring.gsub(text,"^%s*<!%-%-.-%-%->%s*","") -- remove HTML comment from front
		text = mw.ustring.gsub(text,"^%A-%b{}%s*","") -- remove infobox, hatnote, tag, etc. from front
	until text == oldtext -- loop because comments and templates may alternate

	local paragraphlist = options.paragraphs or {}
	if #paragraphlist > 0 then -- limit to requested paragraphs e.g. {1, 3, 4, 5}
		local paras = mw.text.split(text, "\n%s*\n") -- %s* may include \n if three or more appear together
		local wanted = {}
		for _, index in ipairs(paragraphlist) do -- ipairs keeps the caller's order
			-- a number beyond the paragraphs found is silently skipped
			if paras[index] then wanted[#wanted + 1] = paras[index] end
		end
		text = table.concat(wanted, "\n\n")
	end

	-- remove refs and footnotes; plain sfn is listed as well as its sfnb/sfnp variants
	for _, t in ipairs {"[Ee]fn", "[Ee]fn-la", "[Ee]l[mn]", "[Rr]", "[Ss]fn", "[Ss]fn[bp]", "[Ss]fb"} do
		text = mw.ustring.gsub(text, "{{%s*" .. t .. "%s*|.-}}", "")
	end

	local keepfile = {} -- keepfile[n] is true if we want to keep the nth image
	for _, v in pairs(options.files or {}) do
		keepfile[v] = true
	end

	text = mw.ustring.gsub(text, "%[%[%s*[Ii]mage%s*:", "[[File:") -- now we can ignore Image:
	local n = 0 -- image count
	-- %b[] matches one balanced [[...]] at a time, so a caption may contain
	-- links without the match running on to a later ]] on the same line
	text = mw.ustring.gsub(text, "%b[]", function(link)
		if not mw.ustring.find(link, "^%[%[%s*[Ff]ile%s*:") then
			return nil -- not a file: nil leaves the matched text unchanged
		end
		n = n + 1
		if keepfile[n] then return nil end -- requested image: keep it
		return "" -- unwanted image: drop it
	end)

	text = mw.ustring.gsub(text, "^%s*", "") -- remove initial white space
	text = mw.ustring.gsub(text, "\n+$", "") -- remove trailing line feeds, so "{{Transclude text excerpt|Foo}} more" flows on one line
	return text
end

-- Convert a comma-separated list of numbers or min-max ranges into a list of numbers, e.g. "1,3-5" → {1,3,4,5}
-- Items that are neither a number nor a min-max range are ignored.
-- nil or "" gives an empty list.
function p.numberlist(str)
	local nlist = {}
	if not str then return nlist end -- nothing to parse
	local ranges = mw.text.split(str, ",") -- parse ranges, e.g. "1,3-5" → {"1","3-5"}
	for _, r in ipairs(ranges) do -- ipairs keeps the items in the order written
		local min, max = mw.ustring.match(r, "^%s*(%d+)%s*%-%s*(%d+)%s*$") -- "3-5" → min="3" max="5"
		if not max then -- not a range: try a single number
			min = mw.ustring.match(r, "^%s*(%d+)%s*$") -- "1" → min="1"
			max = min
		end
		-- The captures are strings; convert explicitly and skip anything
		-- that tonumber cannot convert instead of raising an error in the loop
		local first = min and tonumber(min)
		local last = max and tonumber(max)
		if first and last then
			for number = first, last do nlist[#nlist + 1] = number end
		end
	end
	return nlist
end

-- Entry point for template callers using #invoke:
-- Arguments are read from frame.args first, then from the parent frame's args:
--   1          = page name, optionally written as a wikilink such as "[[Foo|Bar]]"
--   paragraphs = paragraphs to keep, e.g. "1,3-5" (absent keeps them all)
--   files      = images to keep, e.g. "1" (absent removes them all)
-- Returns the lead of the page after passing it through frame:preprocess.
function p.lead(frame)
	local args = frame.args -- from calling module
	local parent = frame:getParent()
	local pargs = parent and parent.args or {} -- from template; empty if there is no parent frame

	local pagename = args[1] or pargs[1] or ""
	pagename = mw.ustring.match(pagename, "%[%[%s*(.-)[]|#]") or pagename -- "[[Foo|Bar]]" → "Foo"

	local paralist = p.numberlist(args["paragraphs"] or pargs["paragraphs"] or "") -- parse paragraphs, e.g. "1,3-5" → {1,3,4,5}
	local filelist = p.numberlist(args["files"] or pargs["files"] or "") -- parse file numbers

	return frame:preprocess(p._lead(pagename, {paragraphs = paralist, files = filelist}))
end

return p -- export the module's functions