Module:Sandbox/Erutuon/author citation

This is an old revision of this page, as edited by Erutuon (talk | contribs) at 17:59, 5 July 2019 (use simpler linking function). The present address (URL) is a permanent link to this revision, which may differ significantly from the current revision.
-- Table of functions exported by this module; returned at the end of the file.
local p = {}

--- Step past `pattern` when it matches `str` starting exactly at `pos`.
-- @param str      string to examine
-- @param pattern  ustring pattern, tried at `pos` only
-- @param pos      1-based character position at which the match must start
-- @return the position just after the match, or `pos` unchanged when the
--         pattern does not match at `pos` or matches only the empty string
local function advance_pos_if_starts_with(str, pattern, pos)
	-- Anchor the pattern so the match is attempted at `pos` only, rather than
	-- scanning the remainder of the string for a match that would be rejected
	-- below anyway.
	local anchored = pattern
	if pattern:sub(1, 1) ~= "^" then
		anchored = "^" .. pattern
	end

	local i, j = mw.ustring.find(str, anchored, pos)
	-- An empty match is reported with j == i - 1, so `j >= i` filters it out.
	if i == pos and j >= i then
		return j + 1
	end
	return pos
end

--- Try each pattern in `prefixes` once, in order, moving `pos` forward past
-- every one that matches at the current position.
-- @param str       string to examine
-- @param prefixes  array of patterns handed to advance_pos_if_starts_with
-- @param pos       starting position
-- @return the position reached after all patterns have been tried
local function advance_by_prefixes(str, prefixes, pos)
	local index = 1
	local pattern = prefixes[index]
	while pattern ~= nil do
		pos = advance_pos_if_starts_with(str, pattern, pos)
		index = index + 1
		pattern = prefixes[index]
	end
	return pos
end

-- [[d:Property:P428#P1793]]
-- ('t )?(d')?(de )?(la )?(van (der )?)?(Ma?c)?(De)?(Di)?\p{Lu}?C?['\p{Ll}]*([-'. ]*(van )?(y )?(d[ae][nr]?[- ])?(Ma?c)?[\p{Lu}bht]?C?['\p{Ll}]*)*\.? ?f?\.?

-- Patterns tried once each, in this order, at the start of a citation.
-- Built once here rather than on every call.
local leading_patterns = {
	"'t ", "d'", "de ", "la ", "van der ", "van ", "Ma?c", "De", "Di", "%u?C?['%l]*",
}

-- Patterns tried repeatedly, in this order, until a full pass consumes nothing.
local repeating_patterns = {
	"Ma?c", "[%ubht]?C?", "['%l]*", "[-'. ]*", "d[ae][nr]?[- ]", "van ", "y ",
}

--- Find where the author citation starting at position `i` of `str` ends.
-- The patterns used are a Lua approximation of the regular expression quoted
-- above.
-- @param str  string containing the citation
-- @param i    position to start at (defaults to 1)
-- @param j    optional upper bound on the returned position
-- @return the position immediately AFTER the last matched character (equal to
--         the start position when nothing matched), or nothing when `j` is
--         given and that position is greater than `j`
-- NOTE(review): because the result is one past the match, a citation whose
-- last character sits exactly at `j` is rejected -- confirm this is intended.
function p.find_end_of_author_citation(str, i, j)
	local pos = advance_by_prefixes(str, leading_patterns, i or 1)

	local previous
	repeat
		previous = pos
		pos = advance_by_prefixes(str, repeating_patterns, pos)
	until previous == pos

	-- Optional trailing ".", " ", "f" and "." in that order.
	pos = advance_pos_if_starts_with(str, "%.? ?f?%.?", pos)

	if not j or pos <= j then
		return pos
	end
end

--- Apply `func` to every author abbreviation in an author citation.
-- Does not attempt to validate the form of each author citation. Succeeds if
-- the citation is composed of author citations separated by commas,
-- parentheses, or ampersands with optional whitespace around them.
-- @param citation  the full author citation string
-- @param func      called with each author abbreviation (surrounding spaces
--                  removed); its result replaces the abbreviation, and a
--                  nil/false result leaves the abbreviation unchanged
-- @return the transformed citation, or nil when some part of `citation` is
--         neither a separator nor an author citation
function p.transform_author_abbrevs(citation, func)
	local len = mw.ustring.len(citation)
	local pieces = {}
	local pos = 1

	while pos <= len do
		-- Skip one separator together with the whitespace around it.
		local author_start = advance_pos_if_starts_with(citation, "%s*[(),&]%s*", pos)
		-- This is the position AFTER the author, not its last character.
		local next_pos = p.find_end_of_author_citation(citation, author_start)
		if not next_pos or next_pos == pos then
			-- Nothing could be consumed here, so the citation is not
			-- well-formed; bail out instead of looping forever.
			return nil
		end

		-- Separator text is copied through untouched.
		pieces[#pieces + 1] = mw.ustring.sub(citation, pos, author_start - 1)

		if next_pos > author_start then
			local matched = mw.ustring.sub(citation, author_start, next_pos - 1)
			-- The author patterns also consume spaces (e.g. the one before
			-- "&"); keep those out of the name handed to func.
			local leading, author, trailing =
				mw.ustring.match(matched, "^(%s*)(.-)(%s*)$")
			if author ~= "" then
				author = func(author) or author
			end
			pieces[#pieces + 1] = leading
			pieces[#pieces + 1] = author
			pieces[#pieces + 1] = trailing
		end

		pos = next_pos
	end

	return table.concat(pieces)
end

-- Maps an author abbreviation to the link target used for it by
-- p.link_author_citation.
local author_abbrevs = {
	["L."] = "Carl Linnaeus",
	["Schldl."] = "Diederich Franz Leonhard von Schlechtendal",
	["Cham."] = "Adelbert von Chamisso",
}

--- Turn every author abbreviation listed in `author_abbrevs` into a piped
-- wikilink of the form [[target|abbreviation]].
-- @param citation  author citation string
-- @return whatever p.transform_author_abbrevs returns for this citation
function p.link_author_citation(citation)
	-- Returns nothing for abbreviations that are not in the table.
	local function link_author(author)
		local target = author_abbrevs[author]
		if target then
			return "[[" .. target .. "|" .. author .. "]]"
		end
	end

	return p.transform_author_abbrevs(citation, link_author)
end

-- Hand the table of exported functions back to the caller that loads this module.
return p