Module:Diff/sandbox: Difference between revisions

Content deleted Content added
testcases lie to me
If we are going to support blocks, let's start both blocks at the top of the table cell.
 
(6 intermediate revisions by 3 users not shown)
Line 4:
-- (c) 2007, 2008 Yuri Takhteyev (yuri@freewisdom.org)
-- (c) 2007 Hisham Muhammad
-- Adapted to MediaWiki Lua originally by [[User:Ebraminio]] <ebrahim -at- gnu.org>
--
-- License: MIT/X, see http://sputnik.freewisdom.org/en/License
Line 23:
-- @param separator [optional] the separator pattern (defaults to any
-- whitespace - %s+).
-- @param skip_separator [optional] don't include the separator in the results.
-- @return A list of tokens.
-----------------------------------------------------------------------------
local function split(text, separator, skip_separator)
	-- Splits `text` into a list of tokens around every match of the
	-- `separator` pattern (default "%s+"). Unless `skip_separator` is truthy,
	-- each matched separator is added to the list right after the token it
	-- terminates. A separator at the very start of `text` yields a leading ""
	-- token; an empty trailing token is dropped.
	--
	-- Zero-length matches (e.g. separator "" or "%s*") are ignored as
	-- separators, so such patterns can no longer make the loop spin forever.
	separator = separator or "%s+"
	local find, sub = mw.ustring.find, mw.ustring.sub
	local parts = {}
	local start = 1 -- first character of the token currently being collected
	local search_from = 1 -- position where the next separator search begins
	local split_start, split_end = find(text, separator, search_from)
	while split_start do
		if split_end < split_start then
			-- Empty match: it separates nothing. Retry one character further,
			-- or stop if there is no character left to step over.
			if sub(text, split_start, split_start) == "" then
				break
			end
			search_from = split_start + 1
		else
			parts[#parts + 1] = sub(text, start, split_start - 1)
			if not skip_separator then
				parts[#parts + 1] = sub(text, split_start, split_end)
			end
			start = split_end + 1
			search_from = start
		end
		split_start, split_end = find(text, separator, search_from)
	end
	-- Whatever follows the last separator is the final token (if non-empty).
	local tail = sub(text, start)
	if tail ~= "" then
		parts[#parts + 1] = tail
	end
	return parts
end
 
Line 48:
-----------------------------------------------------------------------------
-- Derives the longest common subsequence of two strings. This is a faster
-- implementation than the one provided by stdlib. Submitted by Hisham Muhammad.
-- The algorithm was taken from:
-- http://en.wikibooks.org/wiki/Algorithm_implementation/Strings/Longest_common_subsequence
Line 57:
-----------------------------------------------------------------------------
local function quick_LCS(t1, t2)
	-- Builds the dynamic-programming matrix for the longest common
	-- subsequence of the token lists `t1` and `t2` and returns it.
	--
	-- The matrix is sparse: rows and cells are created on first access, and
	-- any cell that was never written reads as 0. Cell [i+1][j+1] is derived
	-- from comparing t1[i-1] with t2[j-1], for i in 1..#t1+1 and
	-- j in 1..#t2+1.
	local rows, cols = #t1, #t2
	local larger = math.max

	-- A cell that has not been written yet reads as 0 and remembers it.
	local row_mt = {
		__index = function(row, col)
			row[col] = 0
			return 0
		end
	}

	-- A row that has not been touched yet is created empty on first access.
	local matrix = setmetatable({}, {
		__index = function(self, row_index)
			local row = setmetatable({}, row_mt)
			self[row_index] = row
			return row
		end
	})

	for i = 1, rows + 1 do
		local below, current = matrix[i + 1], matrix[i]
		for j = 1, cols + 1 do
			local value
			if t1[i - 1] ~= t2[j - 1] then
				value = larger(below[j], current[j + 1])
			else
				value = current[j] + 1
			end
			below[j + 1] = value
		end
	end

	return matrix
end
 
Line 97:
-----------------------------------------------------------------------------
-- Formats an inline diff as HTML, with <ins> and <del> tags.
--
-- @param tokens a table of {token, status} pairs.
-- @return an HTML string.
-----------------------------------------------------------------------------
local function format_as_html(tokens)
	-- Renders a list of {token, status} pairs as one HTML string. Each token
	-- is passed through mw.text.nowiki; tokens whose status is "in" are
	-- wrapped in <ins>, tokens whose status is "out" in <del>, and all other
	-- tokens are emitted as they are.
	local nowiki = mw.text.nowiki
	-- Collect the pieces and join once at the end: appending to a string
	-- inside the loop copies the whole buffer on every iteration.
	local pieces = {}
	for _, token_record in ipairs(tokens) do
		local token = nowiki(token_record[1])
		local status = token_record[2]
		if status == "in" then
			pieces[#pieces + 1] = '<ins>' .. token .. '</ins>'
		elseif status == "out" then
			pieces[#pieces + 1] = '<del>' .. token .. '</del>'
		else
			pieces[#pieces + 1] = token
		end
	end
	return table.concat(pieces)
end
 
Line 129:
-----------------------------------------------------------------------------
local function diff(old, new, separator)
	-- Computes a token-level diff between the texts `old` and `new`.
	--
	-- Both texts are tokenized with split() using `separator`. The result is
	-- a list of {token, status} pairs in reading order, where status is one
	-- of the module's IN / OUT / SAME values; the tokens shared at the start
	-- and at the end of both texts are each merged into a single SAME entry.
	-- The returned list also carries a `to_html` field set to format_as_html.
	-- Raises an error if `old` or `new` is nil or false.
	assert(old, "diff: 'old' text is required")
	assert(new, "diff: 'new' text is required")
	local new_tokens = split(new, separator)
	local old_tokens = split(old, separator)
	local old_len, new_len = #old_tokens, #new_tokens

	-- First, measure the common prefix and suffix of the two token lists.
	-- Chances are, there is only a small number of tokens in the middle that
	-- differ, in which case we can save ourselves a lot in terms of LCS
	-- computation. Counting by index avoids shifting the lists (and
	-- re-copying the prefix string) once per shared token.
	local prefix_len = 0
	while prefix_len < old_len
		and old_tokens[prefix_len + 1] == new_tokens[prefix_len + 1] do
		prefix_len = prefix_len + 1
	end
	-- The suffix may only use tokens that are not already part of the prefix,
	-- on either side.
	local suffix_len = 0
	while suffix_len < old_len - prefix_len
		and suffix_len < new_len - prefix_len
		and old_tokens[old_len - suffix_len] == new_tokens[new_len - suffix_len] do
		suffix_len = suffix_len + 1
	end
	local prefix = table.concat(old_tokens, "", 1, prefix_len)
	local suffix = table.concat(old_tokens, "", old_len - suffix_len + 1, old_len)

	-- Only the differing middle parts take part in the LCS computation.
	local old_mid, new_mid = {}, {}
	for k = prefix_len + 1, old_len - suffix_len do
		old_mid[#old_mid + 1] = old_tokens[k]
	end
	for k = prefix_len + 1, new_len - suffix_len do
		new_mid[#new_mid + 1] = new_tokens[k]
	end

	-- Setup a table that will store the diff (an upvalue for get_diff). We'll
	-- store it in the reverse order to allow for tail calls. We'll also keep
	-- in this table functions to handle different events.
	local rev_diff = {
		put = function(self, token, status) self[#self + 1] = {token, status} end,
		ins = function(self, token) self:put(token, IN) end,
		del = function(self, token) self:put(token, OUT) end,
		-- A nil token is ignored; an empty string is still recorded.
		same = function(self, token) if token then self:put(token, SAME) end end,
	}

	-- Put the suffix as the first token (we are storing the diff in the
	-- reverse order)
	rev_diff:same(suffix)

	-- Scans the LCS matrix backwards from (i, j) and records the diff
	-- entries, last token first. Every recursive step is a tail call.
	local function get_diff(C, old_list, new_list, i, j)
		local old_i = old_list[i]
		local new_j = new_list[j]
		if i >= 1 and j >= 1 and old_i == new_j then
			rev_diff:same(old_i)
			return get_diff(C, old_list, new_list, i - 1, j - 1)
		else
			local Cij1 = C[i][j - 1]
			local Ci1j = C[i - 1][j]
			if j >= 1 and (i == 0 or Cij1 >= Ci1j) then
				rev_diff:ins(new_j)
				return get_diff(C, old_list, new_list, i, j - 1)
			elseif i >= 1 and (j == 0 or Cij1 < Ci1j) then
				rev_diff:del(old_i)
				return get_diff(C, old_list, new_list, i - 1, j)
			end
			-- i == 0 and j == 0: both lists are exhausted, nothing to add.
		end
	end
	-- Start one past the end of both lists; that first step compares
	-- nil with nil and records nothing.
	get_diff(quick_LCS(old_mid, new_mid), old_mid, new_mid, #old_mid + 1, #new_mid + 1)

	-- Put the prefix in at the end
	rev_diff:same(prefix)

	-- Reverse the diff.
	local result = {}
	for k = #rev_diff, 1, -1 do
		result[#result + 1] = rev_diff[k]
	end
	result.to_html = format_as_html
	return result
end
 
Line 204:
-----------------------------------------------------------------------------
local function wikiDiff(old, new, separator)
	-- Renders the diff between `old` and `new` as a one-row HTML table: a
	-- "−" marker cell, a cell with the old text (removed tokens wrapped in
	-- <del>), a "+" marker cell and a cell with the new text (added tokens
	-- wrapped in <ins>). `separator` is handed to diff() unchanged.
	-- Returns the markup as a string.
	local tokens = diff(old, new, separator)
	local root = mw.html.create('')

	local plusMinusStyle = 'width: 2%; padding: 0.25em; font-weight: bold;' ..
		'font-size: 1.25em; text-align: end;'
	local tdDivStyle = 'word-wrap: break-word; direction: ltr;'

	local tdSharedStyle = 'vertical-align:top; width: 48%; border-style: solid; border-radius: 0.33em; ' ..
		'padding: 0.33em 0.5em; color: inherit; font-size: 1em; font-family: monospace; white-space: pre-wrap; border-width: 1px 1px 1px 4px; ' ..
		'-webkit-border-end-width: 1px; -webkit-border-start-width: 4px; ' ..
		'-moz-border-end-width: 1px; -moz-border-start-width: 4px;' -- these override default border-width for browsers that support them, needed for RTL UI on commons
	local insDelSharedStyle = 'padding: 0.25em 0; font-weight: bold; text-decoration: initial;'

	-- Writes one side of the diff into `cell`: tokens whose status equals
	-- `changedStatus` are wrapped in a styled <tagName> element, SAME tokens
	-- are written as plain text, and every other token is skipped.
	local function fillCell(cell, changedStatus, tagName, tagStyle)
		for _, token_record in ipairs(tokens) do
			-- Force all newlines to encode to avoid linter issues. The
			-- parentheses drop gsub's second result (the match count).
			local token = (mw.text.nowiki(token_record[1]):gsub("\n", "&#10;"))
			local status = token_record[2]
			if status == changedStatus then
				cell
					:tag(tagName)
						:cssText(tagStyle)
						:addClass('diffchange')
						:addClass('diffchange-inline')
						:wikitext(token)
			elseif status == SAME then
				cell:wikitext(token)
			end
		end
	end

	local tr = root:tag('table'):addClass('diff'):css('width', '100%'):tag('tr')

	tr:tag('td')
		:addClass('diff-marker')
		:cssText(plusMinusStyle)
		:wikitext('−')

	local deleted = tr
		:tag('td')
			:cssText('border-color: var(--background-color-content-removed,#ffe49c); ' .. tdSharedStyle)
			:addClass('diff-deletedline')
			:tag('div')
				:cssText(tdDivStyle)

	-- The var() fallback must be a valid colour; use the same fallback as the
	-- cell border above for this variable.
	fillCell(deleted, OUT, 'del',
		'background: var(--background-color-content-removed,#ffe49c); color: inherit; ' .. insDelSharedStyle)

	tr:tag('td')
		:cssText(plusMinusStyle)
		:wikitext('+')

	local inserted = tr
		:tag('td')
			:cssText('border-color: var(--background-color-content-added,#a3d3ff); ' .. tdSharedStyle)
			:addClass('diff-addedline')
			:tag('div')
				:cssText(tdDivStyle)

	-- Same rule as for the removed side: valid fallback, matching the border.
	fillCell(inserted, IN, 'ins',
		'background: var(--background-color-content-added,#a3d3ff); color: inherit; ' .. insDelSharedStyle)

	return tostring(root)
end
 
local function main(frame)
	-- Entry point taking a frame object: diffs frame.args[1] (old text)
	-- against frame.args[2] (new text) and returns the markup produced by
	-- wikiDiff. frame.args[3] optionally overrides the token separator
	-- pattern.
	local args = frame.args

	-- Each text argument goes through mw.text.unstrip and mw.text.decode
	-- before diffing. A missing argument is treated as empty text instead of
	-- raising inside mw.text.unstrip.
	local function prepare(value)
		return mw.text.decode(mw.text.unstrip(value or ''))
	end

	-- An argument passed as "|3=" arrives as "", which is truthy in Lua; as a
	-- pattern it matches the empty string everywhere, so fall back to the
	-- default (runs of whitespace, ".", ":" or "-") in that case too.
	local separator = args[3]
	if separator == nil or separator == '' then
		separator = '[%s%.:-]+'
	end

	return wikiDiff(prepare(args[1]), prepare(args[2]), separator)
end
 
-- Public interface of the module.
local exports = {}
exports.diff = diff
exports.wikiDiff = wikiDiff
exports.main = main
return exports