User:Cacycle/diff.js: Difference between revisions

Content deleted Content added
1.0.6a (August 27, 2014) shorten output fix (substring)
1.0.7 (September 01, 2014) jshint, split wDiff.DetectBlocks, unlinking too short groups, Greasemonkey header
Line 1:
// <syntaxhighlight lang="JavaScript">
 
// ==UserScript==
// @name wDiff
// @version 1.0.7
// @date September 01, 2014
// @description improved word-based diff library with block move detection
// @homepage https://en.wikipedia.org/wiki/User:Cacycle/diff
Line 9 ⟶ 10:
// @author Cacycle (https://en.wikipedia.org/wiki/User:Cacycle)
// @license released into the public domain
// ==/UserScript==
 
/*
Line 61 ⟶ 63:
 
blocks[]: array of objects that holds block (consecutive text tokens) data in order of the new text
.oldBlock: number of block in old text order
.newBlock: number of block in new text order
.oldNumber: old text token number of first token in block
.newNumber: new text token number of first token in block
.oldStart: old text token index of first token in block
.count: number of tokens in block
.chars: char length of block
.type: 'same', 'del', 'ins'
Line 75 ⟶ 80:
blockStart: first block index of group
blockEnd: last block index of group
maxWords: word count of longest uninterrupted block
words: word count of group
chars: char count of group
Line 85 ⟶ 90:
 
*/
 
// JSHint options: W004: is already defined, W097: Use the function form of "use strict", W100: This character may get silently deleted by one or more browsers
/* jshint -W004, -W097, -W100, newcap: false, browser: true, jquery: true, sub: true, bitwise: true, curly: false, evil: true, forin: true, freeze: true, immed: true, latedef: true, loopfunc: true, quotmark: single, undef: true */
/* global console */
 
// turn on ECMAScript 5 strict mode
'use strict';
 
// define global object
if (typeof wDiff == 'undefined') { window.wDiff = {}; }
var wDiff; if (wDiff === undefined) { wDiff = {}; }
var WED;
 
//
Line 95 ⟶ 106:
//
 
if (typeof wDiff.styleContainer === 'undefined') { wDiff.styleContainer = ''; }
// default style for deleted text; the guard must test the same property it assigns (styleDelete) so that a user-customized value is kept
if (typeof wDiff.styleDelete === 'undefined') { wDiff.styleDelete = 'font-weight: normal; text-decoration: none; color: #fff; background-color: #c33; border-radius: 0.25em; padding: 0.2em 1px;'; }
if (typeof wDiff.styleInsert === 'undefined') { wDiff.styleInsert = 'font-weight: normal; text-decoration: none; color: #fff; background-color: #07e; border-radius: 0.25em; padding: 0.2em 1px;'; }
if (typeof wDiff.styleBlockLeft === 'undefined') { wDiff.styleBlockLeft = 'background-color: #d0d0d0; border-radius: 0.25em; padding: 0.25em 1px; margin: 0 1px;'; }
if (typeof wDiff.styleBlockRight === 'undefined') { wDiff.styleBlockRight = 'background-color: #d0d0d0; border-radius: 0.25em; padding: 0.25em 1px; margin: 0 1px;'; }
if (typeof wDiff.styleBlockColor === 'undefined') { wDiff.styleBlockColor = [
'background-color: #ffff60;',
'background-color: #c0ff60;',
Line 111 ⟶ 122:
'background-color: #a0e8a0;'
]; }
if (typeof wDiff.styleMarkLeft === 'undefined') { wDiff.styleMarkLeft = 'color: #d0d0d0; background-color: #c33; border-radius: 0.25em; padding: 0.2em 0.2em; margin: 0 1px;'; }
if (typeof wDiff.styleMarkRight === 'undefined') { wDiff.styleMarkRight = 'color: #d0d0d0; background-color: #c33; border-radius: 0.25em; padding: 0.2em 0.2em; margin: 0 1px;'; }
if (typeof wDiff.styleMarkColor === 'undefined') { wDiff.styleMarkColor = [
'color: #ffff60;',
'color: #c0ff60;',
Line 124 ⟶ 135:
'color: #90d090;'
]; }
if (typeof wDiff.styleNewline === 'undefined') { wDiff.styleNewline = ''; }
if (typeof wDiff.styleTab === 'undefined') { wDiff.styleTab = ''; }
if (typeof wDiff.stylesheet === 'undefined') { wDiff.stylesheet = '.wDiffTab:before { content: "→"; color: #bbb; font-size: smaller; } .wDiffNewline:before { content: "¶"; color: #ccc; padding: 0 0.2em 0 1px; } .wDiffMarkRight:before { content: "▶"; } .wDiffMarkLeft:before { content: "◀"; }'; }
 
//
Line 132 ⟶ 143:
//
 
if (typeof wDiff.styleFragment === 'undefined') { wDiff.styleFragment = 'white-space: pre-wrap; background: #fcfcfc; border: #bbb solid; border-width: 1px 1px 1px 0.5em; border-radius: 0.5em; font-family: inherit; font-size: 88%; line-height: 1.6; box-shadow: 2px 2px 2px #ddd; padding: 1em; margin: 0;'; }
if (typeof wDiff.styleNoChange === 'undefined') { wDiff.styleNoChange = 'white-space: pre-wrap; background: #f0f0f0; border: #bbb solid; border-width: 1px 1px 1px 0.5em; border-radius: 0.5em; font-family: inherit; font-size: 88%; line-height: 1.6; box-shadow: 2px 2px 2px #ddd; padding: 0.5em; margin: 1em 0;'; }
if (typeof wDiff.styleSeparator === 'undefined') { wDiff.styleSeparator = 'margin-bottom: 1em;'; }
if (typeof wDiff.styleOmittedChars === 'undefined') { wDiff.styleOmittedChars = ''; }
 
//
Line 143 ⟶ 154:
// {block} and {mark} are replaced by block number color style, {title} is replaced by title attribute (popup)
// class plus html comment are required indicators for wDiff.ShortenOutput()
if (typeof wDiff.htmlContainerStart === 'undefined') { wDiff.htmlContainerStart = '<div class="wDiffContainer" style="' + wDiff.styleContainer + '">'; }
if (typeof wDiff.htmlContainerEnd === 'undefined') { wDiff.htmlContainerEnd = '</div>'; }
 
if (typeof wDiff.htmlDeleteStart === 'undefined') { wDiff.htmlDeleteStart = '<span class="wDiffDelete" style="' + wDiff.styleDelete + '" title="−">'; }
if (typeof wDiff.htmlDeleteEnd === 'undefined') { wDiff.htmlDeleteEnd = '</span><!--wDiffDelete-->'; }
 
if (typeof wDiff.htmlInsertStart === 'undefined') { wDiff.htmlInsertStart = '<span class="wDiffInsert" style="' + wDiff.styleInsert + '" title="+">'; }
if (typeof wDiff.htmlInsertEnd === 'undefined') { wDiff.htmlInsertEnd = '</span><!--wDiffInsert-->'; }
 
if (typeof wDiff.htmlBlockLeftStart === 'undefined') { wDiff.htmlBlockLeftStart = '<span class="wDiffBlockLeft" style="' + wDiff.styleBlockLeft + ' {block}" title="▶ ▢">'; }
if (typeof wDiff.htmlBlockLeftEnd === 'undefined') { wDiff.htmlBlockLeftEnd = '</span><!--wDiffBlockLeft-->'; }
 
if (typeof wDiff.htmlBlockRightStart === 'undefined') { wDiff.htmlBlockRightStart = '<span class="wDiffBlockRight" style="' + wDiff.styleBlockRight + ' {block}" title="▭ ◀">'; }
if (typeof wDiff.htmlBlockRightEnd === 'undefined') { wDiff.htmlBlockRightEnd = '</span><!--wDiffBlockRight-->'; }
 
if (typeof wDiff.htmlMarkRight === 'undefined') { wDiff.htmlMarkRight = '<span class="wDiffMarkRight" style="' + wDiff.styleMarkRight + ' {mark}"{title}></span><!--wDiffMarkRight-->'; }
if (typeof wDiff.htmlMarkLeft === 'undefined') { wDiff.htmlMarkLeft = '<span class="wDiffMarkLeft" style="' + wDiff.styleMarkLeft + ' {mark}"{title}></span><!--wDiffMarkLeft-->'; }
 
if (typeof wDiff.htmlNewline === 'undefined') { wDiff.htmlNewline = '<span class="wDiffNewline" style="' + wDiff.styleNewline + '"></span>\n'; }
if (typeof wDiff.htmlTab === 'undefined') { wDiff.htmlTab = '<span class="wDiffTab" style="' + wDiff.styleTab + '">\t</span>'; }
 
//
Line 168 ⟶ 179:
//
 
if (typeof wDiff.htmlFragmentStart === 'undefined') { wDiff.htmlFragmentStart = '<pre class="wDiffFragment" style="' + wDiff.styleFragment + '">'; }
if (typeof wDiff.htmlFragmentEnd === 'undefined') { wDiff.htmlFragmentEnd = '</pre>'; }
 
if (typeof wDiff.htmlNoChange === 'undefined') { wDiff.htmlNoChange = '<pre class="wDiffFragment" style="' + wDiff.styleNoChange + '" title="="></pre>'; }
if (typeof wDiff.htmlSeparator === 'undefined') { wDiff.htmlSeparator = '<div class="wDiffStyleSeparator" style="' + wDiff.styleSeparator + '"></div>'; }
if (typeof wDiff.htmlOmittedChars === 'undefined') { wDiff.htmlOmittedChars = '<span class="wDiffOmittedChars" style="' + wDiff.styleOmittedChars + '">…</span>'; }
 
//
Line 180 ⟶ 191:
 
// enable block move layout with color coded blocks and marks at their original position
if (typeof wDiff.showBlockMoves === 'undefined') { wDiff.showBlockMoves = true; }
 
// minimal number of real words for a moved block (0 for always showing color coded blocks)
if (typeof wDiff.blockMinLength === 'undefined') { wDiff.blockMinLength = 3; }
 
// further resolve replacements character-wise from start and end
if (typeof wDiff.charDiff === 'undefined') { wDiff.charDiff = true; }
 
// enable recursive diff to resolve problematic sequences
if (typeof wDiff.recursiveDiff === 'undefined') { wDiff.recursiveDiff = true; }
 
// Unicode letter support for regexps, from http://xregexp.com/addons/unicode/unicode-base.js v1.0.0
if (typeof wDiff.letters === 'undefined') { wDiff.letters = 'a-zA-Z0-9' + '00AA00B500BA00C0-00D600D8-00F600F8-02C102C6-02D102E0-02E402EC02EE0370-037403760377037A-037D03860388-038A038C038E-03A103A3-03F503F7-0481048A-05270531-055605590561-058705D0-05EA05F0-05F20620-064A066E066F0671-06D306D506E506E606EE06EF06FA-06FC06FF07100712-072F074D-07A507B107CA-07EA07F407F507FA0800-0815081A082408280840-085808A008A2-08AC0904-0939093D09500958-09610971-09770979-097F0985-098C098F09900993-09A809AA-09B009B209B6-09B909BD09CE09DC09DD09DF-09E109F009F10A05-0A0A0A0F0A100A13-0A280A2A-0A300A320A330A350A360A380A390A59-0A5C0A5E0A72-0A740A85-0A8D0A8F-0A910A93-0AA80AAA-0AB00AB20AB30AB5-0AB90ABD0AD00AE00AE10B05-0B0C0B0F0B100B13-0B280B2A-0B300B320B330B35-0B390B3D0B5C0B5D0B5F-0B610B710B830B85-0B8A0B8E-0B900B92-0B950B990B9A0B9C0B9E0B9F0BA30BA40BA8-0BAA0BAE-0BB90BD00C05-0C0C0C0E-0C100C12-0C280C2A-0C330C35-0C390C3D0C580C590C600C610C85-0C8C0C8E-0C900C92-0CA80CAA-0CB30CB5-0CB90CBD0CDE0CE00CE10CF10CF20D05-0D0C0D0E-0D100D12-0D3A0D3D0D4E0D600D610D7A-0D7F0D85-0D960D9A-0DB10DB3-0DBB0DBD0DC0-0DC60E01-0E300E320E330E40-0E460E810E820E840E870E880E8A0E8D0E94-0E970E99-0E9F0EA1-0EA30EA50EA70EAA0EAB0EAD-0EB00EB20EB30EBD0EC0-0EC40EC60EDC-0EDF0F000F40-0F470F49-0F6C0F88-0F8C1000-102A103F1050-1055105A-105D106110651066106E-10701075-1081108E10A0-10C510C710CD10D0-10FA10FC-1248124A-124D1250-12561258125A-125D1260-1288128A-128D1290-12B012B2-12B512B8-12BE12C012C2-12C512C8-12D612D8-13101312-13151318-135A1380-138F13A0-13F41401-166C166F-167F1681-169A16A0-16EA1700-170C170E-17111720-17311740-17511760-176C176E-17701780-17B317D717DC1820-18771880-18A818AA18B0-18F51900-191C1950-196D1970-19741980-19AB19C1-19C71A00-1A161A20-1A541AA71B05-1B331B45-1B4B1B83-1BA01BAE1BAF1BBA-1BE51C00-1C231C4D-1C4F1C5A-1C7D1CE9-1CEC1CEE-1CF11CF51CF61D00-1DBF1E00-1F151F18-1F1D1F20-1F451F48-1F4D1F50-1F571F591F5B1F5D1F5F-1F7D1F80-1FB41FB6-1FBC1FBE1FC2-1FC41FC6-1FCC1FD0-1FD31FD6-1FDB1FE0-1FEC1FF2-1FF41FF6-1FFC2071207F2090-209C21022107210A-211321152119-211D2124212621
28212A-212D212F-2139213C-213F2145-2149214E218321842C00-2C2E2C30-2C5E2C60-2CE42CEB-2CEE2CF22CF32D00-2D252D272D2D2D30-2D672D6F2D80-2D962DA0-2DA62DA8-2DAE2DB0-2DB62DB8-2DBE2DC0-2DC62DC8-2DCE2DD0-2DD62DD8-2DDE2E2F300530063031-3035303B303C3041-3096309D-309F30A1-30FA30FC-30FF3105-312D3131-318E31A0-31BA31F0-31FF3400-4DB54E00-9FCCA000-A48CA4D0-A4FDA500-A60CA610-A61FA62AA62BA640-A66EA67F-A697A6A0-A6E5A717-A71FA722-A788A78B-A78EA790-A793A7A0-A7AAA7F8-A801A803-A805A807-A80AA80C-A822A840-A873A882-A8B3A8F2-A8F7A8FBA90A-A925A930-A946A960-A97CA984-A9B2A9CFAA00-AA28AA40-AA42AA44-AA4BAA60-AA76AA7AAA80-AAAFAAB1AAB5AAB6AAB9-AABDAAC0AAC2AADB-AADDAAE0-AAEAAAF2-AAF4AB01-AB06AB09-AB0EAB11-AB16AB20-AB26AB28-AB2EABC0-ABE2AC00-D7A3D7B0-D7C6D7CB-D7FBF900-FA6DFA70-FAD9FB00-FB06FB13-FB17FB1DFB1F-FB28FB2A-FB36FB38-FB3CFB3EFB40FB41FB43FB44FB46-FBB1FBD3-FD3DFD50-FD8FFD92-FDC7FDF0-FDFBFE70-FE74FE76-FEFCFF21-FF3AFF41-FF5AFF66-FFBEFFC2-FFC7FFCA-FFCFFFD2-FFD7FFDA-FFDC'.replace(/(\w{4})/g, '\\u$1'); }
 
// regExp for splitting into paragraphs after newline
if (typeof wDiff.regExpParagraph === 'undefined') { wDiff.regExpParagraph = new RegExp('(.|\\n)+?(\\n|$)', 'g'); }
 
// regExp for splitting into sentences after .spaces or before newline
if (typeof wDiff.regExpSentence === 'undefined') { wDiff.regExpSentence = new RegExp('\\n|.*?\\.( +|(?=\\n))|.+?(?=\\n)', 'g'); }
 
// regExp for splitting into words, multi-char markup, and chars
if (typeof wDiff.regExpWord === 'undefined') { wDiff.regExpWord = new RegExp('([' + wDiff.letters + '])+|\\[\\[|\\]\\]|\\{\\{|\\}\\}|&\\w+;|\'\'\'|\'\'|==+|\\{\\||\\|\\}|\\|-|.', 'g'); }
 
// regExp for splitting into chars
if (typeof wDiff.regExpChar === 'undefined') { wDiff.regExpChar = new RegExp('[' + wDiff.letters + ']', 'g'); }
 
// regExps for bubbling up gaps
if (typeof wDiff.regExpBubbleStop === 'undefined') { wDiff.regExpBubbleStop = /\n$/; }
if (typeof wDiff.regExpBubbleClosing === 'undefined') { wDiff.regExpBubbleClosing = /^[\s)\]}>\-–—.,:;?!’\/\\=+]/; }
 
// regExp for counting words
if (wDiff.regExpWordCount === undefined) { wDiff.regExpWordCount = new RegExp('(^|[^' + wDiff.letters + '])[' + wDiff.letters + '][' + wDiff.letters + '_\'’]*', 'g'); }
 
 
Line 216 ⟶ 230:
 
// characters before diff tag to search for previous heading, paragraph, line break, cut characters
if (typeof wDiff.headingBefore === 'undefined') { wDiff.headingBefore = 1500; }
if (typeof wDiff.paragraphBefore === 'undefined') { wDiff.paragraphBefore = 1500; }
if (typeof wDiff.lineBeforeMax === 'undefined') { wDiff.lineBeforeMax = 1000; }
if (typeof wDiff.lineBeforeMin === 'undefined') { wDiff.lineBeforeMin = 500; }
if (typeof wDiff.blankBeforeMax === 'undefined') { wDiff.blankBeforeMax = 1000; }
if (typeof wDiff.blankBeforeMin === 'undefined') { wDiff.blankBeforeMin = 500; }
if (typeof wDiff.charsBefore === 'undefined') { wDiff.charsBefore = 500; }
 
// characters after diff tag to search for next heading, paragraph, line break, or characters
if (typeof wDiff.headingAfter === 'undefined') { wDiff.headingAfter = 1500; }
if (typeof wDiff.paragraphAfter === 'undefined') { wDiff.paragraphAfter = 1500; }
if (typeof wDiff.lineAfterMax === 'undefined') { wDiff.lineAfterMax = 1000; }
if (typeof wDiff.lineAfterMin === 'undefined') { wDiff.lineAfterMin = 500; }
if (typeof wDiff.blankAfterMax === 'undefined') { wDiff.blankAfterMax = 1000; }
if (typeof wDiff.blankAfterMin === 'undefined') { wDiff.blankAfterMin = 500; }
if (typeof wDiff.charsAfter === 'undefined') { wDiff.charsAfter = 500; }
 
// lines before and after diff tag to search for previous heading, paragraph, line break, cut characters
if (typeof wDiff.linesBeforeMax === 'undefined') { wDiff.linesBeforeMax = 10; }
if (typeof wDiff.linesAfterMax === 'undefined') { wDiff.linesAfterMax = 10; }
 
// maximal fragment distance to join close fragments
if (typeof wDiff.fragmentJoinLines === 'undefined') { wDiff.fragmentJoinLines = 10; }
if (typeof wDiff.fragmentJoinChars === 'undefined') { wDiff.fragmentJoinChars = 1000; }
 
 
//
// wDiff.Init: initialize wDiff
// called from: on code load
// calls: wDiff.AddStyleSheet()
 
wDiff.Init = function () {
 
// compatibility fixes for old names of functions
Line 255 ⟶ 268:
 
// shortcut to wikEd.Debug()
if (typeof WED !=== 'function'undefined) {
if (typeof console == 'object') {
window.WED = console.log;
}
else {
window.WED = window.alert;
}
}
Line 277 ⟶ 290:
// returns: diff html code, call wDiff.ShortenOutput() for shortening this output
 
wDiff.Diff = function (oldString, newString) {
 
var diff = '';
Line 304 ⟶ 317:
// trap trivial changes: no change
if (oldString == newString) {
text.diff = wDiff.HtmlEscape(newString);
wDiff.HtmlFormat(text);
return text.diff;
Line 310 ⟶ 323:
 
// trap trivial changes: old text deleted
if ( (oldString === null) || (oldString.length === 0) ) {
text.diff = wDiff.htmlInsertStart + wDiff.HtmlEscape(newString) + wDiff.htmlInsertEnd;
wDiff.HtmlFormat(text);
Line 317 ⟶ 330:
 
// trap trivial changes: new text deleted
if ( (newString === null) || (newString.length === 0) ) {
text.diff = wDiff.htmlDeleteStart + wDiff.HtmlEscape(oldString) + wDiff.htmlDeleteEnd;
wDiff.HtmlFormat(text);
Line 349 ⟶ 362:
 
// split tokens into chars in selected unresolved gaps
if (wDiff.charDiff === true) {
wDiff.SplitRefineChars(text);
 
Line 381 ⟶ 394:
// called from: wDiff.Diff()
 
wDiff.Split = function (text, regExp, token) {
 
var prev = null;
Line 390 ⟶ 403:
 
// split full text or specified token
if (token === nullundefined) {
string = text.string;
}
Line 402 ⟶ 415:
var number = 0;
var regExpMatch;
while ( (regExpMatch = regExp.exec(string)) !== null) {
 
// insert current item, link to previous
Line 411 ⟶ 424:
link: null,
number: null,
parsed: false,
};
number ++;
 
// link previous item to current
if (prev !== null) {
text.tokens[prev].next = current;
}
Line 424 ⟶ 437:
 
// connect last new item and existing next item
if ( (number > 0) && (token !== nullundefined) ) {
if (prev !== null) {
text.tokens[prev].next = next;
}
if (next !== null) {
text.tokens[next].prev = prev;
}
Line 437 ⟶ 450:
 
// initial text split
if (token === nullundefined) {
text.first = 0;
text.last = prev;
Line 461 ⟶ 474:
// calls: wDiff.Split()
 
wDiff.SplitRefine = function (text, regExp) {
 
// cycle through tokens list
var i = text.first;
while ( (i !== null) && (text.tokens[i] !== null) ) {
 
// refine unique unmatched tokens into smaller tokens
if (text.tokens[i].link === null) {
wDiff.Split(text, regExp, i);
}
Line 494 ⟶ 507:
// refine words into chars in selected gaps
 
wDiff.SplitRefineChars = function (text) {
 
//
Line 505 ⟶ 518:
var i = text.newText.first;
var j = text.oldText.first;
while ( (i !== null) && (text.newText.tokens[i] !== null) ) {
 
// get token links
var newLink = text.newText.tokens[i].link;
var oldLink = null;
if (j !== null) {
oldLink = text.oldText.tokens[j].link;
}
 
// start of gap in new and old
if ( (gap === null) && (newLink === null) && (oldLink === null) ) {
gap = gaps.length;
gaps.push({
Line 529 ⟶ 542:
 
// count chars and tokens in gap
else if ( (gap !== null) && (newLink === null) ) {
gaps[gap].newLast = i;
gaps[gap].newTokens ++;
Line 535 ⟶ 548:
 
// gap ended
else if ( (gap !== null) && (newLink !== null) ) {
gap = null;
}
 
// next list elements
if (newLink !== null) {
j = text.oldText.tokens[newLink].next;
}
Line 551 ⟶ 564:
// cycle through old text tokens list
var j = gaps[gap].oldFirst;
while ( (j !== null) && (text.oldText.tokens[j] !== null) && (text.oldText.tokens[j].link === null) ) {
 
// count old chars and tokens in gap
Line 590 ⟶ 603:
var i = gaps[gap].newFirst;
var j = gaps[gap].oldFirst;
while (i !== null) {
var newToken = text.newText.tokens[i].token;
var oldToken = text.oldText.tokens[j].token;
Line 660 ⟶ 673:
// do not split into chars this gap
charSplit = false;
break;
}
}
Line 679 ⟶ 692:
 
for (var gap = 0; gap < gaps.length; gap ++) {
if (gaps[gap].charSplit === true) {
 
// cycle through new text tokens list
var i = gaps[gap].newFirst;
var j = gaps[gap].oldFirst;
while (i !== null) {
var newToken = text.newText.tokens[i].token;
var oldToken = text.oldText.tokens[j].token;
Line 721 ⟶ 734:
// called from: wDiff.Diff()
 
wDiff.BubbleUpGaps = function (text, textLinked) {
 
// cycle through tokens list
var i = text.first;
var gapStart = null;
while ( (i !== null) && (text.tokens[i] !== null) ) {
 
// remember gap start
if ( (gapStart === null) && (text.tokens[i].link === null) ) {
gapStart = i;
}
 
// find gap end
else if ( (gapStart !== null) && (text.tokens[i].link !== null) ) {
 
// bubble up, stop at line breaks
Line 740 ⟶ 753:
var back = text.tokens[i].prev;
while (
(front !== null) && (back !== null) && (wDiff.regExpBubbleStop.test(text.tokens[front].token) === false) &&
(text.tokens[front].link !== null) && (text.tokens[back].link === null) &&
(text.tokens[front].token == text.tokens[back].token)
) {
Line 750 ⟶ 763:
back = text.tokens[back].prev;
}
 
// do not start gap with spaces or other closing characters, roll back (bubble down)
if ( (back !== null) && (front !== null) ) {
front = text.tokens[front].next;
back = text.tokens[back].next;
}
while (
(back !== null) && (front !== null) && (wDiff.regExpBubbleClosing.test(text.tokens[front].token) === true) &&
(text.tokens[front].link === null) && (text.tokens[back].link !== null) &&
(text.tokens[front].token === text.tokens[back].token)
) {
text.tokens[front].link = text.tokens[back].link;
Line 779 ⟶ 792:
// called from: wDiff.Diff()
 
wDiff.EnumerateTokens = function (text) {
 
// enumerate tokens list
var number = 0;
var i = text.first;
while ( (i !== null) && (text.tokens[i] !== null) ) {
text.tokens[i].number = number;
number ++;
Line 806 ⟶ 819:
// recursively diff still unresolved regions upwards
 
wDiff.CalculateDiff = function (text, recurse, newStart, newEnd, oldStart, oldEnd, recursionLevel) {
 
// symbol (token) data
Line 813 ⟶ 826:
 
// set defaults
if (typeof newStart === 'undefined') { newStart = text.newText.first; }
if (typeof newEnd === 'undefined') { newEnd = text.newText.last; }
if (typeof oldStart === 'undefined') { oldStart = text.oldText.first; }
if (typeof oldEnd === 'undefined') { oldEnd = text.oldText.last; }
if (typeof recursionLevel === 'undefined') { recursionLevel = 0; }
 
// limit recursion depth
Line 830 ⟶ 843:
// cycle through new text tokens list
var i = newStart;
while ( (i !== null) && (text.newText.tokens[i] !== null) ) {
 
// parse token only once during split refinement
if ( (text.newText.tokens[i].parsed === false) || (recursionLevel > 0) ) {
text.newText.tokens[i].parsed = true;
 
// add new entry to symbol table
var token = text.newText.tokens[i].token;
if (Object.prototype.hasOwnProperty.call(symbols, token) === false) {
var current = symbol.length;
symbols[token] = current;
Line 871 ⟶ 884:
// cycle through old text tokens list
var j = oldStart;
while ( (j !== null) && (text.oldText.tokens[j] !== null) ) {
 
// parse token only once during split refinement
if ( (text.oldText.tokens[j].parsed === false) || (recursionLevel > 0) ) {
text.oldText.tokens[j].parsed = true;
 
// add new entry to symbol table
var token = text.oldText.tokens[j].token;
if (Object.prototype.hasOwnProperty.call(symbols, token) === false) {
var current = symbol.length;
symbols[token] = current;
Line 903 ⟶ 916:
 
// next list element
if (j === oldEnd) {
break;
}
Line 922 ⟶ 935:
 
// do not use spaces as unique markers
if (/^\s+$/.test(text.newText.tokens[newToken].token) === false) {
 
// connect from new to old and from old to new
if (text.newText.tokens[newToken].link === null) {
text.newText.tokens[newToken].link = oldToken;
text.oldText.tokens[oldToken].link = newToken;
Line 939 ⟶ 952:
// cycle through new text tokens list
var i = text.newText.first;
while ( (i !== null) && (text.newText.tokens[i] !== null) ) {
var iNext = text.newText.tokens[i].next;
 
// find already connected pairs
var j = text.newText.tokens[i].link;
if (j !== null) {
var jNext = text.oldText.tokens[j].next;
 
// check if the following tokens are not yet connected
if ( (iNext !== null) && (jNext !== null) ) {
if ( (text.newText.tokens[iNext].link === null) && (text.oldText.tokens[jNext].link === null) ) {
 
// connect if the following tokens are the same
Line 968 ⟶ 981:
// cycle through new text tokens list
var i = text.newText.last;
while ( (i !== null) && (text.newText.tokens[i] !== null) ) {
var iNext = text.newText.tokens[i].prev;
 
// find already connected pairs
var j = text.newText.tokens[i].link;
if (j !== null) {
var jNext = text.oldText.tokens[j].prev;
 
// check if the preceding tokens are not yet connected
if ( (iNext !== null) && (jNext !== null) ) {
if ( (text.newText.tokens[iNext].link === null) && (text.oldText.tokens[jNext].link === null) ) {
 
// connect if the preceding tokens are the same
Line 992 ⟶ 1,005:
 
// refine by recursively diffing unresolved regions caused by addition of common tokens around sequences of common tokens, only at word level split
if ( (recurse === true) && (wDiff.recursiveDiff === true) ) {
 
//
Line 1,002 ⟶ 1,015:
var j = oldStart;
 
while ( (i !== null) && (text.newText.tokens[i] !== null) ) {
 
// get j from previous tokens match
var iPrev = text.newText.tokens[i].prev;
if (iPrev !== null) {
var jPrev = text.newText.tokens[iPrev].link;
if (jPrev !== null) {
j = text.oldText.tokens[jPrev].next;
}
Line 1,014 ⟶ 1,027:
 
// check for the start of an unresolved sequence
if ( (j !== null) && (text.oldText.tokens[j] !== null) && (text.newText.tokens[i].link === null) && (text.oldText.tokens[j].link === null) ) {
 
// determine the limits of the unresolved new sequence
Line 1,021 ⟶ 1,034:
var iLength = 0;
var iNext = i;
while ( (iNext !== null) && (text.newText.tokens[iNext].link === null) ) {
iEnd = iNext;
iLength ++;
Line 1,035 ⟶ 1,048:
var jLength = 0;
var jNext = j;
while ( (jNext !== null) && (text.oldText.tokens[jNext].link === null) ) {
jEnd = jNext;
jLength ++;
Line 1,069 ⟶ 1,082:
var i = newEnd;
var j = oldEnd;
while ( (i !== null) && (text.newText.tokens[i] !== null) ) {
 
// get j from next matched tokens
var iPrev = text.newText.tokens[i].next;
if (iPrev !== null) {
var jPrev = text.newText.tokens[iPrev].link;
if (jPrev !== null) {
j = text.oldText.tokens[jPrev].prev;
}
Line 1,081 ⟶ 1,094:
 
// check for the start of an unresolved sequence
if ( (j !== null) && (text.oldText.tokens[j] !== null) && (text.newText.tokens[i].link === null) && (text.oldText.tokens[j].link === null) ) {
 
// determine the limits of the unresolved new sequence
Line 1,088 ⟶ 1,101:
var iLength = 0;
var iNext = i;
while ( (iNext !== null) && (text.newText.tokens[iNext].link === null) ) {
iStart = iNext;
iLength ++;
Line 1,102 ⟶ 1,115:
var jLength = 0;
var jNext = j;
while ( (jNext !== null) && (text.oldText.tokens[jNext].link === null) ) {
jStart = jNext;
jLength ++;
Line 1,138 ⟶ 1,151:
// blocks: empty array for block data
// groups: empty array for group data
// changes: text, blocks, groups
// called from: wDiff.Diff()
// steps:
// collect identical corresponding ('same') blocks from old text
// sort blocks by new text token number
// collect groups of continuous old text blocks
// collect independent block sections (no old/new crosses outside section)
// find groups of continuous old text blocks
// set longest sequence of increasing groups in sections as fixed (not moved)
// collect insertion ('ins') blocks from new text
// collect deletion ('del') blocks from old text
// position 'del' blocks into new text order
// re-sort blocks by new text token number and update groups
// set group numbers of 'ins' and 'del' blocks inside existing groups
// add remaining 'ins' and 'del' blocks to groups
// mark original positions of moved groups
// set moved block colors
//
// scheme of blocks, sections, and groups (old block numbers):
// old: 1 2 3D4 5E6 7 8 9 10 11
Line 1,165 ⟶ 1,162:
// type: = + =-= = -= = + = = = = =
 
wDiff.DetectBlocks = function (text, blocks, groups) {

// Runs the block detection steps in a fixed order; the blocks and groups arrays passed in are cleared and filled in place by the helpers, nothing is returned.

// WED('text.oldText', wDiff.DebugText(text.oldText));
// WED('text.newText', wDiff.DebugText(text.newText));

// collect identical corresponding ('same') blocks from old text and sort by new text
wDiff.GetSameBlocks(text, blocks);

// collect independent block sections (no old/new crosses outside section) for per-section determination of non-moving (fixed) groups
// sections is local to this function and is only handed to GetSections() and SetFixed()
var sections = [];
wDiff.GetSections(blocks, sections);

// find groups of continuous old text blocks
wDiff.GetGroups(blocks, groups);

// convert groups to insertions/deletions if maximal block length is too short
// UnlinkBlocks() returns true if it removed token links; blocks, sections, and groups were derived from those links and are rebuilt once
if ( (wDiff.blockMinLength > 0) && (wDiff.UnlinkBlocks(text, blocks, groups) === true) ) {
// repeat from start after conversion to insertions/deletions
wDiff.GetSameBlocks(text, blocks);
wDiff.GetSections(blocks, sections);
wDiff.GetGroups(blocks, groups);
}

// set longest sequence of increasing groups in sections as fixed (not moved)
wDiff.SetFixed(blocks, groups, sections);

// collect deletion ('del') blocks from old text
wDiff.GetDelBlocks(text, blocks);

// position 'del' blocks into new text order
wDiff.PositionDelBlocks(blocks);

// collect insertion ('ins') blocks from new text
wDiff.GetInsBlocks(text, blocks);

// sort blocks by new text token number and update groups
wDiff.SortBlocks(blocks, groups);

// set group numbers of 'ins' and 'del' blocks
wDiff.SetInsDelGroups(blocks, groups);

// mark original positions of moved groups
wDiff.MarkMoved(groups);

// set moved block colors
wDiff.ColorMoved(groups);

// WED('Groups', wDiff.DebugGroups(groups));
// WED('Blocks', wDiff.DebugBlocks(blocks));

return;
};
 
 
// wDiff.GetSameBlocks: collect identical corresponding ('same') blocks from old text and sort by new text
// called from: DetectBlocks()
// changes: creates blocks
 
wDiff.GetSameBlocks = function (text, blocks) {
 
// clear blocks array
blocks.splice(0);
 
// cycle through old text to find matched (linked) blocks
var j = text.oldText.first;
var i = null;
while (j !== null) {
var deletions = [];
while (j != null) {
 
// detectskip 'del' blocks and remember for later
while ( (j !== null) && (text.oldText.tokens[j].link === null) ) {
var delStart = j;
var delEnd = null;
var string = '';
while ( (j != null) && (text.oldText.tokens[j].link == null) ) {
string += text.oldText.tokens[j].token;
delEnd = j;
j = text.oldText.tokens[j].next;
}
 
// save old text 'del' block data
if (delEnd != null) {
deletions.push({
oldStart: delStart,
oldBlock: blocks.length,
string: string
});
}
 
// get 'same' block
if (j !== null) {
i = text.oldText.tokens[j].link;
var iStart = i;
Line 1,206 ⟶ 1,243:
 
// detect matching blocks ('same')
var count = 0;
var chars = 0;
var string = '';
while ( (i !== null) && (j !== null) && (text.oldText.tokens[j].link == i) ) {
var token = text.oldText.tokens[j].token;
count ++;
chars += token.length;
string += token;
Line 1,219 ⟶ 1,258:
blocks.push({
oldBlock: blocks.length,
newBlock: null,
oldNumber: text.oldText.tokens[jStart].number,
newNumber: text.newText.tokens[iStart].number,
oldStart: jStart,
count: count,
chars: chars,
type: 'same',
Line 1,231 ⟶ 1,273:
}
 
//
// sort blocks by new text token number
//
 
blocks.sort(function(a, b) {
return a.newNumber - b.newNumber;
});
 
// number blocks in new text order
//
for (var block = 0; block < blocks.length; block ++) {
// collect independent block sections (no old/new crosses outside section) for per-section determination of non-moving (fixed) groups
blocks[block].newBlock = block;
//
}
return;
};
 
 
var sections = [];
// wDiff.GetSections: collect independent block sections (no old/new crosses outside section) for per-section determination of non-moving (fixed) groups
// called from: DetectBlocks()
// changes: creates sections, blocks[].section
 
wDiff.GetSections = function (blocks, sections) {
 
// clear sections array
sections.splice(0);
 
// cycle through blocks
var nextSectionStart = 0;
for (var block = 0; block < blocks.length; block ++) {
 
Line 1,280 ⟶ 1,329:
blockStart: sectionStart,
blockEnd: sectionEnd,
deleted: false
});
block = sectionEnd;
}
}
return;
};
 
//
// find groups of continuous old text blocks
//
 
// wDiff.GetGroups: find groups of continuous old text blocks
var regExpWordCount = new RegExp('(^|[^' + wDiff.letters + '])[' + wDiff.letters + '][' + wDiff.letters + '_\'’]*', 'g');
// called from: DetectBlocks()
// changes: creates groups, blocks[].group
 
wDiff.GetGroups = function (blocks, groups) {
 
// clear groups array
groups.splice(0);
 
// cycle through blocks
for (var block = 0; block < blocks.length; block ++) {
if (blocks[block].deleted === true) {
var groupStart = null;
continue;
var groupEnd = null;
}
var groupStart = block;
var groupEnd = block;
var oldBlock = blocks[groupStart].oldBlock;
 
// get word and char count of block
var words = wDiff.WordCount(blocks[block].string.match(regExpWordCount) || []).length;
var maxWords = words;
var chars = blocks[block].chars;
 
groupStart = block;
groupEnd = block;
var oldBlock = blocks[groupStart].oldBlock;
 
// check right
Line 1,315 ⟶ 1,371:
 
// get word and char count of block
var blockWords = wDiff.WordCount(blocks[i].string.match(regExpWordCount) || []).length;
if (blockWords > maxWords) {
maxWords = blockWords;
Line 1,321 ⟶ 1,377:
words += blockWords;
chars += blocks[i].chars;
 
// skip trailing 'del'
groupEnd = i;
}
 
// save crossing group
if ( (groupStart != null) && (groupEnd !>= null) groupStart) {
 
// set groups outside sections as fixed
var fixed = false;
if (blocks[groupStart].section === null) {
fixed = true;
}
Line 1,346 ⟶ 1,400:
blockStart: groupStart,
blockEnd: groupEnd,
words: words,
maxWords: maxWords,
words: words,
chars: chars,
fixed: fixed,
Line 1,358 ⟶ 1,412:
}
}
return;
};
 
 
//
// wDiff.UnlinkBlocks: remove 'same' blocks in groups of continuous old text blocks if too short
// set longest sequence of increasing groups in sections as fixed (not moved)
// called from: DetectBlocks()
//
// changes: text.newText/oldText[].link
// returns: true if text tokens were unlinked
 
wDiff.UnlinkBlocks = function (text, blocks, groups) {

	// Walks every group; for each group that is not fixed (fixed === false) and whose
	// longest uninterrupted block has fewer than wDiff.blockMinLength words, the old/new
	// token links of all tokens in the group's blocks are cleared on both sides.
	//   text: object holding oldText/newText token lists (tokens[].link, tokens[].next)
	//   blocks: block array, read for oldStart and count
	//   groups: group array, read for maxWords, fixed, blockStart, blockEnd
	//   returns: true if at least one block of a qualifying group was visited
	var anyUnlinked = false;

	for (var g = 0; g < groups.length; g ++) {
		var currentGroup = groups[g];

		// leave groups alone that are long enough or not flagged as movable
		var tooShort = currentGroup.maxWords < wDiff.blockMinLength;
		if ( !tooShort || (currentGroup.fixed !== false) ) {
			continue;
		}

		var firstBlock = currentGroup.blockStart;
		var lastBlock = currentGroup.blockEnd;
		for (var b = firstBlock; b <= lastBlock; b ++) {

			// follow the old text token chain for the length of this block
			var oldIndex = blocks[b].oldStart;
			var done = 0;
			while (done < blocks[b].count) {
				var oldToken = text.oldText.tokens[oldIndex];

				// clear the link on the new side first, it is addressed through the old link
				text.newText.tokens[oldToken.link].link = null;
				oldToken.link = null;

				oldIndex = oldToken.next;
				done ++;
			}
			anyUnlinked = true;
		}
	}
	return anyUnlinked;
};
 
 
// wDiff.SetFixed: set longest sequence of increasing groups in sections as fixed (not moved)
// called from: DetectBlocks()
// calls: wDiff.FindMaxPath()
// changes: groups[].fixed, blocks[].fixed
 
wDiff.SetFixed = function (blocks, groups, sections) {
 
// cycle through sections
Line 1,381 ⟶ 1,476:
if (pathObj.chars > maxChars) {
maxPath = pathObj.path;
maxChars = pathObj.chars;
}
}
Line 1,388 ⟶ 1,483:
for (var i = 0; i < maxPath.length; i ++) {
var group = maxPath[i];
groups[group].fixed = true;
 
// mark fixed blocks
Line 1,396 ⟶ 1,491:
}
}
return;
};
 
//
// collect insertion ('ins') blocks from new text
//
 
// wDiff.FindMaxPath: recursively find path of groups in increasing old group order with longest char length
// cycle through new text to find insertion blocks
// input: start, path start group; path, array of path groups; chars, char count of path; cache, cached sub-path lengths; groups, groups, group object; groupEnd, last group
var i = text.newText.first;
// returns: returnObj, contains path and length
while (i != null) {
// called from: wDiff.SetFixed()
// calls: itself recursively
 
wDiff.FindMaxPath = function (start, path, chars, cache, groups, groupEnd) {
// jump over linked (matched) block
 
while ( (i != null) && (text.newText.tokens[i].link != null) ) {
// add current path point
i = text.newText.tokens[i].next;
var pathLocal = path.slice();
pathLocal.push(start);
chars = chars + groups[start].chars;
 
// last group, terminate recursion
var returnObj = { path: pathLocal, chars: chars };
if (start == groupEnd) {
return returnObj;
}
 
// find longest sub-path
var maxChars = 0;
var oldNumber = groups[start].oldNumber;
for (var i = start + 1; i <= groupEnd; i ++) {
 
// only in increasing old group order
if (groups[i].oldNumber < oldNumber) {
continue;
}
 
// get longest sub-path from cache
// detect insertion blocks ('ins')
if (icache[start] !== nullundefined) {
var iStartreturnObj = icache[start];
}
var string = '';
 
while ( (i != null) && (text.newText.tokens[i].link == null) ) {
// get longest sub-path by recursion
string += text.newText.tokens[i].token;
else {
i = text.newText.tokens[i].next;
var pathObj = wDiff.FindMaxPath(i, pathLocal, chars, cache, groups, groupEnd);
 
// select longest sub-path
if (pathObj.chars > maxChars) {
returnObj = pathObj;
}
}
}
 
// save newlongest textpath 'ins'to blockcache
if (cache[i] === undefined) {
cache[start] = returnObj;
}
return returnObj;
};
 
 
// wDiff.GetDelBlocks: collect deletion ('del') blocks from old text
// called from: DetectBlocks()
// changes: blocks
 
wDiff.GetDelBlocks = function (text, blocks) {
 
// cycle through old text to find matched (linked) blocks
var j = text.oldText.first;
var i = null;
while (j !== null) {
 
// collect 'del' blocks
var oldStart = j;
var count = 0;
var string = '';
while ( (j !== null) && (text.oldText.tokens[j].link === null) ) {
count ++;
string += text.oldText.tokens[j].token;
j = text.oldText.tokens[j].next;
}
 
// save old text 'del' block
if (count !== 0) {
blocks.push({
oldBlock: null,
oldNumbernewBlock: null,
newNumberoldNumber: text.newTextoldText.tokens[iStartoldStart].number,
newNumber: null,
oldStart: oldStart,
count: count,
chars: null,
type: 'insdel',
section: null,
group: null,
Line 1,431 ⟶ 1,585:
string: string
});
}
 
// skip 'same' block
if (j !== null) {
i = text.oldText.tokens[j].link;
while ( (i !== null) && (j !== null) && (text.oldText.tokens[j].link == i) ) {
i = text.newText.tokens[i].next;
j = text.oldText.tokens[j].next;
}
}
}
return;
};
 
//
// collect deletion ('del') blocks from old text
//
 
// cyclewDiff.PositionDelBlocks: throughposition 'del' blocks andinto hashnew oldBlocktext indexesorder
// called from: DetectBlocks()
var oldBlocks = [];
// changes: blocks[].section/group/fixed/newNumber
for (var block = 0; block < blocks.length; block ++) {
//
oldBlocks[ blocks[block].oldBlock ] = block;
// deletion blocks move with fixed neighbor (new number +/- 0.1):
}
// old: 1 D 2 1 D 2
// / / \ / \ \
// new: 1 D 2 1 D 2
// fixed: * *
// new number: 1 1.1 1.9 2
 
wDiff.PositionDelBlocks = function (blocks) {
// cycle through deletions detected earlier
for (var del = 0; del < deletions.length; del ++) {
var newNumber = 0;
var oldBlock = deletions[del].oldBlock;
 
// sort shallow copy of blocks by oldNumber
// get old text next block
var nextBlockblocksOld = oldBlocks[oldBlock]blocks.slice();
blocksOld.sort(function(a, b) {
return a.oldNumber - b.oldNumber;
});
 
// cycle through 'del' blocks in old text order
for (var blockOld = 0; blockOld < blocksOld.length; blockOld ++) {
var delBlock = blocksOld[blockOld];
if (delBlock.type != 'del') {
continue;
}
 
// get old text prev block
var prevBlock = null;
if (oldBlockblockOld > 0) {
prevBlock = oldBlocksblocks[oldBlock blocksOld[blockOld - 1].newBlock ];
}
 
// get old text next block
//
var nextBlock;
// position 'del' blocks into new text order
if (blockOld < blocksOld.length - 1) {
//
nextBlock = blocks[ blocksOld[blockOld + 1].newBlock ];
 
}
// deletion blocks move with fixed neighbor (new number +/- 0.3):
// old: 1 D 2 1 D 2
// / / \ ‾/-/_
// new: 1 D 2 D 2 1
// fixed: * *
// new number: 1 1.3 1.7 2
 
// move direction important for general del-ins order
 
// move after prev block if fixed
var neighbor = null;
if ( (prevBlock !== nullundefined) && (blocks[prevBlock].fixed === true) ) {
neighbor = blocks[prevBlock];
delBlock.newNumber = neighbor.newNumber + 0.31;
}
 
// move before next block if fixed
else if ( (nextBlock !== nullundefined) && (blocks[nextBlock].fixed === true) ) {
neighbor = blocks[nextBlock];
delBlock.newNumber = neighbor.newNumber - 0.31;
}
 
// move after prev block if existent
else if (prevBlock !== nullundefined) {
neighbor = blocks[prevBlock];
delBlock.newNumber = neighbor.newNumber + 0.31;
}
 
// move before next block
else if (nextBlock !== nullundefined) {
neighbor = blocks[nextBlock];
delBlock.newNumber = neighbor.newNumber - 0.31;
}
 
// move before first block
else {
delBlock.newNumber = -0.31;
}
 
// getupdate 'del' block with neighbor data
if (neighbor !== undefined) {
var section = null;
delBlock.section = neighbor.section;
var group = null;
delBlock.group = neighbor.group;
var fixed = null;
delBlock.fixed = neighbor.fixed;
if (neighbor != null) {
section = neighbor.section;
group = neighbor.group;
fixed = neighbor.fixed;
}
}
return;
};
 
 
// save old text 'del' block
// wDiff.GetInsBlocks: collect insertion ('ins') blocks from new text
blocks.push({
// called from: DetectBlocks()
oldBlock: null,
// changes: blocks
oldNumber: text.oldText.tokens[ deletions[del].oldStart ].number,
 
newNumber: newNumber,
wDiff.GetInsBlocks = function (text, blocks) {
chars: null,
 
type: 'del',
// cycle through new text to find insertion blocks
section: section,
var i = text.newText.first;
group: group,
while (i !== null) {
fixed: fixed,
 
string: deletions[del].string
// jump over linked (matched) block
});
while ( (i !== null) && (text.newText.tokens[i].link !== null) ) {
i = text.newText.tokens[i].next;
}
 
// detect insertion blocks ('ins')
if (i !== null) {
var iStart = i;
var count = 0;
var string = '';
while ( (i !== null) && (text.newText.tokens[i].link === null) ) {
count ++;
string += text.newText.tokens[i].token;
i = text.newText.tokens[i].next;
}
 
// save new text 'ins' block
blocks.push({
oldBlock: null,
newBlock: null,
oldNumber: null,
newNumber: text.newText.tokens[iStart].number,
oldStart: null,
count: count,
chars: null,
type: 'ins',
section: null,
group: null,
fixed: null,
string: string
});
}
}
return;
};
 
 
//
// re-wDiff.SortBlocks: sort blocks by new text token number and update groups
// called from: DetectBlocks()
//
// changes: blocks
 
wDiff.SortBlocks = function (blocks, groups) {
 
// sort by newNumber
Line 1,538 ⟶ 1,741:
for (var block = 0; block < blocks.length; block ++) {
var blockGroup = blocks[block].group;
if (blockGroup !== null) {
if (blockGroup != group) {
group = blocks[block].group;
Line 1,544 ⟶ 1,747:
groups[group].oldNumber = blocks[block].oldNumber;
}
groups[blockGroup].blockEnd = block;
}
}
return;
};
 
//
// set group numbers of 'ins' and 'del' blocks inside existing groups
//
 
// wDiff.SetInsDelGroups: set group numbers of 'ins' and 'del' blocks
// called from: DetectBlocks()
// changes: groups, blocks[].fixed/group
 
wDiff.SetInsDelGroups = function (blocks, groups) {
 
// set group numbers of 'ins' and 'del' blocks inside existing groups
for (var group = 0; group < groups.length; group ++) {
var fixed = groups[group].fixed;
for (var block = groups[group].blockStart; block <= groups[group].blockEnd; block ++) {
if (blocks[block].group === null) {
blocks[block].group = group;
blocks[block].fixed = fixed;
Line 1,562 ⟶ 1,771:
}
 
//
// add remaining 'ins' and 'del' blocks to groups
//
 
// cycle through blocks
Line 1,570 ⟶ 1,777:
 
// skip existing groups
if (blocks[block].group === null) {
blocks[block].group = groups.length;
var fixed = blocks[block].fixed;
Line 1,590 ⟶ 1,797:
}
}
return;
};
 
//
// mark original positions of moved groups
//
 
// movedwDiff.MarkMoved: block marks atmark original positions relativeof to fixedmoved groups:
// groupscalled from: 3 7DetectBlocks()
// changes: groups[].moved/movedFrom
// 1 <| | (no next smaller fixed)
// moved block marks at original positions relative to fixed groups:
// 5 |< |
// groups: 3 |> 5 |7
// 1 <| | 5 <| (no next smaller fixed)
// 5 |< >| 5
// |> 5 |> 9 (no next larger fixed)
// fixed: * | * 5 <|
// | >| 5
// mark direction: groups[movedGroup].blockStart < groups[group].blockStart
// | |> 9 (no next larger fixed)
// group side: groups[movedGroup].oldNumber < groups[group].oldNumber
// fixed: * *
// mark direction: groups[movedGroup].blockStart < groups[group].blockStart
// group side: groups[movedGroup].oldNumber < groups[group].oldNumber
 
wDiff.MarkMoved = function (groups) {
 
// cycle through groups (moved group)
for (var movedGroup = 0; movedGroup < groups.length; movedGroup ++) {
if (groups[movedGroup].fixed !== false) {
continue;
}
Line 1,622 ⟶ 1,833:
// cycle through groups (original positions)
for (var group = 0; group < groups.length; group ++) {
if ( (groups[group].fixed !== true) || (group == movedGroup) ) {
continue;
}
Line 1,628 ⟶ 1,839:
// find fixed group with closest smaller oldNumber
var oldNumber = groups[group].oldNumber;
if ( (oldNumber < movedOldNumber) && ( (nextSmallerNumber === null) || (oldNumber > nextSmallerNumber) ) ) {
nextSmallerNumber = oldNumber;
nextSmallerGroup = group;
Line 1,634 ⟶ 1,845:
 
// find fixed group with closest larger oldNumber
if ( (oldNumber > movedOldNumber) && ( (nextLargerNumber === null) || (oldNumber < nextLargerNumber) ) ) {
nextLargerNumber = oldNumber;
nextLargerGroup = group;
Line 1,642 ⟶ 1,853:
// no larger fixed group, moved right
var movedFrom = '';
if (nextLargerGroup === null) {
movedFrom = 'left';
}
 
// no smaller fixed group, moved right
else if (nextSmallerGroup === null) {
movedFrom = 'right';
}
Line 1,691 ⟶ 1,902:
for (var group = 0; group < groups.length; group ++) {
var moved = groups[group].moved;
if (moved !== null) {
moved.sort(function(a, b) {
return groups[a].oldNumber - groups[b].oldNumber;
Line 1,697 ⟶ 1,908:
}
}
return;
};
 
 
//
// wDiff.ColorMoved: set moved block colorscolor numbers
// called from: DetectBlocks()
//
// changes: groups[].color
 
wDiff.ColorMoved = function (groups) {
 
// cycle through groups
Line 1,717 ⟶ 1,933:
for (var i = 0; i < moved.length; i ++) {
var movedGroup = moved[i];
if ( (groups[movedGroup].maxWords >= wDiff.blockMinLength) && (wDiff.showBlockMoves === true) ) {
groups[movedGroup].color = color;
color ++;
}
}
 
// WED('Deletions', wDiff.DebugDeletions(deletions));
// WED('Groups', wDiff.DebugGroups(groups));
// WED('Blocks', wDiff.DebugBlocks(blocks));
 
return;
};
 
 
// wDiff.FindMaxPath: recursively find path of groups in increasing old group order with longest char length
//   input: start, path start group; path, array of path groups leading up to start (not modified); chars, char count of path; cache, array of memoized best paths indexed by their start group; groups, array of group objects (reads .chars and .oldNumber); groupEnd, last group
//   returns: returnObj, contains path and length: { path: [group numbers], chars: total char count }
//   called from: wDiff.DetectBlocks()
//   calls: itself recursively

wDiff.FindMaxPath = function(start, path, chars, cache, groups, groupEnd) {

	// the best continuation from start does not depend on how start was reached,
	// it is therefore cached without the caller's path prefix and the prefix is added on return
	var best = cache[start];
	if ( (best === undefined) || (best === null) ) {

		// path consisting of the start group only, also the result for the last group (terminates recursion)
		var startChars = groups[start].chars;
		best = { path: [start], chars: startChars };

		// find longest sub-path
		var maxChars = 0;
		var oldNumber = groups[start].oldNumber;
		for (var i = start + 1; i <= groupEnd; i ++) {

			// only in increasing old group order
			if (groups[i].oldNumber < oldNumber) {
				continue;
			}

			// get longest sub-path from cache or by recursion
			var pathObj = wDiff.FindMaxPath(i, [start], startChars, cache, groups, groupEnd);

			// select longest sub-path, the first one wins on ties
			if (pathObj.chars > maxChars) {
				maxChars = pathObj.chars;
				best = pathObj;
			}
		}

		// save longest path to cache
		cache[start] = best;
	}

	// prepend the path that led to start
	var returnObj = { path: path.concat(best.path), chars: chars + best.chars };
	return returnObj;
};
 
Line 1,790 ⟶ 1,948:
// calls: wDiff.HtmlCustomize(), wDiff.HtmlFormat()
 
wDiff.AssembleDiff = function (text, blocks, groups) {
 
//
Line 1,806 ⟶ 1,964:
// check for colored block and move direction
var blockFrom = null;
if ( (fixed === false) && (color !== null) ) {
if (groups[ groups[group].movedFrom ].blockStart < blockStart) {
blockFrom = 'left';
Line 1,830 ⟶ 1,988:
// html escape text string
string = wDiff.HtmlEscape(string);
 
// moved block too small, make it an insertion and place it as a deletion at its original position
if ( ( (groups[group].maxWords < wDiff.blockMinLength) || (wDiff.showBlockMoves == false) ) && (fixed == false) ) {
if (type != 'del') {
string = string.replace(/\n/g, wDiff.htmlNewline);
diff += wDiff.htmlInsertStart + string + wDiff.htmlInsertEnd;
}
}
 
// add 'same' (unchanged) text
else if (type == 'same') {
diff += string;
}
Line 1,881 ⟶ 2,031:
for (var i = 0; i < moved.length; i ++) {
var movedGroup = moved[i];
var markColor = groups[movedGroup].color;
var mark = '';
 
Line 1,893 ⟶ 2,043:
 
// moved block too small, make it a deletion at its original position
if ( (groups[movedGroup].maxWords < wDiff.blockMinLength) || (wDiff.showBlockMoves === false) ) {
mark = wDiff.htmlDeleteStart + wDiff.HtmlEscape(movedText) + wDiff.htmlDeleteEnd;
}
Line 1,952 ⟶ 2,102:
// called from: wDiff.AssembleDiff()
 
wDiff.HtmlCustomize = function (text, number, title) {
 
text = text.replace(/\{block\}/, wDiff.styleBlockColor[number] || '');
Line 1,958 ⟶ 2,108:
 
// shorten title text, replace {title}
if ( (title !== nullundefined) && (title !== '') ) {
var max = 512;
var end = 128;
Line 1,980 ⟶ 2,130:
// called from: wDiff.Diff(), wDiff.AssembleDiff()
 
wDiff.HtmlEscape = function (text) {
 
text = text.replace(/&/g, '&amp;');
Line 1,995 ⟶ 2,145:
// called from: wDiff.Diff(), wDiff.AssembleDiff()
 
wDiff.HtmlFormat = function (text) {
 
text.diff = text.diff.replace(/<\/(\w+)><!--wDiff(Delete|Insert)--><\1\b[^>]*\bclass="wDiff\2"[^>]*>/g, '');
Line 2,008 ⟶ 2,158:
// returns: shortened html with removed unchanged passages indicated by (...) or separator
 
wDiff.ShortenOutput = function (html) {
 
var diff = '';
 
// empty text
if ( (html === nullundefined) || (html === '') ) {
return '';
}
Line 2,021 ⟶ 2,171:
html = html.replace(wDiff.htmlFragmentStart, '');
html = html.replace(wDiff.htmlFragmentEnd, '');
html = html.replace(wDiff.htmlContainerEnd, '');
 
// scan for diff html tags
var regExpDiff = /<\w+\b[^>]*\bclass="wDiff(MarkLeft|MarkRight|BlockLeft|BlockRight|Delete|Insert)"[^>]*>(.|\n)*?<!--wDiff\1-->/g;
Line 2,031 ⟶ 2,181:
 
// save tag positions
while ( (regExpMatch = regExpDiff.exec(html)) !== null ) {
 
// combine consecutive diff tags
Line 2,045 ⟶ 2,195:
 
// no diff tags detected
if (tagStart.length === 0) {
return wDiff.htmlNoChange;
}
Line 2,069 ⟶ 2,219:
} while (pos != -1);
lineBreaks.push(html.length);
 
// cycle through diff tag start positions
for (var i = 0; i < tagStart.length; i ++) {
Line 2,091 ⟶ 2,241:
}
regExpHeading.lastIndex = lastPos;
while ( (regExpMatch = regExpHeading.exec(html)) !== null ) {
if (regExpMatch.index > tagStart[i]) {
break;
Line 2,100 ⟶ 2,250:
 
// find last paragraph before diff tag
if (rangeStart[i] === nullundefined) {
lastPos = tagStart[i] - wDiff.paragraphBefore;
if (lastPos < rangeStartMin) {
Line 2,106 ⟶ 2,256:
}
regExpParagraph.lastIndex = lastPos;
while ( (regExpMatch = regExpParagraph.exec(html)) !== null) {
if (regExpMatch.index > tagStart[i]) {
break;
Line 2,116 ⟶ 2,266:
 
// find last line break before diff tag
if (rangeStart[i] === nullundefined) {
lastPos = tagStart[i] - wDiff.lineBeforeMax;
if (lastPos < rangeStartMin) {
Line 2,122 ⟶ 2,272:
}
regExpLine.lastIndex = lastPos;
while ( (regExpMatch = regExpLine.exec(html)) !== null ) {
if (regExpMatch.index > tagStart[i] - wDiff.lineBeforeMin) {
break;
Line 2,132 ⟶ 2,282:
 
// find last blank before diff tag
if (rangeStart[i] === nullundefined) {
lastPos = tagStart[i] - wDiff.blankBeforeMax;
if (lastPos < rangeStartMin) {
Line 2,138 ⟶ 2,288:
}
regExpBlank.lastIndex = lastPos;
while ( (regExpMatch = regExpBlank.exec(html)) !== null ) {
if (regExpMatch.index > tagStart[i] - wDiff.blankBeforeMin) {
break;
Line 2,148 ⟶ 2,298:
 
// fixed number of chars before diff tag
if (rangeStart[i] === nullundefined) {
if (rangeStart[i] > rangeStartMin) {
rangeStart[i] = tagStart[i] - wDiff.charsBefore;
Line 2,156 ⟶ 2,306:
 
// fixed number of lines before diff tag
if (rangeStart[i] === nullundefined) {
rangeStart[i] = rangeStartMin;
rangeStartType[i] = 'lines';
Line 2,175 ⟶ 2,325:
// find first heading after diff tag
regExpHeading.lastIndex = tagEnd[i];
if ( (regExpMatch = regExpHeading.exec(html)) !== null ) {
if ( (regExpMatch.index < tagEnd[i] + wDiff.headingAfter) && (regExpMatch.index < rangeEndMax) ) {
rangeEnd[i] = regExpMatch.index + regExpMatch[0].length;
Line 2,183 ⟶ 2,333:
 
// find first paragraph after diff tag
if (rangeEnd[i] === nullundefined) {
regExpParagraph.lastIndex = tagEnd[i];
if ( (regExpMatch = regExpParagraph.exec(html)) !== null ) {
if ( (regExpMatch.index < tagEnd[i] + wDiff.paragraphAfter) && (regExpMatch.index < rangeEndMax) ) {
rangeEnd[i] = regExpMatch.index;
Line 2,194 ⟶ 2,344:
 
// find first line break after diff tag
if (rangeEnd[i] === nullundefined) {
regExpLine.lastIndex = tagEnd[i] + wDiff.lineAfterMin;
if ( (regExpMatch = regExpLine.exec(html)) !== null ) {
if ( (regExpMatch.index < tagEnd[i] + wDiff.lineAfterMax) && (regExpMatch.index < rangeEndMax) ) {
rangeEnd[i] = regExpMatch.index;
Line 2,206 ⟶ 2,356:
 
// find blank after diff tag
if (rangeEnd[i] === nullundefined) {
regExpBlank.lastIndex = tagEnd[i] + wDiff.blankAfterMin;
if ( (regExpMatch = regExpBlank.exec(html)) !== null ) {
if ( (regExpMatch.index < tagEnd[i] + wDiff.blankAfterMax) && (regExpMatch.index < rangeEndMax) ) {
rangeEnd[i] = regExpMatch.index;
Line 2,217 ⟶ 2,367:
 
// fixed number of chars after diff tag
if (rangeEnd[i] === nullundefined) {
if (rangeEnd[i] < rangeEndMax) {
rangeEnd[i] = tagEnd[i] + wDiff.charsAfter;
Line 2,223 ⟶ 2,373:
}
}
 
// fixed number of lines after diff tag
if (rangeEnd[i] === nullundefined) {
rangeEnd[i] = rangeEndMax;
rangeEndType[i] = 'lines';
Line 2,242 ⟶ 2,392:
var j = 1;
for (var i = 1; i < rangeStart.length; i ++) {
 
// get lines between fragments
var lines = 0;
Line 2,313 ⟶ 2,463:
//
 
wDiff.AddStyleSheet = function (css) {
 
var style = document.createElement('style');
style.type = 'text/css';
if (style.styleSheet !== nullundefined) {
style.styleSheet.cssText = css;
}
Line 2,325 ⟶ 2,475:
document.getElementsByTagName('head')[0].appendChild(style);
return;
};
 
 
//
// wDiff.WordCount: count words in string
//   input: string, text to be scanned with wDiff.regExpWordCount
//   returns: number of matches, 0 if there is no match
//

wDiff.WordCount = function (string) {

	var matches = string.match(wDiff.regExpWordCount);
	if (!matches) {
		return 0;
	}
	return matches.length;
};
 
Line 2,332 ⟶ 2,492:
//
 
wDiff.DebugText = function (text) {
var dump = 'first: ' + text.first + '\tlast: ' + text.last + '\n';
dump += '\ni \tlink \t(prev \tnext) \t#num \t"token"\n';
var i = text.first;
while ( (i !== null) && (text.tokens[i] !== null) ) {
dump += i + ' \t' + text.tokens[i].link + ' \t(' + text.tokens[i].prev + ' \t' + text.tokens[i].next + ') \t#' + text.tokens[i].number + ' \t' + wDiff.DebugShortenString(text.tokens[i].token) + '\n';
i = text.tokens[i].next;
Line 2,348 ⟶ 2,508:
//
 
wDiff.DebugBlocks = function (blocks) {
var dump = '\ni \toldBl \tnewBl \toldNm \tnewNm \toldSt \tcount \tchars \ttype \tsect \tgroup \tfixed \tstring\n';
for (var i = 0; i < blocks.length; i ++) {
dump += i + ' \t' + blocks[i].oldBlock + ' \t' + blocks[i].newBlock + ' \t' + blocks[i].oldNumber + ' \t' + blocks[i].newNumber + ' \t' + blocks[i].oldStart + ' \t' + blocks[i].count + ' \t' + blocks[i].chars + ' \t' + blocks[i].type + ' \t' + blocks[i].section + ' \t' + blocks[i].group + ' \t' + blocks[i].fixed + ' \t' + wDiff.DebugShortenString(blocks[i].string) + '\n';
}
return dump;
Line 2,361 ⟶ 2,521:
//
 
wDiff.DebugGroups = function (groups) {
var dump = '\ni \tblSta \tblEnd \tmWordtmaxWo \twords \tchars \tfixed \oldNmtoldNm \tmFrom \tcolor \tmoved \tdiff\n';
for (var i = 0; i < groups.length; i ++) {
dump += i + ' \t' + groups[i].blockStart + ' \t' + groups[i].blockEnd + ' \t' + groups[i].maxWords + ' \t' + groups[i].words + ' \t' + groups[i].chars + ' \t' + groups[i].fixed + ' \t' + groups[i].oldNumber + ' \t' + groups[i].movedFrom + ' \t' + groups[i].color + ' \t' + groups[i].moved.toString() + ' \t' + wDiff.DebugShortenString(groups[i].diff) + '\n';
Line 2,374 ⟶ 2,534:
//
 
wDiff.DebugGaps = function (gaps) {
var dump = '\ni \tnFirs \tnLast \tnTok \toFirs \toLast \toTok \tcharSplit\n';
for (var i = 0; i < gaps.length; i ++) {
Line 2,387 ⟶ 2,547:
//
 
wDiff.DebugShortenString = function (string) {
if (string === null) {
return 'null';
}