User:Cacycle/diff.js: Difference between revisions

Content deleted Content added
1.1.2 (September 22, 2014) fix .splitRefineChars(): one token becomes connected or separated by token
1.1.3 (September 23, 2014) fix del and mark positioning, update sort, fix slide up to border, added error flag, new option unlinkBlocks
Line 3:
// ==UserScript==
// @name wDiff
// @version 1.1.23
// @date September 2223, 2014
// @description improved word-based diff library with block move detection
// @homepage https://en.wikipedia.org/wiki/User:Cacycle/diff
Line 46:
wDiff: namespace object (global)
.configurations see top of code below for configuration and customization options
.error result has not passed unit tests
 
class Text: diff text object (new or old version)
Line 127 ⟶ 128:
// enable block move layout with highlighted blocks and marks at their original positions
if (wDiff.showBlockMoves === undefined) { wDiff.showBlockMoves = true; }
 
// minimal number of real words for a moved block (0 for always showing highlighted blocks)
if (wDiff.blockMinLength === undefined) { wDiff.blockMinLength = 3; }
 
// further resolve replacements character-wise from start and end
Line 136 ⟶ 134:
// enable recursive diff to resolve problematic sequences
if (wDiff.recursiveDiff === undefined) { wDiff.recursiveDiff = true; }
 
// unlink blocks if too short and too common
if (wDiff.unlinkBlocks === undefined) { wDiff.unlinkBlocks = true; }
 
// minimal number of real words for a moved block
if (wDiff.blockMinLength === undefined) { wDiff.blockMinLength = 3; }
 
// display blocks in different colors
Line 143 ⟶ 147:
if (wDiff.debug === undefined) { wDiff.debug = false; }
 
// show debug infostiming and statsresults
if (wDiff.debugTime === undefined) { wDiff.debugTime = false; }
 
Line 312 ⟶ 316:
'.wDiffBlockHighlight:hover .wDiffSpaceSymbol:before { color: #ddd; }' +
'.wDiffBlockHighlight:hover .wDiffInsert .wDiffSpaceSymbol:before, .wDiffInsert:hover .wDiffSpaceSymbol:before { color: #888; }' +
'.wDiffBlockHighlight:hover .wDiffDelete .wDiffSpaceSymbol:before, .wDiffDelete:hover .wDiffSpaceSymbol:before { color: #999; }'; +
 
// error
'.wDiffError .wDiffContainer, .wDiffError .wDiffFragment, .wDiffError .wDiffNoChange { background: #faa; }';
}
 
Line 337 ⟶ 344:
if (wDiff.styleSeparator === undefined) { wDiff.styleSeparator = ''; }
if (wDiff.styleOmittedChars === undefined) { wDiff.styleOmittedChars = ''; }
if (wDiff.styleError === undefined) { wDiff.styleError = ''; }
if (wDiff.styleNewline === undefined) { wDiff.styleNewline = ''; }
if (wDiff.styleTab === undefined) { wDiff.styleTab = ''; }
Line 412 ⟶ 420:
if (wDiff.htmlSeparator === undefined) { wDiff.htmlSeparator = '<div class="wDiffSeparator" style="' + wDiff.styleSeparator + '"></div>'; }
if (wDiff.htmlOmittedChars === undefined) { wDiff.htmlOmittedChars = '<span class="wDiffOmittedChars" style="' + wDiff.styleOmittedChars + '">…</span>'; }
 
if (wDiff.htmlErrorStart === undefined) { wDiff.htmlErrorStart = '<div class="wDiffError" style="' + wDiff.styleError + '">'; }
if (wDiff.htmlErrorEnd === undefined) { wDiff.htmlErrorEnd = '</div>'; }
 
//
Line 418 ⟶ 429:
 
// wDiff.blockHandler: event handler for block and mark elements
if (wDiff.blockHandler === undefined) { wDiff.blockHandler = function (event, element, type) {
 
// IE compatibility
Line 519 ⟶ 530:
 
wDiff.init = function () {
 
wDiff.error = false;
 
// legacy for short time
Line 540 ⟶ 553:
 
wDiff.diff = function (oldString, newString, full) {
 
wDiff.error = false;
 
// create text diff object
Line 609 ⟶ 624:
if (diff != text) {
console.log('Error: wDiff unit test failure: output not consistent with new text');
wDiff.error = true;
console.log('new text:\n', text);
console.log('new diff:\n', diff);
Line 622 ⟶ 638:
if (diff != text) {
console.log('Error: wDiff unit test failure: output not consistent with old text');
wDiff.error = true;
console.log('old text:\n', text);
console.log('old diff:\n', diff);
Line 629 ⟶ 646:
}
 
if (wDiff.error === true) {
html = wDiff.htmlErrorStart + html + wDiff.htmlErrorEnd;
}
textDiff.html = html;
 
Line 933 ⟶ 953:
 
// calculate refined diff information with recursion for unresolved gaps
this.calculateDiff(symbols, 'word', false, true);
 
// slide gaps
Line 944 ⟶ 964:
 
// calculate refined diff information with recursion for unresolved gaps
this.calculateDiff(symbols, 'character', false, true);
 
// slide gaps
Line 1,267 ⟶ 1,287:
var front = text.tokens[gapFront].prev;
var back = gapBack;
var gapFrontBlankTest = wDiff.regExpSlideBorder.test(text.tokens[gapFront].token);
var frontStop = null;
var frontStop = front;
while (
(frontif !== null) && (text.tokens[back].link !=== null) &&{
while (
(text.tokens[front].link !== null) && (text.tokens[back].link === null) &&
(text.tokens[front].token !== text.tokens[null) && (back].token !== null) &&
(text.tokens[front].link !== null) &&
) {
(text.tokens[front].token == text.tokens[back].token)
) {
 
front = text.tokens[front].prev;
// stop at line break
if back = (wDiff.regExpSlideStop.test(text.tokens[frontback].token) === true) {prev;
frontStop = front;
break;
}
 
// stop at first space/wordline break
else if ( (frontStop === null) && (wDiff.regExpSlideBorderregExpSlideStop.test(text.tokens[front].token) === true) ) {
frontStop = front;
break;
}
 
// stop at first word border (blank/word or word/blank)
if (wDiff.regExpSlideBorder.test(text.tokens[front].token) !== gapFrontBlankTest) {
frontStop = front;
}
}
front = text.tokens[front].prev;
back = text.tokens[back].prev;
}
 
Line 1,326 ⟶ 1,350:
// recursively diff still unresolved regions upwards
 
this.calculateDiff = function (symbols, level, repeat, recurse, newStart, newEnd, oldStart, oldEnd, recursionLevel) {
 
// start timer
if ( (wDiff.debugTime === true) && (repeat !== true) && (recursionLevel === undefined) ) {
console.time(level);
}
Line 1,352 ⟶ 1,376:
var i = newStart;
while ( (i !== null) && (this.newText.tokens[i] !== null) ) {
if (this.newText.tokens[i].link === null) {
 
// add new entry to symbol table
var token = this.newText.tokens[i].token;
if (Object.prototype.hasOwnProperty.call(symbols.hash, token) === false) {
var current = symbols.token.length;
symbols.hash[token] = current;
symbols.token[current] = {
newCount: 1,
oldCount: 0,
newToken: i,
oldToken: null
};
}
 
// or update existing entry
else {
 
// increment token counter for new text
var hashToArray = symbols.hash[token];
symbols.token[hashToArray].newCount ++;
}
}
 
Line 1,388 ⟶ 1,414:
var j = oldStart;
while ( (j !== null) && (this.oldText.tokens[j] !== null) ) {
if (this.oldText.tokens[j].link === null) {
 
// add new entry to symbol table
var token = this.oldText.tokens[j].token;
if (Object.prototype.hasOwnProperty.call(symbols.hash, token) === false) {
var current = symbols.token.length;
symbols.hash[token] = current;
symbols.token[current] = {
newCount: 0,
oldCount: 1,
newToken: null,
oldToken: j
};
}
 
// or update existing entry
else {
 
// increment token counter for old text
var hashToArray = symbols.hash[token];
symbols.token[hashToArray].oldCount ++;
 
// add token number for old text
symbols.token[hashToArray].oldToken = j;
}
}
 
Line 1,432 ⟶ 1,460:
var oldToken = symbols.token[i].oldToken;
 
// doconnect notfrom usenew spacesto asold uniqueand markersfrom old to new
if (/^\s+$/.test(this.newText.tokens[newToken].token)link === falsenull) {
 
// do not use spaces as unique markers
if (/^\s+$/.test(this.newText.tokens[newToken].token) === false) {
 
// connect from new to old and from old to new
if (this.newText.tokens[newToken].link === null) {
this.newText.tokens[newToken].link = oldToken;
this.oldText.tokens[oldToken].link = newToken;
Line 1,587 ⟶ 1,616:
i = this.newText.tokens[i].prev;
}
}
 
//
// repeat with empty symbol table to link hidden unresolved common tokens in cross-overs ("and" in "and this a and b that" -> "and this a and b that")
//
 
// new empty symbols object
if (repeat !== true) {
var symbolsRepeat = {
token: [],
hash: {},
linked: false
};
this.calculateDiff(symbolsRepeat, level, true, false, newStart, newEnd, oldStart, oldEnd);
}
 
Line 1,654 ⟶ 1,697:
linked: false
};
this.calculateDiff(symbolsRecurse, level, false, true, iStart, iEnd, jStart, jEnd, recursionLevel + 1);
}
i = iEnd;
Line 1,724 ⟶ 1,767:
linked: false
};
this.calculateDiff(symbolsRecurse, level, false, true, iStart, iEnd, jStart, jEnd, recursionLevel + 1);
}
i = iStart;
Line 1,739 ⟶ 1,782:
 
// stop timer
if ( (wDiff.debugTime === true) && (repeat !== true) && (recursionLevel === 0) ) {
console.timeEnd(level);
}
Line 1,790 ⟶ 1,833:
// convert groups to insertions/deletions if maximal block length is too short
var unlink = 0;
if ( (wDiff.unlinkBlocks = true) && (wDiff.blockMinLength > 0) ) {
 
// repeat as long as unlinking is possible
Line 2,208 ⟶ 2,251:
// changes: .blocks[].section/group/fixed/newNumber
//
// deletion blocks move with fixed neighborreference (new number +/- 0.1):
// old: 1 D 2 1 D 2
// / \ / \ \
// new: 1 D 2 1 D 2
// fixed: * *
// new number: 1 1.1 1.9 2 2
// 'mark' and 'del' get new number of reference block and are sorted around it by old text number
 
this.positionDelBlocks = function () {
Line 2,234 ⟶ 2,278:
continue;
}
 
// find fixed 'same' reference block from original block position to position 'del' block, similar to position 'mark' code
 
// get old text prev block
var prevBlockNumber = null;
var prevBlock = null;
if (block > 0) {
prevBlockNumber = blocksOld[block - 1].newBlock;
Line 2,244 ⟶ 2,290:
 
// get old text next block
var nextBlockNumber = null;
var nextBlock = null;
if (block < blocksOld.length - 1) {
nextBlockNumber = blocksOld[block + 1].newBlock;
Line 2,252 ⟶ 2,298:
 
// move after prev block if fixed
var neighborrefBlock = null;
if ( (prevBlock !== undefinednull) && (prevBlock.type == 'same') && (prevBlock.fixed === true) ) {
neighborrefBlock = prevBlock;
delBlock.newNumber = neighbor.newNumber + 0.1;
}
 
// move before next block if fixed
else if ( (nextBlock !== undefinednull) && (nextBlock.type == 'same') && (nextBlock.fixed === true) ) {
neighborrefBlock = nextBlock;
delBlock.newNumber = neighbor.newNumber - 0.1;
}
 
// move after prev block if not start of group
else if ( (prevBlock !== undefinednull) && (prevBlock.type == 'same') && (prevBlockNumber != groups[ prevBlock.group ].blockEnd) ) {
neighborrefBlock = prevBlock;
delBlock.newNumber = neighbor.newNumber + 0.1;
}
 
// move before next block if not start of group
else if ( (nextBlock !== undefinednull) && (nextBlock.type == 'same') && (nextBlockNumber != groups[ nextBlock.group ].blockStart) ) {
neighborrefBlock = nextBlock;
delBlock.newNumber = neighbor.newNumber - 0.1;
}
 
Line 2,279 ⟶ 2,321:
else {
for (var fixed = block; fixed >= 0; fixed --) {
if ( (blocksOld[fixed].type == 'same') && (blocksOld[fixed].fixed === true) ) {
neighborrefBlock = blocksOld[fixed];
delBlock.newNumber = neighbor.newNumber + 0.1;
break;
}
Line 2,288 ⟶ 2,329:
 
// move before first block
if (neighborrefBlock === undefinednull) {
delBlock.newNumber = -0.1;
}
 
// update 'del' block data
else {
delBlock.sectionnewNumber = neighborrefBlock.sectionnewNumber;
delBlock.groupsection = neighborrefBlock.groupsection;
delBlock.fixedgroup = neighborrefBlock.fixedgroup;
delBlock.fixed = refBlock.fixed;
}
}
Line 2,307 ⟶ 2,349:
 
 
// TextDiff.unlinkBlocks(): convert 'same' blocks in groups into 'ins'/'del' pairs if too short and too common
// called from: .detectBlocks()
// calls: .unlinkSingleBlock()
Line 2,542 ⟶ 2,584:
// mark direction: .movedGroup.blockStart < .groups[group].blockStart
// group side: .movedGroup.oldNumber < .groups[group].oldNumber
// 'mark' and 'del' get new number of reference block and are sorted around it by old text number
 
this.insertMarks = function () {
Line 2,560 ⟶ 2,603:
// sort copy by oldNumber
blocksOld.sort(function(a, b) {
returnvar comp = a.oldNumber - b.oldNumber;
if (comp === 0) {
comp = a.newNumber - b.newNumber;
}
return comp;
});
 
// this.debugGroups('insertMarks after Groups');
this.debugBlocks('blocksOld', blocksOld);
 
// create lookup table: original to sorted
Line 2,577 ⟶ 2,627:
var movedOldNumber = movedGroup.oldNumber;
 
// find closest fixed 'same' reference block from original block position to theposition 'mark' block, similar to position 'del' leftcode
var fixedLeft = null;
var leftChars = 0;
for (var block = lookupSorted[ groups[moved].blockStart ] - 1; block >= 0; block --) {
leftChars += blocksOld[block].chars;
if (blocksOld[block].fixed === true) {
fixedLeft = blocksOld[block];
break;
}
}
 
// findget closestold fixedtext prev block to the right
var fixedRightprevBlock = null;
var block = lookupSorted[ groups[moved].blockStart ];
var rightChars = 0;
if (block > 0) {
for (var block = lookupSorted[ groups[moved].blockEnd ] + 1; block < blocksOld.length; block ++) {
rightChars prevBlock += blocksOld[block - 1].chars;
if (blocksOld[block].fixed === true) {
fixedLeft = blocksOld[block];
break;
}
}
 
// noget largerold fixedtext next block, moved right
var fixedBlocknextBlock = null;
var block = lookupSorted[ groups[moved].blockEnd ];
if (fixedRight === null) {
if (block < blocksOld.length - 1) {
fixedBlock = fixedLeft;
nextBlock = blocksOld[block + 1];
}
 
// nomove smallerafter fixedprev block, movedif leftfixed
elsevar if (fixedLeftrefBlock === null) {;
if ( (prevBlock !== null) && (prevBlock.type == 'same') && (prevBlock.fixed === true) ) {
fixedBlock = fixedRight;
refBlock = prevBlock;
}
 
// move before next block if fixed
// group moved from between two closest fixed neighbors, moved left or right depending on char distance
else if ( (nextBlock !== null) && (nextBlock.type == 'same') && (nextBlock.fixed === true) ) {
else if (rightChars <= leftChars) {
fixedBlockrefBlock = fixedRightnextBlock;
}
 
// movedfind closest fixed block to the left
else {
for (var fixed = lookupSorted[ groups[moved].blockStart ] - 1; fixed >= 0; fixed --) {
fixedBlock = fixedLeft;
if ( (blocksOld[fixed].type == 'same') && (blocksOld[fixed].fixed === true) ) {
refBlock = blocksOld[fixed];
break;
}
}
}
 
// fromget left sideposition of fixednew mark groupblock
var newNumber;
var markGroup;
if (movedOldNumber < fixedBlock.oldNumber) {
newNumber = fixedBlock.newNumber - 0.1;
}
 
// fromno smaller fixed block, moved right sidefrom ofbefore fixedfirst groupblock
if (refBlock === null) {
newNumber = -1;
markGroup = groups.length;
 
// save new single-mark-block group
groups.push({
oldNumber: 0,
blockStart: blocks.length,
blockEnd: blocks.length,
unique: false,
maxWords: null,
words: null,
chars: 0,
fixed: null,
movedFrom: null,
color: null
});
}
else {
newNumber = fixedBlockrefBlock.newNumber + 0.1;
markGroup = refBlock.group;
}
 
Line 2,644 ⟶ 2,705:
type: 'mark',
section: null,
group: fixedBlock.groupmarkGroup,
fixed: true,
moved: moved,
Line 2,652 ⟶ 2,713:
// set group color
movedGroup.color = color;
movedGroup.movedFrom = fixedBlock.groupmarkGroup;
color ++;
}
 
// sort 'mark' blocks in and update groups
this.sortBlocks();
 
Line 2,947 ⟶ 3,008:
var tagsEnd = [];
var i = 0;
var regExpMatch;
 
// save tag positions
var regExpMatch;
while ( (regExpMatch = regExpDiff.exec(html)) !== null ) {
 
Line 2,976 ⟶ 3,037:
 
// get line positions
var regExpMatch;
var lines = [];
var regExpMatch;
while ( (regExpMatch = regExpLine.exec(html)) !== null) {
lines.push(regExpMatch.index);
Line 3,286 ⟶ 3,347:
var dump = '\ni \toldBl \tnewBl \toldNm \tnewNm \toldSt \tcount \tuniq \twords \tchars \ttype \tsect \tgroup \tfixed \tmoved \tstring\n';
for (var i = 0; i < blocks.length; i ++) {
dump += i + ' \t' + blocks[i].oldBlock + ' \t' + blocks[i].newBlock + ' \t' + blocks[i].oldNumber + ' \t' + (blocks[i].newNumber || 'null').toString().substr(0, 6) + ' \t' + blocks[i].oldStart + ' \t' + blocks[i].count + ' \t' + blocks[i].unique + ' \t' + blocks[i].words + ' \t' + blocks[i].chars + ' \t' + blocks[i].type + ' \t' + blocks[i].section + ' \t' + blocks[i].group + ' \t' + blocks[i].fixed + ' \t' + blocks[i].moved + ' \t' + this.debugShortenString(blocks[i].string) + '\n';
}
console.log(text + ':\n' + dump);