Content deleted Content added
1.1.2 (September 22, 2014) fix .splitRefineChars(): one token becomes connected or separated by token |
1.1.3 (September 23, 2014) fix del and mark positioning, update sort, fix slide up to border, added error flag, new option unlinkBlocks |
||
Line 3:
// ==UserScript==
// @name wDiff
// @version 1.1.
// @date September
// @description improved word-based diff library with block move detection
// @homepage https://en.wikipedia.org/wiki/User:Cacycle/diff
Line 46:
wDiff: namespace object (global)
.configurations see top of code below for configuration and customization options
.error result has not passed unit tests
class Text: diff text object (new or old version)
Line 127 ⟶ 128:
// enable block move layout with highlighted blocks and marks at their original positions
if (wDiff.showBlockMoves === undefined) { wDiff.showBlockMoves = true; }
// further resolve replacements character-wise from start and end
Line 136 ⟶ 134:
// enable recursive diff to resolve problematic sequences
if (wDiff.recursiveDiff === undefined) { wDiff.recursiveDiff = true; }
// unlink blocks if too short and too common
if (wDiff.unlinkBlocks === undefined) { wDiff.unlinkBlocks = true; }
// minimal number of real words for a moved block
if (wDiff.blockMinLength === undefined) { wDiff.blockMinLength = 3; }
// display blocks in different colors
Line 143 ⟶ 147:
if (wDiff.debug === undefined) { wDiff.debug = false; }
// show debug
if (wDiff.debugTime === undefined) { wDiff.debugTime = false; }
Line 312 ⟶ 316:
'.wDiffBlockHighlight:hover .wDiffSpaceSymbol:before { color: #ddd; }' +
'.wDiffBlockHighlight:hover .wDiffInsert .wDiffSpaceSymbol:before, .wDiffInsert:hover .wDiffSpaceSymbol:before { color: #888; }' +
'.wDiffBlockHighlight:hover .wDiffDelete .wDiffSpaceSymbol:before, .wDiffDelete:hover .wDiffSpaceSymbol:before { color: #999; }'
// error
'.wDiffError .wDiffContainer, .wDiffError .wDiffFragment, .wDiffError .wDiffNoChange { background: #faa; }';
}
Line 337 ⟶ 344:
if (wDiff.styleSeparator === undefined) { wDiff.styleSeparator = ''; }
if (wDiff.styleOmittedChars === undefined) { wDiff.styleOmittedChars = ''; }
if (wDiff.styleError === undefined) { wDiff.styleError = ''; }
if (wDiff.styleNewline === undefined) { wDiff.styleNewline = ''; }
if (wDiff.styleTab === undefined) { wDiff.styleTab = ''; }
Line 412 ⟶ 420:
if (wDiff.htmlSeparator === undefined) { wDiff.htmlSeparator = '<div class="wDiffSeparator" style="' + wDiff.styleSeparator + '"></div>'; }
if (wDiff.htmlOmittedChars === undefined) { wDiff.htmlOmittedChars = '<span class="wDiffOmittedChars" style="' + wDiff.styleOmittedChars + '">…</span>'; }
if (wDiff.htmlErrorStart === undefined) { wDiff.htmlErrorStart = '<div class="wDiffError" style="' + wDiff.styleError + '">'; }
if (wDiff.htmlErrorEnd === undefined) { wDiff.htmlErrorEnd = '</div>'; }
//
Line 418 ⟶ 429:
// wDiff.blockHandler: event handler for block and mark elements
if (wDiff.blockHandler === undefined) {
// IE compatibility
Line 519 ⟶ 530:
wDiff.init = function () {
wDiff.error = false;
// legacy for short time
Line 540 ⟶ 553:
wDiff.diff = function (oldString, newString, full) {
wDiff.error = false;
// create text diff object
Line 609 ⟶ 624:
if (diff != text) {
console.log('Error: wDiff unit test failure: output not consistent with new text');
wDiff.error = true;
console.log('new text:\n', text);
console.log('new diff:\n', diff);
Line 622 ⟶ 638:
if (diff != text) {
console.log('Error: wDiff unit test failure: output not consistent with old text');
wDiff.error = true;
console.log('old text:\n', text);
console.log('old diff:\n', diff);
Line 629 ⟶ 646:
}
if (wDiff.error === true) {
html = wDiff.htmlErrorStart + html + wDiff.htmlErrorEnd;
}
textDiff.html = html;
Line 933 ⟶ 953:
// calculate refined diff information with recursion for unresolved gaps
this.calculateDiff(symbols, 'word', false, true);
// slide gaps
Line 944 ⟶ 964:
// calculate refined diff information with recursion for unresolved gaps
this.calculateDiff(symbols, 'character', false, true);
// slide gaps
Line 1,267 ⟶ 1,287:
var front = text.tokens[gapFront].prev;
var back = gapBack;
var gapFrontBlankTest = wDiff.regExpSlideBorder.test(text.tokens[gapFront].token);
var frontStop = front;
while (
(
(text.tokens[front].link !== null) &&
(text.tokens[front].token == text.tokens[back].token)
) {
front = text.tokens[front].prev;
// stop at
frontStop = front;
break;
}
// stop at first word border (blank/word or word/blank)
if (wDiff.regExpSlideBorder.test(text.tokens[front].token) !== gapFrontBlankTest) {
frontStop = front;
}
}
}
Line 1,326 ⟶ 1,350:
// recursively diff still unresolved regions upwards
this.calculateDiff = function (symbols, level, repeat, recurse, newStart, newEnd, oldStart, oldEnd, recursionLevel) {
// start timer
if ( (wDiff.debugTime === true) && (repeat !== true) && (recursionLevel === undefined) ) {
console.time(level);
}
Line 1,352 ⟶ 1,376:
var i = newStart;
while ( (i !== null) && (this.newText.tokens[i] !== null) ) {
if (this.newText.tokens[i].link === null) {
// add new entry to symbol table
var token = this.newText.tokens[i].token;
if (Object.prototype.hasOwnProperty.call(symbols.hash, token) === false) {
var current = symbols.token.length;
symbols.hash[token] = current;
symbols.token[current] = {
newCount: 1,
oldCount: 0,
newToken: i,
oldToken: null
};
}
// or update existing entry
else {
// increment token counter for new text
var hashToArray = symbols.hash[token];
symbols.token[hashToArray].newCount ++;
}
}
Line 1,388 ⟶ 1,414:
var j = oldStart;
while ( (j !== null) && (this.oldText.tokens[j] !== null) ) {
if (this.oldText.tokens[j].link === null) {
// add new entry to symbol table
var token = this.oldText.tokens[j].token;
if (Object.prototype.hasOwnProperty.call(symbols.hash, token) === false) {
var current = symbols.token.length;
symbols.hash[token] = current;
symbols.token[current] = {
newCount: 0,
oldCount: 1,
newToken: null,
oldToken: j
};
}
// or update existing entry
else {
// increment token counter for old text
var hashToArray = symbols.hash[token];
symbols.token[hashToArray].oldCount ++;
// add token number for old text
symbols.token[hashToArray].oldToken = j;
}
}
Line 1,432 ⟶ 1,460:
var oldToken = symbols.token[i].oldToken;
//
if
// do not use spaces as unique markers
if (/^\s+$/.test(this.newText.tokens[newToken].token) === false) {
this.newText.tokens[newToken].link = oldToken;
this.oldText.tokens[oldToken].link = newToken;
Line 1,587 ⟶ 1,616:
i = this.newText.tokens[i].prev;
}
}
//
// repeat with empty symbol table to link hidden unresolved common tokens in cross-overs ("and" in "and this a and b that" -> "and this a and b that")
//
// new empty symbols object
if (repeat !== true) {
var symbolsRepeat = {
token: [],
hash: {},
linked: false
};
this.calculateDiff(symbolsRepeat, level, true, false, newStart, newEnd, oldStart, oldEnd);
}
Line 1,654 ⟶ 1,697:
linked: false
};
this.calculateDiff(symbolsRecurse, level, false, true, iStart, iEnd, jStart, jEnd, recursionLevel + 1);
}
i = iEnd;
Line 1,724 ⟶ 1,767:
linked: false
};
this.calculateDiff(symbolsRecurse, level, false, true, iStart, iEnd, jStart, jEnd, recursionLevel + 1);
}
i = iStart;
Line 1,739 ⟶ 1,782:
// stop timer
if ( (wDiff.debugTime === true) && (repeat !== true) && (recursionLevel === 0) ) {
console.timeEnd(level);
}
Line 1,790 ⟶ 1,833:
// convert groups to insertions/deletions if maximal block length is too short
var unlink = 0;
if ( (wDiff.unlinkBlocks = true) && (wDiff.blockMinLength > 0) ) {
// repeat as long as unlinking is possible
Line 2,208 ⟶ 2,251:
// changes: .blocks[].section/group/fixed/newNumber
//
// deletion blocks move with fixed
// old: 1 D 2 1 D 2
// / \ / \ \
// new: 1 D 2 1 D 2
// fixed: * *
// new number: 1
// 'mark' and 'del' get new number of reference block and are sorted around it by old text number
this.positionDelBlocks = function () {
Line 2,234 ⟶ 2,278:
continue;
}
// find fixed 'same' reference block from original block position to position 'del' block, similar to position 'mark' code
// get old text prev block
var prevBlockNumber = null;
var prevBlock = null;
if (block > 0) {
prevBlockNumber = blocksOld[block - 1].newBlock;
Line 2,244 ⟶ 2,290:
// get old text next block
var nextBlockNumber = null;
var nextBlock = null;
if (block < blocksOld.length - 1) {
nextBlockNumber = blocksOld[block + 1].newBlock;
Line 2,252 ⟶ 2,298:
// move after prev block if fixed
var
if ( (prevBlock !==
}
// move before next block if fixed
else if ( (nextBlock !==
}
// move after prev block if not start of group
else if ( (prevBlock !==
}
// move before next block if not start of group
else if ( (nextBlock !==
}
Line 2,279 ⟶ 2,321:
else {
for (var fixed = block; fixed >= 0; fixed --) {
if ( (blocksOld[fixed].type == 'same') && (blocksOld[fixed].fixed === true) ) {
break;
}
Line 2,288 ⟶ 2,329:
// move before first block
if (
delBlock.newNumber = -
}
// update 'del' block data
else {
delBlock.
delBlock.
delBlock.
delBlock.fixed = refBlock.fixed;
}
}
Line 2,307 ⟶ 2,349:
// TextDiff.unlinkBlocks(): convert 'same' blocks in groups into 'ins'/'del' pairs if too short and too common
// called from: .detectBlocks()
// calls: .unlinkSingleBlock()
Line 2,542 ⟶ 2,584:
// mark direction: .movedGroup.blockStart < .groups[group].blockStart
// group side: .movedGroup.oldNumber < .groups[group].oldNumber
// 'mark' and 'del' get new number of reference block and are sorted around it by old text number
this.insertMarks = function () {
Line 2,560 ⟶ 2,603:
// sort copy by oldNumber
blocksOld.sort(function(a, b) {
if (comp === 0) {
comp = a.newNumber - b.newNumber;
}
return comp;
});
// this.debugGroups('insertMarks after Groups');
this.debugBlocks('blocksOld', blocksOld);
// create lookup table: original to sorted
Line 2,577 ⟶ 2,627:
var movedOldNumber = movedGroup.oldNumber;
// find
//
var
var block = lookupSorted[ groups[moved].blockStart ];
if (block > 0) {
}
//
var
var block = lookupSorted[ groups[moved].blockEnd ];
if (block < blocksOld.length - 1) {
nextBlock = blocksOld[block + 1];
}
//
if ( (prevBlock !== null) && (prevBlock.type == 'same') && (prevBlock.fixed === true) ) {
refBlock = prevBlock;
}
// move before next block if fixed
else if ( (nextBlock !== null) && (nextBlock.type == 'same') && (nextBlock.fixed === true) ) {
}
//
else {
for (var fixed = lookupSorted[ groups[moved].blockStart ] - 1; fixed >= 0; fixed --) {
if ( (blocksOld[fixed].type == 'same') && (blocksOld[fixed].fixed === true) ) {
refBlock = blocksOld[fixed];
break;
}
}
}
//
var newNumber;
var markGroup;
//
if (refBlock === null) {
newNumber = -1;
markGroup = groups.length;
// save new single-mark-block group
groups.push({
oldNumber: 0,
blockStart: blocks.length,
blockEnd: blocks.length,
unique: false,
maxWords: null,
words: null,
chars: 0,
fixed: null,
movedFrom: null,
color: null
});
}
else {
newNumber =
markGroup = refBlock.group;
}
Line 2,644 ⟶ 2,705:
type: 'mark',
section: null,
group:
fixed: true,
moved: moved,
Line 2,652 ⟶ 2,713:
// set group color
movedGroup.color = color;
movedGroup.movedFrom =
color ++;
}
// sort 'mark' blocks in and update groups
this.sortBlocks();
Line 2,947 ⟶ 3,008:
var tagsEnd = [];
var i = 0;
// save tag positions
var regExpMatch;
while ( (regExpMatch = regExpDiff.exec(html)) !== null ) {
Line 2,976 ⟶ 3,037:
// get line positions
var lines = [];
var regExpMatch;
while ( (regExpMatch = regExpLine.exec(html)) !== null) {
lines.push(regExpMatch.index);
Line 3,286 ⟶ 3,347:
var dump = '\ni \toldBl \tnewBl \toldNm \tnewNm \toldSt \tcount \tuniq \twords \tchars \ttype \tsect \tgroup \tfixed \tmoved \tstring\n';
for (var i = 0; i < blocks.length; i ++) {
dump += i + ' \t' + blocks[i].oldBlock + ' \t' + blocks[i].newBlock + ' \t' + blocks[i].oldNumber + ' \t' +
}
console.log(text + ':\n' + dump);
|