User:Cacycle/diff.js: Difference between revisions

Content deleted Content added
1.0.15 (September 12, 2014) customizable block mark symbols, fix dynamic highlighting and scrolling, fix bubbling
1.0.16 (September 13, 2014) fix bubbling: bubble after unlink, fix unique word detection and word hash/counter, +debug timer, pass 4,5: start from surrounding tokens, then 4/5 from start/end
Line 3:
// ==UserScript==
// @name wDiff
// @version 1.0.16
// @date September 13, 2014
// @description improved word-based diff library with block move detection
// @homepage https://en.wikipedia.org/wiki/User:Cacycle/diff
Line 56:
.number: list enumeration number
.parsed: token has been added to symbol table
.unique: token is unique word in whole text
.first: index of first token in tokens list
.last: index of last token in tokens list
.words{}: word count
.diff: diff html
 
Line 147 ⟶ 148:
 
// inline chunks
// [[wiki link]] | {{template}} | [ext. link] |<html> | [[wiki link| | {{template| | url
chunk: /\[\[[^\[\]\n]+\]\]|\{\{[^\{\}\n]+\}\}|\[[^\[\]\n]+\]|<\/?[^<>\[\]\{\}\n]+>|\[\[[^\[\]\|\n]+\]\]\||\{\{[^\{\}\|\n]+\||\b((https?:|)\/\/)[^\x00-\x20\s"\[\]\x7f]+/g,
 
// words, multi-char markup, and chars
word: new RegExp('[' + wDiff.letters + ']+([\'’_]?[' + wDiff.letters + '])+)*|\\[\\[|\\]\\]|\\{\\{|\\}\\}|&\\w+;|\'\'\'|\'\'|==+|\\{\\||\\|\\}|\\|-|.', 'g'),
 
// chars
Line 163 ⟶ 164:
 
// regExp for counting words
if (wDiff.regExpWordCount === undefined) { wDiff.regExpWordCount = new RegExp('(^|[^' + wDiff.letters + '])+([\' + wDiff.letters + '’_]?[' + wDiff.letters + '_\'’]+)*', 'g'); }
 
// regExp for wiki code non-letter characters
Line 448 ⟶ 449:
 
var diff = '';
 
// wikEd.debugTimer.push(['diff?', new Date]);
 
// IE / Mac fix
Line 459 ⟶ 462:
tokens: [],
first: null,
last: null,
words: {}
},
oldText: {
Line 465 ⟶ 469:
tokens: [],
first: null,
last: null,
words: {}
},
diff: ''
Line 490 ⟶ 495:
return text.diff;
}
 
// parse and count words in texts for later identification of unique words
wDiff.CountTextWords(text.newText);
wDiff.CountTextWords(text.oldText);
 
// new symbols object
Line 536 ⟶ 545:
// calculate refined diff information with recursion for unresolved gaps
wDiff.CalculateDiff(text, symbols, 'character', true);
 
// bubble up gaps
wDiff.BubbleUpGaps(text.newText, text.oldText);
wDiff.BubbleUpGaps(text.oldText, text.newText);
}
 
// bubble up gaps
wDiff.BubbleUpGaps(text.newText, text.oldText);
wDiff.BubbleUpGaps(text.oldText, text.newText);
 
// enumerate tokens lists
Line 553 ⟶ 562:
// assemble diff blocks into formatted html text
diff = wDiff.AssembleDiff(text, blocks, groups);
 
// wikEd.debugTimer.push(['diff=', new Date]);
// wikEd.DebugTimer();
 
return diff;
};
 
 
// wDiff.CountTextWords: parse and count words in text for later identification of unique words
//   input: text (text.newText or text.oldText), reads .string, expects .words to be an object
//   changes: text.words, maps each word matched by wDiff.regExpWordCount to its number of occurrences
//   called from: wDiff.Diff()

wDiff.CountTextWords = function (text) {

	// the shared global regExp keeps state in lastIndex, always start matching at the beginning
	var regExp = wDiff.regExpWordCount;
	regExp.lastIndex = 0;

	var regExpMatch;
	while ( (regExpMatch = regExp.exec(text.string)) !== null) {
		var word = regExpMatch[0];

		// an empty match does not advance lastIndex, step forward manually to avoid an endless loop
		if (word === '') {
			regExp.lastIndex ++;
			continue;
		}

		// first occurrence, own property check keeps words like 'constructor' from hitting inherited members
		if (Object.prototype.hasOwnProperty.call(text.words, word) === false) {
			text.words[word] = 1;
		}

		// repeated occurrence, write the incremented counter back into the table
		else {
			text.words[word] ++;
		}
	}
	return;
};
 
Line 608 ⟶ 640:
number: null,
parsed: false,
unique: false
};
number ++;
Line 1,024 ⟶ 1,055:
 
wDiff.CalculateDiff = function (text, symbols, level, recurse, newStart, newEnd, oldStart, oldEnd, recursionLevel) {
 
// if (recursionLevel === undefined) { wikEd.debugTimer.push([level + '?', new Date]); }
 
// set defaults
Line 1,037 ⟶ 1,070:
}
 
// parse and connect unique (pass 1 - 3) only if symbol table provided
//
if (symbols !== null) {
// pass 1: parse new text into symbol table
//
 
//
// cycle trough new text tokens list
// pass 1: parse new text into symbol table
var i = newStart;
//
while ( (i !== null) && (text.newText.tokens[i] !== null) ) {
 
// addcycle newtrough entrynew totext symboltokens tablelist
var tokeni = text.newText.tokens[i].tokennewStart;
while ( (i !== null) && (text.newText.tokens[i] !== null) ) {
if (Object.prototype.hasOwnProperty.call(symbols.hash, token) === false) {
var current = symbols.token.length;
symbols.hash[token] = current;
symbols.token[current] = {
newCount: 1,
oldCount: 0,
newToken: i,
oldToken: null
};
}
 
// oradd update existingnew entry to symbol table
var token = text.newText.tokens[i].token;
else {
if (Object.prototype.hasOwnProperty.call(symbols.hash, token) === false) {
var current = symbols.token.length;
symbols.hash[token] = current;
symbols.token[current] = {
newCount: 1,
oldCount: 0,
newToken: i,
oldToken: null
};
}
 
// incrementor tokenupdate counterexisting for new textentry
else {
var hashToArray = symbols.hash[token];
symbols.token[hashToArray].newCount ++;
}
 
// increment token counter for new text
// next list element
var hashToArray = symbols.hash[token];
if (i == newEnd) {
symbols.token[hashToArray].newCount ++;
break;
}
 
// next list element
if (i == newEnd) {
break;
}
i = text.newText.tokens[i].next;
}
i = text.newText.tokens[i].next;
}
 
//
// pass 2: parse old text into symbol table
//
 
// cycle through old text tokens list
var j = oldStart;
while ( (j !== null) && (text.oldText.tokens[j] !== null) ) {
 
// add new entry to symbol table
var token = text.oldText.tokens[j].token;
if (Object.prototype.hasOwnProperty.call(symbols.hash, token) === false) {
var current = symbols.token.length;
symbols.hash[token] = current;
symbols.token[current] = {
newCount: 0,
oldCount: 1,
newToken: null,
oldToken: j
};
}
 
// or update existing entry
else {
 
// increment token counter for old text
var hashToArray = symbols.hash[token];
symbols.token[hashToArray].oldCount ++;
 
// add token number for old text
symbols.token[hashToArray].oldToken = j;
}
 
// next list element
if (j === oldEnd) {
break;
}
j = text.oldText.tokens[j].next;
}
j = text.oldText.tokens[j].next;
}
 
//
// pass 3: connect unique tokens
//
 
// cycle through symbol array
for (var i = 0; i < symbols.token.length; i ++) {
 
// find tokens in the symbol table that occur only once in both versions
if ( (symbols.token[i].newCount == 1) && (symbols.token[i].oldCount == 1) ) {
var newToken = symbols.token[i].newToken;
var oldToken = symbols.token[i].oldToken;
 
// do not use spaces as unique markers
if (/^\s+$/.test(text.newText.tokens[newToken].token) === false) {
 
// connect from new to old and from old to new
if (text.newText.tokens[newToken].link === null) {
text.newText.tokens[newToken].link = oldToken;
text.oldText.tokens[oldToken].link = newToken;
symbols.linked = true;
 
if ( (level != 'character') && (recursionLevel === 0) ) {
// check if unique word
text.newText.tokens[newToken].unique = true;
if ( (level == 'word') && (recursionLevel === 0) ) {
text.oldText.tokens[oldToken].unique = true;
var token = text.newText.tokens[newToken].token;
if ( (text.oldText.words[token] == 1) && (text.newText.words[token] == 1) ) {
text.newText.tokens[newToken].unique = true;
text.oldText.tokens[oldToken].unique = true;
}
}
}
}
Line 1,141 ⟶ 1,183:
}
 
// continue only if unique tokens have been linked previously or no symbol table provided
//
if ( (symbols === null) || (symbols.linked === true) ) {
// pass 4: connect adjacent identical tokens downwards
//
 
//
// cycle trough new text tokens list
// pass 4: connect adjacent identical tokens downwards
if (symbols.linked === true) {
//
var i = text.newText.first;
while ( (i !== null) && (text.newText.tokens[i] !== null) ) {
var iNext = text.newText.tokens[i].next;
 
// findget alreadysurrounding connected pairstokens
var ji = text.newText.tokens[i].linknewStart;
if (jtext.newText.tokens[i].prev !== null) {
var jNexti = text.oldTextnewText.tokens[ji].nextprev;
}
var iStop = newEnd;
if (text.newText.tokens[iStop].next !== null) {
iStop = text.newText.tokens[iStop].next;
}
var j = null;
 
// checkcycle iftrough thenew followingtext tokens arelist not yet connecteddown
do {
if ( (iNext !== null) && (jNext !== null) ) {
if ( (text.newText.tokens[iNext].link === null) && (text.oldText.tokens[jNext].link === null) ) {
 
// connected pair
// connect if the following tokens are the same
ifvar (text.newText.tokens[iNext].tokenlink == text.oldTextnewText.tokens[jNexti].token) {link;
if (link !== null) {
text.newText.tokens[iNext].link = jNext;
j = text.oldText.tokens[jNextlink].link = iNextnext;
}
}
}
}
 
i = iNext;
// connect if tokens are the same
}
else if ( (j !== null) && (text.oldText.tokens[j].link === null) && (text.newText.tokens[i].token == text.oldText.tokens[j].token) ) {
text.newText.tokens[i].link = j;
text.oldText.tokens[j].link = i;
j = text.oldText.tokens[j].next;
}
 
// not same
else {
j = null;
}
i = text.newText.tokens[i].next;
} while (i !== iStop);
 
//
Line 1,175 ⟶ 1,228:
//
 
// cycleget troughsurrounding new textconnected tokens list
var i = text.newText.lastnewEnd;
while ( (i !== null) &&if (text.newText.tokens[i].next !== null) ) {
var iNexti = text.newText.tokens[i].prevnext;
}
var iStop = newStart;
if (text.newText.tokens[iStop].prev !== null) {
iStop = text.newText.tokens[iStop].prev;
}
var j = null;
 
// cycle trough new text tokens list up
// find already connected pairs
do {
var j = text.newText.tokens[i].link;
if (j !== null) {
var jNext = text.oldText.tokens[j].prev;
 
// connected pair
// check if the preceeding tokens are not yet connected
var link = text.newText.tokens[i].link;
if ( (iNext !== null) && (jNext !== null) ) {
if ( (text.newText.tokens[iNext].link =!== null) && (text.oldText.tokens[jNext].link === null) ) {
j = text.oldText.tokens[link].prev;
}
 
// connect if the preceeding tokens are the same
else if ( (j !== null) && (text.oldText.tokens[j].link === null) && (text.newText.tokens[iNexti].token == text.oldText.tokens[jNextj].token) ) {
text.newText.tokens[iNexti].link = jNextj;
text.oldText.tokens[jNextj].link = iNexti;
j = text.oldText.tokens[j].prev;
}
}
 
}
// not same
else {
j = null;
}
i = text.newText.tokens[i].prev;
} while (i !== iStop);
 
//
// connect adjacent identical tokens downwards from text start, treat boundary as connected, stop after first connected token
//
 
// only for full text diff
if ( (newStart == text.newText.first) && (newEnd == text.newText.last) ) {
 
// from start
var i = text.newText.first;
var j = text.oldText.first;
 
// cycle through new text tokens list down, connect identical tokens, stop after first connected token
while ( (i !== null) && (j !== null) && (text.newText.tokens[i].link === null) && (text.oldText.tokens[j].link === null) && (text.newText.tokens[i].token == text.oldText.tokens[j].token) ) {
text.newText.tokens[i].link = j;
text.oldText.tokens[j].link = i;
j = text.oldText.tokens[j].next;
i = text.newText.tokens[i].next;
}
 
// from end
var i = text.newText.last;
var j = text.oldText.last;
 
// cycle through old text tokens list up, connect identical tokens, stop after first connected token
while ( (i !== null) && (j !== null) && (text.newText.tokens[i].link === null) && (text.oldText.tokens[j].link === null) && (text.newText.tokens[i].token == text.oldText.tokens[j].token) ) {
text.newText.tokens[i].link = j;
text.oldText.tokens[j].link = i;
j = text.oldText.tokens[j].prev;
i = text.newText.tokens[i].prev;
}
i = iNext;
}
 
//
// refine by recursively diffing unresolved regions caused by addition of common tokens around sequences of common tokens, only at word level split
//
 
if ( (recurse === true) && (wDiff.recursiveDiff === true) ) {
 
Line 1,345 ⟶ 1,442:
}
}
 
// if (recursionLevel === 0) { wikEd.debugTimer.push([level + '=', new Date]); }
 
return;
};
Line 1,398 ⟶ 1,498:
// repeat from start after conversion
if (unlinked === true) {
 
// diff unlinked blocks
wDiff.CalculateDiff(text, null, 'unlinked', true);
wDiff.BubbleUpGaps(text.newText, text.oldText);
wDiff.BubbleUpGaps(text.oldText, text.newText);
 
// repeat block detection from start
wDiff.GetSameBlocks(text, blocks);
wDiff.GetSections(blocks, sections);
Line 1,462 ⟶ 1,569:
var token = text.oldText.tokens[j].token;
count ++;
unique = unique ||if (text.newText.tokens[i].unique; === true) {
unique = true;
}
chars += token.length;
string += token;
Line 1,591 ⟶ 1,700:
maxWords = blocks[i].words;
}
unique = unique ||if (blocks[i].unique; === true) {
unique = true;
}
words += blocks[i].words;
chars += blocks[i].chars;
Line 2,451 ⟶ 2,562:
 
var diff = '';
 
// wikEd.debugTimer.push(['shorten?', new Date]);
 
// empty text
Line 2,781 ⟶ 2,894:
 
// WED('diff', diff);
 
// wikEd.debugTimer.push(['shorten=', new Date]);
// wikEd.DebugTimer();
 
return diff;