Content deleted Content added
1.0.15 (September 12, 2014) customizable block mark symbols, fix dynamic highlighting and scrolling, fix bubbling |
1.0.16 (September 13, 2014) fix bubbling: bubble after unlink, fix unique word detection and word hash/counter, +debug timer, pass 4,5: start from surrounding tokens, then 4/5 from start/end |
||
Line 3:
// ==UserScript==
// @name wDiff
// @version 1.0.
// @date September
// @description improved word-based diff library with block move detection
// @homepage https://en.wikipedia.org/wiki/User:Cacycle/diff
Line 56:
.number: list enumeration number
.parsed: token has been added to symbol table
.unique: token is unique
.first: index of first token in tokens list
.last: index of last token in tokens list
.words{}: word count
.diff: diff html
Line 147 ⟶ 148:
// inline chunks
// [[wiki link]] | {{template}} | [ext. link] |<html> | [[wiki link| | {{template| | url
chunk: /\[\[[^\[\]\n]+\]\]|\{\{[^\{\}\n]+\}\}|\[[^\[\]\n]+\]|<\/?[^<>\[\]\{\}\n]+>|\[\[[^\[\]\|\n]+\]\]\||\{\{[^\{\}\|\n]+\||\b((https?:|)\/\/)[^\x00-\x20\s"\[\]\x7f]+/g,
// words, multi-char markup, and chars
word: new RegExp('[' + wDiff.letters + ']+([\'’_]?[' + wDiff.letters + ']
// chars
Line 163 ⟶ 164:
// regExp for counting words
if (wDiff.regExpWordCount === undefined) { wDiff.regExpWordCount = new RegExp('
// regExp for wiki code non-letter characters
Line 448 ⟶ 449:
var diff = '';
// wikEd.debugTimer.push(['diff?', new Date]);
// IE / Mac fix
Line 459 ⟶ 462:
tokens: [],
first: null,
last: null,
words: {}
},
oldText: {
Line 465 ⟶ 469:
tokens: [],
first: null,
last: null,
words: {}
},
diff: ''
Line 490 ⟶ 495:
return text.diff;
}
// parse and count words in texts for later identification of unique words
wDiff.CountTextWords(text.newText);
wDiff.CountTextWords(text.oldText);
// new symbols object
Line 536 ⟶ 545:
// calculate refined diff information with recursion for unresolved gaps
wDiff.CalculateDiff(text, symbols, 'character', true);
// bubble up gaps
wDiff.BubbleUpGaps(text.newText, text.oldText);
wDiff.BubbleUpGaps(text.oldText, text.newText);
}
// enumerate tokens lists
Line 553 ⟶ 562:
// assemble diff blocks into formatted html text
diff = wDiff.AssembleDiff(text, blocks, groups);
// wikEd.debugTimer.push(['diff=', new Date]);
// wikEd.DebugTimer();
return diff;
};
// wDiff.CountTextWords: parse and count words in text for later identification of unique words
// changes: text (text.newText or text.oldText) .words
// called from: wDiff.Diff()

wDiff.CountTextWords = function (text) {

	var regExpMatch;
	while ( (regExpMatch = wDiff.regExpWordCount.exec(text.string)) !== null) {
		var word = regExpMatch[0];

		// initialize or increment the per-word counter
		// bug fix: the original computed the new count into a local variable but never
		// stored it back, leaving text.words permanently empty and breaking the
		// unique-word detection that later reads text.words[token] == 1
		if (text.words[word] === undefined) {
			text.words[word] = 1;
		}
		else {
			text.words[word] ++;
		}
	}
	return;
};
Line 608 ⟶ 640:
number: null,
parsed: false,
};
number ++;
Line 1,024 ⟶ 1,055:
wDiff.CalculateDiff = function (text, symbols, level, recurse, newStart, newEnd, oldStart, oldEnd, recursionLevel) {
// if (recursionLevel === undefined) { wikEd.debugTimer.push([level + '?', new Date]); }
// set defaults
Line 1,037 ⟶ 1,070:
}
// parse and connect unique (pass 1 - 3) only if symbol table provided
if (symbols !== null) {
//
// pass 1: parse new text into symbol table
//
//
var
while ( (i !== null) && (text.newText.tokens[i] !== null) ) {
//
var token = text.newText.tokens[i].token;
if (Object.prototype.hasOwnProperty.call(symbols.hash, token) === false) {
var current = symbols.token.length;
symbols.hash[token] = current;
symbols.token[current] = {
newCount: 1,
oldCount: 0,
newToken: i,
oldToken: null
};
}
//
else {
// increment token counter for new text
var hashToArray = symbols.hash[token];
symbols.token[hashToArray].newCount ++;
}
// next list element
if (i == newEnd) {
break;
}
i = text.newText.tokens[i].next;
}
// add new entry to symbol table
var token = text.oldText.tokens[j].token;
if (Object.prototype.hasOwnProperty.call(symbols.hash, token) === false) {
var current = symbols.token.length;
symbols.hash[token] = current;
symbols.token[current] = {
newCount: 0,
oldCount: 1,
newToken: null,
oldToken: j
};
}
// or update existing entry
else {
// increment token counter for old text
var hashToArray = symbols.hash[token];
symbols.token[hashToArray].oldCount ++;
// add token number for old text
symbols.token[hashToArray].oldToken = j;
}
// next list element
if (j === oldEnd) {
break;
}
j = text.oldText.tokens[j].next;
}
// find tokens in the symbol table that occur only once in both versions
if ( (symbols.token[i].newCount == 1) && (symbols.token[i].oldCount == 1) ) {
var newToken = symbols.token[i].newToken;
var oldToken = symbols.token[i].oldToken;
// do not use spaces as unique markers
if (/^\s+$/.test(text.newText.tokens[newToken].token) === false) {
// connect from new to old and from old to new
if (text.newText.tokens[newToken].link === null) {
text.newText.tokens[newToken].link = oldToken;
text.oldText.tokens[oldToken].link = newToken;
symbols.linked = true;
// check if unique word
if ( (level == 'word') && (recursionLevel === 0) ) {
var token = text.newText.tokens[newToken].token;
if ( (text.oldText.words[token] == 1) && (text.newText.words[token] == 1) ) {
text.newText.tokens[newToken].unique = true;
text.oldText.tokens[oldToken].unique = true;
}
}
}
}
Line 1,141 ⟶ 1,183:
}
// continue only if unique tokens have been linked previously or no symbol table provided
if ( (symbols === null) || (symbols.linked === true) ) {
//
// pass 4: connect adjacent identical tokens downwards
//
}
var iStop = newEnd;
if (text.newText.tokens[iStop].next !== null) {
iStop = text.newText.tokens[iStop].next;
}
var j = null;
do {
// connected pair
if (link !== null) {
}
// connect if tokens are the same
else if ( (j !== null) && (text.oldText.tokens[j].link === null) && (text.newText.tokens[i].token == text.oldText.tokens[j].token) ) {
text.newText.tokens[i].link = j;
text.oldText.tokens[j].link = i;
j = text.oldText.tokens[j].next;
}
// not same
else {
j = null;
}
i = text.newText.tokens[i].next;
} while (i !== iStop);
//
Line 1,175 ⟶ 1,228:
//
//
var i =
}
var iStop = newStart;
if (text.newText.tokens[iStop].prev !== null) {
iStop = text.newText.tokens[iStop].prev;
}
var j = null;
// cycle through new text tokens list up
do {
// connected pair
var link = text.newText.tokens[i].link;
j = text.oldText.tokens[link].prev;
}
j = text.oldText.tokens[j].prev;
// not same
else {
j = null;
}
i = text.newText.tokens[i].prev;
} while (i !== iStop);
//
// connect adjacent identical tokens downwards from text start, treat boundary as connected, stop after first connected token
//
// only for full text diff
if ( (newStart == text.newText.first) && (newEnd == text.newText.last) ) {
// from start
var i = text.newText.first;
var j = text.oldText.first;
// cycle through new text tokens list down, connect identical tokens, stop after first connected token
while ( (i !== null) && (j !== null) && (text.newText.tokens[i].link === null) && (text.oldText.tokens[j].link === null) && (text.newText.tokens[i].token == text.oldText.tokens[j].token) ) {
text.newText.tokens[i].link = j;
text.oldText.tokens[j].link = i;
j = text.oldText.tokens[j].next;
i = text.newText.tokens[i].next;
}
// from end
var i = text.newText.last;
var j = text.oldText.last;
// cycle through old text tokens list up, connect identical tokens, stop after first connected token
while ( (i !== null) && (j !== null) && (text.newText.tokens[i].link === null) && (text.oldText.tokens[j].link === null) && (text.newText.tokens[i].token == text.oldText.tokens[j].token) ) {
text.newText.tokens[i].link = j;
text.oldText.tokens[j].link = i;
j = text.oldText.tokens[j].prev;
i = text.newText.tokens[i].prev;
}
}
//
// refine by recursively diffing unresolved regions caused by addition of common tokens around sequences of common tokens, only at word level split
//
if ( (recurse === true) && (wDiff.recursiveDiff === true) ) {
Line 1,345 ⟶ 1,442:
}
}
// if (recursionLevel === 0) { wikEd.debugTimer.push([level + '=', new Date]); }
return;
};
Line 1,398 ⟶ 1,498:
// repeat from start after conversion
if (unlinked === true) {
// diff unlinked blocks
wDiff.CalculateDiff(text, null, 'unlinked', true);
wDiff.BubbleUpGaps(text.newText, text.oldText);
wDiff.BubbleUpGaps(text.oldText, text.newText);
// repeat block detection from start
wDiff.GetSameBlocks(text, blocks);
wDiff.GetSections(blocks, sections);
Line 1,462 ⟶ 1,569:
var token = text.oldText.tokens[j].token;
count ++;
unique = true;
}
chars += token.length;
string += token;
Line 1,591 ⟶ 1,700:
maxWords = blocks[i].words;
}
unique = true;
}
words += blocks[i].words;
chars += blocks[i].chars;
Line 2,451 ⟶ 2,562:
var diff = '';
// wikEd.debugTimer.push(['shorten?', new Date]);
// empty text
Line 2,781 ⟶ 2,894:
// WED('diff', diff);
// wikEd.debugTimer.push(['shorten=', new Date]);
// wikEd.DebugTimer();
return diff;
|