User:Cacycle/diff.js: Difference between revisions

Content deleted Content added
1.0.16 (September 13, 2014) fix bubbling: bubble after unlink, fix unique word detection and word hash/counter, +debug timer, pass 4,5: start from surrounding tokens, then 4/5 from start/end
1.0.17 (September 13, 2014) newline symbol on block hovering, darker blank-only blocks, unlink not followed by calculate diff but bubbling
Line 3:
// ==UserScript==
// @name wDiff
// @version 1.0.17
// @date September 13, 2014
// @description improved word-based diff library with block move detection
Line 136:
// UniCode letter support for regexps, from http://xregexp.com/addons/unicode/unicode-base.js v1.0.0
if (wDiff.letters === undefined) { wDiff.letters = 'a-zA-Z0-9' + '00AA00B500BA00C0-00D600D8-00F600F8-02C102C6-02D102E0-02E402EC02EE0370-037403760377037A-037D03860388-038A038C038E-03A103A3-03F503F7-0481048A-05270531-055605590561-058705D0-05EA05F0-05F20620-064A066E066F0671-06D306D506E506E606EE06EF06FA-06FC06FF07100712-072F074D-07A507B107CA-07EA07F407F507FA0800-0815081A082408280840-085808A008A2-08AC0904-0939093D09500958-09610971-09770979-097F0985-098C098F09900993-09A809AA-09B009B209B6-09B909BD09CE09DC09DD09DF-09E109F009F10A05-0A0A0A0F0A100A13-0A280A2A-0A300A320A330A350A360A380A390A59-0A5C0A5E0A72-0A740A85-0A8D0A8F-0A910A93-0AA80AAA-0AB00AB20AB30AB5-0AB90ABD0AD00AE00AE10B05-0B0C0B0F0B100B13-0B280B2A-0B300B320B330B35-0B390B3D0B5C0B5D0B5F-0B610B710B830B85-0B8A0B8E-0B900B92-0B950B990B9A0B9C0B9E0B9F0BA30BA40BA8-0BAA0BAE-0BB90BD00C05-0C0C0C0E-0C100C12-0C280C2A-0C330C35-0C390C3D0C580C590C600C610C85-0C8C0C8E-0C900C92-0CA80CAA-0CB30CB5-0CB90CBD0CDE0CE00CE10CF10CF20D05-0D0C0D0E-0D100D12-0D3A0D3D0D4E0D600D610D7A-0D7F0D85-0D960D9A-0DB10DB3-0DBB0DBD0DC0-0DC60E01-0E300E320E330E40-0E460E810E820E840E870E880E8A0E8D0E94-0E970E99-0E9F0EA1-0EA30EA50EA70EAA0EAB0EAD-0EB00EB20EB30EBD0EC0-0EC40EC60EDC-0EDF0F000F40-0F470F49-0F6C0F88-0F8C1000-102A103F1050-1055105A-105D106110651066106E-10701075-1081108E10A0-10C510C710CD10D0-10FA10FC-1248124A-124D1250-12561258125A-125D1260-1288128A-128D1290-12B012B2-12B512B8-12BE12C012C2-12C512C8-12D612D8-13101312-13151318-135A1380-138F13A0-13F41401-166C166F-167F1681-169A16A0-16EA1700-170C170E-17111720-17311740-17511760-176C176E-17701780-17B317D717DC1820-18771880-18A818AA18B0-18F51900-191C1950-196D1970-19741980-19AB19C1-19C71A00-1A161A20-1A541AA71B05-1B331B45-1B4B1B83-1BA01BAE1BAF1BBA-1BE51C00-1C231C4D-1C4F1C5A-1C7D1CE9-1CEC1CEE-1CF11CF51CF61D00-1DBF1E00-1F151F18-1F1D1F20-1F451F48-1F4D1F50-1F571F591F5B1F5D1F5F-1F7D1F80-1FB41FB6-1FBC1FBE1FC2-1FC41FC6-1FCC1FD0-1FD31FD6-1FDB1FE0-1FEC1FF2-1FF41FF6-1FFC2071207F2090-209C21022107210A-211321152119-211D212421262128212A-21
2D212F-2139213C-213F2145-2149214E218321842C00-2C2E2C30-2C5E2C60-2CE42CEB-2CEE2CF22CF32D00-2D252D272D2D2D30-2D672D6F2D80-2D962DA0-2DA62DA8-2DAE2DB0-2DB62DB8-2DBE2DC0-2DC62DC8-2DCE2DD0-2DD62DD8-2DDE2E2F300530063031-3035303B303C3041-3096309D-309F30A1-30FA30FC-30FF3105-312D3131-318E31A0-31BA31F0-31FF3400-4DB54E00-9FCCA000-A48CA4D0-A4FDA500-A60CA610-A61FA62AA62BA640-A66EA67F-A697A6A0-A6E5A717-A71FA722-A788A78B-A78EA790-A793A7A0-A7AAA7F8-A801A803-A805A807-A80AA80C-A822A840-A873A882-A8B3A8F2-A8F7A8FBA90A-A925A930-A946A960-A97CA984-A9B2A9CFAA00-AA28AA40-AA42AA44-AA4BAA60-AA76AA7AAA80-AAAFAAB1AAB5AAB6AAB9-AABDAAC0AAC2AADB-AADDAAE0-AAEAAAF2-AAF4AB01-AB06AB09-AB0EAB11-AB16AB20-AB26AB28-AB2EABC0-ABE2AC00-D7A3D7B0-D7C6D7CB-D7FBF900-FA6DFA70-FAD9FB00-FB06FB13-FB17FB1DFB1F-FB28FB2A-FB36FB38-FB3CFB3EFB40FB41FB43FB44FB46-FBB1FBD3-FD3DFD50-FD8FFD92-FDC7FDF0-FDFBFE70-FE74FE76-FEFCFF21-FF3AFF41-FF5AFF66-FFBEFFC2-FFC7FFCA-FFCFFFD2-FFD7FFDA-FFDC'.replace(/(\w{4})/g, '\\u$1'); }
if (wDiff.smallSpaces === undefined) { wDiff.smallSpaces = ' \\u00a0\\u1680​\\u180e\\u2000​\\u2002\\u2004-\\u200a​\\u2028\\u2029​​\\u202f\\u205f​\\u3000'; }
if (wDiff.wideSpaces === undefined) { wDiff.wideSpaces = '\\u2001\\u2003'; }
 
// regExps for splitting text
Line 168 ⟶ 170:
// regExp for wiki code non-letter characters
// matches only if the whole string (one or more chars) consists of blanks, tabs, newlines, and the
// listed punctuation: [ ] { } | + - ! * # : ; = < > ' / _ , . & ?
// assigned only if still undefined, so a previously set value is kept
if (wDiff.regExpWikiCodeChars === undefined) { wDiff.regExpWikiCodeChars = /^[ \t\n\[\]{}|+\-!*#:;=<>'\/_,.&?]+$/; }
 
// regExp detecting blank-only blocks
// anchored to the whole string; accepts exactly one of:
//   - zero to two characters from wDiff.smallSpaces (so the empty string matches)
//   - at most one character from wDiff.wideSpaces
//   - an optional small space followed by a newline, optionally followed by one of:
//     zero to two small spaces, at most one wide space, or an optional small space plus a second newline
// wDiff.smallSpaces and wDiff.wideSpaces are inserted verbatim between [ and ], so they must be valid character class contents
// assigned only if still undefined, so a previously set value is kept
if (wDiff.regExpBlankBlock === undefined) { wDiff.regExpBlankBlock = new RegExp('^([' + wDiff.smallSpaces + ']{0,2}|[' + wDiff.wideSpaces + ']?|[' + wDiff.smallSpaces + ']?\\n([' + wDiff.smallSpaces + ']{0,2}|[' + wDiff.wideSpaces + ']?|[' + wDiff.smallSpaces + ']?\\n)?)$'); }
 
//
Line 210 ⟶ 215:
wDiff.stylesheet =
'.wDiffTab:before { content: "→"; color: #bbb; font-size: smaller; }' +
'.wDiffNewline:before { content: " "; color: transparent; }' +
'.wDiffInsert:hover .wDiffNewline:before { color: #999; }' +
'.wDiffDelete:hover .wDiffNewline:before { color: #aaa; }' +
'.wDiffInsertBlank:hover .wDiffNewline:before { color: #888; }' +
'.wDiffDeleteBlank:hover .wDiffNewline:before { color: #999; }' +
'.wDiffBlockLeft:hover .wDiffNewline:before, .wDiffBlockRight:hover .wDiffNewline:before { color: #ccc; }' +
'.wDiffMarkRight:before { content: "' + wDiff.symbolMarkRight + '"; }' +
'.wDiffMarkLeft:before { content: "' + wDiff.symbolMarkLeft + '"; }' +
'.wDiffDelete { font-weight: bold; background-color: #ffe49c; color: #222; border-radius: 0.25em; padding: 0.2em 1px; }' +
'.wDiffInsert { font-weight: bold; background-color: #bbddff; color: #222; border-radius: 0.25em; padding: 0.2em 1px; }' +
'.wDiffDeleteBlank { background-color: #ffc840; }' +
'.wDiffInsertBlank { background-color: #66b8ff; }' +
'.wDiffBlockLeft { font-weight: bold; background-color: #e8e8e8; border-radius: 0.25em; padding: 0.2em 1px; margin: 0 1px; }' +
'.wDiffBlockRight { font-weight: bold; background-color: #e8e8e8; border-radius: 0.25em; padding: 0.2em 1px; margin: 0 1px; }' +
Line 254 ⟶ 266:
// default inline styles: each is set to an empty string only if still undefined
// styleDelete / styleInsert and styleDeleteBlank / styleInsertBlank are concatenated into the style attribute
// of the default htmlDeleteStart(Blank) / htmlInsertStart(Blank) markup
if (wDiff.styleDelete === undefined) { wDiff.styleDelete = ''; }
if (wDiff.styleInsert === undefined) { wDiff.styleInsert = ''; }
if (wDiff.styleDeleteBlank === undefined) { wDiff.styleDeleteBlank = ''; }
if (wDiff.styleInsertBlank === undefined) { wDiff.styleInsertBlank = ''; }
if (wDiff.styleBlockLeft === undefined) { wDiff.styleBlockLeft = ''; }
if (wDiff.styleBlockRight === undefined) { wDiff.styleBlockRight = ''; }
Line 281 ⟶ 295:
 
// default markup wrapped around deleted text, each assigned only if still undefined
// htmlDeleteStart: opening span with class wDiffDelete and inline style wDiff.styleDelete
// htmlDeleteStartBlank: same, plus class wDiffDeleteBlank and wDiff.styleDeleteBlank appended to the inline style
//   (presumably used for blocks matching wDiff.regExpBlankBlock - confirm at the call sites)
// htmlDeleteEnd: closing span followed by a <!--wDiffDelete--> marker comment
if (wDiff.htmlDeleteStart === undefined) { wDiff.htmlDeleteStart = '<span class="wDiffDelete" style="' + wDiff.styleDelete + '" title="−">'; }
if (wDiff.htmlDeleteStartBlank === undefined) { wDiff.htmlDeleteStartBlank = '<span class="wDiffDelete wDiffDeleteBlank" style="' + wDiff.styleDelete + ' ' + wDiff.styleDeleteBlank + '" title="−">'; }
if (wDiff.htmlDeleteEnd === undefined) { wDiff.htmlDeleteEnd = '</span><!--wDiffDelete-->'; }
 
// default markup wrapped around inserted text, each assigned only if still undefined
// htmlInsertStart: opening span with class wDiffInsert and inline style wDiff.styleInsert
// htmlInsertStartBlank: same, plus class wDiffInsertBlank and wDiff.styleInsertBlank appended to the inline style
//   (presumably used for blocks matching wDiff.regExpBlankBlock - confirm at the call sites)
// htmlInsertEnd: closing span followed by a <!--wDiffInsert--> marker comment
if (wDiff.htmlInsertStart === undefined) { wDiff.htmlInsertStart = '<span class="wDiffInsert" style="' + wDiff.styleInsert + '" title="+">'; }
if (wDiff.htmlInsertStartBlank === undefined) { wDiff.htmlInsertStartBlank = '<span class="wDiffInsert wDiffInsertBlank" style="' + wDiff.styleInsert + ' ' + wDiff.styleInsertBlank + '" title="+">'; }
if (wDiff.htmlInsertEnd === undefined) { wDiff.htmlInsertEnd = '</span><!--wDiffInsert-->'; }
 
Line 640 ⟶ 656:
number: null,
parsed: false,
unique: false
};
number ++;
Line 981 ⟶ 998:
// test balloon up, remember last line break or closing text
var frontStop = null;
if ( (front !== text.tokens[front].prev;null) && (back !== null) ) {
back front = text.tokens[backfront].prev;
back = text.tokens[back].prev;
var frontTest = front;
 
var backTest = back;
var frontTest = front;
while (
(frontTestvar !== null) && (backTest !== null) &&back;
while (
(text.tokens[frontTest].link !== null) && (text.tokens[backTest].link === null) &&
(text.tokens[frontTest].token !== text.tokens[null) && (backTest].token !== null) &&
(text.tokens[frontTest].link !== null) && (text.tokens[backTest].link === null) &&
) {
if (wDiff.regExpBubbleStop.test (text.tokens[frontTest].token) === truetext.tokens[backTest].token) {
) {
frontStop = frontTest;
if (wDiff.regExpBubbleStop.test(text.tokens[frontTest].token) === true) {
break;
frontStop = frontTest;
}
break;
else if ( (frontStop === null) && (wDiff.regExpBubbleClosing.test(text.tokens[frontTest].token) === true) ) {
}
frontStop = frontTest;
else if ( (frontStop === null) && (wDiff.regExpBubbleClosing.test(text.tokens[frontTest].token) === true) ) {
frontStop = frontTest;
}
frontTest = text.tokens[frontTest].prev;
backTest = text.tokens[backTest].prev;
}
frontTest = text.tokens[frontTest].prev;
backTest = text.tokens[backTest].prev;
}
 
Line 1,070 ⟶ 1,090:
}
 
//
// parse and connect unique (pass 1 - 3) only if symbol table provided
// pass 1: parse new text into symbol table
if (symbols !== null) {
//
 
// cycle through new text tokens list
//
var i = newStart;
// pass 1: parse new text into symbol table
while ( (i !== null) && (text.newText.tokens[i] !== null) ) {
//
 
// add new entry to symbol table
var itoken = newStarttext.newText.tokens[i].token;
if (Object.prototype.hasOwnProperty.call(symbols.hash, token) === false) {
while ( (i !== null) && (text.newText.tokens[i] !== null) ) {
var current = symbols.token.length;
symbols.hash[token] = current;
symbols.token[current] = {
newCount: 1,
oldCount: 0,
newToken: i,
oldToken: null
};
}
 
// or update existing entry
else {
var token = text.newText.tokens[i].token;
if (Object.prototype.hasOwnProperty.call(symbols.hash, token) === false) {
var current = symbols.token.length;
symbols.hash[token] = current;
symbols.token[current] = {
newCount: 1,
oldCount: 0,
newToken: i,
oldToken: null
};
}
 
// increment token counter for new text
var hashToArray = symbols.hash[token];
else {
symbols.token[hashToArray].newCount ++;
}
 
// next list element
// increment token counter for new text
if (i == newEnd) {
var hashToArray = symbols.hash[token];
break;
symbols.token[hashToArray].newCount ++;
}
 
// next list element
if (i == newEnd) {
break;
}
i = text.newText.tokens[i].next;
}
i = text.newText.tokens[i].next;
}
 
//
// pass 2: parse old text into symbol table
//
 
// cycle through old text tokens list
var j = oldStart;
while ( (j !== null) && (text.oldText.tokens[j] !== null) ) {
 
// add new entry to symbol table
var token = text.oldText.tokens[j].token;
if (Object.prototype.hasOwnProperty.call(symbols.hash, token) === false) {
var current = symbols.token.length;
symbols.hash[token] = current;
symbols.token[current] = {
newCount: 0,
oldCount: 1,
newToken: null,
oldToken: j
};
}
 
// or update existing entry
else {
 
// increment token counter for old text
var hashToArray = symbols.hash[token];
symbols.token[hashToArray].oldCount ++;
 
// add token number for old text
symbols.token[hashToArray].oldToken = j;
}
 
// next list element
if (j === oldEnd) {
break;
}
j = text.oldText.tokens[j].next;
}
j = text.oldText.tokens[j].next;
}
 
//
// pass 3: connect unique tokens
//
 
// cycle through symbol array
for (var i = 0; i < symbols.token.length; i ++) {
 
// find tokens in the symbol table that occur only once in both versions
if ( (symbols.token[i].newCount == 1) && (symbols.token[i].oldCount == 1) ) {
var newToken = symbols.token[i].newToken;
var oldToken = symbols.token[i].oldToken;
 
// do not use spaces as unique markers
if (/^\s+$/.test(text.newText.tokens[newToken].token) === false) {
 
// connect from new to old and from old to new
if (text.newText.tokens[newToken].link === null) {
text.newText.tokens[newToken].link = oldToken;
text.oldText.tokens[oldToken].link = newToken;
symbols.linked = true;
 
// check if unique word
if ( (level == 'word') && (recursionLevel === 0) ) {
var token = text.newText.tokens[newToken].token;
if ( (text.oldText.words[token] == 1) && (text.newText.words[token] == 1) ) {
text.newText.tokens[newToken].unique = true;
text.oldText.tokens[oldToken].unique = true;
}
}
}
Line 1,183 ⟶ 1,199:
}
 
// continue only if unique tokens have been linked previously or no symbol table provided
if ( (symbols === null) || (symbols.linked === true) ) {
 
//
Line 1,498 ⟶ 1,514:
// repeat from start after conversion
if (unlinked === true) {
 
// diff unlinked blocks
wDiff.CalculateDiff(text, null, 'unlinked', true);
wDiff.BubbleUpGaps(text.newText, text.oldText);
wDiff.BubbleUpGaps(text.oldText, text.newText);
Line 1,994 ⟶ 2,007:
// unlink whole moved group if it contains no unique matched token
if ( (groups[group].fixed === false) && (groups[group].unique === false) ) {
 
for (var block = blockStart; block <= blockEnd; block ++) {
if (blocks[block].type == 'same') {
Line 2,379 ⟶ 2,393:
// add 'del' text
else if (type == 'del') {
if (wDiff.regExpBlankBlock.test(string) === true) {
string = string.replace(/\n/g, wDiff.htmlNewline);
diff += wDiff.htmlDeleteStart + string + wDiff.htmlDeleteEndhtmlDeleteStartBlank;
}
else {
diff += wDiff.htmlDeleteStart;
}
diff += string.replace(/\n/g, wDiff.htmlNewline) + wDiff.htmlDeleteEnd;
}
 
// add 'ins' text
else if (type == 'ins') {
if (wDiff.regExpBlankBlock.test(string) === true) {
string = string.replace(/\n/g, wDiff.htmlNewline);
diff += wDiff.htmlInsertStart + string + wDiff.htmlInsertEndhtmlInsertStartBlank;
}
else {
diff += wDiff.htmlInsertStart;
}
diff += string.replace(/\n/g, wDiff.htmlNewline) + wDiff.htmlInsertEnd;
}
}
Line 2,415 ⟶ 2,439:
var movedGroup = moved[i];
var markColor = groups[movedGroup].color;
var mark = '';
 
// get moved block text
Line 2,427 ⟶ 2,451:
// display as deletion at original position
if (wDiff.showBlockMoves === false) {
mark =if (wDiff.htmlDeleteStart + wDiffregExpBlankBlock.HtmlEscapetest(movedText) +=== wDiff.htmlDeleteEnd;true) {
mark = wDiff.htmlDeleteStartBlank;
}
else {
mark = wDiff.htmlDeleteStart;
}
mark += wDiff.HtmlEscape(movedText) + wDiff.htmlDeleteEnd;
}
 
Line 2,440 ⟶ 2,470:
mark = wDiff.HtmlCustomize(mark, markColor, movedText);
}
 
 
// get side of group to mark
Line 3,014 ⟶ 3,043:
}
string = string.replace(/\n/g, '\\n');
string = string.replace(/\t/g, ' ');
var max = 100;
if (string.length > max) {