Revision as of 15:46, 12 September 2014 view source Cacycle (talk \| contribs) Extended confirmed users 21,997 edits 1.0.15 (September 12, 2014) customizable block mark symbols, fix dynamic highlighting and scrolling, fix bubbling ← Previous edit		Revision as of 13:33, 13 September 2014 view source Cacycle (talk \| contribs) Extended confirmed users 21,997 edits 1.0.16 (September 13, 2014) fix bubbling: bubble after unlink, fix unique word detection and word hash/counter, +debug timer, pass 4,5: start from surrounding tokens, then 4/5 from start/end Next edit →
Line 3: // ==UserScript== // @name wDiff // @version 1.0.1516 // @date September 1213, 2014 // @description improved word-based diff library with block move detection // @homepage https://en.wikipedia.org/wiki/User:Cacycle/diff Line 56: .number: list enumeration number .parsed: token has been added to symbol table .unique: token is unique ~~token~~word in ~~whole~~ text .first: index of first token in tokens list .last: index of last token in tokens list .words{}: word count .diff: diff html Line 147 ⟶ 148: // inline chunks // [[wiki link]] \| {{template}} \| [ext. link] \|<html> \| [[wiki link\| \| {{template\| \| url chunk: /\[\[[^\[\]\n]+\]\]\|\{\{[^\{\}\n]+\}\}\|\[[^\[\]\n]+\]\|<\/?[^<>\[\]\{\}\n]+>\|\[\[[^\[\]\\|\n]+\]\]\\|\|\{\{[^\{\}\\|\n]+\\|\|\b((https?:\|)\/\/)[^\x00-\x20\s"\[\]\x7f]+/g, // words, multi-char markup, and chars word: new RegExp('[' + wDiff.letters + ']+([\'’_]?[' + wDiff.letters + '])+)\|\\[\\[\|\\]\\]\|\\{\\{\|\\}\\}\|&\\w+;\|\'\'\'\|\'\'\|==+\|\\{\\\|\|\\\|\\}\|\\\|-\|.', 'g'), // chars Line 163 ⟶ 164: // regExp for counting words if (wDiff.regExpWordCount === undefined) { wDiff.regExpWordCount = new RegExp('~~(^\|~~[^' + wDiff.letters + '])+([\' ~~+ wDiff.letters + '~~’_]?[' + wDiff.letters + '~~_\'’~~]+)', 'g'); } // regExp for wiki code non-letter characters Line 448 ⟶ 449: var diff = ''; // wikEd.debugTimer.push(['diff?', new Date]); // IE / Mac fix Line 459 ⟶ 462: tokens: [], first: null, last: null, words: {} }, oldText: { Line 465 ⟶ 469: tokens: [], first: null, last: null, words: {} }, diff: '' Line 490 ⟶ 495: return text.diff; } // parse and count count words in texts for later identification of unique words wDiff.CountTextWords(text.newText); wDiff.CountTextWords(text.oldText); // new symbols object Line 536 ⟶ 545: // calculate refined diff information with recursion for unresolved gaps wDiff.CalculateDiff(text, symbols, 'character', true); // bubble up gaps wDiff.BubbleUpGaps(text.newText, text.oldText); wDiff.BubbleUpGaps(text.oldText, text.newText); } ~~// bubble up gaps~~ ~~wDiff.BubbleUpGaps(text.newText, text.oldText);~~ ~~wDiff.BubbleUpGaps(text.oldText, text.newText);~~ // enumerate tokens lists Line 553 ⟶ 562: // assemble diff blocks into formatted html text diff = wDiff.AssembleDiff(text, blocks, groups); // wikEd.debugTimer.push(['diff=', new Date]); // wikEd.DebugTimer(); return diff; }; // wDiff.CountTextWords: parse and count words in text for later identification of unique words // changes: text (text.newText or text.oldText) .words // called from: wDiff.Diff() wDiff.CountTextWords = function (text) { var regExpMatch; while ( (regExpMatch = wDiff.regExpWordCount.exec(text.string)) !== null) { var word = text.words[ regExpMatch[0] ]; if (word === undefined) { word = 1; } else { word ++; } } return; }; Line 608 ⟶ 640: number: null, parsed: false, ~~unique: false~~ }; number ++; Line 1,024 ⟶ 1,055: wDiff.CalculateDiff = function (text, symbols, level, recurse, newStart, newEnd, oldStart, oldEnd, recursionLevel) { // if (recursionLevel === undefined) { wikEd.debugTimer.push([level + '?', new Date]); } // set defaults Line 1,037 ⟶ 1,070: } // parse and connect unique (pass 1 - 3) only if symbol table provided // if (symbols !== null) { ~~// pass 1: parse new text into symbol table~~ // // ~~// cycle trough new text tokens list~~ // pass 1: parse new text into symbol table ~~var i = newStart;~~ // ~~while ( (i !== null) && (text.newText.tokens[i] !== null) ) {~~ // ~~add~~cycle ~~new~~trough ~~entry~~new totext ~~symbol~~tokens ~~table~~list var ~~token~~i = ~~text.newText.tokens[i].token~~newStart; while ( (i !== null) && (text.newText.tokens[i] !== null) ) { ~~if (Object.prototype.hasOwnProperty.call(symbols.hash, token) === false) {~~ ~~var current = symbols.token.length;~~ ~~symbols.hash[token] = current;~~ ~~symbols.token[current] = {~~ ~~newCount: 1,~~ ~~oldCount: 0,~~ ~~newToken: i,~~ ~~oldToken: null~~ }; } // oradd ~~update existing~~new entry to symbol table var token = text.newText.tokens[i].token; ~~else {~~ if (Object.prototype.hasOwnProperty.call(symbols.hash, token) === false) { var current = symbols.token.length; symbols.hash[token] = current; symbols.token[current] = { newCount: 1, oldCount: 0, newToken: i, oldToken: null }; } // ~~increment~~or ~~token~~update ~~counter~~existing ~~for new text~~entry else { ~~var hashToArray = symbols.hash[token];~~ ~~symbols.token[hashToArray].newCount ++;~~ } // increment token counter for new text ~~// next list element~~ var hashToArray = symbols.hash[token]; ~~if (i == newEnd) {~~ symbols.token[hashToArray].newCount ++; ~~break;~~ } // next list element if (i == newEnd) { break; } i = text.newText.tokens[i].next; } ~~i = text.newText.tokens[i].next;~~ } // // pass 2: parse old text into symbol table // // cycle trough old text tokens list var j = oldStart; while ( (j !== null) && (text.oldText.tokens[j] !== null) ) { // add new entry to symbol table var token = text.oldText.tokens[j].token; if (Object.prototype.hasOwnProperty.call(symbols.hash, token) === false) { var current = symbols.token.length; symbols.hash[token] = current; symbols.token[current] = { newCount: 0, oldCount: 1, newToken: null, oldToken: j }; } // or update existing entry else { // increment token counter for old text var hashToArray = symbols.hash[token]; symbols.token[hashToArray].oldCount ++; // add token number for old text symbols.token[hashToArray].oldToken = j; } // next list element if (j === oldEnd) { break; } j = text.oldText.tokens[j].next; } ~~j = text.oldText.tokens[j].next;~~ } // // pass 3: connect unique tokens // // cycle trough symbol array for (var i = 0; i < symbols.token.length; i ++) { // find tokens in the symbol table that occur only once in both versions if ( (symbols.token[i].newCount == 1) && (symbols.token[i].oldCount == 1) ) { var newToken = symbols.token[i].newToken; var oldToken = symbols.token[i].oldToken; // do not use spaces as unique markers if (/^\s+$/.test(text.newText.tokens[newToken].token) === false) { // connect from new to old and from old to new if (text.newText.tokens[newToken].link === null) { text.newText.tokens[newToken].link = oldToken; text.oldText.tokens[oldToken].link = newToken; symbols.linked = true; ~~if ( (level != 'character') && (recursionLevel === 0) ) {~~ // check if unique word ~~text.newText.tokens[newToken].unique = true;~~ if ( (level == 'word') && (recursionLevel === 0) ) { ~~text.oldText.tokens[oldToken].unique = true;~~ var token = text.newText.tokens[newToken].token; if ( (text.oldText.words[token] == 1) && (text.newText.words[token] == 1) ) { text.newText.tokens[newToken].unique = true; text.oldText.tokens[oldToken].unique = true; } } } } Line 1,141 ⟶ 1,183: } // continue only if unique tokens have been linked previously or no symbol table provided // if ( (symbols === null) \|\| (symbols.linked === true) ) { ~~// pass 4: connect adjacent identical tokens downwards~~ // // ~~// cycle trough new text tokens list~~ // pass 4: connect adjacent identical tokens downwards ~~if (symbols.linked === true) {~~ // ~~var i = text.newText.first;~~ ~~while ( (i !== null) && (text.newText.tokens[i] !== null) ) {~~ ~~var iNext = text.newText.tokens[i].next;~~ // ~~find~~get ~~already~~surrounding connected ~~pairs~~tokens var ji = ~~text.newText.tokens[i].link~~newStart; if (jtext.newText.tokens[i].prev !== null) { ~~var jNext~~i = text.~~oldText~~newText.tokens[ji].~~next~~prev; } var iStop = newEnd; if (text.newText.tokens[iStop].next !== null) { iStop = text.newText.tokens[iStop].next; } var j = null; // ~~check~~cycle iftrough ~~the~~new ~~following~~text tokens ~~are~~list ~~not yet connected~~down do { ~~if ( (iNext !== null) && (jNext !== null) ) {~~ ~~if ( (text.newText.tokens[iNext].link === null) && (text.oldText.tokens[jNext].link === null) ) {~~ // connected pair ~~// connect if the following tokens are the same~~ ifvar ~~(text.newText.tokens[iNext].token~~link == text.~~oldText~~newText.tokens[~~jNext~~i].~~token) {~~link; if (link !== null) { ~~text.newText.tokens[iNext].link = jNext;~~ j = text.oldText.tokens[~~jNext~~link].~~link = iNext~~next; } } } } ~~i = iNext;~~ // connect if tokens are the same } else if ( (j !== null) && (text.oldText.tokens[j].link === null) && (text.newText.tokens[i].token == text.oldText.tokens[j].token) ) { text.newText.tokens[i].link = j; text.oldText.tokens[j].link = i; j = text.oldText.tokens[j].next; } // not same else { j = null; } i = text.newText.tokens[i].next; } while (i !== iStop); // Line 1,175 ⟶ 1,228: // // ~~cycle~~get ~~trough~~surrounding ~~new text~~connected tokens ~~list~~ var i = ~~text.newText.last~~newEnd; ~~while ( (i !== null) &&~~if (text.newText.tokens[i].next !== null) ) { ~~var iNext~~i = text.newText.tokens[i].~~prev~~next; } var iStop = newStart; if (text.newText.tokens[iStop].prev !== null) { iStop = text.newText.tokens[iStop].prev; } var j = null; // cycle trough new text tokens list up ~~// find already connected pairs~~ do { ~~var j = text.newText.tokens[i].link;~~ ~~if (j !== null) {~~ ~~var jNext = text.oldText.tokens[j].prev;~~ // connected pair ~~// check if the preceeding tokens are not yet connected~~ var link = text.newText.tokens[i].link; ~~if ( (iNext !== null) && (jNext !== null) ) {~~ if ( ~~(text.newText.tokens[iNext].~~link =!== null~~) && (text.oldText.tokens[jNext].link === null)~~ ) { j = text.oldText.tokens[link].prev; } // connect if ~~the preceeding~~ tokens are the same else if ( (j !== null) && (text.oldText.tokens[j].link === null) && (text.newText.tokens[~~iNext~~i].token == text.oldText.tokens[~~jNext~~j].token) ) { text.newText.tokens[~~iNext~~i].link = ~~jNext~~j; text.oldText.tokens[~~jNext~~j].link = ~~iNext~~i; j = text.oldText.tokens[j].prev; } } } // not same else { j = null; } i = text.newText.tokens[i].prev; } while (i !== iStop); // // connect adjacent identical tokens downwards from text start, treat boundary as connected, stop after first connected token // // only for full text diff if ( (newStart == text.newText.first) && (newEnd == text.newText.last) ) { // from start var i = text.newText.first; var j = text.oldText.first; // cycle trough new text tokens list down, connect identical tokens, stop after first connected token while ( (i !== null) && (j !== null) && (text.newText.tokens[i].link === null) && (text.oldText.tokens[j].link === null) && (text.newText.tokens[i].token == text.oldText.tokens[j].token) ) { text.newText.tokens[i].link = j; text.oldText.tokens[j].link = i; j = text.oldText.tokens[j].next; i = text.newText.tokens[i].next; } // from end var i = text.newText.last; var j = text.oldText.last; // cycle trough old text tokens list up, connect identical tokens, stop after first connected token while ( (i !== null) && (j !== null) && (text.newText.tokens[i].link === null) && (text.oldText.tokens[j].link === null) && (text.newText.tokens[i].token == text.oldText.tokens[j].token) ) { text.newText.tokens[i].link = j; text.oldText.tokens[j].link = i; j = text.oldText.tokens[j].prev; i = text.newText.tokens[i].prev; } ~~i = iNext;~~ } // // refine by recursively diffing unresolved regions caused by addition of common tokens around sequences of common tokens, only at word level split // if ( (recurse === true) && (wDiff.recursiveDiff === true) ) { Line 1,345 ⟶ 1,442: } } // if (recursionLevel === 0) { wikEd.debugTimer.push([level + '=', new Date]); } return; }; Line 1,398 ⟶ 1,498: // repeat from start after conversion if (unlinked === true) { // diff unlinked blocks wDiff.CalculateDiff(text, null, 'unlinked', true); wDiff.BubbleUpGaps(text.newText, text.oldText); wDiff.BubbleUpGaps(text.oldText, text.newText); // repeat block detection from start wDiff.GetSameBlocks(text, blocks); wDiff.GetSections(blocks, sections); Line 1,462 ⟶ 1,569: var token = text.oldText.tokens[j].token; count ++; ~~unique = unique \|\|~~if (text.newText.tokens[i].unique; === true) { unique = true; } chars += token.length; string += token; Line 1,591 ⟶ 1,700: maxWords = blocks[i].words; } ~~unique = unique \|\|~~if (blocks[i].unique; === true) { unique = true; } words += blocks[i].words; chars += blocks[i].chars; Line 2,451 ⟶ 2,562: var diff = ''; // wikEd.debugTimer.push(['shorten?', new Date]); // empty text Line 2,781 ⟶ 2,894: // WED('diff', diff); // wikEd.debugTimer.push(['shorten=', new Date]); // wikEd.DebugTimer(); return diff;

User:Cacycle/diff.js: Difference between revisions