Revision as of 11:02, 26 August 2014 view source Cacycle (talk \| contribs) Extended confirmed users 21,997 edits 1.0.1 (August 24, 2014) +bubbling up of ambiguous gaps, make settings configurable ← Previous edit		Revision as of 11:39, 26 August 2014 view source Cacycle (talk \| contribs) Extended confirmed users 21,997 edits 1.0.2 (August 24, 2014) text.new, text.old -> text.newText, text.oldText for MS IE 8 compatibility Next edit →
Line 2: // @name wDiff // @version 1.0.~~1~~2 // @date August 26, 2014 // @description improved word-based diff library with block move detection Line 39: text: objects for text related data .~~new~~newText, new text .~~old~~oldText: old text .string: new or old text to be diffed .tokens[]: token data list for new or old string (N and O) Line 57: .newCount: new text token counter (NC) .oldCount: old text token counter (OC) .newToken: token index in text.~~new~~newText.tokens .oldToken: token index in text.~~old~~oldText.tokens blocks[]: array of objects that holds block (consecutive text tokens) data in order of the new text Line 277: // prepare text data object var text = { ~~new~~newText: { string: newString, tokens: [], Line 283: last: null }, ~~old~~oldText: { string: oldString, tokens: [], Line 314: // split new and old text into paragraps wDiff.Split(text.~~new~~newText, wDiff.regExpParagraph); wDiff.Split(text.~~old~~oldText, wDiff.regExpParagraph); // calculate diff Line 321: // refine different paragraphs into sentences wDiff.SplitRefine(text.~~new~~newText, wDiff.regExpSentence); wDiff.SplitRefine(text.~~old~~oldText, wDiff.regExpSentence); // calculate refined diff Line 328: // refine different sentences into words wDiff.SplitRefine(text.~~new~~newText, wDiff.regExpWord); wDiff.SplitRefine(text.~~old~~oldText, wDiff.regExpWord); // calculate refined diff information with recursion for unresolved gaps Line 335: // bubble up gaps wDiff.BubbleUpGaps(text.~~new~~newText, text.~~old~~oldText); wDiff.BubbleUpGaps(text.~~old~~oldText, text.~~new~~newText); // split tokens into chars in selected unresolved gaps Line 347: // bubble up gaps wDiff.BubbleUpGaps(text.~~new~~newText, text.~~old~~oldText); wDiff.BubbleUpGaps(text.~~old~~oldText, text.~~new~~newText); // enumerate tokens lists wDiff.EnumerateTokens(text.~~new~~newText); wDiff.EnumerateTokens(text.~~old~~oldText); // detect moved blocks Line 367: // wDiff.Split: split text into paragraph, sentence, or word 
tokens // input: text (text.~~new~~newText or text.~~old~~oldText), object containing text data and strings; regExp, regular expression for splitting text into tokens; token, tokens index of token to be split // changes: text (text.~~new~~newText or text.~~old~~oldText): text.tokens list, text.first, text.last // called from: wDiff.Diff() Line 447: // wDiff.SplitRefine: split unique unmatched tokens into smaller tokens // changes: text (text.~~new~~newText or text.~~old~~oldText) .tokens list // called from: wDiff.Diff() // calls: wDiff.Split() Line 476: // - same length and at least 50 % identity // identical tokens including space separators will be linked, resulting in word-wise char-level diffs // changes: text (text.~~new~~newText or text.~~old~~oldText) .tokens list // called from: wDiff.Diff() // calls: wDiff.Split() Line 493: var gaps = []; var gap = null; var i = text.~~new~~newText.first; var j = text.~~old~~oldText.first; while ( (i != null) && (text.~~new~~newText.tokens[i] != null) ) { // get list item properties var newLink = text.~~new~~newText.tokens[i].link; var oldLink = null; if (j != null) { oldLink = text.~~old~~oldText.tokens[j].link; } Line 531: // next list elements if (newLink != null) { j = text.~~old~~oldText.tokens[newLink].next; } i = text.~~new~~newText.tokens[i].next; } Line 541: // cycle trough old text tokens list var j = gaps[gap].oldFirst; while ( (j != null) && (text.~~old~~oldText.tokens[j] != null) && (text.~~old~~oldText.tokens[j].link == null) ) { // count old chars and tokens in gap Line 547: gaps[gap].oldTokens ++; j = text.~~old~~oldText.tokens[j].next; } } Line 563: // one word became separated by space, dash, or any string if ( (gaps[gap].newTokens == 1) && (gaps[gap].oldTokens == 3) ) { if (text.~~new~~newText.tokens[ gaps[gap].newFirst ].token != text.~~old~~oldText.tokens[ gaps[gap].oldFirst ].token + text.~~old~~oldText.tokens[ gaps[gap].oldLast ].token ) { continue; } } else if ( (gaps[gap].oldTokens == 1) && 
(gaps[gap].newTokens == 3) ) { if (text.~~old~~oldText.tokens[ gaps[gap].oldFirst ].token != text.~~new~~newText.tokens[ gaps[gap].newFirst ].token + text.~~new~~newText.tokens[ gaps[gap].newLast ].token ) { continue; } Line 581: var j = gaps[gap].oldFirst; while (i != null) { var newToken = text.~~new~~newText.tokens[i].token; var oldToken = text.~~old~~oldText.tokens[j].token; // get shorter and longer token Line 658: break; } i = text.~~new~~newText.tokens[i].next; j = text.~~old~~oldText.tokens[j].next; } gaps[gap].charSplit = charSplit; Line 675: var j = gaps[gap].oldFirst; while (i != null) { var newToken = text.~~new~~newText.tokens[i].token; var oldToken = text.~~old~~oldText.tokens[j].token; // link identical tokens (spaces) if (newToken == oldToken) { text.~~new~~newText.tokens[i].link = j; text.~~old~~oldText.tokens[j].link = i; } // refine different words into chars else { wDiff.Split(text.~~new~~newText, wDiff.regExpChar, i); wDiff.Split(text.~~old~~oldText, wDiff.regExpChar, j); } Line 694: break; } i = text.~~new~~newText.tokens[i].next; j = text.~~old~~oldText.tokens[j].next; } } Line 708: // wDiff.BubbleUpGaps: move gaps with ambiguous identical fronts and backs up // start ambiguous gap borders after line breaks and text section closing characters // changes: text (text.~~new~~newText or text.~~old~~oldText) .tokens list // called from: wDiff.Diff() Line 766: // wDiff.EnumerateTokens: enumerate text token list // changes: text (text.~~new~~newText or text.~~old~~oldText) .tokens list // called from: wDiff.Diff() Line 786: // input: text, object containing text data and tokens // optionally for recursive calls: newStart, newEnd, oldStart, oldEnd (tokens list indexes), recursionLevel // changes: text.~~old~~oldText/~~new~~newText.tokens[].link, links corresponding tokens from old and new text // steps: // pass 1: parse new text into symbol table Line 803: // set defaults if (typeof newStart == 'undefined') { newStart = text.~~new~~newText.first; } 
if (typeof newEnd == 'undefined') { newEnd = text.~~new~~newText.last; } if (typeof oldStart == 'undefined') { oldStart = text.~~old~~oldText.first; } if (typeof oldEnd == 'undefined') { oldEnd = text.~~old~~oldText.last; } if (typeof recursionLevel == 'undefined') { recursionLevel = 0; } Line 820: // cycle trough new text tokens list var i = newStart; while ( (i != null) && (text.~~new~~newText.tokens[i] != null) ) { // parse token only once during split refinement if ( (text.~~new~~newText.tokens[i].parsed == false) \|\| (recursionLevel > 0) ) { text.~~new~~newText.tokens[i].parsed = true; // add new entry to symbol table var token = text.~~new~~newText.tokens[i].token; if (Object.prototype.hasOwnProperty.call(symbols, token) == false) { var current = symbol.length; Line 852: break; } i = text.~~new~~newText.tokens[i].next; } Line 861: // cycle trough old text tokens list var j = oldStart; while ( (j != null) && (text.~~old~~oldText.tokens[j] != null) ) { // parse token only once during split refinement if ( (text.~~old~~oldText.tokens[j].parsed == false) \|\| (recursionLevel > 0) ) { text.~~old~~oldText.tokens[j].parsed = true; // add new entry to symbol table var token = text.~~old~~oldText.tokens[j].token; if (Object.prototype.hasOwnProperty.call(symbols, token) == false) { var current = symbol.length; Line 896: break; } j = text.~~old~~oldText.tokens[j].next; } Line 912: // do not use spaces as unique markers if (/^\s+$/.test(text.~~new~~newText.tokens[newToken].token) == false) { // connect from new to old and from old to new if (text.~~new~~newText.tokens[newToken].link == null) { text.~~new~~newText.tokens[newToken].link = oldToken; text.~~old~~oldText.tokens[oldToken].link = newToken; } } Line 928: // cycle trough new text tokens list var i = text.~~new~~newText.first; while ( (i != null) && (text.~~new~~newText.tokens[i] != null) ) { var iNext = text.~~new~~newText.tokens[i].next; // find already connected pairs var j = 
text.~~new~~newText.tokens[i].link; if (j != null) { var jNext = text.~~old~~oldText.tokens[j].next; // check if the following tokens are not yet connected if ( (iNext != null) && (jNext != null) ) { if ( (text.~~new~~newText.tokens[iNext].link == null) && (text.~~old~~oldText.tokens[jNext].link == null) ) { // connect if the following tokens are the same if (text.~~new~~newText.tokens[iNext].token == text.~~old~~oldText.tokens[jNext].token) { text.~~new~~newText.tokens[iNext].link = jNext; text.~~old~~oldText.tokens[jNext].link = iNext; } } Line 957: // cycle trough new text tokens list var i = text.~~new~~newText.last; while ( (i != null) && (text.~~new~~newText.tokens[i] != null) ) { var iNext = text.~~new~~newText.tokens[i].prev; // find already connected pairs var j = text.~~new~~newText.tokens[i].link; if (j != null) { var jNext = text.~~old~~oldText.tokens[j].prev; // check if the preceeding tokens are not yet connected if ( (iNext != null) && (jNext != null) ) { if ( (text.~~new~~newText.tokens[iNext].link == null) && (text.~~old~~oldText.tokens[jNext].link == null) ) { // connect if the preceeding tokens are the same if (text.~~new~~newText.tokens[iNext].token == text.~~old~~oldText.tokens[jNext].token) { text.~~new~~newText.tokens[iNext].link = jNext; text.~~old~~oldText.tokens[jNext].link = iNext; } } Line 992: var j = oldStart; while ( (i != null) && (text.~~new~~newText.tokens[i] != null) ) { // get j from previous tokens match var iPrev = text.~~new~~newText.tokens[i].prev; if (iPrev != null) { var jPrev = text.~~new~~newText.tokens[iPrev].link; if (jPrev != null) { j = text.~~old~~oldText.tokens[jPrev].next; } } // check for the start of an unresolved sequence if ( (j != null) && (text.~~old~~oldText.tokens[j] != null) && (text.~~new~~newText.tokens[i].link == null) && (text.~~old~~oldText.tokens[j].link == null) ) { // determine the limits of of the unresolved new sequence Line 1,011: var iLength = 0; var iNext = i; while ( (iNext != null) && 
(text.~~new~~newText.tokens[iNext].link == null) ) { iEnd = iNext; iLength ++; Line 1,017: break; } iNext = text.~~new~~newText.tokens[iNext].next; } Line 1,025: var jLength = 0; var jNext = j; while ( (jNext != null) && (text.~~old~~oldText.tokens[jNext].link == null) ) { jEnd = jNext; jLength ++; Line 1,031: break; } jNext = text.~~old~~oldText.tokens[jNext].next; } Line 1,049: break; } i = text.~~new~~newText.tokens[i].next; } Line 1,059: var i = newEnd; var j = oldEnd; while ( (i != null) && (text.~~new~~newText.tokens[i] != null) ) { // get j from next matched tokens var iPrev = text.~~new~~newText.tokens[i].next; if (iPrev != null) { var jPrev = text.~~new~~newText.tokens[iPrev].link; if (jPrev != null) { j = text.~~old~~oldText.tokens[jPrev].prev; } } // check for the start of an unresolved sequence if ( (j != null) && (text.~~old~~oldText.tokens[j] != null) && (text.~~new~~newText.tokens[i].link == null) && (text.~~old~~oldText.tokens[j].link == null) ) { // determine the limits of of the unresolved new sequence Line 1,078: var iLength = 0; var iNext = i; while ( (iNext != null) && (text.~~new~~newText.tokens[iNext].link == null) ) { iStart = iNext; iLength ++; Line 1,084: break; } iNext = text.~~new~~newText.tokens[iNext].prev; } Line 1,092: var jLength = 0; var jNext = j; while ( (jNext != null) && (text.~~old~~oldText.tokens[jNext].link == null) ) { jStart = jNext; jLength ++; Line 1,098: break; } jNext = text.~~old~~oldText.tokens[jNext].prev; } Line 1,116: break; } i = text.~~new~~newText.tokens[i].prev; } } Line 1,157: wDiff.DetectBlocks = function(text, blocks, groups) { // WED('text.~~old~~oldText', wDiff.DebugText(text.~~old~~oldText)); // WED('text.~~new~~newText', wDiff.DebugText(text.~~new~~newText)); // Line 1,165: // cycle through old text to find matched (linked) blocks var j = text.~~old~~oldText.first; var i = null; var deletions = []; Line 1,174: var delEnd = null; var string = ''; while ( (j != null) && (text.~~old~~oldText.tokens[j].link 
== null) ) { string += text.~~old~~oldText.tokens[j].token; delEnd = j; j = text.~~old~~oldText.tokens[j].next; } Line 1,191: // get 'same' block if (j != null) { i = text.~~old~~oldText.tokens[j].link; var iStart = i; var jStart = j; Line 1,198: var chars = 0; var string = ''; while ( (i != null) && (j != null) && (text.~~old~~oldText.tokens[j].link == i) ) { var token = text.~~old~~oldText.tokens[j].token; chars += token.length; string += token; i = text.~~new~~newText.tokens[i].next; j = text.~~old~~oldText.tokens[j].next; } Line 1,209: blocks.push({ oldBlock: blocks.length, oldNumber: text.~~old~~oldText.tokens[jStart].number, newNumber: text.~~new~~newText.tokens[iStart].number, chars: chars, type: 'same', Line 1,392: // cycle through new text to find insertion blocks var i = text.~~new~~newText.first; while (i != null) { // jump over linked (matched) block while ( (i != null) && (text.~~new~~newText.tokens[i].link != null) ) { i = text.~~new~~newText.tokens[i].next; } Line 1,404: var iStart = i; var string = ''; while ( (i != null) && (text.~~new~~newText.tokens[i].link == null) ) { string += text.~~new~~newText.tokens[i].token; i = text.~~new~~newText.tokens[i].next; } Line 1,413: oldBlock: null, oldNumber: null, newNumber: text.~~new~~newText.tokens[iStart].number, chars: null, type: 'ins', Line 1,504: blocks.push({ oldBlock: null, oldNumber: text.~~old~~oldText.tokens[ deletions[del].oldStart ].number, newNumber: newNumber, chars: null, Line 2,256: // // wDiff.DebugText: dump text (text.~~old~~oldText or text.~~new~~newText) object //

User:Cacycle/diff.js: Difference between revisions