Content deleted Content added
1.0.1 (August 24, 2014) +bubbling up of ambiguous gaps, make settings configurable |
1.0.2 (August 24, 2014) text.new, text.old -> text.newText, text.oldText for MS IE 8 compatibility |
||
Line 2:
// @name wDiff
// @version 1.0.
// @date August 26, 2014
// @description improved word-based diff library with block move detection
Line 39:
text: objects for text related data
.
.
.string: new or old text to be diffed
.tokens[]: token data list for new or old string (N and O)
Line 57:
.newCount: new text token counter (NC)
.oldCount: old text token counter (OC)
.newToken: token index in text.
.oldToken: token index in text.
blocks[]: array of objects that holds block (consecutive text tokens) data in order of the new text
Line 277:
// prepare text data object
var text = {
string: newString,
tokens: [],
Line 283:
last: null
},
string: oldString,
tokens: [],
Line 314:
// split new and old text into paragraphs
wDiff.Split(text.
wDiff.Split(text.
// calculate diff
Line 321:
// refine different paragraphs into sentences
wDiff.SplitRefine(text.
wDiff.SplitRefine(text.
// calculate refined diff
Line 328:
// refine different sentences into words
wDiff.SplitRefine(text.
wDiff.SplitRefine(text.
// calculate refined diff information with recursion for unresolved gaps
Line 335:
// bubble up gaps
wDiff.BubbleUpGaps(text.
wDiff.BubbleUpGaps(text.
// split tokens into chars in selected unresolved gaps
Line 347:
// bubble up gaps
wDiff.BubbleUpGaps(text.
wDiff.BubbleUpGaps(text.
// enumerate tokens lists
wDiff.EnumerateTokens(text.
wDiff.EnumerateTokens(text.
// detect moved blocks
Line 367:
// wDiff.Split: split text into paragraph, sentence, or word tokens
// input: text (text.
// changes: text (text.
// called from: wDiff.Diff()
Line 447:
// wDiff.SplitRefine: split unique unmatched tokens into smaller tokens
// changes: text (text.
// called from: wDiff.Diff()
// calls: wDiff.Split()
Line 476:
// - same length and at least 50 % identity
// identical tokens including space separators will be linked, resulting in word-wise char-level diffs
// changes: text (text.
// called from: wDiff.Diff()
// calls: wDiff.Split()
Line 493:
var gaps = [];
var gap = null;
var i = text.
var j = text.
while ( (i != null) && (text.
// get list item properties
var newLink = text.
var oldLink = null;
if (j != null) {
oldLink = text.
}
Line 531:
// next list elements
if (newLink != null) {
j = text.
}
i = text.
}
Line 541:
// cycle through old text tokens list
var j = gaps[gap].oldFirst;
while ( (j != null) && (text.
// count old chars and tokens in gap
Line 547:
gaps[gap].oldTokens ++;
j = text.
}
}
Line 563:
// one word became separated by space, dash, or any string
if ( (gaps[gap].newTokens == 1) && (gaps[gap].oldTokens == 3) ) {
if (text.
continue;
}
}
else if ( (gaps[gap].oldTokens == 1) && (gaps[gap].newTokens == 3) ) {
if (text.
continue;
}
Line 581:
var j = gaps[gap].oldFirst;
while (i != null) {
var newToken = text.
var oldToken = text.
// get shorter and longer token
Line 658:
break;
}
i = text.
j = text.
}
gaps[gap].charSplit = charSplit;
Line 675:
var j = gaps[gap].oldFirst;
while (i != null) {
var newToken = text.
var oldToken = text.
// link identical tokens (spaces)
if (newToken == oldToken) {
text.
text.
}
// refine different words into chars
else {
wDiff.Split(text.
wDiff.Split(text.
}
Line 694:
break;
}
i = text.
j = text.
}
}
Line 708:
// wDiff.BubbleUpGaps: move gaps with ambiguous identical fronts and backs up
// start ambiguous gap borders after line breaks and text section closing characters
// changes: text (text.
// called from: wDiff.Diff()
Line 766:
// wDiff.EnumerateTokens: enumerate text token list
// changes: text (text.
// called from: wDiff.Diff()
Line 786:
// input: text, object containing text data and tokens
// optionally for recursive calls: newStart, newEnd, oldStart, oldEnd (tokens list indexes), recursionLevel
// changes: text.
// steps:
// pass 1: parse new text into symbol table
Line 803:
// set defaults
if (typeof newStart == 'undefined') { newStart = text.
if (typeof newEnd == 'undefined') { newEnd = text.
if (typeof oldStart == 'undefined') { oldStart = text.
if (typeof oldEnd == 'undefined') { oldEnd = text.
if (typeof recursionLevel == 'undefined') { recursionLevel = 0; }
Line 820:
// cycle through new text tokens list
var i = newStart;
while ( (i != null) && (text.
// parse token only once during split refinement
if ( (text.
text.
// add new entry to symbol table
var token = text.
if (Object.prototype.hasOwnProperty.call(symbols, token) == false) {
var current = symbol.length;
Line 852:
break;
}
i = text.
}
Line 861:
// cycle through old text tokens list
var j = oldStart;
while ( (j != null) && (text.
// parse token only once during split refinement
if ( (text.
text.
// add new entry to symbol table
var token = text.
if (Object.prototype.hasOwnProperty.call(symbols, token) == false) {
var current = symbol.length;
Line 896:
break;
}
j = text.
}
Line 912:
// do not use spaces as unique markers
if (/^\s+$/.test(text.
// connect from new to old and from old to new
if (text.
text.
text.
}
}
Line 928:
// cycle through new text tokens list
var i = text.
while ( (i != null) && (text.
var iNext = text.
// find already connected pairs
var j = text.
if (j != null) {
var jNext = text.
// check if the following tokens are not yet connected
if ( (iNext != null) && (jNext != null) ) {
if ( (text.
// connect if the following tokens are the same
if (text.
text.
text.
}
}
Line 957:
// cycle through new text tokens list
var i = text.
while ( (i != null) && (text.
var iNext = text.
// find already connected pairs
var j = text.
if (j != null) {
var jNext = text.
// check if the preceding tokens are not yet connected
if ( (iNext != null) && (jNext != null) ) {
if ( (text.
// connect if the preceding tokens are the same
if (text.
text.
text.
}
}
Line 992:
var j = oldStart;
while ( (i != null) && (text.
// get j from previous tokens match
var iPrev = text.
if (iPrev != null) {
var jPrev = text.
if (jPrev != null) {
j = text.
}
}
// check for the start of an unresolved sequence
if ( (j != null) && (text.
// determine the limits of the unresolved new sequence
Line 1,011:
var iLength = 0;
var iNext = i;
while ( (iNext != null) && (text.
iEnd = iNext;
iLength ++;
Line 1,017:
break;
}
iNext = text.
}
Line 1,025:
var jLength = 0;
var jNext = j;
while ( (jNext != null) && (text.
jEnd = jNext;
jLength ++;
Line 1,031:
break;
}
jNext = text.
}
Line 1,049:
break;
}
i = text.
}
Line 1,059:
var i = newEnd;
var j = oldEnd;
while ( (i != null) && (text.
// get j from next matched tokens
var iPrev = text.
if (iPrev != null) {
var jPrev = text.
if (jPrev != null) {
j = text.
}
}
// check for the start of an unresolved sequence
if ( (j != null) && (text.
// determine the limits of the unresolved new sequence
Line 1,078:
var iLength = 0;
var iNext = i;
while ( (iNext != null) && (text.
iStart = iNext;
iLength ++;
Line 1,084:
break;
}
iNext = text.
}
Line 1,092:
var jLength = 0;
var jNext = j;
while ( (jNext != null) && (text.
jStart = jNext;
jLength ++;
Line 1,098:
break;
}
jNext = text.
}
Line 1,116:
break;
}
i = text.
}
}
Line 1,157:
wDiff.DetectBlocks = function(text, blocks, groups) {
// WED('text.
// WED('text.
//
Line 1,165:
// cycle through old text to find matched (linked) blocks
var j = text.
var i = null;
var deletions = [];
Line 1,174:
var delEnd = null;
var string = '';
while ( (j != null) && (text.
string += text.
delEnd = j;
j = text.
}
Line 1,191:
// get 'same' block
if (j != null) {
i = text.
var iStart = i;
var jStart = j;
Line 1,198:
var chars = 0;
var string = '';
while ( (i != null) && (j != null) && (text.
var token = text.
chars += token.length;
string += token;
i = text.
j = text.
}
Line 1,209:
blocks.push({
oldBlock: blocks.length,
oldNumber: text.
newNumber: text.
chars: chars,
type: 'same',
Line 1,392:
// cycle through new text to find insertion blocks
var i = text.
while (i != null) {
// jump over linked (matched) block
while ( (i != null) && (text.
i = text.
}
Line 1,404:
var iStart = i;
var string = '';
while ( (i != null) && (text.
string += text.
i = text.
}
Line 1,413:
oldBlock: null,
oldNumber: null,
newNumber: text.
chars: null,
type: 'ins',
Line 1,504:
blocks.push({
oldBlock: null,
oldNumber: text.
newNumber: newNumber,
chars: null,
Line 2,256:
//
// wDiff.DebugText: dump text (text.
//
|