User:Cacycle/diff.js: Difference between revisions

Content deleted Content added
1.1.0 (September 21, 2014) major clean-up/partial rewrite: objectified, findMaxPath bug fix, unit tests, del block positioning, marks as blocks, unique words in paragraph/sentence tokens
1.1.1 (September 21, 2014) fix .slideGaps, made char split tokens unique
Line 3:
// ==UserScript==
// @name wDiff
// @version 1.1.1
// @date September 21, 2014
// @description improved word-based diff library with block move detection
Line 190:
}
 
// regExps for sliding gaps: newlines and space/word breaks
if (wDiff.regExpSlideStop === undefined) { wDiff.regExpSlideStop = new RegExp('[\\n\\r' + wDiff.newLines + ']$'); }
if (wDiff.regExpSlideBorder === undefined) { wDiff.regExpSlideBorder = new RegExp('[^ \\t' + wDiff.lettersnewLinesAll + wDiff.newParagraph + '\\x0C\\x0b]$'); }
 
// regExps for counting words
Line 1,209:
 
this.slideGaps = function (text, textLinked) {
 
// cycle through tokens list
var i = text.first;
Line 1,222 ⟶ 1,221:
// find gap end
else if ( (gapStart !== null) && (text.tokens[i].link !== null) ) {
var backTestgapFront = nullgapStart;
backvar gapBack = text.tokens[backi].prev;
 
// slide down as deep as possible
var front = gapStartgapFront;
var back = itext.tokens[gapBack].next;
)if {(
var frontTest = null;
var backTest = null;
while (
(front !== null) && (back !== null) &&
(text.tokens[front].link === null) && (text.tokens[back].link !== null) &&
Line 1,236 ⟶ 1,235:
textLinked.tokens[ text.tokens[front].link ].link = front;
text.tokens[back].link = null;
 
frontTest = front;
backTestgapFront = backtext.tokens[gapFront].next;
text.tokens[back].linkgapBack = text.tokens[frontgapBack].linknext;
 
front = text.tokens[front].next;
back = text.tokens[back].next;
Line 1,243 ⟶ 1,244:
 
// test slide up, remember last line break or word border
var front = text.tokens[gapFront].prev;
var back = gapBack;
var frontStop = null;
while (
(frontTestfront !== null) && (backTestback !== null) &&
(text.tokens[frontTestfront].link !== null) && (text.tokens[backTestback].link === null) &&
(text.tokens[frontTestfront].token == text.tokens[backTestback].token)
) {
 
if (wDiff.regExpSlideStop.test(text.tokens[frontTest].token) === true) {
frontStop = frontTest;
// stop at line break
if (wDiff.regExpSlideStop.test(text.tokens[frontTestfront].token) === true) {
frontTest frontStop = front;
break;
}
 
else if ( (frontStop === null) && (wDiff.regExpSlideBorder.test(text.tokens[frontTest].token) === true) ) {
// stop at first space/word break
frontStop = frontTest;
else if ( (frontStop === null) && (wDiff.regExpSlideBorder.test(text.tokens[frontTestfront].token) === true) ) {
frontStop = frontTestfront;
}
frontTestfront = text.tokens[frontTestfront].prev;
backTestback = text.tokens[backTestback].prev;
}
 
// actually slide up to stop (line break or, if absent, word border)
var front = text.tokens[gapFront].prev;
if (frontStop !== null) {
var whileback (= gapBack;
while (
(front !== null) && (back !== null) && (front !== frontStop) &&
(text.tokens[front].link !== null) && (text.tokens[back].link =!== null) && (front !== frontStop) &&
(text.tokens[front].tokenlink !== null) && (text.tokens[back].tokenlink === null) &&
(text.tokens[front].token == text.tokens[back].token)
) {
}) {
text.tokens[back].link = text.tokens[front].link;
textLinked.tokens[ text.tokens[back].link = text.tokens[front].link = back;
textLinked.tokens[ text.tokens[frontback].link ].link = nullback;
front = text.tokens[front].prevlink = null;
 
back = text.tokens[back].prev;
front = text.tokens[front].prev;
}
back = text.tokens[back].prev;
}
gapStart = null;
Line 1,413 ⟶ 1,421:
 
// check if token contains unique word
if ( (recursionLevel === 0) && (level != 'character') ) {
var unique = false;
if (level == 'character') {
var token = this.newText.tokens[newToken].token;
var words = (token.match(wDiff.regExpWord) || []).concat(token.match(wDiff.regExpChunk) || []);
 
// unique if longer than min block length
if (words.length >= wDiff.blockMinLength) {
unique = true;
}
 
// unique if it contains at least one unique word
else {
for (var wordtoken = 0; word < wordsthis.lengthnewText.tokens[newToken].token; word ++) {
var words = (token.match(wDiff.regExpWord) || []).concat(token.match(wDiff.regExpChunk) || []);
if ( (this.oldText.words[ words[word] ] == 1) && (this.newText.words[ words[word] ] == 1) ) {
 
unique = true;
// unique if longer than min block length
break;
if (words.length >= wDiff.blockMinLength) {
unique = true;
}
 
// unique if it contains at least one unique word
else {
for (var word = 0; word < words.length; word ++) {
if ( (this.oldText.words[ words[word] ] == 1) && (this.newText.words[ words[word] ] == 1) ) {
unique = true;
break;
}
}
}
Line 1,732 ⟶ 1,745:
 
if (wDiff.debug === true) {
this.oldText.debugText('Old text');
this.newText.debugText('New text');
}