Content deleted Content added
1.1.0 (September 21, 2014) major clean-up/partial rewrite: objectified, findMaxPath bug fix, unit tests, del block positioning, marks as blocks, unique words in paragraph/sentence tokens
1.1.1 (September 21, 2014) fixed .slideGaps, made character-split tokens unique
||
Line 3:
// ==UserScript==
// @name wDiff
// @version 1.1.1
// @date September 21, 2014
// @description improved word-based diff library with block move detection
Line 190:
}
// regExps for sliding gaps: newlines and space/word breaks
if (wDiff.regExpSlideStop === undefined) { wDiff.regExpSlideStop = new RegExp('[\\n\\r' + wDiff.newLines + ']$'); }
if (wDiff.regExpSlideBorder === undefined) { wDiff.regExpSlideBorder = new RegExp('[
// regExps for counting words
Line 1,209:
this.slideGaps = function (text, textLinked) {
// cycle through tokens list
var i = text.first;
Line 1,222 ⟶ 1,221:
// find gap end
else if ( (gapStart !== null) && (text.tokens[i].link !== null) ) {
// slide down as deep as possible
var front =
var back =
▲ var backTest = null;
while (▼
(front !== null) && (back !== null) &&
(text.tokens[front].link === null) && (text.tokens[back].link !== null) &&
Line 1,236 ⟶ 1,235:
textLinked.tokens[ text.tokens[front].link ].link = front;
text.tokens[back].link = null;
frontTest = front;▼
front = text.tokens[front].next;
back = text.tokens[back].next;
Line 1,243 ⟶ 1,244:
// test slide up, remember last line break or word border
var front = text.tokens[gapFront].prev;
var back = gapBack;
var frontStop = null;
while (
(
(text.tokens[
(text.tokens[
) {
if (wDiff.regExpSlideStop.test(text.tokens[frontTest].token) === true) {▼
break;
}
else if ( (frontStop === null) && (wDiff.regExpSlideBorder.test(text.tokens[frontTest].token) === true) ) {▼
// stop at first space/word break
frontStop = frontTest;▼
▲ else if ( (frontStop === null) && (wDiff.regExpSlideBorder.test(text.tokens[
}
}
// actually slide up to
var front = text.tokens[gapFront].prev;
var
▲ while (
(text.tokens[front].token == text.tokens[back].token)
▲ ) {
▲ text.tokens[back].link = text.tokens[front].link;
▲ back = text.tokens[back].prev;
front = text.tokens[front].prev;
▲ }
back = text.tokens[back].prev;
}
gapStart = null;
Line 1,413 ⟶ 1,421:
// check if token contains unique word
if
var unique = false;
if (level == 'character') {
var words = (token.match(wDiff.regExpWord) || []).concat(token.match(wDiff.regExpChunk) || []);▼
// unique if longer than min block length▼
if (words.length >= wDiff.blockMinLength) {▼
unique = true;
}
// unique if it contains at least one unique word▼
else {
▲ var words = (token.match(wDiff.regExpWord) || []).concat(token.match(wDiff.regExpChunk) || []);
if ( (this.oldText.words[ words[word] ] == 1) && (this.newText.words[ words[word] ] == 1) ) {▼
unique = true;▼
▲ // unique if longer than min block length
break;▼
▲ if (words.length >= wDiff.blockMinLength) {
unique = true;
}
▲ // unique if it contains at least one unique word
else {
for (var word = 0; word < words.length; word ++) {
▲ if ( (this.oldText.words[ words[word] ] == 1) && (this.newText.words[ words[word] ] == 1) ) {
▲ unique = true;
▲ break;
}
}
}
Line 1,732 ⟶ 1,745:
if (wDiff.debug === true) {
this.oldText.debugText('Old text');
this.newText.debugText('New text');
}
|