Content deleted Content added
1.1.1 (September 21, 2014) fix .slideGaps, made char split tokens unique |
1.1.2 (September 22, 2014) fix .splitRefineChars(): one token becomes connected or separated by token |
||
Line 3:
// ==UserScript==
// @name wDiff
// @version 1.1.2
// @date September 22, 2014
// @description improved word-based diff library with block move detection
// @homepage https://en.wikipedia.org/wiki/User:Cacycle/diff
Line 969:
// TextDiff.splitRefineChars(): split tokens into chars in the following unresolved regions (gaps):
// - one token became connected or separated by space
// - same number of tokens in gap and strong similarity of all tokens:
// - addition or deletion of flanking strings in tokens
Line 1,064:
// one word became separated by space, dash, or any string
if ( (gaps[gap].newTokens == 1) && (gaps[gap].oldTokens == 3) ) {
var tokenFirst var if ( (token.indexOf(tokenFirst) !== 0) || (token.indexOf(tokenLast) != token.length - tokenLast.length) ) {
continue;
}
}
else if ( (gaps[gap].oldTokens == 1) && (gaps[gap].newTokens == 3) ) {
var tokenFirst var if ( (token.indexOf(tokenFirst) !== 0) || (token.indexOf(tokenLast) != token.length - tokenLast.length) ) {
continue;
}
Line 1,076 ⟶ 1,082:
continue;
}
}
// cycle through new text tokens list and set charSplit
else {
var i = gaps[gap].newFirst; var j = gaps[gap].oldFirst;
while (i !== null) {
var newToken = this.newText.tokens[i].token;
var oldToken = this.oldText.tokens[j].token;
// get shorter and longer token
var shorterToken;
var longerToken;
if (newToken.length < oldToken.length) {
shorterToken = newToken;
longerToken = oldToken;
}
else {
shorterToken = oldToken;
longerToken = newToken;
}
// not same token length
if (newToken.length != oldToken.length) {
// test for addition or deletion of internal string in tokens
// find number of identical chars from left
var left = 0;
while (left < shorterToken.length) {
if (newToken.charAt(left) != oldToken.charAt(left)) {
break;
}▼
left ++;▼
}
▲ left ++;
▲ }
// find number of identical chars from right
var right = 0;
while (right < shorterToken.length) {
if (newToken.charAt(newToken.length - 1 - right) != oldToken.charAt(oldToken.length - 1 - right)) {
break;
}▼
right ++;▼
}
▲ right ++;
▲ }
// no simple insertion or deletion of internal string
if (left + right != shorterToken.length) {
// not addition or deletion of flanking strings in tokens (smaller token not part of larger token)
if (longerToken.indexOf(shorterToken) == -1) {
// same text at start or end shorter than different text
if ( (left < shorterToken.length / 2) && (right < shorterToken.length / 2) ) {
// do not split into chars this gap
charSplit = false;
break;
}▼
}
}
}
▲ }
// same token length
else if (newToken != oldToken) {
// tokens less than 50 % identical
var ident = 0;
for (var pos = 0; pos < shorterToken.length; pos ++) {
if (shorterToken.charAt(pos) == longerToken.charAt(pos)) {
ident ++;
}▼
if (ident/shorterToken.length < 0.49) {▼
// do not split into chars this gap
charSplit = false;▼
break;
}
}
▲ if (ident/shorterToken.length < 0.49) {
if (i == gaps[gap].newLast) {▼
▲ charSplit = false;
break;
}
i = this.newText.tokens[i].next;▼
j = this.oldText.tokens[j].next;▼
}
gaps[gap].charSplit = charSplit;
▲ if (i == gaps[gap].newLast) {
▲ break;
▲ }
}
▲ gaps[gap].charSplit = charSplit;
}
Line 1,172 ⟶ 1,181:
if (gaps[gap].charSplit === true) {
// cycle through new text tokens list, link spaces, and split into chars
var i = gaps[gap].newFirst;
var j = gaps[gap].oldFirst;
var newGapLength = i - gaps[gap].newLast;
while (i !== null) {▼
while ( (i !== null) || (j !== null) ) {
// link identical tokens (spaces) to keep char refinement to words
if ( (newGapLength == oldGapLength) && (this.newText.tokens[i].token == this.oldText.tokens[j].token) ) {
if (newToken == oldToken) {▼
this.newText.tokens[i].link = j;
this.oldText.tokens[j].link = i;
}
// refine
else {
if (i !== null) {
this.newText.split('character', i); }
this.oldText.split('character', j);▼
if (j !== null) {
▲ this.oldText.split('character', j);
}
}
// next list elements
if (i == gaps[gap].newLast) {
}
if (j == gaps[gap].oldLast) {
j = null;
}
▲ i = this.newText.tokens[i].next;
}
▲ j = this.oldText.tokens[j].next;
}
▲ i = this.newText.tokens[i].next;
▲ j = this.oldText.tokens[j].next;
}
}
Line 1,209 ⟶ 1,229:
this.slideGaps = function (text, textLinked) {
// cycle through tokens list
var i = text.first;
|