User:Cacycle/diff.js: Difference between revisions

Content deleted Content added
1.1.1 (September 21, 2014) fix .slideGaps, made char split tokens unique
1.1.2 (September 22, 2014) fix .splitRefineChars(): one token becomes connected or separated by token
Line 3:
// ==UserScript==
// @name wDiff
// @version 1.1.2
// @date September 22, 2014
// @description improved word-based diff library with block move detection
// @homepage https://en.wikipedia.org/wiki/User:Cacycle/diff
Line 969:
 
// TextDiff.splitRefineChars(): split tokens into chars in the following unresolved regions (gaps):
// - one token became connected or separated by space, dash, or any token
// - same number of tokens in gap and strong similarity of all tokens:
// - addition or deletion of flanking strings in tokens
Line 1,064:
// one word became separated by space, dash, or any string
if ( (gaps[gap].newTokens == 1) && (gaps[gap].oldTokens == 3) ) {
ifvar token = (this.newText.tokens[ gaps[gap].newFirst ].token;
var tokenFirst != this.oldText.tokens[ gaps[gap].oldFirst ].token;
var +tokenLast = this.oldText.tokens[ gaps[gap].oldLast ].token ) {;
if ( (token.indexOf(tokenFirst) !== 0) || (token.indexOf(tokenLast) != token.length - tokenLast.length) ) {
continue;
}
}
else if ( (gaps[gap].oldTokens == 1) && (gaps[gap].newTokens == 3) ) {
ifvar token = (this.oldText.tokens[ gaps[gap].oldFirst ].token;
var tokenFirst != this.newText.tokens[ gaps[gap].newFirst ].token;
var +tokenLast = this.newText.tokens[ gaps[gap].newLast ].token ) {;
if ( (token.indexOf(tokenFirst) !== 0) || (token.indexOf(tokenLast) != token.length - tokenLast.length) ) {
continue;
}
Line 1,076 ⟶ 1,082:
continue;
}
gaps[gap].charSplit = charSplittrue;
}
 
// cycle through new text tokens list and set charSplit
else {
var i = gaps[gap].newFirst;
var j = gaps[gap].oldFirst;
while (i !== null) {
var newToken = this.newText.tokens[i].token;
var oldToken = this.oldText.tokens[j].token;
 
// get shorter and longer token
var shorterToken;
var longerToken;
if (newToken.length < oldToken.length) {
shorterToken = newToken;
longerToken = oldToken;
}
else {
shorterToken = oldToken;
longerToken = newToken;
}
 
// not same token length
if (newToken.length != oldToken.length) {
 
// test for addition or deletion of internal string in tokens
 
// find number of identical chars from left
var left = 0;
while (left < shorterToken.length) {
if (newToken.charAt(left) != oldToken.charAt(left)) {
break;
}
left ++;
}
left ++;
}
 
// find number of identical chars from right
var right = 0;
while (right < shorterToken.length) {
if (newToken.charAt(newToken.length - 1 - right) != oldToken.charAt(oldToken.length - 1 - right)) {
break;
}
right ++;
}
right ++;
}
 
// no simple insertion or deletion of internal string
if (left + right != shorterToken.length) {
 
// not addition or deletion of flanking strings in tokens (smaller token not part of larger token)
if (longerToken.indexOf(shorterToken) == -1) {
 
// same text at start or end shorter than different text
if ( (left < shorterToken.length / 2) && (right < shorterToken.length / 2) ) {
 
// do not split into chars this gap
charSplit = false;
break;
}
}
}
}
}
 
// same token length
else if (newToken != oldToken) {
 
// tokens less than 50 % identical
var ident = 0;
for (var pos = 0; pos < shorterToken.length; pos ++) {
if (shorterToken.charAt(pos) == longerToken.charAt(pos)) {
ident ++;
}
break; }
if (ident/shorterToken.length < 0.49) {
 
// do not split into chars this gap
charSplit = false;
break;
}
}
if (ident/shorterToken.length < 0.49) {
 
// next list elements
if (i == gaps[gap].newLast) {
charSplit = false;
break;
}
i = this.newText.tokens[i].next;
j = this.oldText.tokens[j].next;
}
gaps[gap].charSplit = charSplit;
 
// next list elements
if (i == gaps[gap].newLast) {
break;
}
i = this.newText.tokens[i].next;
j = this.oldText.tokens[j].next;
}
gaps[gap].charSplit = charSplit;
}
 
Line 1,172 ⟶ 1,181:
if (gaps[gap].charSplit === true) {
 
// cycle through new text tokens list, link spaces, and split into chars
var i = gaps[gap].newFirst;
var j = gaps[gap].oldFirst;
var newGapLength = i - gaps[gap].newLast;
while (i !== null) {
var newTokenoldGapLength = this.newText.tokensj - gaps[igap].tokenoldLast;
while ( (i !== null) || (j !== null) ) {
var oldToken = this.oldText.tokens[j].token;
 
// link identical tokens (spaces) to keep char refinement to words
if ( (newGapLength == oldGapLength) && (this.newText.tokens[i].token == this.oldText.tokens[j].token) ) {
if (newToken == oldToken) {
this.newText.tokens[i].link = j;
this.oldText.tokens[j].link = i;
}
 
// refine different words into chars
else {
if (i !== null) {
this.newText.split('character', i);
}
this.oldText.split('character', j);
if (j !== null) {
this.oldText.split('character', j);
}
}
 
// next list elements
if (i == gaps[gap].newLast) {
breaki = null;
}
if (j == gaps[gap].oldLast) {
j = null;
}
while if (i !== null) {
i = this.newText.tokens[i].next;
}
if (newTokenj !== oldTokennull) {
j = this.oldText.tokens[j].next;
}
i = this.newText.tokens[i].next;
j = this.oldText.tokens[j].next;
}
}
Line 1,209 ⟶ 1,229:
 
this.slideGaps = function (text, textLinked) {
 
// cycle through tokens list
var i = text.first;