Revision as of 10:34, 21 September 2014 view source Cacycle (talk \| contribs) Extended confirmed users 21,997 edits 1.1.1 (September 21, 2014) fix .slideGaps, made char split tokens unique ← Previous edit		Revision as of 14:32, 21 September 2014 view source Cacycle (talk \| contribs) Extended confirmed users 21,997 edits 1.1.2 (September 22, 2014) fix .splitRefineChars(): one token becomes connected or separated by token Next edit →
Line 3: // ==UserScript== // @name wDiff // @version 1.1.12 // @date September 2122, 2014 // @description improved word-based diff library with block move detection // @homepage https://en.wikipedia.org/wiki/User:Cacycle/diff Line 969: // TextDiff.splitRefineChars(): split tokens into chars in the following unresolved regions (gaps): // - one token became connected or separated by space, or dash, (or any ~~string~~token) // - same number of tokens in gap and strong similarity of all tokens: // - addition or deletion of flanking strings in tokens Line 1,064: // one word became separated by space, dash, or any string if ( (gaps[gap].newTokens == 1) && (gaps[gap].oldTokens == 3) ) { ifvar token = (this.newText.tokens[ gaps[gap].newFirst ].token; var tokenFirst != this.oldText.tokens[ gaps[gap].oldFirst ].token; var +tokenLast = this.oldText.tokens[ gaps[gap].oldLast ].token ~~) {~~; if ( (token.indexOf(tokenFirst) !== 0) \|\| (token.indexOf(tokenLast) != token.length - tokenLast.length) ) { continue; } } else if ( (gaps[gap].oldTokens == 1) && (gaps[gap].newTokens == 3) ) { ifvar token = (this.oldText.tokens[ gaps[gap].oldFirst ].token; var tokenFirst != this.newText.tokens[ gaps[gap].newFirst ].token; var +tokenLast = this.newText.tokens[ gaps[gap].newLast ].token ~~) {~~; if ( (token.indexOf(tokenFirst) !== 0) \|\| (token.indexOf(tokenLast) != token.length - tokenLast.length) ) { continue; } Line 1,076 ⟶ 1,082: continue; } gaps[gap].charSplit = ~~charSplit~~true;▼ } // cycle trough new text tokens list and set charSplit else { var i = gaps[gap].newFirst; var j = gaps[gap].oldFirst; while (i !== null) { var newToken = this.newText.tokens[i].token; var oldToken = this.oldText.tokens[j].token; // get shorter and longer token var shorterToken; var longerToken; if (newToken.length < oldToken.length) { shorterToken = newToken; longerToken = oldToken; } else { shorterToken = oldToken; longerToken = newToken; } // not same token length if (newToken.length != oldToken.length) { // test for addition or deletion of internal string in tokens // find number of identical chars from left var left = 0; while (left < shorterToken.length) { if (newToken.charAt(left) != oldToken.charAt(left)) { break; }▼ left ++;▼ } ▲ left ++; ▲ } // find number of identical chars from right var right = 0; while (right < shorterToken.length) { if (newToken.charAt(newToken.length - 1 - right) != oldToken.charAt(oldToken.length - 1 - right)) { break; }▼ right ++;▼ } ▲ right ++; ▲ } // no simple insertion or deletion of internal string if (left + right != shorterToken.length) { // not addition or deletion of flanking strings in tokens (smaller token not part of larger token) if (longerToken.indexOf(shorterToken) == -1) { // same text at start or end shorter than different text if ( (left < shorterToken.length / 2) && (right < shorterToken.length / 2) ) { // do not split into chars this gap charSplit = false; break; }▼ } } } ▲ } // same token length else if (newToken != oldToken) { // tokens less than 50 % identical var ident = 0; for (var pos = 0; pos < shorterToken.length; pos ++) { if (shorterToken.charAt(pos) == longerToken.charAt(pos)) { ident ++; }▼ ~~break;~~ }▼ if (ident/shorterToken.length < 0.49) {▼ // do not split into chars this gap charSplit = false;▼ break; } } ▲ if (ident/shorterToken.length < 0.49) { // donext ~~not~~list ~~split into chars this gap~~elements if (i == gaps[gap].newLast) {▼ ▲ charSplit = false; break; } i = this.newText.tokens[i].next;▼ j = this.oldText.tokens[j].next;▼ } gaps[gap].charSplit = charSplit; ~~// next list elements~~ ▲ if (i == gaps[gap].newLast) { ▲ break; ▲ } ~~i = this.newText.tokens[i].next;~~ ~~j = this.oldText.tokens[j].next;~~ } ▲ gaps[gap].charSplit = charSplit; } Line 1,172 ⟶ 1,181: if (gaps[gap].charSplit === true) { // cycle trough new text tokens list, link spaces, and split into chars var i = gaps[gap].newFirst; var j = gaps[gap].oldFirst; var newGapLength = i - gaps[gap].newLast; while (i !== null) {▼ var ~~newToken~~oldGapLength = ~~this.newText.tokens~~j - gaps[igap].~~token~~oldLast; while ( (i !== null) \|\| (j !== null) ) { ~~var oldToken = this.oldText.tokens[j].token;~~ // link identical tokens (spaces) to keep char refinement to words if ( (newGapLength == oldGapLength) && (this.newText.tokens[i].token == this.oldText.tokens[j].token) ) { if (newToken == oldToken) {▼ this.newText.tokens[i].link = j; this.oldText.tokens[j].link = i; } // refine ~~different~~ words into chars else { if (i !== null) { this.newText.split('character', i); } this.oldText.split('character', j);▼ if (j !== null) { ▲ this.oldText.split('character', j); } } // next list elements if (i == gaps[gap].newLast) { ~~break~~i = null; } if (j == gaps[gap].oldLast) { j = null; } ▲ ~~while~~ if (i !== null) { ▲ i = this.newText.tokens[i].next; } ▲ if (~~newToken~~j !== ~~oldToken~~null) { ▲ j = this.oldText.tokens[j].next; } ▲ i = this.newText.tokens[i].next; ▲ j = this.oldText.tokens[j].next; } } Line 1,209 ⟶ 1,229: this.slideGaps = function (text, textLinked) { // cycle through tokens list var i = text.first;

User:Cacycle/diff.js: Difference between revisions