User:Cacycle/diff.js: Difference between revisions

Content deleted Content added
1.2.3 (October 21, 2014) bugfix: no clipping at end
1.2.4 (October 23, 2014) fix word count, disabled unlinking for simple texts with only short linked block lengths (i.e. test cases)
Line 3:
// ==UserScript==
// @name wikEd diff
// @version 1.2.4
// @date October 23, 2014
// @description improved word-based diff library with block move detection
// @homepage https://en.wikipedia.org/wiki/User:Cacycle/diff
Line 70:
* .newText new text
* .oldText old text
* .maxWords word count of longest linked block
* .html diff html
* .error flag: result has not passed unit tests
* .bordersDown[] linked region borders downwards, [new index, old index]
* .bordersUp[] linked region borders upwards, [new index, old index]
Line 109 ⟶ 110:
* .blockEnd last block index
* .unique contains unique linked token
* .maxWords word count of longest linked block
* .words word count
* .chars char count
Line 853 ⟶ 854:
/** @var array blocks Block data (consecutive text tokens) in new text order */
this.blocks = [];
 
/** @var int maxWords Maximal detected word count of all linked blocks */
this.maxWords = 0;
 
/** @var array groups Section blocks that are consecutive in old text order */
Line 1,611 ⟶ 1,615:
}
 
// Get object symbols table and linked region borders
var symbols;
var bordersDown;
Line 1,621 ⟶ 1,625:
}
 
// Create empty local symbols table and linked region borders arrays
else {
symbols = {
Line 1,633 ⟶ 1,637:
 
 
// Updated versions of linked region borders
var bordersUpNext = [];
var bordersDownNext = [];
Line 1,672 ⟶ 1,676:
}
 
// Get next token
if ( up === false ) {
i = this.newText.tokens[i].next;
Line 1,719 ⟶ 1,723:
}
 
// Get next token
if ( up === false ) {
j = this.oldText.tokens[j].next;
Line 1,756 ⟶ 1,760:
symbols.linked = true;
 
// Save linked region borders
bordersDown.push( [newToken, oldToken] );
bordersUp.push( [newToken, oldToken] );
Line 1,769 ⟶ 1,773:
var token = newTokenObj.token;
var words =
( token.match( this.config.regExp.countWords ) || [] ).length +concat(
( token.match( this.config.regExp.countChunks ) || [] ).length;
);
 
// Unique if longer than min block length
ifvar (wordsLength words >= thiswords.config.blockMinLength ) {length;
if ( this.config.unlinkBlockswordsLength ==>= true && this.config.blockMinLength > 0 ) {
unique = true;
}
Line 1,779 ⟶ 1,785:
// Unique if it contains at least one unique word
else {
for ( var wordsLengthi = words.length0;i < wordsLength; i ++ ) {
for ( var word = 0words[i]; word < wordsLength; word ++ ) {
if (
this.oldText.words[ words[word] ] === 1 &&
this.newText.words[ words[word] ] === 1 &&
Object.prototype.hasOwnProperty.call( this.oldText.words, word ) === true &&
Object.prototype.hasOwnProperty.call( this.newText.words, word ) === true
) {
unique = true;
Line 1,955 ⟶ 1,963:
}
 
// Save updated linked region borders to object
if ( recursionLevel === 0 && repeating === false ) {
this.bordersDown = bordersDownNext;
Line 1,961 ⟶ 1,969:
}
 
// Merge local updated linked region borders into object
else {
this.bordersDown = this.bordersDown.concat( bordersDownNext );
Line 2,099 ⟶ 2,107:
 
// Convert groups to insertions/deletions if maximum block length is too short
// Only for more complex texts that actually have blocks of minimum block length
var unlinkCount = 0;
if (
if ( this.config.unlinkBlocks === true && this.config.blockMinLength > 0 ) {
this.config.unlinkBlocks === true &&
this.config.blockMinLength > 0 &&
this.maxWords >= this.config.blockMinLength
) {
if ( this.config.timer === true ) {
this.time( 'total unlinking' );
Line 2,119 ⟶ 2,132:
 
// Repeat block detection from start
this.maxWords = 0;
this.getSameBlocks();
this.getSections();
Line 2,389 ⟶ 2,403:
} );
block = groupEnd;
 
// Set global word count of longest linked block
if ( maxWords > this.maxWords ) {
this.maxWords = maxWords;
}
}
}
Line 4,346 ⟶ 4,365:
this.text = text.replace( /\r\n?/g, '\n');
 
// Parse and count words and chunks for identification of unique real words
if ( this.parent.config.timer === true ) {
this.parent.time( 'wordParse' );
Line 4,368 ⟶ 4,387:
this.wordParse = function ( regExp ) {
 
var wordsregExpMatch = this.text.match( regExp );
if ( wordsregExpMatch !== null ) {
var wordsLengthmatchLength = wordsregExpMatch.length;
for (var i = 0; i < wordsLengthmatchLength; i ++) {
var wordCounterword = this.words[ wordsregExpMatch[i] ];
if ( wordCounterObject.prototype.hasOwnProperty.call( this.words, word ) === undefinedfalse ) {
wordCounterthis.words[word] = 1;
}
else {
wordCounterthis.words[word] ++;
}
}