User:Cacycle/diff.js: Difference between revisions

Content deleted Content added
1.2.1 (October 14, 2014) fix slideGaps(), fix sentence split, fix space highlighting, pre container, optimizations: calculateDiff() borders array, for loop pre-calc, freeing memory, 'repeatedDiff' option
another background that could use some darkmode friendly color
 
(8 intermediate revisions by 3 users not shown)
Line 3:
// ==UserScript==
// @name wikEd diff
// @version 1.2.4
// @date October 23, 2014
// @description improved word-based diff library with block move detection
// @homepage https://en.wikipedia.org/wiki/User:Cacycle/diff
Line 36:
* - Resolution down to characters level
* - Unicode and multilingual support
* - Stepwise split (paragraphs, lines, sentences, words, characters)
* - Recursive diff
* - Optimized code for resolving unmatched sequences
Line 70:
* .newText new text
* .oldText old text
* .maxWords word count of longest linked block
* .html diff html
* .error flag: result has not passed unit tests
* .bordersDown[] linked region borders downwards, [new index, old index]
* .bordersUp[] linked region borders upwards, [new index, old index]
Line 109 ⟶ 110:
* .blockEnd last block index
* .unique contains unique linked token
* .maxWords word count of longest linked block
* .words word count
* .chars char count
Line 349 ⟶ 350:
// Insert
'.wikEdDiffInsert {' +
'font-weight: bold; background-color: #bbddff; ' +
'color: #222; border-radius: 0.25em; padding: 0.2em 1px; ' +
'} ' +
'.wikEdDiffInsertBlank { background-color: #66bbff; } ' +
'.wikEdDiffFragment:hover .wikEdDiffInsertBlank { background-color: #bbddff; } ' +
 
// Delete
'.wikEdDiffDelete {' +
'font-weight: bold; background-color: #ffe49c; ' +
'color: #222; border-radius: 0.25em; padding: 0.2em 1px; ' +
'} ' +
'.wikEdDiffDeleteBlank { background-color: #ffd064; } ' +
'.wikEdDiffFragment:hover .wikEdDiffDeleteBlank { background-color: #ffe49c; } ' +
 
// Block
'.wikEdDiffBlock {' +
'font-weight: bold; background-color: #e8e8e8; ' +
'border-radius: 0.25em; padding: 0.2em 1px; margin: 0 1px; ' +
'} ' +
'.wikEdDiffBlock { color: #000; } ' +
'.wikEdDiffBlock0 { background-color: #ffff80; } ' +
'.wikEdDiffBlock1 { background-color: #d0ff80; } ' +
'.wikEdDiffBlock2 { background-color: #ffd8f0; } ' +
'.wikEdDiffBlock3 { background-color: #c0ffff; } ' +
'.wikEdDiffBlock4 { background-color: #fff888; } ' +
'.wikEdDiffBlock5 { background-color: #bbccff; } ' +
'.wikEdDiffBlock6 { background-color: #e8c8ff; } ' +
'.wikEdDiffBlock7 { background-color: #ffbbbb; } ' +
'.wikEdDiffBlock8 { background-color: #a0e8a0; } ' +
'.wikEdDiffBlockHighlight {' +
'background-color: #777; color: #fff; ' +
'border: solid #777; border-width: 1px 0; ' +
'} ' +
 
// Mark
'.wikEdDiffMarkLeft, .wikEdDiffMarkRight {' +
'font-weight: bold; background-color: #ffe49c; ' +
'color: #666; border-radius: 0.25em; padding: 0.2em; margin: 0 1px; ' +
'} ' +
'.wikEdDiffMarkLeft:before { content: "{cssMarkLeft}"; } ' +
'.wikEdDiffMarkRight:before { content: "{cssMarkRight}"; } ' +
'.wikEdDiffMarkLeft.wikEdDiffNoUnicode:before { content: "<"; } ' +
'.wikEdDiffMarkRight.wikEdDiffNoUnicode:before { content: ">"; } ' +
'.wikEdDiffMark { background-color: #e8e8e8; color: #666; } ' +
'.wikEdDiffMark0 { background-color: #ffff60; } ' +
'.wikEdDiffMark1 { background-color: #c8f880; } ' +
'.wikEdDiffMark2 { background-color: #ffd0f0; } ' +
'.wikEdDiffMark3 { background-color: #a0ffff; } ' +
'.wikEdDiffMark4 { background-color: #fff860; } ' +
'.wikEdDiffMark5 { background-color: #b0c0ff; } ' +
'.wikEdDiffMark6 { background-color: #e0c0ff; } ' +
'.wikEdDiffMark7 { background-color: #ffa8a8; } ' +
'.wikEdDiffMark8 { background-color: #98e898; } ' +
'.wikEdDiffMarkHighlight { background-color: #777; color: #fff; } ' +
 
// Wrappers
'.wikEdDiffContainer { } ' +
'.wikEdDiffFragment {' +
'white-space: pre-wrap; background-color: var(--background-color-base, #fff); border: #bbb solid; ' +
'border-width: 1px 1px 1px 0.5em; border-radius: 0.5em; font-family: sans-serif; ' +
'font-size: 88%; line-height: 1.6; box-shadow: 2px 2px 2px #ddd; padding: 1em; margin: 0; ' +
'} ' +
'.wikEdDiffNoChange { white-spacebackground: prevar(--wrap; background:-color-interactive, #f0f0f0eaecf0); border: 1px #bbb solid; border-radius: 0.5em; ' +
'borderline-widthheight: 1px1.6; 1pxbox-shadow: 1px2px 0.5em2px 2px #ddd; border-radiuspadding: 0.5em; font-familymargin: sans-serif1em 0; ' +
'text-align: center; ' +
'font-size: 88%; line-height: 1.6; box-shadow: 2px 2px 2px #ddd; padding: 0.5em;' +
'} ' +
'margin: 1em 0; text-align: center;' +
'.wikEdDiffSeparator { margin-bottom: 1em; } ' +
'}' +
'.wikEdDiffSeparatorwikEdDiffOmittedChars { margin-bottom:} 1em; }' +
'.wikEdDiffOmittedChars { }' +
 
// Newline
'.wikEdDiffNewline:before { content: "¶"; color: transparent; } ' +
'.wikEdDiffBlock:hover .wikEdDiffNewline:before { color: #aaa; } ' +
'.wikEdDiffBlockHighlight .wikEdDiffNewline:before { color: transparent; } ' +
'.wikEdDiffBlockHighlight:hover .wikEdDiffNewline:before { color: #ccc; } ' +
'.wikEdDiffBlockHighlight:hover .wikEdDiffInsert .wikEdDiffNewline:before, ' +
'.wikEdDiffInsert:hover .wikEdDiffNewline:before' +
'{ color: #999; } ' +
'.wikEdDiffBlockHighlight:hover .wikEdDiffDelete .wikEdDiffNewline:before, ' +
'.wikEdDiffDelete:hover .wikEdDiffNewline:before' +
'{ color: #aaa; } ' +
 
// Tab
'.wikEdDiffTab { position: relative; } ' +
'.wikEdDiffTabSymbol { position: absolute; top: -0.2em; } ' +
'.wikEdDiffTabSymbol:before { content: "→"; font-size: smaller; color: #ccc; } ' +
'.wikEdDiffBlock .wikEdDiffTabSymbol:before { color: #aaa; } ' +
'.wikEdDiffBlockHighlight .wikEdDiffTabSymbol:before { color: #aaa; } ' +
'.wikEdDiffInsert .wikEdDiffTabSymbol:before { color: #aaa; } ' +
'.wikEdDiffDelete .wikEdDiffTabSymbol:before { color: #bbb; } ' +
 
// Space
'.wikEdDiffSpace { position: relative; } ' +
'.wikEdDiffSpaceSymbol { position: absolute; top: -0.2em; left: -0.05em; } ' +
'.wikEdDiffSpaceSymbol:before { content: "·"; color: transparent; } ' +
'.wikEdDiffBlock:hover .wikEdDiffSpaceSymbol:before { color: #999; } ' +
'.wikEdDiffBlockHighlight .wikEdDiffSpaceSymbol:before { color: transparent; } ' +
'.wikEdDiffBlockHighlight:hover .wikEdDiffSpaceSymbol:before { color: #ddd; } ' +
'.wikEdDiffBlockHighlight:hover .wikEdDiffInsert .wikEdDiffSpaceSymbol:before,' +
'.wikEdDiffInsert:hover .wikEdDiffSpaceSymbol:before ' +
'{ color: #888; } ' +
'.wikEdDiffBlockHighlight:hover .wikEdDiffDelete .wikEdDiffSpaceSymbol:before,' +
'.wikEdDiffDelete:hover .wikEdDiffSpaceSymbol:before ' +
'{ color: #999; } ' +
 
// Error
Line 469:
// Split into paragraphs, after double newlines
'paragraph': new RegExp(
'(.|\\n)*?((\\r\\n|\\n|\\r){2,}|[' +
this.config.regExpNewParagraph +
'])+',
'g'
),
 
// Split into lines
'line': new RegExp(
'\\r\\n|\\n|\\r|[' +
this.config.regExpNewLinesAll +
']',
'g'
),
 
// Split into sentences /[^ ].*?[.!?:;]+(?= |$)/
'sentence': new RegExp(
'[^' +
this.config.regExpBlanks +
this'].config*?[.regExpNewLinesAll!?:;' +
'][^' +
this.config.regExpNewLinesAll +
']*?[.!?;' +
this.config.regExpFullStops +
this.config.regExpExclamationMarks +
Line 488 ⟶ 493:
']+(?=[' +
this.config.regExpBlanks +
']|$)',
this.config.regExpNewLinesAll +
']|$)|[' +
this.config.regExpNewLines +
']|\\r\\n|\\n|\\r',
'g'
),
Line 508 ⟶ 510:
 
// Split into words, multi-char markup, and chars
// regExpLetters speed-up: \\w+
'word': new RegExp(
'(\\w+|[_' +
this.config.regExpLetters +
'])+([\'’_]?[_' +
this.config.regExpLetters +
']+*)*|\\[\\[|\\]\\]|\\{\\{|\\}\\}|&\\w+;|\'\'\'|\'\'|==+|\\{\\||\\|\\}|\\|-|.',
'g'
),
Line 545 ⟶ 548:
// RegExps for counting words
'countWords': new RegExp(
'(\\w+|[_' +
this.config.regExpLetters +
'])+([\'’_]?[_' +
this.config.regExpLetters +
']+ *)*',
'g'
),
Line 851 ⟶ 854:
/** @var array blocks Block data (consecutive text tokens) in new text order */
this.blocks = [];
 
/** @var int maxWords Maximal detected word count of all linked blocks */
this.maxWords = 0;
 
/** @var array groups Section blocks that are consecutive in old text order */
Line 994 ⟶ 1,000:
 
// Split new and old text into paragraphs
if ( this.config.timer === true ) {
this.time( 'paragraph split' );
}
this.newText.splitText( 'paragraph' );
this.oldText.splitText( 'paragraph' );
if ( this.config.timer === true ) {
this.timeEnd( 'paragraph split' );
}
 
// Calculate diff
this.calculateDiff( 'paragraphline' );
 
// Refine different paragraphs into lines
if ( this.config.timer === true ) {
this.time( 'line split' );
}
this.newText.splitRefine( 'line' );
this.oldText.splitRefine( 'line' );
if ( this.config.timer === true ) {
this.timeEnd( 'line split' );
}
 
// Calculate refined diff
this.calculateDiff( 'line' );
 
// Refine different lines into sentences
if ( this.config.timer === true ) {
this.time( 'sentence split' );
}
this.newText.splitRefine( 'sentence' );
this.oldText.splitRefine( 'sentence' );
if ( this.config.timer === true ) {
this.timeEnd( 'sentence split' );
}
 
// Calculate refined diff
this.calculateDiff( 'sentence' );
 
// Refine different sentences into chunks
if ( this.config.timer === true ) {
this.time( 'chunk split' );
Line 1,020 ⟶ 1,051:
this.calculateDiff( 'chunk' );
 
// Refine different chunks into words
if ( this.config.timer === true ) {
this.time( 'word split' );
Line 1,069 ⟶ 1,100:
}
 
// Free memory
this.symbols = undefined;
this.bordersDown = undefined;
this.bordersUp = undefined;
this.newText.words = undefined;
this.oldText.words = undefined;
 
// Enumerate token lists
Line 1,087 ⟶ 1,120:
}
 
// Free memory
this.newText.tokens = undefined;
this.oldText.tokens = undefined;
Line 1,094 ⟶ 1,127:
this.getDiffFragments();
 
// Free memory
this.blocks = undefined;
this.groups = undefined;
Line 1,185 ⟶ 1,218:
var i = this.newText.first;
var j = this.oldText.first;
while ( i !== null && this.newText.tokens[i] !== null ) {
 
// Get token links
Line 1,262 ⟶ 1,295:
if (
token.indexOf( tokenFirst ) !== 0 ||
token.indexOf( tokenLast ) !== token.length - tokenLast.length )
) {
continue;
Line 1,273 ⟶ 1,306:
if (
token.indexOf( tokenFirst ) !== 0 ||
token.indexOf( tokenLast ) !== token.length - tokenLast.length )
) {
continue;
Line 1,340 ⟶ 1,373:
if ( left < shorterToken.length / 2 && (right < shorterToken.length / 2) ) {
 
// Do not split into chars in this gap
charSplit = false;
break;
Line 1,436 ⟶ 1,469:
*/
this.slideGaps = function ( text, textLinked ) {
 
var regExpSlideBorder = this.config.regExp.slideBorder;
var regExpSlideStop = this.config.regExp.slideStop;
 
// Cycle through tokens list
var i = text.first;
var gapStart = null;
while ( i !== null && text.tokens[i] !== null ) {
 
// Remember gap start
Line 1,476 ⟶ 1,512:
var front = text.tokens[gapFront].prev;
var back = gapBack;
var gapFrontBlankTest = this.config.regExp.slideBorderregExpSlideBorder.test( text.tokens[gapFront].token );
var frontStop = front;
if ( text.tokens[back].link === null ) {
Line 1,485 ⟶ 1,521:
text.tokens[front].token === text.tokens[back].token
) {
front = text.tokens[front].prev;
back = text.tokens[back].prev;
if ( front !== null ) {
 
// Stop at line break
if ( this.config.regExp.slideStopregExpSlideStop.test( text.tokens[front].token ) === true ) {
frontStop = front;
break;
Line 1,497 ⟶ 1,531:
// Stop at first word border (blank/word or word/blank)
if (
this.config.regExp.slideBorderregExpSlideBorder.test( text.tokens[front].token ) !== gapFrontBlankTest ) {
gapFrontBlankTest
) {
frontStop = front;
}
}
front = text.tokens[front].prev;
back = text.tokens[back].prev;
}
}
Line 1,546 ⟶ 1,580:
*
* @param array symbols Symbol table object
* @param string level Split level: 'paragraph', 'line', 'sentence', 'chunk', 'word', or 'character'
*
* Optionally for recursive or repeated calls:
Line 1,581 ⟶ 1,615:
}
 
// Get object symbols table and linked region borders
var symbols;
var bordersDown;
Line 1,591 ⟶ 1,625:
}
 
// Create empty local symbols table and linked region borders arrays
else {
symbols = {
Line 1,603 ⟶ 1,637:
 
 
// Updated versions of linked region borders
var bordersUpNext = [];
var bordersDownNext = [];
Line 1,613 ⟶ 1,647:
// Cycle through new text tokens list
var i = newStart;
while ( i !== null && this.newText.tokens[i] !== null ) {
if ( this.newText.tokens[i].link === null ) {
 
Line 1,619 ⟶ 1,653:
var token = this.newText.tokens[i].token;
if ( Object.prototype.hasOwnProperty.call( symbols.hashTable, token ) === false ) {
var currentsymbols.hashTable[token] = symbols.token.length;
symbols.hashTable[token].push( = current;{
symbols.token[current] = {
newCount: 1,
oldCount: 0,
newToken: i,
oldToken: null
} );
}
 
Line 1,643 ⟶ 1,676:
}
 
// Get next token
if ( up === false ) {
i = this.newText.tokens[i].next;
Line 1,658 ⟶ 1,691:
// Cycle through old text tokens list
var j = oldStart;
while ( j !== null && this.oldText.tokens[j] !== null ) {
if ( this.oldText.tokens[j].link === null ) {
 
Line 1,664 ⟶ 1,697:
var token = this.oldText.tokens[j].token;
if ( Object.prototype.hasOwnProperty.call( symbols.hashTable, token ) === false ) {
var currentsymbols.hashTable[token] = symbols.token.length;
symbols.hashTable[token].push( = current;{
symbols.token[current] = {
newCount: 0,
oldCount: 1,
newToken: null,
oldToken: j
} );
}
 
Line 1,691 ⟶ 1,723:
}
 
// Get next token
if ( up === false ) {
j = this.oldText.tokens[j].next;
Line 1,712 ⟶ 1,744:
var newToken = symbols.token[i].newToken;
var oldToken = symbols.token[i].oldToken;
var newTokenObj = this.newText.tokens[newToken];
var oldTokenObj = this.oldText.tokens[oldToken];
 
// Connect from new to old and from old to new
if ( this.newText.tokens[newToken]newTokenObj.link === null ) {
 
// Do not use spaces as unique markers
if (
this.config.regExp.blankOnlyToken.test( this.newText.tokens[newToken]newTokenObj.token ) === true
) {
 
// Link new and old tokens
this.newText.tokens[newToken]newTokenObj.link = oldToken;
this.oldText.tokens[oldToken]oldTokenObj.link = newToken;
symbols.linked = true;
 
// Save linked region borders
bordersDown.push( [newToken, oldToken] );
bordersUp.push( [newToken, oldToken] );
Line 1,737 ⟶ 1,771:
}
else {
var token = this.newText.tokens[newToken]newTokenObj.token;
var words =
( token.match( this.config.regExp.countWords ) || [] ).length +concat(
( token.match( this.config.regExp.countChunks ) || [] ).length;
);
 
// Unique if longer than min block length
ifvar (wordsLength words >= thiswords.config.blockMinLength ) {length;
if ( wordsLength >= this.config.blockMinLength ) {
unique = true;
}
Line 1,749 ⟶ 1,785:
// Unique if it contains at least one unique word
else {
for ( var wordsLengthi = words.length0;i < wordsLength; i ++ ) {
for ( var word = 0words[i]; word < wordsLength; word ++ ) {
if (
this.oldText.words[ words[word] ] === 1 &&
this.newText.words[ words[word] ] === 1 &&
Object.prototype.hasOwnProperty.call( this.oldText.words, word ) === true &&
Object.prototype.hasOwnProperty.call( this.newText.words, word ) === true
) {
unique = true;
Line 1,764 ⟶ 1,802:
// Set unique
if ( unique === true ) {
this.newText.tokens[newToken]newTokenObj.unique = true;
this.oldText.tokens[oldToken]oldTokenObj.unique = true;
}
}
Line 1,925 ⟶ 1,963:
}
 
// Save updated linked region borders to object
if ( recursionLevel === 0 && repeating === false ) {
this.bordersDown = bordersDownNext;
Line 1,931 ⟶ 1,969:
}
 
// Merge local updated linked region borders into object
else {
this.bordersDown = this.bordersDown.concat( bordersDownNext );
Line 2,066 ⟶ 2,104:
 
// Set longest sequence of increasing groups in sections as fixed (not moved)
if ( this.config.timer === true ) {
this.time( 'setFixed' );
}
this.setFixed();
if ( this.config.timer === true ) {
this.time( 'setFixed' );
}
 
// Convert groups to insertions/deletions if maximum block length is too short
// Only for more complex texts that actually have blocks of minimum block length
var unlinkCount = 0;
if (
if ( this.config.unlinkBlocks === true && this.config.blockMinLength > 0 ) {
this.config.unlinkBlocks === true &&
this.config.blockMinLength > 0 &&
this.maxWords >= this.config.blockMinLength
) {
if ( this.config.timer === true ) {
this.time( 'unlinktotal unlinking' );
}
 
Line 2,095 ⟶ 2,132:
 
// Repeat block detection from start
this.maxWords = 0;
this.getSameBlocks();
this.getSections();
Line 2,102 ⟶ 2,140:
}
if ( this.config.timer === true ) {
this.timeEnd( 'unlinktotal unlinking' );
}
}
Line 2,140 ⟶ 2,178:
*/
this.getSameBlocks = function () {
 
if ( this.config.timer === true ) {
this.time( 'getSameBlocks' );
}
 
var blocks = this.blocks;
Line 2,167 ⟶ 2,209:
var text = '';
while ( i !== null && j !== null && this.oldText.tokens[j].link === i ) {
var tokentext += this.oldText.tokens[j].token;
count ++;
if ( this.newText.tokens[i].unique === true ) {
unique = true;
}
text += token;
i = this.newText.tokens[i].next;
j = this.oldText.tokens[j].next;
Line 2,207 ⟶ 2,248:
for ( var block = 0; block < blocksLength; block ++ ) {
blocks[block].newBlock = block;
}
 
if ( this.config.timer === true ) {
this.timeEnd( 'getSameBlocks' );
}
return;
Line 2,220 ⟶ 2,265:
*/
this.getSections = function () {
 
if ( this.config.timer === true ) {
this.time( 'getSections' );
}
 
var blocks = this.blocks;
Line 2,265 ⟶ 2,314:
block = sectionEnd;
}
}
if ( this.config.timer === true ) {
this.timeEnd( 'getSections' );
}
return;
Line 2,277 ⟶ 2,329:
*/
this.getGroups = function () {
 
if ( this.config.timer === true ) {
this.time( 'getGroups' );
}
 
var blocks = this.blocks;
Line 2,347 ⟶ 2,403:
} );
block = groupEnd;
 
// Set global word count of longest linked block
if ( maxWords > this.maxWords ) {
this.maxWords = maxWords;
}
}
}
if ( this.config.timer === true ) {
this.timeEnd( 'getGroups' );
}
return;
Line 2,361 ⟶ 2,425:
*/
this.setFixed = function () {
 
if ( this.config.timer === true ) {
this.time( 'setFixed' );
}
 
var blocks = this.blocks;
Line 2,400 ⟶ 2,468:
}
}
}
if ( this.config.timer === true ) {
this.timeEnd( 'setFixed' );
}
return;
Line 2,456 ⟶ 2,527:
 
return returnObj;
};
 
 
/**
* Convert matching '=' blocks in groups into insertion/deletion ('+'/'-') pairs
* if too short and too common.
* Prevents fragmented diffs for very different versions.
*
* @param[in] array blocks Blocks table object
* @param[in/out] WikEdDiffText newText, oldText Text object, linked property
* @param[in/out] array groups Groups table object
* @return bool True if text tokens were unlinked
*/
this.unlinkBlocks = function () {

	// Turns short, non-unique matching ('=') blocks back into unlinked tokens.
	// Reads this.blocks, this.groups and this.config.blockMinLength; the actual
	// unlinking of a block's tokens is delegated to this.unlinkSingleBlock().
	// Returns true if at least one block was unlinked, false otherwise.
	var allBlocks = this.blocks;
	var allGroups = this.groups;
	var didUnlink = false;

	// Visit every group exactly once (length is sampled before the loop starts)
	for ( var g = 0, total = allGroups.length; g < total; g ++ ) {
		var currentGroup = allGroups[g];
		var first = currentGroup.blockStart;
		var last = currentGroup.blockEnd;
		var b;

		// The group has neither a block of at least blockMinLength words nor a unique
		// block: dissolve every '=' block it contains
		if ( currentGroup.maxWords < this.config.blockMinLength && currentGroup.unique === false ) {
			for ( b = first; b <= last; b ++ ) {
				if ( allBlocks[b].type !== '=' ) {
					continue;
				}
				this.unlinkSingleBlock( allBlocks[b] );
				didUnlink = true;
			}
			continue;
		}

		// Otherwise only trim the flanks of the group.
		// Leading flank: walk forward until a block has more than one word or is unique
		for ( b = first; b <= last; b ++ ) {
			var head = allBlocks[b];
			if ( head.type !== '=' ) {
				continue;
			}
			if ( head.words > 1 || head.unique === true ) {
				break;
			}
			this.unlinkSingleBlock( head );
			didUnlink = true;

			// Remember how far the front was trimmed so the backward pass stops there
			first = b;
		}

		// Trailing flank: walk backward, never reaching the last block trimmed from the front
		for ( b = last; b > first; b -- ) {
			var tail = allBlocks[b];
			if ( tail.type !== '=' ) {
				continue;
			}
			if ( tail.words > 1 || ( tail.words === 1 && tail.unique === true ) ) {
				break;
			}
			this.unlinkSingleBlock( tail );
			didUnlink = true;
		}
	}
	return didUnlink;
};
 
 
/**
* Unlink text tokens of single block, convert them into insertion/deletion ('+'/'-') pairs.
*
* @param[in] array blocks Blocks table object
* @param[out] WikEdDiffText newText, oldText Text objects, link property
*/
this.unlinkSingleBlock = function ( block ) {

	// Walks block.count tokens of the old text, starting at block.oldStart and
	// following each token's .next index. For every token visited, the link of
	// its partner in the new text and its own link are both reset to null.
	var oldIndex = block.oldStart;
	var visited = 0;
	while ( visited < block.count ) {
		var oldToken = this.oldText.tokens[oldIndex];

		// Clear the partner's link first; it is located through the link about to be erased
		this.newText.tokens[ oldToken.link ].link = null;
		oldToken.link = null;

		oldIndex = oldToken.next;
		visited ++;
	}
};
 
Line 2,466 ⟶ 2,628:
*/
this.getDelBlocks = function () {
 
if ( this.config.timer === true ) {
this.time( 'getDelBlocks' );
}
 
var blocks = this.blocks;
Line 2,513 ⟶ 2,679:
}
}
}
if ( this.config.timer === true ) {
this.timeEnd( 'getDelBlocks' );
}
return;
Line 2,534 ⟶ 2,703:
*/
this.positionDelBlocks = function () {
 
if ( this.config.timer === true ) {
this.time( 'positionDelBlocks' );
}
 
var blocks = this.blocks;
Line 2,629 ⟶ 2,802:
this.sortBlocks();
 
if ( this.config.timer === true ) {
return;
this.timeEnd( 'positionDelBlocks' );
};
 
 
/**
* Convert matching '=' blocks in groups into insertion/deletion ('+'/'-') pairs
* if too short and too common.
* Prevents fragmented diffs for very different versions.
*
* @param[in] array blocks Blocks table object
* @param[in/out] WikEdDiffText newText, oldText Text object, linked property
* @param[in/out] array groups Groups table object
* @return bool True if text tokens were unlinked
*/
this.unlinkBlocks = function () {

	// Turns short, non-unique matching ('=') blocks back into unlinked tokens.
	// Reads this.blocks, this.groups and this.config.blockMinLength; the actual
	// unlinking of a block's tokens is delegated to this.unlinkSingleBlock().
	// Returns true if at least one block was unlinked, false otherwise.
	var allBlocks = this.blocks;
	var allGroups = this.groups;
	var didUnlink = false;

	// Visit every group exactly once (length is sampled before the loop starts)
	for ( var g = 0, total = allGroups.length; g < total; g ++ ) {
		var currentGroup = allGroups[g];
		var first = currentGroup.blockStart;
		var last = currentGroup.blockEnd;
		var b;

		// The group has neither a block of at least blockMinLength words nor a unique
		// block: dissolve every '=' block it contains
		if ( currentGroup.maxWords < this.config.blockMinLength && currentGroup.unique === false ) {
			for ( b = first; b <= last; b ++ ) {
				if ( allBlocks[b].type !== '=' ) {
					continue;
				}
				this.unlinkSingleBlock( allBlocks[b] );
				didUnlink = true;
			}
			continue;
		}

		// Otherwise only trim the flanks of the group.
		// Leading flank: walk forward until a block has more than one word or is unique
		for ( b = first; b <= last; b ++ ) {
			var head = allBlocks[b];
			if ( head.type !== '=' ) {
				continue;
			}
			if ( head.words > 1 || head.unique === true ) {
				break;
			}
			this.unlinkSingleBlock( head );
			didUnlink = true;

			// Remember how far the front was trimmed so the backward pass stops there
			first = b;
		}

		// Trailing flank: walk backward, never reaching the last block trimmed from the front
		for ( b = last; b > first; b -- ) {
			var tail = allBlocks[b];
			if ( tail.type !== '=' ) {
				continue;
			}
			if ( tail.words > 1 || ( tail.words === 1 && tail.unique === true ) ) {
				break;
			}
			this.unlinkSingleBlock( tail );
			didUnlink = true;
		}
	}
	return didUnlink;
};
 
 
/**
* Unlink text tokens of single block, convert them into insertion/deletion ('+'/'-') pairs.
*
* @param[in] array blocks Blocks table object
* @param[out] WikEdDiffText newText, oldText Text objects, link property
*/
this.unlinkSingleBlock = function ( block ) {
 
// Cycle through old text
var j = block.oldStart;
for ( var count = 0; count < block.count; count ++ ) {
 
// Unlink tokens
this.newText.tokens[ this.oldText.tokens[j].link ].link = null;
this.oldText.tokens[j].link = null;
j = this.oldText.tokens[j].next;
}
return;
Line 2,730 ⟶ 2,816:
*/
this.getInsBlocks = function () {
 
if ( this.config.timer === true ) {
this.time( 'getInsBlocks' );
}
 
var blocks = this.blocks;
Line 2,777 ⟶ 2,867:
this.sortBlocks();
 
if ( this.config.timer === true ) {
this.timeEnd( 'getInsBlocks' );
}
return;
};
Line 2,826 ⟶ 2,919:
*/
this.setInsGroups = function () {
 
if ( this.config.timer === true ) {
this.time( 'setInsGroups' );
}
 
var blocks = this.blocks;
Line 2,866 ⟶ 2,963:
} );
}
}
if ( this.config.timer === true ) {
this.timeEnd( 'setInsGroups' );
}
return;
Line 2,893 ⟶ 2,993:
*/
this.insertMarks = function () {
 
if ( this.config.timer === true ) {
this.time( 'insertMarks' );
}
 
var blocks = this.blocks;
Line 3,026 ⟶ 3,130:
this.sortBlocks();
 
if ( this.config.timer === true ) {
this.timeEnd( 'insertMarks' );
}
return;
};
Line 3,523 ⟶ 3,630:
// Remove split element
fragments.splice( fragment, 1 );
fragmentsLength --;
 
// Add left text to fragments list
if ( rangeLeft !== null ) {
fragments.splice( fragment ++, 0, { text: textLeft, type: '=', color: null } );
fragmentsLength ++;
if ( omittedLeft !== null ) {
fragments.splice( fragment ++, 0, { text: '', type: omittedLeft, color: null } );
fragmentsLength ++;
}
}
Line 3,537 ⟶ 3,647:
fragments.splice( fragment ++, 0, { text: '', type: ',', color: null } );
fragments.splice( fragment ++, 0, { text: '', type: '[', color: null } );
fragmentsLength += 3;
}
 
Line 3,543 ⟶ 3,654:
if ( omittedRight !== null ) {
fragments.splice( fragment ++, 0, { text: '', type: omittedRight, color: null } );
fragmentsLength ++;
}
fragments.splice( fragment ++, 0, { text: textRight, type: '=', color: null } );
fragmentsLength ++;
}
}
Line 4,056 ⟶ 4,169:
*
* @param string label Timer label
* @param[out] array timer Current time in milliseconds (float)
*/
this.time = function ( label ) {
Line 4,072 ⟶ 4,185:
* @param string label Timer label
* @param bool noLog Do not log result
* @return float Time in milliseconds, rounded to two decimal digits
*/
this.timeEnd = function ( label, noLog ) {
Line 4,083 ⟶ 4,196:
this.timer[label] = undefined;
if ( noLog !== true ) {
console.log( label + ': ' + diff.toFixed( 2 ) + ' ms' );
}
}
Line 4,110 ⟶ 4,223:
var timerLength = this.recursionTimer.length;
for ( var i = 0; i < timerLength; i ++ ) {
console.log( text + ' recursion ' + i + ': ' + this.recursionTimer[i].toFixed( 2 ) + ' ms\n' );
}
}
Line 4,252 ⟶ 4,365:
this.text = text.replace( /\r\n?/g, '\n');
 
// Parse and count words and chunks for identification of unique real words
if ( this.parent.config.timer === true ) {
this.parent.time( 'wordParse' );
Line 4,274 ⟶ 4,387:
this.wordParse = function ( regExp ) {
 
var regExpMatch = this.text.match( regExp );
while (if ( regExpMatch = regExp.exec( this.text ) ) !== null ) {
var wordmatchLength = regExpMatch[0].length;
iffor (var this.words[word]i === undefined0; i < matchLength; i ++) {
this.words[var word] = 1regExpMatch[i];
if ( Object.prototype.hasOwnProperty.call( this.words, word ) === false ) {
}
this.words[word] = 1;
else {
}
this.words[word] ++;
else {
this.words[word] ++;
}
}
}
Line 4,289 ⟶ 4,405:
 
/**
* Split text into paragraph, line, sentence, chunk, word, or character tokens.
*
* @param string level Level of splitting: paragraph, line, sentence, chunk, word, or character
* @param int|null token Index of token to be split, otherwise uses full text
* @param[in] string text Full text to be split
Line 4,320 ⟶ 4,436:
var regExpMatch;
var lastIndex = 0;
whilevar ( ( regExpMatchregExp = this.parent.config.regExp.split[level].exec( text ) ) !== null ) {;
while ( ( regExpMatch = regExp.exec( text ) ) !== null ) {
if ( regExpMatch.index > lastIndex ) {
split.push( text.substring( lastIndex, regExpMatch.index ) );
}
split.push( regExpMatch[0] );
lastIndex = this.parent.config.regExp.split[level].lastIndex;
}
if ( lastIndex < text.length ) {
Line 4,336 ⟶ 4,453:
 
// Insert current item, link to previous
this.tokens[current] =.push( {
token: split[i],
prev: prev,
Line 4,343 ⟶ 4,460:
number: null,
unique: false
} );
number ++;
 
Line 4,390 ⟶ 4,507:
* Split unique unmatched tokens into smaller tokens.
*
* @param string level Level of splitting: line, sentence, chunk, or word
* @param[in] array tokens Tokens list
*/
Line 4,397 ⟶ 4,514:
// Cycle through tokens list
var i = this.first;
while ( i !== null && this.tokens[i] !== null ) {
 
// Refine unique unmatched tokens into smaller tokens
Line 4,419 ⟶ 4,536:
var number = 0;
var i = this.first;
while ( i !== null && this.tokens[i] !== null ) {
this.tokens[i].number = number;
number ++;
Line 4,441 ⟶ 4,558:
dump += '\ni \tlink \t(prev \tnext) \tuniq \t#num \t"token"\n';
var i = this.first;
while ( i !== null && tokens[i] !== null ) {
dump +=
i + ' \t' + tokens[i].link + ' \t(' + tokens[i].prev + ' \t' + tokens[i].next + ') \t' +