User:Cacycle/diff.js: Difference between revisions

Content deleted Content added
1.2.1a (October 14, 2014) fix CSS, fix 'one word became separated'
another background that could use some darkmode friendly color
 
(7 intermediate revisions by 3 users not shown)
Line 3:
// ==UserScript==
// @name wikEd diff
// @version 1.2.4
// @date October 23, 2014
// @description improved word-based diff library with block move detection
// @homepage https://en.wikipedia.org/wiki/User:Cacycle/diff
Line 36:
* - Resolution down to characters level
* - Unicode and multilingual support
* - Stepwise split (paragraphs, lines, sentences, words, characters)
* - Recursive diff
* - Optimized code for resolving unmatched sequences
Line 70:
* .newText new text
* .oldText old text
* .maxWords word count of longest linked block
* .html diff html
* .error flag: result has not passed unit tests
* .bordersDown[] linked region borders downwards, [new index, old index]
* .bordersUp[] linked region borders upwards, [new index, old index]
Line 109 ⟶ 110:
* .blockEnd last block index
* .unique contains unique linked token
* .maxWords word count of longest linked block
* .words word count
* .chars char count
Line 368 ⟶ 369:
'border-radius: 0.25em; padding: 0.2em 1px; margin: 0 1px; ' +
'} ' +
'.wikEdDiffBlock { color: #000; } ' +
'.wikEdDiffBlock0 { background-color: #ffff80; } ' +
'.wikEdDiffBlock1 { background-color: #d0ff80; } ' +
Line 407 ⟶ 408:
'.wikEdDiffContainer { } ' +
'.wikEdDiffFragment {' +
'white-space: pre-wrap; background-color: var(--background-color-base, #fff); border: #bbb solid; ' +
'border-width: 1px 1px 1px 0.5em; border-radius: 0.5em; font-family: sans-serif; ' +
'font-size: 88%; line-height: 1.6; box-shadow: 2px 2px 2px #ddd; padding: 1em; margin: 0; ' +
'} ' +
'.wikEdDiffNoChange { background: var(--background-color-interactive, #f0f0f0eaecf0); border: 1px #bbb solid; border-radius: 0.5em; ' +
'line-height: 1.6; box-shadow: 2px 2px 2px #ddd; padding: 0.5em; margin: 1em 0; ' +
'text-align: center; ' +
Line 468 ⟶ 469:
// Split into paragraphs, after double newlines
'paragraph': new RegExp(
'(.|\\n)*?((\\r\\n|\\n|\\r){2,}|[' +
this.config.regExpNewParagraph +
'])+',
'g'
),
 
// Split into sentences /[^ \n][^\n]*?[.!?;]+(?=[ \n]|$)|\r\n|\n|\r/lines
'line': new RegExp(
'\\r\\n|\\n|\\r|[' +
this.config.regExpNewLinesAll +
']',
'g'
),
 
// Split into sentences /[^ ].*?[.!?:;]+(?= |$)/
'sentence': new RegExp(
'[^' +
this.config.regExpBlanks +
this'].config*?[.regExpNewLinesAll!?:;' +
'][^' +
this.config.regExpNewLinesAll +
']*?[.!?;' +
this.config.regExpFullStops +
this.config.regExpExclamationMarks +
Line 487 ⟶ 493:
']+(?=[' +
this.config.regExpBlanks +
']|$)',
this.config.regExpNewLinesAll +
']|$)|[' +
this.config.regExpNewLines +
']|\\r\\n|\\n|\\r',
'g'
),
Line 507 ⟶ 510:
 
// Split into words, multi-char markup, and chars
// regExpLetters speed-up: \\w+
'word': new RegExp(
'(\\w+|[_' +
this.config.regExpLetters +
'])+([\'’_]?[_' +
this.config.regExpLetters +
']+*)*|\\[\\[|\\]\\]|\\{\\{|\\}\\}|&\\w+;|\'\'\'|\'\'|==+|\\{\\||\\|\\}|\\|-|.',
'g'
),
Line 544 ⟶ 548:
// RegExps for counting words
'countWords': new RegExp(
'(\\w+|[_' +
this.config.regExpLetters +
'])+([\'’_]?[_' +
this.config.regExpLetters +
']+ *)*',
'g'
),
Line 850 ⟶ 854:
/** @var array blocks Block data (consecutive text tokens) in new text order */
this.blocks = [];
 
/** @var int maxWords Maximal detected word count of all linked blocks */
this.maxWords = 0;
 
/** @var array groups Section blocks that are consecutive in old text order */
Line 993 ⟶ 1,000:
 
// Split new and old text into paragraphs
if ( this.config.timer === true ) {
this.time( 'paragraph split' );
}
this.newText.splitText( 'paragraph' );
this.oldText.splitText( 'paragraph' );
if ( this.config.timer === true ) {
this.timeEnd( 'paragraph split' );
}
 
// Calculate diff
this.calculateDiff( 'paragraphline' );
 
// Refine different paragraphs into lines
if ( this.config.timer === true ) {
this.time( 'line split' );
}
this.newText.splitRefine( 'line' );
this.oldText.splitRefine( 'line' );
if ( this.config.timer === true ) {
this.timeEnd( 'line split' );
}
 
// Calculate refined diff
this.calculateDiff( 'line' );
 
// Refine different lines into sentences
if ( this.config.timer === true ) {
this.time( 'sentence split' );
}
this.newText.splitRefine( 'sentence' );
this.oldText.splitRefine( 'sentence' );
if ( this.config.timer === true ) {
this.timeEnd( 'sentence split' );
}
 
// Calculate refined diff
this.calculateDiff( 'sentence' );
 
// Refine different sentences into chunks
if ( this.config.timer === true ) {
this.time( 'chunk split' );
Line 1,019 ⟶ 1,051:
this.calculateDiff( 'chunk' );
 
// Refine different chunks into words
if ( this.config.timer === true ) {
this.time( 'word split' );
Line 1,068 ⟶ 1,100:
}
 
// Free memory
this.symbols = undefined;
this.bordersDown = undefined;
this.bordersUp = undefined;
this.newText.words = undefined;
this.oldText.words = undefined;
 
// Enumerate token lists
Line 1,086 ⟶ 1,120:
}
 
// Free memory
this.newText.tokens = undefined;
this.oldText.tokens = undefined;
Line 1,093 ⟶ 1,127:
this.getDiffFragments();
 
// Free memory
this.blocks = undefined;
this.groups = undefined;
Line 1,184 ⟶ 1,218:
var i = this.newText.first;
var j = this.oldText.first;
while ( i !== null && this.newText.tokens[i] !== null ) {
 
// Get token links
Line 1,339 ⟶ 1,373:
if ( left < shorterToken.length / 2 && (right < shorterToken.length / 2) ) {
 
// Do not split into chars in this gap
charSplit = false;
break;
Line 1,435 ⟶ 1,469:
*/
this.slideGaps = function ( text, textLinked ) {
 
var regExpSlideBorder = this.config.regExp.slideBorder;
var regExpSlideStop = this.config.regExp.slideStop;
 
// Cycle through tokens list
var i = text.first;
var gapStart = null;
while ( i !== null && text.tokens[i] !== null ) {
 
// Remember gap start
Line 1,475 ⟶ 1,512:
var front = text.tokens[gapFront].prev;
var back = gapBack;
var gapFrontBlankTest = this.config.regExp.slideBorderregExpSlideBorder.test( text.tokens[gapFront].token );
var frontStop = front;
if ( text.tokens[back].link === null ) {
Line 1,484 ⟶ 1,521:
text.tokens[front].token === text.tokens[back].token
) {
front = text.tokens[front].prev;
back = text.tokens[back].prev;
if ( front !== null ) {
 
// Stop at line break
if ( this.config.regExp.slideStopregExpSlideStop.test( text.tokens[front].token ) === true ) {
frontStop = front;
break;
Line 1,496 ⟶ 1,531:
// Stop at first word border (blank/word or word/blank)
if (
this.config.regExp.slideBorderregExpSlideBorder.test( text.tokens[front].token ) !== gapFrontBlankTest ) {
gapFrontBlankTest
) {
frontStop = front;
}
}
front = text.tokens[front].prev;
back = text.tokens[back].prev;
}
}
Line 1,545 ⟶ 1,580:
*
* @param array symbols Symbol table object
* @param string level Split level: 'paragraph', 'line', 'sentence', 'chunk', 'word', or 'character'
*
* Optionally for recursive or repeated calls:
Line 1,580 ⟶ 1,615:
}
 
// Get object symbols table and linked region borders
var symbols;
var bordersDown;
Line 1,590 ⟶ 1,625:
}
 
// Create empty local symbols table and linked region borders arrays
else {
symbols = {
Line 1,602 ⟶ 1,637:
 
 
// Updated versions of linked region borders
var bordersUpNext = [];
var bordersDownNext = [];
Line 1,612 ⟶ 1,647:
// Cycle through new text tokens list
var i = newStart;
while ( i !== null && this.newText.tokens[i] !== null ) {
if ( this.newText.tokens[i].link === null ) {
 
Line 1,618 ⟶ 1,653:
var token = this.newText.tokens[i].token;
if ( Object.prototype.hasOwnProperty.call( symbols.hashTable, token ) === false ) {
var currentsymbols.hashTable[token] = symbols.token.length;
symbols.hashTable[token].push( = current;{
symbols.token[current] = {
newCount: 1,
oldCount: 0,
newToken: i,
oldToken: null
} );
}
 
Line 1,642 ⟶ 1,676:
}
 
// Get next token
if ( up === false ) {
i = this.newText.tokens[i].next;
Line 1,657 ⟶ 1,691:
// Cycle through old text tokens list
var j = oldStart;
while ( j !== null && this.oldText.tokens[j] !== null ) {
if ( this.oldText.tokens[j].link === null ) {
 
Line 1,663 ⟶ 1,697:
var token = this.oldText.tokens[j].token;
if ( Object.prototype.hasOwnProperty.call( symbols.hashTable, token ) === false ) {
var currentsymbols.hashTable[token] = symbols.token.length;
symbols.hashTable[token].push( = current;{
symbols.token[current] = {
newCount: 0,
oldCount: 1,
newToken: null,
oldToken: j
} );
}
 
Line 1,690 ⟶ 1,723:
}
 
// Get next token
if ( up === false ) {
j = this.oldText.tokens[j].next;
Line 1,711 ⟶ 1,744:
var newToken = symbols.token[i].newToken;
var oldToken = symbols.token[i].oldToken;
var newTokenObj = this.newText.tokens[newToken];
var oldTokenObj = this.oldText.tokens[oldToken];
 
// Connect from new to old and from old to new
if ( this.newText.tokens[newToken]newTokenObj.link === null ) {
 
// Do not use spaces as unique markers
if (
this.config.regExp.blankOnlyToken.test( this.newText.tokens[newToken]newTokenObj.token ) === true
) {
 
// Link new and old tokens
this.newText.tokens[newToken]newTokenObj.link = oldToken;
this.oldText.tokens[oldToken]oldTokenObj.link = newToken;
symbols.linked = true;
 
// Save linked region borders
bordersDown.push( [newToken, oldToken] );
bordersUp.push( [newToken, oldToken] );
Line 1,736 ⟶ 1,771:
}
else {
var token = this.newText.tokens[newToken]newTokenObj.token;
var words =
( token.match( this.config.regExp.countWords ) || [] ).length +concat(
( token.match( this.config.regExp.countChunks ) || [] ).length;
);
 
// Unique if longer than min block length
ifvar (wordsLength words >= thiswords.config.blockMinLength ) {length;
if ( wordsLength >= this.config.blockMinLength ) {
unique = true;
}
Line 1,748 ⟶ 1,785:
// Unique if it contains at least one unique word
else {
for ( var wordsLengthi = words.length0;i < wordsLength; i ++ ) {
for ( var word = 0words[i]; word < wordsLength; word ++ ) {
if (
this.oldText.words[ words[word] ] === 1 &&
this.newText.words[ words[word] ] === 1 &&
Object.prototype.hasOwnProperty.call( this.oldText.words, word ) === true &&
Object.prototype.hasOwnProperty.call( this.newText.words, word ) === true
) {
unique = true;
Line 1,763 ⟶ 1,802:
// Set unique
if ( unique === true ) {
this.newText.tokens[newToken]newTokenObj.unique = true;
this.oldText.tokens[oldToken]oldTokenObj.unique = true;
}
}
Line 1,924 ⟶ 1,963:
}
 
// Save updated linked region borders to object
if ( recursionLevel === 0 && repeating === false ) {
this.bordersDown = bordersDownNext;
Line 1,930 ⟶ 1,969:
}
 
// Merge local updated linked region borders into object
else {
this.bordersDown = this.bordersDown.concat( bordersDownNext );
Line 2,065 ⟶ 2,104:
 
// Set longest sequence of increasing groups in sections as fixed (not moved)
if ( this.config.timer === true ) {
this.time( 'setFixed' );
}
this.setFixed();
if ( this.config.timer === true ) {
this.time( 'setFixed' );
}
 
// Convert groups to insertions/deletions if maximum block length is too short
// Only for more complex texts that actually have blocks of minimum block length
var unlinkCount = 0;
if (
if ( this.config.unlinkBlocks === true && this.config.blockMinLength > 0 ) {
this.config.unlinkBlocks === true &&
this.config.blockMinLength > 0 &&
this.maxWords >= this.config.blockMinLength
) {
if ( this.config.timer === true ) {
this.time( 'unlinktotal unlinking' );
}
 
Line 2,094 ⟶ 2,132:
 
// Repeat block detection from start
this.maxWords = 0;
this.getSameBlocks();
this.getSections();
Line 2,101 ⟶ 2,140:
}
if ( this.config.timer === true ) {
this.timeEnd( 'unlinktotal unlinking' );
}
}
Line 2,139 ⟶ 2,178:
*/
this.getSameBlocks = function () {
 
if ( this.config.timer === true ) {
this.time( 'getSameBlocks' );
}
 
var blocks = this.blocks;
Line 2,166 ⟶ 2,209:
var text = '';
while ( i !== null && j !== null && this.oldText.tokens[j].link === i ) {
var tokentext += this.oldText.tokens[j].token;
count ++;
if ( this.newText.tokens[i].unique === true ) {
unique = true;
}
text += token;
i = this.newText.tokens[i].next;
j = this.oldText.tokens[j].next;
Line 2,206 ⟶ 2,248:
for ( var block = 0; block < blocksLength; block ++ ) {
blocks[block].newBlock = block;
}
 
if ( this.config.timer === true ) {
this.timeEnd( 'getSameBlocks' );
}
return;
Line 2,219 ⟶ 2,265:
*/
this.getSections = function () {
 
if ( this.config.timer === true ) {
this.time( 'getSections' );
}
 
var blocks = this.blocks;
Line 2,264 ⟶ 2,314:
block = sectionEnd;
}
}
if ( this.config.timer === true ) {
this.timeEnd( 'getSections' );
}
return;
Line 2,276 ⟶ 2,329:
*/
this.getGroups = function () {
 
if ( this.config.timer === true ) {
this.time( 'getGroups' );
}
 
var blocks = this.blocks;
Line 2,346 ⟶ 2,403:
} );
block = groupEnd;
 
// Set global word count of longest linked block
if ( maxWords > this.maxWords ) {
this.maxWords = maxWords;
}
}
}
if ( this.config.timer === true ) {
this.timeEnd( 'getGroups' );
}
return;
Line 2,360 ⟶ 2,425:
*/
this.setFixed = function () {
 
if ( this.config.timer === true ) {
this.time( 'setFixed' );
}
 
var blocks = this.blocks;
Line 2,399 ⟶ 2,468:
}
}
}
if ( this.config.timer === true ) {
this.timeEnd( 'setFixed' );
}
return;
Line 2,455 ⟶ 2,527:
 
return returnObj;
};
 
 
/**
 * Turn matching '=' blocks of a group into insertion/deletion ('+'/'-') pairs
 * when they are too short and too common.
 * Prevents fragmented diffs for very different versions.
 *
 * @param[in] array blocks Blocks table object
 * @param[in/out] WikEdDiffText newText, oldText Text object, linked property
 * @param[in/out] array groups Groups table object
 * @return bool True if text tokens were unlinked
 */
this.unlinkBlocks = function () {

	var allBlocks = this.blocks;
	var allGroups = this.groups;
	var didUnlink = false;

	// Walk over every group
	var total = allGroups.length;
	var g = 0;
	while ( g < total ) {
		var currentGroup = allGroups[g];
		var first = currentGroup.blockStart;
		var last = currentGroup.blockEnd;
		var b;
		var candidate;

		// A group is dropped completely if no block is at least blockMinLength words long and unique
		var dropWholeGroup =
			currentGroup.maxWords < this.config.blockMinLength &&
			currentGroup.unique === false;

		if ( dropWholeGroup === true ) {
			for ( b = first; b <= last; b ++ ) {
				candidate = allBlocks[b];
				if ( candidate.type !== '=' ) {
					continue;
				}
				this.unlinkSingleBlock( candidate );
				didUnlink = true;
			}
		}

		// Otherwise only the flanks of the group are unlinked
		else {

			// Front flank: walk forward until a block has more than one word or is unique
			for ( b = first; b <= last; b ++ ) {
				candidate = allBlocks[b];
				if ( candidate.type !== '=' ) {
					continue;
				}
				if ( candidate.words > 1 || candidate.unique === true ) {
					break;
				}
				this.unlinkSingleBlock( candidate );
				didUnlink = true;

				// Remember how far the front flank reached, the back flank scan stops above it
				first = b;
			}

			// Back flank: walk backward until a block has more than one word or is a unique single word
			for ( b = last; b > first; b -- ) {
				candidate = allBlocks[b];
				if ( candidate.type !== '=' ) {
					continue;
				}
				if (
					candidate.words > 1 ||
					( candidate.words === 1 && candidate.unique === true )
				) {
					break;
				}
				this.unlinkSingleBlock( candidate );
				didUnlink = true;
			}
		}
		g ++;
	}
	return didUnlink;
};
 
 
/**
 * Unlink the text tokens of a single block, converting them into insertion/deletion ('+'/'-') pairs.
 *
 * @param[in] object block Block object, its .oldStart and .count select the old text tokens to unlink
 * @param[out] WikEdDiffText newText, oldText Text objects, link property
 */
this.unlinkSingleBlock = function ( block ) {

	// Follow the old text token chain for block.count tokens, starting at block.oldStart
	var oldIndex = block.oldStart;
	var processed = 0;
	while ( processed < block.count ) {
		var oldToken = this.oldText.tokens[oldIndex];
		var newToken = this.newText.tokens[ oldToken.link ];

		// Clear the link on both sides, new text token first
		newToken.link = null;
		oldToken.link = null;

		oldIndex = oldToken.next;
		processed ++;
	}
};
 
Line 2,465 ⟶ 2,628:
*/
this.getDelBlocks = function () {
 
if ( this.config.timer === true ) {
this.time( 'getDelBlocks' );
}
 
var blocks = this.blocks;
Line 2,512 ⟶ 2,679:
}
}
}
if ( this.config.timer === true ) {
this.timeEnd( 'getDelBlocks' );
}
return;
Line 2,533 ⟶ 2,703:
*/
this.positionDelBlocks = function () {
 
if ( this.config.timer === true ) {
this.time( 'positionDelBlocks' );
}
 
var blocks = this.blocks;
Line 2,628 ⟶ 2,802:
this.sortBlocks();
 
if ( this.config.timer === true ) {
return;
this.timeEnd( 'positionDelBlocks' );
};
 
 
/**
 * Turn matching '=' blocks of a group into insertion/deletion ('+'/'-') pairs
 * when they are too short and too common.
 * Prevents fragmented diffs for very different versions.
 *
 * @param[in] array blocks Blocks table object
 * @param[in/out] WikEdDiffText newText, oldText Text object, linked property
 * @param[in/out] array groups Groups table object
 * @return bool True if text tokens were unlinked
 */
this.unlinkBlocks = function () {

	var allBlocks = this.blocks;
	var allGroups = this.groups;
	var didUnlink = false;

	// Walk over every group
	var total = allGroups.length;
	var g = 0;
	while ( g < total ) {
		var currentGroup = allGroups[g];
		var first = currentGroup.blockStart;
		var last = currentGroup.blockEnd;
		var b;
		var candidate;

		// A group is dropped completely if no block is at least blockMinLength words long and unique
		var dropWholeGroup =
			currentGroup.maxWords < this.config.blockMinLength &&
			currentGroup.unique === false;

		if ( dropWholeGroup === true ) {
			for ( b = first; b <= last; b ++ ) {
				candidate = allBlocks[b];
				if ( candidate.type !== '=' ) {
					continue;
				}
				this.unlinkSingleBlock( candidate );
				didUnlink = true;
			}
		}

		// Otherwise only the flanks of the group are unlinked
		else {

			// Front flank: walk forward until a block has more than one word or is unique
			for ( b = first; b <= last; b ++ ) {
				candidate = allBlocks[b];
				if ( candidate.type !== '=' ) {
					continue;
				}
				if ( candidate.words > 1 || candidate.unique === true ) {
					break;
				}
				this.unlinkSingleBlock( candidate );
				didUnlink = true;

				// Remember how far the front flank reached, the back flank scan stops above it
				first = b;
			}

			// Back flank: walk backward until a block has more than one word or is a unique single word
			for ( b = last; b > first; b -- ) {
				candidate = allBlocks[b];
				if ( candidate.type !== '=' ) {
					continue;
				}
				if (
					candidate.words > 1 ||
					( candidate.words === 1 && candidate.unique === true )
				) {
					break;
				}
				this.unlinkSingleBlock( candidate );
				didUnlink = true;
			}
		}
		g ++;
	}
	return didUnlink;
};
 
 
/**
* Unlink text tokens of single block, convert them into insertion/deletion ('+'/'-') pairs.
*
* @param[in] array blocks Blocks table object
* @param[out] WikEdDiffText newText, oldText Text objects, link property
*/
this.unlinkSingleBlock = function ( block ) {
 
// Cycle through old text
var j = block.oldStart;
for ( var count = 0; count < block.count; count ++ ) {
 
// Unlink tokens
this.newText.tokens[ this.oldText.tokens[j].link ].link = null;
this.oldText.tokens[j].link = null;
j = this.oldText.tokens[j].next;
}
return;
Line 2,729 ⟶ 2,816:
*/
this.getInsBlocks = function () {
 
if ( this.config.timer === true ) {
this.time( 'getInsBlocks' );
}
 
var blocks = this.blocks;
Line 2,776 ⟶ 2,867:
this.sortBlocks();
 
if ( this.config.timer === true ) {
this.timeEnd( 'getInsBlocks' );
}
return;
};
Line 2,825 ⟶ 2,919:
*/
this.setInsGroups = function () {
 
if ( this.config.timer === true ) {
this.time( 'setInsGroups' );
}
 
var blocks = this.blocks;
Line 2,865 ⟶ 2,963:
} );
}
}
if ( this.config.timer === true ) {
this.timeEnd( 'setInsGroups' );
}
return;
Line 2,892 ⟶ 2,993:
*/
this.insertMarks = function () {
 
if ( this.config.timer === true ) {
this.time( 'insertMarks' );
}
 
var blocks = this.blocks;
Line 3,025 ⟶ 3,130:
this.sortBlocks();
 
if ( this.config.timer === true ) {
this.timeEnd( 'insertMarks' );
}
return;
};
Line 3,522 ⟶ 3,630:
// Remove split element
fragments.splice( fragment, 1 );
fragmentsLength --;
 
// Add left text to fragments list
if ( rangeLeft !== null ) {
fragments.splice( fragment ++, 0, { text: textLeft, type: '=', color: null } );
fragmentsLength ++;
if ( omittedLeft !== null ) {
fragments.splice( fragment ++, 0, { text: '', type: omittedLeft, color: null } );
fragmentsLength ++;
}
}
Line 3,536 ⟶ 3,647:
fragments.splice( fragment ++, 0, { text: '', type: ',', color: null } );
fragments.splice( fragment ++, 0, { text: '', type: '[', color: null } );
fragmentsLength += 3;
}
 
Line 3,542 ⟶ 3,654:
if ( omittedRight !== null ) {
fragments.splice( fragment ++, 0, { text: '', type: omittedRight, color: null } );
fragmentsLength ++;
}
fragments.splice( fragment ++, 0, { text: textRight, type: '=', color: null } );
fragmentsLength ++;
}
}
Line 4,055 ⟶ 4,169:
*
* @param string label Timer label
* @param[out] array timer Current time in milliseconds (float)
*/
this.time = function ( label ) {
Line 4,071 ⟶ 4,185:
* @param string label Timer label
* @param bool noLog Do not log result
* @return float Time in milliseconds, rounded to two decimal digits
*/
this.timeEnd = function ( label, noLog ) {
Line 4,082 ⟶ 4,196:
this.timer[label] = undefined;
if ( noLog !== true ) {
console.log( label + ': ' + diff.toFixed( 2 ) + ' ms' );
}
}
Line 4,109 ⟶ 4,223:
var timerLength = this.recursionTimer.length;
for ( var i = 0; i < timerLength; i ++ ) {
console.log( text + ' recursion ' + i + ': ' + this.recursionTimer[i].toFixed( 2 ) + ' ms\n' );
}
}
Line 4,251 ⟶ 4,365:
this.text = text.replace( /\r\n?/g, '\n');
 
// Parse and count words and chunks for identification of unique real words
if ( this.parent.config.timer === true ) {
this.parent.time( 'wordParse' );
Line 4,273 ⟶ 4,387:
this.wordParse = function ( regExp ) {
 
var regExpMatch = this.text.match( regExp );
while (if ( regExpMatch = regExp.exec( this.text ) ) !== null ) {
var wordmatchLength = regExpMatch[0].length;
iffor (var this.words[word]i === undefined0; i < matchLength; i ++) {
this.words[var word] = 1regExpMatch[i];
if ( Object.prototype.hasOwnProperty.call( this.words, word ) === false ) {
}
this.words[word] = 1;
else {
}
this.words[word] ++;
else {
this.words[word] ++;
}
}
}
Line 4,288 ⟶ 4,405:
 
/**
* Split text into paragraph, line, sentence, chunk, word, or character tokens.
*
* @param string level Level of splitting: paragraph, line, sentence, chunk, word, or character
* @param int|null token Index of token to be split, otherwise uses full text
* @param[in] string text Full text to be split
Line 4,319 ⟶ 4,436:
var regExpMatch;
var lastIndex = 0;
whilevar ( ( regExpMatchregExp = this.parent.config.regExp.split[level].exec( text ) ) !== null ) {;
while ( ( regExpMatch = regExp.exec( text ) ) !== null ) {
if ( regExpMatch.index > lastIndex ) {
split.push( text.substring( lastIndex, regExpMatch.index ) );
}
split.push( regExpMatch[0] );
lastIndex = this.parent.config.regExp.split[level].lastIndex;
}
if ( lastIndex < text.length ) {
Line 4,335 ⟶ 4,453:
 
// Insert current item, link to previous
this.tokens[current] =.push( {
token: split[i],
prev: prev,
Line 4,342 ⟶ 4,460:
number: null,
unique: false
} );
number ++;
 
Line 4,389 ⟶ 4,507:
* Split unique unmatched tokens into smaller tokens.
*
* @param string level Level of splitting: line, sentence, chunk, or word
* @param[in] array tokens Tokens list
*/
Line 4,396 ⟶ 4,514:
// Cycle through tokens list
var i = this.first;
while ( i !== null && this.tokens[i] !== null ) {
 
// Refine unique unmatched tokens into smaller tokens
Line 4,418 ⟶ 4,536:
var number = 0;
var i = this.first;
while ( i !== null && this.tokens[i] !== null ) {
this.tokens[i].number = number;
number ++;
Line 4,440 ⟶ 4,558:
dump += '\ni \tlink \t(prev \tnext) \tuniq \t#num \t"token"\n';
var i = this.first;
while ( i !== null && tokens[i] !== null ) {
dump +=
i + ' \t' + tokens[i].link + ' \t(' + tokens[i].prev + ' \t' + tokens[i].next + ') \t' +