User:Cacycle/diff.js: Difference between revisions

Browse history interactively

← Previous edit

Content deleted Content added

Revision as of 17:54, 14 October 2014 view source Cacycle (talk \| contribs) Extended confirmed users 21,997 edits 1.2.1a (October 14, 2014) fix CSS, fix 'one word became separated' ← Previous edit		Latest revision as of 19:52, 8 June 2025 view source Izno (talk \| contribs) Checkusers, Interface administrators, Administrators 138,172 edits another background that could use some darkmode friendly color
(7 intermediate revisions by 3 users not shown)
Line 3: // ==UserScript== // @name wikEd diff // @version 1.2.1a4 // @date October 1423, 2014 // @description improved word-based diff library with block move detection // @homepage https://en.wikipedia.org/wiki/User:Cacycle/diff Line 36: * - Resolution down to characters level * - Unicode and multilingual support * - Stepwise split (paragraphs, lines, sentences, words, characters) * - Recursive diff * - Optimized code for resolving unmatched sequences Line 70: * .newText new text * .oldText old text * .maxWords word count of longest linked block * .html diff html * .error flag: result has not passed unit tests * .bordersDown[] linked region borders downwards, [new index, old index] * .bordersUp[] linked region borders upwards, [new index, old index] Line 109 ⟶ 110: * .blockEnd last block index * .unique contains unique linked token * .maxWords word count of longest linked block * .words word count * .chars char count Line 368 ⟶ 369: 'border-radius: 0.25em; padding: 0.2em 1px; margin: 0 1px; ' + '} ' + '.wikEdDiffBlock { color: #000; } ' + '.wikEdDiffBlock0 { background-color: #ffff80; } ' + '.wikEdDiffBlock1 { background-color: #d0ff80; } ' + Line 407 ⟶ 408: '.wikEdDiffContainer { } ' + '.wikEdDiffFragment {' + 'white-space: pre-wrap; background-color: var(--background-color-base, #fff); border: #bbb solid; ' + 'border-width: 1px 1px 1px 0.5em; border-radius: 0.5em; font-family: sans-serif; ' + 'font-size: 88%; line-height: 1.6; box-shadow: 2px 2px 2px #ddd; padding: 1em; margin: 0; ' + '} ' + '.wikEdDiffNoChange { background: var(--background-color-interactive, #~~f0f0f0~~eaecf0); border: 1px #bbb solid; border-radius: 0.5em; ' + 'line-height: 1.6; box-shadow: 2px 2px 2px #ddd; padding: 0.5em; margin: 1em 0; ' + 'text-align: center; ' + Line 468 ⟶ 469: // Split into paragraphs, after double newlines 'paragraph': new RegExp( '~~(.\|\\n)?(~~(\\r\\n\|\\n\|\\r){2,}\|[' + this.config.regExpNewParagraph + '])+', 'g' ), // Split into ~~sentences /[^ \n][^\n]?[.!?;]+(?=[ \n]\|$)\|\r\n\|\n\|\r/~~lines 'line': new RegExp( '\\r\\n\|\\n\|\\r\|[' + this.config.regExpNewLinesAll + ']', 'g' ), // Split into sentences /[^ ].?[.!?:;]+(?= \|$)/ 'sentence': new RegExp( '[^' + this.config.regExpBlanks + ~~this~~'].~~config~~?[.~~regExpNewLinesAll~~!?:;' + ~~'][^' +~~ ~~this.config.regExpNewLinesAll +~~ ~~']?[.!?;' +~~ this.config.regExpFullStops + this.config.regExpExclamationMarks + Line 487 ⟶ 493: ']+(?=[' + this.config.regExpBlanks + ']\|$)', ~~this.config.regExpNewLinesAll +~~ ~~']\|$)\|[' +~~ ~~this.config.regExpNewLines +~~ ~~']\|\\r\\n\|\\n\|\\r',~~ 'g' ), Line 507 ⟶ 510: // Split into words, multi-char markup, and chars // regExpLetters speed-up: \\w+ 'word': new RegExp( '(\\w+\|[_' + this.config.regExpLetters + '])+([\'’_’]?[_' + this.config.regExpLetters + ']+)\|\\[\\[\|\\]\\]\|\\{\\{\|\\}\\}\|&\\w+;\|\'\'\'\|\'\'\|==+\|\\{\\\|\|\\\|\\}\|\\\|-\|.', 'g' ), Line 544 ⟶ 548: // RegExps for counting words 'countWords': new RegExp( '(\\w+\|[_' + this.config.regExpLetters + '])+([\'’_’]?[_' + this.config.regExpLetters + ']+ )', 'g' ), Line 850 ⟶ 854: /* @var array blocks Block data (consecutive text tokens) in new text order / this.blocks = []; /* @var int maxWords Maximal detected word count of all linked blocks / this.maxWords = 0; /* @var array groups Section blocks that are consecutive in old text order / Line 993 ⟶ 1,000: // Split new and old text into paragraps if ( this.config.timer === true ) { this.time( 'paragraph split' ); } this.newText.splitText( 'paragraph' ); this.oldText.splitText( 'paragraph' ); if ( this.config.timer === true ) { this.timeEnd( 'paragraph split' ); } // Calculate diff this.calculateDiff( '~~paragraph~~line' ); // Refine different paragraphs into ~~sentences~~lines if ( this.config.timer === true ) { this.time( 'line split' ); } this.newText.splitRefine( 'line' ); this.oldText.splitRefine( 'line' ); if ( this.config.timer === true ) { this.timeEnd( 'line split' ); } // Calculate refined diff this.calculateDiff( 'line' ); // Refine different lines into sentences if ( this.config.timer === true ) { this.time( 'sentence split' ); } this.newText.splitRefine( 'sentence' ); this.oldText.splitRefine( 'sentence' ); if ( this.config.timer === true ) { this.timeEnd( 'sentence split' ); } // Calculate refined diff this.calculateDiff( 'sentence' ); // Refine different ~~paragraphs~~sentences into chunks if ( this.config.timer === true ) { this.time( 'chunk split' ); Line 1,019 ⟶ 1,051: this.calculateDiff( 'chunk' ); // Refine different ~~sentences~~chunks into words if ( this.config.timer === true ) { this.time( 'word split' ); Line 1,068 ⟶ 1,100: } // ~~free~~Free memory this.symbols = undefined; this.bordersDown = undefined; this.bordersUp = undefined; this.newText.words = undefined; this.oldText.words = undefined; // Enumerate token lists Line 1,086 ⟶ 1,120: } // ~~free~~Free memory this.newText.tokens = undefined; this.oldText.tokens = undefined; Line 1,093 ⟶ 1,127: this.getDiffFragments(); // ~~free~~Free memory this.blocks = undefined; this.groups = undefined; Line 1,184 ⟶ 1,218: var i = this.newText.first; var j = this.oldText.first; while ( i ~~!== null && this.newText.tokens[i]~~ !== null ) { // Get token links Line 1,339 ⟶ 1,373: if ( left < shorterToken.length / 2 && (right < shorterToken.length / 2) ) { // Do not split into chars in this gap charSplit = false; break; Line 1,435 ⟶ 1,469: / this.slideGaps = function ( text, textLinked ) { var regExpSlideBorder = this.config.regExp.slideBorder; var regExpSlideStop = this.config.regExp.slideStop; // Cycle through tokens list var i = text.first; var gapStart = null; while ( i ~~!== null && text.tokens[i]~~ !== null ) { // Remember gap start Line 1,475 ⟶ 1,512: var front = text.tokens[gapFront].prev; var back = gapBack; var gapFrontBlankTest = ~~this.config.regExp.slideBorder~~regExpSlideBorder.test( text.tokens[gapFront].token ); var frontStop = front; if ( text.tokens[back].link === null ) { Line 1,484 ⟶ 1,521: text.tokens[front].token === text.tokens[back].token ) { ~~front = text.tokens[front].prev;~~ ~~back = text.tokens[back].prev;~~ if ( front !== null ) { // Stop at line break if ( ~~this.config.regExp.slideStop~~regExpSlideStop.test( text.tokens[front].token ) === true ) { frontStop = front; break; Line 1,496 ⟶ 1,531: // Stop at first word border (blank/word or word/blank) if ( ~~this.config.regExp.slideBorder~~regExpSlideBorder.test( text.tokens[front].token ) !== gapFrontBlankTest ) { ~~gapFrontBlankTest~~ ~~) {~~ frontStop = front; } } front = text.tokens[front].prev; back = text.tokens[back].prev; } } Line 1,545 ⟶ 1,580: * * @param array symbols Symbol table object * @param string level Split level: 'paragraph', 'line', 'sentence', 'chunk', 'word', or 'character' * * Optionally for recursive or repeated calls: Line 1,580 ⟶ 1,615: } // ~~get~~Get object symbols table and linked region borders var symbols; var bordersDown; Line 1,590 ⟶ 1,625: } // ~~create~~Create empty local symbols table and linked region borders arrays else { symbols = { Line 1,602 ⟶ 1,637: // ~~updated~~Updated versions of linked region borders var bordersUpNext = []; var bordersDownNext = []; Line 1,612 ⟶ 1,647: // Cycle through new text tokens list var i = newStart; while ( i ~~!== null && this.newText.tokens[i]~~ !== null ) { if ( this.newText.tokens[i].link === null ) { Line 1,618 ⟶ 1,653: var token = this.newText.tokens[i].token; if ( Object.prototype.hasOwnProperty.call( symbols.hashTable, token ) === false ) { ~~var current~~symbols.hashTable[token] = symbols.token.length; symbols.~~hashTable[~~token].push( ~~= current;~~{ ~~symbols.token[current] = {~~ newCount: 1, oldCount: 0, newToken: i, oldToken: null } ); } Line 1,642 ⟶ 1,676: } // ~~get~~Get next token if ( up === false ) { i = this.newText.tokens[i].next; Line 1,657 ⟶ 1,691: // Cycle through old text tokens list var j = oldStart; while ( j ~~!== null && this.oldText.tokens[j]~~ !== null ) { if ( this.oldText.tokens[j].link === null ) { Line 1,663 ⟶ 1,697: var token = this.oldText.tokens[j].token; if ( Object.prototype.hasOwnProperty.call( symbols.hashTable, token ) === false ) { ~~var current~~symbols.hashTable[token] = symbols.token.length; symbols.~~hashTable[~~token].push( ~~= current;~~{ ~~symbols.token[current] = {~~ newCount: 0, oldCount: 1, newToken: null, oldToken: j } ); } Line 1,690 ⟶ 1,723: } // ~~get~~Get next token if ( up === false ) { j = this.oldText.tokens[j].next; Line 1,711 ⟶ 1,744: var newToken = symbols.token[i].newToken; var oldToken = symbols.token[i].oldToken; var newTokenObj = this.newText.tokens[newToken]; var oldTokenObj = this.oldText.tokens[oldToken]; // Connect from new to old and from old to new if ( ~~this.newText.tokens[newToken]~~newTokenObj.link === null ) { // Do not use spaces as unique markers if ( this.config.regExp.blankOnlyToken.test( ~~this.newText.tokens[newToken]~~newTokenObj.token ) === true ) { // Link new anand old tokens ~~this.newText.tokens[newToken]~~newTokenObj.link = oldToken; ~~this.oldText.tokens[oldToken]~~oldTokenObj.link = newToken; symbols.linked = true; // ~~save~~Save linked region borders bordersDown.push( [newToken, oldToken] ); bordersUp.push( [newToken, oldToken] ); Line 1,736 ⟶ 1,771: } else { var token = ~~this.newText.tokens[newToken]~~newTokenObj.token; var words = ( token.match( this.config.regExp.countWords ) \|\| [] ).~~length +~~concat( ( token.match( this.config.regExp.countChunks ) \|\| [] )~~.length;~~ ); // Unique if longer than min block length ifvar (wordsLength ~~words >~~= ~~this~~words.~~config.blockMinLength ) {~~length; if ( wordsLength >= this.config.blockMinLength ) { unique = true; } Line 1,748 ⟶ 1,785: // Unique if it contains at least one unique word else { for ( var ~~wordsLength~~i = ~~words.length~~0;i < wordsLength; i ++ ) { ~~for (~~ var word = 0words[i]; ~~word < wordsLength; word ++ ) {~~ if ( this.oldText.~~words[~~ words[word] ] === 1 && this.newText.~~words[~~ words[word] ] === 1 && Object.prototype.hasOwnProperty.call( this.oldText.words, word ) === true && Object.prototype.hasOwnProperty.call( this.newText.words, word ) === true ) { unique = true; Line 1,763 ⟶ 1,802: // Set unique if ( unique === true ) { ~~this.newText.tokens[newToken]~~newTokenObj.unique = true; ~~this.oldText.tokens[oldToken]~~oldTokenObj.unique = true; } } Line 1,924 ⟶ 1,963: } // ~~save~~Save updated linked region borders to object if ( recursionLevel === 0 && repeating === false ) { this.bordersDown = bordersDownNext; Line 1,930 ⟶ 1,969: } // ~~merge~~Merge local updated linked region borders into object else { this.bordersDown = this.bordersDown.concat( bordersDownNext ); Line 2,065 ⟶ 2,104: // Set longest sequence of increasing groups in sections as fixed (not moved) ~~if ( this.config.timer === true ) {~~ ~~this.time( 'setFixed' );~~ } this.setFixed(); ~~if ( this.config.timer === true ) {~~ ~~this.time( 'setFixed' );~~ } // Convert groups to insertions/deletions if maximum block length is too short // Only for more complex texts that actually have blocks of minimum block length var unlinkCount = 0; if ( ~~if ( this.config.unlinkBlocks === true && this.config.blockMinLength > 0 ) {~~ this.config.unlinkBlocks === true && this.config.blockMinLength > 0 && this.maxWords >= this.config.blockMinLength ) { if ( this.config.timer === true ) { this.time( '~~unlink~~total unlinking' ); } Line 2,094 ⟶ 2,132: // Repeat block detection from start this.maxWords = 0; this.getSameBlocks(); this.getSections(); Line 2,101 ⟶ 2,140: } if ( this.config.timer === true ) { this.timeEnd( '~~unlink~~total unlinking' ); } } Line 2,139 ⟶ 2,178: / this.getSameBlocks = function () { if ( this.config.timer === true ) { this.time( 'getSameBlocks' ); } var blocks = this.blocks; Line 2,166 ⟶ 2,209: var text = ''; while ( i !== null && j !== null && this.oldText.tokens[j].link === i ) { ~~var token~~text += this.oldText.tokens[j].token; count ++; if ( this.newText.tokens[i].unique === true ) { unique = true; } ~~text += token;~~ i = this.newText.tokens[i].next; j = this.oldText.tokens[j].next; Line 2,206 ⟶ 2,248: for ( var block = 0; block < blocksLength; block ++ ) { blocks[block].newBlock = block; } if ( this.config.timer === true ) { this.timeEnd( 'getSameBlocks' ); } return; Line 2,219 ⟶ 2,265: / this.getSections = function () { if ( this.config.timer === true ) { this.time( 'getSections' ); } var blocks = this.blocks; Line 2,264 ⟶ 2,314: block = sectionEnd; } } if ( this.config.timer === true ) { this.timeEnd( 'getSections' ); } return; Line 2,276 ⟶ 2,329: / this.getGroups = function () { if ( this.config.timer === true ) { this.time( 'getGroups' ); } var blocks = this.blocks; Line 2,346 ⟶ 2,403: } ); block = groupEnd; // Set global word count of longest linked block if ( maxWords > this.maxWords ) { this.maxWords = maxWords; } } } if ( this.config.timer === true ) { this.timeEnd( 'getGroups' ); } return; Line 2,360 ⟶ 2,425: / this.setFixed = function () { if ( this.config.timer === true ) { this.time( 'setFixed' ); } var blocks = this.blocks; Line 2,399 ⟶ 2,468: } } } if ( this.config.timer === true ) { this.timeEnd( 'setFixed' ); } return; Line 2,455 ⟶ 2,527: return returnObj; }; /** * Convert matching '=' blocks in groups into insertion/deletion ('+'/'-') pairs * if too short and too common. * Prevents fragmentated diffs for very different versions. * * @param[in] array blocks Blocks table object * @param[in/out] WikEdDiffText newText, oldText Text object, linked property * @param[in/out] array groups Groups table object * @return bool True if text tokens were unlinked / this.unlinkBlocks = function () { var blocks = this.blocks; var groups = this.groups; // Cycle through groups var unlinked = false; var groupsLength = groups.length; for ( var group = 0; group < groupsLength; group ++ ) { var blockStart = groups[group].blockStart; var blockEnd = groups[group].blockEnd; // Unlink whole group if no block is at least blockMinLength words long and unique if ( groups[group].maxWords < this.config.blockMinLength && groups[group].unique === false ) { for ( var block = blockStart; block <= blockEnd; block ++ ) { if ( blocks[block].type === '=' ) { this.unlinkSingleBlock( blocks[block] ); unlinked = true; } } } // Otherwise unlink block flanks else { // Unlink blocks from start for ( var block = blockStart; block <= blockEnd; block ++ ) { if ( blocks[block].type === '=' ) { // Stop unlinking if more than one word or a unique word if ( blocks[block].words > 1 \|\| blocks[block].unique === true ) { break; } this.unlinkSingleBlock( blocks[block] ); unlinked = true; blockStart = block; } } // Unlink blocks from end for ( var block = blockEnd; block > blockStart; block -- ) { if ( blocks[block].type === '=' ) { // Stop unlinking if more than one word or a unique word if ( blocks[block].words > 1 \|\| ( blocks[block].words === 1 && blocks[block].unique === true ) ) { break; } this.unlinkSingleBlock( blocks[block] ); unlinked = true; } } } } return unlinked; }; /* * Unlink text tokens of single block, convert them into into insertion/deletion ('+'/'-') pairs. * * @param[in] array blocks Blocks table object * @param[out] WikEdDiffText newText, oldText Text objects, link property / this.unlinkSingleBlock = function ( block ) { // Cycle through old text var j = block.oldStart; for ( var count = 0; count < block.count; count ++ ) { // Unlink tokens this.newText.tokens[ this.oldText.tokens[j].link ].link = null; this.oldText.tokens[j].link = null; j = this.oldText.tokens[j].next; } return; }; Line 2,465 ⟶ 2,628: / this.getDelBlocks = function () { if ( this.config.timer === true ) { this.time( 'getDelBlocks' ); } var blocks = this.blocks; Line 2,512 ⟶ 2,679: } } } if ( this.config.timer === true ) { this.timeEnd( 'getDelBlocks' ); } return; Line 2,533 ⟶ 2,703: / this.positionDelBlocks = function () { if ( this.config.timer === true ) { this.time( 'positionDelBlocks' ); } var blocks = this.blocks; Line 2,628 ⟶ 2,802: this.sortBlocks(); if ( this.config.timer === true ) { ~~return;~~ this.timeEnd( 'positionDelBlocks' ); }; /* * Convert matching '=' blocks in groups into insertion/deletion ('+'/'-') pairs * if too short and too common. * Prevents fragmentated diffs for very different versions. * * @param[in] array blocks Blocks table object * @param[in/out] WikEdDiffText newText, oldText Text object, linked property * @param[in/out] array groups Groups table object * @return bool True if text tokens were unlinked / ~~this.unlinkBlocks = function () {~~ ~~var blocks = this.blocks;~~ ~~var groups = this.groups;~~ ~~// Cycle through groups~~ ~~var unlinked = false;~~ ~~var groupsLength = groups.length;~~ ~~for ( var group = 0; group < groupsLength; group ++ ) {~~ ~~var blockStart = groups[group].blockStart;~~ ~~var blockEnd = groups[group].blockEnd;~~ ~~// Unlink whole group if no block is at least blockMinLength words long and unique~~ ~~if ( groups[group].maxWords < this.config.blockMinLength && groups[group].unique === false ) {~~ ~~for ( var block = blockStart; block <= blockEnd; block ++ ) {~~ ~~if ( blocks[block].type === '=' ) {~~ ~~this.unlinkSingleBlock( blocks[block] );~~ ~~unlinked = true;~~ } } } ~~// Otherwise unlink block flanks~~ ~~else {~~ ~~// Unlink blocks from start~~ ~~for ( var block = blockStart; block <= blockEnd; block ++ ) {~~ ~~if ( blocks[block].type === '=' ) {~~ ~~// Stop unlinking if more than one word or a unique word~~ ~~if ( blocks[block].words > 1 \|\| blocks[block].unique === true ) {~~ ~~break;~~ } ~~this.unlinkSingleBlock( blocks[block] );~~ ~~unlinked = true;~~ ~~blockStart = block;~~ } } ~~// Unlink blocks from end~~ ~~for ( var block = blockEnd; block > blockStart; block -- ) {~~ ~~if ( blocks[block].type === '=' ) {~~ ~~// Stop unlinking if more than one word or a unique word~~ ~~if (~~ ~~blocks[block].words > 1 \|\|~~ ~~( blocks[block].words === 1 && blocks[block].unique === true )~~ ~~) {~~ ~~break;~~ } ~~this.unlinkSingleBlock( blocks[block] );~~ ~~unlinked = true;~~ } } } } ~~return unlinked;~~ }; /* * Unlink text tokens of single block, convert them into into insertion/deletion ('+'/'-') pairs. * * @param[in] array blocks Blocks table object * @param[out] WikEdDiffText newText, oldText Text objects, link property / ~~this.unlinkSingleBlock = function ( block ) {~~ ~~// Cycle through old text~~ ~~var j = block.oldStart;~~ ~~for ( var count = 0; count < block.count; count ++ ) {~~ ~~// Unlink tokens~~ ~~this.newText.tokens[ this.oldText.tokens[j].link ].link = null;~~ ~~this.oldText.tokens[j].link = null;~~ ~~j = this.oldText.tokens[j].next;~~ } return; Line 2,729 ⟶ 2,816: / this.getInsBlocks = function () { if ( this.config.timer === true ) { this.time( 'getInsBlocks' ); } var blocks = this.blocks; Line 2,776 ⟶ 2,867: this.sortBlocks(); if ( this.config.timer === true ) { this.timeEnd( 'getInsBlocks' ); } return; }; Line 2,825 ⟶ 2,919: / this.setInsGroups = function () { if ( this.config.timer === true ) { this.time( 'setInsGroups' ); } var blocks = this.blocks; Line 2,865 ⟶ 2,963: } ); } } if ( this.config.timer === true ) { this.timeEnd( 'setInsGroups' ); } return; Line 2,892 ⟶ 2,993: / this.insertMarks = function () { if ( this.config.timer === true ) { this.time( 'insertMarks' ); } var blocks = this.blocks; Line 3,025 ⟶ 3,130: this.sortBlocks(); if ( this.config.timer === true ) { this.timeEnd( 'insertMarks' ); } return; }; Line 3,522 ⟶ 3,630: // Remove split element fragments.splice( fragment, 1 ); fragmentsLength --; // Add left text to fragments list if ( rangeLeft !== null ) { fragments.splice( fragment ++, 0, { text: textLeft, type: '=', color: null } ); fragmentsLength ++; if ( omittedLeft !== null ) { fragments.splice( fragment ++, 0, { text: '', type: omittedLeft, color: null } ); fragmentsLength ++; } } Line 3,536 ⟶ 3,647: fragments.splice( fragment ++, 0, { text: '', type: ',', color: null } ); fragments.splice( fragment ++, 0, { text: '', type: '[', color: null } ); fragmentsLength += 3; } Line 3,542 ⟶ 3,654: if ( omittedRight !== null ) { fragments.splice( fragment ++, 0, { text: '', type: omittedRight, color: null } ); fragmentsLength ++; } fragments.splice( fragment ++, 0, { text: textRight, type: '=', color: null } ); fragmentsLength ++; } } Line 4,055 ⟶ 4,169: * * @param string label Timer label * @param[out] array timer Current time in milliseconds (float) / this.time = function ( label ) { Line 4,071 ⟶ 4,185: @param string label Timer label * @param bool noLog Do not log result * @return float Time in milliseconds~~, rounded to two decimal digits~~ / this.timeEnd = function ( label, noLog ) { Line 4,082 ⟶ 4,196: this.timer[label] = undefined; if ( noLog !== true ) { console.log( label + ': ' + diff.toFixed( 2 ) + ' ms' ); } } Line 4,109 ⟶ 4,223: var timerLength = this.recursionTimer.length; for ( var i = 0; i < timerLength; i ++ ) { console.log( text + ' recursion ' + i + ': ' + this.recursionTimer[i].toFixed( 2 ) + ' ms\n' ); } } Line 4,251 ⟶ 4,365: this.text = text.replace( /\r\n?/g, '\n'); // ~~parse~~Parse and count words and chunks for identification of unique real words if ( this.parent.config.timer === true ) { this.parent.time( 'wordParse' ); Line 4,273 ⟶ 4,387: this.wordParse = function ( regExp ) { var regExpMatch = this.text.match( regExp ); ~~while (~~if ( regExpMatch ~~= regExp.exec( this.text ) )~~ !== null ) { var ~~word~~matchLength = regExpMatch~~[0]~~.length; iffor (var ~~this.words[word]~~i === ~~undefined~~0; i < matchLength; i ++) { ~~this.words[~~var word] = 1regExpMatch[i]; if ( Object.prototype.hasOwnProperty.call( this.words, word ) === false ) { } this.words[word] = 1; ~~else {~~ } ~~this.words[word] ++;~~ else { this.words[word] ++; } } } Line 4,288 ⟶ 4,405: /* * Split text into paragraph, line, sentence, chunk, word, or character tokens. * * @param string level Level of splitting: paragraph, line, sentence, chunk, word, or character * @param int\|null token Index of token to be split, otherwise uses full text * @param[in] string text Full text to be split Line 4,319 ⟶ 4,436: var regExpMatch; var lastIndex = 0; ~~while~~var ~~( ( regExpMatch~~regExp = this.parent.config.regExp.split[level]~~.exec( text ) ) !== null ) {~~; while ( ( regExpMatch = regExp.exec( text ) ) !== null ) { if ( regExpMatch.index > lastIndex ) { split.push( text.substring( lastIndex, regExpMatch.index ) ); } split.push( regExpMatch[0] ); lastIndex = ~~this.parent.config.~~regExp~~.split[level]~~.lastIndex; } if ( lastIndex < text.length ) { Line 4,335 ⟶ 4,453: // Insert current item, link to previous this.tokens~~[current] =~~.push( { token: split[i], prev: prev, Line 4,342 ⟶ 4,460: number: null, unique: false } ); number ++; Line 4,389 ⟶ 4,507: * Split unique unmatched tokens into smaller tokens. * * @param string level Level of splitting: line, sentence, chunk, or word * @param[in] array tokens Tokens list */ Line 4,396 ⟶ 4,514: // Cycle through tokens list var i = this.first; while ( i ~~!== null && this.tokens[i]~~ !== null ) { // Refine unique unmatched tokens into smaller tokens Line 4,418 ⟶ 4,536: var number = 0; var i = this.first; while ( i ~~!== null && this.tokens[i]~~ !== null ) { this.tokens[i].number = number; number ++; Line 4,440 ⟶ 4,558: dump += '\ni \tlink \t(prev \tnext) \tuniq \t#num \t"token"\n'; var i = this.first; while ( i ~~!== null && tokens[i]~~ !== null ) { dump += i + ' \t' + tokens[i].link + ' \t(' + tokens[i].prev + ' \t' + tokens[i].next + ') \t' +