User:Phlsph7/Readability.js

This is an old revision of this page, as edited by Phlsph7 (talk | contribs) at 17:09, 11 May 2023 (update). The present address (URL) is a permanent link to this revision, which may differ significantly from the current revision.
Note: After saving, you have to bypass your browser's cache to see the changes. Google Chrome, Firefox, Microsoft Edge and Safari: Hold down the ⇧ Shift key and click the Reload toolbar button. For details and instructions about other browsers, see Wikipedia:Bypass your cache.
/* Userscript to highlight sentences by readability */

/**
 * Goes through all the p-elements and splits their content into span-elements.
 * Each span gets the class "sentence" and corresponds to one sentence.
 *
 * Paragraphs whose text (line breaks removed) has 20 characters or fewer are
 * skipped, as is the paragraph inside the vector skin's limited-width popup.
 * Afterwards, spans whose text is shorter than 10 characters lose the
 * "sentence" class again.
 */
function divideParagraphsIntoSentences(){
	// Periods are the main guide for where sentences start and end.
	// However, not all periods mark sentences, like in different forms of abbreviations.
	// Placeholders are used for these exceptions while a text is being split.
	const periodPlaceholder = 'PERIOD_PLACEHOLDER';
	const abbreviations = ['Mr.', 'Mrs.', 'Dr.', 'Jr.', 'Sr.', 'Prof.', 'St.', 'Ave.', 'Corp.', 'Inc.', 'Ltd.', 'Co.', 'Gov.', 'Capt.', 'Sgt.', 'i.e.', 'e.g.', 'etc.', 'a.m.', 'p.m.', 'i.a.', 'N.B.', 'et al.', 'vs.', 'e.t.a.'];
	// Initials: a space, a capital letter, and a period (" A." to " Z.").
	const initials = Array.from('ABCDEFGHIJKLMNOPQRSTUVWXYZ', function(letter){
		return ' ' + letter + '.';
	});
	// A period directly followed by a lowercase letter or a digit (".a", ".5").
	// This also covers decimal numbers such as "3.14".
	const innerPeriods = Array.from('abcdefghijklmnopqrstuvwxyz0123456789', function(character){
		return '.' + character;
	});
	// The order matters: abbreviations come first so that "i.e." is protected
	// as a whole, including its final period.
	const periodExceptions = abbreviations.concat(initials, innerPeriods);

	const sentenceStartTag = '<span class="sentence">';
	const sentenceEndTag = '</span>';

	// Array.from takes a snapshot, so the loop does not depend on a live collection.
	for(const paragraph of Array.from(document.getElementsByTagName('p'))){
		const text = paragraph.textContent.split('\r').join('').split('\n').join('');

		// exclude very short paragraphs
		if(text.length <= 20){
			continue;
		}

		// exclude a paragraph found in vector skin
		if(paragraph.parentElement.classList.contains('vector-limited-width-popup-body')){
			continue;
		}

		divideIntoSentences(paragraph);
	}

	// Remove very short sentences.
	// A snapshot is required here: getElementsByClassName returns a live collection,
	// and removing the class while iterating it would skip the following element.
	for(const sentenceElement of Array.from(document.getElementsByClassName('sentence'))){
		if(getSentenceText(sentenceElement).trim().length < 10){
			sentenceElement.classList.remove('sentence');
		}
	}

	// Split the content of a p-element into span-elements. Each span corresponds to a sentence.
	// Only text nodes that are direct children of the paragraph are split;
	// child elements are kept whole inside the current sentence span.
	function divideIntoSentences(paragraph){
		// Merge adjacent text nodes so that an exception like "Mr." is not cut in two.
		paragraph.normalize();

		let html = sentenceStartTag;
		for(const child of Array.from(paragraph.childNodes)){
			if(child.nodeType === Node.TEXT_NODE){
				html += splitTextIntoSentences(child.nodeValue);
			}
			else if(child.nodeType === Node.ELEMENT_NODE){
				html += child.outerHTML;
			}
			else if(child.nodeType === Node.COMMENT_NODE){
				// keep comments as comments instead of turning them into visible text
				html += '<!--' + child.nodeValue + '-->';
			}
		}
		html += sentenceEndTag;

		paragraph.innerHTML = html;
	}

	// Turns the value of a text node into HTML in which every '.', '!' and '?'
	// closes the current sentence span and opens the next one.
	function splitTextIntoSentences(text){
		let result = text;

		// hide the periods that do not end a sentence
		for(const periodException of periodExceptions){
			const placeholderExpression = periodException.split('.').join(periodPlaceholder);
			result = result.split(periodException).join(placeholderExpression);
		}

		// The text is about to be parsed as HTML, so its special characters have to be escaped.
		// Otherwise literal "<" or "&lt;" in the article text would be interpreted as markup.
		result = escapeHtml(result);

		for(const punctuation of ['.', '!', '?']){
			result = result.split(punctuation).join(punctuation + sentenceEndTag + sentenceStartTag);
		}

		// Now the placeholders can be removed again.
		return result.split(periodPlaceholder).join('.');
	}

	function escapeHtml(text){
		return text
			.split('&').join('&amp;')
			.split('<').join('&lt;')
			.split('>').join('&gt;');
	}
}

/**
 * Rates the readability of every element with the class "sentence".
 *
 * Each sentence element gets a title attribute showing its score and one of
 * ten score classes. A style sheet that colors the score classes is added to
 * the document head. Calling the function again replaces the score classes
 * of the sentences and does not add a second style sheet.
 */
function rateSentences(){
	// class names for different scores, from the lowest to the highest score range
	const scoreClasses = ['score-10-0', 'score-20-10', 'score-30-20', 'score-40-30', 'score-50-40', 'score-60-50', 'score-70-60', 'score-80-70', 'score-90-80', 'score-100-90'];

	// css colors for the different score classes
	const scoreColors = ['rgb(255,128,128)', 'rgb(234,149,128)', 'rgb(213,170,128)', 'rgb(192,192,128)', 'rgb(170,213,128)', 'rgb(149,234,128)', 'rgb(128,255,128)', 'rgb(128,255,128)', 'rgb(128,255,192)', 'rgb(128,255,255)'];

	// id used to recognize a style sheet added by an earlier call
	const styleSheetId = 'readability-score-styles';

	// Loop through all sentences, add their score class and their title attribute.
	for(const sentenceElement of Array.from(document.body.getElementsByClassName('sentence'))){
		const score = getSentenceScore(getSentenceText(sentenceElement));
		sentenceElement.title = `Score: ${score.toFixed(2)}`;

		// remove score classes left over from an earlier call
		sentenceElement.classList.remove(...scoreClasses);

		const scoreClass = getScoreClass(score, scoreClasses);
		if(scoreClass !== null){
			sentenceElement.classList.add(scoreClass);
		}
	}

	// Add the style sheet to color the score classes.
	addScoreStyleSheet(scoreClasses, scoreColors);

	// Returns the class for a score: one class per 10 points, clamped to the first
	// and last class. Returns null for NaN, which has no meaningful class.
	function getScoreClass(score, scoreClasses){
		if(Number.isNaN(score)){
			return null;
		}
		const lastIndex = scoreClasses.length - 1;
		const index = Math.min(Math.max(Math.floor(score / 10), 0), lastIndex);
		return scoreClasses[index];
	}

	function addScoreStyleSheet(scoreClasses, scoreColors){
		if(document.getElementById(styleSheetId) !== null){
			return;
		}

		const style = document.createElement('style');
		style.id = styleSheetId;
		// build the rules first and assign them once
		style.textContent = scoreClasses.map(function(scoreClass, i){
			return `.${scoreClass} {background-color: ${scoreColors[i]}; } `;
		}).join('');
		document.head.appendChild(style);
	}
}

/**
 * Creates an overview at the top of the element with the id "mw-content-text".
 *
 * The overview consists of a table with the readability score, the grade level
 * and the number of sentences, words, and syllables of all sentence elements,
 * followed by a button that shows or hides a second table. The second table
 * lists all sentences ordered by lowest score.
 *
 * If the page has no "mw-content-text" element, nothing is inserted.
 * An overview created by an earlier call is replaced.
 */
function createOverview(){
	const overviewId = 'readability-overview';

	const mainDiv = document.getElementById('mw-content-text');
	if(mainDiv === null){
		return;
	}

	// The texts are collected once and used for the totals and for the sentence table.
	const sentenceTexts = Array.from(document.getElementsByClassName('sentence'), function(sentenceElement){
		return getSentenceText(sentenceElement);
	});

	// Readability depends on the number of syllables, words, and sentences
	const totalSentenceCount = sentenceTexts.length;
	let totalWordCount = 0;
	let totalSyllableCount = 0;
	for(const sentenceText of sentenceTexts){
		const words = getWords(sentenceText);
		totalWordCount += words.length;
		for(const word of words){
			totalSyllableCount += getSyllableCount(word);
		}
	}

	const totalReadability = getFleschKincaidReadability(totalSyllableCount, totalWordCount, totalSentenceCount);
	const totalGradeLevel = getFleschKincaidGradeLevel(totalSyllableCount, totalWordCount, totalSentenceCount);

	// avoid stacking several overviews when the function is called more than once
	const previousOverview = document.getElementById(overviewId);
	if(previousOverview !== null){
		previousOverview.remove();
	}

	const overviewDiv = document.createElement('div');
	overviewDiv.id = overviewId;
	mainDiv.insertBefore(overviewDiv, mainDiv.firstChild);
	insertOverviewTable(overviewDiv);
	overviewDiv.appendChild(document.createElement('br'));
	insertSentenceTable(overviewDiv);

	function insertOverviewTable(parent){
		const overviewTableBody = appendWikitable(parent, 'Readability overview').body;
		addRow(overviewTableBody, 'Readability', totalReadability.toFixed(2));
		addRow(overviewTableBody, 'Grade level', totalGradeLevel.toFixed(2));
		addRow(overviewTableBody, 'Number of sentences', totalSentenceCount);
		addRow(overviewTableBody, 'Number of words', totalWordCount);
		addRow(overviewTableBody, 'Number of syllables', totalSyllableCount);
	}

	// sentence table to display all sentences ordered by lowest score
	function insertSentenceTable(parent){
		const showLabel = 'Show sentences ordered by lowest score';
		const hideLabel = 'Hide sentences ordered by lowest score';

		// button to show/hide the table
		const sentenceTableButton = document.createElement('button');
		// an explicit type keeps the button from acting as a submit button
		sentenceTableButton.type = 'button';
		sentenceTableButton.textContent = showLabel;
		parent.appendChild(sentenceTableButton);

		// the table itself, hidden until the button is clicked
		const sentenceTableParts = appendWikitable(parent, 'Sentences ordered by lowest score');
		const sentenceTable = sentenceTableParts.table;
		sentenceTable.style.display = 'none';

		// one entry per sentence, sorted by lowest score
		const ratedSentences = sentenceTexts.map(function(sentenceText){
			return {text: sentenceText, score: getSentenceScore(sentenceText)};
		});
		ratedSentences.sort(function(a, b){
			return a.score - b.score;
		});

		for(const ratedSentence of ratedSentences){
			addRow(sentenceTableParts.body, ratedSentence.text, ratedSentence.score.toFixed(2));
		}

		// show/hide function of the button
		sentenceTableButton.onclick = function(){
			const isHidden = sentenceTable.style.display === 'none';
			sentenceTable.style.display = isHidden ? '' : 'none';
			sentenceTableButton.textContent = isHidden ? hideLabel : showLabel;
		};
	}

	// utility function to append an empty wikitable with a caption to a parent element
	function appendWikitable(parent, captionText){
		const table = document.createElement('table');
		table.classList.add('wikitable');
		parent.appendChild(table);

		const caption = document.createElement('caption');
		caption.textContent = captionText;
		table.appendChild(caption);

		const body = document.createElement('tbody');
		table.appendChild(body);

		return {table: table, body: body};
	}

	// utility function to add rows to a table
	// The cells are filled through textContent: the sentence texts come from the
	// article and must not be interpreted as HTML.
	function addRow(tableBody, name, value){
		const row = document.createElement('tr');

		const nameCell = document.createElement('td');
		nameCell.textContent = name;
		row.appendChild(nameCell);

		const valueCell = document.createElement('td');
		valueCell.textContent = value;
		row.appendChild(valueCell);

		tableBody.appendChild(row);
	}
}

/**
 * Utility function to extract the text from a sentence element.
 * References and certain inline templates (elements matching
 * '.reference, .Inline-Template') are left out of the text.
 *
 * @param {HTMLElement} sentenceElement - the element to read
 * @returns {string} the innerText of the element while those elements are hidden
 */
function getSentenceText(sentenceElement){
	const hiddenElements = Array.from(sentenceElement.querySelectorAll('.reference, .Inline-Template'));

	// Remember the inline display values so that they can be restored exactly.
	// Resetting them to '' would discard a value an element had before.
	const previousDisplayValues = hiddenElements.map(function(element){
		return element.style.display;
	});

	// hide references and certain templates
	for(const element of hiddenElements){
		element.style.display = 'none';
	}

	// the innerText attribute ignores hidden elements
	const sentenceText = sentenceElement.innerText;

	// show them again
	hiddenElements.forEach(function(element, index){
		element.style.display = previousDisplayValues[index];
	});

	return sentenceText;
}

/**
 * Utility function to get the readability score of a sentence.
 *
 * @param {string} sentenceText - the text of one sentence
 * @returns {number} the result of getFleschKincaidReadability for the
 *   syllables and words of the text, with a sentence count of 1
 */
function getSentenceScore(sentenceText){
	const tokens = getWords(sentenceText);

	// add up the syllables of all words
	const syllableTotal = tokens.reduce(function(sum, token){
		return sum + getSyllableCount(token);
	}, 0);

	// the text is rated as exactly one sentence
	return getFleschKincaidReadability(syllableTotal, tokens.length, 1);
}

/**
 * Utility function: this is the main metric.
 * Computes 206.835 - 1.015 * (words per sentence) - 84.6 * (syllables per word).
 *
 * @param {number} syllableCount - total number of syllables
 * @param {number} wordCount - total number of words
 * @param {number} sentenceCount - total number of sentences
 * @returns {number} the readability score; not a finite number if a divisor is 0
 */
function getFleschKincaidReadability(syllableCount, wordCount, sentenceCount){
	const sentenceLengthPenalty = 1.015 * (wordCount / sentenceCount);
	const wordLengthPenalty = 84.6 * (syllableCount / wordCount);
	return 206.835 - sentenceLengthPenalty - wordLengthPenalty;
}

/**
 * Utility function: this shows the grade level and is used for the overview.
 * Computes 0.39 * (words per sentence) + 11.8 * (syllables per word) - 15.59.
 *
 * @param {number} syllableCount - total number of syllables
 * @param {number} wordCount - total number of words
 * @param {number} sentenceCount - total number of sentences
 * @returns {number} the grade level; not a finite number if a divisor is 0
 */
function getFleschKincaidGradeLevel(syllableCount, wordCount, sentenceCount){
	const sentenceLengthTerm = 0.39 * (wordCount / sentenceCount);
	const wordLengthTerm = 11.8 * (syllableCount / wordCount);
	return sentenceLengthTerm + wordLengthTerm - 15.59;
}

/**
 * Utility function to extract words from a sentence.
 *
 * Punctuation marks and symbols are removed without a replacement, so
 * "well-known" and "don't" each stay a single word. En and em dashes separate
 * words. The remaining text is split at any run of whitespace, which includes
 * non-breaking spaces, tabs, and line breaks.
 *
 * @param {string} sentenceText - the text of a sentence
 * @returns {string[]} the words of the sentence; empty if there are none
 */
function getWords(sentenceText){
	// en dash and em dash stand between words, spaced or not
	const dashPattern = /[\u2013\u2014]/g;

	// ASCII punctuation and symbols plus typographic quotes and the ellipsis
	const punctuationPattern = /[.?!,;:"()[\]{}\-\/&*#$%@+=<>|~^\\'\u2018\u2019\u201C\u201D\u2026]/g;

	const cleanedText = sentenceText
		.replace(dashPattern, ' ')
		.replace(punctuationPattern, '');

	// leading and trailing whitespace produces empty strings, which are dropped
	return cleanedText.split(/\s+/).filter(function(word){
		return word.length > 0;
	});
}

/**
 * Utility function to count the syllables of a word.
 *
 * The count is an estimate: every group of consecutive vowels (a, e, i, o, u, y)
 * counts as one syllable, and a plain "e" at the end of the word is not counted.
 * Accented vowels that decompose into a base letter and a combining mark
 * (like "é" or "ö") count as vowels.
 *
 * @param {string} word - the word to rate
 * @returns {number} the estimated number of syllables, at least 1
 */
function getSyllableCount(word){
	let letters = word.toLowerCase();

	// a final "e" is treated as silent; an accented final "é" is kept
	// because the check runs before the accents are removed
	if(letters.endsWith('e')){
		letters = letters.slice(0, -1);
	}

	// split accented letters into base letter + combining mark and drop the marks
	letters = letters.normalize('NFD').replace(/[\u0300-\u036f]/g, '');

	const vowelGroups = letters.match(/[aeiouy]+/g);
	if(vowelGroups === null){
		return 1;
	}
	return vowelGroups.length;
}

/**
 * Utility function to iteratively replace a string until no more occurrences are found.
 *
 * Two inputs that could never finish are handled separately:
 * - an empty oldSubstring leaves the string unchanged;
 * - if newSubstring contains oldSubstring, every replacement creates a new
 *   occurrence, so only a single pass over the string is made.
 *
 * @param {string} string - the text to work on
 * @param {string} oldSubstring - the text to search for
 * @param {string} newSubstring - the text to insert instead
 * @returns {string} the text after the replacements
 */
function fullReplace(string, oldSubstring, newSubstring){
	// every string includes '', so the loop below would never end
	if(oldSubstring === ''){
		return string;
	}

	let newString = string.split(oldSubstring).join(newSubstring);

	// repeating the pass would only bring oldSubstring back again and again
	if(newSubstring.includes(oldSubstring)){
		return newString;
	}

	// a pass can join the text around a replacement into a new occurrence,
	// e.g. three spaces become two when '  ' is replaced by ' '
	while(newString.includes(oldSubstring)){
		newString = newString.split(oldSubstring).join(newSubstring);
	}
	return newString;
}

// anonymous main function
// Adds a "Readability" link to the toolbox. The first click on the link splits the
// paragraphs into sentences, rates them, and creates the overview.
(function(){
	// restrict script to mainspace, userspace, and draftspace
	const allowedNamespaces = [0, 2, 118];
	const namespaceNumber = mw.config.get('wgNamespaceNumber');
	if(allowedNamespaces.indexOf(namespaceNumber) === -1){
		return;
	}

	// add a link to the toolbox
	$.when(mw.loader.using('mediawiki.util'), $.ready).then(function(){
		const portletLink = mw.util.addPortletLink('p-tb', '#', 'Readability');

		// no link is returned if the portlet could not be found
		if(!portletLink){
			return;
		}

		// The script must only run once per page view: a second run would wrap
		// the existing sentence spans in new ones and distort all counts.
		let hasRun = false;

		// run the main function when the link is clicked
		portletLink.onclick = function(e){
			e.preventDefault();
			if(hasRun){
				return;
			}
			hasRun = true;

			divideParagraphsIntoSentences();
			rateSentences();
			createOverview();
		};
	});
})();