User:Phlsph7/Readability.js

This is an old revision of this page, as edited by Phlsph7 (talk | contribs) at 16:26, 11 May 2023 (update). The present address (URL) is a permanent link to this revision, which may differ significantly from the current revision.
Note: After saving, you have to bypass your browser's cache to see the changes. Google Chrome, Firefox, Microsoft Edge and Safari: Hold down the ⇧ Shift key and click the Reload toolbar button. For details and instructions about other browsers, see Wikipedia:Bypass your cache.
/* wikiscript to mark readability */

// Step 1: wrap the sentences of every sufficiently long paragraph in
// <span class="sentence"> elements.
// getElementsByTagName returns a live collection; take a snapshot so that the
// DOM rewriting done by divideIntoSentences cannot disturb the iteration.
const paragraphs = Array.from(document.getElementsByTagName('p'));
for(const paragraph of paragraphs){
	const text = paragraph.textContent.split('\r').join('').split('\n').join('');

	// exclude very short paragraphs
	if(text.length <= 20){
		continue;
	}

	// exclude paragraph found in vector skin
	if(paragraph.parentElement.classList.contains('vector-limited-width-popup-body')){
		continue;
	}

	divideIntoSentences(paragraph);
}

// Step 2: drop the marker class from fragments that are too short to be rated.
// getElementsByClassName is live as well: removing the class while iterating
// the live collection shrinks it and makes the loop skip the following
// element, so iterate over a static copy instead.
const sentenceElements = Array.from(document.getElementsByClassName('sentence'));
for(const sentenceElement of sentenceElements){
	const sentenceText = getSentenceText(sentenceElement);
	if(sentenceText.trim().length < 10){
		sentenceElement.classList.remove('sentence');
	}
}

// Step 3: colour the remaining sentences and show the summary tables.
rateSentences();
createOverview();

/**
 * Rewrites the content of a paragraph so that every sentence found in its
 * direct text nodes is wrapped in a <span class="sentence"> element.
 *
 * Sentences are ended by '.', '!' or '?' inside direct text nodes. Periods
 * that belong to a known abbreviation, an initial, or that are directly
 * followed by a lowercase letter or digit are temporarily replaced by a
 * placeholder so that they do not end a sentence.
 *
 * The trailing span (whatever follows the last terminator) is not a complete
 * sentence, so its 'sentence' class is removed again.
 *
 * @param {Element} paragraph - paragraph element whose innerHTML is rewritten.
 */
function divideIntoSentences(paragraph){
	const periodPlaceholder = 'PERIOD_PLACEHOLDER';
	const spanStartTag = '<span class="sentence">';
	const spanEndTag = '</span>';
	const terminators = ['.', '!', '?'];

	// Order matters: multi-period abbreviations must be handled before the
	// generic "period followed by letter/digit" patterns.
	const namedAbbreviations = ['Mr.', 'Mrs.', 'Dr.', 'Jr.', 'Sr.', 'Prof.', 'St.', 'Ave.', 'Corp.', 'Inc.', 'Ltd.', 'Co.', 'Gov.', 'Capt.', 'Sgt.', 'i.e.', 'e.g.', 'etc.', 'a.m.', 'p.m.', 'i.a.', 'N.B.', 'et al.', 'vs.', 'e.t.a.'];
	// Initials such as " A." (the previous list had " Z" without its period,
	// which made that entry a no-op).
	const initials = Array.from('ABCDEFGHIJKLMNOPQRSTUVWXYZ', function(letter){
		return ` ${letter}.`;
	});
	// A period directly followed by a lowercase letter or a digit (e.g. "3.14").
	// This already covers every "digit.digit" pair, so no separate decimal
	// entries are needed.
	const periodPrefixed = Array.from('abcdefghijklmnopqrstuvwxyz0123456789', function(character){
		return `.${character}`;
	});
	const abbreviations = namedAbbreviations.concat(initials, periodPrefixed);

	// prepare text with placeholders
	paragraph.innerHTML = insertPlaceholders(paragraph.innerHTML, abbreviations);

	let innerHTML = spanStartTag;
	for(let currentChild = paragraph.firstChild; currentChild; currentChild = currentChild.nextSibling){
		if(currentChild.nodeType === Node.TEXT_NODE){
			// nodeValue is decoded text; it has to be escaped again before it
			// is put back into markup, otherwise a literal "<" or "&" in the
			// article text would be parsed as HTML.
			innerHTML += splitAtTerminators(escapeHtml(currentChild.nodeValue));
		}
		else if(currentChild.nodeType === Node.ELEMENT_NODE){
			// elements are kept as they are; periods inside them do not split
			innerHTML += currentChild.outerHTML;
		}
		else if(currentChild.nodeType === Node.COMMENT_NODE){
			// keep comments as comments instead of turning them into visible text
			innerHTML += `<!--${currentChild.nodeValue}-->`;
		}
		// any other node type carries no renderable content and is dropped
	}
	innerHTML += spanEndTag;

	paragraph.innerHTML = removePlaceholders(innerHTML, abbreviations);

	// The generated markup consists only of top-level sentence spans, so the
	// last element child is the trailing span. Looking it up this way avoids
	// picking a span nested inside the last sentence (e.g. inside a reference).
	const trailingSpan = paragraph.lastElementChild;
	if(trailingSpan){
		trailingSpan.classList.remove('sentence');
	}

	// Escapes the characters that are significant in HTML text content.
	function escapeHtml(text){
		return text
			.split('&').join('&amp;')
			.split('<').join('&lt;')
			.split('>').join('&gt;');
	}

	// Closes the current sentence span after every terminator and opens a new one.
	function splitAtTerminators(text){
		let result = text;
		for(const terminator of terminators){
			result = result.split(terminator).join(terminator + spanEndTag + spanStartTag);
		}
		return result;
	}

	// Replaces the periods of every abbreviation occurrence by the placeholder.
	function insertPlaceholders(text, abbreviations){
		let modifiedText = text;
		for(const abbreviation of abbreviations){
			const placeholderExpression = abbreviation.split('.').join(periodPlaceholder);
			modifiedText = modifiedText.split(abbreviation).join(placeholderExpression);
		}
		return modifiedText;
	}

	// Reverts insertPlaceholders: restores the periods of every abbreviation.
	function removePlaceholders(text, abbreviations){
		let modifiedText = text;
		for(const abbreviation of abbreviations){
			const placeholderExpression = abbreviation.split('.').join(periodPlaceholder);
			modifiedText = modifiedText.split(placeholderExpression).join(abbreviation);
		}
		return modifiedText;
	}
}

/**
 * Scores every element with the class 'sentence' and marks it visually.
 *
 * Each sentence gets a tooltip with its score (two decimals) and one of ten
 * score classes; a <style> element with the background colours of these
 * classes is appended to the document head afterwards.
 */
function rateSentences(){
	// One class per band of ten score points, from hardest to easiest.
	const scoreClasses = ['score-10-0', 'score-20-10', 'score-30-20', 'score-40-30', 'score-50-40', 'score-60-50', 'score-70-60', 'score-80-70', 'score-90-80', 'score-100-90'];
	// Background colour of the band at the same index.
	const scoreColors = ['rgb(255,128,128)', 'rgb(234,149,128)', 'rgb(213,170,128)', 'rgb(192,192,128)', 'rgb(170,213,128)', 'rgb(149,234,128)', 'rgb(128,255,128)', 'rgb(128,255,128)', 'rgb(128,255,192)', 'rgb(128,255,255)'];

	for(const sentenceElement of document.body.getElementsByClassName('sentence')){
		const score = getSentenceScore(getSentenceText(sentenceElement));
		sentenceElement.title = `Score: ${score.toFixed(2)}`;

		// Scores below 0 fall into the first band, scores of 100 or more into the last.
		const band = Math.min(9, Math.max(0, Math.floor(score / 10)));
		sentenceElement.classList.add(scoreClasses[band]);
	}

	// One CSS rule per band, written in a single assignment.
	const style = document.createElement('style');
	style.innerHTML = scoreClasses.map(function(scoreClass, position){
		return `.${scoreClass} {background-color: ${scoreColors[position]}; } `;
	}).join('');
	document.head.appendChild(style);
}

/**
 * Returns the rendered text of a sentence element without its references and
 * inline templates (elements matching '.reference, .Inline-Template').
 *
 * innerText skips elements that are not displayed, so the references are
 * hidden while the text is read. Afterwards every reference gets back the
 * inline display value it had before, rather than being reset to ''.
 *
 * @param {HTMLElement} sentenceElement - element whose text is read.
 * @returns {string} the innerText of the element with references hidden.
 */
function getSentenceText(sentenceElement){
	const refs = Array.from(sentenceElement.querySelectorAll('.reference, .Inline-Template'));

	// hide references, remembering their previous inline display value
	const previousDisplays = refs.map(function(ref){
		const previous = ref.style.display;
		ref.style.display = 'none';
		return previous;
	});

	const sentenceText = sentenceElement.innerText;

	// show references again exactly as they were
	refs.forEach(function(ref, index){
		ref.style.display = previousDisplays[index];
	});

	return sentenceText;
}

/**
 * Inserts a readability overview at the top of the element with the id
 * 'mw-content-text'.
 *
 * The overview consists of
 *  - a table with the overall Flesch-Kincaid readability and grade level and
 *    the number of sentences, words and syllables, and
 *  - an initially hidden table listing every sentence with its score, ordered
 *    by lowest score, together with a button that shows or hides it.
 *
 * All texts are written with textContent: the sentence texts come from the
 * page and must never be interpreted as HTML.
 */
function createOverview(){
	const showLabel = 'Show sentences ordered by lowest score';
	const hideLabel = 'Hide sentences ordered by lowest score';

	const mainDif = document.getElementById('mw-content-text');
	if(!mainDif){
		// nothing to attach the overview to
		console.warn('Readability: element #mw-content-text not found, overview skipped');
		return;
	}

	// Collect the totals and the per-sentence scores in one pass, so that the
	// text of each sentence is read only once.
	const sentenceElements = document.getElementsByClassName('sentence');
	const totalSentenceCount = sentenceElements.length;
	let totalSyllableCount = 0;
	let totalWordCount = 0;
	const sentenceMatrix = [];
	for(const sentenceElement of sentenceElements){
		const sentenceText = getSentenceText(sentenceElement);
		const words = getWords(sentenceText);
		totalWordCount += words.length;
		for(const word of words){
			totalSyllableCount += getSyllableCount(word);
		}
		sentenceMatrix.push([sentenceText, getSentenceScore(sentenceText)]);
	}

	// lowest score first
	sentenceMatrix.sort(function(a, b){
		return a[1] - b[1];
	});

	const totalReadability = getFleschKincaidReadability(totalSyllableCount, totalWordCount, totalSentenceCount);
	const totalGradeLevel = getFleschKincaidGradeLevel(totalSyllableCount, totalWordCount, totalSentenceCount);

	const overviewDif = document.createElement('div');

	// overview table
	const overview = createTable('Readability overview');
	overviewDif.appendChild(overview.table);
	addRow(overview.body, 'Readability', totalReadability.toFixed(2));
	addRow(overview.body, 'Grade level', totalGradeLevel.toFixed(2));
	addRow(overview.body, 'Number of sentences', totalSentenceCount);
	addRow(overview.body, 'Number of words', totalWordCount);
	addRow(overview.body, 'Number of syllables', totalSyllableCount);

	// sentences table
	overviewDif.appendChild(document.createElement('br'));

	const sentenceTableButton = document.createElement('button');
	sentenceTableButton.textContent = showLabel;
	overviewDif.appendChild(sentenceTableButton);

	const sentences = createTable('Sentences ordered by lowest score');
	const sentenceTable = sentences.table;
	sentenceTable.style.display = 'none';
	overviewDif.appendChild(sentenceTable);
	for(const entry of sentenceMatrix){
		addRow(sentences.body, entry[0], entry[1].toFixed(2));
	}

	sentenceTableButton.onclick = function(){
		const isHidden = sentenceTable.style.display === 'none';
		sentenceTable.style.display = isHidden ? '' : 'none';
		sentenceTableButton.textContent = isHidden ? hideLabel : showLabel;
	};

	overviewDif.appendChild(document.createElement('br'));

	mainDif.insertBefore(overviewDif, mainDif.firstChild);

	// Creates a 'wikitable' with a caption and an empty body.
	function createTable(captionText){
		const table = document.createElement('table');
		table.classList.add('wikitable');

		const caption = document.createElement('caption');
		caption.textContent = captionText;
		table.appendChild(caption);

		const body = document.createElement('tbody');
		table.appendChild(body);

		return { table: table, body: body };
	}

	// Appends a two-cell row; both values are inserted as plain text.
	function addRow(tableBody, name, value){
		const row = document.createElement('tr');
		for(const content of [name, value]){
			const cell = document.createElement('td');
			cell.textContent = String(content);
			row.appendChild(cell);
		}
		tableBody.appendChild(row);
	}
}

/**
 * Computes the readability score of a single sentence.
 *
 * The text is split into words with getWords, the syllables of all words are
 * summed with getSyllableCount, and the result is passed to
 * getFleschKincaidReadability with a sentence count of 1.
 *
 * @param {string} sentenceText - text of one sentence.
 * @returns {number} the score returned by getFleschKincaidReadability.
 */
function getSentenceScore(sentenceText){
	const words = getWords(sentenceText);
	const syllableCount = words.reduce(function(total, word){
		return total + getSyllableCount(word);
	}, 0);

	return getFleschKincaidReadability(syllableCount, words.length, 1);
}

/**
 * Evaluates 206.835 - 1.015 * (words per sentence) - 84.6 * (syllables per word).
 *
 * No guard against zero counts: the result follows plain floating point
 * division (NaN or an infinity) in that case.
 *
 * @param {number} syllableCount - total number of syllables.
 * @param {number} wordCount - total number of words.
 * @param {number} sentenceCount - total number of sentences.
 * @returns {number} the readability score.
 */
function getFleschKincaidReadability(syllableCount, wordCount, sentenceCount){
	const sentenceLengthPenalty = 1.015 * (wordCount / sentenceCount);
	const wordLengthPenalty = 84.6 * (syllableCount / wordCount);
	return 206.835 - sentenceLengthPenalty - wordLengthPenalty;
}

/**
 * Evaluates 0.39 * (words per sentence) + 11.8 * (syllables per word) - 15.59.
 *
 * No guard against zero counts: the result follows plain floating point
 * division (NaN or an infinity) in that case.
 *
 * @param {number} syllableCount - total number of syllables.
 * @param {number} wordCount - total number of words.
 * @param {number} sentenceCount - total number of sentences.
 * @returns {number} the grade level.
 */
function getFleschKincaidGradeLevel(syllableCount, wordCount, sentenceCount){
	const sentenceLengthTerm = 0.39 * (wordCount / sentenceCount);
	const wordLengthTerm = 11.8 * (syllableCount / wordCount);
	return sentenceLengthTerm + wordLengthTerm - 15.59;
}


/*
function getAutomatedReadability(characterCount, wordCount, sentenceCount){
	return 4.71 * characterCount/wordCount + 0.5 * wordCount/sentenceCount - 21.43;
}
*/
/**
 * Counts the characters of a string that match the character class below:
 * ASCII letters and digits plus the ranges U+00C0-U+1FFF and U+2800-U+FFFD.
 *
 * The string is walked by code point; the pattern has no 'u' flag, so a
 * character outside the basic multilingual plane (two UTF-16 units) never
 * matches and is not counted.
 *
 * @param {string} string - text to inspect.
 * @returns {number} number of matching characters.
 */
function getCharacterCount(string){
	const countable = /^[a-zA-Z0-9\u00C0-\u1FFF\u2800-\uFFFD]$/;
	return Array.from(string).filter(function(character){
		return countable.test(character);
	}).length;
}

/**
 * Splits a sentence into words.
 *
 * The listed punctuation characters are deleted first (so "well-known"
 * becomes the single word "wellknown"). The remaining text is then split at
 * any run of whitespace, so words separated by a line break, tab or no-break
 * space are counted separately, not only those separated by a plain space.
 *
 * @param {string} sentenceText - text of a sentence.
 * @returns {string[]} the non-empty words in their original order.
 */
function getWords(sentenceText){
	const punctuation = new Set('.?!,;:"()[]{}--./&*#$%@+-=<>|~^\\' + "'");

	const withoutPunctuation = Array.from(sentenceText).filter(function(character){
		return !punctuation.has(character);
	}).join('');

	// \s also matches the no-break space (U+00A0)
	return withoutPunctuation.split(/\s+/).filter(function(word){
		return word.length > 0;
	});
}


function getSyllableCount(word){
	word = word.toLowerCase();
	
	if (word.endsWith('e')) {
		word = word.slice(0, -1);
	}
	word = word.split('e').join('a')
		.split('i').join('a')
		.split('o').join('a')
		.split('u').join('a')
		.split('y').join('a')
		.split('e').join('a');
		
	word = fullReplace(word, 'aa', 'a');
	
	let syllableCount = word.split('a').length - 1;
	if(syllableCount < 1){
		syllableCount = 1;
	}
	
	return syllableCount;
}
	
/**
 * Replaces oldSubstring by newSubstring repeatedly until the result no longer
 * contains oldSubstring (e.g. collapsing any run of spaces to a single one).
 *
 * Two inputs can never reach that state and would loop forever, so they are
 * handled separately:
 *  - an empty oldSubstring is contained in every string: the input is
 *    returned unchanged;
 *  - a newSubstring that itself contains oldSubstring re-creates a match with
 *    every replacement: a single replacement pass is done instead.
 *
 * @param {string} string - text to process.
 * @param {string} oldSubstring - text to search for.
 * @param {string} newSubstring - replacement text.
 * @returns {string} the processed text.
 */
function fullReplace(string, oldSubstring, newSubstring){
	if(oldSubstring === ''){
		return string;
	}

	if(newSubstring.includes(oldSubstring)){
		return string.split(oldSubstring).join(newSubstring);
	}

	let newString = string;
	while(newString.includes(oldSubstring)){
		newString = newString.split(oldSubstring).join(newSubstring);
	}
	return newString;
}