User:Phlsph7/Readability.js: Difference between revisions

Content deleted Content added
change order
more consistent labels
 
(25 intermediate revisions by the same user not shown)
Line 1:
/* Userscript to highlight sentences by readability */
 
// Default background colors for the ten score classes, listed from the
// lowest score band to the highest. A value that already exists under this
// name is kept as is, so the defaults are only used when nothing was defined.
// NOTE: this has to stay a `var`; the self-referencing `x = x || ...` pattern
// would throw with `let`/`const`.
var readabilityScoreColors = readabilityScoreColors || [
    [255, 96, 96],
    [255, 128, 128],
    [255, 192, 128],
    [255, 224, 128],
    [255, 255, 128],
    [214, 255, 128],
    [171, 255, 128],
    [128, 255, 128],
    [128, 255, 171],
    [128, 255, 214]
].map(function(channels){
    // turn an [r, g, b] triple into a css color string without spaces
    return 'rgb(' + channels.join(',') + ')';
});
 
 
// Goes through all the p-elements and splits their content into span-elements. Each span corresponds to a sentence.
function divideParagraphsIntoSentences(){
let paragraphs = document.querySelectorAll('.mw-parser-output > p');
// Periods are the main guide for where sentences start and end.
// However, not all periods mark sentences, like in different forms of abbreviations.
// Placeholders are used for exceptions.
let periodPlaceholder = 'PERIOD_PLACEHOLDER';
let exceptionString = '...; Mr.; Mrs.; Dr.; Jr.; Sr.; Prof.; St.; Ave.; Corp.; Inc.; Ltd.; Co.; Gov.; Capt.; Sgt.; et al.; vs.; e.t.a.; .A.; .B.; .C.; .D.; .E.; .F.; .G.; .H.; .I.; .J.; .K.; .L.; .M.; .N.; .O.; .P.; .Q.; .R.; .S.; .T.; .U.; .V.; .W.; .X.; .Y.; .Z.; A.; B.; C.; D.; E.; F.; G.; H.; I.; J.; K.; L.; M.; N.; O.; P.; Q.; R.; S.; T.; U.; V.; W.; X.; Y.; Z.; .a.; .b.; .c.; .d.; .e.; .f.; .g.; .h.; .i.; .j.; .k.; .l.; .m.; .n.; .o.; .p.; .q.; .r.; .s.; .t.; .u.; .v.; .w.; .x.; .y.; .z.; .a; .b; .c; .d; .e; .f; .g; .h; .i; .j; .k; .l; .m; .n; .o; .p; .q; .r; .s; .t; .u; .v; .w; .x; .y; .z; 0.0; 0.1; 0.2; 0.3; 0.4; 0.5; 0.6; 0.7; 0.8; 0.9; 1.0; 1.1; 1.2; 1.3; 1.4; 1.5; 1.6; 1.7; 1.8; 1.9; 2.0; 2.1; 2.2; 2.3; 2.4; 2.5; 2.6; 2.7; 2.8; 2.9; 3.0; 3.1; 3.2; 3.3; 3.4; 3.5; 3.6; 3.7; 3.8; 3.9; 4.0; 4.1; 4.2; 4.3; 4.4; 4.5; 4.6; 4.7; 4.8; 4.9; 5.0; 5.1; 5.2; 5.3; 5.4; 5.5; 5.6; 5.7; 5.8; 5.9; 6.0; 6.1; 6.2; 6.3; 6.4; 6.5; 6.6; 6.7; 6.8; 6.9; 7.0; 7.1; 7.2; 7.3; 7.4; 7.5; 7.6; 7.7; 7.8; 7.9; 8.0; 8.1; 8.2; 8.3; 8.4; 8.5; 8.6; 8.7; 8.8; 8.9; 9.0; 9.1; 9.2; 9.3; 9.4; 9.5; 9.6; 9.7; 9.8; 9.9. .0; .1; .2; .3; .4; .5; .6; .7; .8; .9;';
let exceptionStringSeparator = '; ';
let periodExceptions = exceptionString.split(exceptionStringSeparator);
let periodExceptionPlaceholders = exceptionString.split('.').join(periodPlaceholder).split(exceptionStringSeparator);
for(let paragraph of paragraphs){
let textContent = paragraph.textContent.split('\r').join('').split('\n').join('').trim();
Line 9 ⟶ 33:
// exclude very short paragraphs
if(textContent.length > 20){
divideIntoSentences(paragraph, periodExceptions, periodExceptionPlaceholders);
}
}
Line 23 ⟶ 47:
 
// Split the content of a p-element into span-elements. Each span corresponds to a sentence.
function divideIntoSentences(paragraph, periodExceptions, periodExceptionPlaceholders){
// Loop through all the nodes inside the p-element.
// Periods are the main guide for where sentences start and end.
// However, not all periods mark sentences, like in different forms of abbreviations.
// Placeholders are used for exceptions.
let periodPlaceholder = 'PERIOD_PLACEHOLDER';
let periodExceptions = 'Mr.; Mrs.; Dr.; Jr.; Sr.; Prof.; St.; Ave.; Corp.; Inc.; Ltd.; Co.; Gov.; Capt.; Sgt.; et al.; vs.; e.t.a.; .A.; .B.; .C.; .D.; .E.; .F.; .G.; .H.; .I.; .J.; .K.; .L.; .M.; .N.; .O.; .P.; .Q.; .R.; .S.; .T.; .U.; .V.; .W.; .X.; .Y.; .Z.; A.; B.; C.; D.; E.; F.; G.; H.; I.; J.; K.; L.; M.; N.; O.; P.; Q.; R.; S.; T.; U.; V.; W.; X.; Y.; Z.; .a.; .b.; .c.; .d.; .e.; .f.; .g.; .h.; .i.; .j.; .k.; .l.; .m.; .n.; .o.; .p.; .q.; .r.; .s.; .t.; .u.; .v.; .w.; .x.; .y.; .z.; .a; .b; .c; .d; .e; .f; .g; .h; .i; .j; .k; .l; .m; .n; .o; .p; .q; .r; .s; .t; .u; .v; .w; .x; .y; .z; 0.0; 0.1; 0.2; 0.3; 0.4; 0.5; 0.6; 0.7; 0.8; 0.9; 1.0; 1.1; 1.2; 1.3; 1.4; 1.5; 1.6; 1.7; 1.8; 1.9; 2.0; 2.1; 2.2; 2.3; 2.4; 2.5; 2.6; 2.7; 2.8; 2.9; 3.0; 3.1; 3.2; 3.3; 3.4; 3.5; 3.6; 3.7; 3.8; 3.9; 4.0; 4.1; 4.2; 4.3; 4.4; 4.5; 4.6; 4.7; 4.8; 4.9; 5.0; 5.1; 5.2; 5.3; 5.4; 5.5; 5.6; 5.7; 5.8; 5.9; 6.0; 6.1; 6.2; 6.3; 6.4; 6.5; 6.6; 6.7; 6.8; 6.9; 7.0; 7.1; 7.2; 7.3; 7.4; 7.5; 7.6; 7.7; 7.8; 7.9; 8.0; 8.1; 8.2; 8.3; 8.4; 8.5; 8.6; 8.7; 8.8; 8.9; 9.0; 9.1; 9.2; 9.3; 9.4; 9.5; 9.6; 9.7; 9.8; 9.9. .0; .1; .2; .3; .4; .5; .6; .7; .8; .9;'.split('; ');
paragraph.innerHTML = insertPlaceholders(paragraph.innerHTML, periodExceptions);
// We loop through all the nodes inside the p-element.
// Span-open-tags and close-tags are placed through code.
let innerHTML = getSpanStartTag();
Line 38 ⟶ 55:
// if it is a text node, modify it
if(currentChild.nodeType === Node.TEXT_NODE){
innerHTML += adjustTextNodes(currentChild.nodeValue, periodExceptions, periodExceptionPlaceholders);
.split('.').join(getSpanEndAndStart('.'))
.split('!').join(getSpanEndAndStart('!'))
.split('?').join(getSpanEndAndStart('?'));
}
Line 59 ⟶ 73:
innerHTML += '</span>';
// Now the placeholder can be removed again.
innerHTML = removePlaceholders(innerHTML, periodExceptions);
paragraph.innerHTML = innerHTML;
// The last span-element is not a sentence.
/*
var spans = paragraph.getElementsByTagName("span");
if (spans.length > 0) {
spans[spans.length - 1].classList.remove("sentence");
}*/
 
// utility function: markup of the opening span tag that starts a sentence
function getSpanStartTag(){
    const sentenceClass = 'sentence';
    return '<span class="' + sentenceClass + '">';
}
// utility function: markup placed at a sentence boundary. The punctuation
// mark stays inside the sentence it ends; the current span is then closed
// and the span for the next sentence is opened.
function getSpanEndAndStart(punctuation){
    const closeAndReopen = '</span>' + getSpanStartTag();
    return punctuation + closeAndReopen;
}
// utility function insertPlaceholders(to modify text, periodExceptions){nodes
// they contain the punctuation relevant for sentences
let modifiedText = text;
forfunction adjustTextNodes(let periodException oftext, periodExceptions, periodExceptionPlaceholders){
// use placeholders to remove all periods that do not mark sentences
let placeholderExpression = periodException.split('.').join(periodPlaceholder);
text = insertPlaceholders(text, periodExceptions, periodExceptionPlaceholders);
modifiedText = modifiedText.split(periodException).join(placeholderExpression);
}
// split using the remaining punctuation
return modifiedText;
text = text.split('.').join(getSpanEndAndStart('.'))
}
.split('!').join(getSpanEndAndStart('!'))
.split('?').join(getSpanEndAndStart('?'));
function removePlaceholders(text, periodExceptions){
let modifiedText = text;
for(let periodException of periodExceptions){
// use placeholders to return all periods that do not mark sentences
let placeholderExpression = periodException.split('.').join(periodPlaceholder);
text = removePlaceholders(text, periodExceptions, periodExceptionPlaceholders);
modifiedText = modifiedText.split(placeholderExpression).join(periodException);
return text;
// Replaces every occurrence of each exception string with the placeholder
// stored at the same index of periodExceptionPlaceholders.
// The exceptions are applied one after the other in array order, so a later
// exception only sees the text left over by the earlier ones.
// Returns the modified text.
function insertPlaceholders(text, periodExceptions, periodExceptionPlaceholders){
    return periodExceptions.reduce(function(currentText, exception, index){
        // split/join acts as a literal "replace all"
        const pieces = currentText.split(exception);
        return pieces.join(periodExceptionPlaceholders[index]);
    }, text);
}
return modifiedText;
// Inverse of the placeholder insertion: replaces every occurrence of each
// placeholder with the exception string stored at the same index of
// periodExceptions. The pairs are processed one after the other in array order.
// Returns the restored text.
function removePlaceholders(text, periodExceptions, periodExceptionPlaceholders){
    return periodExceptions.reduce(function(currentText, exception, index){
        // split/join acts as a literal "replace all"
        const pieces = currentText.split(periodExceptionPlaceholders[index]);
        return pieces.join(exception);
    }, text);
}
}
}
Line 103 ⟶ 125:
// class names for different scores
const scoreClasses = [
'score-10-0',
'score-20-10',
'score-30-20',
'score-40-30',
'score-50-40',
'score-60-50',
'score-70-60',
'score-80-70',
'score-90-80', 'score-100-90'];
'score-100-90'];
 
// css colors for the different score classes
const scoreColors = ['rgb(255,128,128)', 'rgb(234,149,128)', 'rgb(213,170,128)', 'rgb(192,192,128)', 'rgb(170,213,128)', 'rgb(149,234,128)', 'rgb(128,255,128)', 'rgb(128,255,128)', 'rgb(128,255,192)', 'rgb(128,255,255)'];
// Loop through all sentences, add their score class and their title attribute.
let sentenceElements = document.body.getElementsByClassName('sentence');
Line 113 ⟶ 142:
let sentenceText = getSentenceText(sentenceElement);
let score = getSentenceScore(sentenceText);
if(!isNaN(score)){
sentenceElement.title = `Score: ${score.toFixed(2)}`;
sentenceElement.classListtitle = `Score: ${score.addtoFixed(getScoreClass(score, scoreClasses)2)}`;
sentenceElement.dataset.sentenceText = sentenceText;
sentenceElement.dataset.score = score;
sentenceElement.classList.add(getScoreClass(score, scoreClasses));
}
else{
sentenceElement.classList.remove('sentence');
}
}
 
// Add the style sheet to color the score classes.
addScoreStyleSheet(scoreClasses, scoreColorsreadabilityScoreColors);
 
function getScoreClass(score, scoreClasses){
Line 131 ⟶ 167:
}
 
function addScoreStyleSheet(scoreClasses, scoreColorsreadabilityScoreColors){
const style = document.createElement('style');
for(let i = 0; i < scoreClasses.length; i++){
style.innerHTML += `.${scoreClasses[i]} {background-color: ${scoreColorsreadabilityScoreColors[i]}; } `;
}
document.head.appendChild(style);
Line 145 ⟶ 181:
// Readability depends on the number of syllables, words, and sentences
let totalSyllableCount = 0;
let totalPolySyllableCount = 0;
let totalWordCount = 0;
const sentenceElements = document.getElementsByClassName('sentence');
let totalSentenceCount = sentenceElements.length;
const sentenceLengthArray = [];
for(let sentenceElement of sentenceElements){
let sentenceText = getSentenceText(sentenceElement);
let words = getWords(sentenceText);
totalWordCount += words.length;
sentenceLengthArray.push(words.length);
for(let word of words){
totalSyllableCountlet syllableCount += getSyllableCount(word);
totalSyllableCount += syllableCount;
if(syllableCount >= 3){
totalPolySyllableCount++;
}
}
}
Line 159 ⟶ 202:
let totalReadability = getFleschKincaidReadability(totalSyllableCount, totalWordCount, totalSentenceCount);
let totalGradeLevel = getFleschKincaidGradeLevel(totalSyllableCount, totalWordCount, totalSentenceCount);
let totalSmogGradeLevel = getSmogGradeLevel(totalPolySyllableCount, totalSentenceCount);
const mainDifmainDiv = document.getElementById('mw-content-text');
const overviewDifoverviewDiv = document.createElement('div');
mainDifmainDiv.insertBefore(overviewDifoverviewDiv, mainDifmainDiv.firstChild);
const headline = document.createElement('h2');
insertOverviewTable(overviewDif);
overviewDifoverviewDiv.appendChild(document.createElement('br')headline);
headline.innerHTML = 'Readability overview';
insertSentenceTable(overviewDif);
const tableDiv = document.createElement('div');
overviewDiv.appendChild(tableDiv);
tableDiv.style.display = 'flex';
insertOverviewTable(tableDiv);
insertKeyTable(tableDiv);
//overviewDiv.appendChild(document.createElement('br'));
insertSentenceTable(overviewDiv);
function insertOverviewTable(parent){
Line 171 ⟶ 222:
parent.appendChild(overviewTable);
overviewTable.classList.add('wikitable');
overviewTable.style.marginRight = '20px';
/*const overviewCaption = document.createElement('caption');
overviewTable.appendChild(overviewCaption);
overviewCaption.innerHTML = 'Readability overview';*/
const overviewTableBody = document.createElement('tbody');
overviewTable.appendChild(overviewTableBody);
addRow(overviewTableBody, 'Readability (<a href="https://en.wikipedia.org/wiki/Flesch%E2%80%93Kincaid_readability_tests">Flesch</a>)', totalReadability.toFixed(2));
addRow(overviewTableBody, 'Grade level (<a href="https://en.wikipedia.org/wiki/Flesch%E2%80%93Kincaid_readability_tests">Flesch</a>)', totalGradeLevel.toFixed(2));
addRow(overviewTableBody, 'NumberGrade oflevel sentences(<a href="https://en.wikipedia.org/wiki/SMOG">SMOG</a>)', totalSentenceCounttotalSmogGradeLevel.toFixed(2));
addRow(overviewTableBody, 'Number of wordsSentences', totalWordCounttotalSentenceCount);
addRow(overviewTableBody, 'Number of syllablesWords', totalSyllableCounttotalWordCount);
addRow(overviewTableBody, 'Syllables', totalSyllableCount);
addRow(overviewTableBody, 'Average sentence length', (totalWordCount/totalSentenceCount).toFixed(2));
addRow(overviewTableBody, 'Standard deviation<br>of sentence length', (getStandardDeviation(sentenceLengthArray)).toFixed(2));
// Population standard deviation of an array of numbers, i.e. the variance
// is divided by the number of values (not by n - 1).
// numbers: array of numbers (here: the word count of every sentence)
// Returns the standard deviation, or NaN when the array is empty so that
// a page without any sentences does not abort the script with an exception.
function getStandardDeviation(numbers){
    const count = numbers.length;
    if(count === 0){
        return NaN;
    }
    // always pass an initial value: reduce throws on an empty array otherwise
    const sum = numbers.reduce((total, number) => total + number, 0);
    const mean = sum / count;
    const varianceSum = numbers.reduce((total, number) => total + Math.pow(number - mean, 2), 0);
    const variance = varianceSum / count;
    return Math.sqrt(variance);
}
}
// Key for the coloring
// Creates a table with the class 'wikitable' and appends it to the given parent element.
// The table has the header columns "Score" and "School level" and one row per
// score band, from 100–90 down to 10–0.
// Each row carries the score class of its band (e.g. 'score-100-90'), so the
// background-color rules generated for the score classes also color the key rows.
// parent: DOM element that receives the table as its last child
function insertKeyTable(parent){
const keyTable = document.createElement('table');
parent.appendChild(keyTable);
keyTable.classList.add('wikitable');
// The content is static markup; the class names have to stay in sync with the
// list of score classes used for the sentences.
keyTable.innerHTML = `<thead>
<tr>
<th>Score</th>
<th>School level</th>
</tr>
</thead>
<tbody>
<tr class="score-100-90">
<td>100–90</td>
<td>5th grade</td>
</tr>
<tr class="score-90-80">
<td>90–80</td>
<td>6th grade</td>
</tr>
<tr class="score-80-70">
<td>80–70</td>
<td>7th grade</td>
</tr>
<tr class="score-70-60">
<td>70–60</td>
<td>8th & 9th grade</td>
</tr>
<tr class="score-60-50">
<td>60–50</td>
<td>10th to 12th grade</td>
</tr>
<tr class="score-50-40">
<td>50–40</td>
<td>College</td>
</tr>
<tr class="score-40-30">
<td>40–30</td>
<td>College</td>
</tr>
<tr class="score-30-20">
<td>30–20</td>
<td>College graduate</td>
</tr>
<tr class="score-20-10">
<td>20–10</td>
<td>College graduate</td>
</tr>
<tr class="score-10-0">
<td>10–0</td>
<td>College graduate</td>
</tr>
</tbody>`;
}
Line 192 ⟶ 311:
parent.append(sentenceTableButton);
sentenceTableButton.innerHTML = 'Show sentences ordered by lowest score';
sentenceTableButton.style.fontSize = "24px";
// the table itself
Line 209 ⟶ 329:
const sentenceMatrix = [];
for(let sentenceElement of sentenceElements){
let sentenceText = getSentenceText(sentenceElement).dataset.sentenceText;
let score = getSentenceScoreparseFloat(sentenceTextsentenceElement.dataset.score);
sentenceMatrix.push([sentenceText, score]);
}
Line 270 ⟶ 390:
ref.style.display = '';
}
// formatting
if(sentenceText[0] == '"'){
sentenceText = sentenceText.substring(1);
}
sentenceText = sentenceText.trim();
return sentenceText;
Line 303 ⟶ 429:
let score = (0.39 * wordsPerSentence) + (11.8 * syllablesPerWord) - 15.59;
return score;
}
 
// utility function: SMOG grade level shown in the overview
// polySyllableCount: number of words with three or more syllables
// sentenceCount: number of sentences the words were taken from
function getSmogGradeLevel(polySyllableCount, sentenceCount){
    // the formula works on the polysyllable count scaled to 30 sentences
    const polySyllablesPer30Sentences = polySyllableCount * 30 / sentenceCount;
    return 1.0430 * Math.sqrt(polySyllablesPer30Sentences) + 3.1291;
}
 
Line 358 ⟶ 490:
// anonymous main function
(function(){
// restrict script to mainspace, userspace, wikipedia, help, and draftspace
const namespaceNumber = mw.config.get('wgNamespaceNumber');
const allowedNamespaces = [0, 2, 4, 12, 118];
if (allowedNamespaces.indexOf(namespaceNumber) != -1) {
// add a link to the toolbox