User:Phlsph7/Readability.js: Difference between revisions

Content deleted Content added
simplify
optimize
Line 4:
function divideParagraphsIntoSentences(){
let paragraphs = document.querySelectorAll('.mw-parser-output > p');
// Periods are the main guide for where sentences start and end.
// However, not all periods mark sentences, like in different forms of abbreviations.
// Placeholders are used for exceptions.
let periodPlaceholder = 'PERIOD_PLACEHOLDER';
let periodExceptionsexceptionString = '...; Mr.; Mrs.; Dr.; Jr.; Sr.; Prof.; St.; Ave.; Corp.; Inc.; Ltd.; Co.; Gov.; Capt.; Sgt.; et al.; vs.; e.t.a.; .A.; .B.; .C.; .D.; .E.; .F.; .G.; .H.; .I.; .J.; .K.; .L.; .M.; .N.; .O.; .P.; .Q.; .R.; .S.; .T.; .U.; .V.; .W.; .X.; .Y.; .Z.; A.; B.; C.; D.; E.; F.; G.; H.; I.; J.; K.; L.; M.; N.; O.; P.; Q.; R.; S.; T.; U.; V.; W.; X.; Y.; Z.; .a.; .b.; .c.; .d.; .e.; .f.; .g.; .h.; .i.; .j.; .k.; .l.; .m.; .n.; .o.; .p.; .q.; .r.; .s.; .t.; .u.; .v.; .w.; .x.; .y.; .z.; .a; .b; .c; .d; .e; .f; .g; .h; .i; .j; .k; .l; .m; .n; .o; .p; .q; .r; .s; .t; .u; .v; .w; .x; .y; .z; 0.0; 0.1; 0.2; 0.3; 0.4; 0.5; 0.6; 0.7; 0.8; 0.9; 1.0; 1.1; 1.2; 1.3; 1.4; 1.5; 1.6; 1.7; 1.8; 1.9; 2.0; 2.1; 2.2; 2.3; 2.4; 2.5; 2.6; 2.7; 2.8; 2.9; 3.0; 3.1; 3.2; 3.3; 3.4; 3.5; 3.6; 3.7; 3.8; 3.9; 4.0; 4.1; 4.2; 4.3; 4.4; 4.5; 4.6; 4.7; 4.8; 4.9; 5.0; 5.1; 5.2; 5.3; 5.4; 5.5; 5.6; 5.7; 5.8; 5.9; 6.0; 6.1; 6.2; 6.3; 6.4; 6.5; 6.6; 6.7; 6.8; 6.9; 7.0; 7.1; 7.2; 7.3; 7.4; 7.5; 7.6; 7.7; 7.8; 7.9; 8.0; 8.1; 8.2; 8.3; 8.4; 8.5; 8.6; 8.7; 8.8; 8.9; 9.0; 9.1; 9.2; 9.3; 9.4; 9.5; 9.6; 9.7; 9.8; 9.9. .0; .1; .2; .3; .4; .5; .6; .7; .8; .9;'.split('; ');
let exceptionStringSeparator = '; ';
let periodExceptions = exceptionString.split(exceptionStringSeparator);
let periodExceptionPlaceholders = exceptionString.split('.').join(periodPlaceholder).split(exceptionStringSeparator);
for(let paragraph of paragraphs){
let textContent = paragraph.textContent.split('\r').join('').split('\n').join('').trim();
Line 9 ⟶ 19:
// exclude very short paragraphs
if(textContent.length > 20){
divideIntoSentences(paragraph, periodExceptions, periodExceptionPlaceholders);
}
}
Line 23 ⟶ 33:
 
// Split the content of a p-element into span-elements. Each span corresponds to a sentence.
function divideIntoSentences(paragraph, periodExceptions, periodExceptionPlaceholders){
// Loop through all the nodes inside the p-element.
// Periods are the main guide for where sentences start and end.
// However, not all periods mark sentences, like in different forms of abbreviations.
// Placeholders are used for exceptions.
let periodPlaceholder = 'PERIOD_PLACEHOLDER';
let periodExceptions = '...; Mr.; Mrs.; Dr.; Jr.; Sr.; Prof.; St.; Ave.; Corp.; Inc.; Ltd.; Co.; Gov.; Capt.; Sgt.; et al.; vs.; e.t.a.; .A.; .B.; .C.; .D.; .E.; .F.; .G.; .H.; .I.; .J.; .K.; .L.; .M.; .N.; .O.; .P.; .Q.; .R.; .S.; .T.; .U.; .V.; .W.; .X.; .Y.; .Z.; A.; B.; C.; D.; E.; F.; G.; H.; I.; J.; K.; L.; M.; N.; O.; P.; Q.; R.; S.; T.; U.; V.; W.; X.; Y.; Z.; .a.; .b.; .c.; .d.; .e.; .f.; .g.; .h.; .i.; .j.; .k.; .l.; .m.; .n.; .o.; .p.; .q.; .r.; .s.; .t.; .u.; .v.; .w.; .x.; .y.; .z.; .a; .b; .c; .d; .e; .f; .g; .h; .i; .j; .k; .l; .m; .n; .o; .p; .q; .r; .s; .t; .u; .v; .w; .x; .y; .z; 0.0; 0.1; 0.2; 0.3; 0.4; 0.5; 0.6; 0.7; 0.8; 0.9; 1.0; 1.1; 1.2; 1.3; 1.4; 1.5; 1.6; 1.7; 1.8; 1.9; 2.0; 2.1; 2.2; 2.3; 2.4; 2.5; 2.6; 2.7; 2.8; 2.9; 3.0; 3.1; 3.2; 3.3; 3.4; 3.5; 3.6; 3.7; 3.8; 3.9; 4.0; 4.1; 4.2; 4.3; 4.4; 4.5; 4.6; 4.7; 4.8; 4.9; 5.0; 5.1; 5.2; 5.3; 5.4; 5.5; 5.6; 5.7; 5.8; 5.9; 6.0; 6.1; 6.2; 6.3; 6.4; 6.5; 6.6; 6.7; 6.8; 6.9; 7.0; 7.1; 7.2; 7.3; 7.4; 7.5; 7.6; 7.7; 7.8; 7.9; 8.0; 8.1; 8.2; 8.3; 8.4; 8.5; 8.6; 8.7; 8.8; 8.9; 9.0; 9.1; 9.2; 9.3; 9.4; 9.5; 9.6; 9.7; 9.8; 9.9. .0; .1; .2; .3; .4; .5; .6; .7; .8; .9;'.split('; ');
paragraph.innerHTML = insertPlaceholders(paragraph.innerHTML, periodExceptions);
// We loop through all the nodes inside the p-element.
// Span-open-tags and close-tags are placed through code.
let innerHTML = getSpanStartTag();
Line 38 ⟶ 41:
// if it is a text node, modify it
if(currentChild.nodeType === Node.TEXT_NODE){
innerHTML += adjustTextNodes(currentChild.nodeValue, periodExceptions, periodExceptionPlaceholders);
.split('.').join(getSpanEndAndStart('.'))
.split('!').join(getSpanEndAndStart('!'))
.split('?').join(getSpanEndAndStart('?'));
}
Line 59:
innerHTML += '</span>';
// Now the placeholder can be removed again.
innerHTML = removePlaceholders(innerHTML, periodExceptions);
paragraph.innerHTML = innerHTML;
 
// Helper: returns the markup that opens a sentence wrapper.
// Every sentence is wrapped in a span carrying the class "sentence".
function getSpanStartTag(){
  const openingTag = '<span class="sentence">';
  return openingTag;
}
// Helper: builds the markup placed at a sentence boundary.
// The punctuation mark stays inside the sentence that ends, then the current
// span is closed and the span for the next sentence is opened.
function getSpanEndAndStart(punctuation){
  const nextSentenceStart = getSpanStartTag();
  return `${punctuation}</span>${nextSentenceStart}`;
}
// utility function to modify text nodes
// they contain the punctuation relevant for sentences
let modifiedText = text;
forfunction adjustTextNodes(let periodException oftext, periodExceptions, periodExceptionPlaceholders){
// use placeholders to remove all periods that do not mark sentences
let placeholderExpression = periodException.split('.').join(periodPlaceholder);
text = insertPlaceholders(text, periodExceptions, periodExceptionPlaceholders);
modifiedText = modifiedText.split(periodException).join(placeholderExpression);
// split using the remaining punctuation
text = text.split('?.').join(getSpanEndAndStart('?.'));
.split('.!').join(getSpanEndAndStart('.!'))
.split('!?').join(getSpanEndAndStart('!?'));
}
}
// use placeholders to return all periods that do not mark sentences
innerHTML text = removePlaceholders(innerHTMLtext, periodExceptions, periodExceptionPlaceholders);
return modifiedTexttext;
// Replaces every period exception found in the text with its placeholder,
// so that the periods inside it are not treated as sentence boundaries.
//
// text: the string to process
// periodExceptions: expressions whose periods do not end a sentence
// periodExceptionPlaceholders: the replacement for each exception; the entry
//   at index i belongs to periodExceptions[i]
// Returns the text with all exceptions swapped for their placeholders.
function insertPlaceholders(text, periodExceptions, periodExceptionPlaceholders){
  let modifiedText = text;
  // The two arrays are parallel, so they are walked by index.
  for(let i = 0; i < periodExceptions.length; i++){
    modifiedText = modifiedText.split(periodExceptions[i]).join(periodExceptionPlaceholders[i]);
  }
  return modifiedText;
}
return modifiedText;
}
function removePlaceholders(text, periodExceptions){
let modifiedText = text;
for(let periodException of periodExceptions){
let placeholderExpression = periodException.split('.').join(periodPlaceholder);
modifiedText = modifiedText.split(placeholderExpression).join(periodException);
}
return modifiedText;
// Reverses the placeholder substitution: every placeholder found in the text
// is replaced with the period exception it stands for.
//
// text: the string to process
// periodExceptions: the original expressions to restore
// periodExceptionPlaceholders: the placeholder for each exception; the entry
//   at index i belongs to periodExceptions[i]
// Returns the text with all placeholders swapped back for their exceptions.
function removePlaceholders(text, periodExceptions, periodExceptionPlaceholders){
  let modifiedText = text;
  // The two arrays are parallel, so they are walked by index.
  for(let i = 0; i < periodExceptions.length; i++){
    modifiedText = modifiedText.split(periodExceptionPlaceholders[i]).join(periodExceptions[i]);
  }
  return modifiedText;
}
}
}