Content deleted Content added
simplify |
optimize |
||
Line 4:
function divideParagraphsIntoSentences(){
let paragraphs = document.querySelectorAll('.mw-parser-output > p');
let exceptionStringSeparator = '; ';
let periodExceptions = exceptionString.split(exceptionStringSeparator);
let periodExceptionPlaceholders = exceptionString.split('.').join(periodPlaceholder).split(exceptionStringSeparator);
for(let paragraph of paragraphs){
let textContent = paragraph.textContent.split('\r').join('').split('\n').join('').trim();
Line 9 ⟶ 19:
// exclude very short paragraphs
if(textContent.length > 20){
divideIntoSentences(paragraph, periodExceptions, periodExceptionPlaceholders);
}
}
Line 23 ⟶ 33:
// Split the content of a p-element into span-elements. Each span corresponds to a sentence.
function divideIntoSentences(paragraph, periodExceptions, periodExceptionPlaceholders){
▲ // Periods are the main guide for where sentences start and end.
▲ // However, not all periods mark sentences, like in different forms of abbreviations.
▲ // Placeholders are used for exceptions.
▲ let periodPlaceholder = 'PERIOD_PLACEHOLDER';
▲ let periodExceptions = '...; Mr.; Mrs.; Dr.; Jr.; Sr.; Prof.; St.; Ave.; Corp.; Inc.; Ltd.; Co.; Gov.; Capt.; Sgt.; et al.; vs.; e.t.a.; .A.; .B.; .C.; .D.; .E.; .F.; .G.; .H.; .I.; .J.; .K.; .L.; .M.; .N.; .O.; .P.; .Q.; .R.; .S.; .T.; .U.; .V.; .W.; .X.; .Y.; .Z.; A.; B.; C.; D.; E.; F.; G.; H.; I.; J.; K.; L.; M.; N.; O.; P.; Q.; R.; S.; T.; U.; V.; W.; X.; Y.; Z.; .a.; .b.; .c.; .d.; .e.; .f.; .g.; .h.; .i.; .j.; .k.; .l.; .m.; .n.; .o.; .p.; .q.; .r.; .s.; .t.; .u.; .v.; .w.; .x.; .y.; .z.; .a; .b; .c; .d; .e; .f; .g; .h; .i; .j; .k; .l; .m; .n; .o; .p; .q; .r; .s; .t; .u; .v; .w; .x; .y; .z; 0.0; 0.1; 0.2; 0.3; 0.4; 0.5; 0.6; 0.7; 0.8; 0.9; 1.0; 1.1; 1.2; 1.3; 1.4; 1.5; 1.6; 1.7; 1.8; 1.9; 2.0; 2.1; 2.2; 2.3; 2.4; 2.5; 2.6; 2.7; 2.8; 2.9; 3.0; 3.1; 3.2; 3.3; 3.4; 3.5; 3.6; 3.7; 3.8; 3.9; 4.0; 4.1; 4.2; 4.3; 4.4; 4.5; 4.6; 4.7; 4.8; 4.9; 5.0; 5.1; 5.2; 5.3; 5.4; 5.5; 5.6; 5.7; 5.8; 5.9; 6.0; 6.1; 6.2; 6.3; 6.4; 6.5; 6.6; 6.7; 6.8; 6.9; 7.0; 7.1; 7.2; 7.3; 7.4; 7.5; 7.6; 7.7; 7.8; 7.9; 8.0; 8.1; 8.2; 8.3; 8.4; 8.5; 8.6; 8.7; 8.8; 8.9; 9.0; 9.1; 9.2; 9.3; 9.4; 9.5; 9.6; 9.7; 9.8; 9.9. .0; .1; .2; .3; .4; .5; .6; .7; .8; .9;'.split('; ');
▲ // We loop through all the nodes inside the p-element.
// Span-open-tags and close-tags are placed through code.
let innerHTML = getSpanStartTag();
Line 38 ⟶ 41:
// if it is a text node, modify it
if(currentChild.nodeType === Node.TEXT_NODE){
innerHTML += adjustTextNodes(currentChild.nodeValue, periodExceptions, periodExceptionPlaceholders);
.split('.').join(getSpanEndAndStart('.'))▼
.split('!').join(getSpanEndAndStart('!'))▼
.split('?').join(getSpanEndAndStart('?'));▼
}
Line 59:
innerHTML += '</span>';
innerHTML = removePlaceholders(innerHTML, periodExceptions);▼
paragraph.innerHTML = innerHTML;
// Helper: produces the markup that opens a sentence wrapper.
// Returns the opening tag of a span carrying the "sentence" class.
function getSpanStartTag(){
    const openingTag = '<span class="sentence">';
    return openingTag;
}
// Helper: produces the markup placed at a sentence boundary.
// The given punctuation stays inside the sentence that is ending; it is
// followed by the closing span tag and then by the opening tag of the next
// sentence span (obtained from getSpanStartTag).
function getSpanEndAndStart(punctuation){
    const endOfSentence = punctuation + '</span>';
    return endOfSentence + getSpanStartTag();
}
// utility function
// they contain the punctuation relevant for sentences
let modifiedText = text;▼
// use placeholders to remove all periods that do not mark sentences
text = insertPlaceholders(text, periodExceptions, periodExceptionPlaceholders);
modifiedText = modifiedText.split(periodException).join(placeholderExpression);▼
// split using the remaining punctuation
// use placeholders to return all periods that do not mark sentences
▲
function
▲ let modifiedText = text;
▲ modifiedText = modifiedText.split(
}
return modifiedText;▼
}
▲ return modifiedText;
▲ }
▲ function removePlaceholders(text, periodExceptions){
let modifiedText = text;▼
▲ for(let periodException of periodExceptions){
modifiedText = modifiedText.split(placeholderExpression).join(periodException);▼
▲ }
▲ return modifiedText;
function removePlaceholders(text, periodExceptions, periodExceptionPlaceholders){
▲ let modifiedText = text;
for(let i = 0; i < periodExceptions.length; i++){
▲ modifiedText = modifiedText.split(
}
return modifiedText;
}
}
}
|