User:Novem Linguae/Scripts/DraftCleaner.js: Difference between revisions

Content deleted Content added
refactor. delete some additional HTML tags such as <em><strong><p> (publish.php)
add removeUnderscoresFromWikilinks() (publish.php)
Line 43:
- if article has headings but no lead, remove first heading
- replace unicode bullets with asterisks
 
This page was assembled from 3 files using my publish.php script. I have an offline test suite with around 100 unit tests for the DraftCleaner and StringFilter classes.
*/
 
Line 221 ⟶ 223:
wikicode = this.deleteBlankLinesBetweenBullets(wikicode);
wikicode = this.deleteBigTag(wikicode);
wikicode = this.removeUnderscoresFromWikilinks(wikicode);
// delete empty sections. for example, empty ==See Also== section
// all ==sections== should start with a capital letter
Line 273 ⟶ 276:
let topHalf = wikicode.replace(/((== ?Further reading|== ?External link).*$)/is, '');
let bottomHalf = wikicode.match(/((== ?Further reading|== ?External link).*$)/is)[1];
let buffer = sf.surgicalReplacesurgicalReplaceOutsideTags(/(?<!>|> )\[(http[^ \]]+) ?(.*?)\](?!<\/ref>| <\/ref>)/gm, '$2<ref>$1</ref>', topHalf, ['<ref'], ['</ref>', '/>']);
wikicode = buffer + bottomHalf;
} else {
wikicode = sf.surgicalReplacesurgicalReplaceOutsideTags(/(?<!>|> )\[(http[^ \]]+) ?(.*?)\](?!<\/ref>| <\/ref>)/gm, '$2<ref>$1</ref>', wikicode, ['<ref'], ['</ref>', '/>']);
}
return wikicode;
Line 435 ⟶ 438:
let sf = new StringFilter();
// (1111-1111)
wikicode = sf.surgicalReplacesurgicalReplaceOutsideTags(/(\(\d{4}) ?- ?(\d{4}\))/gm, '$1–$2', wikicode, ['[[File:'], [']]']);
// 1839 - 1926)
wikicode = sf.surgicalReplacesurgicalReplaceOutsideTags(/( \d{4}) ?- ?(\d{4}\))/gm, '$1–$2', wikicode, ['[[File:'], [']]']);
return wikicode;
}
Line 564 ⟶ 567:
fixDoublePeriod(wikicode) {
return wikicode.replace(/(?<=[A-Za-z\]])\.\.(?=<ref| |\n)/g, '.');
}
 
/**
 * Placeholder that is not implemented yet: it hands back wikicode unchanged.
 * NOTE(review): going by the name, this is presumably meant to strip section
 * headings that have no content under them — confirm the intended scope before implementing.
 * @param {string} wikicode
 * @returns {string} the wikicode that was passed in, untouched
 */
deleteEmptySections(wikicode) {
// TODO: implement. Until then this method is a no-op.
return wikicode;
}
 
/**
 * Placeholder that is not implemented yet: it hands back wikicode unchanged.
 * NOTE(review): going by the name, this is presumably meant to drop a repeated
 * references section — confirm the intended behavior before implementing.
 * @param {string} wikicode
 * @returns {string} the wikicode that was passed in, untouched
 */
deleteDuplicateReferencesSection(wikicode) {
// TODO: implement. Until then this method is a no-op.
return wikicode;
}
 
Line 652 ⟶ 645:
deleteBigTag(wikicode) {
return wikicode.replace(/(?:<big>|<\/big>)/g, '');
}
 
/**
 * Placeholder that is not implemented yet: it hands back wikicode unchanged.
 * NOTE(review): going by the name, this is presumably meant to strip section
 * headings that have no content under them — confirm the intended scope before implementing.
 * @param {string} wikicode
 * @returns {string} the wikicode that was passed in, untouched
 */
deleteEmptySections(wikicode) {
// TODO: implement. Until then this method is a no-op.
return wikicode;
}
 
/**
 * Placeholder that is not implemented yet: it hands back wikicode unchanged.
 * NOTE(review): going by the name, this is presumably meant to drop a repeated
 * references section — confirm the intended behavior before implementing.
 * @param {string} wikicode
 * @returns {string} the wikicode that was passed in, untouched
 */
deleteDuplicateReferencesSection(wikicode) {
// TODO: implement. Until then this method is a no-op.
return wikicode;
}
 
removeUnderscoresFromWikilinks(wikicode) {
let sf = new StringFilter();
wikicode = sf.surgicalReplaceInsideTags(/_/g, ' ', wikicode, ['[['], [']]']);
return wikicode;
}
 
Line 665 ⟶ 674:
/** Lets you use regex to mark parts of a very long string as "off limits", then run additional regexes and search/replace operations on the remaining parts of the string. */
class StringFilter {
/** Does a replace, but specifies areas of the file that should NOT be replaced. Those areas are specified by providing an openingTag and a closingTag, and those areas are marked as off limits. */
surgicalReplacesurgicalReplaceOutsideTags(regex, replacement, haystack, openingTags, closingTags) {
@param {Array} tagsToSkip */
surgicalReplace(regex, replacement, haystack, openingTags, closingTags) {
let allTags = [...openingTags, ...closingTags];
let parts = this._splitStringUsingMultiplePatterns(haystack, allTags);
Line 688 ⟶ 695:
}
 
/** Does a replace, but specifies areas of the file that SHOULD be replaced, then skips the rest of the file. The area that should be replaced is specified by providing an openingTag and a closingTag. */
surgicalReplaceInsideTags(regex, replacement, haystack, openingTags, closingTags) {
let allTags = [...openingTags, ...closingTags];
let parts = this._splitStringUsingMultiplePatterns(haystack, allTags);
let resultArray = [];
for ( let part of parts ) {
for ( let tag of openingTags ) {
if ( part.startsWith(tag) ) {
part = part.replace(regex, replacement);
}
}
resultArray.push(part);
}
return resultArray.join('');
}
/**
Also keeps the pattern in the result, unlike String.prototype.split. The algorithm isn't perfect and will fail with this pattern: <ref>Test/>Test</ref>. But it should be good enough for DraftCleaner stuff.