User:Novem Linguae/Scripts/DraftCleaner.js: Difference between revisions

Content deleted Content added
removeBorderFromImagesInInfoboxes() -> album cover (publish.php)
less enters in front of {{Stubs}}, to comply with new guidance at WP:STUBSPACING (publish.php)
 
(16 intermediate revisions by the same user not shown)
Line 1:
// <nowiki>
 
// === Compiled with Novem Linguae's publish.php script ======================
$(async function() {
 
// === modules/DraftCleaner.js ======================================================
/* THIS SCRIPT IS STILL IN BETA AND IS BUGGY ABOUT 25% OF THE TIME. Be sure to check the diff that pops up before submitting.
 
- Adds "Run DraftCleaner" link to the left sidebar
 
- Top uses:
- remove extra line breaks
- in the first sentence, bold the title
- convert curly quotes to regular quotes
- put <ref>s after periods
- clean external links out of the main article area (turn them into references)
- add ==References== section
 
- More detailed list of uses:
- converts [inline external links] to <ref>s
- reduces more than 2 enters in a row, to 2 enters
- removes spaces in front of <ref>s
- get rid of any level 2 heading that contains the article's title
- bolds the first occurrence of the article title
- removes bold from headings
- converts =TitleHeading= to ==H2Heading==
- replaces Covid-19 with COVID-19
- removes enter characters between <ref>s
- trims whitespace at beginning and end
- remove self wikilinks to the article title
- convert ==Reference== to ==References==
- swap ref period with period ref
- turn bare URLs into references
- fix errant spaces at beginning of lines, which makes a blockquote looking thing
- add references section if missing
- delete whitespace at the end of lines
- convert smart quotes to regular quotes
- convert double spaces to single spaces
- remove blank heading
- in refs, turn short links into long links, so you can see the domain
- change year range dash to ndash
- if in draftspace, and draft in categories, disable the categories
- delete <br>. in drafts, these are usually poorly placed
- fix empty references section
- right align images
- remove whitespace if that is the only character on a line
- correct capitalization of see also, references, further reading, external links
- if article has headings but no lead, remove first heading
- replace unicode bullets with asterisks
 
This page was assembled from 3 files using my publish.php script. I have an offline test suite with around 100 unit tests for the DraftCleaner and StringFilter classes.
*/
 
/**
 * Fetches the current wikitext of a page through the action=parse API.
 *
 * @param {string} title Page title including namespace; it is URL-encoded before use.
 * @return {Promise<string>} The page wikitext, or '' when wgCurRevisionId is falsy (page deleted).
 * @throws {Error} When the API response does not contain parse.wikitext (e.g. an API error payload).
 */
async function getWikicode(title) {
	if ( ! mw.config.get('wgCurRevisionId') ) return ''; // if page is deleted, return blank
	const encodedTitle = encodeURIComponent(title);
	// Use the awaited response directly instead of a `success` callback: an exception thrown
	// inside a jQuery callback never reaches the awaiting caller.
	const result = await $.ajax({
		url: 'https://en.wikipedia.org/w/api.php?action=parse&page='+encodedTitle+'&prop=wikitext&formatversion=2&format=json',
		dataType: "json",
	});
	if ( ! result || ! result.parse || typeof result.parse.wikitext !== 'string' ) {
		// Fail loudly rather than returning '', which the caller would treat as an empty page.
		throw new Error( 'DraftCleaner: API response did not contain wikitext for "' + title + '"' );
	}
	return result.parse.wikitext;
}
 
// borrowed from [[Wikipedia:User scripts/Guide#Edit a page and other common actions]]
/**
 * Saves a page through the action=edit API, replacing its entire content.
 * The request is issued synchronously (`async: false`) and its result is not inspected.
 *
 * @param {string} articleName Title of the page to edit.
 * @param {string} wikicode New full page text.
 * @param {string} summary Edit summary.
 */
function editPage(articleName, wikicode, summary) {
	const editRequest = {
		format: 'json',
		action: 'edit',
		title: articleName,
		text: wikicode, // will replace entire page content
		summary: summary,
		token: mw.user.tokens.get('csrfToken')
	};
	$.ajax({
		url: mw.util.wikiScript('api'),
		type: 'POST',
		dataType: 'json',
		data: editRequest,
		async: false
	});
}
 
/**
 * Opens the "Show changes" view of the edit form by building a hidden POST form
 * (index.php?title=...&action=submit) and submitting it.
 *
 * @param {string} titleWithNamespaceAndUnderscores Page title; URL-encoded into the form action.
 * @param {string} wikicode Proposed page text (sent as wpTextbox1).
 * @param {string} editSummary Edit summary (sent as wpSummary).
 */
function goToShowChangesScreen(titleWithNamespaceAndUnderscores, wikicode, editSummary) {
	const titleEncoded = encodeURIComponent(titleWithNamespaceAndUnderscores);
	const baseURL = mw.config.get('wgServer') + mw.config.get('wgScriptPath') + '/';
	// Hidden fields, in the order they are appended to the form.
	const hiddenFields = [
		[ 'wpTextbox1', wikicode ],
		[ 'wpSummary', editSummary ],
		[ 'mode', 'preview' ],
		[ 'wpDiff', 'Show changes' ],
		[ 'wpUltimateParam', '1' ]
	];
	// https://stackoverflow.com/a/12464290/3480193
	const $form = $(`<form action="${baseURL}index.php?title=${titleEncoded}&action=submit" method="POST"/>`);
	for ( const [ fieldName, fieldValue ] of hiddenFields ) {
		$form.append($(`<input type="hidden" name="${fieldName}">`).val(fieldValue));
	}
	// it has to be added somewhere into the <body> before it can be submitted
	$form.appendTo($(document.body)).submit();
}
 
/** returns the pagename, including the namespace name, but with spaces replaced by underscores */
/**
 * Reads the current page name from the MediaWiki config value `wgPageName`.
 *
 * @return {string} Whatever mw.config holds for 'wgPageName'.
 */
function getArticleName() {
	const pageName = mw.config.get('wgPageName');
	return pageName;
}
 
/**
 * Hides the clickable #DraftCleaner item and shows messageText in the
 * non-clickable #DraftCleanerNoClick item instead.
 *
 * @param {string} messageText Content to place in the status item.
 */
function showMessage(messageText) {
	$('#DraftCleaner').hide();
	const $status = $('#DraftCleanerNoClick');
	$status.empty();
	$status.prepend(messageText);
	$status.show();
}
 
/**
 * Restores the clickable state: hides the status item (#DraftCleanerNoClick)
 * and shows the clickable item (#DraftCleaner) again.
 */
function showClickableButton() {
	const $status = $('#DraftCleanerNoClick');
	$status.hide();
	const $button = $('#DraftCleaner');
	$button.show();
}
 
/** refresh AND clear cache */
/**
 * Reloads the page by re-assigning the current URL to window.location.href.
 * NOTE(review): the original comment says this also clears the cache — not verifiable from this code.
 */
function hardRefresh() {
	// window.location.reload(true) is deprecated. use this instead
	// (the previous `window.___location` was a mangled property name and is undefined at runtime)
	window.location.href = window.location.href;
}
 
// don't run when not viewing articles
// Stop here (no sidebar link is added) unless wgAction is 'view'.
let action = mw.config.get('wgAction');
if ( action != 'view' ) return;
 
// don't run when viewing diffs (a truthy wgDiffNewId is treated as "viewing a diff")
let isDiff = mw.config.get('wgDiffNewId');
if ( isDiff ) return;
 
// Intended restriction: only run in mainspace (0), draftspace (118), and sandboxes.
// NOTE: the check itself is commented out below, so no namespace filtering currently happens;
// `sandbox` is computed but unused. `titleWithNamespaceAndUnderscores` and `namespaceNumber`
// are read later by the click handler.
let titleWithNamespaceAndUnderscores = getArticleName();
let namespaceNumber = mw.config.get('wgNamespaceNumber');
let sandbox = titleWithNamespaceAndUnderscores.match(/sandbox/i);
//if ( ! [0, 118].includes(namespaceNumber) && ! sandbox ) return;
 
// Portlet that receives the link: 'p-tb' when window.draftCleanerPutInToolsMenu is truthy, otherwise 'p-navigation'.
let menuID = window.draftCleanerPutInToolsMenu ? 'p-tb' : 'p-navigation';
 
// Add DraftCleaner to left sidebar
// Using two <li>s. One of the two is kept hidden at all times. This avoids having to delete #DraftCleanerLink, which would also delete the event listener.
$(`#${menuID} ul`).append(`
<li id="DraftCleaner">
<a id="DraftCleanerLink">Run DraftCleaner</a>
</li>
<li id="DraftCleanerNoClick" style="display:none">
</li>
`);
 
// Click handler: fetch the page wikitext, run DraftCleaner over it, and either open the
// "Show changes" screen with the result or briefly report that nothing needs changing.
$('#DraftCleanerLink').on('click', async function() {
	// Swap the link for a status message so the script cannot be started twice.
	showMessage('Editing. Please wait.');

	const titleWithNamespaceAndSpaces = titleWithNamespaceAndUnderscores.replace(/_/g, ' ');
	const originalWikicode = await getWikicode(titleWithNamespaceAndUnderscores);

	const cleaner = new DraftCleaner();
	const cleanedWikicode = cleaner.cleanDraft(originalWikicode, namespaceNumber, titleWithNamespaceAndSpaces);

	if ( cleanedWikicode == originalWikicode ) {
		// Nothing to do: say so, then bring the clickable link back after 2 seconds.
		showMessage('No changes needed.');
		setTimeout(function (){
			showClickableButton();
		}, 2000);
		return;
	}

	// Changes found: hand them to the edit form's diff view rather than saving directly.
	const summary = 'clean up ([[User:Novem Linguae/Scripts/DraftCleaner.js|DraftCleaner]])';
	await goToShowChangesScreen(titleWithNamespaceAndUnderscores, cleanedWikicode, summary);
});
 
 
class DraftCleaner {
cleanDraft( wikicode, namespaceNumber, titleWithNamespaceAndSpaces ) {
// run before other stuff
wikicode = this.deleteSomeHTMLTags( wikicode );
wikicode = this.deleteBigTagdeleteNonAFCDraftTags( wikicode );
wikicode = this.deleteAFCDraftTagsIfMainspace( wikicode, mw.config.get( 'wgNamespaceNumber' ) );
 
wikicode = this.fixWikilinksContainingURL( wikicode );
wikicode = this.fixExternalLinksToWikipediaArticles( wikicode );
wikicode = this.deleteWeirdUnicodeCharacters( wikicode );
wikicode = this.trimEveryLine( wikicode );
wikicode = this.convertH1ToH2( wikicode );
wikicode = this.convertVeryLongHeadingToParagraph( wikicode );
wikicode = this.deleteHeadingsWithTitle( wikicode, titleWithNamespaceAndSpaces );
wikicode = this.unlinkWikilinksToThisTitle( wikicode, titleWithNamespaceAndSpaces );
wikicode = this.capitalizeCOVID19( wikicode );
wikicode = this.removeBoldFromHeadings( wikicode );
wikicode = this.convertReferenceToReferences( wikicode );
wikicode = this.deleteMultipleReferenceTags( wikicode );
wikicode = this.addReferencesSectionIfMissing( wikicode );
wikicode = this.fixEmptyReferencesSection( wikicode );
wikicode = this.deleteWhitespaceAtEndOfLines( wikicode );
wikicode = this.convertSmartQuotesToRegularQuotes( wikicode );
// wikicode = this.fixWordEmphasizedWithSingleQuotes(wikicode); // most of these appear in citations as names of newspaper articles, arguably should keep these single quotes
wikicode = this.convertDoubleSpacesToSingleSpaces( wikicode );
wikicode = this.deleteBlankHeadings( wikicode );
wikicode = this.changeYearRangeDashToNDash( wikicode );
wikicode = this.disableCategoriesInDraftspace( wikicode, namespaceNumber );
// wikicode = this.deleteBRTagsOutsideInfoboxes(wikicode, namespaceNumber); // edge case in image captions, and probably other places
// wikicode = this.rightAlignImages(wikicode); // commenting out, too many false positives in featured articles
wikicode = this.correctCapitalizationOfEndMatterHeaders( wikicode );
wikicode = this.ifNoLeadSectionDeleteFirstHeading( wikicode );
wikicode = this.deleteCopyPastedEditAndEditSource( wikicode );
wikicode = this.replaceUnicodeBulletsWithAsterisks( wikicode );
wikicode = this.deleteEmptySections( wikicode );
wikicode = this.fixHeadingsInAllCaps( wikicode );
wikicode = this.deleteDuplicateReferencesSection( wikicode );
wikicode = this.deleteBlankLinesBetweenBullets( wikicode );
wikicode = this.removeUnderscoresFromWikilinks( wikicode );
wikicode = this.removeBorderFromImagesInInfoboxesfixPipedWikilinksWithIdenticalParameters( wikicode );
wikicode = this.removeExtraAFCSubmissionTemplatesremoveBorderFromImagesInInfoboxes( wikicode );
wikicode = this.removeExtraAFCSubmissionTemplates( wikicode );
wikicode = this.moveAFCSubmissionTemplatesToTop( wikicode );
 
// all ==sections== should start with a capital letter
Line 239 ⟶ 70:
 
// convert refs toward the end. we want deleteSomeHTMLTags() to run first, to get rid of tags around URLs
wikicode = this.bareURLToRef( wikicode );
wikicode = this.refShortLinkToLongLink( wikicode );
wikicode = this.inlineExternalLinksToRefs( wikicode );
wikicode = this.deleteSpacesInFrontOfRefsmoveRefsOutsideOfItalics( wikicode );
wikicode = this.deleteNewLinesBetweenRefsdeleteSpacesInFrontOfRefs( wikicode );
wikicode = this.swapRefPeriodWithPeriodRefdeleteNewLinesBetweenRefs( wikicode );
wikicode = this.swapRefPeriodWithPeriodRef( wikicode );
wikicode = this.swapRefCommaWithCommaRef( wikicode );
 
// stuff we want to run at the end
wikicode = this.fixDoublePeriod( wikicode ); // need test cases. I've seen this one not work.
wikicode = this.boldArticleTitle( wikicode, titleWithNamespaceAndSpaces );
wikicode = this.trimEmptyLines( wikicode );
wikicode = this.deleteMoreThanTwoEntersInARow( wikicode );
return wikicode;
}
 
_escapeRegEx(string) {
return string.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'); // $& means the whole matched string
}
 
// surround bare URL's with <ref></ref>
// Useful for seeing all URL's in the reflist section, and for CiteHighlighter ref quality highlighting
bareURLToRef( wikicode ) {
return wikicode.replace( /^(http[^\n ]*) {0,}$/gm, "'<ref>$1</ref>"' );
}
 
// in refs, turn [short links] into long links, so you can see the domain
// also fixes <ref> link </ref> issues with extra spaces in the ref
refShortLinkToLongLink( wikicode ) {
// <ref>[https://test.com''Test'']</ref>
wikicode = wikicode.replace( /(<ref[^>]*>) {0,}\[ {0,}([^'\]]*)(''[^\]]*)\] {0,}(<\/ref>)/gm, '$1$2 $3$4' );
// <ref>[https://test.com Test]</ref>
wikicode = wikicode.replace( /(<ref[^>]*>) {0,}\[ {0,}([^\]]*) {0,}\] {0,}(<\/ref>)/gm, '$1$2$3' );
return wikicode;
}
 
// convert inline external links to references
inlineExternalLinksToRefs( wikicode ) {
letconst sectionsToSkip = [ 'External link', 'Further reading', 'Links' ];
let regExString = '== ?(?:';
for ( letconst sectionToSkip of sectionsToSkip ) {
regExString += sectionToSkip + '|';
}
regExString = regExString.slice( 0, -1 ) + ')';
letconst hasSectionToSkip = wikicode.match( new RegExp( regExString, 'i' ) );
 
letconst sf = new StringFilter();
 
if ( hasSectionToSkip ) {
letconst regExToSplitArticle = new RegExp( '((' + regExString + ').*$)', 'is' );
letconst topHalf = wikicode.replace( regExToSplitArticle, '' );
letconst bottomHalf = wikicode.match( regExToSplitArticle )[ 1 ];
letconst buffer = sf.surgicalReplaceOutsideTags(
/(?<!>|> )\[(http[^ \]]+) ?(.*?)\](?!<\/ref>| <\/ref>)/gm,
'$2<ref>$1</ref>',
topHalf,
[ ['<ref', '{{' ],
[ '</ref>', '/>', '}}' ]);
);
wikicode = buffer + bottomHalf;
} else {
wikicode = sf.surgicalReplaceOutsideTags(
/(?<!>|> )\[(http[^ \]]+) ?(.*?)\](?!<\/ref>| <\/ref>)/gm,
'$2<ref>$1</ref>',
wikicode,
[ ['<ref', '{{' ],
[ '</ref>', '/>', '}}' ]);
);
}
 
return wikicode;
}
 
/**
* ''Test<ref></ref>'' => ''Test''<ref></ref>
*/
moveRefsOutsideOfItalics( wikicode ) {
wikicode = wikicode.replace( /''([^']+)(<ref>[^<]+<\/ref>)''/gm, '\'\'$1\'\'$2' );
return wikicode;
}
 
// get rid of spaces in front of <refs>
deleteSpacesInFrontOfRefs( wikicode ) {
return wikicode.replace( /(?<!(?:\||=)) {1,}<ref/gm, "'<ref"' );
}
 
// get rid of any level 2 heading that contains the article's title
// this takes care of 2 common cases: heading at the bottom next to the {{AFC Submission}} template, and heading at the top above the lead
deleteHeadingsWithTitle( wikicode, titleWithNamespaceAndSpaces ) {
let headingNameToLookFor = titleWithNamespaceAndSpaces;
headingNameToLookFor = headingNameToLookFor.replace( /^Draft:/, '' );
headingNameToLookFor = this._escapeRegEx( headingNameToLookFor );
let regEx = new RegExp(" '^== ?"' + headingNameToLookFor +" ' ?==\n"', "'gmi"' );
wikicode = wikicode.replace( regEx, ""'' );
// now look for titles that contain Draft: at the beginning, too
headingNameToLookFor = titleWithNamespaceAndSpaces;
headingNameToLookFor = this._escapeRegEx( headingNameToLookFor );
regEx = new RegExp(" '^== ?"' + headingNameToLookFor +" ' ?==\n"', "'gmi"' );
wikicode = wikicode.replace( regEx, ""'' );
return wikicode;
}
 
// remove wikilinks to article name
// Example: if title is Draft:Menna Shahin, change [[Menna Shahin]] to Menna Shahin
unlinkWikilinksToThisTitle( wikicode, titleWithNamespaceAndSpaces ) {
let wikilinkToLookFor = titleWithNamespaceAndSpaces;
wikilinkToLookFor = wikilinkToLookFor.replace( /^Draft:/, '' );
wikilinkToLookFor = this._escapeRegEx( wikilinkToLookFor );
letconst regEx = new RegExp(" '\\[\\[("' + wikilinkToLookFor +" ')\\]\\]"', "'gm"' );
wikicode = wikicode.replace( regEx, "'$1"' );
return wikicode;
}
 
// if located in the first paragraph, bold the article title
boldArticleTitle( wikicode, titleWithNamespaceAndSpaces ) {
let titleToLookFor = titleWithNamespaceAndSpaces;
titleToLookFor = titleToLookFor.replace( /^Draft:/, '' );
titleToLookFor = titleToLookFor.replace( / \(.*?\)$/, '' );
titleToLookFor = this._escapeRegEx( titleToLookFor );
// Don't bold the title if it's already bolded. Fixes a "bold twice" bug.
letconst hasBoldedTitle = wikicode.match( new RegExp( `'''${ titleToLookFor }'''`, 'i' ) );
if ( hasBoldedTitle ) {
return wikicode;
}
// Be pretty strict, to avoid adding ''' to image paths and infoboxes, which messes up the image. Also, only replace first match.
letconst regEx = new RegExp(" '^(The )?("' + titleToLookFor +" ')([ <,])"', "'mi"' );
wikicode = wikicode.replace( regEx, "$1'''$2'''$3" );
return wikicode;
}
 
// /covid-19/i -> COVID-19
// Careful of this string in URLs.
capitalizeCOVID19( wikicode ) {
letconst sf = new StringFilter();
wikicode = sf.surgicalReplaceOutsideTags( / covid-19/gmi, ' COVID-19', wikicode, [ '{{', '[[' ], [ '}}', ']]' ] );
wikicode = sf.surgicalReplaceOutsideTags( /\ncovid-19/gmi, "'\nCOVID-19"', wikicode, [ '{{', '[[' ], [ '}}', ']]' ] );
return wikicode;
}
 
// remove bold from headings
removeBoldFromHeadings( wikicode ) {
return wikicode.replace( /^(=.*)'''(.*)'''(.*=)$/gm, '$1$2$3' );
}
 
// remove enter characters between <ref>s
deleteNewLinesBetweenRefs( wikicode ) {
return wikicode.replace( /<\/ref>\n{1,}<ref>/gm, '<\/ref><ref>' );
}
 
// convert ==Reference== to ==References==
convertReferenceToReferences( wikicode ) {
return wikicode.replace( /^== ?Reference ?==$/gmi, '== References ==' );
}
 
// TOOL - swap ref period with period ref
swapRefPeriodWithPeriodRef( wikicode ) {
wikicode = wikicode.replace( /((?:<ref[^>]*?>[^>]*?<\/ref>){1,})\. /gm, '.$1 ' );
wikicode = wikicode.replace( /((?:<ref[^>]*?>[^>]*?<\/ref>){1,})\.\n/gm, "'.$1\n"' );
return wikicode;
}
 
swapRefCommaWithCommaRef( wikicode ) {
wikicode = wikicode.replace( /((?:<ref[^>]*?>[^>]*?<\/ref>){1,}), /gm, ',$1 ' );
wikicode = wikicode.replace( /((?:<ref[^>]*?>[^>]*?<\/ref>){1,}),\n/gm, ',$1\n' );
return wikicode;
}
 
// fix errant spaces at beginning of lines, which makes a blockquote looking thing (AFCH does it)
trimEveryLine( wikicode ) {
let output = '';
letconst lines = wikicode.split(" '\n"' );
letconst lineCount = lines.length;
let i = 0;
for ( letconst line of lines ) {
i++;
letconst trimmed = line.trim();
if ( trimmed.startsWith( '|' ) || trimmed.startsWith( '}' ) ) { // don't trim lines that start with | or }. It is common in FAs to indent these a bit.
output += line;
} else {
Line 391 ⟶ 249:
}
if ( i !== lineCount ) {
output += "'\n"';
}
}
return output;
}
 
// add references section if missing
addReferencesSectionIfMissing( wikicode ) {
letconst hasRefSection = wikicode.match( /^== ?References ?==$/mi );
letconst hasReflist = wikicode.match( /(?:{{Reflist|<references)/mi );
if ( ! hasRefSection && ! hasReflist ) {
letconst hasBottomAFCTemplate = wikicode.match( /(\n{{AfC submission[^}]*}}\s*)$/ );
if ( hasBottomAFCTemplate ) {
wikicode = wikicode.replace( /(\n{{AfC submission[^}]*}}\s*)$/, "'\n\n== References ==\n{{Reflist}}$1"' );
} else {
wikicode = wikicode.replace( /$/, "'\n\n== References ==\n{{Reflist}}"' );
}
}
return wikicode;
}
 
// fix empty references section
fixEmptyReferencesSection( wikicode ) {
letconst hasRefSection = wikicode.match( /^== ?References ?==$/mi );
letconst hasReflist = wikicode.match( /(?:{{Reflist|<references)/mi );
if ( ! hasReflist && hasRefSection ) {
wikicode = wikicode.replace( /(?<=== ?References ?==)/gmi, "'\n{{Reflist}}"' );
}
return wikicode;
}
 
// delete whitespace at the end of lines
// (?!\|)(?!\}\}) is to stop this from deleting spaces after = in infoboxes
deleteWhitespaceAtEndOfLines( wikicode ) {
return wikicode.replace( /[ \t]+\n(?!\|)(?!\}\})/g, "'\n"' );
}
 
// convert smart quotes to regular quotes
convertSmartQuotesToRegularQuotes( wikicode ) {
letconst sf = new StringFilter();
wikicode = sf.surgicalReplaceOutsideTags( /”/g, '"', wikicode, [ '[[File:' ], [ ']]' ] );
wikicode = sf.surgicalReplaceOutsideTags( /“/g, '"', wikicode, [ '[[File:' ], [ ']]' ] );
wikicode = sf.surgicalReplaceOutsideTags( /‘/g, "'", wikicode, [ '[[File:' ], [ ']]' ] );
wikicode = sf.surgicalReplaceOutsideTags( /’/g, "'", wikicode, [ '[[File:' ], [ ']]' ] );
wikicode = sf.surgicalReplaceOutsideTags( /…/g, '...', wikicode, [ '[[File:' ], [ ']]' ] );
return wikicode;
}
 
// convert double spaces to single spaces
convertDoubleSpacesToSingleSpaces( wikicode ) {
return wikicode.replace( /\. {2,}/g, '. ' );
}
 
// remove blank heading
deleteBlankHeadings( wikicode ) {
return wikicode.replace( /\n={2,} {0,}={2,}\n/g, "'\n"' );
}
 
// Change year range dash to ndash. Skip text inside of [[File:
changeYearRangeDashToNDash( wikicode ) {
letconst sf = new StringFilter();
// (1111-1111)
wikicode = sf.surgicalReplaceOutsideTags( /(\(\d{4}) ?- ?(\d{4}\))/gm, '$1–$2', wikicode, [ '[[File:' ], [ ']]' ] );
// 1839 - 1926)
wikicode = sf.surgicalReplaceOutsideTags( /( \d{4}) ?- ?(\d{4}\))/gm, '$1–$2', wikicode, [ '[[File:' ], [ ']]' ] );
return wikicode;
}
 
// if in draftspace, and draft has categories, disable the categories
disableCategoriesInDraftspace( wikicode, namespace ) {
letconst draft = ( namespace == 118 );
if ( draft ) {
wikicode = wikicode.replace( /:?(\[\[)(Category:[^\]]*\]\])/gm, '$1:$2' );
}
wikicode = wikicode.replace( /\[\[:Category:Created via preloaddraft\]\]/gi, '[[Category:Created via preloaddraft]]' );
return wikicode;
}
 
// delete <br> in drafts, these are usually poorly placed
deleteBRTagsOutsideInfoboxes( wikicode ) {
let output = '';
letconst lines = wikicode.split(" '\n"' );
letconst lineCount = lines.length;
let i = 0;
for ( letconst line of lines ) {
i++;
// Skip lines that start with { or |. This is the easiest way to detect infoboxes
if ( line.startsWith( '{' ) || line.startsWith( '|' ) ) {
output += line;
} else {
output += line.replace( /\<br ?\/?\>/gm, '' );
}
if ( i !== lineCount ) {
output += "'\n"';
}
}
return output;
}
 
// right align images
rightAlignImages( wikicode ) {
return wikicode.replace( /(\[\[File:[^\]]*\|)left(\|[^\]]*\]\])/gm, '$1right$2' );
}
 
// correct capitalization of see also, references, further reading, external links
correctCapitalizationOfEndMatterHeaders( wikicode ) {
wikicode = wikicode.replace( /^(== ?)References( ?==)$/gmi, "'$1References$2"' );
wikicode = wikicode.replace( /^(== ?)External links( ?==)$/gmi, "'$1External links$2"' );
wikicode = wikicode.replace( /^(== ?)Further reading( ?==)$/gmi, "'$1Further reading$2"' );
wikicode = wikicode.replace( /^(== ?)See also( ?==)$/gmi, "'$1See also$2"' );
return wikicode;
}
 
// if article has headings but no lead, remove first heading
ifNoLeadSectionDeleteFirstHeading( wikicode ) {
let output = '';
letconst lines = wikicode.split(" '\n"' );
letconst lineCount = lines.length;
let i = 0;
let textCount = 0;
for ( letconst line of lines ) {
i++;
// scan for first heading.
// empty lines, lines with templates, or lines with images do not count.
if ( line.startsWith( '{' ) || line.length === 0 || line.startsWith( '[[File:' ) ) {
output += line;
} else if ( line.startsWith( '==' ) && ! textCount ) {
continue; // delete this line by not putting it in the output string
} else {
Line 523 ⟶ 382:
}
if ( i !== lineCount ) {
output += "'\n"';
}
}
return output;
}
 
// delete [edit], [edit source], and [editar] from headings
deleteCopyPastedEditAndEditSource( wikicode ) {
wikicode = wikicode.replace( /\[edit\]( ?={2,})$/gm, '$1' );
wikicode = wikicode.replace( /\[edit source\]( ?={2,})$/gm, '$1' );
wikicode = wikicode.replace( /\[editar\]( ?={2,})$/gm, '$1' );
return wikicode;
}
 
// at beginning of lines, replace unicode bullets with asterisks
replaceUnicodeBulletsWithAsterisks( wikicode ) {
return wikicode.replace( /^\s{0,}[·•●]\s{0,}/gm, '* ' );
}
 
// remove whitespace if that is the only character on a line
trimEmptyLines( wikicode ) {
return wikicode.replace( /^\s*$/gm, '' );
}
 
// no more than 2 newlines (1 blank line) in a row. except stubs, which get 3 newlines (2 blank lines)
// Note: AFCH does this too
deleteMoreThanTwoEntersInARow( wikicode ) {
wikicode = wikicode.replace( /\n{3,}/gm, "'\n\n"' );
wikicode = wikicode.replace(/\n{2}(\{\{[^}]*stub\}\})/gi, '\n\n\n$1');
return wikicode;
}
 
// convert =TitleHeading= to ==H2Heading==
convertH1ToH2( wikicode ) {
return wikicode.replace( /^= ?([^=]*?) ?=$/gm, '== $1 ==' );
}
 
convertVeryLongHeadingToParagraph( wikicode ) {
let output = '';
letconst lines = wikicode.split(" '\n"' );
letconst lineCount = lines.length;
let i = 0;
for ( letconst line of lines ) {
i++;
if ( line.length > 150 && line.match( /^==.*==$/gm ) && ! line.match( /<ref/ ) ) {
output += line.replace( /^={1,}\s*(.*?)\s*={1,}$/m, '$1' );
} else {
output += line;
}
if ( i !== lineCount ) {
output += "'\n"';
}
}
Line 579 ⟶ 437:
}
 
fixWordEmphasizedWithSingleQuotes( wikicode ) {
return wikicode.replace( / '(\w+)' /g, ' "$1" ' );
}
 
fixDoublePeriod( wikicode ) {
return wikicode.replace( /(?<=[A-Za-z\]])\.\.(?=<ref| |\n)/g, '.' );
}
 
fixWikilinksContainingURL( wikicode ) {
// non-piped wikilink
wikicode = wikicode.replace( /\[\[https?:\/\/en\.(?:m\.)?wikipedia\.org\/wiki\/([^|]*)\]\]/g, '[[$1]]' );
// piped wikilink
wikicode = wikicode.replace( /\[\[https?:\/\/en\.(?:m\.)?wikipedia\.org\/wiki\/([^|]*)\|([^\]]*)\]\]/g, '[[$1|$2]]' );
// non-piped external link
wikicode = wikicode.replace( /\[\[(http[^|]*)\]\]/g, '[$1]' );
// piped external link
wikicode = wikicode.replace( /\[\[(http[^|]*)\|([^\]]*)\]\]/g, '[$1 $2]' );
return wikicode;
}
 
fixExternalLinksToWikipediaArticles( wikicode ) {
// [https://en.wikipedia.org/wiki/Article] and [https://en.wikipedia.org/wiki/Article Article name]
return wikicode.replace( /(?<!\[)\[https?:\/\/en\.wikipedia\.org\/wiki\/([^ \]]*)( [^\]]*)?\]/gs, function( match, p1 ) => {
p1 = decodeURIComponent( p1 );
p1 = p1.replace( /_/g, ' ' );
return `[[${ p1 }]]`;
} );
}
 
deleteBlankLinesBetweenBullets( wikicode ) {
letconst lines = wikicode.split( '\n' );
letconst buffer = [];
letconst length = lines.length;
for ( let i = 0; i < length; i++ ) {
letconst previous = lines[ i - 1 ];
letconst current = lines[ i ];
letconst next = lines[ i + 1 ];
if (
typeof previous !== 'undefined' &&
typeof next !== 'undefined' &&
previous.startsWith( '*' ) &&
current === '' &&
next.startsWith( '*' )
) {
continue;
}
buffer.push( current );
}
return buffer.join( '\n' );
}
 
/**
 * Deletes every occurrence of the characters listed in the character class below.
 * NOTE(review): as this text is rendered, the class appears to hold only an en dash, which
 * changeYearRangeDashToNDash() deliberately inserts later. The class may really contain an
 * invisible or mis-decoded character — confirm against the live script before editing it.
 *
 * @param {string} wikicode
 * @return {string}
 */
deleteWeirdUnicodeCharacters( wikicode ) {
return wikicode.replace( /[–]/g, '' );
}
 
deleteSomeHTMLTags( wikicode ) {
wikicode = wikicode.replace( /<\/?p( [^>]*)?\/?>/g, '' );
wikicode = wikicode.replace( /<\/?strong( [^>]*)?\/?>/g, '' );
wikicode = wikicode.replace( /<\/?em( [^>]*)?\/?>/g, '' );
wikicode = wikicode.replace( /<\/?nowiki( [^>]*)?\/?>/g, '' );
wikicode = wikicode.replace( /<\/?u( [^>]*)?\/?>/g, '' );
wikicode = wikicode.replace( /(?:<big>|<\/big>)/g, '' );
return wikicode;
}
 
fixHeadingsInAllCapsdeleteNonAFCDraftTags( wikicode ) {
wikicode = wikicode.replace( /{{Preloaddraft submit}}\n{0,2}/gi, '' );
wikicode = wikicode.replace( /<!-- When you move this draft into article space, please link it to the Wikidata entry and remove the QID in the infobox code\. -->\n{0,2}/gi, '' );
wikicode = wikicode.replace( /{{Draft}}\n{0,2}/gi, '' );
return wikicode;
}
 
deleteAFCDraftTagsIfMainspace( wikicode, namespaceNumber ) {
const isMainspace = namespaceNumber == 0;
if ( isMainspace ) {
// {{AfC submission}}, {{AfC topic}}, {{AfC comment}}, etc.
wikicode = wikicode.replace( /{{AfC [^}]*}}\n?/g, '' );
wikicode = wikicode.replace( /{{Draft topics[^}]*}}\n?/g, '' );
}
return wikicode;
}
 
fixHeadingsInAllCaps( wikicode ) {
// create a concatenated string with the text from every heading
letconst matches = wikicode.matchAll( /== {0,}(.+) {0,}==/g );
let headingString = '';
for ( letconst match of matches ) {
headingString += match[ 1 ];
}
 
// if string only contains caps
if ( this._isUpperCase( headingString ) ) {
// convert all headings to sentence case
letconst matches = wikicode.matchAll( /== {0,}(.+) {0,}==/g );
for ( letconst match of matches ) {
let matchRegex = this._escapeRegEx( match[ 1 ].trim() );
matchRegex = new RegExp( '== {0,}' + matchRegex + ' {0,}==', 'g' );
letconst sentenceCase = this._toSentenceCase( match[ 1 ].trim() );
wikicode = wikicode.replace( matchRegex, '== ' + sentenceCase + ' ==' );
}
}
Line 666 ⟶ 542:
}
 
deleteBigTagdeleteEmptySections( wikicode ) {
return wikicode.replace( /\n*== ?(?:<big>See also|<External links) ?==\n*$/big>)/g, '' );
}
 
deleteEmptySectionsdeleteDuplicateReferencesSection( wikicode ) {
returnconst matches = wikicode.replacematch( /==\ns*References\s*== ?(?:See also|External links) ?==\n*$/,gi '');
}
 
deleteDuplicateReferencesSection(wikicode) {
let matches = wikicode.match(/==\s*References\s*==/gi);
if ( matches !== null && matches.length > 1 ) {
// run regexes that are likely to delete the extra section
letconst attempt = wikicode.replace(
`== References ==
<!-- Inline citations added to your article will automatically display here. See en.wikipedia.org/wiki/WP:REFB for instructions on how to add citations. -->
{{reflist}}`
, '' );
letconst matches2 = attempt.match( /==\s*References\s*==/gi );
if ( matches2.length === 1 ) {
wikicode = attempt.trim();
wikicode = wikicode.replace( /==\s*References\s*==/gi, '== References ==' );
}
}
Line 692 ⟶ 564:
}
 
removeUnderscoresFromWikilinks( wikicode ) {
letconst sf = new StringFilter();
wikicode = sf.surgicalReplaceInsideTags( /_/g, ' ', wikicode, [ '[[' ], [ ']]' ] );
return wikicode;
}
 
fixPipedWikilinksWithIdenticalParameters( wikicode ) {
const matches = wikicode.matchAll( /\[\[([^|\]]+)\|([^\]]+)\]\]/g );
for ( const match of matches ) {
if ( match[ 1 ] === match[ 2 ] ) {
wikicode = this._replaceAll( wikicode, `[[${ match[ 1 ] }|${ match[ 1 ] }]]`, `[[${ match[ 1 ] }]]` );
}
}
return wikicode;
}
 
removeBorderFromImagesInInfoboxes( wikicode ) {
wikicode = wikicode.replace( /(\|\s*logo\s*=\s*)\[\[File:([^\]\|]*)[^\]\]]*\]\]/g, '$1$2' );
wikicode = wikicode.replace( /(\|\s*cover\s*=\s*)\[\[File:([^\]\|]*)[^\]\]]*\]\]/g, '$1$2' );
return wikicode;
}
 
/** These often hide towards the bottom of a draft. When the draft is submitted, unsubmitted templates (t) detect this and show up as blank, creating a weird extra line break. So this basically fixes the line break. */
removeExtraAFCSubmissionTemplates( wikicode ) {
letconst hasSubmittedTemplate = wikicode.match( /{{AfC submission\|\|/ );
letconst hasUnsubmittedTemplate = wikicode.match( /{{AfC submission\|t\|/ );
if ( hasSubmittedTemplate && hasUnsubmittedTemplate ) {
wikicode = wikicode.replace( /{{AfC submission\|t\|[^\}\}]*\}\}\n?/gm, '' );
}
return wikicode;
}
 
deleteMultipleReferenceTagsmoveAFCSubmissionTemplatesToTop( wikicode ) {
letconst hasReflisthasTemplateAtBottom = wikicode.match( /\n[^\n]+\n*({{ReflistAfC submission[^}]*}})\s*$/i );
if ( hasTemplateAtBottom ) {
let hasReferencesTag = wikicode.match(/<references ?\/>/i);
// delete all submission templates
wikicode = wikicode.replace( /{{AfC submission[^}}]*\}\}\n?/gm, '' );
 
// insert template at top
wikicode = hasTemplateAtBottom[ 1 ] + '\n----\n\n' + wikicode;
}
return wikicode;
}
 
deleteMultipleReferenceTags( wikicode ) {
const hasReflist = wikicode.match( /{{Reflist}}/i );
const hasReferencesTag = wikicode.match( /<references ?\/>/i );
if ( hasReflist && hasReferencesTag ) {
// delete all references tags
wikicode = wikicode.replace( /<references ?\/>\n?/gi, '' );
}
return wikicode;
}
 
_isUpperCase( str ) {
return str === str.toUpperCase();
}
 
_toSentenceCase( string ) {
return string.charAt( 0 ).toUpperCase() + string.slice( 1 ).toLowerCase();
}
 
_replaceAll( haystack, needle, replacement ) {
const regex = new RegExp( this._escapeRegEx( needle ), 'g' );
haystack = haystack.replace( regex, replacement );
return haystack;
}
 
_escapeRegEx( string ) {
return string.replace( /[.*+?^${}()|[\]\\]/g, '\\$&' ); // $& means the whole matched string
}
}
 
 
// === modules/StringFilter.js ======================================================

/**
 * Lets you use regex to specify what parts of a very long string you want to specify as "off limits", then you can do additional regexes and search/replace to the remaining parts of the string.
 */
class StringFilter {
	/**
	 * Does a replace, but specifies areas of the file that should NOT be replaced. Those areas are specified by providing an openingTag and a closingTag, and those areas are marked as off limits.
	 *
	 * @param {RegExp|string} regex What to search for in the unprotected parts.
	 * @param {string|Function} replacement Passed straight to String.prototype.replace.
	 * @param {string} haystack The full text to process.
	 * @param {string[]} openingTags Literal strings that start an off-limits area.
	 * @param {string[]} closingTags Literal strings that end an off-limits area.
	 * @return {string}
	 */
	surgicalReplaceOutsideTags( regex, replacement, haystack, openingTags, closingTags ) {
		const allTags = [ ...openingTags, ...closingTags ];
		const parts = this._splitStringUsingMultiplePatterns( haystack, allTags );
		const resultArray = [];
		for ( let part of parts ) {
			// A part that begins with an opening tag is the protected area; leave it untouched.
			const openingTagMatch = openingTags.some( ( tag ) => part.startsWith( tag ) );
			if ( !openingTagMatch ) {
				part = part.replace( regex, replacement );
			}
			resultArray.push( part );
		}
		return resultArray.join( '' );
	}

	/**
	 * Does a replace, but specifies areas of the file that SHOULD be replaced, then skips the rest of the file. The area that should be replaced is specified by providing an openingTag and a closingTag.
	 *
	 * @param {RegExp|string} regex What to search for inside the tagged parts.
	 * @param {string|Function} replacement Passed straight to String.prototype.replace.
	 * @param {string} haystack The full text to process.
	 * @param {string[]} openingTags Literal strings that start an area to edit.
	 * @param {string[]} closingTags Literal strings that end an area to edit.
	 * @return {string}
	 */
	surgicalReplaceInsideTags( regex, replacement, haystack, openingTags, closingTags ) {
		const allTags = [ ...openingTags, ...closingTags ];
		const parts = this._splitStringUsingMultiplePatterns( haystack, allTags );
		const resultArray = [];
		for ( let part of parts ) {
			// Deliberately no break: the replacement runs once for every opening tag the part starts with.
			for ( const tag of openingTags ) {
				if ( part.startsWith( tag ) ) {
					part = part.replace( regex, replacement );
				}
			}
			resultArray.push( part );
		}
		return resultArray.join( '' );
	}

	/**
	 * Also keeps the pattern in the result, unlike string.prototype.split. Algorithm isn't perfect, will fail with this pattern: <ref>Test/>Test</ref>. But should be good enough for DraftCleaner stuff.
	 *
	 * Each returned chunk other than the first begins with one of the patterns, and joining the chunks gives back the original string.
	 *
	 * @param {string} string
	 * @param {string[]} patterns
	 * @return {string[]}
	 */
	_splitStringUsingMultiplePatterns( string, patterns ) {
		const length = string.length;
		const result = [];
		let positionOfLastMatch = 0;
		for ( let i = 0; i < length; i++ ) {
			// Test for a pattern at position i directly, instead of copying the rest of the string on every iteration.
			const patternMatch = patterns.some( ( pattern ) => string.startsWith( pattern, i ) );
			if ( !patternMatch ) {
				continue;
			}
			const chunk = string.slice( positionOfLastMatch, i );
			// if blank (happens if i=0 matches), continue instead of putting an empty "" into the array
			if ( !chunk ) {
				continue;
			}
			result.push( chunk );
			positionOfLastMatch = i;
		}
		// Don't forget the last chunk.
		result.push( string.slice( positionOfLastMatch ) );
		return result;
	}
}
$(async function() {
 
// === main.js ======================================================
 
/* THIS SCRIPT IS BUGGY ABOUT 10% OF THE TIME. Be sure to check the diff that pops up before submitting.
 
- Adds "Run DraftCleaner" link to the left sidebar
 
- Top uses:
- remove extra line breaks (for example, 3 enters in a row)
- in the first sentence, bold the title
- convert curly quotes to regular quotes
- put <ref>s after periods
- clean external links out of the main article area (turn them into references)
- add ==References== section
- remove bold from headings
 
- Other uses:
- converts [inline external links] to <ref>s
- removes spaces in front of <ref>s
- get rid of any level 2 heading that contains the article's title
- converts =TitleHeading= to ==H2Heading==
- replaces Covid-19 with COVID-19
- removes enter characters between <ref>s
- trims whitespace at beginning and end
- remove self wikilinks to the article title
- convert ==Reference== to ==References==
- turn bare URLs into references
- fix errant spaces at beginning of lines, which makes a blockquote looking thing
- delete whitespace at the end of lines
- convert double spaces to single spaces
- remove blank heading
- in refs, turn short links into long links, so you can see the ___domain
- change year range dash to ndash
- if in draftspace, and draft in categories, disable the categories
- delete <br> tags; in drafts, these are usually poorly placed
- fix empty references section
- right align images
- remove whitespace if that is the only character on a line
- correct capitalization of see also, references, further reading, external links
- if article has headings but no lead, remove first heading
- replace unicode bullets with asterisks
 
Add one of the following to your User:yourName/common.js (at the top) to change the position where DraftCleaner puts its link:
window.draftCleanerPutInToolsMenu = true;
window.draftCleanerPutInMoreMenu = true;
 
This page was assembled from 3 files using my publish.php script. I have an offline test suite with around 100 unit tests for the DraftCleaner and StringFilter classes.
*/
 
( function () {
/**
 * Fetches the current wikitext of a page through the action=parse API.
 *
 * @param {string} title Page name, including any namespace prefix. It is URI-encoded here.
 * @return {Promise<string>} The page's wikitext, or '' when the page being viewed has no current revision.
 * @throws {Error} When the API response does not contain parse.wikitext (for example an API error payload).
 */
async function getWikicode( title ) {
	// wgCurRevisionId is falsy when the viewed page has no current revision, so there is nothing to fetch.
	const pageIsDeleted = !mw.config.get( 'wgCurRevisionId' );
	if ( pageIsDeleted ) {
		return '';
	}

	const encodedTitle = encodeURIComponent( title );
	// Awaiting the jqXHR yields the parsed JSON body, so no success callback is needed to capture it.
	const result = await $.ajax( {
		url: 'https://en.wikipedia.org/w/api.php?action=parse&page=' + encodedTitle + '&prop=wikitext&formatversion=2&format=json',
		dataType: 'json'
	} );

	const parsed = result && result.parse;
	if ( !parsed || typeof parsed.wikitext !== 'string' ) {
		// Surface the API's own explanation when there is one, instead of an opaque TypeError.
		const reason = result && result.error && result.error.info ? result.error.info : 'unexpected API response';
		throw new Error( 'DraftCleaner: could not fetch wikicode: ' + reason );
	}
	return parsed.wikitext;
}
 
/**
 * Opens the "Show changes" (diff) screen for a page by building a hidden form with the proposed text and edit summary, attaching it to the document and submitting it as a POST to index.php?action=submit.
 *
 * @param {string} titleWithNamespaceAndUnderscores Page name; it is URI-encoded before going into the form's action URL.
 * @param {string} wikicode Proposed page text, sent as wpTextbox1.
 * @param {string} editSummary Sent as wpSummary.
 */
function goToShowChangesScreen( titleWithNamespaceAndUnderscores, wikicode, editSummary ) {
	const baseURL = mw.config.get( 'wgServer' ) + mw.config.get( 'wgScriptPath' ) + '/';
	const titleEncoded = encodeURIComponent( titleWithNamespaceAndUnderscores );

	// Hidden fields, in the order they are appended to the form.
	const $textbox = $( '<input type="hidden" name="wpTextbox1">' ).val( wikicode );
	const $summary = $( '<input type="hidden" name="wpSummary">' ).val( editSummary );
	const $mode = $( '<input type="hidden" name="mode">' ).val( 'preview' );
	const $diff = $( '<input type="hidden" name="wpDiff">' ).val( 'Show changes' );
	const $ultimate = $( '<input type="hidden" name="wpUltimateParam">' ).val( '1' );

	// https://stackoverflow.com/a/12464290/3480193
	const $form = $( `<form action="${ baseURL }index.php?title=${ titleEncoded }&action=submit" method="POST"/>` );
	$form.append( $textbox, $summary, $mode, $diff, $ultimate );
	// it has to be added somewhere into the <body>
	$form.appendTo( $( document.body ) );
	$form.trigger( 'submit' );
}
 
/**
 * Reads the current page's name from mw.config (wgPageName): the pagename including the namespace name, with spaces replaced by underscores.
 *
 * @return {string}
 */
function getArticleName() {
	const pageName = mw.config.get( 'wgPageName' );
	return pageName;
}
 
// don't run when not viewing articles
const action = mw.config.get( 'wgAction' );
const isNotViewing = action != 'view';
if ( isNotViewing ) {
return;
}
 
// don't run when viewing diffs
const isDiff = mw.config.get( 'wgDiffNewId' );
if ( isDiff ) {
return;
}
 
// Don't run in virtual namespaces
const isVirtualNamespace = mw.config.get( 'wgNamespaceNumber' ) < 0;
if ( isVirtualNamespace ) {
return;
}
 
let menuID = 'p-navigation';
// @ts-ignore
if ( window.draftCleanerPutInToolsMenu ) {
menuID = 'p-tb';
// @ts-ignore
} else if ( window.draftCleanerPutInMoreMenu ) {
menuID = 'p-cactions';
}
 
const titleWithNamespaceAndUnderscores = getArticleName();
const namespaceNumber = mw.config.get( 'wgNamespaceNumber' );
 
let running = false;
 
// Add DraftCleaner to the toolbar
mw.loader.using( [ 'mediawiki.util' ], () => {
	mw.util.addPortletLink( menuID, '#', 'Run DraftCleaner', 'DraftCleanerLink' );
	$( '#DraftCleanerLink' ).on( 'click', async ( event ) => {
		// The link's href is '#'; stop the browser from following it (scroll jump / hash change).
		event.preventDefault();

		// prevent running the script while script is already in progress
		if ( running ) {
			return;
		}
		running = true;

		// Stays false unless the diff form was submitted. In that case the page is navigating away and the lock is kept to block a double submit.
		let leavingPage = false;
		try {
			mw.notify( 'Parsing page content...' );

			// get page wikicode
			const titleWithNamespaceAndSpaces = titleWithNamespaceAndUnderscores.replace( /_/g, ' ' );
			const originalWikicode = await getWikicode( titleWithNamespaceAndUnderscores );

			const dc = new DraftCleaner();
			const wikicode = dc.cleanDraft( originalWikicode, namespaceNumber, titleWithNamespaceAndSpaces );

			const needsChanges = wikicode !== originalWikicode;
			if ( needsChanges ) {
				const summary = 'clean up ([[User:Novem Linguae/Scripts/DraftCleaner.js|DraftCleaner]])';
				goToShowChangesScreen( titleWithNamespaceAndUnderscores, wikicode, summary );
				leavingPage = true;
			} else {
				mw.notify( 'No changes needed!' );
			}
		} finally {
			// Release the lock after "no changes" or an error, so the link can be used again.
			if ( !leavingPage ) {
				running = false;
			}
		}
	} );
} );
}() );
 
 
});