Wikipedia:AutoEd/htmltowikitext.js: Difference between revisions

Content deleted Content added
m Protected Wikipedia:AutoEd/htmltowikitext.js: User scripts are high-risk ([edit=sysop] (indefinite) [move=sysop] (indefinite))
Line 1:
//<source lang=javascript>
 
//Convert HTML to wikitext
/**
 * Convert common HTML markup in page text to the equivalent wikitext.
 *
 * Fixes are applied in a fixed order, each as a global, case-insensitive
 * regular-expression replacement:
 *   1. <b>/<strong> become '''bold''' and <i>/<em> become ''italic''.
 *   2. Malformed line breaks (</br>, <\br>, <br\>, <BR />, <.br>, ...) are
 *      normalised to <br /> (tag had a slash) or <br> (tag had none).
 *   3. <hr> variants become a ---- line.
 *   4. <references> variants become <references />, and an immediately
 *      repeated <references /> is collapsed to one.
 *   5. <h1>..<h6> are put on their own line, joined onto a single line, and
 *      replaced by =..====== section headings.
 *
 * @param {string} str - Page text to clean up.
 * @returns {string} The text with the supported HTML constructs wikified.
 */
function autoEdHTMLtoWikitext(str) { //MAIN FUNCTION describes list of fixes
  // <b>, <strong>, <i>, and <em> tags.
  // The tag body may contain plain text, self-closing tags, or one level of
  // simple paired tags (e.g. <b>a <i>x</i> b</b>); anything more deeply
  // nested is left untouched rather than risk mismatching tags.
  str = str.replace(/<(B|STRONG)[ ]*>((?:[^<>]|<[a-z][^<>]*\/>|<([a-z]+)(?:| [^<>]*)>[^<>]*<\/\3>)*?)<\/\1[ ]*>/gi, "'''$2'''"); // Wikify <B> and <STRONG>
  str = str.replace(/<(I|EM)[ ]*>((?:[^<>]|<[a-z][^<>]*\/>|<([a-z]+)(?:| [^<>]*)>[^<>]*<\/\3>)*?)<\/\1[ ]*>/gi, "''$2''"); // Wikify <I> and <EM>

  // </br>, <\br>, <br\>, <BR />, ...
  str = str.replace(/<[\\\/]+BR[\\\/\s]*>/gim, '<br />'); // Tag starts with a slash
  str = str.replace(/<[\\\/\s]*BR[\s]*[\\\/]+[\s]*>/gim, '<br />'); // Tag ends with a slash
  // <.br>, <br.>, <Br>, ...
  str = str.replace(/<[\s\.]*BR[\s\.]*>/gim, '<br>'); // Tag contains no slashes

  // <hr>
  str = str.replace(/([\r\n])[\t ]*<[\\\/\. ]*HR[\\\/\. ]*>/gi, '$1----'); // Already at the start of a line
  str = str.replace(/(.)<[\\\/\. ]*HR[\\\/\. ]*>/gi, '$1\n----'); // Mid-line: move the rule onto its own line

  // Not really an HTML-to-wikitext fix, but close enough
  str = str.replace(/<[\\\/\s]*REFERENCES[\\\/\s]*>/gim, '<references />');
  // Repeated references tag
  str = str.replace(/(<references \/>)[\s]*\1/gim, '$1');

  str = str.replace(/([^\r\n ])[\t ]*(<H[1-6][^<>]*>)/gim, '$1\n$2'); // Make sure <H1>, ..., <H6> is after a newline
  str = str.replace(/(<\/H[1-6][^<>]*>)[\t ]*([^\r\n ])/gim, '$1\n$2'); // Make sure </H1>, ..., </H6> is before a newline

  // Remove newlines from inside <H1>, ..., <H6>.
  // Each pass replaces one newline per heading with a space, so a heading
  // spanning several lines needs several passes. The pass count is capped
  // (11 passes) so the loop always terminates; once a pass changes nothing,
  // later passes could not change anything either, so stop early.
  for (var pass = 0; pass <= 10; pass++) {
    var joined = str.replace(/(<H)([1-6])([^<>]*>(?:[^<>]|<\/?[^\/h\r\n][^<>]*>)*?)[\r\n]((?:[^<>]|<\/?[^\/h\r\n][^<>]*>)*?<\/H)\2([^<>]*>)/gim, '$1$2$3 $4$2$5');
    if (joined === str) {
      break;
    }
    str = joined;
  }

  // Replace <H1>, ..., <H6> with wikified section headings.
  // Only headings that occupy a whole line are converted.
  str = str.replace(/(^|[\r\n])[\t ]*<H1[^<>]*>([^\r\n]*?)<\/H1[\r\n\t ]*>[\t ]*([\r\n]|$)/gim, '$1=$2=$3');
  str = str.replace(/(^|[\r\n])[\t ]*<H2[^<>]*>([^\r\n]*?)<\/H2[\r\n\t ]*>[\t ]*([\r\n]|$)/gim, '$1==$2==$3');
  str = str.replace(/(^|[\r\n])[\t ]*<H3[^<>]*>([^\r\n]*?)<\/H3[\r\n\t ]*>[\t ]*([\r\n]|$)/gim, '$1===$2===$3');
  str = str.replace(/(^|[\r\n])[\t ]*<H4[^<>]*>([^\r\n]*?)<\/H4[\r\n\t ]*>[\t ]*([\r\n]|$)/gim, '$1====$2====$3');
  str = str.replace(/(^|[\r\n])[\t ]*<H5[^<>]*>([^\r\n]*?)<\/H5[\r\n\t ]*>[\t ]*([\r\n]|$)/gim, '$1=====$2=====$3');
  str = str.replace(/(^|[\r\n])[\t ]*<H6[^<>]*>([^\r\n]*?)<\/H6[\r\n\t ]*>[\t ]*([\r\n]|$)/gim, '$1======$2======$3');

  return str;
}