User:Ohconfucius/script/Sources.js: Difference between revisions

Content deleted Content added
repeat edit of TheDJ to production script re migration of Regex_menu_framework
aligned to test script
Line 35:
 
// removing references to other WP articles and 'external' WP links
// NOTE(review): regex(pattern, replacement) is defined outside this view; presumably it applies
// the substitution to the wikitext being edited -- confirm against its definition.
// All four patterns below share one host list: xx.wikipedia.org/wiki, (www.)facebook/myspace/
// twitter/mailonline/findagrave .com, and dailymail.co.uk.
// 1. Citation-template refs whose |url= points at one of those hosts -> {{cn}} plus an HTML comment.
//    The pattern accepts an optional space before "=" but none after it.
regex(/<ref[^<>]*>[^<>]*\|[ ]*url ?=https?:\/\/(?:\w{2}\.wikipedia\.org\/wiki|(?:www\.|)(?:facebook|myspace|twitter|mailonline|findagrave)\.com|dailymail\.co\.uk)\/[^<>]*<\/ref>/gi, '{{cn}}<!-- WP:RS needed; removed link to WP:SPS (Twitter, FB, WP)-->');
// 2. Bare-URL refs (<ref>http://host/...</ref>) -> {{cn}}.
regex(/<ref>\s*https?:\/\/(?:\w{2}\.wikipedia\.org\/wiki|(?:www\.|)(?:facebook|myspace|twitter|mailonline|findagrave)\.com|dailymail\.co\.uk)\/[^\s\]<]*<\/ref>/gi, '{{cn}}<!-- WP:RS needed; removed link to WP:SPS (Twitter, FB, WP)-->');
// 3. Bracketed external-link refs that carry a label (<ref>[http://host/... label]</ref>) -> {{cn}}.
regex(/<ref>\s*\[https?:\/\/(?:\w{2}\.wikipedia\.org\/wiki|(?:www\.|)(?:facebook|myspace|twitter|mailonline|findagrave)\.com|dailymail\.co\.uk)\/[^\s\]]*[ ]+[\w\d][^\]]*\]<\/ref>/gi, '{{cn}}<!-- WP:RS needed; removed link to WP:SPS (Twitter, FB, WP)-->');
// 4. Any |url= parameter still pointing at those hosts is deleted, name and value; the lookahead
//    leaves the following "|" or "}" in place.
regex(/\|\s*url[ ]*=[ ]*https?:\/\/(?:\w{2}\.wikipedia\.org\/wiki|(?:www\.|)(?:facebook|myspace|twitter|mailonline|findagrave)\.com|dailymail\.co\.uk)[^\s\|\{\}<]*(?=[ ]*[|}])/gi, '');

// Inline external links to a Wikipedia page, " [http://xx.wikipedia.org/wiki/Page Label]",
// become the internal link " [[Label]]" (the label text, not the URL path, is used as the target).
regex(/[ ]\[https?:\/\/\w{2}\.wikipedia\.org\/wiki\/[^\s\]]*[ ]+([\w][^\]]*)\]/gi, ' [[$1]]');
Line 56:
// Clean-up of filler text inside citation-template parameter values.
// NOTE(review): regex(pattern, replacement) is defined outside this view; presumably it applies
// the substitution to the wikitext being edited -- confirm against its definition.

// Placeholder bylines: |last=Reporter / |last=staff together with the |first= that follows it.
// The lookahead has to be the final element of the pattern: the "|", "{" or "}" it peeks at is
// not consumed, so nothing may be required after it.
regex(/\|[ ]*last=(Reporter|staff)[ ]*\|[ ]*first=[^|\{\}]*(?=[\|{}])/gi, '');
// Stand-alone |first= / |last= holding "reporter", "writer", "staff reporter" or "staff writer".
regex(/\|[ ]*(?:first|last)=(?:staff |)(?:reporter|writer)[ ]*(?=[\|{}])/gi, '');
// "Accessed on" / "Retrieved:" etc. in front of the access date; access-?date covers both
// |accessdate= and |access-date=.
regex(/(\|\s?access-?date\s*\=\s*)(?:accessed|retrieved)(?: by| on|):?[ ]*(\d)/gi, '$1$2');
// An access date that is nothing but a four-digit year is dropped entirely.
regex(/(\|\s?access-?date\s*\=\s*\d{4}\s?)(?=[|}])/gi, '');
// |work= whose whole value is "article", "interview" or "review" is emptied.
regex(/(\|\s?work\s*\=\s*)(?:article|interview|review) ?(?=[|}\n])/gi, '$1');
// "Vol." / "volume" prefix in front of the volume number.
regex(/(\|\s?volume\s*\=\s*)vol(?:ume|\.?)[ ]*(\d)/gi, '$1$2');
// "p." / "pp." / "pg." / "page(s)" prefix in front of the page number.
regex(/(\|\s?pages?\s*\=\s*)(?:pages?|p[gp]?\.?)[ ]*(\d)/gi, '$1$2');
regex(/ [-–] 每日明報 daily news/gi, '');
// A title that is just "Login" is replaced by a visible placeholder for the editor to fix.
regex(/(\|\s?title\s*\=\s*)Login ?(?=[|}\n])/gi, '$1ACTUAL ARTICLE TITLE BELONGS HERE');
// An empty |publisher= directly followed by |via=X takes X as its value ...
regex(/(\|\s?publisher=)\|\s?via\s*\=\s*(\w+)(?=\s*[|}])/gi, '$1$2');
// ... and every |via= that remains is renamed to |work=.
regex(/\|\s?via\s*\=\s*/gi, '|work=');
//Remove COinS corrupting templates from CS1 citations
Line 127 ⟶ 131:
 
// removing blank or redundant parameters
// NOTE(review): regex(pattern, replacement) is defined outside this view; presumably it applies
// the substitution to the wikitext being edited -- confirm against its definition.
// Any listed parameter whose value is empty (only whitespace before the next "|" or "}") is
// deleted. access-?date covers both spellings of the access date; the place of publication is
// matched as |___location= or |place=.
regex(/(?:\|[ ]*(?:access-?date|agency|archive-?(?:date|url)|arxiv|asin|at|author(-?link|-mask|-name-separator|-separator|\d|\d-link|link\d?|)|bibcode|chapter|chapter-url|coauthors?|contribution(?:-url|)|date|deadurl|display-authors|doi|doi-inactive|doibroken|edition|editor(?:-first|-last|-link|\d|\d-first|\d-last|\d-link)|(?:first|last)\d?|format|id|is[bs]n|issue|jfm|journal|jstor|language|lay(?:date|source|summary)|lccn|___location|magazine|day|month|mr|newspaper|nopp|oclc|ol|origyear|osti|others|pages?|periodical|place|pm[cd]|pmid|postscript|publication(?:-date|-place)|publisher|quote|ref|rfc|separator|series|ssrn|trans_title|type|url|volume|via|work|year|zbl)[ ]*=[\s]*)(?=[}|])/gi, '');
// |began= and |ended= are removed together with whatever value they hold.
regex(/(?:\|[ ]*(began|ended)[ ]*=[^}|]*)(?=[}|])/gi, '');
// "|work=[[iTunes]] Archive " is rewritten as a publisher credit for the iTunes Store.
regex(/(\|\s?work\s?\=\s?)\[\[iTunes\]\] Archive\s/gi, '|publisher=[[iTunes Store]]');
Line 148 ⟶ 152:
// Unlinking wikilinks inside the place-of-publication parameter (|___location= or |place=).
// NOTE(review): regex(pattern, replacement) is defined outside this view; presumably it applies
// the substitution to the wikitext being edited -- confirm against its definition.
// Two or three consecutive links ("[[A]], [[B|b]], [[C]]") are reduced to their display text with
// the original separators kept; groups 5 and 6 are simply empty when there is no third link.
regex(/(\|[ ]*?(?:___location|place)=[ ]*?)\[\[(?:[^\|\]]+?\||)([\w\s\,]+?)\]\](?:(,? )\[\[(?:[^\|\]]+?\||)([\w\s\,]+?)\]\])(?:(,? )\[\[(?:[^\|\]]+?\||)([\w\s\,]+?)\]\]|)(?=[ ]?[|}])/gi, '$1$2$3$4$5$6');
// One link, optionally followed by a second one, handled the same way.
regex(/(\|[ ]*?(?:___location|place)=[ ]*?)\[\[(?:[^\|\]]+?\||)([\w\s\,]+?)\]\](?:(,? )\[\[(?:[^\|\]]+?\||)([\w\s\,]+?)\]\]|)(?=[ ]?[|}])/gi, '$1$2$3$4');

// removing english icon template
regex(/[ ]?\{\{en[- ]icon\}\}/gi, "");
 
// removing english language parameters
Line 159 ⟶ 160:
// eliminating time of day
// NOTE(review): regex(pattern, replacement) is defined outside this view; presumably it applies
// the substitution to the wikitext being edited -- confirm against its definition.
// "Posted by" / "Published on" etc. at the start of |author= is dropped first.
regex(/(\|[ ]*author[ ]*=[ ]*)(?:posted|published)(?: by| on|)[\s:](?=\s*\w)/gi, "$1");
// A leading clock time ("10:30 pm,", "14:05 GMT", "9:00&nbsp;UTC") is removed from date and byline
// parameters; archive-?date / access-?date cover both the run-together and hyphenated names.
regex(/(\|[ ]*(?:date|archive-?date|access-?date|author|first|last)[ ]*=[ ]*)[0-2]?\d:[0-5]\d(?:[ ]|&nbsp;)(?:[ap]m ?|[ap]\.m\. |[A-Z]{1,2}T|UTC)[\.,]?[ ]?/gi, "$1");

// eliminating days of the week
// The weekday may be abbreviated or full and may carry ONE of ".", "," or "day" before the
// whitespace, so "Mon, " and "Monday " match but "Monday, " does not.
regex(/(\|[ ]*(?:date|archive-?date|access-?date|author)[ ]*=[ ]*)(?:(?:Mon|Tues?|Wed|Thur?|Fri|Sat|Sun)(?:[\.,]|day)?)\s/gi, "$1");
 
//'Accessed'/'Obtained' -> 'Retrieved'
Line 241 ⟶ 242:
// Hiding Wikipedia disambiguators such as "(magazine)" from publication names.
// NOTE(review): regex(pattern, replacement) is defined outside this view; presumably it applies
// the substitution to the wikitext being edited -- confirm against its definition.
// Piped link label "... (South Africa)" / "(South African newspaper)" loses the parenthetical.
regex(/(\|)((?:The ?|)[A-Z]\w*(?: [A-Z]\w*|))(?: \((?:South Africa)(?:n newspaper|)\))(\]\])/g, '$1$2$3');

// For each title family there are four shapes: plain parameter value, italic plain text,
// italic unpiped link (gets a pipe so the disambiguator is hidden), and piped link label.
regex(/(=[ ]*)(Billboard|Fast Company|People|Q|Slate|Time Out|Wired) \((?:magazine)\)(?=\s*[|}])/g, '$1$2'); //non-standard code
regex(/('')(Billboard|Fast Company|People|Q|Slate|Time Out|Wired) \(magazine\)(?='')/g, '$1$2'); //non-standard code
// Appends a bare "|" (pipe trick) so the link displays without "(magazine)".
regex(/(''\[\[(?:Billboard|Fast Company|People|Q|Slate|Time Out|Wired) \(magazine\))(?=\]\]'')/g, '$1|'); //non-standard code
regex(/(\|)(Billboard|Fast Company|People|Q|Slate|Time Out|Wired)(?: \(magazine\))(?=\]\])/g, '$1$2');
regex(/(=[ ]*)(Quartz) \((?:publication)\)(?=\s*[|}])/g, '$1$2'); //non-standard code
regex(/('')(Quartz) \(publication\)(?='')/g, '$1$2'); //non-standard code
// $1 is the whole "''[[Quartz (publication)" prefix and $2 the bare title; the pipe between them
// is required, otherwise the link target would become "Quartz (publication)Quartz".
regex(/(''\[\[(Quartz) \(publication\))(?=\]\]'')/g, '$1|$2'); //non-standard code
regex(/(\|)(Quartz)(?: \(publication\))(?=\]\])/g, '$1$2');
regex(/(=[ ]*)(Salon) \((?:website)\)(?=\s*[|}])/g, '$1$2'); //non-standard code
regex(/('')(Salon) \(website\)(?='')/g, '$1$2'); //non-standard code
Line 349 ⟶ 354:
// NOTE(review): regex(pattern, replacement) is defined outside this view; presumably it applies
// the substitution to the wikitext being edited -- confirm against its definition.
// Drops "indiatimes.com" when it directly follows a word character, a closing '' and a period;
// the captured prefix, including that period, is kept.
regex(/(\w''\.)indiatimes\.com/gi, '$1');

// Removes |language=English when it is followed by "|", "}" or a newline.
regex(/\|[ ]?language[ ]?\=[ ]?English[ ]*?(?=[|}\n])/gi, ''); //note: adjusted for false positive in infoboxes books
// Removes a "- <word>.com" suffix that sits immediately before a pipe; the pipe is re-emitted.
regex(/- [\w]*\.com[ ]*\|/gi, '|');
// Text shaped like "X – Google <run>]<punct> Books.google.xx<. or space>" is collapsed to
// "X]<punct>Google Books<. or space>" ($2 is the "]" plus its punctuation mark; no space is
// inserted between it and "Google Books").
regex(/(.) – Google [^ \]]*(\][\.,;]) Books\.google\.\w{2,3}(\.| )/gi, '$1$2Google Books$3');
Line 410 ⟶ 416:
// Stripping site/section branding from titles. "&#124;" is the HTML entity for "|".
// NOTE(review): regex(pattern, replacement) is defined outside this view; presumably it applies
// the substitution to the wikitext being edited -- confirm against its definition.
// Removes the literal suffix " &#124; Comment is free" wherever it occurs.
regex(/( &#124; Comment is free)/gi, '');
// |title= that starts with "BBC News" / "BBC Sport" plus a dash or "&#124; " loses that prefix.
regex(/\|\s?title\s*\=\s*BBC (?:News|Sport)\s?(?:[-–]|&#124; )\s?/gi, '|title=');
// Same for an "Asia Times Online" prefix; here one or more colons also count as the separator.
regex(/\|\s?title\s*\=\s*Asia Times Online\s?(?:[-–]|&#124; |:+)\s?/gi, '|title=');
// "<Section> News &#124;" (section word optional) is removed when the next character, after an
// optional space, is "&" or "|".
regex(/(?:Entertainment|Football|(?:inter|)national|Latest|local|UK|world|) News &#124;(?=[ ]?[&\|])/gi, '');
// Mirror case: a separator ("-", "–", "|" or "&#124;") followed by "<Section> News", under the
// same lookahead condition.
regex(/(?:[-–|]|&#124;)[ ]*(?:Entertainment|Football|(?:inter|)national|Latest|local|UK|world|) News(?=[ ]?[&\|])/gi, '');
Line 426 ⟶ 433:
 
// expanding/dating tags
// NOTE(review): regex(pattern, replacement) is defined outside this view; presumably it applies
// the substitution to the wikitext being edited -- confirm against its definition.
// {{cn}} / {{fact}} become a dated {{citation needed}}. The month and year are hard-coded here
// and have to be bumped by hand.
regex(/{{(?:cn|fact)}}/gi, '{{citation needed|date=October 2017}}');

//dynamic columns for reflists; remove scroll bar
// Unwraps a <div style=...> / <div class=...> that directly follows a References/Notes heading.
// The body match is greedy, so it runs to the LAST </div> after the heading.
regex(/((?:[Rr]eferences|[Nn]otes)[ ]?={2,4}[\n\r])[\r\n\s]*<div (?:style|class)=[^>]*>([\S\s]*)<\/div>/g, '$1$2');
regex(/<[Rr]eferences ?\/>/g, '{{reflist}}'); //default
// Drops an explicit "|30em" column width from {{reflist}}.
regex(/({{reflist)\|30em([^}]*}})/g, '$1$2');

// disambiguating duplicated ref names
// An empty named ref (<ref name=x></ref>) becomes the self-closing form.
regex(/(<ref name=[^/>"]*)><\/ref>/gi, '$1/>');
// Three opening tags with the same unquoted name get suffixes A, B and C. Each "\1>" in the
// pattern consumes the tag's ">", so every rewritten tag, including the third, must re-emit it.
regex(/(<ref name=[^/>"]*)>([\s\S]*)\1>([\s\S]*)\1>/gi, '$1A>$2$1B>$3$1C>');
// Two opening tags with the same unquoted name get suffixes A and B.
regex(/(<ref name=[^/>"]*)>([\s\S]*)\1>/gi, '$1A>$2$1B>');
// Quoted variant of the pair rule: $1 stops before the closing quote, so both the pattern and the
// replacement spell out '">' for each tag to keep the quotes balanced.
regex(/(<ref name="[^/>]*)">([\s\S]*)\1">/gi, '$1A">$2$1B">');
}