User:Ohconfucius/script/Sources.js: Difference between revisions

Content deleted Content added
//disabled quote mark removal 18/06/2022 per Jonesey
mirroring test script
Line 28:
regex(/\s?(<ref>(?:[^<>]*\|\s*url\s*=|\[?)https?:\/\/((?:\w+\.)*\w+(?:\.(?:com?|org|net|gov|ac|)(?:\.[a-z]{2}|)))\/[^<>{}]+\|publisher=)(?=}}<\/ref>)/gi, "$1$2"); //capture ___domain name to add to publisher
regex(/(\|\s*(?:publisher|website)\s*\=\s*)w{3}\.(\w+\.(?:com?|org|net|gov|ac|)(?:\.[a-z]{2}|))(?=\s*[|}])/gi, '$1$2');
regex(/(\|\s*publisher=)\s?\|\s*?via\s*\=\s*(\w+)(?=\s*[|}])/gi, '$1$2');
// regex(/\|\s*?via\s*\=\s*/gi, '|work=');
regex(/\s?[-–]\s?(?:The |)(\w+\.(?:com?|org|net|gov|ac|)(?:\.[a-z]{2}|))\s?(\|\s*publisher\s*\=\s*)\1(?=}})/gi, '$2$1');
regex(/\s?[-–]\s?([^|]+)\s?(\|\s*publisher\s*\=\s*)(The )\1(?=}})/gi, '$2$3$1');
regex(/\|\s*?first\s*\=[^|]*\|\s*?last\s*\=(?:Editor|Group|Staff|Reporter|Writer)\s*(?=\|)/gi, '');
regex(/({{Navbox deaths}})(\n)/gi, '==References==$2{{Reflist}}$2$1$2');
regex(/(==References==\n{{Reflist}}\n)\n?\1/gi, '$1');
Line 43:
// warning-tagging template:Primary source inline
regex(/(<ref[^<>]*>[^<>]*\|[ ]*url ?= ?https?:\/\/(?:(?:www\.|mobile\.|)(?:blogger|blogspot|facebook|instagram|tiktok|twitter|wordpress)\.com|www\.formspring\.me)\/[^<>{]*)(<\/ref>)(?:\{\{Primary source inline\}\}|)/gi, '$1{{Primary source inline}}$2');
regex(/(<ref[^<>]*>[^<>]*\|[ ]*url ?= ?https?:\/\/(?:(?:www\.|)(?:myspace|findagrave)\.com)\/[^<>{]*)(<\/ref>)(?:\{\{Primary source inline\}\}|)/gi, '$1{{Primary source inline}}$2');
regex(/(<ref>\s*https?:\/\/(?:(?:www\.|)(?:blogger|blogspot|facebook|findagrave|instagram|myspace|tiktok|twitter|wordpress)\.com)\/[^\s\]<]*)(<\/ref>)(?:\{\{Primary source inline\}\}|)/gi, '$1{{Primary source inline}}$2');
regex(/(<ref>\s*\[https?:\/\/(?:(?:www\.|)(?:blogger|blogspot|facebook|findagrave|instagram|myspace|tiktok|twitter|wordpress)\.com)\/[^\s\]]*[ ]+[\w\d][^\]]*\])(<\/ref>)(?:\{\{Primary source inline\}\}|)/gi, '$1{{Primary source inline}}$2');
regex(/(\{\{Primary source inline\}\})\1/gi, '$1');
 
// removing http links within publisher/journal/work fields
//
// regex(/(\|\s*(?:author|publisher|work) *= *)(?:https?:\/{2}|)(?:w{3}\.|)(\w+(?:\.(?:com?|net|org|gov|\w{2})|)(?:\.\w{2}|))(?:\/[^|}]*|)(?=\s*[\]|}])/gi, '$1$2'); //returns ___domain name
// regex(/(\|\s*title\s*\=\s*)https?:\/{2}(?:w{3}\.|)(\w+\.com)(?=\s?(?:[=|{}]|\}\}))/gi, '$1<!-- ACTUAL ARTICLE TITLE BELONGS HERE! original text: $2 -->');
// regex(/(\|\s*title\s*\=\s*)https?:\/{2}([^|\s]+)(?=\s?(?:[=|{}]|\}\}))/gi, '$1<!-- ACTUAL ARTICLE TITLE BELONGS HERE! original text: $2 -->');
// regex(/(\|\s*title\s*\=\s*)(Archived copy)(?=\s?(?:[=|{]|\}\}))/gi, '$1<!-- ACTUAL ARTICLE TITLE BELONGS HERE! original text: $2 -->');
regex(/(\|\s*(?:author(?:link\d?|)|chapter|journal|magazine|newspaper|publisher|title|work|via)\s*\=\s*)https?:\/{2}(?:w{3}\.|)(?=\w)/gi, '$1'); //removes http:// and optionally www.
regex(/(\|\s*(?:author(?:link\d?|)|chapter|journal|magazine|newspaper|publisher|title|work|via)\s*\=\s*)(?:w{3}\.)(?=\w)/gi, '$1'); //removes www.
regex(/(\|\s*(?:author(?:link\d?|)|chapter|journal|magazine|newspaper|publisher|title|work|via)\s*\=\s*)(\w+(?:\.(?:com?|net|org|gov|\w{2}))(?:\.\w{2}|))(?:\/[^|}]*|)(?=\s*[\]|}])/gi, '$1$2'); //returns ___domain name
regex(/(>{{cit[^}]+\|\s*?website\s*\=\s*)https?:\/{2}(?:w{3}\.|)(?=\w)/gi, '$1'); //leave only ___domain name
regex(/(>{{cit[^}]+\|\s*?website\s*\=\s*)(?:w{3}\.)(?=\w)/gi, '$1'); //leave only ___domain name
 
// removing hyperlinks within publisher/journal/website/work fields
regex(/(\|\s*(?:chapter|journal|magazine|newspaper|publisher|title|work|via)\s*\=\s*)\[https?:\/\/[^\s\]]*\s([\w][^\]]*)\]/gi, '$1$2'); //leave only linked text
regex(/(>{{cit[^}]+\|\s*?website\s*\=\s*)\[https?:\/\/[^\s\]]*\s([\w][^\]]*)\]/gi, '$1$2'); //leavecitation templates: rem url whilst leaving only linked text within |website=
 
regex(/(\|\s*title\s*=\s*)(?:\w+(?:\.(?:com?|net|org|gov|\w{2}))(?:\.\w{2}|))(?:\/[^|}]+)(?:\/[^|}]+)(?:\/[^|}]+)(?:\/[^|}]+)\/([^|}]+)\/?(?=\s*[\]|}])/gi, '$1$2'); //returns terminal part of url
Line 73 ⟶ 75:
 
// removing references to other WP articles and 'external' WP links
regex(/<ref[^<>]*>[^<>]*\|[ ]*url ?= ?https?:\/\/\w{2}\.wikipedia\.org\/w[^<>]*<\/ref>/gi, '{{cn}}<!-- WP:RS needed; removed link to WP-->');
regex(/<ref>\s*https?:\/\/\w{2}\.wikipedia\.org\/wiki\/[^\s\]<]*<\/ref>/gi, '{{cn}}<!-- WP:RS needed; removed link to WP-->');
regex(/<ref>\s*\[https?:\/\/\w{2}\.wikipedia\.org\/w[^\s\]]*[ ]+[\w\d][^\]]*\]<\/ref>/gi, '{{cn}}<!-- WP:RS needed; removed link to WP-->');
Line 80 ⟶ 82:
 
//deprecated sources
regex(/<ref[^<>]*>[^<>]*\|[ ]*url ?= ?https?:\/\/(?:(?:\w+\.|)((?:the|)epochtimes|ntdtv)\.com)\/[^<>{]*<\/ref>/gi, '<!--rem deprecated source \($1\)-->');
regex(/<ref[^<>]*>[^<>]*\|[ ]*url ?= ?https?:\/\/(?:(?:\w+\.|)(thesun|dailymail)\.co\.uk)\/[^<>{]*<\/ref>/gi, '<!--rem deprecated source \($1\)-->');
 
regex(/(\|\s*url\s*\=\s*|\[)(https?:[^|{}#\s]+)(?:#[A-Za-z0-9\.]{12,13}|\?CMP=fb_gu)(?=[\s\[\]|{}<>])/gi, '$1$2'); //rem link tracking
regex(/(\|\s*url\s*\=\s*)(https?:[^|{}#\s]+\.html?)\?[^|}]{1,5}(?=[\s\[\]|{}<>])/gi, '$1$2'); //rem link tracking
regex(/(\|\s*url\s*\=\s*|\[)(https?:\/\/books\.google\.[^\/]+\/books\?id=\w{12}&pg=PA\d{1,3})&dq[^\s|}]+(?=\s?[|}])/gi, '$1$2'); //rem browser optimisation
Line 112 ⟶ 114:
regex(/(\|\s*)(?:publisher|website|work)=(Vimeo)(?:\.com|)(?=\s*[|}])/gi, '$1via=$2');
regex(/(\|\s*)(?:publisher|website|work)=(Newspapers\.com)(?=\s*[|}])/gi, '$1via=$2');
regex(/(\|\s*publisher=)\|\s*?via\s*\=\s*(\w+)(?=\s*[|}])/gi, '$1$2');
regex(/(?:-[ ]*Google Books[ ]*(?:\|[^}]*|)|)\|\s*?publisher\s*\=\s*(Google Books)(?=[\s\.]*[|}])/g, '|via=$1');
// regex(/(\|\s*)(?:publisher|website|work)=books\.google(?:\.com?|.co\.uk|)(?=\s*[|}])/gi, '|via=Google Books');
// regex(/\|\s*?(?:publisher|website|work)=(British Newspaper Archive|Google Books|Project Gutenberg|Proquest|Scribd|web(?:\.archive\|citation)(?:\.org|))(?=\s*[|}])/gi, '|via=$1');
 
//Remove COinS corrupting templates from CS1 citations
Line 123 ⟶ 125:
// Common CS1 errors.
// Scheme-less |url=www. gets the missing scheme prefixed; the separator must be "://" (the colon was previously dropped).
// NOTE(review): the lookahead only fires when "www." is immediately followed by | or } - confirm that is the intended trigger.
regex(/(\|\s*url\s*\=)(www\.)(?=[|}])/gi, '$1http://$2'); //common cs1 error
// Normalise "no date" / "not dated" / "non-dated" / "undated" to the CS1 convention "n.d."
regex(/(\|\s*date\s*=\s*)(?:not? |non-|un)date[ds]?\s*(?=[|}])/gi, '$1n.d.'); //common cs1 error
// |dead-url=no (or "n") states that the link is NOT dead, so its replacement is |url-status=live.
regex(/(\|\s*dead-?url=no?)(?=\s*[|}])/gi, '|url-status=live'); //common cs1 error (deprecated parameter)
 
// regex(/\{\{wikinews ?(|2|cat(?:egory)?|has|par2?|portal|table|-inline)(\|[^\}]+|)\}\}\s*/gi, '');
regex(/(\*[ ]*|)\[\[n:[^\]\|]*\]\][\r\n]*/gi, '');
// regex(/\*[ ]*\{\{(?:Facebook|Find a Grave|Myspace)\|([^}]*)\}\}[\n\r\s]*/gi, ''); //disabled 19 Sept 2015 per Batty's request
 
//citation template fixes
// regex(/(\|\s*)published\s?=/gi, '$1publisher='); //disabled 12/5/21 – false positive in book infobox
// rem copyright assertion
regex(/(\|\s*publisher\s*\=\s*)(?:\[\[copyright(?:\|©|)\]\])\s?/gi, '$1');
regex(/(\|\s*publisher\s*\=\s*)(?:©|copyright)\s?/gi, '$1');
regex(/(\|\s*title\s*\=\s*[^|}]+?)[-‒–—―]\s+([^|}]+?)(\|\s*(?:publisher|work)\s*\=\s*\2)/gi, '$1$3');
regex(/(\|\s*title\s*\=\s*([^|}]+?))[-‒–—―]\s+([^|}]+?\|\s*?(?:publisher|work)\s*\=\s*\2)/gi, '$1$3');
// misused 'date' parameter
regex(/(\{\{\s?cit[ae][^}]+)\|\s*?date(\s?=\s?[12]\d{3}\s?[|}])/gi, '$1|year$2');
 
// rem toggles and redundant quote marks
// regex(/(\|\s*(?:agency|author|newspaper|work|journal|publisher|title)\s*\=\s*)\'\'([^|}]+)\'\'(?=\s*[\}\|])/gi, '$1$2'); //without link //disabled 18/06/2022 per Jonesey
// regex(/(\|\s*(?:agency|author|newspaper|work|journal|publisher|title)\s*\=\s*)\'\'(\[\[(?:[^\|]+\||)[^\|\]]+\]\])\'\'(?=\s*[\}\|])/gi, '$1$2'); //with link //disabled 18/06/2022 per Jonesey
regex(/(\|\s*title\s*\=\s*)\'&#39;([^\|\{\}]+)\'&#39;/gi, '$1$2'); //rem &#39; in titles
regex(/(\|\s*publisher\s*\=\s*)\(([^\|\{\}]+)\)/gi, '$1$2'); //rem parenthetical publishers
regex(/(\|\s*publisher\s*\=\s*)\"([^\|\{\}]+)\"/gi, '$1$2'); //rem quote marks around publishers
 
// reordering 'work' and 'publisher' (first run - see second run in cleanup function)
Line 151 ⟶ 154:
 
/// removing identical/similar entries in 'work' and 'publisher', and in 'work' and 'website' (different default vs [post] cleanup rules)
regex(/\|\s*?work\s*\=\s*([^=|}\[<>]*)(\|[^}<>]*|)\|\s*?(?:publisher|website)\s*\=\s*(?:\1|\[\[\1\]\])\.?(?=\s*[|}])/g, '|publisher=$1$2'); //unlinked work
regex(/\|\s*?work\s*\=\s*\[\[([^<|\]]*)\]\](\|[^}<>]*|)\|\s*?(?:publisher|website)\s*\=\s*(?:\1|\[\[\1\]\])\.?(?=\s*[|}])/g, '|publisher=$1$2'); //unpiped work
regex(/\|\s*?work\s*\=\s*(\[\[(?:[^<|\]]*)\|([^}<>]*)\]\])(\|[^}<>]*|)\|\s*?(?:publisher|website)\s*\=\s*(\1|\2)\.?(?=\s*[|}])/g, '|publisher=$1$3'); //piped work
 
// Collapse a repeated |publisher= within one citation, keeping the first occurrence and any parameters in between.
// Capture groups: unlinked/unpiped rules -> $1 = publisher text, $2 = intermediate parameters;
// piped rule -> $1 = whole [[target|label]] link, $2 = label, $3 = intermediate parameters.
regex(/\|\s*?publisher\s*\=\s*([^=|}\[<>]*)(\|[^}<>]*|)\|\s*?publisher\s*\=\s*(?:\1|\[\[\1\]\])\.?(?=\s*[|}])/g, '|publisher=$1$2'); //unlinked publisher
regex(/\|\s*?publisher\s*\=\s*\[\[([^<|\]]*)\]\](\|[^}<>]*|)\|\s*?publisher\s*\=\s*(?:\1|\[\[\1\]\])\.?(?=\s*[|}])/g, '|publisher=$1$2'); //unpiped publisher
// The piped rule must re-emit $3 (intermediate parameters); $2 is only the link label and appending it corrupted the value.
regex(/\|\s*?publisher\s*\=\s*(\[\[(?:[^<|\]]*)\|([^}<>]*)\]\])(\|[^}<>]*|)\|\s*?publisher\s*\=\s*(\1|\2)\.?(?=\s*[|}])/g, '|publisher=$1$3'); //piped publisher
 
// remove redundant parentheses and templates from dm and md dates (equivalents also exist in the Mosnum script)
Line 175 ⟶ 178:
 
// adjust for possibly incorrectly input title: when |publisher= or |work= merely repeats the |title= text,
// move that text into the publisher/work field and leave the title blank.
// Groups: $1 = "|title=" prefix, $2 = title text, $3 = intermediate parameters, $4 = "publisher" or "work".
// The comparison has to be against the title text (\2); \1 is the "|title=" prefix and could never equal a field value.
regex(/(\|\s*title\s*\=\s*)([^\|\}<>]*)(\s?\|[^}<>]*|)\|\s*?(publisher|work)\s*\=\s*(?:\2|\[\[\2\]\])(?=\s*[|}])/g, '$1 |$4=$2$3'); //creating deliberate error (blank title) per Trappist
// rem misplaced punctuation
Line 222 ⟶ 225:
// (disabling for persistent errors) ohc_regex(/(?:Retrieved|Accessed)(?: on(?:line|)|):? (@Month\s@DD,?\s@YYYY|@DD\s@Month\s@YYYY|@yyyy-@mm-@dd)(?=\D)/gi, 'Retrieved $1');
ohc_regex(/(\w|\])[\.,;]?[ ]\((Retrieved (?:@Month\s@DD,\s@YYYY|@DD\s@Month\s@YYYY|@yyyy-@mm-@dd))\)/gi, '$1. $2');
 
// equivalence for French language cite web template: rename French parameter names to their English CS1 counterparts.
// Each pattern stops at a lookahead for "=", so the "=" and the value stay in place and every
// replacement must be a bare parameter name.
regex(/(\|)\s*auteur1\s*(?=\=)/gi, '$1author1');
regex(/(\|)\s*auteur2\s*(?=\=)/gi, '$1author2');
regex(/(\|)\s*citation\s*(?=\=)/gi, '$1quote');
regex(/(\|)\s*consulté le\s*(?=\=)/gi, '$1accessdate');
regex(/(\|)\s*éditeur\s*(?=\=)/gi, '$1publisher');
regex(/(\|)\s*langue\s*(?=\=)/gi, '$1language'); // keep the original value; emitting "language=fr" here yielded "|language=fr=<value>"
regex(/(\|)\s*lienauteur1\s*(?=\=)/gi, '$1authorlink1');
regex(/(\|)\s*lieu\s*(?=\=)/gi, '$1place');
regex(/(\|)\s*lire en ligne\s*(?=\=)/gi, '$1url'); // "lire en ligne" (read online) carries the link, not a place
regex(/(\|)\s*nom1\s*(?=\=)/gi, '$1last1');
regex(/(\|)\s*nom2\s*(?=\=)/gi, '$1last2');
regex(/(\|)\s*périodique\s*(?=\=)/gi, '$1newspaper');
regex(/(\|)\s*prénom1\s*(?=\=)/gi, '$1first1');
regex(/(\|)\s*prénom2\s*(?=\=)/gi, '$1first2');
regex(/(\|)\s*site\s*(?=\=)/gi, '$1website');
regex(/(\|)\s*titre\s*(?=\=)/gi, '$1title');
 
regex(/(\{\{)\s*Lien web\s*(?=\|)/gi, '$1cite web');
regex(/(>)\{\{[,\.]\}\}(?=<)/gi, '$1');
regex(/\{\{date-\|([^\|]+)\|(\d{4})\-\}\}/gi, '$1 $2');
 
}
Line 255 ⟶ 280:
//linked publishing houses
// removing publishers less well-known than their titles
regex(/(\|\s*publisher\s?\=MTV\|\s*?)publisher\s*\=\s*(?:MTV Networks(?: \(Viacom\)|)|Viacom)/gi, '$1=');
 
regex(/(?:\|publisher=Turner Sports Interactive, Inc)\.? ?\|publisher=(NBA)(?= ?\|)/gi, '$1');
Line 262 ⟶ 287:
// removing publishers for periodicals
regex(/\|publisher=\[\[(?:PMC \(company\)\||)(?:PMC|Penske Media Corporation)\]\](?=[\s\.]*[|}])/gi, '');
regex(/(?:\|\s*?(newspaper|work|publisher)\s*\=\s*\[?\[?(?:Hachette Filipacchi Médias\||)Hachette Filipacchi(?: \(UK\) Ltd.?| UK|)\]?\]?)(?=[\s\.]*[|}])/gi, '');
regex(/(?:\|\s*?(newspaper|work|publisher)\s*\=\s*(ACP Magazines|The Herald and Weekly Times|John Fairfax (and Sons Ltd\.?|Holdings)|Fairfax(?: Media(?: Limited|)| Digital| newspapers|)))(?=[\s\.]*[|}])/gi, '');
regex(/(?:\|\s*?(newspaper|work|publisher)\s*\=\s*\[\[(ACP Magazines|The Herald and Weekly Times|John Fairfax (and Sons Ltd\.?|Holdings)|Fairfax(?: Media(?: Limited|)| Digital| newspapers))\]\])(?=[\s\.]*[|}])/gi, '');
regex(/\| ?publisher ?= ?(?:Fandango Media|\[\[Fandango Media\]\]) ?/gi, '');
regex(/(?:\|\s*?publisher\s*\=\s*(Alexander Lebedev|American Media|Associated Newspapers|Bauer (?:Consumer Media|Media Group)|Cond[eé] Nast(?: Publications|)|Daily Mail and General Trust|Devin Laz[ae]rine|Dow Jones & Company|Future plc|(Guardian|Telegraph) Media Group|(?:Guardian|Independent) News (?:and|&) Media (?: Limited| Ltd\.|)|Hachette Filipacchi Médias|Hearst (?:Corporation|Magazines(?: UK|))|Herald Media|IGN Entertainment|Imdb Inc\.?|InterMedia Partners|IDG|IPC Media|Lee Enterprises|Media ?News Group|Mediacorp|Mortimer Zuckerman|MTV Networks|News (?:Corporation|International|Limited)|Postmedia Network Inc\.?|Prometheus Global Media|Reed Business Information|Rovi Corporation|Trinity Mirror|Times Newspapers|Nielsen (?: Media Research|Business Media)|Viacom|Time(?: Warner ?|)))(,? Inc| LL[CP]| Ltd|Limited|)[\s\.]*(?=[|}])/gi, '');
regex(/\|\s*?publisher\s*\=\s*(?:The |)(?:Deseret News Publishing|Dispatch Printing|E\. W\. Scripps|Evening Post Publishing|Forbes(?: Publishing|, Inc\.)|Gannett?|Independent News & Media|Irish Times Trust|(?:Jann Wenner|Wenner Media)|Johnson Publishing|Journal Communications|Mac Publishing|Media24|McClatchy|Nash holdings LLC|New York Times|Seattle Times|Star Tribune|Thomp?son(?:[- ]?Reuters)?(?: Corporation| Plc.?|)|Singapore Press Holdings|Torstar|Time Inc\.|Times (?:Group|Publishing)|Tribune|Vox Media|Washington Post|World Publishing|Ziff Davis Media)(?: Co(?:mpany|\.)?)?(?=[\s\.]*[|}])/g, '');
regex(/\|\s*publisher\s*\=\s*(?:Cox|Halifax|North Jersey|Sun-Times|Tampa|Herald|Stephens|WEHCO|\w+) Media(?: Group(?:,? Inc\.)?| Berhad)?(?=[\s\.]*[|}])/g, '');
// regex(/\|\s*?publisher\s*\=\s*(?:\w+Cox|Halifax|North )+(?:MediaJersey|PublishingSun-Times|PublicationsTampa|Herald|Stephens|WEHCO) Media(?: Group(?:,? Inc\.)?| Berhad)?(?=[\s\.]*[|}])/g, ''); //rem "Communications" - false positive for "Ministry of Economic Affairs and Communications" reported 28 May 2014
// regex(/\|\s?publisher\s*\=\s*(?:\w+ )+(?:Media|Publishing|Publications)(?: Group(?:,? Inc\.)?| Berhad)(?=[\s\.]*[|}])/g, ''); //rem "Communications" - false positive for "Ministry of Economic Affairs and Communications" reported 28 May 2014
//duplicate above with links //("Corporation" excluded - false positive with Australian Broadcasting Corporation)
regex(/(?:\|\s*?publisher\s*\=\s*\[\[(Alexander Lebedev|American Media|Associated Newspapers|Bauer (?:Consumer Media|Media Group)|Cond[eé] Nast(?: Publications|)|Daily Mail and General Trust|Devin Laz[ae]rine|Dow Jones & Company|Future plc|(Guardian|Telegraph) Media Group|(?:Guardian|Independent) News (?:and|&) Media (?: Limited| Ltd\.|)|Hachette Filipacchi Médias|Hearst (?:Corporation|Magazines(?: UK|))|Herald Media|IGN Entertainment|Imdb Inc\.?|InterMedia Partners|IDG|IPC Media|Lee Enterprises|Media ?News Group|Mediacorp|Mortimer Zuckerman|MTV Networks|News (?:Corporation|International|Limited)|Postmedia Network Inc\.?|Prometheus Global Media|Reed Business Information|Rovi Corporation|Trinity Mirror|Times Newspapers|Nielsen (?: Media Research|Business Media)|Viacom|Time(?: Warner ?|)))(,? Inc| LL[CP]| Ltd|Limited|)(?:\|[^\]\}]*|)\]\][\s\.]*(?=[|}])/gi, '');
regex(/\|\s*?publisher\s*\=\s*\[\[(?:The |)(?:Deseret News Publishing|Dispatch Printing|E\. W\. Scripps|Evening Post Publishing|Forbes(?: Publishing|, Inc\.)|Gannett?|Independent News & Media|Irish Times Trust|(?:Jann Wenner|Wenner Media)|Johnson Publishing|Journal Communications|Mac Publishing|McClatchy|Nash holdings LLC|New York Times|Seattle Times|Star Tribune|Thomp?son(?:[- ]?Reuters)?(?: Corporation| Plc.?|)|Singapore Press Holdings|Torstar|Time Inc\.|Times Publishing|Tribune|Vox Media|Washington Post|World Publishing|Ziff Davis Media)(?: Co(?:mpany|\.)?)?\]\](?=[\s\.]*[|}])/g, '');
regex(/\|\s*?publisher\s*\=\s*\[\[(?:Cox|Halifax|North Jersey|Sun-Times|Tampa|Herald|Stephens|WEHCO|\w+) Media(?: Group(?:,? Inc\.)?| Berhad)?\]\](?=[\s\.]*[|}])/g, '');
// regex(/\|\s*?publisher\s*\=\s*\[\[(?:\w+ )+(?:Media|Publishing|Publications)(?: Group(?:,? Inc\.)?| Berhad)\]\](?=[\s\.]*[|}])/g, '');
 
// Drop |publisher=Star Publications (Malaysia), linked or unlinked.
// The two spellings are grouped so that the "|publisher=" prefix applies to both; ungrouped, the second
// alternative deleted a bare [[Star Publications (Malaysia)]] anywhere and left an empty |publisher= behind.
regex(/\| ?publisher ?= ?(?:Star Publications \(Malaysia\)|\[\[Star Publications \(Malaysia\)\]\]) ?/gi, '');
Line 284 ⟶ 310:
var txt=document.editform.wpTextbox1;
 
// The following regexes for dab-links are in sets of four. If changing, please ensure all sets are changed (note: unique regex line for Empire film magazine added 20/6/2021)
regex(/(=[ ]*(?:The ?|)[A-Z]\w*(?: [A-Z]\w*|)) \((South Africa)(?:n newspaper|)\)([ ]*[|}])/g, '$1|___location=$2$3');
regex(/(''(?:The ?|)[A-Z]\w*(?: [A-Z]\w*|)) \((South Africa)(?:n newspaper|)\)(''[\.,;])/g, '$1$3$2');
Line 294 ⟶ 320:
regex(/(\|)(The Advertiser)(?: \((Adelaide)\))(\]\])/g, '$1$2$4|___location=$3');
regex(/(\[\[)(The Advertiser)( \((Adelaide)\))(\]\])/g, '$1$2$3|$2$5|___location=$4');
regex(/(=[ ]*)(Billboard|Campaign|Fast Company|Hello!|New York|People|Q|Slate|Time(?: Out|)|Wired) \((?:magazine)\)(?=\s*[|}])/g, '$1$2'); //non-standard code
regex(/('')(Billboard|Campaign|Fast Company|Hello!|New York|People|Q|Slate|Time(?: Out|)|Wired) \(magazine\)(?='')/g, '$1$2'); //non-standard code
regex(/(\[\[(Billboard|Campaign|Fast Company|Hello!|New York|People|Q|Slate|Time(?: Out|)|Wired) \(magazine\))(?=\]\])/g, '$1|$2'); //non-standard code - tweaked to insert title per pipe trick
regex(/(\|)(Billboard|Campaign|Fast Company|Hello!|New York|People|Q|Slate|Time(?: Out|)|Wired)(?: \(magazine\))(?=\]\])/g, '$1$2');
// Dab-link set for "Empire (film magazine)": strip the disambiguator from displayed text, keep it in link targets.
regex(/(=[ ]*)(Empire) \((?:film magazine)\)(?=\s*[|}])/g, '$1$2'); //non-standard code
regex(/('')(Empire) \(film magazine\)(?='')/g, '$1$2'); //non-standard code
// An unpiped italicised link gets a piped label; the "|" separator is required, otherwise the result is
// the broken target [[Empire (film magazine)Empire]].
regex(/(''\[\[(Empire) \(film magazine\))(?=\]\]'')/g, '$1|$2'); //non-standard code
regex(/(=[ ]*\[\[(Empire) \(film magazine\))(?=\]\][ ]*\|)/g, '$1|$2'); //non-standard code (unique regex added 20/6/2021)
regex(/(\|)(Empire)(?: \(film magazine\))(?=\]\])/g, '$1$2');
 
regex(/(=[ ]*)(Quartz) \((?:publication)\)(?=\s*[|}])/g, '$1$2'); //non-standard code
regex(/('')(Quartz) \(publication\)(?='')/g, '$1$2'); //non-standard code
Line 306 ⟶ 338:
// Dab-link sets for "Salon (website)" (tail of its set) and "Stuff (company)".
// Unpiped italicised links get a piped label; the "|" separator is required, otherwise the link target
// becomes e.g. [[Salon (website)Salon]].
regex(/(''\[\[(Salon) \(website\))(?=\]\]'')/g, '$1|$2'); //non-standard code
regex(/(\|)(Salon)(?: \(website\))(?=\]\])/g, '$1$2');
regex(/(=[ ]*)(Stuff) \((?:company)\)(?=\s*[|}])/g, '$1$2'); //non-standard code
regex(/('')(Stuff) \(company\)(?='')/g, '$1$2'); //non-standard code
regex(/(''\[\[(Stuff) \(company\))(?=\]\]'')/g, '$1|$2'); //non-standard code
regex(/(\|)(Stuff)(?: \(company\))(?=\]\])/g, '$1$2');
regex(/(=[ ]*Daily News) \((New York)\)([ ]*[|}])/g, '$1|___location=$2$3');
Line 325 ⟶ 361:
regex(/(''(?:Daily Star)) \((United Kingdom)\)('')/g, '$1$3 ($2)');
regex(/(\|)(Daily Star)(?: \(United Kingdom\))(\]\])/g, '$1$2$3');
// Dab-link sets for "El Comercio" and "El Mundo": move the country disambiguator out of the work name.
// Country names accept the correct spellings (Ecuador, Colombia) as well as the previously hard-coded
// misspellings (Equador, Columbia), which could never match a correctly titled article.
// The emitted parameter is |location= (the "___location" form is not a citation parameter).
regex(/(=[ ]*El Comercio) \((E[cq]uador|Peru|Spain)\)(\s*[|}])/g, '$1|location=$2$3');
regex(/(''El Comercio) \((E[cq]uador|Peru|Spain)\)('')/g, '$1$3 ($2)');
regex(/(\|)(El Comercio)(?: \((?:E[cq]uador|Peru|Spain)\))(\]\])/g, '$1$2$3');
regex(/(=[ ]*El Mundo) \((Col[ou]mbia|Spain)\)(\s*[|}])/g, '$1|location=$2$3');
regex(/(''El Mundo) \((Col[ou]mbia|Spain)\)('')/g, '$1$3 ($2)');
Line 368 ⟶ 407:
regex(/(\|)(Sporting Life)(?: \(British newspaper\))(\]\])/g, '$1$2$3');
regex(/(=[ ]*The Standard) \((Hong Kong|Kenya)\)(\s*[|}])/g, '$1|___location=$2$3');
regex(/(=[ ]*Today) \((Singapore newspaper)\)(\s*[|}])/g, '$1|___location=Singapore$3');
regex(/(''Today) \((Singapore newspaper)\)('')/g, '$1$3 (Singapore)');
regex(/(\|)(Today)(?: \(Singapore newspaper\))(\]\])/g, '$1$2$3');
regex(/(''The Standard) \((Hong Kong|Kenya)\)('')/g, '$1$3 ($2)');
regex(/(\|)(The Standard)(?: \((?:Hong Kong|Kenya)\))(\]\])/g, '$1$2$3');
Line 404 ⟶ 446:
regex(/(= ?(?:RT)) \((TV network)\)([ ]*[|}])/g, '$1|___location=Russia$3');
regex(/(\|)(RT)(?: \(TV network\))(\]\])/g, '$1$2$3');
 
regex(/((?:agency|author|journal|magazine|newspaper|periodical|website|work)[ ]*=[ ]*\[\[[\w\s]+\w) ?(\|language=\w{2} ?(?:\|___location=(?=Hong Kong|Taiwan) ?|))(\]\])[ ]*(?=[|}])/g, '$1$3 $2'); //adjust misplaced brackets due to insertion
 
// regex(/(=[ ]*\[\[)([^\[\]\(\)\|:]*)( \([^\[\]\(\)\|]*\))(\]\])/gi, '$1$2$3|$2$4'); //adding piping to parentheticals inside parameters //disabled 7/3/2020 false positives (e.g. [[Communist Party of India (Marxist)]])
Line 420 ⟶ 464:
regex(/(.) at Discogs(\][\.,;]) Discogs\.com(\.| )/gi, '$1$2Discogs$3');
regex(/(\|\s*author\s?\=\s?)(?:posted|publishe[dr]|written)\s?(?:by|on):?\s/gi, '$1');
regex(/\|\s*?(?:publisher|website|work)(\s?\=MTV)\|\s*?publisher\s*\=\s*(?:MTV Networks|Viacom)/gi, '|publisher$1=');
 
// regex(/\|\s*?(?:publisher|website|work)\s*\=\s*(?:BBC|BBC News(?: Online|))\s*(\|[^}<>]*|)\|\s*?publisher\s*\=\s*(?:BBC|BBC News(?: Online|)|British Broadcasting Corporation)(?=[\s\.]*[|}])/g, '|work=\'\'BBC News\'\' $1');
// regex(/\|\s*?(?:publisher|website|work)\s*\=\s*(BBC Sports?)\s*(\|[^}<>]*|)\|\s*?publisher\s*\=\s*(?:BBC|BBC News(?: Online|)|British Broadcasting Corporation)(?=[\s\.]*[|}])/g, '|work=\'\'$1\'\'$2');
// regex(/\|\s*?publisher\s*\=\s*(BBC (?:Sports|News))\s*(?=\|[^}<>]*|)/g, '|work=\'\'$1\'\'');
regex(/\|\s*?(?:publisher|website|work)\s*\=\s*(Metacritic|\[\[Metacritic\]\])\s*(\|[^}<>]*|)\|\s*?publisher\s*\=\s*(CBS Interactive|\[\[CBS Interactive\]\])(?=[\s\.]*[|}])/g, '|publisher=$1$2');
 
//rem duplicated publishers in separate fields (post); rem preceding nbsp
Line 456 ⟶ 500:
regex(/(\|\s*(?:newspaper|website|work)\s*\=\s*(?:\[\[(?:[^<|\]]*)\|(?:[^}<>]*)\]\])(?:\|[^}<>]*|))\|\s?work\s*\=\s*(?:[^\[|}]*)(?=\s*[|}])/g, '$1'); //piped work x1
 
// Collapse a repeated |location= within one citation. The parameter is spelled "location";
// the "___location" spelling matches nothing in real citations and made both rules dead.
// First rule: "New York"/"New York City" followed later by a redundant New York/USA location ($1 = intermediate parameters).
regex(/\|\s*?location\s*\=\s*New York(?: City|)\s*(\|[^}<>]*|)\|\s*?location\s*\=\s*(New York(?: City|)|USA)(?=[\s\.]*[|}])/g, '|location=New York $1');

// Second rule: keep the first |location= ($1) and whatever sits between ($2); drop the later |location=.
regex(/(\|[ ]*?location=[^\|<\}]*)([^<}]*|)\|[ ]*?location=[^|}]*(?=[|}])/gi, '$1$2');
Line 464 ⟶ 508:
 
//remove redundant parameter ('website' is an alias of 'work')
// regex(/\|\s*?(?:newspaper|website|work)\s*\=\s*([^=|}\[<>]*)(\|[^}<>]*|)\|\s*?(?:newspaper|website|work)\s*\=\s*(?:\[\[[^\]]+\]\]|[|}]+)(?=\s*[|}])/g, '|work=$1$2'); //unlinked work
// regex(/\|\s*?(?:newspaper|website|work)\s*\=\s*\[\[([^<|\]]*)\]\](\|[^}<>]*|)\|\s*?(?:newspaper|website|work)\s*\=\s*(?:\[\[[^\]]+\]\]|[|}]+)(?=\s*[|}])/g, '|work=$1$2'); //unpiped work
// regex(/\|\s*?(?:newspaper|website|work)\s*\=\s*(\[\[(?:[^<|\]]*)\|([^}<>]*)\]\])(\|[^}<>]*|)\|\s*?(?:newspaper|website|work)\s*\=\s*(\1|\2)\.?(?=\s*[|}])/g, '|work=$1$3'); //piped work
regex(/(\|\s*(?:newspaper|website|work)\s*\=\s*[^=}<>]*(?:\|[^}<>]*|))\|\s*?(?:newspaper|website|work)\s*\=\s*[^\}|]+(?=\s*[|}])/g, '$1'); //universal work and aliases
 
Line 480 ⟶ 524:
regex(/( &#124; [\w, ]*?)(?=[ ]&#124)/gi, '');
regex(/( &#124; Comment is free)/gi, '');
regex(/\|\s*?title\s*\=\s*BBC (?:News|Sport)\s?(?:[-–]|&#124; )\s?/gi, '|title=');
regex(/\|\s*?title\s*\=\s*Asia Times Online\s?(?:[-–]|&#124; |:+)\s?/gi, '|title=');
regex(/(?:Entertainment|Football|(?:inter|)national|Latest|local|Music|UK|world|) News &#124;(?=[ ]?[&\|])/gi, '');
regex(/(?:[-–|]|&#124;)[ ]*(?:Entertainment|Football|(?:inter|)national|Latest|local|Music|UK|world|) News(?=[ ]?[&\|])/gi, '');
regex(/<!-- Bot generated title -->/gi, '');
regex(/(\|\s*title\s*\=\s*[^|}]+?)[-‒–—―]\s+([^|}]+?)(\|\s*(?:publisher|work)\s*\=\s*\2)/gi, '$1$3');
regex(/(\|\s*title\s*\=\s*([^|}]+?))[-‒–—―]\s+([^|}]+?\|\s*?(?:publisher|work)\s*\=\s*\2)/gi, '$1$3');
regex(/(\|\s*website\s*\=\s*Play Legit):\s+[^|}]+?(?=[\|}])/gi, '$1');