User:Ohconfucius/script/Sources.js: Difference between revisions

Content deleted Content added
tweak per User:GoingBatty
multiple or zero spaces
Line 27:
//test
regex(/\s?(<ref>(?:[^<>]*\|\s*url\s*=|\[?)https?:\/\/((?:\w+\.)*\w+(?:\.(?:com?|org|net|gov|ac|)(?:\.[a-z]{2}|)))\/[^<>{}]+\|publisher=)(?=}}<\/ref>)/gi, "$1$2"); //capture ___domain name to add to publisher
regex(/(\|\s?*(?:publisher|website)\s*\=\s*)w{3}\.(\w+\.(?:com?|org|net|gov|ac|)(?:\.[a-z]{2}|))(?=\s*[|}])/gi, '$1$2');
regex(/(\|\s?*publisher=)\s?\|\s?*via\s*\=\s*(\w+)(?=\s*[|}])/gi, '$1$2');
// regex(/\|\s?*via\s*\=\s*/gi, '|work=');
regex(/\s?[-–]\s?(?:The |)(\w+\.(?:com?|org|net|gov|ac|)(?:\.[a-z]{2}|))\s?(\|\s?*publisher\s*\=\s*)\1(?=}})/gi, '$2$1');
regex(/\s?[-–]\s?([^|]+)\s?(\|\s?*publisher\s*\=\s*)(The )\1(?=}})/gi, '$2$3$1');
regex(/\|\s?*first\s*\=[^|]*\|\s?*last\s*\=(?:Editor|Group|Staff|Reporter|Writer)\s*(?=\|)/gi, '');
regex(/({{Navbox deaths}})(\n)/gi, '==References==$2{{Reflist}}$2$1$2');
regex(/(==References==\n{{Reflist}}\n)\n?\1/gi, '$1');
Line 50:
// removing http links within publisher/journal/work fields
//
// regex(/(\|\s?*(?:author|publisher|work) *= *)(?:https?:\/{2}|)(?:w{3}\.|)(\w+(?:\.(?:com?|net|org|gov|\w{2})|)(?:\.\w{2}|))(?:\/[^|}]*|)(?=\s*[\]|}])/gi, '$1$2'); //returns ___domain name
regex(/(\|\s?*title\s*\=\s*)https?:\/{2}(?:w{3}\.|)(\w+\.com)(?=\s?[=|{}])/gi, '$1<!--ACTUAL ARTICLE TITLE BELONGS HERE! original text: $2-->');
regex(/(\|\s?*title\s*\=\s*)https?:\/{2}([^|\s]+)(?=\s?[=|{}])/gi, '$1<!--ACTUAL ARTICLE TITLE BELONGS HERE! original text: $2-->');
regex(/(\|\s?*(?:author(?:link\d?|)|chapter|journal|magazine|newspaper|publisher|title|work|via)\s*\=\s*)https?:\/{2}(?:w{3}\.|)(?=\w)/gi, '$1'); //removes http:// and optionally www.
regex(/(\|\s?*(?:author(?:link\d?|)|chapter|journal|magazine|newspaper|publisher|title|work|via)\s*\=\s*)(?:w{3}\.)(?=\w)/gi, '$1'); //removes www.
regex(/(\|\s?*(?:author(?:link\d?|)|chapter|journal|magazine|newspaper|publisher|title|work|via)\s*\=\s*)(\w+(?:\.(?:com?|net|org|gov|\w{2}))(?:\.\w{2}|))(?:\/[^|}]*|)(?=\s*[\]|}])/gi, '$1$2'); //returns ___domain name
regex(/(>{{cit[^}]+\|\s?*website\s*\=\s*)https?:\/{2}(?:w{3}\.|)(?=\w)/gi, '$1'); //leave only ___domain name
regex(/(>{{cit[^}]+\|\s?*website\s*\=\s*)(?:w{3}\.)(?=\w)/gi, '$1'); //leave only ___domain name
 
// removing hyperlinks within publisher/journal/website/work fields
regex(/(\|\s?*(?:chapter|journal|magazine|newspaper|publisher|title|work|via)\s*\=\s*)\[https?:\/\/[^\s\]]*\s([\w][^\]]*)\]/gi, '$1$2'); //leave only linked text
regex(/(>{{cit[^}]+\|\s?*website\s*\=\s*)\[https?:\/\/[^\s\]]*\s([\w][^\]]*)\]/gi, '$1$2'); //leave only linked text
 
regex(/(\|\s?*title\s*=\s*)(?:\w+(?:\.(?:com?|net|org|gov|\w{2}))(?:\.\w{2}|))(?:\/[^|}]+)(?:\/[^|}]+)(?:\/[^|}]+)(?:\/[^|}]+)\/([^|}]+)\/?(?=\s*[\]|}])/gi, '$1$2'); //returns terminal part of url
regex(/(\|\s?*title\s*=\s*)(?:\w+(?:\.(?:com?|net|org|gov|\w{2}))(?:\.\w{2}|))(?:\/[^|}]+)(?:\/[^|}]+)(?:\/[^|}]+)\/([^|}]+)\/?(?=\s*[\]|}])/gi, '$1$2'); //returns terminal part of url
regex(/(\|\s?*title\s*=\s*)(?:\w+(?:\.(?:com?|net|org|gov|\w{2}))(?:\.\w{2}|))(?:\/[^|}]+)(?:\/[^|}]+)\/([^|}]+)\/?(?=\s*[\]|}])/gi, '$1$2'); //returns terminal part of url
regex(/(\|\s?*title\s*=\s*)(?:\w+(?:\.(?:com?|net|org|gov|\w{2}))(?:\.\w{2}|))(?:\/[^|}]+)\/([^|}]+)\/?(?=\s*[\]|}])/gi, '$1$2'); //returns terminal part of url
regex(/(\|\s?*title\s*=\s*)(?:\w+(?:\.(?:com?|net|org|gov|\w{2}))(?:\.\w{2}|))\/([^|}]+)\/?(?=\s*[\]|}])/gi, '$1$2'); //returns terminal part of url
regex(/(\|\s?*author(?:link\d?|)\s*\=\s*)\[https?:\/\/[^\s\]]*\s([\w][^\]]*)\]/gi, '$1$2');
regex(/(\|\s?*author(?:link\d?|)\s*\=\s*)(?:https?:\/\/|)www\.[\w][^|}]*(?=[|}])/gi, '$1'); //rem outright (not a WL)
 
// removing references to other WP articles and 'external' WP links
Line 83:
regex(/<ref[^<>]*>[^<>]*\|[ ]*url ?=https?:\/\/(?:(?:\w+\.|)(thesun|dailymail)\.co\.uk)\/[^<>{]*<\/ref>/gi, '<!--rem deprecated source \($1\)-->');
 
regex(/(\|\s?*url\s*\=\s*|\[)(https?:[^|{}#\s]+)#[A-Za-z0-9\.]{12,13}(?=[\s\[\]|{}<>])/gi, '$1$2'); //rem link tracking
regex(/(\|\s?*url\s*\=\s*)(https?:[^|{}#\s]+\.html?)\?[^|}]{1,5}(?=[\s\[\]|{}<>])/gi, '$1$2'); //rem link tracking
regex(/(\|\s?*url\s*\=\s*|\[)(https?:\/\/books\.google\.[^\/]+\/books\?id=\w{12}&pg=PA\d{1,3})&dq[^\s|}]+(?=\s?[|}])/gi, '$1$2'); //rem browser optimisation
 
 
Line 94:
 
// removing artefacts within fields
regex(/(\|\s?*author\s*\=\s*)(?:by |)(?:[^|]+staff|(?:staff |)(?:reporter|writer)s?|)[\s\n]*(?=[|}])/gi, '');
regex(/(\|\s?*author\s*\=\s*)([A-Z][a-z]*(?: [A-Z][a-z]*)*) (?:(?:wire |)staff|(?:staff |)(?:reporter|writer)s?)[\s\n]*(?=[|}])/gi, '$1$2');
regex(/\|[ ]*last=(Reporter|staff)[ ]*\|[ ]*first=[^|\{\}]*(?=[\|{}])/gi, '');
regex(/\|[ ]*(?:first|last)=(?:staff |)(?:reporter|writer)[ ]*(?=[\|{}])/gi, '');
regex(/\|[ ]*first= ?staff ?\|[ ]*last=(?:reporter|writer)[ ]*(?=[\|{}])/gi, '');
regex(/\|[ ]*first= ?The ?\|[ ]*last=[^\|{}]*(?=[\|{}])/gi, '');
regex(/(\|\s?*access-?date\s*\=\s*)(?:accessed|retrieved)(?: by| on|):?[ ]*(\d)/gi, '$1$2');
regex(/(\|\s?*access-?date\s*\=\s*\d{4}\s?)(?=[|}])/gi, '');
regex(/(\|\s?*work\s*\=\s*)(?:article|interview|review) ?(?=[|}])/gi, '$1');
regex(/(\|\s?*volume\s*\=\s*)vol(?:ume|\.?)[ ]*(\d)/gi, '$1$2');
regex(/(\|\s?*pages?\s*\=\s*)(?:pages?|p[gp]?\.?)[ ]*(\d)/gi, '$1$2');
regex(/ [-–] (?:每日明報 daily news|東方日報)/gi, '');
regex(/(\.\.\. |)\{\{!\}\} 立場(報道|新聞)/gi, '');
regex(/(\|\s?*title\s*\=\s*)Login ?(?=[|}])/gi, '$1'); //creating deliberate error (blank title) per Trappist
regex(/(\|\s?*)(?:publisher|website|work)=You ?tube(?:\.com|)(?=\s*[|}])/gi, '$1via=YouTube');
regex(/(\|\s?*)(?:publisher|website|work)=(Amazon)(?:\.com|)(?:\.\w{2}|)(?=\s*[|}])/gi, '$1via=$2');
regex(/(\|\s?*)(?:publisher|website|work)=(Vimeo)(?:\.com|)(?=\s*[|}])/gi, '$1via=$2');
regex(/(\|\s?*)(?:publisher|website|work)=(Newspapers\.com)(?=\s*[|}])/gi, '$1via=$2');
regex(/(\|\s?*publisher=)\|\s?*via\s*\=\s*(\w+)(?=\s*[|}])/gi, '$1$2');
regex(/(?:-[ ]*Google Books[ ]*(?:\|[^}]*|)|)\|\s?*publisher\s*\=\s*(Google Books)(?=[\s\.]*[|}])/g, '|via=$1');
// regex(/(\|\s?*)(?:publisher|website|work)=books\.google(?:\.com?|.co\.uk|)(?=\s*[|}])/gi, '|via=Google Books');
// regex(/\|\s?*(?:publisher|website|work)=(British Newspaper Archive|Google Books|Project Gutenberg|Proquest|Scribd|web(?:\.archive\|citation)(?:\.org|))(?=\s*[|}])/gi, '|via=$1');
 
//Remove COinS corrupting templates from CS1 citations
regex(/(\|\s?*(?:authors?|first\d?|last\d?|publisher|work)\s*\=\s*(?:[^{}|]*|)){{(?:Sm|Aut|SC|Small[- ]caps|Sm?caps)\|([^{}|]*)}}(?=(?:[^{}|]*|)[|}])/gi, '$1');
 
regex(/(\|\s?*)\w+\=(url\s*\=\s*https?:\/\/)(?=[|}])/gi, '$1$2'); //common cs1 error
regex(/(\|\s?*url\s*\=)(www\.)(?=[|}])/gi, '$1http//$2'); //common cs1 error
regex(/(\|\s*date\s*=\s*)(?:not? |non-|un)date[ds]?\s*(?=[|}])/gi, '$1n.d.'); //common cs1 error
regex(/(\|\s?*dead-?url=no?)(?=\s*[|}])/gi, ''); //common cs1 error (deprecated parameter)
 
regex(/\{\{wikinews ?(|2|cat(?:egory)?|has|par2?|portal|table|-inline)(\|[^\}]+|)\}\}\s*/gi, '');
Line 130:
 
//citation template fixes
// regex(/(\|\s?*)published\s?=/gi, '$1publisher='); //disabled 12/5/21 – false positive in book infobox
// rem copyright assertion
regex(/(\|\s?*publisher\s*\=\s*)(?:\[\[copyright(?:\|©|)\]\])\s?/gi, '$1');
regex(/(\|\s?*publisher\s*\=\s*)(?:©|copyright)\s?/gi, '$1');
regex(/(\|\s?*title\s*\=\s*[^|}]+?)[-‒–—―]\s+([^|}]+?)(\|\s?*(?:publisher|work)\s*\=\s*\2)/gi, '$1$3');
regex(/(\|\s?*title\s*\=\s*([^|}]+?))[-‒–—―]\s+([^|}]+?\|\s?*(?:publisher|work)\s*\=\s*\2)/gi, '$1$3');
// misused 'date' parameter
regex(/(\{\{\s?cit[ae][^}]+)\|\s?*date(\s?=\s?[12]\d{3}\s?[|}])/gi, '$1|year$2');
 
// rem toggles and redundant quote marks
regex(/(\|\s?*(?:agency|author|newspaper|work|journal|publisher|title)\s*\=\s*)\'\'([^|}]+)\'\'(?=\s*[\}\|])/gi, '$1$2'); //without link
regex(/(\|\s?*(?:agency|author|newspaper|work|journal|publisher|title)\s*\=\s*)\'\'(\[\[(?:[^\|]+\||)[^\|\]]+\]\])\'\'(?=\s*[\}\|])/gi, '$1$2'); //with link
regex(/(\|\s?*title\s*\=\s*)\'&#39;([^\|\{\}]+)\'&#39;/gi, '$1$2'); //rem &#39; in titles
regex(/(\|\s?*publisher\s*\=\s*)\(([^\|\{\}]+)\)/gi, '$1$2'); //rem parenthetical publishers
 
// reordering 'work' and 'publisher' (first run - see second run in cleanup function)
regex(/(\|\s?*publisher\s*\=\s*(?:\[\[[^<{}\]]*\]\]|[^{}\|\}<>]*))(\s?\|[^}<>]*|)(\|\s?*(?: journal|newspaper|magazine|periodical|website|work)\s*\=\s*(?:\[\[[^<{}\]]*\]\]|[^{}\|\}<>]*))(?=[\s\.]*[|}])/g, '$3$1$2');
regex(/(\|\s?*website\s*\=\s*(?:\[\[[^<{}\]]*\]\]|[^{}\|\}<>]*))(\s?\|[^}<>]*|)(\|\s?*(?: journal|newspaper|magazine|periodical|work)\s*\=\s*(?:\[\[[^<{}\]]*\]\]|[^{}\|\}<>]*))(?=[\s\.]*[|}])/g, '$3$1$2');
 
/// removing identical/similar entries in 'work' and 'publisher', and in 'work' and 'website' (different default vs [post] cleanup rules)
regex(/\|\s?*work\s*\=\s*([^=|}\[<>]*)(\|[^}<>]*|)\|\s?*(?:publisher|website)\s*\=\s*(?:\1|\[\[\1\]\])\.?(?=\s*[|}])/g, '|publisher=$1$2'); //unlinked work
regex(/\|\s?*work\s*\=\s*\[\[([^<|\]]*)\]\](\|[^}<>]*|)\|\s?*(?:publisher|website)\s*\=\s*(?:\1|\[\[\1\]\])\.?(?=\s*[|}])/g, '|publisher=$1$2'); //unpiped work
regex(/\|\s?*work\s*\=\s*(\[\[(?:[^<|\]]*)\|([^}<>]*)\]\])(\|[^}<>]*|)\|\s?*(?:publisher|website)\s*\=\s*(\1|\2)\.?(?=\s*[|}])/g, '|publisher=$1$3'); //piped work
 
regex(/\|\s?*publisher\s*\=\s*([^=|}\[<>]*)(\|[^}<>]*|)\|\s?*publisher\s*\=\s*(?:\1|\[\[\1\]\])\.?(?=\s*[|}])/g, '|publisher=$1$2'); //unlinked work
regex(/\|\s?*publisher\s*\=\s*\[\[([^<|\]]*)\]\](\|[^}<>]*|)\|\s?*publisher\s*\=\s*(?:\1|\[\[\1\]\])\.?(?=\s*[|}])/g, '|publisher=$1$2'); //unpiped work
regex(/\|\s?*publisher\s*\=\s*(\[\[(?:[^<|\]]*)\|([^}<>]*)\]\])(\|[^}<>]*|)\|\s?*publisher\s*\=\s*(\1|\2)\.?(?=\s*[|}])/g, '|publisher=$1$2'); //piped work
 
// remove redundant parentheses and templates from dm and md dates (equivalents also exists in Mosnum script)
Line 163:
 
// rem corporate designation
regex(/(\|\s?*(?:author|publisher|work)\s*\=\s*[^\[|}]{1,40}),? (?:Inc|LL[CP]|Ltd|Limited|PLC|SA|(?:Snd\. |)Berhad)\.?(?=[ ]*\|\})/gi, '$1');
 
// correcting yahoo! corporate designation
regex(/(\|\s?*(?:publisher|work)\s*\=\s*Yahoo)(?: |)(Finance|Green|Kids|Music|News|)(?=[ ]*\|\})/gi, '$1! $2');
 
// rem unnecessary quote marks
regex(/(\|\s?*title\s*\=\s*)["“]([^\|]+)["”](?=\s?[|}])/gi, '$1$2');
regex(/(\|\s?*title\s*\=\s*)['‘]([^\|'’]+)['’](?=\s?[|}])/gi, '$1$2');
// repl double 'in-title' quote marks with single quotes
regex(/(\|\s?*title\s*\=\s*[\w ]* )["“]((?:\w[\w]* )+(?:\w[\w]*))["”]([^\|]+|)(?=\s?[|}])/gi, '$1\'$2\'$3');
 
// adjust for possibly incorrectly input title
regex(/(\|\s?*title\s*\=\s*)([^\|\}<>]*)(\s?\|[^}<>]*|)\|\s?*(publisher|work)\s*\=\s*(?:\1|\[\[\1\]\])(?=\s*[|}])/g, '$1 |$4=$2$3'); //creating deliberate error (blank title) per Trappist
// rem misplaced punctuation
Line 184:
regex(/(?:\|[ ]*(?:agency|arxiv|asin|at|author(-?link|-mask|-name-separator|-separator|\d|\d-link|link\d?|)|bibcode|chapter|chapter-url|coauthors?|contribution(?:-url|)|date|deadurl|display-authors|doi|doi-inactive|doibroken|edition|(?:editor|translator)(?:-first|-last|-link|\d|\d-first|\d-last|\d-link)|(?:first|last)\d?|format|id|is[bs]n|issue|jfm|journal|jstor|language|lay(?:date|source|summary)|lccn|___location|magazine|day|month|mr|newspaper|nopp|oclc|ol|origyear|osti|others|pages?|periodical|place|pm[cd]|pmid|postscript|publisher|quote|ref|rfc|separator|series|ssrn|trans_title|type|url|volume|via|work|year|zbl)[ ]*=[\s]*)(?=[}|])/gi, '');
// regex(/(?:\|[ ]*(began|ended)[ ]*=[^}|]*)(?=[}|])/gi, ''); //disabled 18/06/20: false positive removal of associated (Start date) templates
regex(/(\|\s?*work\s?\=\s?)\[\[iTunes\]\] Archive\s/gi, '|publisher=[[iTunes Store]]');
 
//rem underlining within certain fields
regex(/(\|\s?*(?:journal|newspaper|periodical|publisher|work)\s*\=\s*)<u>([^|}]+)<\/u>/gi, '$1$2');
 
//rem redundant top-level domains (.com, .net, .org), strip "www"
regex(/(\|\s?*(?:journal|newspaper|periodical|publisher|work)\s*\=\s*)(\[\[[^\[\]\}]*\]\])\.(?:biz|com|net|org|co\.uk|(?:com\.|)[a-z]{2})(?=\s*[|}])/gi, '$1$2');
 
//rem duplicated publishers in separate fields (pre)
regex(/(?:[‒–—―]+|&#124;)\s*(?:The |)([^\|\}&]{3,})(?:\.com|)\s*(\|\s?*(?:agency|publisher|work)\s*\=\s*)\1(?=\s*[|}])/gi, '$2$1');
regex(/(?:[‒–—―]+|&#124;)\s*([^\|\}&]{3,})(?:\.com|)\s*(\|\s?*(?:agency|publisher|work)\s*\=\s*)(The |)\1(?=\s*[|}])/gi, '$2$3$1');
 
//'work' and its alias (pre)
Line 255:
//linked publishing houses
// removing publishers less well-known than their titles
regex(/(\|\s?*publisher\s?\=MTV\|\s?*)publisher\s*\=\s*(?:MTV Networks(?: \(Viacom\)|)|Viacom)/gi, '$1=');
 
regex(/(?:\|publisher=Turner Sports Interactive, Inc)\.? ?\|publisher=(NBA)(?= ?\|)/gi, '$1');
Line 262:
// removing publishers for periodicals
regex(/\|publisher=\[\[(?:PMC \(company\)\||)(?:PMC|Penske Media Corporation)\]\](?=[\s\.]*[|}])/gi, '');
regex(/(?:\|\s?*(newspaper|work|publisher)\s*\=\s*\[?\[?(?:Hachette Filipacchi Médias\||)Hachette Filipacchi(?: \(UK\) Ltd.?| UK|)\]?\]?)(?=[\s\.]*[|}])/gi, '');
regex(/(?:\|\s?*(newspaper|work|publisher)\s*\=\s*(ACP Magazines|The Herald and Weekly Times|John Fairfax (and Sons Ltd\.?|Holdings)|Fairfax(?: Media(?: Limited|)| Digital| newspapers|)))(?=[\s\.]*[|}])/gi, '');
regex(/(?:\|\s?*(newspaper|work|publisher)\s*\=\s*\[\[(ACP Magazines|The Herald and Weekly Times|John Fairfax (and Sons Ltd\.?|Holdings)|Fairfax(?: Media(?: Limited|)| Digital| newspapers))\]\])(?=[\s\.]*[|}])/gi, '');
regex(/(?:\|\s?*publisher\s*\=\s*(Alexander Lebedev|American Media|Associated Newspapers|Bauer (?:Consumer Media|Media Group)|Cond[eé] Nast(?: Publications|)|Daily Mail and General Trust|Devin Laz[ae]rine|Dow Jones & Company|Future plc|(Guardian|Telegraph) Media Group|(?:Guardian|Independent) News (?:and|&) Media (?:Limited|Ltd\.|)|Hachette Filipacchi Médias|Hearst (?:Corporation|Magazines(?: UK|))|Herald Media|IGN Entertainment|Imdb Inc\.?|InterMedia Partners|IDG|IPC Media|Lee Enterprises|Media ?News Group|Mortimer Zuckerman|MTV Networks|News (?:Corporation|International|Limited)|Prometheus Global Media|Reed Business Information|Rovi Corporation|Trinity Mirror|Times Newspapers|Nielsen (?: Media Research|Business Media)|Viacom|Time(?: Warner ?|)))(,? Inc| LL[CP]| Ltd|Limited|)[\s\.]*(?=[|}])/gi, '');
regex(/\|\s?*publisher\s*\=\s*(?:The |)(?:Deseret News Publishing|Dispatch Printing|E\. W\. Scripps|Evening Post Publishing|Forbes(?: Publishing|, Inc\.)|Gannett?|Independent News & Media|Irish Times Trust|(?:Jann Wenner|Wenner Media)|Johnson Publishing|Journal Communications|Mac Publishing|Media24|McClatchy|Nash holdings LLC|New York Times|Seattle Times|Star Tribune|Thomp?son(?:[- ]?Reuters)?(?: Corporation| Plc.?|)|Torstar|Time Inc\.|Times (?:Group|Publishing)|Tribune|Vox Media|Washington Post|World Publishing|Ziff Davis Media)(?: Co(?:mpany|\.)?)?(?=[\s\.]*[|}])/g, '');
regex(/\|\s?*publisher\s*\=\s*(?:Cox|Halifax|North Jersey|Sun-Times|Tampa|Herald|Stephens|WEHCO|\w+) Media(?: Group(?:,? Inc\.)?| Berhad)?(?=[\s\.]*[|}])/g, '');
// regex(/\|\s?*publisher\s*\=\s*(?:\w+ )+(?:Media|Publishing|Publications)(?: Group(?:,? Inc\.)?| Berhad)(?=[\s\.]*[|}])/g, ''); //rem "Communications" - false positive for "Ministry of Economic Affairs and Communications" reported 28 May 2014
//duplicate above with links //("Corporation" excluded - false positive with Australian Broadcasting Corporation)
regex(/(?:\|\s?*publisher\s*\=\s*\[\[(Alexander Lebedev|American Media|Associated Newspapers|Bauer (?:Consumer Media|Media Group)|Cond[eé] Nast(?: Publications|)|Daily Mail and General Trust|Devin Laz[ae]rine|Dow Jones & Company|Future plc|(Guardian|Telegraph) Media Group|(?:Guardian|Independent) News (?:and|&) Media (?:Limited|Ltd\.|)|Hachette Filipacchi Médias|Hearst (?:Corporation|Magazines(?: UK|))|Herald Media|IGN Entertainment|Imdb Inc\.?|InterMedia Partners|IDG|IPC Media|Lee Enterprises|Media ?News Group|Mortimer Zuckerman|MTV Networks|News (?:Corporation|International|Limited)|Prometheus Global Media|Reed Business Information|Rovi Corporation|Trinity Mirror|Times Newspapers|Nielsen (?: Media Research|Business Media)|Viacom|Time(?: Warner ?|)))(,? Inc| LL[CP]| Ltd|Limited|)(?:\|[^\]\}]*|)\]\][\s\.]*(?=[|}])/gi, '');
regex(/\|\s?*publisher\s*\=\s*\[\[(?:The |)(?:Deseret News Publishing|Dispatch Printing|E\. W\. Scripps|Evening Post Publishing|Forbes(?: Publishing|, Inc\.)|Gannett?|Independent News & Media|Irish Times Trust|(?:Jann Wenner|Wenner Media)|Johnson Publishing|Journal Communications|Mac Publishing|McClatchy|Nash holdings LLC|New York Times|Seattle Times|Star Tribune|Thomp?son(?:[- ]?Reuters)?(?: Corporation| Plc.?|)|Torstar|Time Inc\.|Times Publishing|Tribune|Vox Media|Washington Post|World Publishing|Ziff Davis Media)(?: Co(?:mpany|\.)?)?\]\](?=[\s\.]*[|}])/g, '');
regex(/\|\s?*publisher\s*\=\s*\[\[(?:Cox|Halifax|North Jersey|Sun-Times|Tampa|Herald|Stephens|WEHCO|\w+) Media(?: Group(?:,? Inc\.)?| Berhad)?\]\](?=[\s\.]*[|}])/g, '');
// regex(/\|\s?*publisher\s*\=\s*\[\[(?:\w+ )+(?:Media|Publishing|Publications)(?: Group(?:,? Inc\.)?| Berhad)\]\](?=[\s\.]*[|}])/g, '');
 
regex(/\| ?publisher ?= ?Star Publications \(Malaysia\)|\[\[Star Publications \(Malaysia\)\]\] ?/gi, '');
regex(/\| ?publisher ?= ?(?:New Straits Times Press|\[\[New Straits Times Press\]\]) ?/gi, '');
regex(/(\|\s?*publisher\s?\=\s?MTV\s?)\|\s*\=\s*\(\w+\)/gi, '$1='); //palliative - correction for unsuppoeted parameters
 
}
Line 412:
regex(/ – (?:Times of India|Rediff.com [\w]*)(?=[ ]?\|)/gi, '');
regex(/(?: +[‒–—―] *Times Of India|)(\]. +''The Times of India''\.)indiatimes\.com/gi, '$1');
regex(/(?: +[‒–—―] *([\w ]+))(\|\s?*(?:publisher|work)\s*\=\s*\1)/gi, '$2');
regex(/(\w''\.)indiatimes\.com/gi, '$1');
 
Line 419:
regex(/(.) – Google [^ \]]*(\][\.,;]) Books\.google\.\w{2,3}(\.| )/gi, '$1$2Google Books$3');
regex(/(.) at Discogs(\][\.,;]) Discogs\.com(\.| )/gi, '$1$2Discogs$3');
regex(/(\|\s?*author\s?\=\s?)(?:posted|publishe[dr]|written)\s?(?:by|on):?\s/gi, '$1');
regex(/\|\s?*(?:publisher|website|work)(\s?\=MTV)\|\s?*publisher\s*\=\s*(?:MTV Networks|Viacom)/gi, '|publisher$1=');
 
// regex(/\|\s?*(?:publisher|website|work)\s*\=\s*(?:BBC|BBC News(?: Online|))\s*(\|[^}<>]*|)\|\s?*publisher\s*\=\s*(?:BBC|BBC News(?: Online|)|British Broadcasting Corporation)(?=[\s\.]*[|}])/g, '|work=\'\'BBC News\'\' $1');
// regex(/\|\s?*(?:publisher|website|work)\s*\=\s*(BBC Sports?)\s*(\|[^}<>]*|)\|\s?*publisher\s*\=\s*(?:BBC|BBC News(?: Online|)|British Broadcasting Corporation)(?=[\s\.]*[|}])/g, '|work=\'\'$1\'\'$2');
// regex(/\|\s?*publisher\s*\=\s*(BBC (?:Sports|News))\s*(?=\|[^}<>]*|)/g, '|work=\'\'$1\'\'');
regex(/\|\s?*(?:publisher|website|work)\s*\=\s*(Metacritic|\[\[Metacritic\]\])\s*(\|[^}<>]*|)\|\s?*publisher\s*\=\s*(CBS Interactive|\[\[CBS Interactive\]\])(?=[\s\.]*[|}])/g, '|publisher=$1$2');
 
//rem duplicated publishers in separate fields (post); rem preceding nbsp
regex(/\s?&nbsp;\s?(\|\s?*(?:agency|publisher|work)\s*\=\s*)/gi, ' $1');
regex(/(?:[‒–—―]+|&#124;)\s*(?:The |)([^\|\}&]{3,})(?:\.com|)\s*(\|\s?*(?:agency|publisher|work)\s*\=\s*)\1(?=\s*[|}])/gi, '$2$1');
regex(/(?:[‒–—―]+|&#124;)\s*([^\|\}&]{3,})(?:\.com|)\s*(\|\s?*(?:agency|publisher|work)\s*\=\s*)(The |)\1(?=\s*[|}])/gi, '$2$3$1');
 
//per [[Help:Citation Style 1#Elements not included]]
 
// toggling domains within |website= parameter
// regex(/(\|\s?*(?:work|website)\s*\=\s*)(\w+\.(?:com?|org|net|gov|ac|)(?:\.[a-z]{2}|))(?=\s*[|}])/gi, '$1\'\'$2\'\'');
 
// reordering 'work' and 'publisher'; reordering 'work' and 'website'
regex(/(\|\s?*publisher\s*\=\s*(?:\[\[[^<{}\]]*\]\]|[^{}\|\}<>]*))(\s?\|[^}<>]*|)(\|\s?*(?: journal|newspaper|magazine|periodical|website|work)\s*\=\s*(?:\[\[[^<{}\]]*\]\]|[^{}\|\}<>]*))(?=[\s\.]*[|}])/g, '$3$1$2');
regex(/(\|\s?*website\s*\=\s*(?:\[\[[^<{}\]]*\]\]|[^{}\|\}<>]*))(\s?\|[^}<>]*|)(\|\s?*work\s*\=\s*(?:\[\[[^<{}\]]*\]\]|[^{}\|\}<>]*))(?=[\s\.]*[|}])/g, '$3$1$2');
 
/// removing identical/similar entries in 'work' and 'publisher', and in 'work' and 'website'
regex(/\|\s?*work\s*\=\s*([^=|}\[<>]*)(\|[^}<>]*|)\|\s?*publisher\s*\=\s*(?:\1|\[\[\1\]\])\.?(?=\s*[|}])/g, '|work=$1$2'); //unlinked work
regex(/\|\s?*work\s*\=\s*\[\[([^<|\]]*)\]\](\|[^}<>]*|)\|\s?*publisher\s*\=\s*(?:\1|\[\[\1\]\])\.?(?=\s*[|}])/g, '|work=$1$2'); //unpiped work
regex(/\|\s?*work\s*\=\s*(\[\[(?:[^<|\]]*)\|([^}<>]*)\]\])(\|[^}<>]*|)\|\s?*publisher\s*\=\s*(\1|\2)\.?(?=\s*[|}])/g, '|work=$1$3'); //piped work
 
regex(/\|\s?*publisher\s*\=\s*([^=|}\[<>]*)(\|[^}<>]*|)\|\s?*publisher\s*\=\s*(?:\1|\[\[\1\]\])\.?(?=\s*[|}])/g, '|publisher=$1$2'); //unlinked work
regex(/\|\s?*publisher\s*\=\s*\[\[([^<|\]]*)\]\](\|[^}<>]*|)\|\s?*publisher\s*\=\s*(?:\1|\[\[\1\]\])\.?(?=\s*[|}])/g, '|publisher=$1$2'); //unpiped work
regex(/\|\s?*publisher\s*\=\s*(\[\[(?:[^<|\]]*)\|([^}<>]*)\]\])(\|[^}<>]*|)\|\s?*publisher\s*\=\s*(\1|\2)\.?(?=\s*[|}])/g, '|publisher=$1$3'); //piped work
 
regex(/(\|\s?*(?:newspaper|website|work)\s*\=\s*(?:[^=|}\[<>]*)(?:\|[^}<>]*|))\|\s?*work\s*\=\s*(?:[^|}]*)(?=\s*[|}])/g, '$1'); //unlinked work
regex(/(\|\s?*(?:newspaper|website|work)\s*\=\s*\[\[([^<|\]]*)\]\](?:\|[^}<>]*|))\|\s?*work\s*\=\s*(?:[^|}]*)(?=\s*[|}])/g, '$1'); //linked unpiped work
regex(/(\|\s?*(?:newspaper|website|work)\s*\=\s*(\[\[(?:[^<|\]]*)\|([^}<>]*)\]\])(?:\|[^}<>]*|))\|\s?*work\s*\=\s*(?:[^|}]*)(?=\s*[|}])/g, '$1'); //piped work
 
regex(/\|\s?*___location\s*\=\s*New York(?: City|)\s*(\|[^}<>]*|)\|\s?*___location\s*\=\s*(New York(?: City|)|USA)(?=[\s\.]*[|}])/g, '|___location=New York $1');
 
regex(/(\|[ ]*?___location=[^\|<\}]*)([^<}]*|)\|[ ]*?___location=[^|}]*(?=[|}])/gi, '$1$2');
Line 463:
 
//remove redundant parameter ('website' is an alias of 'work')
// regex(/\|\s?*(?:newspaper|website|work)\s*\=\s*([^=|}\[<>]*)(\|[^}<>]*|)\|\s?*(?:newspaper|website|work)\s*\=\s*(?:\[\[[^\]]+\]\]|[|}]+)(?=\s*[|}])/g, '|work=$1$2'); //unlinked work
// regex(/\|\s?*(?:newspaper|website|work)\s*\=\s*\[\[([^<|\]]*)\]\](\|[^}<>]*|)\|\s?*(?:newspaper|website|work)\s*\=\s*(?:\[\[[^\]]+\]\]|[|}]+)(?=\s*[|}])/g, '|work=$1$2'); //unpiped work
// regex(/\|\s?*(?:newspaper|website|work)\s*\=\s*(\[\[(?:[^<|\]]*)\|([^}<>]*)\]\])(\|[^}<>]*|)\|\s?*(?:newspaper|website|work)\s*\=\s*(\1|\2)\.?(?=\s*[|}])/g, '|work=$1$3'); //piped work
regex(/(\|\s?*(?:newspaper|website|work)\s*\=\s*[^=}<>]*(?:\|[^}<>]*|))\|\s?*(?:newspaper|website|work)\s*\=\s*[^\}|]+(?=\s*[|}])/g, '$1'); //universal work and aliases
 
Line 479:
regex(/( &#124; [\w, ]*?)(?=[ ]&#124)/gi, '');
regex(/( &#124; Comment is free)/gi, '');
regex(/\|\s?*title\s*\=\s*BBC (?:News|Sport)\s?(?:[-–]|&#124; )\s?/gi, '|title=');
regex(/\|\s?*title\s*\=\s*Asia Times Online\s?(?:[-–]|&#124; |:+)\s?/gi, '|title=');
regex(/(?:Entertainment|Football|(?:inter|)national|Latest|local|Music|UK|world|) News &#124;(?=[ ]?[&\|])/gi, '');
regex(/(?:[-–|]|&#124;)[ ]*(?:Entertainment|Football|(?:inter|)national|Latest|local|Music|UK|world|) News(?=[ ]?[&\|])/gi, '');
regex(/<!-- Bot generated title -->/gi, '');
regex(/(\|\s?*title\s*\=\s*[^|}]+?)[-‒–—―]\s+([^|}]+?)(\|\s?*(?:publisher|work)\s*\=\s*\2)/gi, '$1$3');
regex(/(\|\s?*title\s*\=\s*([^|}]+?))[-‒–—―]\s+([^|}]+?\|\s?*(?:publisher|work)\s*\=\s*\2)/gi, '$1$3');
regex(/(\|\s?*website\s*\=\s*Play Legit):\s+[^|}]+?(?=[\|}])/gi, '$1');
 
regex(/(DOC|PDF)\) \./gi, '$1).');