User:Ohconfucius/script/Sources.js: Difference between revisions

Content deleted Content added
curtailing rem of link tracking
update from test script
Line 25:
// removing http links within publisher/journal/work fields
regex(/((?:author|publisher|work) *= *)(?:https?:|ftp:)\/{2}(?:\w{2,4}\.|)(\w+)\.(?:com?|net|org|gov)(?:\.\w{2}|)(?:\/[^|}]*|)(?=\s*[\]|}])/gi, '$1$2');
txt.value=txt.value.replaceregex(/(\|\s?(?:author(?:link\d?|)|journal|newspaper|publisher|work)\s*\=\s*)(?:https?:\/\/|)www\.(\w)/gi, '$1$2'); //leave only ___domain name
txt.value=txt.value.replaceregex(/(>{{cit[^}]+\|\s?website\s*\=\s*)(?:https?:\/\/|)www\.(\w[^}]+)(?=}}<)/gi, '$1$2'); //leave only ___domain name
txt.value=txt.value.replaceregex(/(\|\s?(?:newspaper|work|journal|publisher)\s*\=\s*)\[https?:\/\/[^\s\]]*\s([\w][^\]]*)\]/gi, '$1$2');
 
txt.value=txt.value.replaceregex(/(\|\s?author(?:link\d?|)\s*\=\s*)\[https?:\/\/[^\s\]]*\s([\w][^\]]*)\]/gi, '$1$2');
txt.value=txt.value.replaceregex(/(\|\s?author(?:link\d?|)\s*\=\s*)(?:https?:\/\/|)www\.[\w][^|}]*(?=[|}\n])/gi, '$1'); //rem outright (not a WL)
 
// removing references to other WP articles and 'external' WP links
regex(/<ref[^<>]*>[^<>]*\|[ ]*url ?=https?:\/\/(?:\w{2}\.wikipedia\.org\/wiki|(?:www\.|)(?:facebook|myspace|twitter)\.com)\/[^<>]*<\/ref>/gi, '{{cn}}<!-- WP:RS needed; removed link to WP:SPS (Twitter, FB, WP)-->');
regex(/<ref>\s*https?:\/\/(?:\w{2}\.wikipedia\.org\/wiki|(?:www\.|)(?:facebook|myspace)\.com)\/[^\s\]<]*<\/ref>/gi, '{{cn}}<!-- WP:RS needed; removed link to WP:SPS (Twitter, FB, WP)-->');
regex(/<ref>\s*\[https?:\/\/(?:\w{2}\.wikipedia\.org\/wiki|(?:www\.|)(?:facebook|myspace)\.com)\/[^\s\]]*[ ]+[\w\d][^\]]*\]<\/ref>/gi, '{{cn}}<!-- WP:RS needed; removed link to WP:SPS (Twitter, FB, WP)-->');
regex(/\|[ ]\s*url[ ]*=[ ]*https?:\/\/(?:\w{2}\.wikipedia\.org\/wiki|(?:www\.|)(?:facebook|myspace)\.com)[^\s\|\{\}<]*(?=[ ]*[|}])/gi, '');
 
regex(/[ ]\[https?:\/\/\w{2}\.wikipedia\.org\/wiki\/[^\s\]]*[ ]+([\w][^\]]*)\]/gi, ' [[$1]]');
Line 51:
// removing artefacts within fields
// Delete an author= parameter whose value is empty or only a byline
// placeholder ("by ...", "... staff", "staff reporter(s)").
regex(/(\|\s?author\s*\=\s*)(?:by |)(?:[^|]+staff|(?:staff |)reporters?|)[ ]*(?=[|}\n])/gi, '');
// Trim a trailing "staff" / "wire staff" / "reporter(s)" tag after a capitalised name.
regex(/(\|\s?author\s*\=\s*)([A-Z][a-z]*(?: [A-Z][a-z]*)*) (?:(?:wire |)staff|(?:staff |)reporters?)[ ]*(?=[|}\n])/gi, '$1$2');
// Remove a placeholder last=Reporter|staff together with the first= that follows it.
// A literal '=' used to follow the lookahead, which made the pattern impossible
// to match (the lookahead already demands that the next character is | { or }).
regex(/\|[ ]*last=(Reporter|staff)[ ]*\|[ ]*first=[^|\{\}]*(?=[\|{}])/gi, '');
// Strip lead-in words so the field holds only the value itself.
regex(/(\|\s?accessdate\s*\=\s*)(?:accessed|retrieved)(?: by| on|):?[ ]*(\d)/gi, '$1$2');
// regex(/(\|\s?atwork\s*\=\s*)(?:pages? article|interview|)(?:[-–\d\s,;]*review) ?)[^|}]+(?=[|}\n])/gi, '$1');
regex(/(\|\s?volume\s*\=\s*)vol(?:ume|\.?)[ ]*(\d)/gi, '$1$2');
regex(/(\|\s?pages?\s*\=\s*)(?:pages?|p[gp]?\.?)[ ]*(\d)/gi, '$1$2');
Line 64 ⟶ 65:
// A date= value such as "no date", "not dated", "non-dated", "undated" becomes "n.d.".
// In the replacement, "$1" is the captured "|date=" prefix and "n.d." is literal text.
regex(/(\|\s*date\s*=\s*)(?:not? |non-|un)date[ds]?\s*(?=[|}\n])/gi, '$1n.d.'); //common cs1 error

// regex(/\{\{wikinews ?(|2|cat(?:egory)?|has|par2?|portal|table|-inline)(\|[^\}]+|)\}\}\s*/gi, '');
// Deletes everything from an "[[n:...]]" link (plus an optional leading bullet)
// through the end of that line, line break included.
// NOTE(review): the "n:" prefix is presumably the Wikinews interwiki prefix - confirm.
regex(/(\*[ ]*|)\[\[n:[^\]]*\]\][^\r\n]*[\r\n]/gi, '');
// regex(/\*[ ]*\{\{(?:Facebook|Find a Grave|Myspace)\|([^}]*)\}\}[\n\r\s]*/gi, ''); //disabled 19 Sept 2015 per Batty's request
 
// removing inappropriately populated fields
// regex(/(\|\s?at\s*\=\s*(?:pages? |)(?:[-–\d\s,;]*) ?)[^|}]+(?=[|}\n])/gi, '');
 
//citation template fixes
Line 78 ⟶ 76:
// Title of the form "X - Y" immediately followed by publisher=/work= whose value
// starts with Y: the dash and Y are dropped from the title ($2 is discarded).
regex(/(\|\s?title\s*\=\s*[^|}]+?)[-‒–—―] *([^|}]+?)(\|\s?(?:publisher|work)\s*\=\s*\2)/gi, '$1$3');
// Title of the form "X - Y" immediately followed by publisher=/work= whose value
// starts with X: only the dash (and spaces after it) is removed; both X and Y stay.
// NOTE(review): if the intent was to drop the repeated X, the replacement needs revisiting.
regex(/(\|\s?title\s*\=\s*([^|}]+?))[-‒–—―] *([^|}]+?\|\s?(?:publisher|work)\s*\=\s*\2)/gi, '$1$3');

// misused 'date' parameter
// Inside a cit(e|a)... template, a date= holding just a four-digit year
// (starting with 1 or 2) is renamed to year=.
regex(/(\{\{\s?cit[ae][^}]+)\|\s?date(\s?=\s?[12]\d{3}\s?[|}])/gi, '$1|year$2');
Line 95 ⟶ 93:
// When work= and a later publisher=/website= carry the same text (plain or
// wrapped in [[ ]]), the pair is collapsed into one publisher= parameter;
// whatever parameters sat between the two ($2 / $3) are kept.
regex(/\|\s?work\s*\=\s*([^=|}\[<>]*)(\|[^}<>]*|)\|\s?(?:publisher|website)\s*\=\s*(?:\1|\[\[\1\]\])\.?(?=\s*[|}])/g, '|publisher=$1$2'); //unlinked work
// Note: $1 here is the link target without its brackets, so the output is unlinked.
regex(/\|\s?work\s*\=\s*\[\[([^<|\]]*)\]\](\|[^}<>]*|)\|\s?(?:publisher|website)\s*\=\s*(?:\1|\[\[\1\]\])\.?(?=\s*[|}])/g, '|publisher=$1$2'); //unpiped work
// Piped link: the duplicate may equal either the full link ($1) or its label ($2).
regex(/\|\s?work\s*\=\s*(\[\[(?:[^<|\]]*)\|([^}<>]*)\]\])(\|[^}<>]*|)\|\s?(?:publisher|website)\s*\=\s*(\1|\2)\.?(?=\s*[|}])/g, '|publisher=$1$3'); //piped work

// Same three shapes for a publisher= parameter that is repeated with the same value.
regex(/\|\s?publisher\s*\=\s*([^=|}\[<>]*)(\|[^}<>]*|)\|\s?publisher\s*\=\s*(?:\1|\[\[\1\]\])\.?(?=\s*[|}])/g, '|publisher=$1$2'); //unlinked work
regex(/\|\s?publisher\s*\=\s*\[\[([^<|\]]*)\]\](\|[^}<>]*|)\|\s?publisher\s*\=\s*(?:\1|\[\[\1\]\])\.?(?=\s*[|}])/g, '|publisher=$1$2'); //unpiped work
regex(/\|\s?publisher\s*\=\s*(\[\[(?:[^<|\]]*)\|([^}<>]*)\]\])(\|[^}<>]*|)\|\s?publisher\s*\=\s*(\1|\2)\.?(?=\s*[|}])/g, '|publisher=$1$3'); //piped work
 
// remove redundant parentheses and templates from dm and md dates (equivalents also exists in Mosnum script)
Line 105 ⟶ 103:
 
// rem corporate designation
// Strip a trailing "Inc", "LLC", "LLP", "Ltd", "PLC" or "SA" (with or without a
// preceding comma) from an unlinked publisher value of up to 40 characters.
// The lookahead accepts either '|' or '}' as the end of the value; it used to
// demand the literal two-character sequence "|}", so the rule almost never fired.
regex(/(\|\s?publisher\s*\=\s*[^\[|}]{1,40}), (?:Inc|LL[CP]|Ltd|PLC|SA)\.?(?=[ ]*[|}])/gi, '$1');
regex(/(\|\s?publisher\s*\=\s*[^\[|}]{1,40}) (?:Inc|LL[CP]|Ltd|PLC|SA)\.?(?=[ ]*[|}])/gi, '$1');

// rem unnecessary quote marks
// A title wrapped entirely in double (or single) quotes loses the wrapping quotes.
regex(/(\|\s?title\s*\=\s*)["“]([^\|]+)["”](?=\s?[|}])/gi, '$1$2');
regex(/(\|\s?title\s*\=\s*)['‘]([^\|'’]+)['’](?=\s?[|}])/gi, '$1$2');
// repl double 'in-title' quote marks with single quotes
// Applies to a quoted run of two or more words that follows leading title text.
regex(/(\|\s?title\s*\=\s*[\w ]* )["“]((?:\w[\w]* )+(?:\w[\w]*))["”]([^\|]+|)(?=\s?[|}])/gi, '$1\'$2\'$3');
 
// adjust for possibly incorrectly input title
Line 123 ⟶ 121:
 
// removing blank or redundant parameters
// Any listed parameter whose value is empty (only whitespace before the next
// '|' or '}') is deleted. The list entry is "location"; the "___location"
// spelling was a text-extraction artefact and could never match real wikitext.
regex(/(?:\|[ ]*(?:accessdate|agency|archive(?:date|url)|arxiv|asin|at|author(-?link|-mask|-name-separator|-separator|\d|\d-link|link\d?|)|bibcode|chapter|chapter-url|coauthors?|contribution(?:-url|)|date|deadurl|display-authors|doi|doi-inactive|doibroken|edition|editor(?:-first|-last|-link|\d|\d-first|\d-last|\d-link)|(?:first|last)\d?|format|id|is[bs]n|issue|jfm|journal|jstor|language|lay(?:date|source|summary)|lccn|location|magazine|day|month|mr|newspaper|nopp|oclc|ol|origyear|osti|others|pages?|periodical|place|pm[cd]|pmid|postscript|publication(?:-date|-place)|publisher|quote|ref|rfc|separator|series|ssrn|trans_title|type|url|volume|work|year|zbl)[ ]*=[\s]*)(?=[\}\|])/gi, '');
// regex(/(?:\|[ ]*deadurl(began|ended)[ ]*=[ ^}|]*yes)(?=[ }|]*)/gi, ''); //disabling per request although default
regex(/(\|\s?work\s?\=\s?)\[\[iTunes\]\] Archive\s/gi, '|publisher=[[iTunes Store]]');

//rem underlining within certain fields
// Uses the regex() helper like the surrounding rules ("replaceregex" is not a string method).
regex(/(\|\s?(?:journal|newspaper|periodical|publisher|work)\s*\=\s*)<u>([^|}]*)<\/u>/gi, '$1$2');

//rem redundant top-level domains (.com, .net, .org), strip "www"
// Only fires when the suffix directly follows a wikilink, e.g. [[Foo]].com -> [[Foo]]
regex(/(\|\s?(?:journal|newspaper|periodical|publisher|work)\s*\=\s*)(\[\[[^\[\]\}]*\]\])\.(?:biz|com|net|org|co\.uk)(?=\s*[|}])/gi, '$1$2');
 
//rem duplicated publishers in separate fields (pre)
Line 158 ⟶ 157:
// Strip a leading weekday name or abbreviation (optionally followed by '.' or ',')
// from the start of date/archivedate/accessdate/author values.
regex(/(\|[ ]*(?:date|archivedate|accessdate|author)[ ]*=[ ]*)(?:(?:Mon|Tues?|Wed|Thur?|Fri|Sat|Sun)[\.,]?)\s/gi, "$1");

//'Accessed'/'Obtained' -> 'Retrieved'
// NOTE(review): the @month/@dd/@yyyy tokens are presumably placeholders that
// ohc_regex (defined elsewhere) expands into date sub-patterns - confirm.
// First form: the phrase follows '.', ',' or ';' and is rewritten as ". Retrieved <date>".
ohc_regex(/(?:[\.,;][ ]*(?:url |link |last |)(?:Retrieved|Accessed|Obtained))(?: on(?:line|)|):? (@month|@dd|@yyyy)(?=\D)/gi, '. Retrieved $1');
// Second form: the phrase follows a word character or ']' with no punctuation;
// that character is kept and a full stop is inserted after it.
ohc_regex(/(\w|\])(?:[ ]*(?:url |link |last |)(?:Retrieved|Accessed|Obtained))(?: on(?:line|)|):? (@month|@dd|@yyyy)(?=\D)/gi, '$1. Retrieved $2');

// (disabled due to persistent errors) ohc_regex(/(?:Retrieved|Accessed)(?: on(?:line|)|):? (@Month\s@DD,?\s@YYYY|@DD\s@Month\s@YYYY|@yyyy-@mm-@dd)(?=\D)/gi, 'Retrieved $1');
// "(Retrieved <full date>)" in parentheses is unwrapped and placed after a full stop.
ohc_regex(/(\w|\])[\.,;]?[ ]\((Retrieved (?:@Month\s@DD,\s@YYYY|@DD\s@Month\s@YYYY|@yyyy-@mm-@dd))\)/gi, '$1. $2');
 
Line 202 ⟶ 201:
// removing publishers for periodicals
// Each rule deletes a whole parameter (name and value) when its value is one of
// the listed publishing companies. Rules marked /gi ignore case; the /g-only
// rules are case-sensitive.
regex(/\|publisher=\[\[(?:PMC \(company\)\||)(?:PMC|Penske Media Corporation)\]\](?=[\s\.]*[|}])/gi, '');
regex(/(?:\|\s?(newspaper|work|publisher)\s*\=\s*\[?\[?(?:Hachette Filipacchi Médias\||)Hachette Filipacchi(?: \(UK\) Ltd.?| UK|)\]?\]?)(?=[\s\.]*[|}])/gi, '');
regex(/(?:\|\s?(newspaper|work|publisher)\s*\=\s*(ACP Magazines|The Herald and Weekly Times|John Fairfax (and Sons Ltd\.?|Holdings)|Fairfax(?: Media(?: Limited|)| Digital| newspapers|)))(?=[\s\.]*[|}])/gi, '');
regex(/(?:\|\s?(newspaper|work|publisher)\s*\=\s*\[\[(ACP Magazines|The Herald and Weekly Times|John Fairfax (and Sons Ltd\.?|Holdings)|Fairfax(?: Media(?: Limited|)| Digital| newspapers))\]\])(?=[\s\.]*[|}])/gi, '');
// "Nielsen (?:Media Research|Business Media)": the first alternative used to begin
// with an extra space, so "Nielsen Media Research" (single space) never matched.
regex(/(?:\|\s?publisher\s*\=\s*(Alexander Lebedev|American Media|Associated Newspapers|Cond[eé] Nast(?: Publications|)|Daily Mail and General Trust|Devin Laz[ae]rine|Dow Jones & Company|Future plc|(Guardian|Telegraph) Media Group|(?:Guardian|Independent) News (?:and|&) Media (?:Limited|Ltd\.|)|Hachette Filipacchi Médias|Hearst (?:Corporation|Magazines(?: UK|))|Herald Media|IGN Entertainment|Imdb Inc\.?|InterMedia Partners|IDG|IPC Media|Lee Enterprises|Media ?News Group|Mortimer Zuckerman|MTV Networks|News (?:Corporation|International|Limited)|Prometheus Global Media|Reed Business Information|Rovi Corporation|Trinity Mirror|Times Newspapers|Nielsen (?:Media Research|Business Media)|Viacom|Time(?: Warner ?|)))(,? Inc| LL[CP]| Ltd|Limited|)[\s\.]*(?=[|}\n])/gi, '');
regex(/\|\s?publisher\s*\=\s*(?:The |)(?:Deseret News Publishing|Dispatch Printing|E\. W\. Scripps|Evening Post Publishing|Forbes(?: Publishing|, Inc\.)|Gannett?|Irish Times Trust|(?:Jann Wenner|Wenner Media)|Johnson Publishing|Journal Communications|Mac Publishing|Media24|McClatchy|Nash holdings LLC|New York Times|Seattle Times|Star Tribune|Thomp?son(?:[- ]?Reuters)?(?: Corporation| Plc.?|)|Torstar|Time Inc\.|Times (?:Group|Publishing)|Tribune|Vox Media|Washington Post|World Publishing|Ziff Davis Media)(?: Co(?:mpany|\.)?)?(?=[\s\.]*[|}])/g, '');
// The trailing \w+ alternative already covers every single-word "<Name> Media" publisher.
regex(/\|\s?publisher\s*\=\s*(?:Cox|Halifax|North Jersey|Sun-Times|Tampa|Herald|Stephens|WEHCO|\w+) Media(?: Group(?:,? Inc\.)?| Berhad)?(?=[\s\.]*[|}])/g, '');
// regex(/\|\s?publisher\s*\=\s*(?:\w+ )+(?:Media|Publishing|Publications)(?: Group(?:,? Inc\.)?| Berhad)(?=[\s\.]*[|}])/g, ''); //rem "Communications" - false positive for "Ministry of Economic Affairs and Communications" reported 28 May 2014
//duplicate above with links //("Corporation" excluded - false positive with Australian Broadcasting Corporation)
regex(/(?:\|\s?publisher\s*\=\s*\[\[(Alexander Lebedev|American Media|Associated Newspapers|Cond[eé] Nast(?: Publications|)|Daily Mail and General Trust|Devin Laz[ae]rine|Dow Jones & Company|Future plc|(Guardian|Telegraph) Media Group|(?:Guardian|Independent) News (?:and|&) Media (?:Limited|Ltd\.|)|Hachette Filipacchi Médias|Hearst (?:Corporation|Magazines(?: UK|))|Herald Media|IGN Entertainment|Imdb Inc\.?|InterMedia Partners|IDG|IPC Media|Lee Enterprises|Media ?News Group|Mortimer Zuckerman|MTV Networks|News (?:Corporation|International|Limited)|Prometheus Global Media|Reed Business Information|Rovi Corporation|Trinity Mirror|Times Newspapers|Nielsen (?:Media Research|Business Media)|Viacom|Time(?: Warner ?|)))(,? Inc| LL[CP]| Ltd|Limited|)(?:\|[^\]\}]*|)\]\][\s\.]*(?=[|}\n])/gi, '');
regex(/\|\s?publisher\s*\=\s*\[\[(?:The |)(?:Deseret News Publishing|Dispatch Printing|E\. W\. Scripps|Evening Post Publishing|Forbes(?: Publishing|, Inc\.)|Gannett?|Irish Times Trust|(?:Jann Wenner|Wenner Media)|Johnson Publishing|Journal Communications|Mac Publishing|McClatchy|Nash holdings LLC|New York Times|Seattle Times|Star Tribune|Thomp?son(?:[- ]?Reuters)?(?: Corporation| Plc.?|)|Torstar|Time Inc\.|Times Publishing|Tribune|Vox Media|Washington Post|World Publishing|Ziff Davis Media)(?: Co(?:mpany|\.)?)?\]\](?=[\s\.]*[|}])/g, '');
regex(/\|\s?publisher\s*\=\s*\[\[(?:Cox|Halifax|North Jersey|Sun-Times|Tampa|Herald|Stephens|WEHCO|\w+) Media(?: Group(?:,? Inc\.)?| Berhad)?\]\](?=[\s\.]*[|}])/g, '');
// regex(/\|\s?publisher\s*\=\s*\[\[(?:\w+ )+(?:Media|Publishing|Publications)(?: Group(?:,? Inc\.)?| Berhad)\]\](?=[\s\.]*[|}])/g, ''); //disabling - false positives for book citations
 
// Remove publisher= when it names Star Publications (Malaysia), linked or not.
// The two spellings are grouped so that "|publisher=" is part of both
// alternatives; ungrouped, the second alternative deleted every
// [[Star Publications (Malaysia)]] link anywhere in the page text.
regex(/\| ?publisher ?= ?(?:Star Publications \(Malaysia\)|\[\[Star Publications \(Malaysia\)\]\]) ?/gi, '');
regex(/\| ?publisher ?= ?(?:New Straits Times Press|\[\[New Straits Times Press\]\]) ?/gi, '');
regex(/(\|\s?publisher\s?\=\s?MTV\s?)\|\s*\=\s*\(\w+\)/gi, '$1='); //palliative - correction for unsupported parameters
 
}
Line 233 ⟶ 232:
// Drop a "(South Africa)" / "(South African newspaper)" disambiguator from the
// label half of a piped link.
regex(/(\|)((?:The ?|)[A-Z]\w*(?: [A-Z]\w*|))(?: \((?:South Africa)(?:n newspaper|)\))(\]\])/g, '$1$2$3');

// For each title, three contexts are handled: a bare parameter value, italic
// running text, and the label half of a piped link.
regex(/(=[ ]*)(Billboard|Fast Company|People|Q|Time Out) \((?:magazine)\)(?=\s*[|}])/g, '$1$2'); //non-standard code
regex(/('')(Billboard|Fast Company|People|Q|Time Out) \(magazine\)(?='')/g, '$1$2'); //non-standard code
regex(/(\|)(Billboard|Fast Company|People|Q|Time Out)(?: \(magazine\))(?=\]\])/g, '$1$2');
regex(/(=[ ]*)(Salon) \((?:website)\)(?=\s*[|}])/g, '$1$2'); //non-standard code
regex(/('')(Salon) \(website\)(?='')/g, '$1$2'); //non-standard code
// Piped-link form for Salon, mirroring the magazine rule above. The previous
// text of this line was two rules fused together and was not a valid regex
// literal (unmatched ')' and a duplicated 'g' flag).
regex(/(\|)(Salon)(?: \(website\))(?=\]\])/g, '$1$2');
// "Daily News (New York)" as a parameter value: move the city into location=.
regex(/(=[ ]*Daily News) \((New York)\)([ ]*[|}])/g, '$1|location=$2$3');
// In italic running text the city is moved outside the italics instead.
regex(/(''Daily News) \((New York)\)('')/g, '$1$3 ($2)');
Line 244 ⟶ 247:
// Newspaper titles carrying a place disambiguator: the disambiguator is removed
// from the displayed name and, in template context, re-expressed as a
// location= parameter. The parameter is spelled "location"; "___location" was a
// text-extraction artefact.
regex(/(\|)(Daily Record|Sunday Mail)(?: \((Scotland)\))(\]\])/g, '$1$2$4|location=$3');
// Unpiped link: pipe it to the short name, then append location=.
regex(/(\[\[)(Daily Record|Sunday Mail)( \((Scotland)\))(\]\])/g, '$1$2$3|$2$5|location=$4');
regex(/(=[ ]*(?:Daily Times)) \((Pakistan)\)(\s*[|}])/g, '$1|location=$2$3');
// In italic running text the country is moved outside the italics.
regex(/(''(?:Daily Times)) \((Pakistan)\)('')/g, '$1$3 ($2)');
regex(/(\|)(Daily Times)(?: \(Pakistan\))(\]\])/g, '$1$2$3');
// "Dawn (newspaper)": the disambiguator is replaced by a fixed location of Pakistan.
regex(/(=[ ]*Dawn) \((newspaper)\)(\s*[|}])/g, '$1|location=Pakistan$3');
regex(/(''Dawn) \((newspaper)\)('')/g, '$1$3 (Pakistan)');
Line 329 ⟶ 335:
// Remove a trailing " – Times of India" / " – Rediff.com <section>" tag that sits
// just before the next parameter. The dot in "Rediff.com" is escaped so that it
// matches only a literal full stop.
regex(/ – (?:Times of India|Rediff\.com [\w]*)(?=[ ]?\|)/gi, '');
regex(/(?: +[‒–—―] *Times Of India|)(\]. +''The Times of India''\.)indiatimes\.com/gi, '$1');
// " – X" directly before publisher=/work= whose value starts with X: drop the dashed suffix.
regex(/(?: +[‒–—―] *([\w ]+))(\|\s?(?:publisher|work)\s*\=\s*\1)/gi, '$2');
// italic title followed by ".indiatimes.com": keep only the title and its full stop
regex(/(\w''\.)indiatimes\.com/gi, '$1');
 
Line 343 ⟶ 350:
 
//rem duplicated publishers in separate fields (post); rem preceding nbsp
// A "&nbsp;" directly before agency=/publisher=/work= is replaced by a plain space.
regex(/\s?&nbsp;\s?(\|\s?(?:agency|publisher|work)\s*\=\s*)/gi, ' $1');
// A dash (or &#124;) followed by a name of 3+ characters that is then repeated as
// the value of the next agency/publisher/work parameter: the dashed copy is
// removed and the parameter is kept. An optional leading "The " and trailing
// ".com" on the dashed copy are ignored for the comparison.
regex(/(?:[‒–—―]+|&#124;)\s*(?:The |)([^\|\}&]{3,})(?:\.com|)\s*(\|\s?(?:agency|publisher|work)\s*\=\s*)\1(?=\s*[|}])/gi, '$2$1');
// Same, but here the optional "The " is on the parameter value and is preserved.
regex(/(?:[‒–—―]+|&#124;)\s*([^\|\}&]{3,})(?:\.com|)\s*(\|\s?(?:agency|publisher|work)\s*\=\s*)(The |)\1(?=\s*[|}])/gi, '$2$3$1');
 
//per [[Help:Citation Style 1#Elements not included]]
Line 356 ⟶ 363:
 
/// removing identical/similar entries in 'work' and 'publisher', and in 'work' and 'website'
// work= followed later by publisher=/website= with the same text: keep work= only.
regex(/\|\s?work\s*\=\s*([^=|}\[<>]*)(\|[^}<>]*|)\|\s?(?:publisher|website)\s*\=\s*(?:\1|\[\[\1\]\])\.?(?=\s*[|}])/g, '|work=$1$2'); //unlinked work
regex(/\|\s?work\s*\=\s*\[\[([^<|\]]*)\]\](\|[^}<>]*|)\|\s?(?:publisher|website)\s*\=\s*(?:\1|\[\[\1\]\])\.?(?=\s*[|}])/g, '|work=$1$2'); //unpiped work
regex(/\|\s?work\s*\=\s*(\[\[(?:[^<|\]]*)\|([^}<>]*)\]\])(\|[^}<>]*|)\|\s?(?:publisher|website)\s*\=\s*(\1|\2)\.?(?=\s*[|}])/g, '|work=$1$3'); //piped work

// publisher= repeated with the same value: keep one.
regex(/\|\s?publisher\s*\=\s*([^=|}\[<>]*)(\|[^}<>]*|)\|\s?publisher\s*\=\s*(?:\1|\[\[\1\]\])\.?(?=\s*[|}])/g, '|publisher=$1$2'); //unlinked work
regex(/\|\s?publisher\s*\=\s*\[\[([^<|\]]*)\]\](\|[^}<>]*|)\|\s?publisher\s*\=\s*(?:\1|\[\[\1\]\])\.?(?=\s*[|}])/g, '|publisher=$1$2'); //unpiped work
regex(/\|\s?publisher\s*\=\s*(\[\[(?:[^<|\]]*)\|([^}<>]*)\]\])(\|[^}<>]*|)\|\s?publisher\s*\=\s*(\1|\2)\.?(?=\s*[|}])/g, '|publisher=$1$3'); //piped work

// work= followed later by any website= value: the website= parameter is dropped.
regex(/(\|\s?work\s*\=\s*(?:[^=|}\[<>]*)(?:\|[^}<>]*|))\|\s?website\s*\=\s*(?:[^|}]*)(?=\s*[|}])/g, '$1'); //unlinked work
regex(/(\|\s?work\s*\=\s*\[\[([^<|\]]*)\]\](?:\|[^}<>]*|))\|\s?website\s*\=\s*(?:[^|}]*)(?=\s*[|}])/g, '$1'); //unpiped work
regex(/(\|\s?work\s*\=\s*(\[\[(?:[^<|\]]*)\|([^}<>]*)\]\])(?:\|[^}<>]*|))\|\s?website\s*\=\s*(?:[^|}]*)(?=\s*[|}])/g, '$1'); //piped work

// location=New York (City) followed later by a second location= of New York or
// USA: keep the first only. The parameter is spelled "location"; "___location"
// was a text-extraction artefact.
regex(/\|\s?location\s*\=\s*New York(?: City|)\s*(\|[^}<>]*|)\|\s?location\s*\=\s*(New York(?: City|)|USA)(?=[\s\.]*[|}])/g, '|location=New York $1');
Line 369 ⟶ 380:
// Two consecutive publisher= (or work=) parameters: keep the first, drop the second.
regex(/(\|[ ]*?publisher=[^=}]*)(?:\|[ ]*?publisher=[^=}]*)(?=[|}\n])+/gi, '$1');
regex(/(\|[ ]*?work=[^=}]*)(?:\|[ ]*?work=[^=}]*)(?=[|}\n])+/gi, '$1');

//remove redundant parameter ('website' is an alias of 'work')
// The second value is matched as either a wikilink or a run of characters that
// are not '|' or '}'. The class was previously written "[|}]+", which matched
// the delimiters themselves and swallowed one closing brace of the template
// whenever the second parameter was empty.
regex(/\|\s?work\s*\=\s*([^=|}\[<>]*)(\|[^}<>]*|)\|\s?(?:website|work)\s*\=\s*(?:\[\[[^\]]+\]\]|[^|}]+)(?=\s*[|}])/g, '|work=$1$2'); //unlinked work
regex(/\|\s?work\s*\=\s*\[\[([^<|\]]*)\]\](\|[^}<>]*|)\|\s?(?:website|work)\s*\=\s*(?:\[\[[^\]]+\]\]|[^|}]+)(?=\s*[|}])/g, '|work=$1$2'); //unpiped work
regex(/\|\s?work\s*\=\s*(\[\[(?:[^<|\]]*)\|([^}<>]*)\]\])(\|[^}<>]*|)\|\s?(?:website|work)\s*\=\s*(\1|\2)\.?(?=\s*[|}])/g, '|work=$1$3'); //piped work
 
//'work' and its alias (pre)
Line 387 ⟶ 403:
// Remove a "- News" style tag (optionally prefixed by Football/International/
// Latest/Local/UK/World) that follows a dash, pipe or &#124; and sits just
// before '&' or '|'.
regex(/(?:[-–|]|&#124;)[ ]*(?:Football|international|Latest|local|UK|world|) News(?=[ ]?[&\|])/gi, '');
// Drop the HTML marker comment left on bot-generated titles.
regex(/<!-- Bot generated title -->/gi, '');
// Title "X - Y" immediately followed by publisher=/work= starting with Y:
// the dash and Y are dropped from the title.
regex(/(\|\s?title\s*\=\s*[^|}]+?)[-‒–—―] *([^|}]+?)(\|\s?(?:publisher|work)\s*\=\s*\2)/gi, '$1$3');
// Title "X - Y" immediately followed by publisher=/work= starting with X:
// only the dash (and spaces after it) is removed; X and Y both remain.
regex(/(\|\s?title\s*\=\s*([^|}]+?))[-‒–—―] *([^|}]+?\|\s?(?:publisher|work)\s*\=\s*\2)/gi, '$1$3');

// "(PDF) ." -> "(PDF)." : close the gap before the full stop
regex(/(DOC|PDF)\) \./gi, '$1).');
Line 392 ⟶ 410:
// removing artefacts (outside of citation templates)
// An italic name followed by a domain suffix and a space: the suffix is replaced by ". ".
// NOTE(review): the second alternative "\.\w{2}" comes after an already-consumed
// "\." and therefore needs two dots in the text - confirm that is intended.
regex(/([\w]+\'\')\.(?:co(?:m|m?\.\w{2})|\.\w{2})[ ]/gi, '$1. ');
// "the the" / "the ''the" -> single "the" (the second one, with its quote marks, is kept)
regex(/\bthe ((?:'''?|)the )/gi, '$1');

// removing other artefacts
// "UEFA]]" followed by a domain suffix, just before '|', '{' or '}': drop the suffix.
regex(/(UEFA\]\])\.(?:co(?:m|m?\.\w{2})|\.\w{2})(?= ?[\|{}])/gi, '$1');
// Same pattern as the "the the" rule a few lines up, run again at this point.
regex(/\bthe ((?:'''?|)the )/gi, '$1');

// expanding/dating tags
// {{cn}} / {{fact}} become a full {{citation needed}} whose date is filled in
// by subst: at save time.
regex(/{{(?:cn|fact)}}/gi, '{{citation needed|date={{subst:CURRENTMONTHNAME}} {{subst:CURRENTYEAR}}}}');

//dynamic columns for reflists; remove scroll bar
// Unwrap a <div style=...> / <div class=...> that directly follows a
// References/Notes heading, keeping the div's contents. [\S\s]* is greedy, so
// the match runs to the last </div> in the text.
regex(/((?:[Rr]eferences|[Nn]otes)[ ]?={2,4}[\n\r])[\r\n\s]*<div (?:style|class)=[^>]*>([\S\s]*)<\/div>/g, '$1$2');
Line 428 ⟶ 446:
// the sensitive part is stored and replaced with a unique identifier,
// which is later replaced with the stored part.
 
var protect_function = function(s, begin, replace, end) {
linkmap.push(replace);
Line 434 ⟶ 452:
};
 
// protect the rest (after purging urls inserted in 'website' or 'work' parameters)
// Each pattern has exactly three groups - begin, the span to protect, end -
// matching the (s, begin, replace, end) signature of protect_function.
// URLs that follow '[' or '=' up to the closing ']', '|' or '}':
regex(/((?:[\[=]\s*)(?:https?:|ftp:))([^\]\|\}]*)(\s*[\]\|\}])/gi, protect_function);
// contents of harv/sfn/cite book/listen templates:
regex(/(\{\{(?:harv\w*|sfn\w*|cite ?book|listen)\s?\|)([^\}]+)(\})/gi, protect_function);
// value of a contribution= parameter; the value ends at the next '|' or '}'
// (the terminator was previously the literal two-character sequence "|}").
regex(/(\|\s*contribution\s*=)([^|}]+)([|}])/gi, protect_function);