User:Ohconfucius/script/Sources.js: Difference between revisions

Content deleted Content added
Undid revision 731282514 by Ohconfucius (talk)
align to master script
Line 25:
// removing http links within publisher/journal/work fields
regex(/((?:author|publisher|work) *= *)(?:https?:|ftp:)\/{2}(?:\w{2,4}\.|)(\w+)\.(?:com?|net|org|gov)(?:\.\w{2}|)(?:\/[^|}]*|)(?=\s*[\]|}])/gi, '$1$2');
regex(/(\|\s?(?:author(?:link\d?|)|journal|newspaper|publisher|work)\s*\=\s*)(?:https?:\/\/|)www\.(\w)\/?/gi, '$1$2'); //leave only ___domain name
regex(/(>{{cit[^}]+\|\s?(?:via|website)\s*\=\s*)(?:https?:\/\/|)www\.(\w[^}<]+)(?=}}<)/gi, '$1$2'); //leave only ___domain name
regex(/(\|\s?(?:newspaper|work|journal|publisher)\s*\=\s*)\[?https?:\/\/[^\s\]]*\s([\w][^\]]*)\]?/gi, '$1$2'); //leave only linked text
 
regex(/(\|\s?author(?:link\d?|)\s*\=\s*)\[https?:\/\/[^\s\]]*\s([\w][^\]]*)\]/gi, '$1$2');
Line 50:
 
// removing artefacts within fields
regex(/(\|\s?author\s*\=\s*)(?:by |)(?:[^|]+staff|(?:staff |)reporters(?:reporter|writer)s?|)[ ]*(?=[|}\n])/gi, '');
regex(/(\|\s?author\s*\=\s*)([A-Z][a-z]*(?: [A-Z][a-z]*)*) (?:(?:wire |)staff|(?:staff |)reporters(?:reporter|writer)s?)[ ]*(?=[|}\n])/gi, '$1$2');
regex(/\|[ ]*last=(Reporter|staff)[ ]*\|[ ]*first=[^|\{\}]*(?=[\|{}])=/gi, '');
regex(/\|[ ]*(?:first|last)=(?:staff |)(?:reporter|writer)[ ]*(?=[\|{}])=/gi, '');
regex(/(\|\s?accessdate\s*\=\s*)(?:accessed|retrieved)(?: by| on|):?[ ]*(\d)/gi, '$1$2');
regex(/(\|\s?work\s*\=\s*)(?:article|interview|review) ?(?=[|}\n])/gi, '$1');
regex(/(\|\s?volume\s*\=\s*)vol(?:ume|\.?)[ ]*(\d)/gi, '$1$2');
regex(/(\|\s?pages?\s*\=\s*)(?:pages?|p[gp]?\.?)[ ]*(\d)/gi, '$1$2');
regex(/ [-–] 每日明報 daily news/gi, '');
//Remove COinS corrupting templates from CS1 citations
Line 103 ⟶ 105:
 
// rem corporate designation
regex(/(\|\s?(?:author|publisher|work)\s*\=\s*[^\[|}]{1,40}),? (?:Inc|LL[CP]|Ltd|PLC|SA)\.?(?=[ ]*\|\})/gi, '$1');
 
regex(/(\|\s?publisher\s*\=\s*[^\[|}]{1,40}) (?:Inc|LL[CP]|Ltd|PLC|SA)\.?(?=[ ]*\|\})/gi, '$1');
// correcting yahoo! corporate designation
regex(/(\|\s?(?:publisher|work)\s*\=\s*[^\[|}]{1,40}Yahoo) (?:Inc |LL[CP])(Finance|LtdGreen|PLCKids|Music|News|SA)\.?(?=[ ]*\|\})/gi, '$1! $2');
 
// rem unnecessary quote marks
Line 121 ⟶ 125:
 
// removing blank or redundant parameters
regex(/(?:\|[ ]*(?:accessdate|agency|archive(?:date|url)|arxiv|asin|at|author(-?link|-mask|-name-separator|-separator|\d|\d-link|link\d?|)|bibcode|chapter|chapter-url|coauthors?|contribution(?:-url|)|date|deadurl|display-authors|doi|doi-inactive|doibroken|edition|editor(?:-first|-last|-link|\d|\d-first|\d-last|\d-link)|(?:first|last)\d?|format|id|is[bs]n|issue|jfm|journal|jstor|language|lay(?:date|source|summary)|lccn|___location|magazine|day|month|mr|newspaper|nopp|oclc|ol|origyear|osti|others|pages?|periodical|place|pm[cd]|pmid|postscript|publication(?:-date|-place)|publisher|quote|ref|rfc|separator|series|ssrn|trans_title|type|url|volume|via|work|year|zbl)[ ]*=[\s]*)(?=[}|])/gi, '');
regex(/(?:\|[ ]*(began|ended)[ ]*=[^}|]*)(?=[}|])/gi, '');
regex(/(\|\s?work\s?\=\s?)\[\[iTunes\]\] Archive\s/gi, '|publisher=[[iTunes Store]]');
Line 129 ⟶ 133:
 
//rem redundant top-level domains (.com, .net, .org), strip "www"
regex(/(\|\s?(?:journal|newspaper|periodical|publisher|work)\s*\=\s*)(\[\[[^\[\]\}]*\]\])\.(?:biz|com|net|org|co\.uk|(?:com\.|)[a-z]{2})(?=\s*[|}])/gi, '$1$2');
 
//rem duplicated publishers in separate fields (pre)
Line 136 ⟶ 140:
 
//'work' and its alias (pre)
regex(/(\|[ ]*?newspaper[ ]*=[^\|}]*(?:\|[^\{\}]*|))(?:\|[ ]*?work[ ]*=[^|}]*)(?=\s?[|}\n])+/gi, '$1');
 
//rem linking within '___location' field
Line 154 ⟶ 158:
 
// eliminating days of the week
regex(/(\|[ ]*(?:date|archivedate|accessdate|author)[ ]*=[ ]*)(?:(?:Mon|Tues?|WednesWed|ThursThur?|Fri|SaturSat|Sun)day(?:[\.,]|day)?)\s/gi, "$1");
regex(/(\|[ ]*(?:date|archivedate|accessdate|author)[ ]*=[ ]*)(?:(?:Mon|Tues?|Wed|Thur?|Fri|Sat|Sun)[\.,]?)\s/gi, "$1");
 
//'Accessed'/'Obtained' -> 'Retrieved'
Line 181 ⟶ 184:
regex(/(\|)(Sport \()(newspaper\))(?=\||\]\])/g, '$1$2Spanish $3'); //dab moved December 2012
regex(/(=[ ]*\[\[)(?:[BE]SPN ?(?:USA|HD|Network|the ocho|\(United States\))|E.S.P.N.|(?:The |)Entertainment (?:and |)Sports Programming Network)(?:\|[\w, ]*)(?=\]\])/gi, '$1ESPN');
regex(/(\[\[)Sky ?(?:\((?:British|UK)\)|UK|United Kingdom)(?=\|)/gi, '$1Sky (United Kingdom)');
 
regex(/(?:agency|journal|newspaper|periodical|publisher|website|work)(\s?=\s?\[\[)(?:MTV (?:[A-Z]\w*|\([^\)\]]*\)))\|[^\)\]]*(?=\]\])/gi, 'publisher$1MTV');
 
//unwinding of unnecessary pipes
Line 204 ⟶ 209:
regex(/(?:\|\s?(newspaper|work|publisher)\s*\=\s*(ACP Magazines|The Herald and Weekly Times|John Fairfax (and Sons Ltd\.?|Holdings)|Fairfax(?: Media(?: Limited|)| Digital| newspapers|)))(?=[\s\.]*[|}])/gi, '');
regex(/(?:\|\s?(newspaper|work|publisher)\s*\=\s*\[\[(ACP Magazines|The Herald and Weekly Times|John Fairfax (and Sons Ltd\.?|Holdings)|Fairfax(?: Media(?: Limited|)| Digital| newspapers))\]\])(?=[\s\.]*[|}])/gi, '');
regex(/(?:\|\s?publisher\s*\=\s*(Alexander Lebedev|American Media|Associated Newspapers|Bauer (?:Consumer Media|Media Group)|Cond[eé] Nast(?: Publications|)|Daily Mail and General Trust|Devin Laz[ae]rine|Dow Jones & Company|Future plc|(Guardian|Telegraph) Media Group|(?:Guardian|Independent) News (?:and|&) Media (?:Limited|Ltd\.|)|Hachette Filipacchi Médias|Hearst (?:Corporation|Magazines(?: UK|))|Herald Media|IGN Entertainment|Imdb Inc\.?|InterMedia Partners|IDG|IPC Media|Lee Enterprises|Media ?News Group|Mortimer Zuckerman|MTV Networks|News (?:Corporation|International|Limited)|Prometheus Global Media|Reed Business Information|Rovi Corporation|Trinity Mirror|Times Newspapers|Nielsen (?: Media Research|Business Media)|Viacom|Time(?: Warner ?|)))(,? Inc| LL[CP]| Ltd|Limited|)[\s\.]*(?=[|}\n])/gi, '');
regex(/\|\s?publisher\s*\=\s*(?:The |)(?:Deseret News Publishing|Dispatch Printing|E\. W\. Scripps|Evening Post Publishing|Forbes(?: Publishing|, Inc\.)|Gannett?|Irish Times Trust|(?:Jann Wenner|Wenner Media)|Johnson Publishing|Journal Communications|Mac Publishing|Media24|McClatchy|Nash holdings LLC|New York Times|Seattle Times|Star Tribune|Thomp?son(?:[- ]?Reuters)?(?: Corporation| Plc.?|)|Torstar|Time Inc\.|Times (?:Group|Publishing)|Tribune|Vox Media|Washington Post|World Publishing|Ziff Davis Media)(?: Co(?:mpany|\.)?)?(?=[\s\.]*[|}])/g, '');
regex(/\|\s?publisher\s*\=\s*(?:Cox|Halifax|North Jersey|Sun-Times|Tampa|Herald|Stephens|WEHCO|\w+) Media(?: Group(?:,? Inc\.)?| Berhad)?(?=[\s\.]*[|}])/g, '');
// regex(/\|\s?publisher\s*\=\s*(?:\w+ )+(?:Media|Publishing|Publications)(?: Group(?:,? Inc\.)?| Berhad)(?=[\s\.]*[|}])/g, ''); //rem "Communications" - false positive for "Ministry of Economic Affairs and Communications" reported 28 May 2014
//duplicate above with links //("Corporation" excluded - false positive with Australian Broadcasting Corporation)
regex(/(?:\|\s?publisher\s*\=\s*\[\[(Alexander Lebedev|American Media|Associated Newspapers|Bauer (?:Consumer Media|Media Group)|Cond[eé] Nast(?: Publications|)|Daily Mail and General Trust|Devin Laz[ae]rine|Dow Jones & Company|Future plc|(Guardian|Telegraph) Media Group|(?:Guardian|Independent) News (?:and|&) Media (?:Limited|Ltd\.|)|Hachette Filipacchi Médias|Hearst (?:Corporation|Magazines(?: UK|))|Herald Media|IGN Entertainment|Imdb Inc\.?|InterMedia Partners|IDG|IPC Media|Lee Enterprises|Media ?News Group|Mortimer Zuckerman|MTV Networks|News (?:Corporation|International|Limited)|Prometheus Global Media|Reed Business Information|Rovi Corporation|Trinity Mirror|Times Newspapers|Nielsen (?: Media Research|Business Media)|Viacom|Time(?: Warner ?|)))(,? Inc| LL[CP]| Ltd|Limited|)(?:\|[^\]\}]*|)\]\][\s\.]*(?=[|}\n])/gi, '');
regex(/\|\s?publisher\s*\=\s*\[\[(?:The |)(?:Deseret News Publishing|Dispatch Printing|E\. W\. Scripps|Evening Post Publishing|Forbes(?: Publishing|, Inc\.)|Gannett?|Irish Times Trust|(?:Jann Wenner|Wenner Media)|Johnson Publishing|Journal Communications|Mac Publishing|McClatchy|Nash holdings LLC|New York Times|Seattle Times|Star Tribune|Thomp?son(?:[- ]?Reuters)?(?: Corporation| Plc.?|)|Torstar|Time Inc\.|Times Publishing|Tribune|Vox Media|Washington Post|World Publishing|Ziff Davis Media)(?: Co(?:mpany|\.)?)?\]\](?=[\s\.]*[|}])/g, '');
regex(/\|\s?publisher\s*\=\s*\[\[(?:Cox|Halifax|North Jersey|Sun-Times|Tampa|Herald|Stephens|WEHCO|\w+) Media(?: Group(?:,? Inc\.)?| Berhad)?\]\](?=[\s\.]*[|}])/g, '');
Line 234 ⟶ 239:
regex(/(=[ ]*)(Billboard|Fast Company|People|Q|Time Out) \((?:magazine)\)(?=\s*[|}])/g, '$1$2'); //non-standard code
regex(/('')(Billboard|Fast Company|People|Q|Time Out) \(magazine\)(?='')/g, '$1$2'); //non-standard code
regex(/(''\[\[(?:Billboard|Fast Company|People|Q|Time Out) \(magazine\))(?=\]\]'')/g, '$1|'); //non-standard code
regex(/(\|)(Billboard|Fast Company|People|Q|Time Out)(?: \(magazine\))(?=\]\])/g, '$1$2');
regex(/(=[ ]*)(Salon) \((?:website)\)(?=\s*[|}])/g, '$1$2'); //non-standard code
regex(/('')(Salon) \(website\)(?='')/g, '$1$2'); //non-standard code
regex(/(''\[\[(Salon) \(website\))(?=\]\]'')/g, '$1$2'); //non-standard code
regex(/(\|)(Salon)(?: \(website\))(?=\]\])/g, '$1$2');
Line 343 ⟶ 350:
regex(/(.) at Discogs(\][\.,;]) Discogs\.com(\.| )/gi, '$1$2Discogs$3');
regex(/(\|\s?author\s?\=\s?)(?:posted|publishe[dr]|written)\s?(?:by|on):?\s/gi, '$1');
regex(/\|\s?(?:work|publisher|website|work)(\s?\=MTV)\|\s?publisher\s*\=\s*(?:MTV Networks|Viacom)/gi, '|publisher$1=');
 
regex(/\|\s?(?:publisher|website|work)\s*\=\s*(?:BBC|BBC News(?: Online|))\s*(\|[^}<>]*|)\|\s?publisher\s*\=\s*(?:BBC|BBC News(?: Online|)|British Broadcasting Corporation)(?=[\s\.]*[|}])/g, '|publisher=BBC News $1');
regex(/\|\s?(?:publisher|website|work)\s*\=\s*(BBC Sports?)\s*(\|[^}<>]*|)\|\s?publisher\s*\=\s*(?:BBC|BBC News(?: Online|)|British Broadcasting Corporation)(?=[\s\.]*[|}])/g, '|publisher=$1$2');
regex(/\|\s?(?:publisher|website|work)\s*\=\s*(Metacritic|\[\[Metacritic\]\])\s*(\|[^}<>]*|)\|\s?publisher\s*\=\s*(CBS Interactive|\[\[CBS Interactive\]\])(?=[\s\.]*[|}])/g, '|publisher=$1$2');
 
//rem duplicated publishers in separate fields (post); rem preceding nbsp
Line 386 ⟶ 393:
regex(/\|\s?work\s*\=\s*(\[\[(?:[^<|\]]*)\|([^}<>]*)\]\])(\|[^}<>]*|)\|\s?(?:website|work)\s*\=\s*(\1|\2)\.?(?=\s*[|}])/g, '|work=$1$3'); //piped work
 
//'work' and its alias (prepost)
regex(/(\|[ ]*?(?:newspaper|work)[ ]*=[^\|}]*(?:\|[^\{\}]*|))(?:\|[ ]*?workwebsite[ ]*=[^|}]*)(?=[|}\n])+/gi, '$1');
 
//unwinding of unnecessary pipes
Line 393 ⟶ 400:
 
// removing artefacts (within citation templates)
// regex(/(\|[ ]*?author=)(?:(?:Mon|Tues|Wednes|Thurs|Fri|Satur|Sun)day,? ?)(?=[^\]\|\}]*\|)/gi, '$1'); //redundant/duplicated
regex(/(?:&#124; (?:Daily|English|(?:Mail |)Online|Music|News|Indian Express))(?=\s*\|)+/gi, '');
 
Line 400 ⟶ 407:
regex(/( &#124; Comment is free)/gi, '');
regex(/\|\s?title\s*\=\s*BBC (?:News|Sport)\s?(?:[-–]|&#124; )\s?/gi, '|title=');
regex(/(?:Entertainment|Football|international(?:inter|)national|Latest|local|UK|world|) News &#124;(?=[ ]?[&\|])/gi, '');
regex(/(?:[-–|]|&#124;)[ ]*(?:Entertainment|Football|international(?:inter|)national|Latest|local|UK|world|) News(?=[ ]?[&\|])/gi, '');
regex(/<!-- Bot generated title -->/gi, '');
regex(/(\|\s?title\s*\=\s*[^|}]+?)[-‒–—―] *([^|}]+?)(\|\s?(?:publisher|work)\s*\=\s*\2)/gi, '$1$3');
Line 414 ⟶ 421:
regex(/(UEFA\]\])\.(?:co(?:m|m?\.\w{2})|\.\w{2})(?= ?[\|{}])/gi, '$1');
regex(/\bthe ((?:'''?|)the )/gi, '$1');
 
// expanding/dating tags
regex(/{{(?:cn|fact)}}/gi, '{{citation needed|date=July 2016}}');
 
//dynamic columns for reflists; remove scroll bar
regex(/((?:[Rr]eferences|[Nn]otes)[ ]?={2,4}[\n\r])[\r\n\s]*<div (?:style|class)=[^>]*>([\S\s]*)<\/div>/g, '$1$2');
regex(/(?:\{\{[Rr]eflist\}\}|<[Rr]eferences ?\/>)/g, '{{reflist|colwidth=30em}}');
 
}
Line 429 ⟶ 433:
//removal of all links from sources
//rem linking within citation template parameters
regex(/(\|[ ]*?(?:author|agency|publisher|journal|newspaper|periodical|via|website|work)=[ ]*?)\[\[([^\|\]]+?)\]\](?:(,? )\[\[([^\|\]]+?)\]\]|)([ ]{0,1})/gi, '$1$2$3$4$5');
regex(/(\|[ ]*?(?:author|agency|publisher|journal|newspaper|periodical|via|website|work)=[ ]*?)\[\[(?:[^\|\]]+?\|)([\w\s\,]+?)\]\](?:(,? )\[\[([^\|\]]+?)\]\]|)([ ]{0,1})/gi, '$1$2$3$4$5');
 
//rem other linking within refs tags
Line 459 ⟶ 463:
}
 
 
function ohc_unprotect_urls()
{
Line 484 ⟶ 488:
function Ohc_Source_edit_summary(){
//Add a tag to the summary box
setoptions(minor='true');
setreason('[[User:Ohconfucius/script/Sources|Script]]-assisted fixes: per [[Help:Citation Style 1|CS1]], [[Template:Citation]] and [[MOS:ITALICS]]', 'append');
doaction('diff');
 
}