User:Ohconfucius/script/Sources.js: Difference between revisions

Content deleted Content added
align to test script
tweaks
Line 21:
function Ohc_linkspam() {
var txt=document.editform.wpTextbox1;
 
txt.value=txt.value.replace(/([^>\*][ ]?)\[https?:\/\/[^\s\[\]]*[ ]([\'\w\d][^\[\]]*)\](?![ ]*[\n\-]+)/gi, '$1$2');
 
Line 32:
function Ohc_sources_prep() {
var txt=document.editform.wpTextbox1;
 
// removing references to other WP articles and 'external' WP links
regex(/<ref[^<>]*>[^<>]*\|[ ]*url ?=https?:\/\/(?:\w{2}\.wikipedia\.org\/wiki|(?:www\.|)(?:facebook|myspace|twitter)\.com)\/[^<>]*<\/ref>/gi, '');
Line 38:
regex(/<ref>\[https?:\/\/(?:en\.wikipedia\.org\/wiki|(?:www\.|)(?:facebook|myspace|twitter)\.com)\/[^\s\]]*[ ]+[\w\d][^\]]*\]<\/ref>/gi, '');
regex(/\|[ ]*url[ ]*=[ ]*https?:\/\/(?:\w{2}\.wikipedia\.org\/wiki|(?:www\.|)(?:facebook|myspace|twitter)\.com)[^\s\|\{\}<]*(?=[ ]*[|}])/gi, '');
 
regex(/[ ]\[https?:\/\/en\.wikipedia\.org\/wiki\/[^\s\]]*[ ]+([\w][^\]]*)\]/gi, ' [[$1]]');
regex(/(\|\s?url\s*\=\s*|\[)(https?:[^|{}#\s]+)#[A-Za-z0-9\.]{12,13}(?=[\s\[\]|{}<>])/gi, '$1$2'); //rem link tracking
 
// removing external links within publisher/journal/work fields
txt.value=txt.value.replace(/(\|\s?(?:newspaper|work|journal|publisher)\s*\=\s*)\[https?:\/\/[^\s\]]*\s([\w][^\]]*)\]/gi, '$1$2');
Line 48:
txt.value=txt.value.replace(/(\|\s?author(?:link\d?|)\s*\=\s*)(?:https?:\/\/|)www\.[\w][^|}]*(?=[|}\n])/gi, '$1'); //rem outright (not a WL)
txt.value=txt.value.replace(/(\|\s?(?:author(?:link\d?|)|journal|newspaper|publisher|website|work)\s*\=\s*)(?:https?:\/\/|)www\.(\w)/gi, '$1$2'); //leave only ___domain name
 
// removing artefacts within fields
regex(/(\|\s?author\s*\=\s*)(?:by |)(?:wire staff|(?:staff |)reporters?|)[ ]*(?=[|}\n])/gi, '');
regex(/(\|\s?author\s*\=\s*)([A-Z][a-z]*(?: [A-Z][a-z]*)*) (?:wire staff|(?:staff |)reporters?)[ ]*(?=[|}\n])/gi, '$1$2');
regex(/\|[ ]*last=(Reporter|staff)[ ]*\|[ ]*first=[^|\{\}]*(?=[\|{}])=/gi, '');
regex(/(\|\s?accessdate\s*\=\s*)(?:accessed|retrieved)(?: by| on|):?[ ]*(\d)/gi, '$1$2');
regex(/(\|\s?volume\s*\=\s*)vol(?:ume|\.?)[ ]*(\d)/gi, '$1$2');
regex(/(\|\s?pages?\s*\=\s*)(?:pages?|p[gp]?\.?)[ ]*(\d)/gi, '$1$2');
 
//Remove COinS corrupting templates from CS1 citations
regex(/(\|\s?(?:authors?|first\d?|last\d?|publisher|work)\s*\=\s*(?:[^{}|]*|)){{(?:Sm|Aut|SC|Small[- ]caps|Sm?caps)\|([^{}|]*)}}(?=(?:[^{}|]*|)[|}])/gi, '$1');
 
regex(/(\|\s?)\w+\=(url\s*\=\s*https?:\/\/)(?=[|}\n])/gi, '$1$2'); //common cs1 error
regex(/(\|\s?url\s*\=)(www\.)(?=[|}\n])/gi, '$1http//$2'); //common cs1 error
regex(/(\|\s*date\s*=\s*)(?:not? |non-|un)date[ds]?\s*(?=[|}\n])/gi, '$1n.d.'); //common cs1 error
 
regex(/\{\{wikinews ?(|2|cat(?:egory)?|has|par2?|portal|table|-inline)(\|[^\}]+|)\}\}\s*/gi, '');
regex(/(\*[ ]*|)\[\[n:[^\]]*\]\][^\r\n]*[\r\n]/gi, '');
regex(/\*[ ]*\{\{(?:Facebook|Find a Grave|Myspace)\|([^}]*)\}\}[\n\r\s]*/gi, '');
 
// removing inappropriately populated fields
// regex(/(\|\s?at\s*\=\s*(?:pages? |)(?:[-–\d\s,;]*) ?)[^|}]+(?=[|}\n])/gi, '');
 
//citation template fixes
regex(/(\|\s?)published\s?=/gi, '$1publisher=');
Line 78:
// misused 'date' parameter
regex(/\|\s?date(\s?=\s?[12]\d{3}\s?[|}])/gi, '|year$1');
 
// rem toggles and redundant quote marks
txt.value=txt.value.replace(/(\|\s?(?:agency|author|newspaper|work|journal|publisher)\s*\=\s*)\'\'([^|}]+)\'\'(?=\s*[\}\|])/gi, '$1$2'); //without link
Line 84:
txt.value=txt.value.replace(/(\|\s?title\s*\=\s*)\'&#39;([^\|\{\}]+)\'&#39;/gi, '$1$2'); //rem &#39; in titles
txt.value=txt.value.replace(/(\|\s?publisher\s*\=\s*)\(([^\|\{\}]+)\)/gi, '$1$2'); //rem parenthetical publishers
 
// reordering 'work' and 'publisher' (first run - see second run in cleanup function)
regex(/(\|\s?publisher\s*\=\s*(?:\[\[[^<{}\]]*\]\]|[^{}\|\}<>]*))(\s?\|[^}<>]*|)(\|\s?(?: journal|newspaper|magazine|periodical|website|work)\s*\=\s*(?:\[\[[^<{}\]]*\]\]|[^{}\|\}<>]*))(?=[\s\.]*[|}])/g, '$3$1$2');
 
// remove redundant parentheses and templates from dm and md dates (equivalents also exists in Mosnum script)
regex(/(=[ ]*)\(([^()|{}])\)/gi, '$1$2');
regex(/(date[ ]*=[ ]*)\{\{(?:Start|End) ?date\|(\d{4})\|(0?\d|1[012])\|([0-2]?\d|30|31)\}\}/gi, '$1$2-$3-$4'); //stripping start/end template notes inside "|date=" parameter
 
// rem corporate designation
txt.value=txt.value.replace(/(\|\s?publisher\s*\=\s*[^\[|}]{1,40}), (?:Inc|LL[CP]|Ltd|PLC|SA)\.?(?=[ ]*\|\})/gi, '$1');
txt.value=txt.value.replace(/(\|\s?publisher\s*\=\s*[^\[|}]{1,40}) (?:Inc|LL[CP]|Ltd|PLC|SA)\.?(?=[ ]*\|\})/gi, '$1');
 
// rem unnecessary quote marks
txt.value=txt.value.replace(/(\|\s?title\s*\=\s*)["“]([^\|]+)["”](?=\s?[|}])/gi, '$1$2');
Line 101:
// repl double 'in-title' quote marks with single quotes
txt.value=txt.value.replace(/(\|\s?title\s*\=\s*[\w ]* )["“]((?:\w[\w]* )+(?:\w[\w]*))["”]([^\|]+|)(?=\s?[|}])/gi, '$1\'$2\'$3');
 
// adjust for possibly incorrectly input title
regex(/(\|\s?title\s*\=\s*)([^\|\}<>]*)(\s?\|[^}<>]*|)\|\s?(publisher|work)\s*\=\s*(?:\1|\[\[\1\]\])(?=\s*[|}])/g, '$1ACTUAL ARTICLE TITLE BELONGS HERE! |$4=$2$3');
regex(/(\|\s?title\s*\=\s*)(\w+\.com)(?=\s?[=|{}])/gi, '$1ACTUAL ARTICLE TITLE BELONGS HERE! |publisher=$2');
 
// rem misplaced punctuation
regex(/(<ref[^>]*>[^<]+?[\]\.\},;–]\s*\'\'[\w-]*(?: [\w-]*){0,3})(\.com|)([;,\.])(\'\')([^<]*?<\/ref>)/gi, '$1$2$4$3$5');
regex(/([\w]+)\.(['"]\])[ ]/gi, '$1$2. '); //LQ for titles
 
// removing blank parameters
regex(/(?:\|[ ]*(?:accessdate|agency|archive(?:date|url)|arxiv|asin|at|author(-?link|-mask|-name-separator|-separator|\d|\d-link|link\d?|)|bibcode|chapter|chapter-url|coauthors?|contribution(?:-url|)|date|deadurl|display-authors|doi|doi-inactive|doibroken|edition|editor(?:-first|-last|-link|\d|\d-first|\d-last|\d-link|)|(?:first|last)\d?|format|id|is[bs]n|issue|jfm|journal|jstor|language|lay(?:date|source|summary)|lccn|___location|magazine|day|month|mr|newspaper|nopp|oclc|ol|origyear|osti|others|pages?|periodical|place|pm[cd]|pmid|postscript|publication(?:-date|-place)|publisher|quote|ref|rfc|separator|series|ssrn|trans_title|type|url|volume|work|year|zbl)[ ]*=[\s]*)(?=[\}\|])/gi, '');
 
//rem underlining within certain fields
txt.value=txt.value.replace(/(\|\s?(?:journal|newspaper|periodical|publisher|work)\s*\=\s*)<u>([^|}]*)<\/u>/gi, '$1');
 
//rem redundant top-level domains (.com, .net, .org), strip "www"
txt.value=txt.value.replace(/(\|\s?(?:journal|newspaper|periodical|publisher|work)\s*\=\s*)(\[\[[^\[\]\}]*\]\])\.(?:biz|com|net|org|co\.uk)(?=\s*[|}])/gi, '$1$2');
 
//rem duplicated publishers in separate fields (pre)
regex(/[‒–—―]+\s*([^|}]{3,})\s*(\|\s?(?:publisher|work)\s*\=\s*(?:\w+\.|))\1(?=\s?[|}])/gi, '$2$1');
//'work' and its alias (pre)
regex(/(\|[ ]*?newspaper[ ]*=[^\|}]*(?:\|[^\{\}]*|))(?:\|[ ]*?work[ ]*=[^|}]*)(?=\s?[|}])+/gi, '$1');
 
//rem linking within '___location' field
regex(/(\|[ ]*?(?:___location|place)=[ ]*?)\[\[ ?(Abkhazia|Afghanistan|Albania|Algeria|Andorra|Angola|Antigua and Barbuda|Argentina|Armenia|Australia|Austria|Azerbaijan|(?:The |)Bahamas|Bahrain|Bangladesh|Barbados|Belarus|Belgium|Belize|Benin|Bhutan|Bolivia|Bosnia and Herzegovina|Botswana|Brazil|Brunei|Bulgaria|Burkina Faso|Burma|Burundi|Cambodia|Cameroon|Canada|Cape Verde|Central African Republic|Chad|Chile|(?:(?:People's |)Republic of |)China|Colombia|Comoros|(?:Democratic |)Republic of (?:the |)Congo|Costa Rica|Côte d'Ivoire|Croatia|Cuba|Cyprus|Czech Republic|(?:Kingdom of |)Denmark|Djibouti|Dominica|Dominican Republic|East Timor|Ecuador|Egypt|El Salvador|Equatorial Guinea|Eritrea|Estonia|Ethiopia|Fiji|Finland|France|Gabon|Georgia \(country\)|Germany|Ghana|Greece|Greenland|Grenada|Guatemala|Guinea|Guinea-Bissau|Guyana|Haiti|Honduras|Hungary|Iceland|India|Indonesia|Iran|Iraq|(?:Republic of |)Ireland|Israel|Italy|Jamaica|Japan|Jordan|Kazakhstan|Kenya|Kiribati|North Korea|South Korea|Kosovo|Kuwait|Kyrgyzstan|Laos|Latvia|Lebanon|Lesotho|Liberia|Libya|Liechtenstein|Lithuania|Luxembourg|(?:Republic of |)Macedonia|Madagascar|Malawi|Malaysia|Maldives|Mali|Malta|Marshall Islands|Mauritania|Mauritius|Myanmar|M[ée]xico|(?:Federated States of |)Micronesia|Moldova|Monaco|Mongolia|Montenegro|Morocco|Mozambique|Nagorno-Karabakh|Namibia|Nauru|Nepal|(?:Kingdom of the |)Netherlands|Holland|New Zealand|Nicaragua|Niger|Nigeria|Northern Cyprus|Norway|Oman|Pakistan|Palau|Palestine|Panama|Papua New Guinea|Paraguay|Peru|Philippines|Poland|Portugal|Qatar|Romania|Russia|Rwanda|SADR|Saint Kitts and Nevis|Saint Lucia|Saint Vincent and the Grenadines|Samoa|San Marino|São Tomé and Príncipe|Saudi Arabia|Senegal|Serbia|Seychelles|Sierra Leone|Singapore|Slovakia|Slovenia|Solomon Islands|Somalia|Somaliland|South Africa|South Ossetia|Spain|Sri Lanka|Sudan|Suriname|Swaziland|Sweden|Switzerland|Syria|Taiwan|Tajikistan|Tanzania|Thailand|Timor Leste|(?:The |)Gambia|Togo|Tonga|Transnistria|Trinidad and Tobago|Tunisia|Turkey|Turkmenistan|Tuvalu|Uganda|Ukraine|United Arab Emirates|United Kingdom|United States|Uruguay|Uzbekistan|Vanuatu|Vatican City|Venezuela|Vietnam|Yemen|Zambia|Zimbabwe)[ ]?\|[ ]?(?:\w{2,3})\]\]/gi, '$1$2');
regex(/(\|[ ]*?(?:___location|place)=[ ]*?)\[\[(?:[^\|\]]+?\||)([\w\s\,]+?)\]\](?:(,? )\[\[(?:[^\|\]]+?\||)([\w\s\,]+?)\]\])(?:(,? )\[\[(?:[^\|\]]+?\||)([\w\s\,]+?)\]\]|)(?=[ ]?[|}])/gi, '$1$2$3$4$5$6');
regex(/(\|[ ]*?(?:___location|place)=[ ]*?)\[\[(?:[^\|\]]+?\||)([\w\s\,]+?)\]\](?:(,? )\[\[(?:[^\|\]]+?\||)([\w\s\,]+?)\]\]|)(?=[ ]?[|}])/gi, '$1$2$3$4');
 
// removing english icon template
regex(/[ ]?\{\{en[- ]icon\}\}/gi, "");
 
// removing icon template from within "|language=" parameter
regex(/(\|[ ]*?language[ ]*?=[ ]*?)\{\{(\w{2})(?:[- ]icon|)\}\}/gi, "$1$2");
 
// eliminating time of day
regex(/(\|[ ]*author[ ]*=[ ]*)(?:posted|published)(?: by| on|)[\s:](?=\s*\w)/gi, "$1");
regex(/(\|[ ]*(?:date|archivedate|accessdate|author)[ ]*=[ ]*)[0-2]?\d:[0-5]\d(?:[ ]|&nbsp;)(?:[ap]m ?|[ap]\.m\. |[A-Z]{1,2}T|UTC)[\.,]?[ ]?/gi, "$1");
 
// eliminating days of the week
regex(/(\|[ ]*(?:date|archivedate|accessdate|author)[ ]*=[ ]*)(?:(?:Mon|Tues|Wednes|Thurs|Fri|Satur|Sun)day,?)\s/gi, "$1");
regex(/(\|[ ]*(?:date|archivedate|accessdate|author)[ ]*=[ ]*)(?:(?:Mon|Tues?|Wed|Thur?|Fri|Sat|Sun)[\.,]?)\s/gi, "$1");
 
//'Accessed' -> 'Retrieved'
ohc_regex(/(?:[\.,;][ ]*(?:url |link |last |)(?:Retrieved|Accessed))(?: on(?:line|)|):? (@month|@dd|@yyyy)(?=\D)/gi, '. Retrieved $1');
ohc_regex(/(\w|\])(?:[ ]*(?:url |link |last |)(?:Retrieved|Accessed))(?: on(?:line|)|):? (@month|@dd|@yyyy)(?=\D)/gi, '$1. Retrieved $2');
 
ohc_regex(/(?:Retrieved|Accessed)(?: on(?:line|)|):? (@Month\s@DD,?\s@YYYY|@DD\s@Month\s@YYYY|@yyyy-@mm-@dd)(?=\D)/gi, 'Retrieved $1');
ohc_regex(/(\w|\])[\.,;]?[ ]\((Retrieved (?:@Month\s@DD,\s@YYYY|@DD\s@Month\s@YYYY|@yyyy-@mm-@dd))\)/gi, '$1. $2');
 
}
 
function Ohc_dab_news_sources() {
var txt=document.editform.wpTextbox1;
 
//pre-dab of piped sources
// regex(/(\[\[)(?:foo|bar)(\|)/gi, '$1foo bar \(dab\)$2');
 
regex(/(\[\[)(?:(?:British|English|London) Sun|Sun on Sunday|The Scottish Sun|(?:The |)Sun (?:\((?:British |)newspaper\)|\(tabloid\)|\(UK newspaper\)|\(UK\)|Newspaper|on Sunday|Online)|Thesun\.co\.uk)(\|)/gi, '$1The Sun (United Kingdom)$2');
regex(/(\[\[)Daily Star \((?:British|UK)\)(\|)/gi, '$1Daily Star (United Kingdom)$2');
Line 167:
regex(/(\|)(Sport \()(newspaper\))(\]\])/g, '$1$2Spanish $3'); //dab moved December 2012
regex(/(=[ ]*\[\[)(?:[BE]SPN ?(?:USA|HD|Network|the ocho|\(United States\))|E.S.P.N.|(?:The |)Entertainment (?:and |)Sports Programming Network)(?:\|[\w, ]*)(\]\])/gi, '$1ESPN$2');
 
regex(/(?:agency|journal|newspaper|periodical|publisher|work)(\s?=\s?\[\[)(?:MTV (?:[A-Z]\w*|\([^\)\]]*\)))\|[^\)\]]*(\]\])/gi, 'publisher$1MTV$2');
 
//unwinding of unnecessary pipes
regex(/(\[\[)Public Broadcasting Service\|(PBS\]\])/gi, '$1$2');
 
}
 
function Ohc_publishers() {
var txt=document.editform.wpTextbox1;
 
//linked publishing houses
// removing publishers less well-known than their titles
regex(/(\|\s?publisher\s?\=MTV\|\s?)publisher\s*\=\s*(?:MTV Networks|Viacom)/gi, '$1=');
 
regex(/(?:\|publisher=Turner Sports Interactive, Inc)\.? ?\|publisher=(NBA)(?= ?\|)/gi, '$1');
regex(/(\|publisher=NBA) ?\|publisher=(?:Turner Sports Interactive, Inc)\.?(?= ?\|)/gi, '$1');
Line 199:
regex(/\|\s?publisher\s*\=\s*\[\[(?:Cox|Halifax|North Jersey|Sun-Times|Tampa|Herald|Stephens|WEHCO|\w+) Media( Group(?:, Inc.)?)?\]\](?=[\s\.]*[|}])/g, '');
regex(/\|\s?publisher\s*\=\s*\[\[(?:\w+ )+(?:Communications|Media|Publishing|Publications)( Group(?:, Inc.)?)?\]\](?=[\s\.]*[|}])/g, '');
 
}
 
function Ohc_sources_cleanup() {
var txt=document.editform.wpTextbox1;
 
// displacing ___location-dab (in parentheses)
regex(/( \|___location=(?:New York|UK))(\]\])/gi, '$2$1');
 
// The following regexes for dab-links are in sets of four. If changing, please ensure all sets are changed
regex(/(=[ ]*(?:The ?|)[A-Z]\w*(?: [A-Z]\w*|)) \((South Africa)(?:n newspaper|)\)([ ]*[|}])/g, '$1|___location=$2$3');
Line 213:
regex(/(\[\[((?:The ?|)[A-Z]\w*(?: [A-Z]\w*|)))( \(South Africa(?:n newspaper|)\))(\]\][\.,;]?)/g, '$1$3|$2$4');
regex(/(\|)((?:The ?|)[A-Z]\w*(?: [A-Z]\w*|))(?: \((?:South Africa)(?:n newspaper|)\))(\]\])/g, '$1$2$3');
 
regex(/(=[ ]*)(Billboard|Fast Company|Q|Time Out) \((?:magazine)\)(?=\s*[|}])/g, '$1$2'); //non-standard code
regex(/('')(Billboard|Fast Company|Q|Time Out) \(magazine\)(?='')/g, '$1$2'); //non-standard code
Line 228:
regex(/(''Dawn) \((newspaper)\)('')/g, '$1$3 (Pakistan)');
regex(/(\|)(Dawn)(?: \(newspaper\))(\]\])/g, '$1$2$3');
 
regex(/(=[ ]*(?:Daily Star)) \((United Kingdom)\)(\s*[|}])/g, '$1|___location=$2$3');
regex(/(''(?:Daily Star)) \((United Kingdom)\)('')/g, '$1$3 ($2)');
Line 289:
regex(/(''Vogue) \((British magazine)\)('')/g, '$1$3 (UK)');
regex(/(\|)(Vogue)(?: \(British magazine\))(\]\])/g, '$1$2$3');
 
regex(/(= ?(?:ABC|Marca)) \((newspaper)\)([ ]*[|}])/g, '$1|___location=Spain$3');
regex(/(''(?:ABC|Marca)) \((newspaper)\)('')/g, '$1$3 (Spain)');
Line 296:
regex(/(''(?:Il Giorno)) \((newspaper)\)('')/g, '$1$3 (Italy)');
regex(/(\|)(Il Giorno)(?: \(newspaper\))(\]\])/g, '$1$2$3');
 
regex(/(= ?(?:RT)) \((TV network)\)([ ]*[|}])/g, '$1|___location=Russia$3');
regex(/(\|)(RT)(?: \(TV network\))(\]\])/g, '$1$2$3');
 
regex(/(=[ ]*\[\[)([^\[\]\(\)\|:]*)( \([^\[\]\(\)\|]*\))(\]\])/gi, '$1$2$3|$2$4'); //adding piping to parentheticals inside parameters
 
regex(/(?:author|agency|publisher)(\s*\=\s*\[\[[^()|]+ \((?:newspaper|magazine)\)\|[^\[\]|]+\]\])/gi, 'work$1');
 
// removing redundancies
regex(/ – (?:Times of India|Rediff.com [\w]*)(?=[ ]?\|)/gi, '');
regex(/(?: +[‒–—―] *Times Of India|)(\]. +''The Times of India''\.)indiatimes\.com/gi, '$1');
regex(/(\w''\.)indiatimes\.com/gi, '$1');
 
regex(/\|[ ]?language[ ]?\=[ ]?English[ ]*?(?=[|}\n])/gi, ''); //note: adjusted for false positive in infoboxes books
regex(/- [\w]*\.com[ ]*\|/gi, '|');
Line 315:
regex(/\|\s?author\s?\=(?:posted|publishe[dr]|written)\s?(?:by|on)\s/gi, '|author=');
regex(/\|\s?(?:work|publisher)(\s?\=MTV)\|\s?publisher\s*\=\s*(?:MTV Networks|Viacom)/gi, '|publisher$1=');
 
regex(/\|\s?(?:publisher|work)\s*\=\s*(?:BBC|BBC News(?: Online|))\s*(\|[^}<>]*|)\|\s?publisher\s*\=\s*(?:BBC|BBC News(?: Online|)|British Broadcasting Corporation)(?=[\s\.]*[|}])/g, '|publisher=BBC News $1');
regex(/\|\s?(?:publisher|work)\s*\=\s*(BBC Sports?)\s*(\|[^}<>]*|)\|\s?publisher\s*\=\s*(?:BBC|BBC News(?: Online|)|British Broadcasting Corporation)(?=[\s\.]*[|}])/g, '|publisher=$1$2');
 
//rem duplicated publishers in separate fields (post); rem preceding nbsp
txt.value=txt.value.replace(/(?:[‒–—―]+|&#124;)\s*(?:The |)([^\|\}&]{3,})(?:\.com|)\s*(\|\s?(?:publisher|work)\s*\=\s*)\1(?=\s*[|}])/gi, '$2$1');
txt.value=txt.value.replace(/\s?&nbsp;\s?(\|\s?(?:publisher|work)\s*\=\s*)/gi, ' $1');
 
//per [[Help:Citation Style 1#Elements not included]]
regex(/(?:-[ ]*Google Books[ ]*(\|[^}]*|)|)\|\s?publisher\s*\=\s*Google Books(?=[\s\.]*[|}])/g, '');
regex(/\|\s?publisher\s*\=\s*(?:Project Gutenberg|Proquest|Scribd|web(?:\.archive\|citation).org)(?=[\s\.]*[|}])/g, '');
 
// reordering 'work' and 'publisher'; reordering 'work' and 'website'
regex(/(\|\s?publisher\s*\=\s*(?:\[\[[^<{}\]]*\]\]|[^{}\|\}<>]*))(\s?\|[^}<>]*|)(\|\s?(?: journal|newspaper|magazine|periodical|website|work)\s*\=\s*(?:\[\[[^<{}\]]*\]\]|[^{}\|\}<>]*))(?=[\s\.]*[|}])/g, '$3$1$2');
regex(/(\|\s?website\s*\=\s*(?:\[\[[^<{}\]]*\]\]|[^{}\|\}<>]*))(\s?\|[^}<>]*|)(\|\s?work\s*\=\s*(?:\[\[[^<{}\]]*\]\]|[^{}\|\}<>]*))(?=[\s\.]*[|}])/g, '$3$1$2');
 
// removing identical/similar entries in 'work' and 'publisher'
regex(/\|\s?work\s*\=\s*([^\|\}<>]*)(\s?\|[^}<>]*|)\|\s?(?:publisher|work)\s*\=\s*(?:\1|\[\[\1\]\])(?=[\s\.]*[|}])/g, '|work=$1$2');
regex(/\|\s?work\s*\=\s*(\[\[(?:[^<\|\]]*)\]\]|[^<\|\]]*)(\s?\|[^}<>]*|)\|\s?(?:publisher|work)\s*\=\s*\1(?=[\s\.]*[|}])/g, '|work=$1$2');
 
regex(/\|\s?publisher\s*\=\s*([^\[\]|}<>]*)(\s?\|[^}<>]*|)\|\s?(?:publisher|work)\s*\=\s*(?:\1|\[\[\1\]\])(?=[\s\.]*[|}])/g, '|work=$1$2');
regex(/\|\s?publisher\s*\=\s*(\[\[(?:[^|\]]*)\]\]|[^<\|\]\}]*)(\s?\|[^}<>]*|)\|\s?(?:publisher|work)\s*\=\s*\1(?=[\s\.]*[|}])/g, '|work=$1$2');
 
regex(/\|\s?___location\s*\=\s*New York(?: City|)\s*(\|[^}<>]*|)\|\s?___location\s*\=\s*(New York(?: City|)|USA)(?=[\s\.]*[|}])/g, '|___location=New York $1');
 
regex(/(?:\|[ ]*?___location=[^\|]*)(\|[ ]*?___location=[^|}]*)(?=[|}\n])+/gi, '$1');
regex(/(\|[ ]*?publisher=[^=}]*)(?:\|[ ]*?publisher=[^=}]*)(?=[|}\n])+/gi, '$1');
regex(/(\|[ ]*?work=[^=}]*)(?:\|[ ]*?work=[^=}]*)(?=[|}\n])+/gi, '$1');
 
//'work' and its alias (pre)
regex(/(\|[ ]*?newspaper[ ]*=[^\|}]*(?:\|[^\{\}]*|))(?:\|[ ]*?work[ ]*=[^|}]*)(?=[|}\n])+/gi, '$1');
 
//unwinding of unnecessary pipes
regex(/\[\[([^\]\|]*)\|\1(?=\]\])/gi, '[[$1');
 
// removing artefacts (within citation templates)
regex(/(\|[ ]*?author=)(?:(?:Mon|Tues|Wednes|Thurs|Fri|Satur|Sun)day,? ?)(?=[^\]\|\}]*\|)/gi, '$1');
regex(/(?:&#124; (?:Daily|English|(?:Mail |)Online|Music|News|Indian Express))(?=\s*\|)+/gi, '');
 
regex(/(?:(?:[ ]+&#124;[ ]+HighBeam Research[ ]+[-–][ ]+FREE trial[ ]+|)\|publisher=Highbeam.com)/gi, '');
regex(/( &#124; [\w, ]*?)(?=[ ]&#124)/gi, '');
Line 361:
regex(/(?:[-–|]|&#124;)[ ]*(?:Football|international|Latest|local|UK|world|) News(?=[ ]?[&\|])/gi, '');
regex(/<!-- Bot generated title -->/gi, '');
 
regex(/(DOC|PDF)\) \./gi, '$1).');
 
// removing artefacts (outside of citation templates)
regex(/([\w]+\'\')\.(?:co(?:m|m?\.\w{2})|\.\w{2})[ ]/gi, '$1. ');
 
// removing other artefacts
regex(/(UEFA\]\])\.(?:co(?:m|m?\.\w{2})|\.\w{2})(?= ?[\|{}])/gi, '$1');
 
//dynamic columns for reflists; remove scroll bar
regex(/((?:[Rr]eferences|[Nn]otes)[ ]?={2,4}[\n\r])[\r\n\s]*<div (?:style|class)=[^>]*>([\S\s]*)<\/div>/g, '$1$2');
regex(/(?:\{\{[Rr]eflist\}\}|<[Rr]eferences ?\/>)/g, '{{reflist|colwidth=30em}}');
 
}
 
function Ohc_sourceunlink() {
var txt=document.editform.wpTextbox1;
 
//removal of all links from sources
//rem linking within citation template parameters
regex(/(\|[ ]*?(?:author|agency|publisher|journal|newspaper|periodical|work)=[ ]*?)\[\[([^\|\]]+?)\]\](?:(,? )\[\[([^\|\]]+?)\]\]|)([ ]{0,1})/gi, '$1$2$3$4$5');
regex(/(\|[ ]*?(?:author|agency|publisher|journal|newspaper|periodical|work)=[ ]*?)\[\[(?:[^\|\]]+?\|)([\w\s\,]+?)\]\](?:(,? )\[\[([^\|\]]+?)\]\]|)([ ]{0,1})/gi, '$1$2$3$4$5');
 
//rem other linking within refs tags
regex(/(<ref[^>]*>[^<]+?[\]\.,;]\s+)\[\[(?:[^\|\]<]*\||)([^\|\]<]*)(?: online|)\]\]([^<]*?<\/ref>)\) \./gi, '$1$2$3).');
regex(/(<ref[^>]*>[^<]+?[\]\.,;]\s+\'\')\[\[(?:[^\|\]<]*\||)([^\|\]<]*)(?: online|)\]\](\'\'[^<]*?<\/ref>)\) \./gi, '$1$2$3).');
}
 
/** ------------------------------------------------------------------------ **/
/// PROTECTION BY STRING SUBSTITUTION
 
var linkmap=[];
function ohc_protect_linkspam()
Line 398:
// the sensitive part is stored and replaced with a unique identifier,
// which is later replaced with the stored part.
 
var protect_function = function(s, begin, replace, end) {
linkmap.push(replace);
return begin + "⍌"+(linkmap.length-1)+"⍍" + end;
};
 
regex(/(<ref[^>]*?>)(.*?)(<\/ref>)/gi, protect_function);
regex(/(\*[ ]?\[(?:https?:|ftp:))([^\]]*)(\])/gi, protect_function);
 
}
 
function ohc_unprotect_linkspam()
{
 
//removes protection put in place by function ohc_protect_fmt (all cats, templates etc.)
regex(/⍌([0-9]+)⍍/g, function(x, n) {
Line 430:
});
}
 
var linkmap=[];
function ohc_protect_urls()
Line 437:
// the sensitive part is stored and replaced with a unique identifier,
// which is later replaced with the stored part.
 
var protect_function = function(s, begin, replace, end) {
linkmap.push(replace);
return begin + "⍌"+(linkmap.length-1)+"⍍" + end;
};
 
regex(/((?:[\[=]\s*)(?:https?:|ftp:))([^\]\|\}]*)(\s*[\]\|\}])/gi, protect_function);
regex(/(\{\{(?:harv\w*|sfn\w*|cite ?book)\s?\|)([^\}]+)(\})/gi, protect_function);
regex(/(\|\s*contribution\s*=)([^|}]+)(\|\})/gi, protect_function);
 
}
 
function ohc_unprotect_urls()