User:Quarl/location canonicalize.js: Difference between revisions

Content deleted Content added
〈218 words changed〉add Canada
«+"locz.qAutoEdit();"»
 
(29 intermediate revisions by the same user not shown)
Line 1:
// [[User:Quarl/location_canonicalize.js]] - canonicalizes location WikiLinks
// as per [[Wikipedia:WikiProject Location Format]]
 
// Example: [[Seattle, Washington]] becomes [[Seattle, Washington|Seattle]], [[Washington]], [[USA]].
 
// requiresdepends: wikipage.js, util.js, addlilinkwikitabs.js, wikiedit.js, autoedit.js
 
// quarl 2006-01-22 initial version
// quarl 2006-02-08 refactored to autoedit.js
 
//<pre><nowiki>
 
location_canonicalizevar locz = new Objectautoedit();
'locz',
'LocZ', 'ca-locz', 'Canonicalize ___location wikilinks',
'Location canonicalization');
 
location_canonicalizelocz.runinitData = function() {
var CountryData = function(states, link_country, regexp_country) {
location_canonicalize.initData();
this.states = states;
wikiPage.getEditorAsync(location_canonicalize.edit);
this.link_country = link_country.match(/\[/) ? link_country : '[['+link_country+']]';
var regexp_country = regexp_country || '\\[\\['+link_country+'\\]\\]';
this.regexp_country = new RegExp(regexp_country);
this.regexp_country_sq = new RegExp('^, *'+regexp_country);
this.regexp_substate = (
new RegExp('^([^,]+), *(' + this.states.join('|') + ')$'));
this.regexp_state = (
new RegExp('^(?:' + this.states.join('|') + ')$'));
}
 
this.countries = [
new CountryData( // USA
['Alabama', 'Alaska', 'Arizona', 'Arkansas', 'California', 'Colorado',
'Connecticut', 'Delaware', 'Florida', 'Georgia', 'Hawaii', 'Idaho',
'Illinois', 'Indiana', 'Iowa', 'Kansas', 'Kentucky', 'Louisiana', 'Maine',
'Maryland', 'Massachusetts', 'Michigan', 'Minnesota', 'Mississippi',
'Missouri', 'Montana', 'Nebraska', 'Nevada', 'New Hampshire', 'New Jersey',
'New Mexico', 'New York', 'North Carolina', 'North Dakota', 'Ohio',
'Oklahoma', 'Oregon', 'Pennsylvania', 'Rhode Island', 'South Carolina',
'South Dakota', 'Tennessee', 'Texas', 'Utah', 'Vermont', 'Virginia',
'Washington', 'West Virginia', 'Wisconsin', 'Wyoming',
'Washington, DC', 'Washington, D.C.' // not strictly a state, but needs to be qualified with country also
],
'[[United States|USA]]',
'\\[\\[(?:United[ _]States(?:[ _][^|\\\]]+?)?|USA)(?:\\|[^|\\\]]+?)?\\]\\]'),
 
new CountryData( // Canada
['British Columbia', 'Alberta', 'Saskatchewan', 'Manitoba',
'Ontario', 'Quebec', 'New Brunswick', 'Nova Scotia',
'Prince Edward Island', 'Newfoundland and Labrador'],
'Canada'),
 
new CountryData( // England
['Bedfordshire', 'Berkshire', 'City of Bristol',
'Buckinghamshire', 'Cambridgeshire', 'Cheshire',
'Cornwall', 'Cumbria', 'Derbyshire', 'Devon', 'Dorset',
'Durham', 'East Riding of Yorkshire', 'East Sussex', 'Essex',
'Gloucestershire', 'Greater London', 'Greater Manchester',
'Hampshire', 'Herefordshire', 'Hertfordshire', 'Isle of Wight',
'Kent', 'Lancashire', 'Leicestershire', 'Lincolnshire',
'City of London', 'Merseyside', 'Norfolk', 'Northamptonshire',
'Northumberland', 'North Yorkshire', 'Nottinghamshire',
'Oxfordshire', 'Rutland', 'Shropshire', 'Somerset',
'South Yorkshire', 'Staffordshire', 'Suffolk', 'Surrey',
'Tyne and Wear', 'Warwickshire', 'West Midlands', 'West Sussex',
'West Yorkshire', 'Wiltshire', 'Worcestershire'],
'England'),
 
];
}
 
location_canonicalizelocz.editsplitText = function(editorinput) {
//var makeinputs changes= [];
 
// special case for hat link, if there is one
var result = '';
varif (input.match(/^: = editor*''.wpTextbox1;*/)) {
var changesinfobox = []RegExp.lastMatch;
var right = RegExp.rightContext;
 
inputs.push(infobox);
input = right;
}
 
// special case the first Infobox, if there is one
if (input.match(/^(?:{{Infobox(?:.|\n)*?\n}}|{\|(?:.|\n)*?\n\|})/i)) {
// var left = RegExp.leftContext;
var infobox = RegExp.lastMatch;
Line 31 ⟶ 90:
// treat the infobox separately, so that USA links get added to main
// article.
result = (location_canonicalizeinputs.canonicalizeStringpush(infobox, changes) +;
input = right;
location_canonicalize.canonicalizeString(right, changes));
} else {
result = location_canonicalize.canonicalizeString(input, changes);
}
 
if (changesinputs.lengthpush(input) {;
return inputs;
editor.wpTextbox1 = result;
editor.wpSummary = '___location canonicalization: ' + changes.join('; ');
editor.wpMinoredit = true;
editor.submit('wpDiff');
} else {
alert("No changes to make!");
}
}
 
location_canonicalizelocz.canonicalizeStringbuildRegExp = function(input, changes) {
return /\[\[ *(?:([^|\]]+?) *\| *)?([^\]]+?) *\]\]/;
var result = '';
 
while (input.match(/\[\[ *(?:([^|\]]+?) *\| *)?([^\]]+?) *\]\]/)) {
var left = RegExp.leftContext;
var wfull = RegExp.lastMatch;
var wlink = RegExp.$1;
var wtext = RegExp.$2;
var right = RegExp.rightContext;
 
result += left;
var r = location_canonicalize.wikilink((wlink||wtext), wtext, result, right);
if (r) {
var new_wfull = r.wfull;
result = r.left;
right = r.right;
changes.push(wfull + ' → ' + new_wfull);
} else {
var new_wfull = wfull;
}
result += new_wfull;
input = right;
}
result += input;
return result;
}
 
location_canonicalizelocz.initDatareplaceRegExp = function(d, m) {
var wlink = m[1] || m[2];
if (this.init) return;
this.initvar wtext = truem[2];
 
this.USstates = [
'Alabama', 'Alaska', 'Arizona', 'Arkansas', 'California', 'Colorado',
'Connecticut', 'Delaware', 'Florida', 'Georgia', 'Hawaii', 'Idaho',
'Illinois', 'Indiana', 'Iowa', 'Kansas', 'Kentucky', 'Louisiana', 'Maine',
'Maryland', 'Massachusetts', 'Michigan', 'Minnesota', 'Mississippi',
'Missouri', 'Montana', 'Nebraska', 'Nevada', 'New Hampshire', 'New Jersey',
'New Mexico', 'New York', 'North Carolina', 'North Dakota', 'Ohio',
'Oklahoma', 'Oregon', 'Pennsylvania', 'Rhode Island', 'South Carolina',
'South Dakota', 'Tennessee', 'Texas', 'Utah', 'Vermont', 'Virginia',
'Washington', 'West Virginia', 'Wisconsin', 'Wyoming' ];
 
this.regexp_USsubstate = (
new RegExp('^([^,]+), (' + this.USstates.join('|') + ')$'));
this.regexp_USstate = (
new RegExp('^(?:' + this.USstates.join('|') + ')$'));
 
// Canadian provinces
this.CAstates = [
'British Columbia', 'Alberta', 'Saskatchewan', 'Manitoba',
'Ontario', 'Quebec', 'New Brunswick', 'Nova Scotia',
'Prince Edward Island', 'Newfoundland and Labrador'];
 
this.regexp_CAsubstate = (
new RegExp('^([^,]+), (' + this.CAstates.join('|') + ')$'));
this.regexp_CAstate = (
new RegExp('^(?:' + this.CAstates.join('|') + ')$'));
}
 
location_canonicalize.wikilink = function(wlink, wtext, left, right) {
// non-main namespace - usually a category
if (wtext.match(/:/)) return null;
 
if (wlink != wtext) return;
///////////////////////////////////////////////////////
// USA
if (wlink == wtext &&
wtext.match(location_canonicalize.regexp_USsubstate))
{
var city = RegExp.$1, state = RegExp.$2;
 
for (i in this.countries) {
var wfull = '[[' + wtext + '|' + city + ']]';
var c = this.countries[i];
// only add link to state and country if we haven't yet mentioned them.
if (!left.match('\\[\\['+state+'\\]\\]')) {
wfull += ', [['+state+']]';
 
var changes = 0;
if (!left.match(/\[\[(?:United[ _]States(?:[ _][^|\]]+?)?|USA)(?:\|[^|\]]+?|)?\]\]/)) {
var wfull += ', [[United States|USA]]';
if (wtext.match(c.regexp_substate)) {
var city = RegExp.$1, state = RegExp.$2;
wfull = '[[' + wtext + '|' + city + ']]';
// only add link to state if we haven't link it yet.
if (d.left.match('\\[\\['+state+'\\]\\]')) {
wfull += ', ' + state;
} else {
wfull += ', [['+state+']]';
}
++changes;
} else if (wtext.match(c.regexp_state)) {
// state link -- just need to add country link as necessary
wfull = '[['+wtext+']]';
}
 
if (!wfull) continue;
// get rid of any redundant subsequent [[USA]] link
right = right.replace(/^, *\[\[(?:United[ _]States(?:[ _][^|\]]+?)?|USA)(?:\|[^|\]]+?|)?\]\]/, '');
 
if (d.left.match(c.regexp_country)) {
return { wfull: wfull, left: left, right: right};
// Already mentioned country. Delete redundant subsequent
}
// country links
 
if (wlink == wtext && wtext if (d.right.match(location_canonicalizec.regexp_USstateregexp_country_sq)) {
// state link -- just make sure there's ad.right [[USA]]= link if necessaryRegExp.rightContext;
// only count as a change if we actually delete it!
if (!left.match(/\[\[(?:United[ _]States(?:[ _][^|\]]+?)?|USA)(?:\|[^|\]]+?|)?\]\]/)) {
var wfull = '[[' +wtext+']]'changes;
wfull += ', [[United States|USA]]';}
} else {
 
// getHaven't ridmentioned ofcountry any redundant subsequent [[USA]] linkearlier
right =if (d.right.replacematch(/^, *\[\[(?:United[ _]States(?:[ _][^|\]]+?c.regexp_country_sq)?|USA)(?:\|[^|\]]+?|)?\]\]/, '');{
// it's right after the current link; good.
 
return} else { wfull: wfull, left: left, right: right};
} // not there; add it.
wfull += ', ' + c.link_country;
return null;
++changes;
}
 
////////////////////////////////////////////////////////////
// Canada
 
if (wlink == wtext &&
wtext.match(location_canonicalize.regexp_CAsubstate))
{
var city = RegExp.$1, state = RegExp.$2;
 
var wfull = '[[' + wtext + '|' + city + ']]';
// only add link to state and country if we haven't yet mentioned them.
if (!left.match('\\[\\['+state+'\\]\\]')) {
wfull += ', [['+state+']]';
 
if (!left.match(/\[\[Canada\]\]/)) {
wfull += ', [[Canada]]';
}
}
 
if (changes) {
// get rid of any redundant subsequent [[USA]] link
right = right d.replace(/^,text *\[\[Canada\]\]/,= '')wfull;
 
return { wfull: wfull, left: left, right: right};
}
 
if (wlink == wtext && wtext.match(location_canonicalize.regexp_CAstate)) {
// state link -- just make sure there's a [[Canada]] link if necessary
if (!left.match(/\[\[Canada\]\]/)) {
var wfull = '[['+wtext+']]';
wfull += ', [[Canada]]';
 
// get rid of any redundant subsequent [[Canada]] link
right = right.replace(/^, *\[\[Canada\]\]/, '');
 
return { wfull: wfull, left: left, right: right};
}
return null;
}
 
return;
// TODO: India, etc.
 
return null;
}
 
location_canonicalizelocz.load_load = function() {
locz.qAutoEdit();
if (wikiPage.nsSpecialP) return;
locz.addTab();
addTab('javascript:location_canonicalize.run()', 'locz', 'ca-locz', 'Canonicalize ___location wikilinks');
}
 
addOnloadHook(location_canonicalizelocz.load_load);
 
//</nowiki></pre>