User:Quarl/___location canonicalize.js: Difference between revisions

Content deleted Content added
〈218 words changed〉add Canada
〈672 words changed〉fully generalized, more robust
Line 78:
this.init = true;
 
var initRegexp = function(states, link_country, regexp_country) {
this.USstates = [
d = new Object();
'Alabama', 'Alaska', 'Arizona', 'Arkansas', 'California', 'Colorado',
d.states = states;
'Connecticut', 'Delaware', 'Florida', 'Georgia', 'Hawaii', 'Idaho',
d.link_country = link_country.match(/\[/) ? link_country : '[['+link_country+']]';
'Illinois', 'Indiana', 'Iowa', 'Kansas', 'Kentucky', 'Louisiana', 'Maine',
var regexp_country = regexp_country || '\\[\\['+link_country+'\\]\\]';
'Maryland', 'Massachusetts', 'Michigan', 'Minnesota', 'Mississippi',
d.regexp_country = new RegExp(regexp_country);
'Missouri', 'Montana', 'Nebraska', 'Nevada', 'New Hampshire', 'New Jersey',
d.regexp_country_sq = new RegExp(', '+regexp_country);
'New Mexico', 'New York', 'North Carolina', 'North Dakota', 'Ohio',
d.regexp_substate = (
'Oklahoma', 'Oregon', 'Pennsylvania', 'Rhode Island', 'South Carolina',
new RegExp('^([^,]+), (' + d.states.join('|') + ')$'));
'South Dakota', 'Tennessee', 'Texas', 'Utah', 'Vermont', 'Virginia',
d.regexp_state = (
'Washington', 'West Virginia', 'Wisconsin', 'Wyoming' ];
new RegExp('^(?:' + this.states.join('|') + ')$'));
}
 
this.regexp_USsubstatecountries = ([
initRegexp( // USA
new RegExp('^([^,]+), (' + this.USstates.join('|') + ')$'));
['Alabama', 'Alaska', 'Arizona', 'Arkansas', 'California', 'Colorado',
this.regexp_USstate = (
'Connecticut', 'Delaware', 'Florida', 'Georgia', 'Hawaii', 'Idaho',
new RegExp('^(?:' + this.USstates.join('|') + ')$'));
'Illinois', 'Indiana', 'Iowa', 'Kansas', 'Kentucky', 'Louisiana', 'Maine',
'Maryland', 'Massachusetts', 'Michigan', 'Minnesota', 'Mississippi',
'Missouri', 'Montana', 'Nebraska', 'Nevada', 'New Hampshire', 'New Jersey',
'New Mexico', 'New York', 'North Carolina', 'North Dakota', 'Ohio',
'Oklahoma', 'Oregon', 'Pennsylvania', 'Rhode Island', 'South Carolina',
'South Dakota', 'Tennessee', 'Texas', 'Utah', 'Vermont', 'Virginia',
'Washington', 'West Virginia', 'Wisconsin', 'Wyoming' ],
'[[United States|USA]]',
'\\[\\[(?:United[ _]States(?:[ _][^|\\\]]+?)?|USA)(?:\|[^|\\\]]+?)?\\]\\]'),
 
initRegexp( // Canada
// Canadian provinces
['British Columbia', 'Alberta', 'Saskatchewan', 'Manitoba',
this.CAstates = [
'British Columbia 'Ontario', 'AlbertaQuebec', 'SaskatchewanNew Brunswick', 'ManitobaNova Scotia',
'Ontario', 'Quebec', 'NewPrince BrunswickEdward Island', 'NovaNewfoundland and ScotiaLabrador'],
'Canada'),
'Prince Edward Island', 'Newfoundland and Labrador'];
];
 
// Matches a whole string of the form "<anything without a comma>, <province>".
// Capture group 1 is the part before the comma; group 2 is the province name
// taken from this.CAstates.
this.regexp_CAsubstate = (
new RegExp('^([^,]+), (' + this.CAstates.join('|') + ')$'));
// Matches a whole string that is exactly one of the names in this.CAstates
// (non-capturing alternation).
this.regexp_CAstate = (
new RegExp('^(?:' + this.CAstates.join('|') + ')$'));
}
 
Line 110 ⟶ 117:
if (wtext.match(/:/)) return null;
 
if (wlink != wtext) return;
///////////////////////////////////////////////////////
// USA
if (wlink == wtext &&
wtext.match(location_canonicalize.regexp_USsubstate))
{
var city = RegExp.$1, state = RegExp.$2;
 
for (i in this.countries) {
var wfull = '[[' + wtext + '|' + city + ']]';
var c = this.countries[i];
// only add link to state and country if we haven't yet mentioned them.
if (!left.match('\\[\\['+state+'\\]\\]')) {
wfull += ', [['+state+']]';
 
var wfull;
if (!left.match(/\[\[(?:United[ _]States(?:[ _][^|\]]+?)?|USA)(?:\|[^|\]]+?|)?\]\]/)) {
if (wtext.match(c.regexp_substate)) {
wfull += ', [[United States|USA]]';
var city = RegExp.$1, state = RegExp.$2;
 
wfull = '[[' + wtext + '|' + city + ']]';
// only add link to state and country if we haven't yet mentioned them.
if (left.match('\\[\\['+state+'\\]\\]')) {
} else {
wfull += ', [['+state+']]';
}
} else if (wtext.match(c.regexp_state)) {
// state link -- just need to add country link as necessary
wfull = '[['+wtext+']]';
}
 
if (!wfull) continue;
// get rid of any redundant subsequent [[USA]] link
right = right.replace(/^, *\[\[(?:United[ _]States(?:[ _][^|\]]+?)?|USA)(?:\|[^|\]]+?|)?\]\]/, '');
 
if (left.match(c.regexp_country)) {
return { wfull: wfull, left: left, right: right};
// Already mentioned country. Delete redundant subsequent
}
// country links
 
right = right.replace(c.regexp_country_sq, '');
if (wlink == wtext && wtext.match(location_canonicalize.regexp_USstate)) {
} else {
// state link -- just make sure there's a [[USA]] link if necessary
// Haven't mentioned country earlier
if (!left.match(/\[\[(?:United[ _]States(?:[ _][^|\]]+?)?|USA)(?:\|[^|\]]+?|)?\]\]/)) {
varif wfull(right.match(c.regexp_country_sq)) = '[['+wtext+']]';{
wfull += ', [[United States|USA]]// it';s there already, good.
} else {
 
// get rid of any// redundantnot subsequentthere; [[USA]]add linkit.
wfull += ', ' + c.link_country;
right = right.replace(/^, *\[\[(?:United[ _]States(?:[ _][^|\]]+?)?|USA)(?:\|[^|\]]+?|)?\]\]/, '');
 
return { wfull: wfull, left: left, right: right};
}
return null;
}
 
////////////////////////////////////////////////////////////
// Canada
 
// Handle an unpiped link (link target equals link text) whose text matches
// regexp_CAsubstate, i.e. "City, Province".
if (wlink == wtext &&
wtext.match(location_canonicalize.regexp_CAsubstate))
{
// RegExp.$1 / RegExp.$2 hold the capture groups of the match performed in the
// condition above, so they must be read before any other regexp is run.
var city = RegExp.$1, state = RegExp.$2;

// Pipe the link so that only the city part is displayed.
var wfull = '[[' + wtext + '|' + city + ']]';
// only add link to state and country if we haven't yet mentioned them.
// Note: the country check is nested inside the state check, so [[Canada]]
// is only appended when the state link is appended too.
if (!left.match('\\[\\['+state+'\\]\\]')) {
wfull += ', [['+state+']]';

if (!left.match(/\[\[Canada\]\]/)) {
wfull += ', [[Canada]]';
}
}

// get rid of any redundant subsequent [[Canada]] link
// (only a ", [[Canada]]" at the very start of `right` is removed)
right = right.replace(/^, *\[\[Canada\]\]/, '');

return { wfull: wfull, left: left, right: right};
}
 
// Handle an unpiped link whose text is exactly a name matched by regexp_CAstate.
// Returns a replacement only when `left` contains no [[Canada]] link;
// otherwise returns null.
if (wlink == wtext && wtext.match(location_canonicalize.regexp_CAstate)) {
// state link -- just make sure there's a [[Canada]] link if necessary
if (!left.match(/\[\[Canada\]\]/)) {
var wfull = '[['+wtext+']]';
wfull += ', [[Canada]]';

// get rid of any redundant subsequent [[Canada]] link
// (only a ", [[Canada]]" at the very start of `right` is removed)
right = right.replace(/^, *\[\[Canada\]\]/, '');

return { wfull: wfull, left: left, right: right};
}
// [[Canada]] already appears in `left`: nothing to change.
return null;
}
 
// TODO: India, etc.
 
return null;