Content deleted Content added
Pathoschild (talk | contribs) + chainable version of ohc_regex |
Pathoschild (talk | contribs) tuck functions into a namespace |
||
Line 6:
// Ask the MediaWiki resource loader to load the external script pathoschild.templatescript.js.
mw.loader.load('//tools-static.wmflabs.org/meta/scripts/pathoschild.templatescript.js');
// Reuse an already defined ohc namespace object if there is one; otherwise start with an empty object.
var ohc = ohc || {};
ohc.dateutil = {
/** ------------------------------------------------------------------------- */
/// REGEX utility functions
regex_to_string: function(s) {
return s.toString()
.replace(/^\//, "")
.replace(/\/[^\/]*$/, "");
},
var message
if (reg !== undefined)
message += "\n\nPlease report the name of the article to [????]";
alert(message);
},
/**
Main worker routine. The routine implements custom regex language to
simplify date transformations. The given regex and substitution strings
can contain magic words that specify what date format the routine should
accept and to what date format the routine should convert the date to.
Aside from magic strings, ordinary regular expressions and substitution
strings are accepted. Leading and trailing slashes are not needed for the
regex.
Capturing magic strings make their output available for later processing.
Most of the capturing magic strings can be used in the output with
equivalent meaning.
Internally, the routine maintains information about several dates, so the
regex can contain more than one magic string specifying day, month or year.
The first date will be assigned the first occurrences of magic strings from
each group (day, month, year), the second date the second occurrences, and
so on. For example, if the regex is '@MM @DD @YYYY, @Month @ZM @Day', the
first date will be assigned @MM as months, @DD as days and @YYYY as years.
The second date will be assigned @Month as months and @Day as days. The
third date will be assigned @ZM as months.
In the replacement string, magic strings without a numeric suffix
output the data of the first date. To access the subsequent dates, magic
strings in the format @XX2, @XX3, and so on must be used. @XX1 is provided
as alias to @XX for convenience.
AVAILABLE REGEX MAGIC STRINGS
=============================
Note: The capturing magic strings start with an uppercase letter whereas
the equivalent non-capturing magic strings start with a lowercase
letter.
Days:
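@SD A day in numeric format without leading zero (1-31)
@ZD A day in numeric format with leading zero (01-31)
@DD A day in numeric format with optional leading zero (@SD or @ZD)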
@Day A day in numeric format with optional leading zero, with optional st,
nd, rd or th suffix. Equivalent to @DD@th?
@sd, @zd, ... : noncapturing equivalents
@th Matches st, nd, rd or th
Months:
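@SM A month in numeric format without leading zero (1-12)
@ZM A month in numeric format with leading zero (01-12)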
@MM A month in numeric format with optional leading zero (@SM or @ZM)
@FullMonth Matches full name of a month (January, February, ..)
@Mon Matches short name of a month (Jan, Feb, ..). Also optionally
matches dot (Jan., Feb., ..) and 'Sept', 'Sept.'.
@Month Matches full or short name of a month (@FullMonth or @Mon)
@sm, @zm, ... : noncapturing equivalents
Years:
@YYYY Matches a 4-digit year
@YY Matches a 2-digit year. 50-99 are interpreted as 1950..1999, 0-49 are interpreted as 2000-2049.
@YYNN Matches 4 or 2-digit year (@YYYY or @YY).
@Year Matches 1 to 4 digit year
@yyyy, @yy, ... : noncapturing equivalents
@@ Matches literal @
AVAILABLE REPLACEMENT STRING MAGIC STRINGS
==========================================
@SD: Outputs a day in numeric format without leading zero (1-31).
@ZD: Outputs a day in numeric format with leading zero (01-31).
@DD: Equivalent to @ZD
@Day: Equivalent to @SD
@LDay: Outputs the matched day string without any transformations
@SDn, @ZDn, ..., where n is integer
Outputs the day of the nth date in the specified format.
@SM Outputs a month in numeric format without leading zero (1-12)
@ZM Outputs a month in numeric format with leading zero (01-12)
@MM Equivalent to @ZM
@FullMonth Outputs a full name of a month (January, February)
@Mon Outputs a short name of a month (Jan, Feb)
@Month Equivalent to @FullMonth
@LMonth Outputs the matched month string without any transformations
@SMn, @ZMn, ..., where n is integer
Outputs the month of the nth date in the specified format.
@YYYY Outputs 4-digit year. Valid only if the year was not captured
by @Year
@YY Outputs 2-digit year. Outputs the last two digits of a year.
Valid only if the year was not captured by @Year
@YYNN Equivalent to @YYYY
@Year Outputs 1 to 4 digit number identifying a year. Equivalent to
@YYYY if the year is between 1000 and 9999
@LYear Outputs the matched year string without any transformations.
@YYYYn, @YYn, ..., where n is integer
Outputs the year of the nth date in the specified format.
@@ Outputs literal @
*/
"June", "July", "August", "September", "October", "November", "December");
"Jul", "Aug", "Sep", "Oct", "Nov", "Dec");
REGULAR : 1,
SD : 10,
ZD : 11,
DD : 12,
DAY : 13,
SM : 20,
ZM : 21,
MM : 22,
FMONTH : 23,
MONTH : 24,
MON : 25,
YYYY : 30,
YYNN : 31,
YY : 32,
YEAR : 33
DAY : 0,
MONTH : 1,
YEAR : 2
{ type : ParamType.SD, group : Group.DAY, magic : "@SD", match : /([1-9]|[1-2][0-9]|30|31)/ },
{ type : ParamType.ZD, group : Group.DAY, magic : "@ZD", match : /(0[1-9]|[1-2][0-9]|30|31)/ },
{ type : ParamType.DD, group : Group.DAY, magic : "@DD", match : /(0?[1-9]|[1-2][0-9]|30|31)/ },
{ type : ParamType.DAY,group : Group.DAY, magic : "@Day",match : /((?:[012]?[1-9]|10|20|30|31)(?:st|nd|rd|th|)?)/ },
{ type : ParamType.SM, group : Group.MONTH,magic : "@SM", match : /([1-9]|10|11|12)/ },
{ type : ParamType.ZM, group : Group.MONTH,magic : "@ZM", match : /(0[1-9]|10|11|12)/ },
{ type : ParamType.MM, group : Group.MONTH,magic : "@MM", match : /(0?[1-9]|10|11|12)/ },
{
type : ParamType.FMONTH, group : Group.MONTH, magic : "@FullMonth",
match : /(January|February|March|April|May|June|July|August|September|October|November|December)/
},
{
type : ParamType.MONTH, group : Group.MONTH, magic : "@Month",
match : /(January|February|March|April|May|June|July|August|September|October|November|December|Jan\.|Jan|Feb\.|Feb|Mar\.|Mar|Apr\.|Apr|May|Jun\.|Jun|Jul\.|Jul|Aug\.|Aug|Sep\.|Sept\.|Sept|Sep|Oct\.|Oct|Nov\.|Nov|Dec\.|Dec)/
},
{ //must be after month entry
type : ParamType.MON, group : Group.MONTH, magic : "@Mon",
match : /(Jan\.|Jan|Feb\.|Feb|Mar\.|Mar|Apr\.|Apr|May|Jun\.|Jun|Jul\.|Jul|Aug\.|Aug|Sep\.|Sept\.|Sept|Sep|Oct\.|Oct|Nov\.|Nov|Dec\.|Dec)/
},
{ type : ParamType.YYYY, group : Group.YEAR, magic : "@YYYY", match : "([1-2][0-9]{3})" },
{ type : ParamType.YYNN, group : Group.YEAR, magic : "@YYNN", match : "([1-2][0-9]{3}|[0-9]{2})" },
{ type : ParamType.YY, group : Group.YEAR, magic : "@YY", match : "([0-9]{2})" }, //must be after yyyy and yy24 entries
{ type : ParamType.YEAR, group : Group.YEAR, magic : "@Year", match : "([1-2][0-9]{3}|[1-9][0-9]{0,2})" }
{ magic : "@sd", match : /(?:[1-9]|[1-2][0-9]|30|31)/ },
{ magic : "@zd", match : /(?:0[1-9]|[1-2][0-9]|30|31)/ },
{ magic : "@dd", match : /(?:0?[1-9]|[1-2][0-9]|30|31)/ },
{ magic : "@day",match : /(?:(?:[012]?[1-9]|10|20|30|31)(?:st|nd|rd|th|)?)/ },
{ magic : "@sm", match : /(?:[1-9]|10|11|12)/ },
{ magic : "@zm", match : /(?:0[1-9]|10|11|12)/ },
{ magic : "@mm", match : /(?:0?[1-9]|10|11|12)/ },
{
magic : "@fullmonth",
match : /(?:January|February|March|April|May|June|July|August|September|October|November|December)/
},
{
magic : "@month",
match : /(?:January|February|March|April|May|June|July|August|September|October|November|December|Jan\.|Jan|Feb\.|Feb|Mar\.|Mar|Apr\.|Apr|May|Jun\.|Jun|Jul\.|Jul|Aug\.|Aug|Sep\.|Sept\.|Sept|Sep|Oct\.|Oct|Nov\.|Nov|Dec\.|Dec)/
},
{ //must be after month entry
magic : "@mon",
match : /(?:Jan\.|Jan|Feb\.|Feb|Mar\.|Mar|Apr\.|Apr|May|Jun\.|Jun|Jul\.|Jul|Aug\.|Aug|Sep\.|Sept\.|Sept|Sep|Oct\.|Oct|Nov\.|Nov|Dec\.|Dec)/
},
{ magic : "@yyyy", match : /(?:[1-2][0-9]{3})/ },
{ magic : "@yynn", match : "(?:[1-2][0-9]{3}|[0-9]{2})" },
{ magic : "@yy", match : "(?:[0-9]{2})" }, //must be after yyyy and yy24 entries
{ magic : "@year", match : "(?:[1-2][0-9]{3}|[1-9][0-9]{0,2})" },
// misc
{ magic : "@th", match : "(?:th|st|nd|rd)" }
var param = {};
param.index = match.index;
param.type = ParamType.REGULAR;
param.num = pi;
params_by_index[match.index] = param;
pi++;
var index = -1;
while (1) {
index = reg.indexOf(Formats[i].magic, index+1);
if (index == -1)
break;
if (params_by_index[index] === undefined) {
if (magic_per_group[index] > MAX_DATE) {
alert("DATE SCRIPT: unsupported number of dates from the same group");
return;
}
var param = {};
param.index = index;
param.type = Formats[i].type;
param.num = magic_per_group[Formats[i].group];
params_by_index[index] = param;
magic_per_group[Formats[i].group]++;
}
}
}
// pack the resulting array and sort by index
var param_desc = [];
for (var i in params_by_index) {
}
param_desc.sort(function(a,b) {return a.index - b.index;});
//replace magic strings with proper matches
for (var i = 0; i < Formats.length; i++) {
reg = reg.split(Formats[i].magic).join(ohc.dateutil.regex_to_string(Formats[i].match));
}
for (var i = 0; i < NCFormats.length; i++) {
reg = reg.split(NCFormats[i].magic).join(ohc.dateutil.regex_to_string(NCFormats[i].match));
}
reg = reg.split("@@").join("@");
// Inline helper: maps a month name to its 1-based month number.
// Only the first three characters are compared, case-insensitively, against
// the entries of month_names_short; returns -1 when none of them matches.
function parse_month(str) {
	var prefix = str.toLowerCase().slice(0, 3);
	var pos = 0;
	while (pos < month_names_short.length) {
		if (month_names_short[pos].toLowerCase() === prefix) {
			return pos + 1;
		}
		pos += 1;
	}
	return -1;
}
/**
 * Inline helper: expands a 2-digit year string into a full year.
 *
 * Values 20..99 map to 1920..1999 and values 0..19 map to 2000..2019.
 * NOTE(review): the @YY description in the header comment states a 50/49
 * split, while this code pivots at 20 - confirm which one is intended.
 *
 * @param {string} str - digits of the year as matched by the regex
 * @returns {number} the expanded year, or -1 when the parsed value is greater
 *   than 99 (i.e. it is not a 2-digit year). The @YYNN handling relies on
 *   this -1 to fall back to parsing the text as a plain 4-digit year.
 */
function parse_yy(str) {
	var y = parseInt(str, 10);
	if (y > 99) {
		// Not a 2-digit year: report it explicitly instead of falling through
		// and returning undefined, which the "yy == -1" check would not catch.
		return -1;
	}
	else if (y >= 20) {
		return y + 1900;
	}
	else {
		return y + 2000;
	}
}
//
/**
 * Replacement callback: it is handed to String.prototype.replace by the
 * text.replace(rg, regex_worker) call further down in this file.
 *
 * str is the matched text and m0..m19 receive the values that replace()
 * passes after it (the capture groups, in order). Each of the first
 * param_desc.length values is interpreted according to param_desc[i].type:
 * plain groups are stored for $n references, date magic groups are parsed
 * into day / month / year numbers. The substitution template `sub` is then
 * expanded and returned.
 *
 * Returns the expanded replacement string, or the unchanged match `str`
 * when more than 20 parameters are described or when `func` returns false.
 *
 * Relies on names from the enclosing scope: param_desc, ParamType, MAX_DATE,
 * parse_month, parse_yy, month_names, month_names_short, sub, func and
 * debug_reg.
 */
function regex_worker(str, m0, m1, m2, m3, m4, m5, m6, m7, m8, m9, m10,
m11, m12, m13, m14, m15, m16, m17, m18, m19) {
// NOTE(review): MAX_REGEX is not referenced in the visible body; the bound
// check in the parsing loop uses the literal 19 instead.
var MAX_REGEX = 20;
// computed numeric values
var day = [];
var month = [];
var year = [];
// string values to output
var raw_day = [];
var raw_month = [];
var raw_year = [];
var sday = [];
var zday = [];
var smonth = [];
var zmonth = [];
var full_month = [];
var short_month = [];
var year_yy = [];
var year_yyyy = [];
// values of the ordinary (non-magic) capture groups, indexed by their number
var regex_param = [];
//fill in the initial values
// -1 marks a numeric value as "not captured"; the raw and year strings start
// out as "@ERROR@". The other output arrays (sday, zday, smonth, ...) are
// not pre-filled here.
for (var i = 0; i < MAX_DATE; i++) {
raw_day[i] = raw_month[i] = raw_year[i] = "@ERROR@";
year_yy[i] = year_yyyy[i] = "@ERROR@";
day[i] = month[i] = year[i] = -1;
}
// save all arguments as an array
var params = [];
params.push(m0); params.push(m1); params.push(m2); params.push(m3);
params.push(m4); params.push(m5); params.push(m6); params.push(m7);
params.push(m8); params.push(m9); params.push(m10); params.push(m11);
params.push(m12); params.push(m13); params.push(m14); params.push(m15);
params.push(m16); params.push(m17); params.push(m18); params.push(m19);
// parse the arguments according to the specification given as param_desc
for (var i = 0; i < param_desc.length; i++) {
// only m0..m19 are available, so a 21st description cannot be served
if (i > 19) {
alert("DATE SCRIPT: param id out of bounds");
return str;
}
var c_param = params[i]; //current param
var c_desc = param_desc[i]; //current param description
// c_desc.num selects which slot (date number or plain group number) is filled
switch (c_desc.type) {
case ParamType.REGULAR:
regex_param[c_desc.num] = c_param;
break;
case ParamType.SD:
case ParamType.ZD:
case ParamType.DD:
day[c_desc.num] = parseInt(c_param, 10);
raw_day[c_desc.num] = c_param;
break;
case ParamType.DAY:
// strip every non-digit character (e.g. an ordinal suffix) before parsing
day[c_desc.num] = parseInt(c_param.replace(/[^0-9]/g, ''), 10);
raw_day[c_desc.num] = c_param;
break;
case ParamType.SM:
case ParamType.ZM:
case ParamType.MM:
month[c_desc.num] = parseInt(c_param, 10);
raw_month[c_desc.num] = c_param;
break;
case ParamType.FMONTH:
case ParamType.MONTH:
case ParamType.MON:
month[c_desc.num] = parse_month(c_param);
raw_month[c_desc.num] = c_param;
break;
case ParamType.YY:
year[c_desc.num] = parse_yy(c_param);
raw_year[c_desc.num] = c_param;
break;
case ParamType.YEAR:
case ParamType.YYYY:
year[c_desc.num] = parseInt(c_param, 10);
raw_year[c_desc.num] = c_param;
break;
case ParamType.YYNN:
// Try the 2-digit interpretation first and fall back to a plain parse
// when parse_yy reports -1. parse_yy declares a single parameter, so the
// second argument passed here is ignored.
var yy = parse_yy(c_param, 10);
if (yy == -1) {
yy = parseInt(c_param, 10);
}
year[c_desc.num] = yy;
raw_year[c_desc.num] = c_param;
break;
}
}
//catch errors, if any
// NOTE(review): this section looks truncated. The day alert has no visible
// range condition, the loop closes right after it, and the month / year
// checks below (which reset out-of-range values to -1) use i outside the
// loop, followed by an unmatched closing brace. Confirm against the
// complete file.
for (var i = 0; i < MAX_DATE; i++) {
ohc.dateutil.alert_error("Invalid day [" + i + "]=" + day[i], debug_reg);
}
if (month[i] == 0 || month[i] < -1 || month[i] > 12) {
ohc.dateutil.alert_error("Invalid month [" + i + "]=" + month[i], debug_reg);
month[i] = -1;
}
if (year[i] == 0 || year[i] < -1 || year[i] > 9999) {
ohc.dateutil.alert_error("Invalid year [" + i + "]=" + year[i], debug_reg);
year[i] = -1;
}
}
//check whether to make the replacement
// When func is supplied it is called with up to four {d, m, y} objects and
// the match is returned unchanged if it answers false.
// NOTE(review): the declaration of d and the loop that should surround the
// di lines are not visible here - confirm against the complete file.
if (func !== undefined) {
var di = {};
di.d = day[i]; di.m = month[i]; di.y = year[i];
d.push(di);
}
if (func(d[0], d[1], d[2], d[3]) === false) {
return str;
}
}
//compute all needed formats
// NOTE(review): the day branch is incomplete as shown (dangling "if (" and
// no visible assignment of sday / zday) - confirm against the complete file.
for (var i = 0; i < MAX_DATE; i++) {
if (
if (day[i] < 10) {
zday[i] = "0" + zday[i];
}
}
if (month[i] != -1) {
zmonth[i] = smonth[i] = month[i].toString();
if (month[i] < 10) {
zmonth[i] = "0" + zmonth[i];
}
// month is 1-based, the name tables are 0-based
full_month[i] = month_names[month[i]-1];
short_month[i] = month_names_short[month[i]-1];
}
if (year[i] != -1) {
year_yyyy[i] = year[i].toString();
// a 2-digit form is produced only for 1950..2049; outside that range
// year_yy keeps its initial "@ERROR@" value
if (year[i] >= 1950 && year[i] < 2050) {
year_yy[i] = year_yyyy[i].charAt(2) + year_yyyy[i].charAt(3);
}
}
}
//replace
// Every token is substituted literally with split(token).join(value).
var csub = sub;
// References to the ordinary capture groups.
// NOTE(review): "$1" is substituted first, so the "$1" prefix of any
// "$10".."$19" in the template is consumed here, before the two-digit
// lines below run - confirm whether that is intended.
csub = csub.split("$1").join(regex_param[0]);
csub = csub.split("$2").join(regex_param[1]);
csub = csub.split("$3").join(regex_param[2]);
csub = csub.split("$4").join(regex_param[3]);
csub = csub.split("$5").join(regex_param[4]);
csub = csub.split("$6").join(regex_param[5]);
csub = csub.split("$7").join(regex_param[6]);
csub = csub.split("$8").join(regex_param[7]);
csub = csub.split("$9").join(regex_param[8]);
csub = csub.split("$10").join(regex_param[9]);
csub = csub.split("$11").join(regex_param[10]);
csub = csub.split("$12").join(regex_param[11]);
csub = csub.split("$13").join(regex_param[12]);
csub = csub.split("$14").join(regex_param[13]);
csub = csub.split("$15").join(regex_param[14]);
csub = csub.split("$16").join(regex_param[15]);
csub = csub.split("$17").join(regex_param[16]);
csub = csub.split("$18").join(regex_param[17]);
csub = csub.split("$19").join(regex_param[18]);
csub = csub.split("$20").join(regex_param[19]);
// Date tokens. The numbered forms (suffix 4 down to 1) are substituted
// before the unsuffixed ones, so by the time e.g. "@SD" is handled every
// "@SD4".."@SD1" has already been replaced. Within each group the longer
// token goes before the one that is its prefix ("@Month" before "@Mon",
// "@YYYY" / "@YYNN" before "@YY").
// tokens for the fourth date (index 3)
csub = csub.split("@SD4").join(sday[3]);
csub = csub.split("@ZD4").join(zday[3]);
csub = csub.split("@DD4").join(zday[3]);
csub = csub.split("@Day4").join(sday[3]);
csub = csub.split("@LDay4").join(raw_day[3]);
csub = csub.split("@SM4").join(smonth[3]);
csub = csub.split("@ZM4").join(zmonth[3]);
csub = csub.split("@MM4").join(zmonth[3]);
csub = csub.split("@FullMonth4").join(full_month[3]);
csub = csub.split("@Month4").join(full_month[3]);
csub = csub.split("@Mon4").join(short_month[3]);
csub = csub.split("@LMonth4").join(raw_month[3]);
csub = csub.split("@YYYY4").join(year_yyyy[3]);
csub = csub.split("@YYNN4").join(year_yyyy[3]);
csub = csub.split("@Year4").join(year_yyyy[3]);
csub = csub.split("@YY4").join(year_yy[3]);
csub = csub.split("@LYear4").join(raw_year[3]);
// tokens for the third date (index 2)
csub = csub.split("@SD3").join(sday[2]);
csub = csub.split("@ZD3").join(zday[2]);
csub = csub.split("@DD3").join(zday[2]);
csub = csub.split("@Day3").join(sday[2]);
csub = csub.split("@LDay3").join(raw_day[2]);
csub = csub.split("@SM3").join(smonth[2]);
csub = csub.split("@ZM3").join(zmonth[2]);
csub = csub.split("@MM3").join(zmonth[2]);
csub = csub.split("@FullMonth3").join(full_month[2]);
csub = csub.split("@Month3").join(full_month[2]);
csub = csub.split("@Mon3").join(short_month[2]);
csub = csub.split("@LMonth3").join(raw_month[2]);
csub = csub.split("@YYYY3").join(year_yyyy[2]);
csub = csub.split("@YYNN3").join(year_yyyy[2]);
csub = csub.split("@Year3").join(year_yyyy[2]);
csub = csub.split("@YY3").join(year_yy[2]);
csub = csub.split("@LYear3").join(raw_year[2]);
// tokens for the second date (index 1)
csub = csub.split("@SD2").join(sday[1]);
csub = csub.split("@ZD2").join(zday[1]);
csub = csub.split("@DD2").join(zday[1]);
csub = csub.split("@Day2").join(sday[1]);
csub = csub.split("@LDay2").join(raw_day[1]);
csub = csub.split("@SM2").join(smonth[1]);
csub = csub.split("@ZM2").join(zmonth[1]);
csub = csub.split("@MM2").join(zmonth[1]);
csub = csub.split("@FullMonth2").join(full_month[1]);
csub = csub.split("@Month2").join(full_month[1]);
csub = csub.split("@Mon2").join(short_month[1]);
csub = csub.split("@LMonth2").join(raw_month[1]);
csub = csub.split("@YYYY2").join(year_yyyy[1]);
csub = csub.split("@YYNN2").join(year_yyyy[1]);
csub = csub.split("@Year2").join(year_yyyy[1]);
csub = csub.split("@YY2").join(year_yy[1]);
csub = csub.split("@LYear2").join(raw_year[1]);
// explicit "1" suffix: same values as the unsuffixed tokens (index 0)
csub = csub.split("@SD1").join(sday[0]);
csub = csub.split("@ZD1").join(zday[0]);
csub = csub.split("@DD1").join(zday[0]);
csub = csub.split("@Day1").join(sday[0]);
csub = csub.split("@LDay1").join(raw_day[0]);
csub = csub.split("@SM1").join(smonth[0]);
csub = csub.split("@ZM1").join(zmonth[0]);
csub = csub.split("@MM1").join(zmonth[0]);
csub = csub.split("@FullMonth1").join(full_month[0]);
csub = csub.split("@Month1").join(full_month[0]);
csub = csub.split("@Mon1").join(short_month[0]);
csub = csub.split("@LMonth1").join(raw_month[0]);
csub = csub.split("@YYYY1").join(year_yyyy[0]);
csub = csub.split("@YYNN1").join(year_yyyy[0]);
csub = csub.split("@Year1").join(year_yyyy[0]);
csub = csub.split("@YY1").join(year_yy[0]);
csub = csub.split("@LYear1").join(raw_year[0]);
// unsuffixed tokens: first date (index 0)
csub = csub.split("@SD").join(sday[0]);
csub = csub.split("@ZD").join(zday[0]);
csub = csub.split("@DD").join(zday[0]);
csub = csub.split("@Day").join(sday[0]);
csub = csub.split("@LDay").join(raw_day[0]);
csub = csub.split("@SM").join(smonth[0]);
csub = csub.split("@ZM").join(zmonth[0]);
csub = csub.split("@MM").join(zmonth[0]);
csub = csub.split("@FullMonth").join(full_month[0]);
csub = csub.split("@Month").join(full_month[0]); //this must be executed before the @Mon rule
csub = csub.split("@Mon").join(short_month[0]);
csub = csub.split("@LMonth").join(raw_month[0]);
csub = csub.split("@YYYY").join(year_yyyy[0]); //this must be executed before the @YY rule
csub = csub.split("@YYNN").join(year_yyyy[0]);
csub = csub.split("@Year").join(year_yyyy[0]);
csub = csub.split("@YY").join(year_yy[0]);
csub = csub.split("@LYear").join(raw_year[0]);
// finally turn the escaped "@@" into a literal "@"
csub = csub.split("@@").join("@");
return csub;
}
//check whether a simple regex (i.e. without using regex_worker) would suffice
var
for (var i = 0; i < param_desc.length; i++) {
if (param_desc[i].type != ParamType.REGULAR) {
simple_regex = false;
break;
}
}
//do the replacement
try {
var rg = new RegExp(reg,'gi');
if (simple_regex == true)
text = text.replace(rg, sub);
else
text = text.replace(rg, regex_worker);
var aa_reg = debug_reg; //place for a breakpoint for debugging
}
catch(err) {
var message = "Error in regex execution\n" + "ERROR: " + err.message + "\n";
ohc.dateutil.alert_error(message, debug_reg);
}
}
};
// Adds an ohc_regex(rg, sub, func) method to every string.
// NOTE(review): the return statement below has no expression, so as written
// the method returns undefined and none of rg, sub or func is used. The
// returned expression appears to have been cut off - confirm against the
// complete file.
String.prototype.ohc_regex = function(rg, sub, func) {
return
};
|