-- Module table; functions meant for callers of this module are attached to it.
local main = {}
-- Full English month names, lower-cased, mapped to their month number (1-12).
local monthIndices = {
	january = 1,
	february = 2,
	march = 3,
	april = 4,
	may = 5,
	june = 6,
	july = 7,
	august = 8,
	september = 9,
	october = 10,
	november = 11,
	december = 12,
}
-- Three-letter month abbreviations, lower-cased, mapped to their month number (1-12).
-- 'may' is spelled the same as the full name, so a lookup in the full-name
-- table that runs first will already have matched it.
local monthShortIndices = {}
for monthNumber, abbreviation in ipairs({
	'jan', 'feb', 'mar', 'apr', 'may', 'jun',
	'jul', 'aug', 'sep', 'oct', 'nov', 'dec',
}) do
	monthShortIndices[abbreviation] = monthNumber
end
-- Maximum number of days per month, indexed by month number (1-12).
-- February is listed with 29 days; whether the 29th really exists in a given
-- year is decided separately by the leap-year check in the day validation.
local monthDays = {
	31, 29, 31, 30, 31, 30, -- January .. June
	31, 31, 30, 31, 30, 31, -- July .. December
}
--- Checks whether a day number exists in the given month (and year).
-- @param day   day of the month as a number
-- @param month month number (1-12); a missing or unknown month is rejected
-- @param year  year as a number, or nil when unknown (29 February is then accepted)
-- @return true when the day exists, false otherwise
function checkIfDayValid(day, month, year)
	local daysInMonth = monthDays[month]
	-- Without a known month there is nothing to validate the day against
	if (not daysInMonth) then return false end
	-- Day 0 gets through the [0-9] pattern but is not a real day
	if (day < 1 or day > daysInMonth) then return false end
	-- 29 February only exists in leap years; this can only be checked when the year is known
	if (month == 2 and day == 29 and year) then
		local isLeapYear = (year % 4 == 0 and year % 100 ~= 0) or (year % 400 == 0)
		if (not isLeapYear) then return false end
	end
	return true
end
-- Tells whether a parsed month number can be a real month.
-- Only zero and values above 12 are rejected; negative numbers are not
-- considered because the [0-9] pattern cannot produce them.
function checkIfMonthValid(month)
	if (month == 0) then return false end
	return month <= 12
end
-- Tells whether a parsed year is acceptable: anything from 1583 onwards.
-- No upper bound is tested here; four-digit input tops out at 9999.
function checkIfYearValid(year)
	local earliestAcceptedYear = 1583
	return earliestAcceptedYear <= year
end
-- Tells whether a parsed hour fits a 24-hour clock (0-23).
-- Negative values are not tested; the [0-9] pattern cannot produce them.
function checkIfHourValid(hour)
	local hoursPerDay = 24
	return hoursPerDay > hour
end
-- Tells whether a parsed minute is within 0-59.
-- Negative values are not tested; the [0-9] pattern cannot produce them.
function checkIfMinuteValid(minute)
	local minutesPerHour = 60
	return minutesPerHour > minute
end
-- Tells whether a parsed second is within 0-59.
-- Negative values are not tested; the [0-9] pattern cannot produce them.
function checkIfSecondValid(second)
	local secondsPerMinute = 60
	return secondsPerMinute > second
end
-- Validates every date/time part that was supplied and then renders the parts as
-- an ISO-style string ('YYYY-MM-DD hh:mm:ss'), cut off after the most precise part given.
-- A nil argument means "part not present". The format chosen for a part also prints
-- every coarser part, so parts are expected to be filled in from the year downwards.
-- Returns nil when a supplied part fails its check or when nothing was supplied at all.
function checkAndOutput(year, month, day, hour, minute, second)
	-- Each part is either absent or must pass its own validator
	local everythingValid = (not year or checkIfYearValid(year))
		and (not month or checkIfMonthValid(month))
		and (not day or checkIfDayValid(day, month, year))
		and (not hour or checkIfHourValid(hour))
		and (not minute or checkIfMinuteValid(minute))
		and (not second or checkIfSecondValid(second))
	if (not everythingValid) then return nil end

	-- Pick the output precision from the finest part that is present
	if (second) then
		return string.format('%d-%02d-%02d %02d:%02d:%02d', year, month, day, hour, minute, second)
	end
	if (minute) then
		return string.format('%d-%02d-%02d %02d:%02d', year, month, day, hour, minute)
	end
	if (hour) then
		return string.format('%d-%02d-%02d %02d', year, month, day, hour)
	end
	if (day) then
		return string.format('%d-%02d-%02d', year, month, day)
	end
	if (month) then
		return string.format('%d-%02d', year, month)
	end
	if (year) then
		return string.format('%d', year)
	end
	return nil -- no part was supplied
end
--- Returns the number of hours to add to a 12-hour clock value for an am/pm marker.
-- The marker is compared after removing whitespace and dots and lower-casing it,
-- because the scanner hands over its raw match (e.g. ' pm', ' p.m.', 'pm.').
-- @param period am/pm marker text; anything that is not a string counts as "no marker"
-- @param hour   optional 12-hour clock hour; when given, 12 o'clock is converted
--               correctly (12 am -> offset -12, 12 pm -> offset 0)
-- @return offset to add to the hour: 12 for pm, 0 otherwise (apart from the 12 o'clock cases above)
function periodHourAdd(period, hour)
	if (type(period) ~= 'string') then return 0 end
	-- Extra parentheses keep only the first result of gsub (the string, not the count)
	local marker = string.lower((string.gsub(period, '[%s%.]', '')))
	if (marker == 'pm') then
		if (hour == 12) then return 0 end -- 12 pm is noon, already 12 on a 24-hour clock
		return 12
	end
	if (marker == 'am' and hour == 12) then
		return -12 -- 12 am is midnight, 0 on a 24-hour clock
	end
	return 0
end
local seekString -- the string currently being scanned; kept here so it need not be passed as a parameter every time
local currentPosition -- index in seekString at which the next element is looked for
-- These are the element type "constants", for readability mostly
local ELEMENT_INVALID = 1
local ELEMENT_ONETWODIGITS = 2
local ELEMENT_FOURDIGITS = 3
local ELEMENT_WHITESPACE = 4
local ELEMENT_MONTHWORD = 5
local ELEMENT_COMMA = 6
local ELEMENT_DASH = 7
local ELEMENT_DATESEPARATOR = 8
local ELEMENT_TIMESEPARATOR = 9
-- am/pm marker. This must be spelled ELEMENT_TIMEPERIOD: that is the name the scanner
-- and the parser read, and any other spelling leaves them comparing against nil.
local ELEMENT_TIMEPERIOD = 10
--- Scans one element of seekString starting at currentPosition and moves
-- currentPosition just past it.
-- @return element type, one of the ELEMENT_* constants; for ELEMENT_INVALID this is the only return value
-- @return element value: a number for digit runs, the month number for month words,
--         otherwise the matched text (including any whitespace the pattern consumed)
-- @return true when the element ended at the end of seekString, false otherwise
function seekNextElement()
	-- One set of locals is reused for every pattern attempt below
	local foundPositionStart, foundPositionEnd, foundMatch

	-- Digits
	foundPositionStart, foundPositionEnd, foundMatch = seekString:find('^([0-9]+)', currentPosition)
	if (foundPositionStart) then
		currentPosition = foundPositionEnd + 1 -- this is our new start ___location
		-- Additionally check how many digits we actually have, as arbitrary number isn't valid
		if (#foundMatch <= 2) then
			return ELEMENT_ONETWODIGITS, tonumber(foundMatch), (currentPosition > #seekString)
		elseif (#foundMatch == 4) then
			return ELEMENT_FOURDIGITS, tonumber(foundMatch), (currentPosition > #seekString)
		end
		return ELEMENT_INVALID -- just the invalid, the number of digits won't match any pattern
	end

	-- Word
	foundPositionStart, foundPositionEnd, foundMatch = seekString:find('^([A-Za-z]+)', currentPosition)
	if (foundPositionStart) then
		currentPosition = foundPositionEnd + 1 -- this is our new start ___location
		if (#foundMatch < 3) then
			-- TODO LETTERS
			-- NOTE: an am/pm marker written directly after the digits (e.g. '10am') ends up
			-- here, because the word pattern is tried before the time period pattern
			return ELEMENT_INVALID -- just the invalid, the word was too short to be valid month name
		end
		-- Find the possible month name index: full names first, then abbreviations.
		-- Kept local so the lookup result does not leak into the global environment.
		local lowerWord = string.lower(foundMatch)
		local monthIndex = monthIndices[lowerWord] or monthShortIndices[lowerWord]
		if (monthIndex) then
			return ELEMENT_MONTHWORD, monthIndex, (currentPosition > #seekString)
		end
		return ELEMENT_INVALID -- just the invalid, the word didn't match a valid month name
	end

	-- Dash with possible whitespace or Date separator (dash without whitespace)
	foundPositionStart, foundPositionEnd, foundMatch = seekString:find('^(%s*%-%s*)', currentPosition)
	if (foundPositionStart) then
		currentPosition = foundPositionEnd + 1 -- this is our new start ___location
		if (#foundMatch == 1) then
			return ELEMENT_DATESEPARATOR, foundMatch, (currentPosition > #seekString)
		end
		return ELEMENT_DASH, foundMatch, (currentPosition > #seekString)
	end

	-- Time separator (colon without whitespace)
	foundPositionStart, foundPositionEnd, foundMatch = seekString:find('^(:)', currentPosition)
	if (foundPositionStart) then
		currentPosition = foundPositionEnd + 1 -- this is our new start ___location
		return ELEMENT_TIMESEPARATOR, foundMatch, (currentPosition > #seekString)
	end

	-- Comma and any following whitespace
	foundPositionStart, foundPositionEnd, foundMatch = seekString:find('^(,%s*)', currentPosition)
	if (foundPositionStart) then
		currentPosition = foundPositionEnd + 1 -- this is our new start ___location
		return ELEMENT_COMMA, foundMatch, (currentPosition > #seekString)
	end

	-- Time period - am/pm (tried before plain whitespace so that ' pm' is taken as one element)
	foundPositionStart, foundPositionEnd, foundMatch = seekString:find('^(%s*[ap]%.?m%.?)', currentPosition)
	if (foundPositionStart) then
		currentPosition = foundPositionEnd + 1 -- this is our new start ___location
		return ELEMENT_TIMEPERIOD, foundMatch, (currentPosition > #seekString)
	end

	-- Whitespace
	foundPositionStart, foundPositionEnd, foundMatch = seekString:find('^(%s+)', currentPosition)
	if (foundPositionStart) then
		currentPosition = foundPositionEnd + 1 -- this is our new start ___location
		return ELEMENT_WHITESPACE, foundMatch, (currentPosition > #seekString)
	end

	return ELEMENT_INVALID -- just the invalid, we won't be parsing this further
end
--- Parses a free-form date (optionally with a time) into the string produced by checkAndOutput.
-- The input is first split into elements with seekNextElement; the resulting sequence of
-- element types is then matched against the supported layouts.
-- @param input text to parse; anything that is not a string is treated as unparsable
-- @return the formatted date string, or nil when the input contains an unknown element,
--         matches no supported layout, or fails validation in checkAndOutput
function parseDateString(input)
	-- Nothing to scan; bail out instead of failing on the string method call below
	if (type(input) ~= 'string') then return nil end

	-- Reset our seek string and position
	seekString = input
	currentPosition = 1
	local elements = {}
	local values = {}
	-- Seek the entire string now
	local numberOfElements = 0
	repeat
		-- Locals of a repeat body stay visible in the until condition
		local foundElement, foundValue, eos = seekNextElement()
		-- If we found something we can't process, return as unparsable
		if (foundElement == ELEMENT_INVALID) then return nil end
		numberOfElements = numberOfElements + 1
		elements[numberOfElements] = foundElement
		values[numberOfElements] = foundValue
	until eos
	--[[
	local s = input .. ' -> ' .. numberOfElements .. ' elements: '
	for currentElementIndex = 1, numberOfElements do
	s = s .. ' #' .. elements[currentElementIndex] .. '=' .. values[currentElementIndex]
	end
	do return s end
	]]

	-- Day-first layouts: 'd M y', optionally followed by a time
	if (elements[1] == ELEMENT_ONETWODIGITS -- '3'
		and elements[2] == ELEMENT_WHITESPACE -- '3 '
		and elements[3] == ELEMENT_MONTHWORD -- '3 May'
		and elements[4] == ELEMENT_WHITESPACE -- '3 May '
		and elements[5] == ELEMENT_FOURDIGITS) then -- '3 May 2013'
		local year, month, day = values[5], values[3], values[1]
		if (numberOfElements == 5) then return checkAndOutput(year, month, day, nil, nil, nil) end
		if ((elements[6] == ELEMENT_WHITESPACE or elements[6] == ELEMENT_COMMA) -- '3 May 2013, '
			and elements[7] == ELEMENT_ONETWODIGITS) then -- '3 May 2013, 10'
			local hour = values[7]
			-- The hour is handed to periodHourAdd as well, so that an implementation
			-- aware of it can treat 12 am / 12 pm specially
			if (elements[8] == ELEMENT_TIMEPERIOD) then -- '3 May 2013, 10 am'
				if (numberOfElements == 8) then
					return checkAndOutput(year, month, day, hour + periodHourAdd(values[8], hour), nil, nil)
				end
			end
			if (elements[8] == ELEMENT_TIMESEPARATOR -- '3 May 2013, 10:'
				and elements[9] == ELEMENT_ONETWODIGITS) then -- '3 May 2013, 10:38'
				local minute = values[9]
				if (numberOfElements == 9) then return checkAndOutput(year, month, day, hour, minute, nil) end
				if (elements[10] == ELEMENT_TIMEPERIOD) then -- '3 May 2013, 10:38 am'
					if (numberOfElements == 10) then
						return checkAndOutput(year, month, day, hour + periodHourAdd(values[10], hour), minute, nil)
					end
				end
				if (elements[10] == ELEMENT_TIMESEPARATOR -- '3 May 2013, 10:38:'
					and elements[11] == ELEMENT_ONETWODIGITS) then -- '3 May 2013, 10:38:27'
					local second = values[11]
					if (numberOfElements == 11) then return checkAndOutput(year, month, day, hour, minute, second) end
					if (elements[12] == ELEMENT_TIMEPERIOD) then -- '3 May 2013, 10:38:27 am'
						if (numberOfElements == 12) then
							return checkAndOutput(year, month, day, hour + periodHourAdd(values[12], hour), minute, second)
						end
					end
				end
			end
		end
	end
	-- Outline of the layouts that may start with 1-2 digits; 'Done' appears to mark a
	-- point where the input may end. Only part of this outline is implemented above.
	--[[
	1-2 digits
	Word (month name)
	Done -- d M y (3 May)
	Whitespace
	4 digits
	Done -- d M y (3 May 2013)
	Whitespace? and Dash
	1-2 digits
	Word (month name)
	4 digits
	Done -- d M Y - d M Y (13 May 2013 – 16 February 2014)
	Comma? and Whitespace
	1-2 digits
	Whitespace and/or Letters (am/pm)
	Done -- d M y, h p (3 May 2013, 10 am)
	Time separator
	1-2 digits
	Done -- d M y, h:m (3 May 2013, 10:38)
	Whitespace and/or Letters (am/pm)
	Done -- d M y, h:m p (3 May 2013, 10:38 am)
	Time separator
	1-2 digits
	Done -- d M y, h:m:s (3 May 2013, 10:38:27)
	Whitespace and/or Letters (am/pm)
	Done -- d M y, h:m:s p (3 May 2013, 10:38:27 am)
	Whitespace? and Dash
	1-2 digits
	Word (month name)
	Done -- d M - d M (13 May – 16 June)
	4 digits
	Done -- d M - d M y (13 May – 16 June 2013)
	Whitespace and/or Letters (am/pm)
	Done -- h p (10 am)
	Time separator
	1-2 digits
	Done -- h:m (10:38)
	Whitespace and/or Letters (am/pm)
	Done -- h:m p (10:38 am)
	Time separator
	1-2 digits
	Done -- h:m:s (10:38:27)
	Whitespace and/or Letters (am/pm)
	Done -- h:m:s p (10:38:27 am)
	Dash
	1-2 digits
	Whitespace
	Word (month name)
	Whitespace
	4 digits
	Done -- d-d M y (3–16 May 2013)
	Done -- d-d M (3–16 May)
	]]

	-- Year-first layouts: 'y', 'y-m', 'y-m-d'
	if (elements[1] == ELEMENT_FOURDIGITS) then
		if (numberOfElements == 1) then return checkAndOutput(values[1], nil, nil, nil, nil, nil) end
		if (elements[2] == ELEMENT_DATESEPARATOR and elements[3] == ELEMENT_ONETWODIGITS) then
			if (numberOfElements == 3) then return checkAndOutput(values[1], values[3], nil, nil, nil, nil) end
			if (elements[4] == ELEMENT_DATESEPARATOR and elements[5] == ELEMENT_ONETWODIGITS) then
				if (numberOfElements == 5) then return checkAndOutput(values[1], values[3], values[5], nil, nil, nil) end
			end
		end
	end
	-- Outline of the layouts that may start with 4 digits; only part of it is implemented above.
	--[[
	4 digits
	Done -- y (2013)
	ymd date separator
	1-2 digits
	Done -- y-m (2013-05)
	ymd date separator
	1-2 digits
	Done -- y-m-d (2013-05-03)
	Whitespace and/or Dash?
	1-2 digits
	Done -- y-m-d h (2013-05-03 10)
	Time separator
	1-2 digits
	Done -- y-m-d h:m (2013-05-03 10:38)
	Time separator
	1-2 digits
	Done -- y-m-d h:m:s (2013-05-03 10:38:27)
	Dash
	4 digits
	Done -- y-y (2013-2014)
	]]

	-- Month-first layouts: 'M d, y' and 'M y'
	if (elements[1] == ELEMENT_MONTHWORD and elements[2] == ELEMENT_WHITESPACE) then
		if (elements[3] == ELEMENT_ONETWODIGITS) then
			if (elements[4] == ELEMENT_COMMA and elements[5] == ELEMENT_FOURDIGITS) then
				if (numberOfElements == 5) then return checkAndOutput(values[5], values[1], values[3], nil, nil, nil) end
			end
		elseif (elements[3] == ELEMENT_FOURDIGITS) then
			if (numberOfElements == 3) then return checkAndOutput(values[3], values[1], nil, nil, nil, nil) end
		end
	end
	-- Outline of the layouts that may start with a month name; only part of it is implemented above.
	--[[
	Word (month name)
	Done -- M (May)
	Whitespace
	1-2 digits
	Done -- M d (May 3)
	Comma and Whitespace
	4 digits
	Done -- M d, y (May 3, 2013) {{tick}}
	Whitespace? and Dash
	Word (month name)
	4 digits
	Done -- M d, Y - M d, y (May 3, 2013 – February 16, 2014)
	Comma? and Whitespace
	1-2 digits
	Whitespace and/or Letters (am/pm)
	Done -- M d, Y, h p (May 3, 2013, 10 am)
	Time separator
	1-2 digits
	Done -- M d, Y, h:m (May 3, 2013, 10:38)
	Whitespace and/or Letters (am/pm)
	Done -- M d, Y, h:m p (May 3, 2013, 10:38 am)
	Time separator
	1-2 digits
	Done -- M d, Y, h:m:s (May 3, 2013, 10:38:27)
	Whitespace and/or Letters (am/pm)
	Done -- M d, Y, h:m:s p (May 3, 2013, 10:38:27 am)
	Whitespace? and Dash
	1-2 digits
	Done -- M d-d (May 3–16)
	Comma and Whitespace
	4 digits
	Done -- M d-d, y (May 3–16, 2013)
	Word (month name)
	Done -- M d - M d, y (May 13 – June 16)
	Comma and Whitespace
	4 digits
	Done -- M d - M d, y (May 13 – June 16, 2013)
	4 digits
	Done -- M y (May 2013) {{tick}}
	Whitespace? and Dash
	Word (month name)
	4 digits
	Done -- M y - M y (May 2013 – February 2014) {{tick}}
	Whitespace? and Dash
	Word (month name)
	Done -- M - M (May – June)
	Whitespace
	4 digits
	Done -- M - M y (May – June 2013)
	]]
	return nil -- the combination of elements was not a recognized one
end
-- Entry point exported on the module table: takes the first positional
-- argument from frame.args and returns whatever parseDateString yields for it.
function main.parseDate(frame)
	local dateText = frame.args[1]
	return parseDateString(dateText)
end
--[[
function main.parseField(frame)
local input = frame.args[1]
matchNoRef = input:match('^([^<]*)<ref[^>]*>[^<]*</ref>$') -- basic ref
if (matchNoRef) then
return parseDateString(matchNoRef)
else
return parseDateString(input)
end
end
]]
--[[
function main.emitMetadata(frame)
-- First parse the date and see if we get a valid output date
local date = parseDateString(frame.args[1])
if (not date) then return nil end
local spanClass = frame.args.spanClass or 'bday dtstart published updated'
return '<span style="display:none"> (<span class="' .. spanClass .. '">' .. date .. '</span>)</span>'
end
]]
-- Hand the module table (carrying main.parseDate) back to whoever loads this file
return main