-- Module:Sandbox/Hellknowz/Test
--
-- This is an old revision of this page, as edited by Hellknowz (talk | contribs) at 18:01, 31 August 2013 (6). The present address (URL) is a permanent link to this revision, which may differ significantly from the current revision.
-- Table returned at the end of the module; exported functions are attached to it
local main = {}

-- Lookup tables from lowercase month names to month numbers (1-12):
-- monthIndices is keyed by the full name, monthShortIndices by its first three letters
local monthIndices = {}
local monthShortIndices = {}

for monthNumber, monthName in ipairs({
    'january', 'february', 'march', 'april', 'may', 'june',
    'july', 'august', 'september', 'october', 'november', 'december'
}) do
    monthIndices[monthName] = monthNumber
    -- 'may' is its own abbreviation; the long table would have caught it already
    monthShortIndices[monthName:sub(1, 3)] = monthNumber
end

-- Largest day number each month (1-12) can have.
-- February is listed with 29; checkIfDayValid rejects 29 February in non-leap years.
local monthDays = { 31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31 }

-- Tells whether `day` exists in the given month of the given year.
--   day   - day of the month (number)
--   month - month number, 1-12; any other value (including nil) makes the result false
--   year  - year used for the 29 February leap-year rule; when it is nil the
--           leap-year rule cannot be applied and 29 February is accepted
-- Returns true or false.
function checkIfDayValid(day, month, year)
    local daysInMonth = monthDays[month]

    -- Unknown or missing month: there is nothing to validate the day against
    if (not daysInMonth) then return false end

    -- The day must lie within 1..daysInMonth (a [0-9] pattern can still yield 0)
    if (day < 1 or day > daysInMonth) then return false end

    -- February leap year check
    if (month == 2 and day == 29 and year) then
        local isLeapYear = (year % 4 == 0 and year % 100 ~= 0) or (year % 400 == 0)
        if (not isLeapYear) then return false end
    end

    return true
end

-- Tells whether `month` is a usable month number: anything other than 0, up to and including 12.
-- Negative numbers are not checked because the value comes from a [0-9] pattern.
function checkIfMonthValid(month)
    if (month == 0) then return false end
    return month <= 12
end

-- Tells whether `year` is accepted: it has to be 1583 or later.
-- No upper bound is tested here; the original note says values go up to 9999.
function checkIfYearValid(year)
    local earliestYear = 1583
    if (year >= earliestYear) then return true end
    return false
end

-- Tells whether `hour` is below 24.
-- Negative numbers are not checked because the value comes from a [0-9] pattern.
function checkIfHourValid(hour)
    local hoursPerDay = 24
    if (hour < hoursPerDay) then return true end
    return false
end

-- Tells whether `minute` is below 60.
-- Negative numbers are not checked because the value comes from a [0-9] pattern.
function checkIfMinuteValid(minute)
    local minutesPerHour = 60
    if (minute < minutesPerHour) then return true end
    return false
end

-- Tells whether `second` is below 60.
-- Negative numbers are not checked because the value comes from a [0-9] pattern.
function checkIfSecondValid(second)
    local secondsPerMinute = 60
    if (second < secondsPerMinute) then return true end
    return false
end

-- Validates whichever date/time components were supplied and renders them as text.
-- Any argument may be nil, meaning "not given". Supplied components are checked with
-- the checkIf...Valid functions; if one of them fails the result is nil.
-- Output shapes:
--   time only (no year):  'HH', 'HH:MM' or 'HH:MM:SS'
--   date only:            'Y', 'Y-MM' or 'Y-MM-DD'
--   date and time:        'Y-MM-DD HH', 'Y-MM-DD HH:MM' or 'Y-MM-DD HH:MM:SS'
-- Returns nil as well when no component was given at all.
function checkAndOutput(year, month, day, hour, minute, second)

    -- Checks run in the order year, month, day, hour, minute, second and stop at the first failure
    local rejected = (year and not checkIfYearValid(year))
        or (month and not checkIfMonthValid(month))
        or (day and not checkIfDayValid(day, month, year))
        or (hour and not checkIfHourValid(hour))
        or (minute and not checkIfMinuteValid(minute))
        or (second and not checkIfSecondValid(second))
    if (rejected) then return nil end

    -- The finest time component present decides the precision; the year decides
    -- whether the date is put in front of the time
    if (second) then
        if (year) then
            return string.format('%d-%02d-%02d %02d:%02d:%02d', year, month, day, hour, minute, second)
        end
        return string.format('%02d:%02d:%02d', hour, minute, second)
    end

    if (minute) then
        if (year) then
            return string.format('%d-%02d-%02d %02d:%02d', year, month, day, hour, minute)
        end
        return string.format('%02d:%02d', hour, minute)
    end

    if (hour) then
        if (year) then
            return string.format('%d-%02d-%02d %02d', year, month, day, hour)
        end
        return string.format('%02d', hour)
    end

    -- No time at all: date only, again at the finest precision present
    if (day) then return string.format('%d-%02d-%02d', year, month, day) end
    if (month) then return string.format('%d-%02d', year, month) end
    if (year) then return string.format('%d', year) end

    return nil -- nothing was supplied

end

-- Spellings that mark the afternoon half of the day; the variants with a stray '.'
-- are artifacts of the pattern that captures the marker
local afternoonMarkers = {
    ['pm'] = true,
    ['p.m'] = true,
    ['pm.'] = true,
    ['p.m.'] = true
}

-- Returns the number of hours to add for a time period marker:
-- 12 for any of the p.m. spellings, 0 for everything else.
function periodHourAdd(period)
    if (afternoonMarkers[period]) then return 12 end
    return 0
end

local seekString -- the string currently being scanned; file-local so it need not be passed as a parameter every time

local currentPosition -- 1-based position in seekString where the next element is looked for

-- These are the element type "constants" for readability mostly
local ELEMENT_INVALID = 1
local ELEMENT_ONETWODIGITS = 2
local ELEMENT_FOURDIGITS = 3
local ELEMENT_WHITESPACE = 4
local ELEMENT_MONTHWORD = 5
local ELEMENT_COMMA = 6
local ELEMENT_DASH = 7
local ELEMENT_DATESEPARATOR = 8
local ELEMENT_TIMESEPARATOR = 9
-- Previously misspelled ELEMENT_TIMETIMEPERIOD, which left the name the rest of the
-- file uses (ELEMENT_TIMEPERIOD) undefined, i.e. nil
local ELEMENT_TIMEPERIOD = 10

-- Tries to match `pattern` at currentPosition in seekString (the patterns passed in
-- are anchored with '^', so the match has to start exactly there).
-- On success the matched text is consumed by moving currentPosition just past it and
-- the pattern's first capture is returned; otherwise nothing changes and nil is returned.
local function consumeMatch(pattern)
    local matchStart, matchEnd, capture = seekString:find(pattern, currentPosition)
    if (not matchStart) then return nil end
    currentPosition = matchEnd + 1 -- this is our new start location
    return capture
end

-- Reads the next element of seekString starting at currentPosition and advances
-- currentPosition past it.
-- Returns three values: the element type (one of the ELEMENT_* constants), the element
-- value (a number for digits, the month number for a month word, the matched text
-- otherwise) and a boolean that is true once the end of seekString has been reached.
-- When the text cannot be recognised only ELEMENT_INVALID is returned.
-- NOTE(review): the time period branch returns ELEMENT_TIMEPERIOD, so that name has to
-- be declared together with the other ELEMENT_* constants.
function seekNextElement()

    -- Digits
    local text = consumeMatch('^([0-9]+)')
    if (text) then
        -- Additionally check how many digits we actually have, as arbitrary number isn't valid
        if (#text <= 2) then
            return ELEMENT_ONETWODIGITS, tonumber(text), (currentPosition > #seekString)
        elseif (#text == 4) then
            return ELEMENT_FOURDIGITS, tonumber(text), (currentPosition > #seekString)
        end
        return ELEMENT_INVALID -- the number of digits won't match any pattern
    end

    -- Time period - a.m./p.m. (before letters); any whitespace in front of it is consumed too
    text = consumeMatch('^%s*([ap]%.?m%.?)')
    if (text) then
        return ELEMENT_TIMEPERIOD, text, (currentPosition > #seekString)
    end

    -- Word
    text = consumeMatch('^([A-Za-z]+)')
    if (text) then
        if (#text < 3) then
            -- TODO LETTERS
            return ELEMENT_INVALID -- the word was too short to be a valid month name
        end

        -- Find the possible month name index: full names first, then three-letter forms.
        -- Kept local so the lookup result does not leak into the global environment.
        local lowered = string.lower(text)
        local monthIndex = monthIndices[lowered] or monthShortIndices[lowered]

        if (monthIndex) then
            return ELEMENT_MONTHWORD, monthIndex, (currentPosition > #seekString)
        end
        return ELEMENT_INVALID -- the word didn't match a valid month name
    end

    -- Dash with possible whitespace or Date separator (dash without whitespace)
    text = consumeMatch('^(%s*%-%s*)')
    if (text) then
        if (#text == 1) then
            return ELEMENT_DATESEPARATOR, text, (currentPosition > #seekString)
        end
        return ELEMENT_DASH, text, (currentPosition > #seekString)
    end

    -- Time separator (colon without whitespace)
    text = consumeMatch('^(:)')
    if (text) then
        return ELEMENT_TIMESEPARATOR, text, (currentPosition > #seekString)
    end

    -- Comma and any following whitespace
    text = consumeMatch('^(,%s*)')
    if (text) then
        return ELEMENT_COMMA, text, (currentPosition > #seekString)
    end

    -- Whitespace (after all others that capture whitespace)
    text = consumeMatch('^(%s+)')
    if (text) then
        return ELEMENT_WHITESPACE, text, (currentPosition > #seekString)
    end

    return ELEMENT_INVALID -- we won't be parsing this further

end

-- Converts an hour written on the 12-hour clock into a 24-hour clock hour using its
-- a.m./p.m. marker. 12 counts as 0 before the offset from periodHourAdd is added, so
-- '12 am' gives 0 and '12 pm' gives 12; any other hour only receives the offset.
local function applyTimePeriod(hour, period)
    if (hour == 12) then hour = 0 end
    return hour + periodHourAdd(period)
end

-- Handles the time that may follow a complete date occupying elements 1-5.
-- Elements 6 onwards must read: whitespace or comma, hour, then one of
--   ' am'            (8 elements in total)
--   ':38'            (9)
--   ':38 am'         (10)
--   ':38:27'         (11)
--   ':38:27 am'      (12)
-- year/month/day are the already extracted date values.
-- Returns the result of checkAndOutput, or nil when the elements do not form such a time.
local function parseTimeAfterDate(elements, values, numberOfElements, year, month, day)

    if (elements[6] ~= ELEMENT_WHITESPACE and elements[6] ~= ELEMENT_COMMA) then return nil end -- '3 May 2013, '
    if (elements[7] ~= ELEMENT_ONETWODIGITS) then return nil end -- '3 May 2013, 10'
    local hour = values[7]

    if (elements[8] == ELEMENT_TIMEPERIOD) then -- '3 May 2013, 10 am'
        if (numberOfElements ~= 8) then return nil end
        return checkAndOutput(year, month, day, applyTimePeriod(hour, values[8]), nil, nil)
    end

    if (elements[8] ~= ELEMENT_TIMESEPARATOR) then return nil end -- '3 May 2013, 10:'
    if (elements[9] ~= ELEMENT_ONETWODIGITS) then return nil end -- '3 May 2013, 10:38'
    local minute = values[9]
    if (numberOfElements == 9) then return checkAndOutput(year, month, day, hour, minute, nil) end

    if (elements[10] == ELEMENT_TIMEPERIOD) then -- '3 May 2013, 10:38 am'
        if (numberOfElements ~= 10) then return nil end
        return checkAndOutput(year, month, day, applyTimePeriod(hour, values[10]), minute, nil)
    end

    if (elements[10] ~= ELEMENT_TIMESEPARATOR) then return nil end -- '3 May 2013, 10:38:'
    if (elements[11] ~= ELEMENT_ONETWODIGITS) then return nil end -- '3 May 2013, 10:38:27'
    local second = values[11]
    if (numberOfElements == 11) then return checkAndOutput(year, month, day, hour, minute, second) end

    -- '3 May 2013, 10:38:27 am' is twelve elements long
    if (elements[12] == ELEMENT_TIMEPERIOD and numberOfElements == 12) then
        return checkAndOutput(year, month, day, applyTimePeriod(hour, values[12]), minute, second)
    end

    return nil
end

-- Parses a date and/or time written in one of the recognised layouts and returns it
-- in the normalised form produced by checkAndOutput.
-- Recognised layouts (the date layouts marked * may be followed by a time as
-- described at parseTimeAfterDate):
--   '3 May 2013' *           '2013-05-03' *          'May 3, 2013' *
--   '2013', '2013-05'        'May 2013'
--   '10:28', '10:28:27'      '10:28:27, 3 May 2013'  '10 am'
-- input: the text to parse.
-- Returns the normalised string, or nil when input is not a string, contains
-- something unrecognised, or holds an invalid date/time value.
function parseDateString(input)

    -- Nothing to parse (e.g. a missing argument)
    if (type(input) ~= 'string') then return nil end

    -- Reset our seek string and position
    seekString = input
    currentPosition = 1

    local elements = {}
    local values = {}

    -- Seek the entire string now
    local numberOfElements = 0
    local endOfString
    repeat

        -- Locals, so that the scan results do not leak into the global environment
        local foundElement, foundValue, eos = seekNextElement()

        -- If we found something we can't process, return as unparsable
        if (foundElement == ELEMENT_INVALID) then return nil end

        numberOfElements = numberOfElements + 1
        elements[numberOfElements] = foundElement
        values[numberOfElements] = foundValue
        endOfString = eos

    until endOfString

    --[[
    local s = input .. ' -> ' .. numberOfElements .. ' elements: '

    for currentElementIndex = 1, numberOfElements do
        s = s .. ' #' .. elements[currentElementIndex] .. '=' .. values[currentElementIndex]
    end

    do return s end  
    ]]

    if (elements[1] == ELEMENT_ONETWODIGITS) then -- '3' or '10'

        if (elements[2] == ELEMENT_WHITESPACE) then -- '3 '
            if (elements[3] == ELEMENT_MONTHWORD -- '3 May'
                and elements[4] == ELEMENT_WHITESPACE -- '3 May '
                and elements[5] == ELEMENT_FOURDIGITS) then -- '3 May 2013'
                if (numberOfElements == 5) then return checkAndOutput(values[5], values[3], values[1], nil, nil, nil) end
                return parseTimeAfterDate(elements, values, numberOfElements, values[5], values[3], values[1])
            end

        elseif (elements[2] == ELEMENT_TIMESEPARATOR) then -- '10:'
            if (elements[3] == ELEMENT_ONETWODIGITS) then -- '10:28'
                if (numberOfElements == 3) then return checkAndOutput(nil, nil, nil, values[1], values[3], nil) end
                if (elements[4] == ELEMENT_TIMESEPARATOR and elements[5] == ELEMENT_ONETWODIGITS) then -- '10:28:27'
                    if (numberOfElements == 5) then return checkAndOutput(nil, nil, nil, values[1], values[3], values[5]) end
                    if ((elements[6] == ELEMENT_WHITESPACE or elements[6] == ELEMENT_COMMA) -- '10:28:27, '
                        and elements[7] == ELEMENT_ONETWODIGITS -- '10:28:27, 3'
                        and elements[8] == ELEMENT_WHITESPACE -- '10:28:27, 3 '
                        and elements[9] == ELEMENT_MONTHWORD -- '10:28:27, 3 May'
                        and elements[10] == ELEMENT_WHITESPACE -- '10:28:27, 3 May '
                        and elements[11] == ELEMENT_FOURDIGITS -- '10:28:27, 3 May 2013'
                        and numberOfElements == 11) then
                        return checkAndOutput(values[11], values[9], values[7], values[1], values[3], values[5])
                    end
                end
            end

        elseif (elements[2] == ELEMENT_TIMEPERIOD) then -- '10 am'
            if (numberOfElements == 2) then
                return checkAndOutput(nil, nil, nil, applyTimePeriod(values[1], values[2]), nil, nil)
            end
        end

    elseif (elements[1] == ELEMENT_FOURDIGITS) then -- '2013'

        if (numberOfElements == 1) then return checkAndOutput(values[1], nil, nil, nil, nil, nil) end
        if (elements[2] == ELEMENT_DATESEPARATOR and elements[3] == ELEMENT_ONETWODIGITS) then -- '2013-05'
            if (numberOfElements == 3) then return checkAndOutput(values[1], values[3], nil, nil, nil, nil) end
            if (elements[4] == ELEMENT_DATESEPARATOR and elements[5] == ELEMENT_ONETWODIGITS) then -- '2013-05-03'
                if (numberOfElements == 5) then return checkAndOutput(values[1], values[3], values[5], nil, nil, nil) end
                return parseTimeAfterDate(elements, values, numberOfElements, values[1], values[3], values[5])
            end
        end

    elseif (elements[1] == ELEMENT_MONTHWORD) then -- 'May'

        if (elements[2] == ELEMENT_WHITESPACE) then -- 'May '
            if (elements[3] == ELEMENT_ONETWODIGITS) then -- 'May 3'
                if (elements[4] == ELEMENT_COMMA and elements[5] == ELEMENT_FOURDIGITS) then -- 'May 3, 2013'
                    if (numberOfElements == 5) then return checkAndOutput(values[5], values[1], values[3], nil, nil, nil) end
                    return parseTimeAfterDate(elements, values, numberOfElements, values[5], values[1], values[3])
                end
            elseif (elements[3] == ELEMENT_FOURDIGITS) then -- 'May 2013'
                if (numberOfElements == 3) then return checkAndOutput(values[3], values[1], nil, nil, nil, nil) end
            end
        end

    end

    return nil -- the combination of elements was not a recognized one

end

-- Exported entry point: parses the first positional argument as a date/time string.
--   frame - object whose args[1] holds the text to parse
--           (presumably the Scribunto frame passed in by #invoke)
-- Returns whatever parseDateString returns for that text, or nil when the
-- argument is missing.
function main.parseDate(frame)

    local input = frame.args[1]

    -- parseDateString expects a string, so a missing argument is answered here
    if (input == nil) then return nil end

    return parseDateString(input)

end

--[[
function main.parseField(frame)

    local input = frame.args[1]

    matchNoRef = input:match('^([^<]*)<ref[^>]*>[^<]*</ref>$') -- basic ref

    if (matchNoRef) then
        return parseDateString(matchNoRef)
    else
        return parseDateString(input)
    end

end
]]

--[[
function main.emitMetadata(frame)

    -- First parse the date and see if we get a valid output date
    local date = parseDateString(frame.args[1])
    if (not date) then return nil end

    local spanClass = frame.args.spanClass or 'bday dtstart published updated'

    return '<span style="display:none">&#160;(<span class="' .. spanClass .. '">' .. date .. '</span>)</span>'

end
]]

-- Hand the table holding the exported functions back to whoever loads this module
return main