Module:Sandbox/Hellknowz/Test

This is an old revision of this page, as edited by Hellknowz (talk | contribs) at 14:54, 31 August 2013 (so many ifs). The present address (URL) is a permanent link to this revision, which may differ significantly from the current revision.
-- Table of functions exported by this module; it is returned at the end of the file.
local main = {};

-- Full English month names, in lower case, mapped to their month number (1-12).
-- Lookups must lower-case the word first.
local monthIndices = {
    january = 1, february = 2, march = 3,
    april = 4, may = 5, june = 6,
    july = 7, august = 8, september = 9,
    october = 10, november = 11, december = 12,
}

-- Three-letter English month abbreviations, in lower case, mapped to their month number (1-12).
local monthShortIndices = {
    jan = 1, feb = 2, mar = 3,
    apr = 4,
    may = 5, -- long one would have caught this already
    jun = 6,
    jul = 7, aug = 8, sep = 9,
    oct = 10, nov = 11, dec = 12,
}

-- Largest day number each month can have, indexed by month number (1-12).
-- February is listed with its leap-year length; the leap-year rule itself is applied by the day check.
local monthDays = {
    31, -- January
    29, -- February; will check below
    31, -- March
    30, -- April
    31, -- May
    30, -- June
    31, -- July
    31, -- August
    30, -- September
    31, -- October
    30, -- November
    31, -- December
}

-- Checks that `day` is a real calendar day of the given month and year.
-- day:   day of the month (number)
-- month: month number; a month without an entry in monthDays makes the check fail
-- year:  year number, only consulted for 29 February
-- Returns true when the day exists, false otherwise.
function checkIfDayValid(day, month, year)
    local daysInMonth = monthDays[month]

    -- Unknown month: report invalid instead of failing on a nil comparison
    if (not daysInMonth) then return false end

    -- The digit pattern cannot produce negatives, but it does produce 0 ('0 May 2013')
    if (day < 1 or day > daysInMonth) then return false end

    -- February leap year check: 29 February only exists in years divisible by 4,
    -- except century years that are not divisible by 400
    if (month == 2 and day == 29) then
        local isLeapYear = (year % 4 == 0 and year % 100 ~= 0) or (year % 400 == 0)
        if (not isLeapYear) then return false end
    end

    return true
end

-- Tells whether `month` is a usable month number: anything but 0, and no larger than 12.
-- Negative numbers are not rejected here; the [0-9] digit pattern never produces them.
function checkIfMonthValid(month)
    if (month == 0) then
        return false
    end
    return month <= 12
end

-- Tells whether `year` is accepted: 1583 or later.
-- No upper bound is tested here (the original note says "up to 9999").
-- NOTE(review): 1583 is presumably chosen for the Gregorian calendar - confirm.
function checkIfYearValid(year)
    local earliestYear = 1583
    return earliestYear <= year
end

-- Tells whether `hour` is below 24.
-- Negative numbers are not rejected here; the [0-9] digit pattern never produces them.
function checkIfHourValid(hour)
    local hoursPerDay = 24
    return hoursPerDay > hour
end

-- Tells whether `minute` is below 60.
-- Negative numbers are not rejected here; the [0-9] digit pattern never produces them.
function checkIfMinuteValid(minute)
    local minutesPerHour = 60
    return minutesPerHour > minute
end

-- Tells whether `second` is below 60.
-- Negative numbers are not rejected here; the [0-9] digit pattern never produces them.
function checkIfSecondValid(second)
    local secondsPerMinute = 60
    return secondsPerMinute > second
end

-- Validates the supplied date/time components and renders them as an ISO-style string.
-- Components are optional from the right: the output is as precise as the last one given
-- (year -> 'Y', month -> 'Y-MM', day -> 'Y-MM-DD', hour -> 'Y-MM-DD HH', and so on).
-- Returns nil when any supplied component fails its check, or when nothing was supplied.
function checkAndOutput(year, month, day, hour, minute, second)

    -- Every component that is present must pass its own check; evaluation stops at the first failure
    local everythingValid =
        (not year or checkIfYearValid(year))
        and (not month or checkIfMonthValid(month))
        and (not day or checkIfDayValid(day, month, year))
        and (not hour or checkIfHourValid(hour))
        and (not minute or checkIfMinuteValid(minute))
        and (not second or checkIfSecondValid(second))
    if (not everythingValid) then
        return nil
    end

    -- Pick the format from the most precise component present
    if (second) then
        return string.format('%d-%02d-%02d %02d:%02d:%02d', year, month, day, hour, minute, second)
    end
    if (minute) then
        return string.format('%d-%02d-%02d %02d:%02d', year, month, day, hour, minute)
    end
    if (hour) then
        return string.format('%d-%02d-%02d %02d', year, month, day, hour)
    end
    if (day) then
        return string.format('%d-%02d-%02d', year, month, day)
    end
    if (month) then
        return string.format('%d-%02d', year, month)
    end
    if (year) then
        return string.format('%d', year)
    end

    return nil -- nothing at all was supplied

end

-- Returns the number of hours to add to a 12-hour clock hour to obtain the 24-hour value.
-- period: the am/pm text as captured by the tokenizer. It may carry leading whitespace and
--         dots (' pm', 'p.m.', 'a.m'), so it is normalised before comparing.
-- hour:   (optional) the 12-hour clock hour the period belongs to. When given, hour 12 is
--         handled: '12 am' -> -12 (midnight, hour 0) and '12 pm' -> 0 (noon stays 12).
--         When omitted, the result is 12 for pm and 0 otherwise.
-- Returns 0 for anything that is not a string or not an am/pm marker.
function periodHourAdd(period, hour)
    if (type(period) ~= 'string') then return 0 end

    -- Reduce every accepted spelling to plain 'am' / 'pm'; the parentheses drop gsub's count result
    local normalized = (period:lower():gsub('[%s%.]', ''))

    if (normalized == 'pm') then
        if (hour == 12) then return 0 end -- 12 pm is noon, already the right 24-hour value
        return 12
    elseif (normalized == 'am' and hour == 12) then
        return -12 -- 12 am is midnight
    end

    return 0
end

local seekString -- the string currently being tokenized; kept at file level so it needn't be passed as a parameter every time

local currentPosition -- index into seekString where the next element starts

-- These are the element type "constants" for readability mostly
local ELEMENT_INVALID = 1
local ELEMENT_ONETWODIGITS = 2
local ELEMENT_FOURDIGITS = 3
local ELEMENT_WHITESPACE = 4
local ELEMENT_MONTHWORD = 5
local ELEMENT_COMMA = 6
local ELEMENT_DASH = 7
local ELEMENT_DATESEPARATOR = 8
local ELEMENT_TIMESEPARATOR = 9
-- am/pm marker. The name has to be exactly ELEMENT_TIMEPERIOD: that is what seekNextElement and
-- parseDateString refer to, and any other spelling leaves their references as an undefined global (nil).
local ELEMENT_TIMEPERIOD = 10

-- Reads the next element (token) of seekString, starting at currentPosition, and moves
-- currentPosition past it.
-- Returns three values on success:
--   1. the ELEMENT_* type of what was read
--   2. its value: a number for digits and month words, the matched text otherwise
--   3. true when the end of seekString has been reached, false otherwise
-- Returns just ELEMENT_INVALID when the text at the current position cannot be part of a date.
function seekNextElement()

    -- Tries `pattern` (anchored with '^') at the current position. On a match, moves
    -- currentPosition past it and returns the captured text; otherwise returns nil.
    local function take(pattern)
        local _, matchEnd, captured = seekString:find(pattern, currentPosition)
        if (not matchEnd) then return nil end
        currentPosition = matchEnd + 1 -- this is our new start ___location
        return captured
    end

    -- True once everything in seekString has been consumed
    local function atEnd()
        return currentPosition > #seekString
    end

    local text

    -- Digits
    text = take('^([0-9]+)')
    if (text) then
        -- Additionally check how many digits we actually have, as arbitrary number isn't valid
        if (#text <= 2) then
            return ELEMENT_ONETWODIGITS, tonumber(text), atEnd()
        elseif (#text == 4) then
            return ELEMENT_FOURDIGITS, tonumber(text), atEnd()
        end
        return ELEMENT_INVALID -- just the invalid, the number of digits won't match any pattern
    end

    -- Word
    -- NOTE(review): this runs before the am/pm branch, so 'am'/'pm' written directly after
    -- digits ('10pm') is read as a too-short word and rejected.
    text = take('^([A-Za-z]+)')
    if (text) then
        -- TODO LETTERS
        if (#text < 3) then
            return ELEMENT_INVALID -- just the invalid, the word was too short to be valid month name
        end

        -- Find the possible month name index: full names first, then abbreviations.
        -- Kept local so the lookup result does not leak into the global environment.
        local lowered = string.lower(text)
        local monthIndex = monthIndices[lowered] or monthShortIndices[lowered]
        if (not monthIndex) then
            return ELEMENT_INVALID -- just the invalid, the word didn't match a valid month name
        end
        return ELEMENT_MONTHWORD, monthIndex, atEnd()
    end

    -- Dash with possible whitespace or Date separator (dash without whitespace)
    text = take('^(%s*%-%s*)')
    if (text) then
        if (#text == 1) then
            return ELEMENT_DATESEPARATOR, text, atEnd()
        end
        return ELEMENT_DASH, text, atEnd()
    end

    -- Time separator (colon without whitespace)
    text = take('^(:)')
    if (text) then
        return ELEMENT_TIMESEPARATOR, text, atEnd()
    end

    -- Comma and any following whitespace
    text = take('^(,%s*)')
    if (text) then
        return ELEMENT_COMMA, text, atEnd()
    end

    -- Time period - am/pm; the capture includes any leading whitespace
    text = take('^(%s*[ap]%.?m%.?)')
    if (text) then
        return ELEMENT_TIMEPERIOD, text, atEnd()
    end

    -- Whitespace
    text = take('^(%s+)')
    if (text) then
        return ELEMENT_WHITESPACE, text, atEnd()
    end

    return ELEMENT_INVALID -- just the invalid, we won't be parsing this further

end

-- Parses a date, optionally followed by a time, into an ISO-style string.
-- input: the text to parse. Implemented forms are
--          'd M y'            (3 May 2013), optionally followed by whitespace or a comma and one of
--                             'h p', 'h:m', 'h:m p', 'h:m:s', 'h:m:s p'   (10:38:27 am)
--          'y', 'y-m', 'y-m-d' (2013-05-03)
--          'M d, y'           (May 3, 2013)
--          'M y'              (May 2013)
--        The comment trees below list further forms that are planned but not implemented.
-- Returns the string built by checkAndOutput, or nil when the input is not a string, contains
-- an element that cannot be read, is not one of the forms above, or fails validation.
function parseDateString(input)

    -- A missing argument would otherwise fail on seekString:find
    if (type(input) ~= 'string') then return nil end

    -- Reset our seek string and position
    seekString = input
    currentPosition = 1

    local elements = {}
    local values = {}

    -- Seek the entire string now
    local numberOfElements = 0
    repeat

        -- Declared local so they do not leak into the global environment;
        -- a local of the loop body is still visible to the `until` condition
        local foundElement, foundValue, eos = seekNextElement()

        -- If we found something we can't process, return as unparsable
        if (foundElement == ELEMENT_INVALID) then return nil end

        numberOfElements = numberOfElements + 1
        elements[numberOfElements] = foundElement
        values[numberOfElements] = foundValue

    until eos

    --[[
    local s = input .. ' -> ' .. numberOfElements .. ' elements: '

    for currentElementIndex = 1, numberOfElements do
        s = s .. ' #' .. elements[currentElementIndex] .. '=' .. values[currentElementIndex]
    end

    do return s end  
    ]]

    -- Reads a time of day whose hour is element `first` and which runs to the end of the input:
    -- 'h p', 'h:m', 'h:m p', 'h:m:s' or 'h:m:s p'.
    -- Returns hour (already adjusted for am/pm), minute, second; minute and second may be nil.
    -- Returns nil when the remaining elements are not exactly one of those forms
    -- (a bare hour without minutes or am/pm is not accepted).
    local function readTime(first)
        if (elements[first] ~= ELEMENT_ONETWODIGITS) then return nil end

        local hour, minute, second = values[first], nil, nil
        local index = first + 1 -- first element not consumed yet

        -- Up to two ':digits' groups: minutes, then seconds
        if (elements[index] == ELEMENT_TIMESEPARATOR and elements[index + 1] == ELEMENT_ONETWODIGITS) then
            minute = values[index + 1]
            index = index + 2
            if (elements[index] == ELEMENT_TIMESEPARATOR and elements[index + 1] == ELEMENT_ONETWODIGITS) then
                second = values[index + 1]
                index = index + 2
            end
        end

        -- An am/pm marker is only accepted as the very last element
        local hasPeriod = (index == numberOfElements and elements[index] == ELEMENT_TIMEPERIOD)
        if (hasPeriod) then
            -- The hour is passed along so an implementation that accepts it can handle 12 am / 12 pm
            hour = hour + periodHourAdd(values[index], hour)
            index = index + 1
        end

        if (index ~= numberOfElements + 1) then return nil end -- something unrecognised is left over
        if (not minute and not hasPeriod) then return nil end -- bare hour

        return hour, minute, second
    end

    if (elements[1] == ELEMENT_ONETWODIGITS -- '3'
        and elements[2] == ELEMENT_WHITESPACE -- '3 '
        and elements[3] == ELEMENT_MONTHWORD -- '3 May'
        and elements[4] == ELEMENT_WHITESPACE -- '3 May '
        and elements[5] == ELEMENT_FOURDIGITS) then -- '3 May 2013'

        if (numberOfElements == 5) then return checkAndOutput(values[5], values[3], values[1], nil, nil, nil) end

        if (elements[6] == ELEMENT_WHITESPACE or elements[6] == ELEMENT_COMMA) then -- '3 May 2013, '
            local hour, minute, second = readTime(7) -- '3 May 2013, 10:38:27 am' and shorter
            if (hour) then return checkAndOutput(values[5], values[3], values[1], hour, minute, second) end
        end
    end

--[[
    1-2 digits

        Word (month name)

            Done -- d M y (3 May)
            Whitespace

                4 digits

                    Done -- d M y (3 May 2013)
                    Whitespace? and Dash

                        1-2 digits

                            Word (month name)

                                4 digits

                                    Done -- d M Y - d M Y (13 May 2013 – 16 February 2014)

                    Comma? and Whitespace

                        1-2 digits

                            Whitespace and/or Letters (am/pm)

                                Done -- d M y, h p (3 May 2013, 10 am)

                            Time separator

                                1-2 digits

                                    Done -- d M y, h:m (3 May 2013, 10:38)
                                    Whitespace and/or Letters (am/pm)

                                        Done -- d M y, h:m p (3 May 2013, 10:38 am)

                                    Time separator

                                        1-2 digits

                                            Done -- d M y, h:m:s (3 May 2013, 10:38:27)
                                            Whitespace and/or Letters (am/pm)

                                                Done -- d M y, h:m:s p (3 May 2013, 10:38:27 am)

            Whitespace? and Dash

                1-2 digits

                    Word (month name)

                        Done -- d M - d M (13 May – 16 June)
                        4 digits

                            Done -- d M - d M y (13 May – 16 June 2013)

        Whitespace and/or Letters (am/pm)

            Done -- h p (10 am)

        Time separator

            1-2 digits

                Done -- h:m (10:38)
                Whitespace and/or Letters (am/pm)

                    Done -- h:m p (10:38 am)

                Time separator

                    1-2 digits

                        Done -- h:m:s (10:38:27)
                        Whitespace and/or Letters (am/pm)

                            Done -- h:m:s p (10:38:27 am)

        Dash

            1-2 digits

                Whitespace

                    Word (month name)

                        Whitespace

                            4 digits

                                Done -- d-d M y (3–16 May 2013)

                        Done -- d-d M (3–16 May)
]]

    if (elements[1] == ELEMENT_FOURDIGITS) then
        if (numberOfElements == 1) then return checkAndOutput(values[1], nil, nil, nil, nil, nil) end -- '2013'
        if (elements[2] == ELEMENT_DATESEPARATOR and elements[3] == ELEMENT_ONETWODIGITS) then
            if (numberOfElements == 3) then return checkAndOutput(values[1], values[3], nil, nil, nil, nil) end -- '2013-05'
            if (elements[4] == ELEMENT_DATESEPARATOR and elements[5] == ELEMENT_ONETWODIGITS) then
                if (numberOfElements == 5) then return checkAndOutput(values[1], values[3], values[5], nil, nil, nil) end -- '2013-05-03'
            end
        end
    end

--[[
    4 digits

        Done -- y (2013)
        ymd date separator

            1-2 digits

                Done -- y-m (2013-05)
                ymd date separator

                    1-2 digits

                        Done -- y-m-d (2013-05-03)
                        Whitespace and/or Dash?

                            1-2 digits

                                Done -- y-m-d h (2013-05-03 10)
                                Time separator

                                    1-2 digits

                                        Done -- y-m-d h:m (2013-05-03 10:38)
                                        Time separator

                                            1-2 digits

                                                Done -- y-m-d h:m:s (2013-05-03 10:38:27)

        Dash

            4 digits

                Done -- y-y (2013-2014)
]]

    if (elements[1] == ELEMENT_MONTHWORD and elements[2] == ELEMENT_WHITESPACE) then
        if (elements[3] == ELEMENT_ONETWODIGITS) then
            if (elements[4] == ELEMENT_COMMA and elements[5] == ELEMENT_FOURDIGITS) then
                if (numberOfElements == 5) then return checkAndOutput(values[5], values[1], values[3], nil, nil, nil) end -- 'May 3, 2013'
            end
        elseif (elements[3] == ELEMENT_FOURDIGITS) then
            if (numberOfElements == 3) then return checkAndOutput(values[3], values[1], nil, nil, nil, nil) end -- 'May 2013'
        end
    end

--[[
    Word (month name)

        Done -- M (May)
        Whitespace

            1-2 digits

                Done -- M d (May 3)
                Comma and Whitespace

                    4 digits

                        Done -- M d, y (May 3, 2013) {{tick}}
                        Whitespace? and Dash

                            Word (month name)

                                4 digits

                                    Done -- M d, Y - M d, y (May 3, 2013 – February 16, 2014)

                        Comma? and Whitespace

                            1-2 digits

                                Whitespace and/or Letters (am/pm)

                                    Done -- M d, Y, h p (May 3, 2013, 10 am)

                                Time separator

                                    1-2 digits

                                        Done -- M d, Y, h:m (May 3, 2013, 10:38)
                                        Whitespace and/or Letters (am/pm)

                                            Done -- M d, Y, h:m p (May 3, 2013, 10:38 am)

                                        Time separator

                                            1-2 digits

                                                Done -- M d, Y, h:m:s (May 3, 2013, 10:38:27)
                                                Whitespace and/or Letters (am/pm)

                                                    Done -- M d, Y, h:m:s p (May 3, 2013, 10:38:27 am)

                Whitespace? and Dash

                    1-2 digits

                        Done -- M d-d (May 3–16)
                        Comma and Whitespace

                            4 digits

                                Done -- M d-d, y (May 3–16, 2013)

                        Word (month name)

                            Done -- M d - M d, y (May 13 – June 16)
                            Comma and Whitespace

                                4 digits

                                    Done -- M d - M d, y (May 13 – June 16, 2013)

            4 digits

                Done -- M y (May 2013) {{tick}}
                Whitespace? and Dash

                    Word (month name)

                        4 digits

                            Done -- M y - M y (May 2013 – February 2014) {{tick}}

        Whitespace? and Dash

            Word (month name)

                Done -- M - M (May – June)
                Whitespace

                    4 digits

                        Done -- M - M y (May – June 2013)
]]

    return nil -- the combination of elements was not a recognized one

end

-- Exported entry point: parses the first positional argument of `frame` as a date.
-- frame: object whose `args` table holds the arguments (presumably a Scribunto frame - confirm)
-- Returns whatever parseDateString returns for that argument.
function main.parseDate(frame)
    local dateText = frame.args[1]
    return parseDateString(dateText)
end

--[[
function main.parseField(frame)

    local input = frame.args[1]

    matchNoRef = input:match('^([^<]*)<ref[^>]*>[^<]*</ref>$') -- basic ref

    if (matchNoRef) then
        return parseDateString(matchNoRef)
    else
        return parseDateString(input)
    end

end
]]

--[[
function main.emitMetadata(frame)

    -- First parse the date and see if we get a valid output date
    local date = parseDateString(frame.args[1])
    if (not date) then return nil end

    local spanClass = frame.args.spanClass or 'bday dtstart published updated'

    return '<span style="display:none">&#160;(<span class="' .. spanClass .. '">' .. date .. '</span>)</span>'

end
]]

-- Hand the table of exported functions back to whoever loads this module
return main