Question

I just finished writing a date parser for my ECMAScript implementation. Previously I had written a regular expressions compiler and I was really impressed with the way the spec described the process. Essentially the input is passed through a series of continuations that test each portion of the string. My date parser is loosely based around the idea and I really want to know what it is called.

Note: I have only left the core of the parser to reduce noise.

/// <summary>
/// Parses a date/time string by feeding its tokens, one at a time, through a chain
/// of handler methods. Each handler inspects a single token and returns a
/// <c>Result</c> whose <c>Type</c> tells <see cref="Parse"/> how to proceed and whose
/// <c>Continuation</c> is the handler for the next token.
/// </summary>
/// <remarks>
/// Only the core of the parser is shown here; the tokenizer, the <c>Complete</c>
/// method and the component fields (<c>_year</c>, <c>_month</c>, ...) are declared elsewhere.
/// </remarks>
public sealed class DateParser
{
    // Upper bound (exclusive) for the millisecond component.
    private const double MillisecondsPerSecond = 1000.0;

    // Sign applied to the offset hour/minute components: +1 after '+', -1 after '-'.
    // A separate field is required because seeding _offset with -0.0 and then adding
    // a positive value yields a positive value, i.e. the sign would be lost.
    private double _offsetSign = 1.0;

    /// <summary>
    /// Drives the handler chain over the token stream.
    /// </summary>
    /// <returns>
    /// The value produced by <c>Complete()</c> when the input is accepted,
    /// or <see cref="double.NaN"/> when it is rejected.
    /// </returns>
    public double Parse()
    {
        using (var tokens = Tokenize().GetEnumerator())
        {
            var previous = new Result(ResultType.Success, HandleFirst);
            while (true)
            {
                if (!tokens.MoveNext())
                {
                    // Input may only end right after a component marked Optional.
                    return previous.Type == ResultType.Optional ? Complete() : double.NaN;
                }

                var next = previous.Continuation(tokens.Current);
                switch (next.Type)
                {
                    case ResultType.Complete:
                        return Complete();
                    case ResultType.MustFail:
                        return double.NaN;
                    case ResultType.CanFail:
                        // An unexpected token is tolerated only if everything up to
                        // the previous token already formed an acceptable input.
                        return previous.Type == ResultType.Optional ? Complete() : double.NaN;
                }

                previous = next;
            }
        }
    }

    /// <summary>Parses the text of a token as a number, independent of the current culture.</summary>
    private static double ParseNumber(DateToken token)
    {
        return double.Parse(token.Value, System.Globalization.CultureInfo.InvariantCulture);
    }

    /// <summary>Handles the first token: an integer starts a date, a T starts a time.</summary>
    private Result HandleFirst(DateToken token)
    {
        switch (token.Type)
        {
            case DateTokenType.Integer:
                return HandleYear(token);
            case DateTokenType.T:
                return HandleT(token);
            default:
                return new Result(ResultType.MustFail, null);
        }
    }

    /// <summary>Accepts a four-character integer token as the year.</summary>
    private Result HandleYear(DateToken token)
    {
        if (token.Type == DateTokenType.Integer && token.Value.Length == 4)
        {
            _year = ParseNumber(token);
            return new Result(ResultType.Optional, HandleMonthHyphen);
        }
        return new Result(ResultType.MustFail, null);
    }

    /// <summary>
    /// Handles the token after the year: a hyphen introduces the month; anything
    /// else is passed to <see cref="HandleT"/> so that a time part directly after
    /// the year is parsed rather than silently dropped.
    /// </summary>
    private Result HandleMonthHyphen(DateToken token)
    {
        if (token.Type == DateTokenType.Hyphen)
        {
            return new Result(ResultType.Success, HandleMonth);
        }
        // HandleT yields CanFail for a non-T token, which Parse treats as
        // "finish here" because the year was Optional.
        return HandleT(token);
    }

    /// <summary>Accepts a two-character integer token in the range 1..12 as the month.</summary>
    private Result HandleMonth(DateToken token)
    {
        if (token.Type == DateTokenType.Integer && token.Value.Length == 2)
        {
            _month = ParseNumber(token);
            if (_month < 1 || _month > 12)
            {
                _month = null;
                return new Result(ResultType.MustFail, null);
            }
            return new Result(ResultType.Optional, HandleDayHyphen);
        }
        return new Result(ResultType.MustFail, null);
    }

    /// <summary>
    /// Handles the token after the month: a hyphen introduces the day; anything
    /// else is passed to <see cref="HandleT"/> so that a time part directly after
    /// the month is parsed rather than silently dropped.
    /// </summary>
    private Result HandleDayHyphen(DateToken token)
    {
        if (token.Type == DateTokenType.Hyphen)
        {
            return new Result(ResultType.Success, HandleDay);
        }
        return HandleT(token);
    }

    /// <summary>Accepts a two-character integer token in the range 1..31 as the day.</summary>
    private Result HandleDay(DateToken token)
    {
        if (token.Type == DateTokenType.Integer && token.Value.Length == 2)
        {
            _day = ParseNumber(token);
            if (_day < 1 || _day > 31)
            {
                _day = null;
                return new Result(ResultType.MustFail, null);
            }
            return new Result(ResultType.Optional, HandleT);
        }
        return new Result(ResultType.MustFail, null);
    }

    /// <summary>Accepts the T separator, after which an hour is required.</summary>
    private Result HandleT(DateToken token)
    {
        if (token.Type == DateTokenType.T)
        {
            return new Result(ResultType.Success, HandleHour);
        }
        return new Result(ResultType.CanFail, null);
    }

    /// <summary>Accepts a two-character integer token below HoursPerDay as the hour.</summary>
    private Result HandleHour(DateToken token)
    {
        if (token.Type == DateTokenType.Integer && token.Value.Length == 2)
        {
            _hour = ParseNumber(token);
            if (_hour >= DatePrototype.HoursPerDay)
            {
                _hour = null;
                return new Result(ResultType.MustFail, null);
            }
            return new Result(ResultType.Success, HandleHourColon);
        }
        return new Result(ResultType.MustFail, null);
    }

    /// <summary>Requires the colon between hour and minute.</summary>
    private Result HandleHourColon(DateToken token)
    {
        if (token.Type == DateTokenType.Colon)
        {
            return new Result(ResultType.Success, HandleMinute);
        }
        return new Result(ResultType.MustFail, null);
    }

    /// <summary>Accepts a two-character integer token below MinutesPerHour as the minute.</summary>
    private Result HandleMinute(DateToken token)
    {
        if (token.Type == DateTokenType.Integer && token.Value.Length == 2)
        {
            _minute = ParseNumber(token);
            if (_minute >= DatePrototype.MinutesPerHour)
            {
                _minute = null;
                return new Result(ResultType.MustFail, null);
            }
            return new Result(ResultType.Optional, HandleSecondColonOrOffset);
        }
        return new Result(ResultType.MustFail, null);
    }

    /// <summary>
    /// Handles the token after the minute: a colon introduces the seconds,
    /// otherwise the token must start a time-zone offset.
    /// </summary>
    private Result HandleSecondColonOrOffset(DateToken token)
    {
        if (token.Type == DateTokenType.Colon)
        {
            return new Result(ResultType.Success, HandleSecond);
        }

        // Inside a time, a token that is not an offset is a hard failure.
        var result = HandleOffset(token);
        if (result.Type == ResultType.CanFail)
        {
            return new Result(ResultType.MustFail, null);
        }
        return result;
    }

    /// <summary>Accepts a two-character integer token below SecondsPerMinute as the second.</summary>
    private Result HandleSecond(DateToken token)
    {
        if (token.Type == DateTokenType.Integer && token.Value.Length == 2)
        {
            _second = ParseNumber(token);
            if (_second >= DatePrototype.SecondsPerMinute)
            {
                _second = null;
                return new Result(ResultType.MustFail, null);
            }
            return new Result(ResultType.Optional, HandleSecondDotOrOffset);
        }
        return new Result(ResultType.MustFail, null);
    }

    /// <summary>
    /// Handles the token after the second: a dot introduces the milliseconds,
    /// otherwise the token must start a time-zone offset.
    /// </summary>
    private Result HandleSecondDotOrOffset(DateToken token)
    {
        if (token.Type == DateTokenType.Dot)
        {
            return new Result(ResultType.Success, HandleMillisecond);
        }

        // Inside a time, a token that is not an offset is a hard failure.
        var result = HandleOffset(token);
        if (result.Type == ResultType.CanFail)
        {
            return new Result(ResultType.MustFail, null);
        }
        return result;
    }

    /// <summary>Accepts a three-character integer token below 1000 as the millisecond.</summary>
    private Result HandleMillisecond(DateToken token)
    {
        if (token.Type == DateTokenType.Integer && token.Value.Length == 3)
        {
            _millisecond = ParseNumber(token);
            // Milliseconds are bounded by one second (the previous bound was one minute).
            if (_millisecond >= MillisecondsPerSecond)
            {
                _millisecond = null;
                return new Result(ResultType.MustFail, null);
            }
            return new Result(ResultType.Optional, HandleOffset);
        }
        return new Result(ResultType.MustFail, null);
    }

    /// <summary>
    /// Handles the start of a time-zone offset: Z is a complete zero offset,
    /// while '+' or '-' must be followed by hours and minutes.
    /// </summary>
    private Result HandleOffset(DateToken token)
    {
        switch (token.Type)
        {
            case DateTokenType.Z:
                _offset = 0.0;
                // Z is a full offset on its own. Returning Success with no
                // continuation made Parse reject the input at end of stream.
                return new Result(ResultType.Complete, null);
            case DateTokenType.Plus:
                _offset = 0.0;
                _offsetSign = 1.0;
                return new Result(ResultType.Success, HandleOffsetHour);
            case DateTokenType.Hyphen:
                _offset = 0.0;
                _offsetSign = -1.0;
                return new Result(ResultType.Success, HandleOffsetHour);
            default:
                return new Result(ResultType.CanFail, null);
        }
    }

    /// <summary>Adds a two-character integer token, scaled to milliseconds and signed, as the offset hours.</summary>
    private Result HandleOffsetHour(DateToken token)
    {
        if (token.Type == DateTokenType.Integer && token.Value.Length == 2)
        {
            _offset += _offsetSign * ParseNumber(token) * DatePrototype.MsPerHour;
            return new Result(ResultType.Success, HandleOffsetHourColon);
        }
        return new Result(ResultType.MustFail, null);
    }

    /// <summary>Requires the colon between offset hours and offset minutes.</summary>
    private Result HandleOffsetHourColon(DateToken token)
    {
        if (token.Type == DateTokenType.Colon)
        {
            return new Result(ResultType.Success, HandleOffsetMinute);
        }
        return new Result(ResultType.MustFail, null);
    }

    /// <summary>Adds a two-character integer token, scaled to milliseconds and signed, as the offset minutes and finishes the parse.</summary>
    private Result HandleOffsetMinute(DateToken token)
    {
        if (token.Type == DateTokenType.Integer && token.Value.Length == 2)
        {
            _offset += _offsetSign * ParseNumber(token) * DatePrototype.MsPerMinute;
            return new Result(ResultType.Complete, null);
        }
        return new Result(ResultType.MustFail, null);
    }
}
Was it helpful?

Solution

I think the term you're looking for is Finite State Machine, which is essentially something like:

// Pseudocode: the loop body repeats for as long as tokens remain.
while( tokensAvailable ){
    // look at current token
    // do something maybe relating to state
    // loop
}

OTHER TIPS

Try a recursive descent parser.

There's a free ebook (pdf) by Niklaus Wirth that gives a good introduction to some different techniques.

Licensed under: CC-BY-SA with attribution
Not affiliated with StackOverflow
scroll top