Boost.Spirit Lex and Qi: integrating a skip parser

Question

As with most of Spirit, if you want to do something realistic you have to spend hours looking for a solution that isn't documented but is buried in examples and mailing lists. I'm seriously considering moving to Ragel or Flex/Bison. The problem isn't that the machinery isn't available; it's that it is undocumented.

In this case, when looking at the Lex documentation, one gets thoroughly misled by the lexer API, which has a tokenize_and_phrase_parse function. It doesn't really work when you try to use it like qi::phrase_parse, nor does the documentation explain how to wire up a skipper using this function.

Wiring a whitespace skipper into the parser is done by altering the lexer and then using an undocumented Qi skipper construct (qi::in_state_skipper) when initialising the grammar and rules. You can see this in action in the Lex example directory (example 5). Code that compiles and works:

#include <exception>
#include <iostream>
#include <string>

#include <boost/spirit/include/phoenix_core.hpp>
#include <boost/spirit/include/lex_lexertl.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/cstdint.hpp>

// Short aliases for the Boost.Spirit and Boost.Phoenix namespaces used below.
namespace lex = boost::spirit::lex;
namespace px = boost::phoenix;
namespace qi = boost::spirit::qi;
// NOTE(review): `ascii` is not referenced in the code visible here.
namespace ascii = boost::spirit::ascii;

/// Token definitions for the lexer.
///
/// Two lexer states are populated:
///  - the default state, holding every token the grammar may refer to, each
///    with a Phoenix action that writes a trace line to std::cout;
///  - a state named "WS", holding only a whitespace token, intended to be
///    used as a skipper via qi::in_state("WS").
///
/// @tparam Lexer  lexer implementation forwarded to lex::lexer<>.
template <typename Lexer>
struct tokens : lex::lexer<Lexer>
{
    tokens()
        // The braces are wrapped in double quotes inside the pattern so that
        // they are taken literally instead of as regex repetition syntax.
        : left_paranthesis("\"{\""),
        right_paranthesis("\"}\""),
        colon(":"),
        // Keywords are matched case-insensitively.
        namespace_("(?i:namespace)"),
        event("(?i:event)"),
        optional("(?i:optional)"),
        required("(?i:required)"),
        ordinal("\\d+"),
        identifier("\\w+")
    {
        using boost::spirit::lex::_val;

        // Default state. The keywords and `ordinal` are listed before
        // `identifier`, whose \w+ pattern would also match them
        // (presumably so that the more specific tokens take precedence).
        this->self
            = 
              left_paranthesis    [ std::cout << px::val("lpar") << std::endl]
            | right_paranthesis   [ std::cout << px::val("rpar") << std::endl]
            | colon               [ std::cout << px::val("colon") << std::endl]
            | namespace_          [ std::cout << px::val("kw namespace") << std::endl]
            | event               [ std::cout << px::val("kw event") << std::endl]
            | optional            [ std::cout << px::val("optional ")  << "-->" << _val << "<--" << std::endl]
            | required            [ std::cout << px::val("required") << std::endl]
            | ordinal             [ std::cout << px::val("val ordinal (") << _val << ")" << std::endl]
            | identifier          [std::cout << px::val("val identifier(") << _val << ")" << std::endl];

        // Separate "WS" state: runs of spaces, tabs and newlines.
        this->self("WS") =   lex::token_def<>("[ \\t\\n]+");
    }

    // Punctuation tokens (note: the *_paranthesis tokens match '{' and '}').
    lex::token_def<> left_paranthesis, right_paranthesis, colon;
    // Keyword tokens; declared with lex::omit as their attribute type.
    lex::token_def<lex::omit> namespace_, event, optional, required;
    // Run of digits, exposed as an unsigned 32-bit integer attribute.
    lex::token_def<boost::uint32_t> ordinal;
    // Run of word characters.
    lex::token_def<> identifier;
};

/// Qi grammar that consumes the token stream produced by the lexer.
///
/// The grammar's entry point is the `event` rule, which currently accepts a
/// single `optional` keyword token and prints "== OPTIONAL" when it matches.
/// Skipping is delegated to qi::in_state_skipper<Lexer>.
///
/// @tparam Iterator  token iterator type of the lexer.
/// @tparam Lexer     lexer definition type used by the in-state skipper.
template <typename Iterator, typename Lexer>
struct grammar
    : qi::grammar<Iterator, qi::in_state_skipper<Lexer> >
{
    // Declared without a skipper and not assigned anywhere in this struct.
    qi::rule<Iterator> start;

    // Entry rule; shares the grammar's skipper type.
    qi::rule<Iterator, qi::in_state_skipper<Lexer> > event;

    /// @param tok  token definitions object providing the `optional` token.
    template <typename TokenDef>
    grammar(TokenDef const& tok)
      : grammar::base_type(event)
    {
        //start = event;
        event =
            tok.optional
            [
                std::cout << px::val("== OPTIONAL") << std::endl
            ];
    }
};

// Larger sample input, currently disabled:
// std::string test = "namespace{ event { OPtiONAL 124:hello_world RequireD} } ";

// Input fed to the lexer/parser in main(): one keyword with whitespace on
// both sides. Kept non-const because main() takes a mutable iterator from it.
std::string test = " OPTIONAL ";

int main()
{
    typedef lex::lexertl::token<std::string::iterator, boost::mpl::vector<boost::uint32_t, std::string> > token_type;
    typedef lex::lexertl::actor_lexer<token_type> lexer_type;
    typedef tokens<lexer_type>::iterator_type iterator_type;

    tokens<lexer_type> token_lexer;
    grammar<iterator_type,tokens<lexer_type>::lexer_def> grammar(token_lexer);

    std::string::iterator it = test.begin();
    iterator_type first = token_lexer.begin(it, test.end());
    iterator_type last = token_lexer.end();

    bool r; 

    r = qi::phrase_parse(first, last, grammar, qi::in_state("WS")[token_lexer.self]);

    if(r)
        ;
    else
    {
        std::cout << "parsing failed" << std::endl;
    }
   /* 
    lexer_type::iterator_type iter; 

    try
    {
        iter = token_lexer.begin(first,last);
    }
    catch(std::exception & e)
    {
        std::cout << e.what() << std::endl;
    }

    lexer_type::iterator_type end = token_lexer.end();

    while (iter != end && token_is_valid(*iter))
        ++iter;
   */ 
}