Question

Edit: I have ripped out the lexer, as it does not integrate cleanly with Qi and just obfuscates grammars (see here).


I'm trying to grow a grammar on top of the Spirit Lex framework. When I try to move a skip parser into the grammar, I start getting errors.

Specifically, the errors appear when I change the qi::grammar<> and qi::rule<> event signatures in the grammar struct from <Iterator> to <Iterator,void(),ascii::space_type>. What do I need to do?

Additionally, I have set the token_def for the optional token (and some others) to omit its attribute. Why does the lexer still provide a valid _val in the semantic action for optional? I ask because I thought the problem was that the string attribute of the optional token on the right-hand side of the event rule in Qi does not unify with the void() attribute signature of the rule.

#include <boost/spirit/include/phoenix_core.hpp>
#include <boost/spirit/include/lex_lexertl.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/cstdint.hpp>
#include <string>
#include<exception>

namespace lex = boost::spirit::lex;
namespace px = boost::phoenix;
namespace qi = boost::spirit::qi;
namespace ascii = boost::spirit::ascii;

// Lexer definition for the event DSL.
//
// Declares every token of the language and attaches a tracing semantic
// action to each one, so that every match is echoed to std::cout.
// Keyword patterns carry the (?i:...) flag, i.e. they are written to be
// matched without regard to case.
template <typename Lexer>
struct tokens : lex::lexer<Lexer>
{
    tokens()
        : left_paranthesis("\"{\"")    // pattern text is the brace wrapped in double quotes
        , right_paranthesis("\"}\"")
        , colon(":")
        , namespace_("(?i:namespace)")
        , event("(?i:event)")
        , optional("(?i:optional)")
        , required("(?i:required)")
        , ordinal("\\d+")
        , identifier("\\w+")
    {
        using boost::spirit::lex::_val;
        using px::val;

        // The alternatives are listed in exactly the original order; the
        // leading " " registers a single blank as a token of its own.
        this->self
            = " "
            | left_paranthesis[std::cout << val("lpar") << std::endl]
            | right_paranthesis[std::cout << val("rpar") << std::endl]
            | colon[std::cout << val("colon") << std::endl]
            | namespace_[std::cout << val("kw namesapce") << std::endl]
            | event[std::cout << val("kw event") << std::endl]
            | optional[std::cout << val("optional ") << "-->" << _val << "<--" << std::endl]
            | required[std::cout << val("required") << std::endl]
            | ordinal[std::cout << val("val ordinal (") << _val << ")" << std::endl]
            | identifier[std::cout << val("val identifier(") << _val << ")" << std::endl];
    }

    // Punctuation tokens with the default attribute type. Despite the
    // "paranthesis" spelling, these two patterns describe curly braces.
    lex::token_def<> left_paranthesis, right_paranthesis, colon;

    // Keywords, declared with lex::omit as their attribute type.
    lex::token_def<lex::omit> namespace_, event, optional, required;

    // Digit sequence exposed as a 32-bit unsigned integer attribute.
    lex::token_def<boost::uint32_t> ordinal;

    // Word-character sequence with the default attribute type.
    lex::token_def<> identifier;
};

// Qi grammar over the token stream produced by the lexer.
//
// No skipper is declared. The grammar currently consists of the single
// rule `event`, which accepts one `optional` token.
template <typename Iterator>
struct grammar : qi::grammar<Iterator>
{
    // tok: the token definitions object; only its `optional` member is used.
    template <typename TokenDef>
    grammar(TokenDef const& tok)
        : grammar::base_type(event)
    {
        using px::val;

        // Echo a marker each time the `optional` token is matched.
        event = tok.optional[std::cout << val("== OPTIONAL") << std::endl];
    }

    qi::rule<Iterator> start;  // declared, but never assigned or referenced in this grammar
    qi::rule<Iterator> event;  // entry rule handed to base_type
};

// Input handed to the lexer/parser by main().
// A longer sample covering more of the token set is kept here for reference:
// std::string test = "namespace{ event { OPtiONAL 124:hello_world RequireD} } ";

// Minimal input: a single upper-case `optional` keyword, no whitespace.
std::string test = "OPTIONAL";

int main()
{
    typedef lex::lexertl::token<std::string::iterator, boost::mpl::vector<boost::uint32_t, std::string> > token_type;
    typedef lex::lexertl::actor_lexer<token_type> lexer_type;
    typedef tokens<lexer_type>::iterator_type iterator_type;

    tokens<lexer_type> token_lexer;
    grammar<iterator_type> grammar(token_lexer);

    std::string::iterator first = test.begin();
    std::string::iterator last = test.end(); 
    bool r; 

    r = lex::tokenize_and_parse(first, last, token_lexer, grammar);

    if(r)
        ;
    else
    {
        std::cout << "parsing failed" << std::endl;
    }
   /* 
    lexer_type::iterator_type iter; 

    try
    {
        iter = token_lexer.begin(first,last);
    }
    catch(std::exception & e)
    {
        std::cout << e.what() << std::endl;
    }

    lexer_type::iterator_type end = token_lexer.end();

    while (iter != end && token_is_valid(*iter))
        ++iter;
   */ 
}

This grammar fails:

// Variant of the grammar that the question reports as failing.
//
// Compared with the first listing, the grammar itself and the `event` rule
// now declare `void()` as signature and `ascii::space_type` as skipper,
// while `start` keeps the skipper-less `qi::rule<Iterator>` type.
// NOTE(review): presumably a character-level skipper such as
// ascii::space_type cannot be applied to a token iterator; the working
// listing in this document uses qi::in_state_skipper<Lexer> together with a
// dedicated "WS" lexer state instead.
template <typename Iterator>
struct grammar : qi::grammar<Iterator,void(),ascii::space_type>
{
    // tok: the token definitions object; only its `optional` member is used.
    template <typename TokenDef>
    grammar(TokenDef const& tok)
      : grammar::base_type(event)
    {
      //start = event;
      // Echo a marker each time the `optional` token is matched.
      event = tok.optional [ std::cout << px::val("== OPTIONAL") << std::endl];
    }

    qi::rule<Iterator> start;                          // unused; still declared without a skipper
    qi::rule<Iterator,void(),ascii::space_type> event; // entry rule handed to base_type
};
Was it helpful?

Solution

As with most of Spirit: if you want to do something realistic, you have to spend hours looking for a solution that isn't documented but is buried in examples and mailing lists. I am seriously considering moving to Ragel or flex/bison. The problem isn't that the machinery isn't available; it's that it is undocumented.

In this case, when looking at the Lex documentation, one gets thoroughly misled by the lex parser API, which has a tokenize_and_phrase_parse function. It doesn't really work when you try to use it like qi::phrase_parse, nor does the documentation explain how to wire up a skipper using this function.

Getting a space skipper into the parser is done by altering the lexer and then using an undocumented Qi skipper construct (qi::in_state_skipper) when declaring the grammar and its rules. You can see this in action in the Lex example directory (example 5). Code that compiles and works:

#include <boost/spirit/include/phoenix_core.hpp>
#include <boost/spirit/include/lex_lexertl.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/cstdint.hpp>
#include <string>
#include<exception>

namespace lex = boost::spirit::lex;
namespace px = boost::phoenix;
namespace qi = boost::spirit::qi;
namespace ascii = boost::spirit::ascii;

// Lexer definition for the event DSL, with whitespace moved to its own state.
//
// The default state holds the language tokens, each with a tracing semantic
// action that echoes the match to std::cout. A separate lexer state named
// "WS" holds the whitespace pattern so that it can be used as a skipper.
// Keyword patterns carry the (?i:...) flag, i.e. they are written to be
// matched without regard to case.
template <typename Lexer>
struct tokens : lex::lexer<Lexer>
{
    tokens()
        : left_paranthesis("\"{\"")    // pattern text is the brace wrapped in double quotes
        , right_paranthesis("\"}\"")
        , colon(":")
        , namespace_("(?i:namespace)")
        , event("(?i:event)")
        , optional("(?i:optional)")
        , required("(?i:required)")
        , ordinal("\\d+")
        , identifier("\\w+")
    {
        using boost::spirit::lex::_val;
        using px::val;

        // Language tokens, listed in exactly the original order.
        this->self
            = left_paranthesis[std::cout << val("lpar") << std::endl]
            | right_paranthesis[std::cout << val("rpar") << std::endl]
            | colon[std::cout << val("colon") << std::endl]
            | namespace_[std::cout << val("kw namesapce") << std::endl]
            | event[std::cout << val("kw event") << std::endl]
            | optional[std::cout << val("optional ") << "-->" << _val << "<--" << std::endl]
            | required[std::cout << val("required") << std::endl]
            | ordinal[std::cout << val("val ordinal (") << _val << ")" << std::endl]
            | identifier[std::cout << val("val identifier(") << _val << ")" << std::endl];

        // Runs of blanks, tabs and newlines live in the "WS" state only.
        this->self("WS") = lex::token_def<>("[ \\t\\n]+");
    }

    // Punctuation tokens with the default attribute type. Despite the
    // "paranthesis" spelling, these two patterns describe curly braces.
    lex::token_def<> left_paranthesis, right_paranthesis, colon;

    // Keywords, declared with lex::omit as their attribute type.
    lex::token_def<lex::omit> namespace_, event, optional, required;

    // Digit sequence exposed as a 32-bit unsigned integer attribute.
    lex::token_def<boost::uint32_t> ordinal;

    // Word-character sequence with the default attribute type.
    lex::token_def<> identifier;
};

// Qi grammar over the token stream, skipping via a lexer state.
//
// The grammar and its `event` rule declare qi::in_state_skipper<Lexer> as
// their skipper type. The grammar currently consists of the single rule
// `event`, which accepts one `optional` token.
template <typename Iterator, typename Lexer>
struct grammar : qi::grammar<Iterator, qi::in_state_skipper<Lexer> >
{
    // tok: the token definitions object; only its `optional` member is used.
    template <typename TokenDef>
    grammar(TokenDef const& tok)
        : grammar::base_type(event)
    {
        using px::val;

        // Echo a marker each time the `optional` token is matched.
        event = tok.optional[std::cout << val("== OPTIONAL") << std::endl];
    }

    qi::rule<Iterator> start;                                // declared, but never assigned or referenced
    qi::rule<Iterator, qi::in_state_skipper<Lexer> > event;  // entry rule handed to base_type
};

// Input handed to the lexer/parser by main().
// A longer sample covering more of the token set is kept here for reference:
// std::string test = "namespace{ event { OPtiONAL 124:hello_world RequireD} } ";

// Single upper-case `optional` keyword with one blank on either side
// (presumably to exercise the whitespace skipper).
std::string test = " OPTIONAL ";

int main()
{
    typedef lex::lexertl::token<std::string::iterator, boost::mpl::vector<boost::uint32_t, std::string> > token_type;
    typedef lex::lexertl::actor_lexer<token_type> lexer_type;
    typedef tokens<lexer_type>::iterator_type iterator_type;

    tokens<lexer_type> token_lexer;
    grammar<iterator_type,tokens<lexer_type>::lexer_def> grammar(token_lexer);

    std::string::iterator it = test.begin();
    iterator_type first = token_lexer.begin(it, test.end());
    iterator_type last = token_lexer.end();

    bool r; 

    r = qi::phrase_parse(first, last, grammar, qi::in_state("WS")[token_lexer.self]);

    if(r)
        ;
    else
    {
        std::cout << "parsing failed" << std::endl;
    }
   /* 
    lexer_type::iterator_type iter; 

    try
    {
        iter = token_lexer.begin(first,last);
    }
    catch(std::exception & e)
    {
        std::cout << e.what() << std::endl;
    }

    lexer_type::iterator_type end = token_lexer.end();

    while (iter != end && token_is_valid(*iter))
        ++iter;
   */ 
}
Licensed under: CC-BY-SA with attribution
Not affiliated with StackOverflow
scroll top