Question

I'm currently trying to get some work done using boost::spirit::qi::phrase_parse but I'm not able to figure this out by myself.

Worth mentioning: I'm totally new to boost and so to boost::spirit.

I'm getting an input of the form "{A [B C] -> F [D E], C ->E,B->Z}"

I'd like to parse this type of input into a std::map<std::string, std::string>. The key should hold every std::string before the "->" and the value every std::string after the "->" until the ',' occurs.

Furthermore the '[' and ']' shouldn't be stored.

So the content of the std::map should be something like this after the parsing succeeded:

     {
       ("A", "F"),
       ("A", "D E"),
       ("B C", "F"),
       ("B C", "D E"),
       ("C", "E"),
       ("B", "Z")
     }

My first approach was to store all the keys/values in a std::vector<std::string>.

    #include <boost/spirit/include/qi.hpp>

    #include <iostream>
    #include <string>
    #include <vector>

    // Parses the sample input into a flat list of tokens (keys and values in
    // order of appearance) and prints each token with its index.
    int main()
    {
        using boost::spirit::qi::phrase_parse;
        using boost::spirit::qi::char_;
        using boost::spirit::qi::lexeme;

        std::string input = "{A [B C] -> F [D E], C    ->E,B->Z}";
        std::string::const_iterator beg = input.begin(), end = input.end();

        std::vector<std::string> sdvec;

        // Grammar: '{' entry (',' entry)* '}' where an entry is
        //   token+ '-' '>' token+
        // and a token is either a bare alphanumeric word or a bracketed group
        // whose inner blanks are kept (the character set includes ' ').
        bool r = phrase_parse(  beg,
                                end,
                                '{' >> (   +(+char_("a-zA-Z0-9") | lexeme[('[' >> +char_("a-zA-Z0-9 ") >> ']')])
                                        >> '-' >> '>'
                                        >> +(+char_("a-zA-Z0-9") | lexeme[('[' >> +char_("a-zA-Z0-9 ") >> ']')])
                                       ) % ',' >> '}',
                                boost::spirit::ascii::space,
                                sdvec
                           );

        // phrase_parse reports a grammar mismatch through its return value and
        // may also succeed after consuming only a prefix, so both the result
        // and the iterator position have to be checked. Looking at the
        // iterators alone would report success for a rejected empty input.
        if(r && beg == end) {
            std::cout << "Parsing succeeded!" << std::endl;
        } else {
            std::cout << "Parsing failed!" << std::endl;
        }

        // size_type avoids the signed/unsigned comparison against size().
        for(std::vector<std::string>::size_type i = 0; i < sdvec.size(); ++i) {
            std::cout << i << ": " << sdvec[i] << std::endl;
        }

        return 0;
    }

Executing this I'm getting each found std::string as an entry of the std::vector:

    Parsing succeeded!
    0: A
    1: B C
    2: F
    3: D E
    4: C
    5: E
    6: B
    7: Z

But I have no idea how to parse these values into a std::map<std::string, std::string> using boost::spirit::qi::phrase_parse, as simply replacing the std::vector with a std::map produces compile errors.

EDIT:

Actually I found something that's quite what I need: http://boost-spirit.com/home/articles/qi-example/parsing-a-list-of-key-value-pairs-using-spirit-qi/

I adapted the code from this article to my problem:

    #include <boost/spirit/include/qi.hpp>
    #include <boost/fusion/include/std_pair.hpp>

    #include <iostream>
    #include <string>
    #include <vector>
    #include <map>

    namespace qi = boost::spirit::qi;

    // Grammar that reads "{key -> value, key -> value, ...}" into a
    // std::map<std::string, std::string>. The rules declare no skipper, so
    // blanks between tokens are matched explicitly with *lit(' ').
    template <typename Iterator>
    struct keys_and_values
      : qi::grammar<Iterator, std::map<std::string, std::string>()>
    {
        typedef std::map<std::string, std::string>  result_map;
        typedef std::pair<std::string, std::string> result_entry;

        keys_and_values()
          : keys_and_values::base_type(query)
        {
            using qi::char_;
            using qi::lexeme;
            using qi::lit;

            // '{' entry (',' entry)* '}' with optional blanks around entries.
            query = lit('{') >> *lit(' ')
                 >> pair >> *(lit(',') >> *lit(' ') >> pair)
                 >> *lit(' ') >> lit('}');

            // The "-> value" part is optional.
            pair  = key >> -(*lit(' ') >> lit("->") >> *lit(' ') >> value);

            // A bare alphanumeric word, or a bracketed group that may contain blanks.
            key   = +char_("a-zA-Z0-9")
                  | lexeme[(lit('[') >> +char_("a-zA-Z0-9 ") >> lit(']'))];
            value = +char_("a-zA-Z0-9")
                  | lexeme[(lit('[') >> +char_("a-zA-Z0-9 ") >> lit(']'))];
        }

        qi::rule<Iterator, result_map()>   query;
        qi::rule<Iterator, result_entry()> pair;
        qi::rule<Iterator, std::string()>  key;
        qi::rule<Iterator, std::string()>  value;
    };

    // Parses a sample "{key -> value, ...}" string with keys_and_values and
    // prints the values stored under a few known keys.
    int main()
    {
        std::string input = "{AB -> CD, E -> F, G -> HI, [J K L] -> [M N O]                   }";

        std::string::iterator begin = input.begin();
        std::string::iterator end = input.end();

        keys_and_values<std::string::iterator> p;    // create instance of parser
        std::map<std::string, std::string> m;        // map to receive results
        bool result = qi::phrase_parse(begin, end, p, boost::spirit::ascii::space, m);   // returns true if successful

        // The parse only counts as successful when the grammar matched AND the
        // whole input was consumed; checking the iterators alone would report
        // success for an empty input that the grammar rejected.
        if(result && begin == end) {
            std::cout << "Parsing succeeded!" << std::endl;
        } else {
            std::cout << "Parsing failed!" << std::endl;
        }

        // Note: operator[] default-inserts a missing key, so an absent key
        // simply prints as an empty string here.
        std::cout << m["AB"] << "\t" << m["E"] << "\t" << m["G"] << "\t" << m["J K L"] << std::endl;

        return 0;
    }

The result of this is more or less what I need:

Parsing succeeded!
CD  F   HI  M N O

My last problem to solve is a case like A [B C] -> F [D E].

Is there any way to get those as four separate key-value pairs ("A", "F"), ("A", "D E"), ("B C", "F"), ("B C", "D E") into my std::map<std::string, std::string> m?

Or maybe it's easier to parse it into a std::map<std::vector<std::string>, std::vector<std::string> > where each std::vector<std::string> holds all keys/values?

For example:

in: "{A [B C] -> F [D E], C ->E,B->Z}"
out: { ({"A", "B C"}, {"F", "D E"}), ({"C"}, {"E"}), ({"B"}, {"Z"}) }

Thanks for any help!

Was it helpful?

Solution

I think you are quite close to your goal, so I will skip the combinatorial part :-) The parser does what it is supposed to do: it checks the syntax and tokenizes the data. It then passes the keys, the values and the output map (or multimap) as arguments to the phoenix function inserter, where you can insert whatever you need into your map (or multimap).

#if __cplusplus >= 201103L
#define BOOST_RESULT_OF_USE_DECLTYPE
#endif
#define BOOST_SPIRIT_USE_PHOENIX_V3
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix.hpp>
#include <iostream>
#include <algorithm>
#include <iterator>
#include <iomanip>
#include <vector>
#include <map>

namespace qi = boost::spirit::qi;
namespace ascii=boost::spirit::ascii;

typedef std::map< std::string,std::string > TMap;
//typedef std::multimap< std::string,std::string > TMap;

// Function object used from the parser's semantic action. It currently only
// dumps the parsed keys and values to std::cout; filling the output container
// is left to the reader (see the placeholder at the end of operator()).
struct SMapInsert
{
    // Nested metafunction reporting a void return type
    // (presumably consulted by boost::result_of / Phoenix -- confirm).
    template <typename Arg1,typename Arg2,typename Arg3>
    struct result
    {
        typedef void type;
    };

    // out  : container meant to receive the key/value combinations (untouched here)
    // keys : range of parsed key strings
    // vals : range of parsed value strings
    template <typename Arg1,typename Arg2,typename Arg3>
    void operator()( Arg1&out, Arg2&keys, Arg3&vals ) const
    {
        printSection( "Keys:", keys );
        printSection( "Vals:", vals );
        // your map here...
        // out.insert
    }

private:
    // Prints a heading line followed by every item wrapped in backticks.
    template <typename Range>
    static void printSection( const char *heading, const Range &items )
    {
        std::cout << heading << std::endl;
        for( const auto &item : items )
            std::cout << std::left << "`" << item << "`" << std::endl;
    }
};

// Phoenix function wrapper around SMapInsert, so it can be called with
// placeholders (qi::_val, qi::_1, qi::_2) from the semantic action in main().
boost::phoenix::function< SMapInsert > inserter;

// Parses the sample input, routing every "keys -> values" group through
// `inserter`, then prints the resulting map or the unparsed remainder.
int main()
{
    typedef std::string::const_iterator It;

    std::string input = "{A [B C] -> F [D E], C ->E,B->Z}";
    TMap data;

    It cursor = input.begin();
    It stop = input.end();

    // A bare alphanumeric word; the rule has no skipper, so a word cannot span blanks.
    qi::rule< It,std::string() > token=+qi::alnum;

    // One or more tokens, each either a bare word or a bracketed group that keeps its blanks.
    qi::rule< It,ascii::space_type,std::vector< std::string >() >
        keyOrvalue = +( token  | ( '[' >> qi::lexeme[ +qi::char_("a-zA-Z0-9 ") ] >> ']' ) );

    // '{' group (',' group)* '}' where each group hands its keys and values to inserter.
    qi::rule< It,ascii::space_type, TMap() >
        root = '{' >> ( ( keyOrvalue >> "->" >> keyOrvalue )[ inserter( qi::_val, qi::_1, qi::_2 ) ] ) % ',' >> '}';

    std::cout << "input: `" << input << "`" << std::endl;

    const bool matched = qi::phrase_parse( cursor, stop, root, ascii::space, data );
    if( !matched || cursor != stop )
    {
        // Show whatever the parser could not consume.
        std::cout << "parsing failed:" << std::string( cursor,stop ) << std::endl;
        return 0;
    }

    for( const auto &entry : data )
        std::cout << std::left << std::setw(10) << entry.first << std::setw(10) << entry.second << std::endl;

    return 0;
}

OTHER TIPS

Edit: This is an alternative way to do it, but I think it is a lot less clear than G. Civardi's solution.

As you observed, parsing into a map<vector<string>,vector<string>> would be the easiest way, and you can later manipulate it to get the map you really want. The solution below uses an intermediate struct (basically equivalent to map<vector,vector>) and then uses the customization point transform_attribute in order to fill the multimap (since there are keys that repeat).
PS: Please forgive the use of range-based for loops; change them if you can't use C++11.

Running on coliru.

#define BOOST_SPIRIT_DEBUG

#include <boost/spirit/include/qi.hpp>
#include <boost/fusion/include/adapt_struct.hpp>

#include <iostream>
#include <string>
#include <vector>
#include <map>

namespace qi=boost::spirit::qi;
namespace ascii=boost::spirit::ascii;

// One "keys -> values" group from the input: every key listed on the left of
// the arrow and every value listed on the right.
struct key_value
{
    std::vector<std::string> keys;    // tokens parsed before "->"
    std::vector<std::string> values;  // tokens parsed after "->"
};

// Parse result before it is expanded into the multimap: the list of all
// comma-separated key/value groups, in input order.
struct intermediate_struct
{
    std::vector<key_value> data;  // one element per "keys -> values" group
};

// Adapt key_value as a Boost.Fusion sequence (members listed in declaration
// order) so a parser sequence can be assigned to it member by member.
BOOST_FUSION_ADAPT_STRUCT(
    key_value,
    (std::vector<std::string>, keys)
    (std::vector<std::string>, values)
)

// Adapt intermediate_struct as a single-member Boost.Fusion sequence.
BOOST_FUSION_ADAPT_STRUCT(
    intermediate_struct,
    (std::vector<key_value>, data)
)

namespace boost{ namespace spirit{ namespace traits
{

    template <>
    struct transform_attribute<std::multimap<std::string,std::string>,intermediate_struct,qi::domain>
    {
        typedef intermediate_struct type;

        static type pre(std::multimap<std::string,std::string>& )
        { 
            return intermediate_struct();
        }
        static void post(std::multimap<std::string,std::string>& map, intermediate_struct const& intermediate)
        {
            for(const auto& key_val : intermediate.data)
            {
                for(const auto& key : key_val.keys)
                {
                    for(const auto& val : key_val.values)
                    {
                        map.insert(typename std::multimap<std::string,std::string>::value_type(key,val));
                    }
                } 
            }
        }  
        static void fail(std::multimap<std::string,std::string>&){} 
    };

}}}

int main()
{
    std::string input = "{A [B C] -> F [D E], C ->E,B->Z}";
    std::string::const_iterator iter = input.begin(), end = input.end();

    std::multimap<std::string,std::string> sdmap;

    qi::rule<std::string::const_iterator,std::string(),ascii::space_type> text_rule =
        +qi::char_("a-zA-Z0-9") | qi::lexeme[('[' >> +qi::char_("a-zA-Z0-9 ") >> ']')];
    qi::rule<std::string::const_iterator,std::vector<std::string>(),ascii::space_type> keys_rule =
        +text_rule;
    qi::rule<std::string::const_iterator,std::vector<std::string>(),ascii::space_type> values_rule =
        +text_rule;
    qi::rule<std::string::const_iterator,intermediate_struct(),ascii::space_type> map_rule =
        qi::eps >> ('{' >> (keys_rule >> "->" >> values_rule)%',' >> '}');

    BOOST_SPIRIT_DEBUG_NODES( (map_rule)(keys_rule)(values_rule) );

    bool r = qi::phrase_parse(  iter, 
                            end,
                            map_rule,
                            ascii::space,
                            sdmap
                        );

    if(r && (iter == end)) {
        std::cout << "Parsing succeeded!" << std::endl;
        for(const auto& pair : sdmap) {
            std::cout << pair.first << ": " << pair.second << std::endl;
        }
    } else {
        std::cout << "Parsing Failed!" << std::endl;   
        std::cout << "Unparsed: " << std::string(iter,end) << std::endl;
    }

    return 0;
}
Licensed under: CC-BY-SA with attribution
Not affiliated with StackOverflow
scroll top