problem parsing INI section using Boost.Spirit

69 Views Asked by At

I'm studying parsers for a college project. I found out about Boost.Spirit and decided to use it. After reading its documentation and implementing some basic examples, I tried to make a parser that parses a small INI section. The output of the program is:

Program error

<section>
  <try>[Section]\nkey1 = val</try>
  <key>
    <try>Section]\nkey1 = valu</try>
    <success>]\nkey1 = value1\nkey2</success>
    <attributes>[]</attributes>
  </key>
  <fail/>
</section>

--------------------
Parsing failed
--------------------

Code

#include <boost/spirit/include/qi.hpp>

#include <iostream>
#include <string>

using namespace std;

namespace client 
{
    namespace qi    = boost::spirit::qi;
    namespace ascii = boost::spirit::ascii;
    
    // Grammar for one INI section: a "[name]" header line followed by
    // zero or more "key = value" lines. It exposes no attribute; it only
    // reports whether the input matches.
    //
    // The grammar and all of its rules are declared with the ascii::space
    // skipper, so the skipper is active inside every rule below,
    // including `key`.
    template <typename Iterator>
    struct ini_grammar : qi::grammar<Iterator, ascii::space_type>
    {
        // `section` is the start rule of the grammar.
        ini_grammar() : ini_grammar::base_type(section)
        {
            using qi::char_;
           
            // One or more letters, digits or underscores.
            key     = +char_("a-zA-Z_0-9");
            // The same character class is used on both sides of '='.
            pair    = key >> '=' >> key;
            // Header, a literal newline, then newline-terminated pairs.
            // NOTE(review): ascii::space also matches '\n', so the skipper
            // may consume the newline before these literals are tried.
            section =
                '[' >> key  >> ']' 
                >> '\n'
                >> *(pair >> '\n')
                ;
            // Names the rules for the debug trace (active only when
            // BOOST_SPIRIT_DEBUG is defined).
            BOOST_SPIRIT_DEBUG_NODES((key)(pair)(section))
        }

            // All three rules share the grammar's skipper type.
            qi::rule<Iterator, ascii::space_type> section, pair, key;
    };
}


int main() 
{
    using boost::spirit::qi::phrase_parse;
    using boost::spirit::ascii::space;

    string ini_section =
        "[Section]\n"
        "key1 = value1\n"
        "key2 = value2\n";

    typedef client::ini_grammar<string::const_iterator> ini_grammar;
    ini_grammar grammar;
    
    string::const_iterator iter = ini_section.begin();
    string::const_iterator end  = ini_section.end();

    bool r = phrase_parse(iter, end, grammar, space);

    if (r == true) 
    {
        cout << "-------------------------\n";
        cout << "Parsing succeeded\n";
        cout << "-------------------------\n";
    }

    else 
    {
        cout << "-------------------------\n";
        cout << "Parsing failed\n";
        cout << "-------------------------\n";
    }
    return 0;
}

For some context, I read this SO thread. I used the code provided in the question as a reference and tried to apply some of the hints given in the answers.

1

There is 1 best solution below

3
sehe On

Key should not use a skipper. It is a "lexeme".

Next up, '\n' can never be matched as your skipper already eats it.

See Boost spirit skipper issues

Since you intend blank space to be insignificant in those rules, but NOT newlines, use qi::blank, not qi::space. Also, match qi::eol for more portability (DOS/UNIX line ends). For finishing touches, accept multiple line ends if blank/empty lines are accepted.

While you're at it, don't expose the choice of skipper as it's essential to your grammar.

I've also elected to use qi::space/qi::blank for consistency with qi::char_.

Live On Coliru

// #define BOOST_SPIRIT_DEBUG
#include <boost/spirit/include/qi.hpp>
#include <iostream>

namespace qi = boost::spirit::qi;
namespace client {
    /// Grammar for a single INI section: a `[name]` header line followed by
    /// zero or more `key = key` lines. No attribute is exposed.
    ///
    /// The grammar itself is declared without a skipper; the blank skipper
    /// is installed internally by the `start` rule, so callers use plain
    /// `qi::parse`.
    template <typename Iterator>
    struct ini_grammar : qi::grammar<Iterator> {
        ini_grammar() : ini_grammar::base_type(start) {
            using qi::blank;
            using qi::char_;
            using qi::eol;

            // One or more letters, digits or underscores.
            key = +char_("a-zA-Z_0-9");

            // Both sides of '=' use the same character class.
            pair = key >> '=' >> key;

            // Header line, then pair lines; each line may be followed by
            // several line ends.
            section = '[' >> key >> ']' >> +eol
                   >> *(pair >> +eol);

            // Entry point: run `section` under the blank skipper.
            start = qi::skip(blank)[section];

            BOOST_SPIRIT_DEBUG_NODES((key)(pair)(section))
        }

      private:
        using Skipper = qi::blank_type;

        qi::rule<Iterator>          start;         // declared without a skipper
        qi::rule<Iterator, Skipper> section, pair; // parsed under the blank skipper
        qi::rule<Iterator>          key;           // declared without a skipper
    };
} // namespace client

/// Parses a sample INI section with client::ini_grammar and prints whether
/// the grammar matched. If the grammar matched only a prefix of the input,
/// the unconsumed remainder is printed as well.
int main() {
    std::string const ini_section = R"([Section]
key1 = value1
key2 = value2
)";

    using It = std::string::const_iterator;
    client::ini_grammar<It> grammar;

    It iter = ini_section.begin(), end = ini_section.end();
    // Unqualified call, as in the original: the parse function is found
    // through argument-dependent lookup on the grammar argument.
    bool const matched = parse(iter, end, grammar);

    std::cout << "-------------------------\n";
    std::cout << (matched ? "Parsing succeeded\n" : "Parsing failed\n");
    std::cout << "-------------------------\n";

    // A successful parse may still stop before the end of the input, leaving
    // `iter` at the first unconsumed character. Report that instead of
    // silently ignoring the rest.
    if (matched && iter != end)
        std::cout << "Remaining unparsed input: '" << std::string(iter, end) << "'\n";
}

Prints

-------------------------
Parsing succeeded
-------------------------

BONUS

With actual attribute propagation into datastructure:

using Key     = std::string;                 // name of a section or of an entry
using Value   = std::string;                 // text of an entry's value
using Section = std::map<Key, Value>;        // entries of one section
using IniFile = std::multimap<Key, Section>; // sections by name; a name may repeat

And a better value rule that doesn't disallow spaces and other special characters.

And using a custom skipper to also ignore comments.

Live On Coliru

// #define BOOST_SPIRIT_DEBUG
#include <boost/fusion/adapted.hpp>
#include <boost/spirit/include/qi.hpp>
#include <iomanip>
#include <iostream>
#include <map>

// Data model that the grammar below fills in.
using Key     = std::string;                 // name of a section or of an entry
using Value   = std::string;                 // text of an entry's value
using Section = std::map<Key, Value>;        // entries of one section
using IniFile = std::multimap<Key, Section>; // sections by name; a name may repeat

namespace qi = boost::spirit::qi;
namespace client {
    /// Qi grammar that parses a sequence of INI sections into an IniFile.
    ///
    /// Each section is a `[name]` header line followed by `key = value`
    /// lines. Blanks and `#` comments are skipped between tokens. The
    /// skipper is installed internally by `start`, so callers use plain
    /// `qi::parse` and pass an IniFile as the attribute.
    template <typename Iterator>
    struct ini_grammar : qi::grammar<Iterator, IniFile()> {
        ini_grammar() : ini_grammar::base_type(start) {
            using qi::char_;
            using qi::eol;

            // Skipped input: a blank, or '#' followed by everything up to
            // (but not including) the line end.
            skipper = qi::blank | ('#' >> *(char_ - eol));

            // Names: one or more letters, digits or underscores.
            key = +char_("a-zA-Z_0-9");
            // Values: any characters up to the line end, possibly none.
            value = *(char_ - eol);
            pair = key >> '=' >> value;

            // Header line, then entry lines; every line may be followed by
            // several line ends.
            section = '[' >> key >> ']' >> +eol
                   >> *(pair >> +eol);
            file = *section;

            // Entry point: parse `file` under a copy of the skipper rule.
            start = qi::skip(copy(skipper))[file];

            BOOST_SPIRIT_DEBUG_NODES((key)(value)(pair)(section)(file))
        }

      private:
        using Skipper = qi::rule<Iterator>;
        Skipper skipper; // blanks and '#' comments

        using Entry        = std::pair<Key, Value>;
        using NamedSection = std::pair<Key, Section>;

        // `start` has no skipper of its own; it installs one for `file`.
        qi::rule<Iterator, IniFile()>               start;
        qi::rule<Iterator, IniFile(), Skipper>      file;
        qi::rule<Iterator, NamedSection(), Skipper> section;
        qi::rule<Iterator, Entry(), Skipper>        pair;

        // Lexemes: declared without a skipper.
        qi::rule<Iterator, Key()>   key;
        qi::rule<Iterator, Value()> value;
    };
} // namespace client

int main() {
    std::string const ini_section = R"([First]
key1 = value1
key2 = value2

[otherSection]
key3 = It should really not matter how much spaces are included 
key4 = Or equal signs ("=") for that matter

# comments should be comments
# just like empty/blank lines
    
[First] #second first, actually
key5 = More
)";

    using It = std::string::const_iterator;
    client::ini_grammar<It> section;

    It iter = ini_section.begin(), end = ini_section.end();
    IniFile ini;
    bool r = parse(iter, end, section, ini);

    if (r == true) {
        for (auto& [name,entries] : ini) {
            std::cout << "Section " << quoted(name) << "\n";
            for (auto& [k, v] : entries)
                std::cout << "\t" << quoted(k) << " -> " << quoted(v) << "\n";
        }
    } else {
        std::cout << "-------------------------\n";
        std::cout << "Parsing failed\n";
        std::cout << "-------------------------\n";
    }
}

Prints

Section "First"
    "key1" -> "value1"
    "key2" -> "value2"
Section "First"
    "key5" -> "More"
Section "otherSection"
    "key3" -> "It should really not matter how much spaces are included "
    "key4" -> "Or equal signs (\"=\") for that matter"

Further examples

To have a flavor of INI file that expects quoted strings with optional escape characters instead of just accepting "bare strings" like the above: