Boost: parsing only variables that were previously declared

I put together a parser using the Boost libraries, based on various sources on the net. It works (although it is not as clean as I would like), but I ran into a specific problem. In the first part of the parser, I parse the name of the function, then the list of arguments enclosed in parentheses. Later, when parsing the actual expression, in the `factor` rule I allow numbers and variables to be parsed. However, I would like to parse only those variables that were previously declared in the `vars` rule. Here is my grammar:

// Qi grammar for input of the form `name(vars) = expr`.
// The start rule is `all`; its attribute is an `expression`, and every rule
// in this version is declared with the ascii::space_type skipper.
template<typename Iterator>
  struct exp_parser : qi::grammar<Iterator, expression(), ascii::space_type>
  {
    exp_parser() : exp_parser::base_type(all)
    {
      using qi::_val;
      using qi::_1;
      using qi::char_;
      using qi::double_;
      using qi::lit;
      using phoenix::at_c;
      using phoenix::push_back;
      using phoenix::bind;

      // Header and body: each semantic action copies a sub-rule's attribute
      // into the matching member (index 0, 1, 2) of the `expression` result.
      all =
        name [at_c<0>(_val) = _1] >> '(' >> vars [at_c<1>(_val) = _1] >> ')' >> '='
        >> expr [at_c<2>(_val) = _1];

      // Parsing of actual expression
      // expr: one term followed by any number of `+ term` / `- term`,
      // folded into _val with compound assignment.
      expr =
          term                            [_val = _1]
          >> *(   ('+' >> term            [_val += _1])
              |   ('-' >> term            [_val -= _1])
            );

      // term: same shape as expr, one level down, for `*` and `/`.
      term =
          factor                          [_val = _1]
          >> *(   ('*' >> factor          [_val *= _1])
              |   ('/' >> factor          [_val /= _1])
            );

      // factor: an operand, a parenthesised expression, or a prefix operator
      // (unary -, sin, cos, tan, unary +) applied to another factor.
      // Note: `simple` below also has a '(' expr ')' alternative.
      factor =
          simple                          [_val = _1]
          |   '(' >> expr                 [_val = _1] >> ')'
          |   ('-' >> factor              [_val = bind(make_unary, UN_OP::MIN, _1)])
          |   ("sin" >> factor            [_val = bind(make_unary, UN_OP::SIN, _1)])
          |   ("cos" >> factor            [_val = bind(make_unary, UN_OP::COS, _1)])
          |   ("tan" >> factor            [_val = bind(make_unary, UN_OP::TAN, _1)])
          |   ('+' >> factor              [_val = _1]);

      // Prototyping of expression
      // prtctd lists the function names; `var` refuses to start where one of
      // them matches. `var` accepts any single lowercase letter: nothing here
      // ties it to the letters listed by `vars`, and `vars` does not use `var`.
      prtctd %= lit("sin") | lit("cos") | lit("tan");
      var    %= !prtctd >> char_('a','z');
      num    %= double_;
      simple %= var | num | ('(' >> expr >> ')');
      name   %= ((char_('a','z') | char_('A','Z') ) >> *(char_('a','z') | char_('A','Z') | char_('0','9') ));
      vars   %= (char_('a','z') >> *(',' >> char_('a','z')));
    }
    // Rules that produce syntax-tree nodes.
    qi::rule<Iterator, ast(), ascii::space_type> expr, term, factor, simple;

    // Start rule and the rules for the header parts and the operands.
    qi::rule<Iterator, expression(), ascii::space_type> all;
    qi::rule<Iterator, std::string(), ascii::space_type> name, prtctd;
    qi::rule<Iterator, std::vector<char>(), ascii::space_type> vars;
    qi::rule<Iterator, char(), ascii::space_type> var;
    qi::rule<Iterator, double(), ascii::space_type> num;
  };

      

And this is the structure I am using to store it:

  // Result of parsing `name(arguments) = <expression body>`.
  // The grammar's `all` rule fills these members by index via at_c<0..2>,
  // so their order matters.
  struct expression {
    std::string name;             // attribute of the `name` rule
    std::vector<char> arguments;  // attribute of the `vars` rule
    ast syntax_tree;              // attribute of the `expr` rule
  };

      

Now, how can I access the `std::vector<char>` from within the `factor` rule, so that I only parse the correct (i.e. declared) variables?

Also, I am new to Boost and am using this as an exercise to start learning it. If anyone has any advice on how I can clean up this code, please let me know.

Thanks in advance!

+3


source to share


1 answer


This is a major anti-pattern in Spirit:

  all =
    name [at_c<0>(_val) = _1] >> '(' >> vars [at_c<1>(_val) = _1] >> ')' >> '='
    >> expr [at_c<2>(_val) = _1];

      

In fact, I'm convinced that the samples you were looking at show better approaches. Also, I note that you have picked code from conflicting approaches (you cannot synthesize a syntax tree when the semantic actions evaluate the expression's value on the fly).

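First of all, get rid of the semantic-action mindset (see "Boost Spirit: 'Semantic actions are evil'?"). With the struct adapted as a Fusion sequence, automatic attribute propagation does the work: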

BOOST_FUSION_ADAPT_STRUCT(expression, name, arguments, syntax_tree)

all = name >> '(' >> vars >> ')' >> '=' >> expr;

      

There are many other "ills":

  • `prtctd` must be a lexeme (no skipper), so that e.g. "si\nn" does not match

  • `*(char_('a','z') | char_('A','Z') | char_('0','9') )` is just `*alnum`

  • `name` must also be a lexeme, so just

    name   = alpha >> *alnum;

  • `vars` doesn't even use `var`?

All in all, here's a simplification of those rules (assuming you've dropped the skipper from `prtctd` and from `name`):

  prtctd = lit("sin") | "cos" | "tan";
  var    = !prtctd >> ascii::lower;
  num    = double_;
  simple = var | num | '(' >> expr >> ')';
  name   = ascii::alpha >> *ascii::alnum;
  vars   = var % ',';

      

Self-contained example

Let's add a few stubs to the above so that we have something we can test:

#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix.hpp>
#include <boost/fusion/adapted.hpp>

namespace qi = boost::spirit::qi;
namespace ascii = boost::spirit::ascii;
namespace phoenix = boost::phoenix;

// Stand-in syntax-tree type for the demo: it can be built from, assigned
// from and combined with any operand, but stores nothing. Every operator
// ignores its argument and returns the object itself.
struct ast {
    ast() = default;

    // Construct from / assign from anything; the operand is discarded.
    template <typename Operand> ast(Operand&&) {}
    template <typename Operand> ast& operator=(Operand&&) { return *this; }

    // Compound arithmetic used by the grammar's semantic actions; all no-ops.
    template <typename Operand> ast& operator+=(Operand&&) { return *this; }
    template <typename Operand> ast& operator-=(Operand&&) { return *this; }
    template <typename Operand> ast& operator*=(Operand&&) { return *this; }
    template <typename Operand> ast& operator/=(Operand&&) { return *this; }

    // Always prints the fixed text "syntax_tree".
    friend std::ostream& operator<<(std::ostream& out, ast) {
        out << "syntax_tree";
        return out;
    }
};

// Parse result for `name(arguments) = <expression body>`.
// Member order matches the BOOST_FUSION_ADAPT_STRUCT call that follows.
struct expression {
    std::string name;
    std::vector<std::string> arguments;
    ast syntax_tree;

    // Streams the value as `name(arg, arg, ) = <tree>`. Every argument,
    // including the last one, is followed by ", ".
    friend std::ostream& operator<<(std::ostream& out, expression const& self) {
        out << self.name << "(";
        for (auto it = self.arguments.begin(); it != self.arguments.end(); ++it) {
            out << *it << ", ";
        }
        out << ") = " << self.syntax_tree;
        return out;
    }
};

// Adapt `expression` as a Boost.Fusion sequence (members in the order name,
// arguments, syntax_tree) so the grammar can fill it without semantic actions.
BOOST_FUSION_ADAPT_STRUCT(expression, name, arguments, syntax_tree)

// Tags for the prefix operators that the `factor` rule passes to make_unary.
enum UN_OP {
    MIN,  // used for the '-' prefix
    SIN,  // used for the "sin" prefix
    COS,  // used for the "cos" prefix
    TAN   // used for the "tan" prefix
};

// Stub for the unary-node factory used in the `factor` rule: the call
// operator accepts any arguments, ignores them and returns qi::unused.
// `make_unary` is the single constant instance handed to phoenix::bind.
struct make_unary_f {
    template <typename... Ts> qi::unused_type operator()(Ts&&...) const { return qi::unused; }
} static const make_unary = {};

// Qi grammar for input of the form `name(vars) = expr`.
// The start rule is `all` and its attribute is an `expression`. Whitespace is
// skipped (ascii::space_type) between tokens, but not inside the rules
// declared without a skipper at the bottom of the class.
// This version accepts ANY lowercase letter as a variable; it does not check
// the letter against the ones listed in the argument list.
template<typename Iterator>
  struct exp_parser : qi::grammar<Iterator, expression(), ascii::space_type>
  {
    exp_parser() : exp_parser::base_type(all)
    {
      using qi::_val;
      using qi::_1;
      using qi::double_;
      using qi::lit;
      using phoenix::bind;

      // No semantic actions: `expression` is adapted as a Fusion sequence, so
      // the attributes of name, vars and expr land in its three members.
      all = name >> '(' >> vars >> ')' >> '=' >> expr;

      // Parsing of actual expression
      // expr: one term followed by any number of `+ term` / `- term`.
      expr =
          term                   [_val = _1]
          >> *(   ('+' >> term   [_val += _1])
              |   ('-' >> term   [_val -= _1])
            );

      // term: one factor followed by any number of `* factor` / `/ factor`.
      term =
          factor                 [_val = _1]
          >> *(   ('*' >> factor [_val *= _1])
              |   ('/' >> factor [_val /= _1])
            );

      // factor: an operand, a parenthesised expression, or a prefix operator
      // (unary -, sin, cos, tan, unary +) applied to another factor.
      // Note: `simple` below also has a '(' expr ')' alternative.
      factor =
          simple                 [_val = _1]
          |   '(' >> expr        [_val = _1] >> ')'
          |   ('-' >> factor     [_val = bind(make_unary, UN_OP::MIN, _1)])
          |   ("sin" >> factor   [_val = bind(make_unary, UN_OP::SIN, _1)])
          |   ("cos" >> factor   [_val = bind(make_unary, UN_OP::COS, _1)])
          |   ("tan" >> factor   [_val = bind(make_unary, UN_OP::TAN, _1)])
          |   ('+' >> factor     [_val = _1]);

      // Prototyping of expression
      // `var` is a single lowercase letter that does not start one of the
      // function names in `prtctd`; `vars` is a comma-separated list of them.
      prtctd = lit("sin") | "cos" | "tan";
      var    = !prtctd >> ascii::lower;
      num    = double_;
      simple = var | num | '(' >> expr >> ')';
      name   = ascii::alpha >> *ascii::alnum;
      vars   = var % ',';
    }

  private:
    // Rules that take part in whitespace skipping.
    qi::rule<Iterator, ast(), ascii::space_type> expr, term, factor, simple;
    qi::rule<Iterator, expression(), ascii::space_type> all;
    qi::rule<Iterator, std::vector<std::string>(), ascii::space_type> vars;

    // lexemes (declared without a skipper)
    qi::rule<Iterator, std::string()> name, prtctd;
    qi::rule<Iterator, std::string()> var;
    qi::rule<Iterator, double()> num;
  };

int main() {
    for (std::string const& input : {
            "",
            "foo (a) = 3*8+a",
            "bar (x, y) = (sin(x) + y*y) / (x + y)",
            "oops (x, y) = (sin(x) + y*y) / (x + a)",
        })
    try {
        using It = std::string::const_iterator;
        It f = input.begin(), l = input.end();

        expression e;
        bool ok = qi::phrase_parse(f, l, exp_parser<It>{} >> qi::eoi, ascii::space, e);

        if (ok) {
            std::cout << "Parse success: '" << input << "' -> " << e << "\n";
        } else {
            std::cout << "Parse failed: '" << input << "'\n";
        }

        if (f != l)
            std::cout << "Remaining unparsed: '" << std::string(f,l) << "'\n";
    } catch(std::exception const& e) {
        std::cout << "Exception: '" << e.what() << "'\n";
    }
}

      

As expected, it still parses all non-empty inputs, including `oops`, which mistakenly uses `a` instead of `y`:

Parse failed: ''
Parse success: 'foo (a) = 3*8+a' -> foo(a, ) = syntax_tree
Parse success: 'bar (x, y) = (sin(x) + y*y) / (x + y)' -> bar(x, y, ) = syntax_tree
Parse success: 'oops (x, y) = (sin(x) + y*y) / (x + a)' -> oops(x, y, ) = syntax_tree

      



Declaring and checking

To match only the declared variables, I would use `qi::symbols<>`:

qi::symbols<char> _declared;

simple = _declared | num | '(' >> expr >> ')';

      

Now, to add the declared items, we'll devise a Phoenix function:

// Function object that adds names to a symbol table. It keeps a pointer to
// the table it was constructed with and calls add() on it for every string
// it is invoked with.
struct add_declaration_f {
    add_declaration_f(qi::symbols<char>& ref) : _p(std::addressof(ref)) {}
    qi::symbols<char>* _p;
    void operator()(std::string const& arg) const { _p->add(arg); }
};

// Phoenix wrapper around the functor, bound to `_declared`, so that it can be
// called lazily from a semantic action as `_declare(_1)`.
phoenix::function<add_declaration_f> _declare { _declared };

      

And use it:

  vars  %= var [ _declare(_1) ] % ',';

      

Integrated demo

Live On Coliru

#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix.hpp>
#include <boost/fusion/adapted.hpp>

namespace qi = boost::spirit::qi;
namespace ascii = boost::spirit::ascii;
namespace phoenix = boost::phoenix;

// Stand-in syntax-tree type for the demo: it can be built from, assigned
// from and combined with any operand, but stores nothing. Every operator
// ignores its argument and returns the object itself.
struct ast {
    ast() = default;

    // Construct from / assign from anything; the operand is discarded.
    template <typename Operand> ast(Operand&&) {}
    template <typename Operand> ast& operator=(Operand&&) { return *this; }

    // Compound arithmetic used by the grammar's semantic actions; all no-ops.
    template <typename Operand> ast& operator+=(Operand&&) { return *this; }
    template <typename Operand> ast& operator-=(Operand&&) { return *this; }
    template <typename Operand> ast& operator*=(Operand&&) { return *this; }
    template <typename Operand> ast& operator/=(Operand&&) { return *this; }

    // Always prints the fixed text "syntax_tree".
    friend std::ostream& operator<<(std::ostream& out, ast) {
        out << "syntax_tree";
        return out;
    }
};

// Parse result for `name(arguments) = <expression body>`.
// Member order matches the BOOST_FUSION_ADAPT_STRUCT call that follows.
struct expression {
    std::string name;
    std::vector<std::string> arguments;
    ast syntax_tree;

    // Streams the value as `name(arg, arg, ) = <tree>`. Every argument,
    // including the last one, is followed by ", ".
    friend std::ostream& operator<<(std::ostream& out, expression const& self) {
        out << self.name << "(";
        for (auto it = self.arguments.begin(); it != self.arguments.end(); ++it) {
            out << *it << ", ";
        }
        out << ") = " << self.syntax_tree;
        return out;
    }
};

// Adapt `expression` as a Boost.Fusion sequence (members in the order name,
// arguments, syntax_tree) so the grammar can fill it without semantic actions.
BOOST_FUSION_ADAPT_STRUCT(expression, name, arguments, syntax_tree)

// Tags for the prefix operators that the `factor` rule passes to make_unary.
enum UN_OP {
    MIN,  // used for the '-' prefix
    SIN,  // used for the "sin" prefix
    COS,  // used for the "cos" prefix
    TAN   // used for the "tan" prefix
};

// Stub for the unary-node factory used in the `factor` rule: the call
// operator accepts any arguments, ignores them and returns qi::unused.
// `make_unary` is the single constant instance handed to phoenix::bind.
struct make_unary_f {
    template <typename... Ts> qi::unused_type operator()(Ts&&...) const { return qi::unused; }
} static const make_unary = {};

// Qi grammar for input of the form `name(vars) = expr` that only accepts
// variables in `expr` which were listed in `vars`.
// Each name parsed by `vars` is added to the symbol table `_declared`, and
// `simple` matches variables through that table instead of through `var`.
// Nothing in this class clears `_declared`, so declarations accumulate for the
// lifetime of the grammar object; the demo's main() builds a new one per input.
template<typename Iterator>
  struct exp_parser : qi::grammar<Iterator, expression(), ascii::space_type>
  {
    exp_parser() : exp_parser::base_type(all)
    {
      using qi::_val;
      using qi::_1;
      using qi::double_;
      using qi::lit;
      using phoenix::bind;

      // No semantic actions: `expression` is adapted as a Fusion sequence, so
      // the attributes of name, vars and expr land in its three members.
      all = name >> '(' >> vars >> ')' >> '=' >> expr;

      // Parsing of actual expression
      // expr: one term followed by any number of `+ term` / `- term`.
      expr =
          term                   [_val = _1]
          >> *(   ('+' >> term   [_val += _1])
              |   ('-' >> term   [_val -= _1])
            );

      // term: one factor followed by any number of `* factor` / `/ factor`.
      term =
          factor                 [_val = _1]
          >> *(   ('*' >> factor [_val *= _1])
              |   ('/' >> factor [_val /= _1])
            );

      // factor: an operand, a parenthesised expression, or a prefix operator
      // (unary -, sin, cos, tan, unary +) applied to another factor.
      // Note: `simple` below also has a '(' expr ')' alternative.
      factor =
          simple                 [_val = _1]
          |   '(' >> expr        [_val = _1] >> ')'
          |   ('-' >> factor     [_val = bind(make_unary, UN_OP::MIN, _1)])
          |   ("sin" >> factor   [_val = bind(make_unary, UN_OP::SIN, _1)])
          |   ("cos" >> factor   [_val = bind(make_unary, UN_OP::COS, _1)])
          |   ("tan" >> factor   [_val = bind(make_unary, UN_OP::TAN, _1)])
          |   ('+' >> factor     [_val = _1]);

      // Prototyping of expression
      prtctd = lit("sin") | "cos" | "tan";
      var    = !prtctd >> ascii::lower;
      num    = double_;
      // Operands: only names present in `_declared` count as variables.
      simple = _declared | num | '(' >> expr >> ')';
      name   = ascii::alpha >> *ascii::alnum;
      // %= keeps the attribute flowing into the vector even though a semantic
      // action is attached; the action registers each name as declared.
      vars  %= var [ _declare(_1) ] % ',';
    }

  private:
    // Names declared by the argument list. Declared before `_declare`, which
    // is initialised from it.
    qi::symbols<char> _declared;

    // Function object that adds the string it is called with to the symbol
    // table it points at.
    // NOTE(review): the constructor is presumably left non-explicit on purpose,
    // since `_declare { _declared }` below converts the table into this functor.
    struct add_declaration_f {
        add_declaration_f(qi::symbols<char>& ref) : _p(std::addressof(ref)) {}
        qi::symbols<char>* _p;
        void operator()(std::string const& arg) const { _p->add(arg); }
    };

    // Lazy (Phoenix) form of the functor, usable as `_declare(_1)` in an action.
    phoenix::function<add_declaration_f> _declare { _declared };

    // Rules that take part in whitespace skipping.
    qi::rule<Iterator, ast(), ascii::space_type> expr, term, factor, simple;
    qi::rule<Iterator, expression(), ascii::space_type> all;
    qi::rule<Iterator, std::vector<std::string>(), ascii::space_type> vars;

    // lexemes (declared without a skipper)
    qi::rule<Iterator, std::string()> name, prtctd;
    qi::rule<Iterator, std::string()> var;
    qi::rule<Iterator, double()> num;
  };

int main() {
    for (std::string const& input : {
            "",
            "foo (a) = 3*8+a",
            "bar (x, y) = (sin(x) + y*y) / (x + y)",
            "oops (x, y) = (sin(x) + y*y) / (x + a)",
        })
    try {
        using It = std::string::const_iterator;
        It f = input.begin(), l = input.end();

        expression e;
        bool ok = qi::phrase_parse(f, l, exp_parser<It>{}, ascii::space, e);

        if (ok) {
            std::cout << "Parse success: '" << input << "' -> " << e << "\n";
        } else {
            std::cout << "Parse failed: '" << input << "'\n";
        }

        if (f != l)
            std::cout << "Remaining unparsed: '" << std::string(f,l) << "'\n";
    } catch(std::exception const& e) {
        std::cout << "Exception: '" << e.what() << "'\n";
    }
}

      

Which prints:

Parse failed: ''
Parse success: 'foo (a) = 3*8+a' -> foo(a, ) = syntax_tree
Parse success: 'bar (x, y) = (sin(x) + y*y) / (x + y)' -> bar(x, y, ) = syntax_tree
Parse success: 'oops (x, y) = (sin(x) + y*y) / (x + a)' -> oops(x, y, ) = syntax_tree
Remaining unparsed: '/ (x + a)'

      

After adding `>> qi::eoi` to the parser expression, we get (Live On Coliru):

Parse failed: ''
Parse success: 'foo (a) = 3*8+a' -> foo(a, ) = syntax_tree
Parse success: 'bar (x, y) = (sin(x) + y*y) / (x + y)' -> bar(x, y, ) = syntax_tree
Parse failed: 'oops (x, y) = (sin(x) + y*y) / (x + a)'
Remaining unparsed: 'oops (x, y) = (sin(x) + y*y) / (x + a)'

      

+2


source







All Articles