Boost: parsing only variables that were previously declared
I put together a parser using the Boost libraries, based on various sources on the net. It works (although it is not as clean as I would like), but I ran into a specific problem. In the first part of the parser, I parse the name of the function, then the set of arguments enclosed in parentheses. Later, when parsing the actual expression, the `factor` rule allows numbers and variables to be parsed. However, I would like to accept only the variables that were previously declared by the `vars` rule. Here is my grammar:
// Qi grammar for input of the form `name(vars) = expr`.
// It synthesizes an `expression` and is declared with the ASCII whitespace skipper.
template<typename Iterator>
struct exp_parser : qi::grammar<Iterator, expression(), ascii::space_type>
{
exp_parser() : exp_parser::base_type(all)
{
using qi::_val;
using qi::_1;
using qi::char_;
using qi::double_;
using qi::lit;
using phoenix::at_c;
using phoenix::push_back;
using phoenix::bind;
// Start rule: the semantic actions store the function name, the argument
// list and the parsed expression into members 0, 1 and 2 of the result.
all =
name [at_c<0>(_val) = _1] >> '(' >> vars [at_c<1>(_val) = _1] >> ')' >> '='
>> expr [at_c<2>(_val) = _1];
// Parsing of actual expression
// expr: a term followed by any number of `+ term` / `- term`, folded into _val.
expr =
term [_val = _1]
>> *( ('+' >> term [_val += _1])
| ('-' >> term [_val -= _1])
);
// term: a factor followed by any number of `* factor` / `/ factor`.
term =
factor [_val = _1]
>> *( ('*' >> factor [_val *= _1])
| ('/' >> factor [_val /= _1])
);
// factor: a simple operand, a parenthesised expression, a unary minus or
// plus, or sin/cos/tan applied to a factor (built through make_unary).
factor =
simple [_val = _1]
| '(' >> expr [_val = _1] >> ')'
| ('-' >> factor [_val = bind(make_unary, UN_OP::MIN, _1)])
| ("sin" >> factor [_val = bind(make_unary, UN_OP::SIN, _1)])
| ("cos" >> factor [_val = bind(make_unary, UN_OP::COS, _1)])
| ("tan" >> factor [_val = bind(make_unary, UN_OP::TAN, _1)])
| ('+' >> factor [_val = _1]);
// Prototyping of expression
// prtctd: the reserved function names sin, cos and tan.
prtctd %= lit("sin") | lit("cos") | lit("tan");
// var: one lower-case letter, rejected where a reserved name starts.
// NOTE(review): nothing here checks the letter against the list parsed by `vars`.
var %= !prtctd >> char_('a','z');
num %= double_;
// simple: a variable, a number or a parenthesised expression.
simple %= var | num | ('(' >> expr >> ')');
// name: a letter followed by any number of letters or digits.
name %= ((char_('a','z') | char_('A','Z') ) >> *(char_('a','z') | char_('A','Z') | char_('0','9') ));
// vars: comma-separated list of single lower-case letters.
vars %= (char_('a','z') >> *(',' >> char_('a','z')));
}
// Every rule below is declared with the ascii::space_type skipper, including
// the token-like rules name, prtctd, var and num.
qi::rule<Iterator, ast(), ascii::space_type> expr, term, factor, simple;
qi::rule<Iterator, expression(), ascii::space_type> all;
qi::rule<Iterator, std::string(), ascii::space_type> name, prtctd;
qi::rule<Iterator, std::vector<char>(), ascii::space_type> vars;
qi::rule<Iterator, char(), ascii::space_type> var;
qi::rule<Iterator, double(), ascii::space_type> num;
};
And this is the structure I am using to store it:
// Result of parsing `name(arguments) = <expression>`.
struct expression {
// Function name.
std::string name;
// Declared argument letters, in the order they were written.
std::vector<char> arguments;
// Tree built for the right-hand side of the `=`.
ast syntax_tree;
};
Now, how can I access the `std::vector<char>` from within the `factor` rule, so that I only parse the variables that were actually declared?
Also, I am new to using boost and use it as an exercise for myself to start learning a little. If anyone has any advice please let me know how I can clean up this code.
Thanks in advance!
source to share
This is a big anti-pattern in Spirit:
all =
name [at_c<0>(_val) = _1] >> '(' >> vars [at_c<1>(_val) = _1] >> ')' >> '='
>> expr [at_c<2>(_val) = _1];
In fact, I'm convinced that the samples you have been looking at show better approaches. Also, I note that you have picked code from conflicting approaches (you cannot synthesize a syntax tree when the semantic actions evaluate the expression values on the fly).
First of all, get rid of the semantic action thinking: Boost Spirit: "Semantic actions are evil"?
BOOST_FUSION_ADAPT_STRUCT(expression, name, arguments, syntax_tree)
all = name >> '(' >> vars >> ')' >> '=' >> expr;
There are many other "diseases":
- `prtctd` must be a lexeme (no skipper), so that `si\nn` does not match.
- `*(char_('a','z') | char_('A','Z') | char_('0','9') )` is just `*alnum`.
- `name` must also be a lexeme, so it is simply:
name = alpha >> *alnum;
- `vars` doesn't even use `var`?
All in all, here is a simplification of those rules (assuming you have dropped the skipper from `prtctd` and from `name`):
prtctd = lit("sin") | "cos" | "tan";
var = !prtctd >> ascii::lower;
num = double_;
simple = var | num | '(' >> expr >> ')';
name = ascii::alpha >> *ascii::alnum;
vars = var % ',';
Self-contained example
Adding a few stub definitions to the above gives us something we can test:
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix.hpp>
#include <boost/fusion/adapted.hpp>
namespace qi = boost::spirit::qi;
namespace ascii = boost::spirit::ascii;
namespace phoenix = boost::phoenix;
// Placeholder syntax-tree type for the demo: it can be constructed from,
// assigned from and combined (+=, -=, *=, /=) with a value of any type, always
// discarding the operand, and it streams as the fixed text "syntax_tree".
struct ast {
    ast() = default;

    // Converting constructor: accepts any single argument and ignores it.
    template <typename Operand> ast(Operand&&) { }

    // Assignment from anything leaves the object untouched.
    template <typename Operand> ast& operator =(Operand&&) { return *this; }

    // Compound arithmetic operators: all no-ops that return *this.
    template <typename Operand> ast& operator+=(Operand&&) { return *this; }
    template <typename Operand> ast& operator-=(Operand&&) { return *this; }
    template <typename Operand> ast& operator*=(Operand&&) { return *this; }
    template <typename Operand> ast& operator/=(Operand&&) { return *this; }

    // Prints the same marker text for every instance.
    friend std::ostream& operator<<(std::ostream& out, ast) {
        out << "syntax_tree";
        return out;
    }
};
// Parse result for `name(arguments) = <expression>`.
struct expression {
    std::string name;                    // function name
    std::vector<std::string> arguments;  // declared argument names, in order
    ast syntax_tree;                     // tree for the right-hand side

    // Streams the value as `name(arg, arg, ) = <syntax_tree>`: every argument,
    // including the last one, is followed by ", ".
    friend std::ostream& operator<<(std::ostream& os, expression const& e) {
        os << e.name << "(";
        // Iterate by const reference so no argument string is copied.
        for (auto const& arg : e.arguments) os << arg << ", ";
        return os << ") = " << e.syntax_tree;
    }
};
// Adapt `expression` as a Fusion sequence (members in this order), which lets
// the grammar's start rule fill it without semantic actions.
BOOST_FUSION_ADAPT_STRUCT(expression, name, arguments, syntax_tree)
// Unary operation tags passed as the first argument to make_unary in the
// grammar's `factor` rule.
enum UN_OP { MIN, SIN, COS, TAN };
// Stand-in for a unary-node factory: callable with any arguments, it ignores
// all of them and returns qi::unused.
struct make_unary_f {
    template <typename... Args>
    qi::unused_type operator()(Args&&...) const {
        return qi::unused;
    }
};

// The single instance that the grammar binds in its semantic actions.
static const make_unary_f make_unary = {};
// Qi grammar for input of the form `name(vars) = expr`, skipping ASCII
// whitespace between tokens and synthesizing an `expression`.
//
// The start rule fills the result through attribute propagation; the
// arithmetic rules still use semantic actions to combine `ast` values.
template<typename Iterator>
struct exp_parser : qi::grammar<Iterator, expression(), ascii::space_type>
{
    exp_parser() : exp_parser::base_type(all)
    {
        using qi::_val;
        using qi::_1;
        using qi::double_;
        using qi::lit;
        using phoenix::bind;

        all = name >> '(' >> vars >> ')' >> '=' >> expr;

        // Parsing of actual expression
        expr =
            term [_val = _1]
            >> *( ('+' >> term [_val += _1])
                | ('-' >> term [_val -= _1])
                );

        term =
            factor [_val = _1]
            >> *( ('*' >> factor [_val *= _1])
                | ('/' >> factor [_val /= _1])
                );

        // Parenthesised sub-expressions are matched by `simple`. A second
        // `'(' >> expr >> ')'` alternative here could only be reached after
        // `simple` had already failed on the very same input, so it could
        // never succeed and merely re-parsed the nested expression.
        factor =
              simple [_val = _1]
            | ('-' >> factor [_val = bind(make_unary, UN_OP::MIN, _1)])
            | ("sin" >> factor [_val = bind(make_unary, UN_OP::SIN, _1)])
            | ("cos" >> factor [_val = bind(make_unary, UN_OP::COS, _1)])
            | ("tan" >> factor [_val = bind(make_unary, UN_OP::TAN, _1)])
            | ('+' >> factor [_val = _1]);

        // Prototyping of expression
        prtctd = lit("sin") | "cos" | "tan";         // reserved function names
        var    = !prtctd >> ascii::lower;            // one lower-case letter, not starting a reserved name
        num    = double_;
        simple = var | num | '(' >> expr >> ')';
        name   = ascii::alpha >> *ascii::alnum;      // letter followed by letters/digits
        vars   = var % ',';                          // comma-separated variables
    }

  private:
    // Rules that skip whitespace between their components.
    qi::rule<Iterator, ast(), ascii::space_type> expr, term, factor, simple;
    qi::rule<Iterator, expression(), ascii::space_type> all;
    qi::rule<Iterator, std::vector<std::string>(), ascii::space_type> vars;
    // lexemes (declared without a skipper)
    qi::rule<Iterator, std::string()> name, prtctd;
    qi::rule<Iterator, std::string()> var;
    qi::rule<Iterator, double()> num;
};
int main() {
for (std::string const& input : {
"",
"foo (a) = 3*8+a",
"bar (x, y) = (sin(x) + y*y) / (x + y)",
"oops (x, y) = (sin(x) + y*y) / (x + a)",
})
try {
using It = std::string::const_iterator;
It f = input.begin(), l = input.end();
expression e;
bool ok = qi::phrase_parse(f, l, exp_parser<It>{} >> qi::eoi, ascii::space, e);
if (ok) {
std::cout << "Parse success: '" << input << "' -> " << e << "\n";
} else {
std::cout << "Parse failed: '" << input << "'\n";
}
if (f != l)
std::cout << "Remaining unparsed: '" << std::string(f,l) << "'\n";
} catch(std::exception const& e) {
std::cout << "Exception: '" << e.what() << "'\n";
}
}
As expected, it still parses all non-empty lines, including `oops`, which mistakenly uses `a` instead of `y`:
Parse failed: ''
Parse success: 'foo (a) = 3*8+a' -> foo(a, ) = syntax_tree
Parse success: 'bar (x, y) = (sin(x) + y*y) / (x + y)' -> bar(x, y, ) = syntax_tree
Parse success: 'oops (x, y) = (sin(x) + y*y) / (x + a)' -> oops(x, y, ) = syntax_tree
Declaring and checking
To match only the declared variables, I would use `qi::symbols<>`:
qi::symbols<char> _declared;
simple = _declared | num | '(' >> expr >> ')';
Now, to add the declared items, we will write a Phoenix function:
// Function object that records a declared name in a symbol table.
struct add_declaration_f {
// Stores the address of the table to fill.
// NOTE(review): std::addressof is presumably used instead of `&` because Qi
// parsers overload operators — confirm.
add_declaration_f(qi::symbols<char>& ref) : _p(std::addressof(ref)) {}
// Non-owning pointer to the symbol table; the table must outlive this object.
qi::symbols<char>* _p;
// Adds `arg` to the referenced symbol table.
void operator()(std::string const& arg) const { _p->add(arg); }
};
// Lazy (Phoenix) wrapper so the functor can be called from a semantic action,
// bound to the `_declared` table.
phoenix::function<add_declaration_f> _declare { _declared };
And use it:
vars %= var [ _declare(_1) ] % ',';
Integrated demo
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix.hpp>
#include <boost/fusion/adapted.hpp>
namespace qi = boost::spirit::qi;
namespace ascii = boost::spirit::ascii;
namespace phoenix = boost::phoenix;
// Placeholder syntax-tree type for the demo: it can be constructed from,
// assigned from and combined (+=, -=, *=, /=) with a value of any type, always
// discarding the operand, and it streams as the fixed text "syntax_tree".
struct ast {
    ast() = default;

    // Converting constructor: accepts any single argument and ignores it.
    template <typename Operand> ast(Operand&&) { }

    // Assignment from anything leaves the object untouched.
    template <typename Operand> ast& operator =(Operand&&) { return *this; }

    // Compound arithmetic operators: all no-ops that return *this.
    template <typename Operand> ast& operator+=(Operand&&) { return *this; }
    template <typename Operand> ast& operator-=(Operand&&) { return *this; }
    template <typename Operand> ast& operator*=(Operand&&) { return *this; }
    template <typename Operand> ast& operator/=(Operand&&) { return *this; }

    // Prints the same marker text for every instance.
    friend std::ostream& operator<<(std::ostream& out, ast) {
        out << "syntax_tree";
        return out;
    }
};
// Parse result for `name(arguments) = <expression>`.
struct expression {
    std::string name;                    // function name
    std::vector<std::string> arguments;  // declared argument names, in order
    ast syntax_tree;                     // tree for the right-hand side

    // Streams the value as `name(arg, arg, ) = <syntax_tree>`: every argument,
    // including the last one, is followed by ", ".
    friend std::ostream& operator<<(std::ostream& os, expression const& e) {
        os << e.name << "(";
        // Iterate by const reference so no argument string is copied.
        for (auto const& arg : e.arguments) os << arg << ", ";
        return os << ") = " << e.syntax_tree;
    }
};
// Adapt `expression` as a Fusion sequence (members in this order), which lets
// the grammar's start rule fill it without semantic actions.
BOOST_FUSION_ADAPT_STRUCT(expression, name, arguments, syntax_tree)
// Unary operation tags passed as the first argument to make_unary in the
// grammar's `factor` rule.
enum UN_OP { MIN, SIN, COS, TAN };
// Stand-in for a unary-node factory: callable with any arguments, it ignores
// all of them and returns qi::unused.
struct make_unary_f {
    template <typename... Args>
    qi::unused_type operator()(Args&&...) const {
        return qi::unused;
    }
};

// The single instance that the grammar binds in its semantic actions.
static const make_unary_f make_unary = {};
// Qi grammar for input of the form `name(vars) = expr`, skipping ASCII
// whitespace between tokens and synthesizing an `expression`.
//
// Variables listed in `vars` are recorded in the `_declared` symbol table
// while parsing, and the expression part only accepts names found in that
// table, so an undeclared variable makes the match stop at that point.
template<typename Iterator>
struct exp_parser : qi::grammar<Iterator, expression(), ascii::space_type>
{
    exp_parser() : exp_parser::base_type(all)
    {
        using qi::_val;
        using qi::_1;
        using qi::double_;
        using qi::lit;
        using phoenix::bind;

        // Forget declarations from any earlier parse before reading a new
        // argument list, so one grammar object can be reused across inputs.
        // `%=` keeps automatic attribute propagation even though a semantic
        // action is now attached to part of the right-hand side.
        all %= qi::eps [ phoenix::bind(&qi::symbols<char>::clear, phoenix::ref(_declared)) ]
            >> name >> '(' >> vars >> ')' >> '=' >> expr;

        // Parsing of actual expression
        expr =
            term [_val = _1]
            >> *( ('+' >> term [_val += _1])
                | ('-' >> term [_val -= _1])
                );

        term =
            factor [_val = _1]
            >> *( ('*' >> factor [_val *= _1])
                | ('/' >> factor [_val /= _1])
                );

        // Parenthesised sub-expressions are matched by `simple`. A second
        // `'(' >> expr >> ')'` alternative here could only be reached after
        // `simple` had already failed on the very same input, so it could
        // never succeed and merely re-parsed the nested expression.
        factor =
              simple [_val = _1]
            | ('-' >> factor [_val = bind(make_unary, UN_OP::MIN, _1)])
            | ("sin" >> factor [_val = bind(make_unary, UN_OP::SIN, _1)])
            | ("cos" >> factor [_val = bind(make_unary, UN_OP::COS, _1)])
            | ("tan" >> factor [_val = bind(make_unary, UN_OP::TAN, _1)])
            | ('+' >> factor [_val = _1]);

        // Prototyping of expression
        prtctd = lit("sin") | "cos" | "tan";              // reserved function names
        var    = !prtctd >> ascii::lower;                 // one lower-case letter, not starting a reserved name
        num    = double_;
        simple = _declared | num | '(' >> expr >> ')';    // only declared variables are accepted
        name   = ascii::alpha >> *ascii::alnum;           // letter followed by letters/digits
        vars  %= var [ _declare(_1) ] % ',';              // record each declared variable as it is parsed
    }

  private:
    // Names declared in the argument list of the input being parsed.
    qi::symbols<char> _declared;

    // Function object that adds a name to a symbol table.
    struct add_declaration_f {
        add_declaration_f(qi::symbols<char>& ref) : _p(std::addressof(ref)) {}
        qi::symbols<char>* _p;  // non-owning; points at the grammar's own table
        void operator()(std::string const& arg) const { _p->add(arg); }
    };
    // Lazy wrapper used in the `vars` semantic action; declared after
    // `_declared` because it is initialized from it.
    phoenix::function<add_declaration_f> _declare { _declared };

    // Rules that skip whitespace between their components.
    qi::rule<Iterator, ast(), ascii::space_type> expr, term, factor, simple;
    qi::rule<Iterator, expression(), ascii::space_type> all;
    qi::rule<Iterator, std::vector<std::string>(), ascii::space_type> vars;
    // lexemes (declared without a skipper)
    qi::rule<Iterator, std::string()> name, prtctd;
    qi::rule<Iterator, std::string()> var;
    qi::rule<Iterator, double()> num;
};
int main() {
for (std::string const& input : {
"",
"foo (a) = 3*8+a",
"bar (x, y) = (sin(x) + y*y) / (x + y)",
"oops (x, y) = (sin(x) + y*y) / (x + a)",
})
try {
using It = std::string::const_iterator;
It f = input.begin(), l = input.end();
expression e;
bool ok = qi::phrase_parse(f, l, exp_parser<It>{}, ascii::space, e);
if (ok) {
std::cout << "Parse success: '" << input << "' -> " << e << "\n";
} else {
std::cout << "Parse failed: '" << input << "'\n";
}
if (f != l)
std::cout << "Remaining unparsed: '" << std::string(f,l) << "'\n";
} catch(std::exception const& e) {
std::cout << "Exception: '" << e.what() << "'\n";
}
}
Which prints:
Parse failed: ''
Parse success: 'foo (a) = 3*8+a' -> foo(a, ) = syntax_tree
Parse success: 'bar (x, y) = (sin(x) + y*y) / (x + y)' -> bar(x, y, ) = syntax_tree
Parse success: 'oops (x, y) = (sin(x) + y*y) / (x + a)' -> oops(x, y, ) = syntax_tree
Remaining unparsed: '/ (x + a)'
After appending a `>> qi::eoi` parser to the expression, we get (Live On Coliru):
Parse failed: ''
Parse success: 'foo (a) = 3*8+a' -> foo(a, ) = syntax_tree
Parse success: 'bar (x, y) = (sin(x) + y*y) / (x + y)' -> bar(x, y, ) = syntax_tree
Parse failed: 'oops (x, y) = (sin(x) + y*y) / (x + a)'
Remaining unparsed: 'oops (x, y) = (sin(x) + y*y) / (x + a)'
source to share