-
|
Thank you for all the great work on chumsky! So far it has been serving me quite well :D I am running into a stack overflow when the depth of an expression gets big enough. Or equivalently when the depth of my parser is too big. I am trying to parse expression with C-style operator precedence. Starting from the calculator example provided in the chumsky repo I expanded it to include all the other types of operators that the language I am trying to parse supports. I've chopped out a few parts that were very repetitive: pub fn parser<'tokens, 'src: 'tokens, I>() -> impl Parser<'tokens, I, SpannedExpr<'src>, extra::Err<Rich<'tokens, Token<'src>, FSpan<'src>>>>
where
I: ValueInput<'tokens, Token = Token<'src>, Span = FSpan<'src>>,
{
let ident_or_promote = select! {
Token::Ident(s) => s
}
.or(any().try_map(|token: Token<'src>, span| {
if let Some(keyword) = token.keyword() {
return Ok(keyword);
} else {
return Err(Rich::custom(span, "Expected identifier"));
}
}));
recursive(|p| {
let atom = {
let var_access = ident_or_promote
.map_with(|s, e| (Expr::VarAccess(s), e.span()));
// Function call parsing for mathematical functions
let function = select! {
Token::Sin => "sin",
// omitted for brevity ...
Token::Ident(s) => s
}
.then(
p.clone()
.separated_by(just(Token::Comma))
.collect::<Vec<_>>()
.delimited_by(just(Token::LParen), just(Token::RParen)),
)
.map_with(|(name, args), e| (
Expr::Call(name.to_string(), args), e.span()
));
let parenthesized = p
.clone()
.delimited_by(just(Token::LParen), just(Token::RParen));
let integer = select! {
Token::Int(n) => Expr::Int(n.parse::<i64>().unwrap()),
}.map_with(|t, e| (t, e.span()));
let float = select! {
Token::Float(n) => Expr::Float(n.parse::<f64>().unwrap()),
// omitted for brevity
}.map_with(|t, e| (t, e.span()));
let string = select! {
Token::String(s) => Expr::String(s),
}.map_with(|t, e| (t, e.span()));
let array = just(Token::LSquare).ignore_then(
p.clone().repeated().collect::<Vec<_>>()
).then_ignore(just(Token::RSquare))
.map_with(|v, e| (Expr::Array(v), e.span()));
choice((parenthesized, array, string, float, integer, function, var_access))
};
let unary = just(Token::Plus).or(just(Token::Minus))
.repeated()
.foldr_with(atom, |op, rhs, e| {
match op {
Token::Plus => rhs,
Token::Minus => (Expr::Neg(Box::new(rhs).into()), e.span()),
_ => unreachable!()
}
})
.boxed();
let binary_0 = unary
.clone()
.foldl_with(
just(Token::Pow).then(unary).repeated(),
|lhs, (_op, rhs), e| (Expr::Power(Box::new(lhs).into(), Box::new(rhs).into()), e.span()),
)
.boxed();
// multiplicative
let binary_1 = binary_0
.clone()
.foldl_with(
choice((just(Token::Mul), just(Token::Div)))
.then(binary_0)
.repeated(),
|lhs, (op, rhs), e| match op {
// omitted for brevity
},
)
.boxed();
// additive
let binary_2 = ...boxed() // as binary_1 but for +/-
// shift operators
let binary_3 = ...boxed() // as binary_1 but for <</>>
let compare = ...boxed() // as binary_1 but for </<=/>=/>
let equal = compare.clone().foldl_with(
just(Token::IsEqual)
.or(just(Token::NotEqual))
.then(compare)
.repeated(),
|lhs, (op, rhs), e| match op {
Token::IsEqual => (Expr::Eq(Box::new(lhs).into(), Box::new(rhs).into()), e.span()),
Token::NotEqual => (Expr::Neq(Box::new(lhs).into(), Box::new(rhs).into()), e.span()),
_ => unreachable!(),
},
);
let bitand = equal.clone().foldl_with(
just(Token::BitAnd).then(equal).repeated(),
|lhs, (_op, rhs), e| (Expr::BitAnd(Box::new(lhs).into(), Box::new(rhs).into()), e.span()),
);
let bitxnor = bitand.clone().foldl_with(
just(Token::Xnor).then(bitand).repeated(),
|lhs, (_op, rhs), e| (Expr::BitXnor(Box::new(lhs).into(), Box::new(rhs).into()), e.span()),
);
let bitor = bitxnor.clone().foldl_with(
just(Token::BitOr).then(bitxnor).repeated(),
|lhs, (_op, rhs), e| (Expr::BitOr(Box::new(lhs).into(), Box::new(rhs).into()), e.span()),
);
let logical_and = bitor.clone().foldl_with(
just(Token::LogicalAnd).then(bitor).repeated(),
|lhs, (_op, rhs), e| (Expr::LogicalAnd(Box::new(lhs).into(), Box::new(rhs).into()), e.span()),
);
let logical_or = logical_and.clone().foldl_with(
just(Token::LogicalOr).then(logical_and).repeated(),
|lhs, (_op, rhs), e| (Expr::LogicalOr(Box::new(lhs).into(), Box::new(rhs).into()), e.span()),
);
let ternary = logical_or
.clone()
.then(
just(Token::QMark)
.ignore_then(p.clone())
.then_ignore(just(Token::Colon))
.then(p.clone()),
)
.map_with(|(cond, (then_expr, else_expr)), e| {
(Expr::Ternary(Box::new(cond).into(), Box::new(then_expr).into(), Box::new(else_expr).into()), e.span())
});
logical_or.or(ternary)
})Skipping the ternary and logical or parsers allows the expression parser to parse 15x nested brackets: (((((((((((((((1))))))))))))))). While that is probably sufficient, with all parsers enabled I can only parse 3x nesting: (((1))): #[test]
fn parse_deep_expr() {
let tokens = as_tokens(vec![
Token::LParen,
Token::LParen,
Token::LParen,
Token::LParen,
// Token::LParen,
// Token::LParen,
// Token::LParen,
// Token::LParen,
// Token::LParen,
// Token::LParen,
// Token::LParen,
// Token::LParen,
// Token::LParen,
// Token::LParen,
// Token::LParen,
// Token::LParen,
// Token::LParen,
// Token::LParen,
Token::Int("1"),
// Token::RParen,
// Token::RParen,
// Token::RParen,
// Token::RParen,
// Token::RParen,
// Token::RParen,
// Token::RParen,
// Token::RParen,
// Token::RParen,
// Token::RParen,
// Token::RParen,
// Token::RParen,
// Token::RParen,
// Token::RParen,
Token::RParen,
Token::RParen,
Token::RParen,
Token::RParen,
]);
let ctx = PathBuf::from("<test>");
let expr = parse_expr(&ctx, &tokens);
let expected = Expr::Int(1);
assert_eq!(format!("{:?}", expr.0), format!("{:?}", expected));
}My background is not in parsing so I am far from an expert on the subject, I imagine there is probably an obvious solution to this I am missing. I don't believe this is left recursion as the parser works fine for less nesting, but I could well be wrong. |
Beta Was this translation helpful? Give feedback.
Replies: 1 comment 1 reply
-
|
Nesting - unless you use special-case tricks that don't generalise - requires recursion of some kind (be it stack-based or via heap allocation). So if you nest enough levels, you'll hit some similar failure mode with any parser. However, the depth that this overflow occurred at makes me think that you don't have the |
Beta Was this translation helpful? Give feedback.
Nesting - unless you use special-case tricks that don't generalise - requires recursion of some kind (be it stack-based or via heap allocation). So if you nest enough levels, you'll hit some similar failure mode with any parser.
However, the depth that this overflow occurred at makes me think that you don't have the
stackerfeature enabled, or you're on a platform where it's not supported. Is this the case?