fix(argus): weird lex errors

For inputs like 'G [,20000000000000000000]false', the lexer returns the
error "found ']' expected something else"; in chumsky terms this is an
ExpectedFound error whose expected set is empty. The error is no better
after adding labels to each token group, so I am just discarding these
empty-expected lex errors. If this manifests as a bug down the line,
then ¯\_(ツ)_/¯
Anand Balakrishnan 2023-10-13 15:21:11 -07:00
parent c522b17c88
commit f510f0067b
2 changed files with 60 additions and 38 deletions
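
For reference, a minimal reproduction sketch. This test is hypothetical and not part of the commit; it assumes parse_str (the entry point changed in the second file below) is in scope for the test module.

#[test]
fn reproduce_weird_lex_error() {
    // The formula from the commit message.
    match parse_str("G [,20000000000000000000]false") {
        Ok(expr) => println!("unexpectedly parsed: {:#?}", expr),
        Err(errors) => {
            for err in &errors {
                // After this commit, the bare "found ']' expected something
                // else" lex error (empty expected set) should have been
                // filtered out of this list.
                println!("error: {}", err);
            }
        }
    }
}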


@@ -85,7 +85,7 @@ impl<'src> fmt::Display for Token<'src> {
 pub fn lexer<'src>() -> impl Parser<'src, &'src str, Output<'src>, Error<'src>> {
     // A parser for numbers
-    let digits = text::digits(10).to_slice();
+    let digits = text::digits(10).to_slice().labelled("digits");
     let frac = just('.').then(digits.or_not());
@@ -103,6 +103,7 @@ pub fn lexer<'src>() -> impl Parser<'src, &'src str, Output<'src>, Error<'src>>
                 .map(Token::Float)
                 .map_err(|err| Rich::custom(e.span(), format!("Unable to parse as 64-bit float: {}", err)))
         })
+        .labelled("float")
         .boxed();

     let signed_int = one_of("+-")
@@ -113,14 +114,18 @@ pub fn lexer<'src>() -> impl Parser<'src, &'src str, Output<'src>, Error<'src>>
             s.parse()
                 .map(Token::Int)
                 .map_err(|err| Rich::custom(e.span(), format!("Unable to parse as 64-bit signed int: {}", err)))
-        });
-    let unsigned_int = digits.to_slice().try_map_with(|s: &str, e| {
-        s.parse()
-            .map(Token::UInt)
-            .map_err(|err| Rich::custom(e.span(), format!("Unable to parse as 64-bit unsigned int: {}", err)))
-    });
+        })
+        .labelled("signed integer");
+    let unsigned_int = digits
+        .to_slice()
+        .try_map_with(|s: &str, e| {
+            s.parse()
+                .map(Token::UInt)
+                .map_err(|err| Rich::custom(e.span(), format!("Unable to parse as 64-bit unsigned int: {}", err)))
+        })
+        .labelled("unsigned integer");

-    let number = choice((floating_number, signed_int, unsigned_int));
+    let number = choice((floating_number, signed_int, unsigned_int)).labelled("number");

     // A parser for control characters (delimiters, semicolons, etc.)
     let ctrl = choice((
@@ -131,7 +136,8 @@ pub fn lexer<'src>() -> impl Parser<'src, &'src str, Output<'src>, Error<'src>>
         just(")").to(Token::RParen),
         just(",").to(Token::Comma),
         just("..").to(Token::DotDot),
-    ));
+    ))
+    .labelled("control token");

     // Lexer for operator symbols
     let op = choice((
@@ -160,14 +166,16 @@ pub fn lexer<'src>() -> impl Parser<'src, &'src str, Output<'src>, Error<'src>>
         just("*").to(Token::Times),
         just("/").to(Token::Divide),
         just("=").to(Token::Assign),
-    ));
+    ))
+    .labelled("operator token");

     let temporal_op = choice((
         just("\u{25cb}").to(Token::Next),       // ○
         just("\u{25ef}").to(Token::Next),       // ◯
         just("\u{25c7}").to(Token::Eventually), // ◇
         just("\u{25a1}").to(Token::Always),     // □
-    ));
+    ))
+    .labelled("temporal operator token");

     // A parser for strings
     // Strings in our grammar are identifiers too
@@ -175,28 +183,31 @@ pub fn lexer<'src>() -> impl Parser<'src, &'src str, Output<'src>, Error<'src>>
         .ignore_then(none_of('"').repeated())
         .then_ignore(just('"'))
         .to_slice()
-        .map(Token::Ident);
+        .map(Token::Ident)
+        .labelled("quoted identifier");

     // A parser for identifiers and keywords
-    let ident = text::ident().map(|ident: &str| match ident {
-        "true" => Token::Bool(true),
-        "false" => Token::Bool(false),
-        "TRUE" => Token::Bool(true),
-        "FALSE" => Token::Bool(false),
-        "G" => Token::Always,
-        "alw" => Token::Always,
-        "always" => Token::Always,
-        "globally" => Token::Always,
-        "F" => Token::Eventually,
-        "ev" => Token::Eventually,
-        "eventually" => Token::Eventually,
-        "finally" => Token::Eventually,
-        "X" => Token::Next,
-        "next" => Token::Next,
-        "U" => Token::Until,
-        "until" => Token::Until,
-        _ => Token::Ident(ident),
-    });
+    let ident = text::ident()
+        .map(|ident: &str| match ident {
+            "true" => Token::Bool(true),
+            "false" => Token::Bool(false),
+            "TRUE" => Token::Bool(true),
+            "FALSE" => Token::Bool(false),
+            "G" => Token::Always,
+            "alw" => Token::Always,
+            "always" => Token::Always,
+            "globally" => Token::Always,
+            "F" => Token::Eventually,
+            "ev" => Token::Eventually,
+            "eventually" => Token::Eventually,
+            "finally" => Token::Eventually,
+            "X" => Token::Next,
+            "next" => Token::Next,
+            "U" => Token::Until,
+            "until" => Token::Until,
+            _ => Token::Ident(ident),
+        })
+        .labelled("identifier");

     // A single token can be one of the above
     let token = choice((op, temporal_op, ctrl, quoted_ident, ident, number)).boxed();
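
The .labelled(...) calls added above give each token group a name that chumsky's Rich errors can report (for example "expected number" instead of a list of raw characters). Below is a standalone sketch of that mechanism; it is not taken from this repository and it assumes the stock extra::Err<Rich<char>> error type in place of this crate's Error<'src> alias.

use chumsky::prelude::*;

// Sketch only: a labelled sub-lexer whose failure is reported by name.
fn digits_lexer<'src>() -> impl Parser<'src, &'src str, &'src str, extra::Err<Rich<'src, char>>> {
    text::digits(10).to_slice().labelled("digits")
}

fn main() {
    let (output, errors) = digits_lexer().parse("abc").into_output_errors();
    assert!(output.is_none());
    for err in errors {
        // Prints something like: found 'a' expected digits
        println!("{}", err);
    }
}

As the commit message notes, the labels alone did not make the error for the problem input any clearer, which is why the second file below also filters out lex errors whose expected set is empty.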


@@ -20,8 +20,8 @@ pub fn parse_str(src: &str) -> Result<crate::core::expr::Expr, Vec<Rich<'_, Stri
     let (tokens, lex_errors) = lexer().parse(src).into_output_errors();
     log::debug!("** Tokens output **");
     log::debug!("{:#?}", tokens);
-    log::debug!("** Lexing Errors **");
-    log::debug!("[{}]", lex_errors.iter().map(|e| e.to_string()).join("\n- "));
+    log::debug!("** Lexing Errors: {} **", lex_errors.len());
+    log::debug!("\n{}", lex_errors.iter().map(|e| e.to_string()).join("\n"));

     let (parsed, parse_errors) = if let Some(tokens) = &tokens {
         parser()
@@ -33,8 +33,8 @@ pub fn parse_str(src: &str) -> Result<crate::core::expr::Expr, Vec<Rich<'_, Stri
     log::debug!("** Parse output **");
     log::debug!("{:#?}", parsed);
-    log::debug!("** Parse Errors **");
-    log::debug!("[{}]", parse_errors.iter().map(|e| e.to_string()).join("\n- "));
+    log::debug!("** Parse Errors: {}**", parse_errors.len());
+    log::debug!("\n{}", parse_errors.iter().map(|e| e.to_string()).join("\n"));

     let (expr, expr_errors) = if let Some((ast, span)) = parsed {
         let mut expr_builder = ExprBuilder::new();
@@ -49,16 +49,27 @@ pub fn parse_str(src: &str) -> Result<crate::core::expr::Expr, Vec<Rich<'_, Stri
     log::debug!("** Final Expression **");
     log::debug!("{:#?}", expr);
-    log::debug!("** AST to Expr Errors **");
-    log::debug!("[{}]", expr_errors.iter().map(|e| e.to_string()).join("\n- "));
+    log::debug!("** AST to Expr Errors: {} **", expr_errors.len());
+    log::debug!("\n{}", expr_errors.iter().map(|e| e.to_string()).join("\n"));

     let errors: Vec<_> = lex_errors
         .into_iter()
-        .map(|e| e.map_token(|c| c.to_string()))
+        .filter_map(|e| {
+            // HACK: Discard empty expected lex errors
+            // TODO: not sure why this happens
+            use chumsky::error::RichReason::*;
+            match e.reason() {
+                ExpectedFound { expected, found: _ } if expected.is_empty() => return None,
+                _ => {}
+            };
+            Some(e.map_token(|c| c.to_string()))
+        })
         .chain(parse_errors.into_iter().map(|e| e.map_token(|tok| tok.to_string())))
         .chain(expr_errors.into_iter().map(|e| e.map_token(|tok| tok.to_string())))
         .map(|e| e.into_owned())
         .collect();
+    log::debug!("** Total Errors: {}**", errors.len());
+    log::debug!("[{}]", errors.iter().map(|e| e.to_string()).join("\n"));

     if !errors.is_empty() {
         Err(errors)
     } else {