fix(argus): weird lex errors
For inputs like 'G [,20000000000000000000]false', the lexer returns the error "found ']' expected something else". The error is no better after adding labels to each token group, so I am just discarding these empty-expected errors. If this manifests as a bug down the line, then ¯\_(ツ)_/¯
This commit is contained in:
parent
c522b17c88
commit
f510f0067b
2 changed files with 60 additions and 38 deletions
|
|
@ -85,7 +85,7 @@ impl<'src> fmt::Display for Token<'src> {
|
||||||
|
|
||||||
pub fn lexer<'src>() -> impl Parser<'src, &'src str, Output<'src>, Error<'src>> {
|
pub fn lexer<'src>() -> impl Parser<'src, &'src str, Output<'src>, Error<'src>> {
|
||||||
// A parser for numbers
|
// A parser for numbers
|
||||||
let digits = text::digits(10).to_slice();
|
let digits = text::digits(10).to_slice().labelled("digits");
|
||||||
|
|
||||||
let frac = just('.').then(digits.or_not());
|
let frac = just('.').then(digits.or_not());
|
||||||
|
|
||||||
|
|
@ -103,6 +103,7 @@ pub fn lexer<'src>() -> impl Parser<'src, &'src str, Output<'src>, Error<'src>>
|
||||||
.map(Token::Float)
|
.map(Token::Float)
|
||||||
.map_err(|err| Rich::custom(e.span(), format!("Unable to parse as 64-bit float: {}", err)))
|
.map_err(|err| Rich::custom(e.span(), format!("Unable to parse as 64-bit float: {}", err)))
|
||||||
})
|
})
|
||||||
|
.labelled("float")
|
||||||
.boxed();
|
.boxed();
|
||||||
|
|
||||||
let signed_int = one_of("+-")
|
let signed_int = one_of("+-")
|
||||||
|
|
@ -113,14 +114,18 @@ pub fn lexer<'src>() -> impl Parser<'src, &'src str, Output<'src>, Error<'src>>
|
||||||
s.parse()
|
s.parse()
|
||||||
.map(Token::Int)
|
.map(Token::Int)
|
||||||
.map_err(|err| Rich::custom(e.span(), format!("Unable to parse as 64-bit signed int: {}", err)))
|
.map_err(|err| Rich::custom(e.span(), format!("Unable to parse as 64-bit signed int: {}", err)))
|
||||||
});
|
})
|
||||||
let unsigned_int = digits.to_slice().try_map_with(|s: &str, e| {
|
.labelled("signed integer");
|
||||||
s.parse()
|
let unsigned_int = digits
|
||||||
.map(Token::UInt)
|
.to_slice()
|
||||||
.map_err(|err| Rich::custom(e.span(), format!("Unable to parse as 64-bit unsigned int: {}", err)))
|
.try_map_with(|s: &str, e| {
|
||||||
});
|
s.parse()
|
||||||
|
.map(Token::UInt)
|
||||||
|
.map_err(|err| Rich::custom(e.span(), format!("Unable to parse as 64-bit unsigned int: {}", err)))
|
||||||
|
})
|
||||||
|
.labelled("unsigned integer");
|
||||||
|
|
||||||
let number = choice((floating_number, signed_int, unsigned_int));
|
let number = choice((floating_number, signed_int, unsigned_int)).labelled("number");
|
||||||
|
|
||||||
// A parser for control characters (delimiters, semicolons, etc.)
|
// A parser for control characters (delimiters, semicolons, etc.)
|
||||||
let ctrl = choice((
|
let ctrl = choice((
|
||||||
|
|
@ -131,7 +136,8 @@ pub fn lexer<'src>() -> impl Parser<'src, &'src str, Output<'src>, Error<'src>>
|
||||||
just(")").to(Token::RParen),
|
just(")").to(Token::RParen),
|
||||||
just(",").to(Token::Comma),
|
just(",").to(Token::Comma),
|
||||||
just("..").to(Token::DotDot),
|
just("..").to(Token::DotDot),
|
||||||
));
|
))
|
||||||
|
.labelled("control token");
|
||||||
|
|
||||||
// Lexer for operator symbols
|
// Lexer for operator symbols
|
||||||
let op = choice((
|
let op = choice((
|
||||||
|
|
@ -160,14 +166,16 @@ pub fn lexer<'src>() -> impl Parser<'src, &'src str, Output<'src>, Error<'src>>
|
||||||
just("*").to(Token::Times),
|
just("*").to(Token::Times),
|
||||||
just("/").to(Token::Divide),
|
just("/").to(Token::Divide),
|
||||||
just("=").to(Token::Assign),
|
just("=").to(Token::Assign),
|
||||||
));
|
))
|
||||||
|
.labelled("operator token");
|
||||||
|
|
||||||
let temporal_op = choice((
|
let temporal_op = choice((
|
||||||
just("\u{25cb}").to(Token::Next), // ○
|
just("\u{25cb}").to(Token::Next), // ○
|
||||||
just("\u{25ef}").to(Token::Next), // ◯
|
just("\u{25ef}").to(Token::Next), // ◯
|
||||||
just("\u{25c7}").to(Token::Eventually), // ◇
|
just("\u{25c7}").to(Token::Eventually), // ◇
|
||||||
just("\u{25a1}").to(Token::Always), // □
|
just("\u{25a1}").to(Token::Always), // □
|
||||||
));
|
))
|
||||||
|
.labelled("temporal operator token");
|
||||||
|
|
||||||
// A parser for strings
|
// A parser for strings
|
||||||
// Strings in our grammar are identifiers too
|
// Strings in our grammar are identifiers too
|
||||||
|
|
@ -175,28 +183,31 @@ pub fn lexer<'src>() -> impl Parser<'src, &'src str, Output<'src>, Error<'src>>
|
||||||
.ignore_then(none_of('"').repeated())
|
.ignore_then(none_of('"').repeated())
|
||||||
.then_ignore(just('"'))
|
.then_ignore(just('"'))
|
||||||
.to_slice()
|
.to_slice()
|
||||||
.map(Token::Ident);
|
.map(Token::Ident)
|
||||||
|
.labelled("quoted identifier");
|
||||||
|
|
||||||
// A parser for identifiers and keywords
|
// A parser for identifiers and keywords
|
||||||
let ident = text::ident().map(|ident: &str| match ident {
|
let ident = text::ident()
|
||||||
"true" => Token::Bool(true),
|
.map(|ident: &str| match ident {
|
||||||
"false" => Token::Bool(false),
|
"true" => Token::Bool(true),
|
||||||
"TRUE" => Token::Bool(true),
|
"false" => Token::Bool(false),
|
||||||
"FALSE" => Token::Bool(false),
|
"TRUE" => Token::Bool(true),
|
||||||
"G" => Token::Always,
|
"FALSE" => Token::Bool(false),
|
||||||
"alw" => Token::Always,
|
"G" => Token::Always,
|
||||||
"always" => Token::Always,
|
"alw" => Token::Always,
|
||||||
"globally" => Token::Always,
|
"always" => Token::Always,
|
||||||
"F" => Token::Eventually,
|
"globally" => Token::Always,
|
||||||
"ev" => Token::Eventually,
|
"F" => Token::Eventually,
|
||||||
"eventually" => Token::Eventually,
|
"ev" => Token::Eventually,
|
||||||
"finally" => Token::Eventually,
|
"eventually" => Token::Eventually,
|
||||||
"X" => Token::Next,
|
"finally" => Token::Eventually,
|
||||||
"next" => Token::Next,
|
"X" => Token::Next,
|
||||||
"U" => Token::Until,
|
"next" => Token::Next,
|
||||||
"until" => Token::Until,
|
"U" => Token::Until,
|
||||||
_ => Token::Ident(ident),
|
"until" => Token::Until,
|
||||||
});
|
_ => Token::Ident(ident),
|
||||||
|
})
|
||||||
|
.labelled("identifier");
|
||||||
|
|
||||||
// A single token can be one of the above
|
// A single token can be one of the above
|
||||||
let token = choice((op, temporal_op, ctrl, quoted_ident, ident, number)).boxed();
|
let token = choice((op, temporal_op, ctrl, quoted_ident, ident, number)).boxed();
|
||||||
|
|
|
||||||
|
|
@ -20,8 +20,8 @@ pub fn parse_str(src: &str) -> Result<crate::core::expr::Expr, Vec<Rich<'_, Stri
|
||||||
let (tokens, lex_errors) = lexer().parse(src).into_output_errors();
|
let (tokens, lex_errors) = lexer().parse(src).into_output_errors();
|
||||||
log::debug!("** Tokens output **");
|
log::debug!("** Tokens output **");
|
||||||
log::debug!("{:#?}", tokens);
|
log::debug!("{:#?}", tokens);
|
||||||
log::debug!("** Lexing Errors **");
|
log::debug!("** Lexing Errors: {} **", lex_errors.len());
|
||||||
log::debug!("[{}]", lex_errors.iter().map(|e| e.to_string()).join("\n- "));
|
log::debug!("\n{}", lex_errors.iter().map(|e| e.to_string()).join("\n"));
|
||||||
|
|
||||||
let (parsed, parse_errors) = if let Some(tokens) = &tokens {
|
let (parsed, parse_errors) = if let Some(tokens) = &tokens {
|
||||||
parser()
|
parser()
|
||||||
|
|
@ -33,8 +33,8 @@ pub fn parse_str(src: &str) -> Result<crate::core::expr::Expr, Vec<Rich<'_, Stri
|
||||||
|
|
||||||
log::debug!("** Parse output **");
|
log::debug!("** Parse output **");
|
||||||
log::debug!("{:#?}", parsed);
|
log::debug!("{:#?}", parsed);
|
||||||
log::debug!("** Parse Errors **");
|
log::debug!("** Parse Errors: {}**", parse_errors.len());
|
||||||
log::debug!("[{}]", parse_errors.iter().map(|e| e.to_string()).join("\n- "));
|
log::debug!("\n{}", parse_errors.iter().map(|e| e.to_string()).join("\n"));
|
||||||
|
|
||||||
let (expr, expr_errors) = if let Some((ast, span)) = parsed {
|
let (expr, expr_errors) = if let Some((ast, span)) = parsed {
|
||||||
let mut expr_builder = ExprBuilder::new();
|
let mut expr_builder = ExprBuilder::new();
|
||||||
|
|
@ -49,16 +49,27 @@ pub fn parse_str(src: &str) -> Result<crate::core::expr::Expr, Vec<Rich<'_, Stri
|
||||||
|
|
||||||
log::debug!("** Final Expression **");
|
log::debug!("** Final Expression **");
|
||||||
log::debug!("{:#?}", expr);
|
log::debug!("{:#?}", expr);
|
||||||
log::debug!("** AST to Expr Errors **");
|
log::debug!("** AST to Expr Errors: {} **", expr_errors.len());
|
||||||
log::debug!("[{}]", expr_errors.iter().map(|e| e.to_string()).join("\n- "));
|
log::debug!("\n{}", expr_errors.iter().map(|e| e.to_string()).join("\n"));
|
||||||
|
|
||||||
let errors: Vec<_> = lex_errors
|
let errors: Vec<_> = lex_errors
|
||||||
.into_iter()
|
.into_iter()
|
||||||
.map(|e| e.map_token(|c| c.to_string()))
|
.filter_map(|e| {
|
||||||
|
// HACK: Discard empty expected lex errors
|
||||||
|
// TODO: not sure why this happens
|
||||||
|
use chumsky::error::RichReason::*;
|
||||||
|
match e.reason() {
|
||||||
|
ExpectedFound { expected, found: _ } if expected.is_empty() => return None,
|
||||||
|
_ => {}
|
||||||
|
};
|
||||||
|
Some(e.map_token(|c| c.to_string()))
|
||||||
|
})
|
||||||
.chain(parse_errors.into_iter().map(|e| e.map_token(|tok| tok.to_string())))
|
.chain(parse_errors.into_iter().map(|e| e.map_token(|tok| tok.to_string())))
|
||||||
.chain(expr_errors.into_iter().map(|e| e.map_token(|tok| tok.to_string())))
|
.chain(expr_errors.into_iter().map(|e| e.map_token(|tok| tok.to_string())))
|
||||||
.map(|e| e.into_owned())
|
.map(|e| e.into_owned())
|
||||||
.collect();
|
.collect();
|
||||||
|
log::debug!("** Total Errors: {}**", errors.len());
|
||||||
|
log::debug!("[{}]", errors.iter().map(|e| e.to_string()).join("\n"));
|
||||||
if !errors.is_empty() {
|
if !errors.is_empty() {
|
||||||
Err(errors)
|
Err(errors)
|
||||||
} else {
|
} else {
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue