diff --git a/argus/src/parser/lexer.rs b/argus/src/parser/lexer.rs
index ddd3ae3..eae0853 100644
--- a/argus/src/parser/lexer.rs
+++ b/argus/src/parser/lexer.rs
@@ -85,7 +85,7 @@ impl<'src> fmt::Display for Token<'src> {
 
 pub fn lexer<'src>() -> impl Parser<'src, &'src str, Output<'src>, Error<'src>> {
     // A parser for numbers
-    let digits = text::digits(10).to_slice();
+    let digits = text::digits(10).to_slice().labelled("digits");
 
     let frac = just('.').then(digits.or_not());
 
@@ -103,6 +103,7 @@ pub fn lexer<'src>() -> impl Parser<'src, &'src str, Output<'src>, Error<'src>>
             .map(Token::Float)
             .map_err(|err| Rich::custom(e.span(), format!("Unable to parse as 64-bit float: {}", err)))
         })
+        .labelled("float")
         .boxed();
 
     let signed_int = one_of("+-")
@@ -113,14 +114,18 @@ pub fn lexer<'src>() -> impl Parser<'src, &'src str, Output<'src>, Error<'src>>
             s.parse()
                 .map(Token::Int)
                 .map_err(|err| Rich::custom(e.span(), format!("Unable to parse as 64-bit signed int: {}", err)))
-        });
-    let unsigned_int = digits.to_slice().try_map_with(|s: &str, e| {
-        s.parse()
-            .map(Token::UInt)
-            .map_err(|err| Rich::custom(e.span(), format!("Unable to parse as 64-bit unsigned int: {}", err)))
-    });
+        })
+        .labelled("signed integer");
+    let unsigned_int = digits
+        .to_slice()
+        .try_map_with(|s: &str, e| {
+            s.parse()
+                .map(Token::UInt)
+                .map_err(|err| Rich::custom(e.span(), format!("Unable to parse as 64-bit unsigned int: {}", err)))
+        })
+        .labelled("unsigned integer");
 
-    let number = choice((floating_number, signed_int, unsigned_int));
+    let number = choice((floating_number, signed_int, unsigned_int)).labelled("number");
 
     // A parser for control characters (delimiters, semicolons, etc.)
     let ctrl = choice((
@@ -131,7 +136,8 @@ pub fn lexer<'src>() -> impl Parser<'src, &'src str, Output<'src>, Error<'src>>
         just(")").to(Token::RParen),
         just(",").to(Token::Comma),
         just("..").to(Token::DotDot),
-    ));
+    ))
+    .labelled("control token");
 
     // Lexer for operator symbols
     let op = choice((
@@ -160,14 +166,16 @@ pub fn lexer<'src>() -> impl Parser<'src, &'src str, Output<'src>, Error<'src>>
         just("*").to(Token::Times),
         just("/").to(Token::Divide),
         just("=").to(Token::Assign),
-    ));
+    ))
+    .labelled("operator token");
 
     let temporal_op = choice((
        just("\u{25cb}").to(Token::Next), // ○
        just("\u{25ef}").to(Token::Next), // ◯
        just("\u{25c7}").to(Token::Eventually), // ◇
        just("\u{25a1}").to(Token::Always), // □
-    ));
+    ))
+    .labelled("temporal operator token");
 
     // A parser for strings
     // Strings in our grammar are identifiers too
@@ -175,28 +183,31 @@ pub fn lexer<'src>() -> impl Parser<'src, &'src str, Output<'src>, Error<'src>>
         .ignore_then(none_of('"').repeated())
         .then_ignore(just('"'))
         .to_slice()
-        .map(Token::Ident);
+        .map(Token::Ident)
+        .labelled("quoted identifier");
 
     // A parser for identifiers and keywords
-    let ident = text::ident().map(|ident: &str| match ident {
-        "true" => Token::Bool(true),
-        "false" => Token::Bool(false),
-        "TRUE" => Token::Bool(true),
-        "FALSE" => Token::Bool(false),
-        "G" => Token::Always,
-        "alw" => Token::Always,
-        "always" => Token::Always,
-        "globally" => Token::Always,
-        "F" => Token::Eventually,
-        "ev" => Token::Eventually,
-        "eventually" => Token::Eventually,
-        "finally" => Token::Eventually,
-        "X" => Token::Next,
-        "next" => Token::Next,
-        "U" => Token::Until,
-        "until" => Token::Until,
-        _ => Token::Ident(ident),
-    });
+    let ident = text::ident()
+        .map(|ident: &str| match ident {
+            "true" => Token::Bool(true),
+            "false" => Token::Bool(false),
+            "TRUE" => Token::Bool(true),
+            "FALSE" => Token::Bool(false),
+            "G" => Token::Always,
+            "alw" => Token::Always,
+            "always" => Token::Always,
+            "globally" => Token::Always,
+            "F" => Token::Eventually,
+            "ev" => Token::Eventually,
+            "eventually" => Token::Eventually,
+            "finally" => Token::Eventually,
+            "X" => Token::Next,
+            "next" => Token::Next,
+            "U" => Token::Until,
+            "until" => Token::Until,
+            _ => Token::Ident(ident),
+        })
+        .labelled("identifier");
 
     // A single token can be one of the above
     let token = choice((op, temporal_op, ctrl, quoted_ident, ident, number)).boxed();
diff --git a/argus/src/parser/mod.rs b/argus/src/parser/mod.rs
index c8c76ac..17eff24 100644
--- a/argus/src/parser/mod.rs
+++ b/argus/src/parser/mod.rs
@@ -20,8 +20,8 @@ pub fn parse_str(src: &str) -> Result
     let errors: Vec<_> = lex_errors
         .into_iter()
-        .map(|e| e.map_token(|c| c.to_string()))
+        .filter_map(|e| {
+            // HACK: Discard empty expected lex errors
+            // TODO: not sure why this happens
+            use chumsky::error::RichReason::*;
+            match e.reason() {
+                ExpectedFound { expected, found: _ } if expected.is_empty() => return None,
+                _ => {}
+            };
+            Some(e.map_token(|c| c.to_string()))
+        })
         .chain(parse_errors.into_iter().map(|e| e.map_token(|tok| tok.to_string())))
         .chain(expr_errors.into_iter().map(|e| e.map_token(|tok| tok.to_string())))
         .map(|e| e.into_owned())
         .collect();
+    log::debug!("** Total Errors: {}**", errors.len());
+    log::debug!("[{}]", errors.iter().map(|e| e.to_string()).join("\n"));
     if !errors.is_empty() {
         Err(errors)
     } else {
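For context, here is a minimal, self-contained sketch of what the `.labelled(...)` calls in the patch buy. It is not part of the patch: it assumes chumsky 1.x with `Rich` errors (the setup the lexer already uses), and the `digits_lexer` function and `main` harness are purely illustrative. The label replaces low-level character expectations in the reported error with a readable name.

```rust
use chumsky::prelude::*;

// Illustrative only: a labelled lexer fragment in the style of the patch above.
fn digits_lexer<'src>() -> impl Parser<'src, &'src str, &'src str, extra::Err<Rich<'src, char>>> {
    // Attach a human-readable label so Rich errors report an expectation of
    // "digits" rather than raw character patterns.
    text::digits(10).to_slice().labelled("digits")
}

fn main() {
    // The trailing 'a' is not a digit (and not end of input), so parsing fails
    // and the resulting Rich error carries the "digits" label in its expected set.
    let (output, errors) = digits_lexer().parse("12a").into_output_errors();
    assert!(output.is_none());
    for err in errors {
        println!("{err:?}");
    }
}
```

The same idea applies to the "float", "signed integer", "unsigned integer", and token-class labels: they keep the lexer's diagnostics phrased in terms of the grammar rather than individual characters, which matches the custom "Unable to parse as 64-bit ..." messages already produced via `Rich::custom`.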