fix(argus): weird lex errors
For inputs like 'G [,20000000000000000000]false', the lexer returns the error "found ']' expected something else". The error is no better after adding labels to each token group, so I am just discarding these empty-expected errors. If this manifests as a bug down the line, then ¯\_(ツ)_/¯
This commit is contained in:
parent
c522b17c88
commit
f510f0067b
2 changed files with 60 additions and 38 deletions
|
|
@ -85,7 +85,7 @@ impl<'src> fmt::Display for Token<'src> {
|
||||||
|
|
||||||
pub fn lexer<'src>() -> impl Parser<'src, &'src str, Output<'src>, Error<'src>> {
|
pub fn lexer<'src>() -> impl Parser<'src, &'src str, Output<'src>, Error<'src>> {
|
||||||
// A parser for numbers
|
// A parser for numbers
|
||||||
let digits = text::digits(10).to_slice();
|
let digits = text::digits(10).to_slice().labelled("digits");
|
||||||
|
|
||||||
let frac = just('.').then(digits.or_not());
|
let frac = just('.').then(digits.or_not());
|
||||||
|
|
||||||
|
|
@ -103,6 +103,7 @@ pub fn lexer<'src>() -> impl Parser<'src, &'src str, Output<'src>, Error<'src>>
|
||||||
.map(Token::Float)
|
.map(Token::Float)
|
||||||
.map_err(|err| Rich::custom(e.span(), format!("Unable to parse as 64-bit float: {}", err)))
|
.map_err(|err| Rich::custom(e.span(), format!("Unable to parse as 64-bit float: {}", err)))
|
||||||
})
|
})
|
||||||
|
.labelled("float")
|
||||||
.boxed();
|
.boxed();
|
||||||
|
|
||||||
let signed_int = one_of("+-")
|
let signed_int = one_of("+-")
|
||||||
|
|
@ -113,14 +114,18 @@ pub fn lexer<'src>() -> impl Parser<'src, &'src str, Output<'src>, Error<'src>>
|
||||||
s.parse()
|
s.parse()
|
||||||
.map(Token::Int)
|
.map(Token::Int)
|
||||||
.map_err(|err| Rich::custom(e.span(), format!("Unable to parse as 64-bit signed int: {}", err)))
|
.map_err(|err| Rich::custom(e.span(), format!("Unable to parse as 64-bit signed int: {}", err)))
|
||||||
});
|
})
|
||||||
let unsigned_int = digits.to_slice().try_map_with(|s: &str, e| {
|
.labelled("signed integer");
|
||||||
s.parse()
|
let unsigned_int = digits
|
||||||
.map(Token::UInt)
|
.to_slice()
|
||||||
.map_err(|err| Rich::custom(e.span(), format!("Unable to parse as 64-bit unsigned int: {}", err)))
|
.try_map_with(|s: &str, e| {
|
||||||
});
|
s.parse()
|
||||||
|
.map(Token::UInt)
|
||||||
|
.map_err(|err| Rich::custom(e.span(), format!("Unable to parse as 64-bit unsigned int: {}", err)))
|
||||||
|
})
|
||||||
|
.labelled("unsigned integer");
|
||||||
|
|
||||||
let number = choice((floating_number, signed_int, unsigned_int));
|
let number = choice((floating_number, signed_int, unsigned_int)).labelled("number");
|
||||||
|
|
||||||
// A parser for control characters (delimiters, semicolons, etc.)
|
// A parser for control characters (delimiters, semicolons, etc.)
|
||||||
let ctrl = choice((
|
let ctrl = choice((
|
||||||
|
|
@ -131,7 +136,8 @@ pub fn lexer<'src>() -> impl Parser<'src, &'src str, Output<'src>, Error<'src>>
|
||||||
just(")").to(Token::RParen),
|
just(")").to(Token::RParen),
|
||||||
just(",").to(Token::Comma),
|
just(",").to(Token::Comma),
|
||||||
just("..").to(Token::DotDot),
|
just("..").to(Token::DotDot),
|
||||||
));
|
))
|
||||||
|
.labelled("control token");
|
||||||
|
|
||||||
// Lexer for operator symbols
|
// Lexer for operator symbols
|
||||||
let op = choice((
|
let op = choice((
|
||||||
|
|
@ -160,14 +166,16 @@ pub fn lexer<'src>() -> impl Parser<'src, &'src str, Output<'src>, Error<'src>>
|
||||||
just("*").to(Token::Times),
|
just("*").to(Token::Times),
|
||||||
just("/").to(Token::Divide),
|
just("/").to(Token::Divide),
|
||||||
just("=").to(Token::Assign),
|
just("=").to(Token::Assign),
|
||||||
));
|
))
|
||||||
|
.labelled("operator token");
|
||||||
|
|
||||||
let temporal_op = choice((
|
let temporal_op = choice((
|
||||||
just("\u{25cb}").to(Token::Next), // ○
|
just("\u{25cb}").to(Token::Next), // ○
|
||||||
just("\u{25ef}").to(Token::Next), // ◯
|
just("\u{25ef}").to(Token::Next), // ◯
|
||||||
just("\u{25c7}").to(Token::Eventually), // ◇
|
just("\u{25c7}").to(Token::Eventually), // ◇
|
||||||
just("\u{25a1}").to(Token::Always), // □
|
just("\u{25a1}").to(Token::Always), // □
|
||||||
));
|
))
|
||||||
|
.labelled("temporal operator token");
|
||||||
|
|
||||||
// A parser for strings
|
// A parser for strings
|
||||||
// Strings in our grammar are identifiers too
|
// Strings in our grammar are identifiers too
|
||||||
|
|
@ -175,28 +183,31 @@ pub fn lexer<'src>() -> impl Parser<'src, &'src str, Output<'src>, Error<'src>>
|
||||||
.ignore_then(none_of('"').repeated())
|
.ignore_then(none_of('"').repeated())
|
||||||
.then_ignore(just('"'))
|
.then_ignore(just('"'))
|
||||||
.to_slice()
|
.to_slice()
|
||||||
.map(Token::Ident);
|
.map(Token::Ident)
|
||||||
|
.labelled("quoted identifier");
|
||||||
|
|
||||||
// A parser for identifiers and keywords
|
// A parser for identifiers and keywords
|
||||||
let ident = text::ident().map(|ident: &str| match ident {
|
let ident = text::ident()
|
||||||
"true" => Token::Bool(true),
|
.map(|ident: &str| match ident {
|
||||||
"false" => Token::Bool(false),
|
"true" => Token::Bool(true),
|
||||||
"TRUE" => Token::Bool(true),
|
"false" => Token::Bool(false),
|
||||||
"FALSE" => Token::Bool(false),
|
"TRUE" => Token::Bool(true),
|
||||||
"G" => Token::Always,
|
"FALSE" => Token::Bool(false),
|
||||||
"alw" => Token::Always,
|
"G" => Token::Always,
|
||||||
"always" => Token::Always,
|
"alw" => Token::Always,
|
||||||
"globally" => Token::Always,
|
"always" => Token::Always,
|
||||||
"F" => Token::Eventually,
|
"globally" => Token::Always,
|
||||||
"ev" => Token::Eventually,
|
"F" => Token::Eventually,
|
||||||
"eventually" => Token::Eventually,
|
"ev" => Token::Eventually,
|
||||||
"finally" => Token::Eventually,
|
"eventually" => Token::Eventually,
|
||||||
"X" => Token::Next,
|
"finally" => Token::Eventually,
|
||||||
"next" => Token::Next,
|
"X" => Token::Next,
|
||||||
"U" => Token::Until,
|
"next" => Token::Next,
|
||||||
"until" => Token::Until,
|
"U" => Token::Until,
|
||||||
_ => Token::Ident(ident),
|
"until" => Token::Until,
|
||||||
});
|
_ => Token::Ident(ident),
|
||||||
|
})
|
||||||
|
.labelled("identifier");
|
||||||
|
|
||||||
// A single token can be one of the above
|
// A single token can be one of the above
|
||||||
let token = choice((op, temporal_op, ctrl, quoted_ident, ident, number)).boxed();
|
let token = choice((op, temporal_op, ctrl, quoted_ident, ident, number)).boxed();
|
||||||
|
|
|
||||||
|
|
@ -20,8 +20,8 @@ pub fn parse_str(src: &str) -> Result<crate::core::expr::Expr, Vec<Rich<'_, Stri
|
||||||
let (tokens, lex_errors) = lexer().parse(src).into_output_errors();
|
let (tokens, lex_errors) = lexer().parse(src).into_output_errors();
|
||||||
log::debug!("** Tokens output **");
|
log::debug!("** Tokens output **");
|
||||||
log::debug!("{:#?}", tokens);
|
log::debug!("{:#?}", tokens);
|
||||||
log::debug!("** Lexing Errors **");
|
log::debug!("** Lexing Errors: {} **", lex_errors.len());
|
||||||
log::debug!("[{}]", lex_errors.iter().map(|e| e.to_string()).join("\n- "));
|
log::debug!("\n{}", lex_errors.iter().map(|e| e.to_string()).join("\n"));
|
||||||
|
|
||||||
let (parsed, parse_errors) = if let Some(tokens) = &tokens {
|
let (parsed, parse_errors) = if let Some(tokens) = &tokens {
|
||||||
parser()
|
parser()
|
||||||
|
|
@ -33,8 +33,8 @@ pub fn parse_str(src: &str) -> Result<crate::core::expr::Expr, Vec<Rich<'_, Stri
|
||||||
|
|
||||||
log::debug!("** Parse output **");
|
log::debug!("** Parse output **");
|
||||||
log::debug!("{:#?}", parsed);
|
log::debug!("{:#?}", parsed);
|
||||||
log::debug!("** Parse Errors **");
|
log::debug!("** Parse Errors: {}**", parse_errors.len());
|
||||||
log::debug!("[{}]", parse_errors.iter().map(|e| e.to_string()).join("\n- "));
|
log::debug!("\n{}", parse_errors.iter().map(|e| e.to_string()).join("\n"));
|
||||||
|
|
||||||
let (expr, expr_errors) = if let Some((ast, span)) = parsed {
|
let (expr, expr_errors) = if let Some((ast, span)) = parsed {
|
||||||
let mut expr_builder = ExprBuilder::new();
|
let mut expr_builder = ExprBuilder::new();
|
||||||
|
|
@ -49,16 +49,27 @@ pub fn parse_str(src: &str) -> Result<crate::core::expr::Expr, Vec<Rich<'_, Stri
|
||||||
|
|
||||||
log::debug!("** Final Expression **");
|
log::debug!("** Final Expression **");
|
||||||
log::debug!("{:#?}", expr);
|
log::debug!("{:#?}", expr);
|
||||||
log::debug!("** AST to Expr Errors **");
|
log::debug!("** AST to Expr Errors: {} **", expr_errors.len());
|
||||||
log::debug!("[{}]", expr_errors.iter().map(|e| e.to_string()).join("\n- "));
|
log::debug!("\n{}", expr_errors.iter().map(|e| e.to_string()).join("\n"));
|
||||||
|
|
||||||
let errors: Vec<_> = lex_errors
|
let errors: Vec<_> = lex_errors
|
||||||
.into_iter()
|
.into_iter()
|
||||||
.map(|e| e.map_token(|c| c.to_string()))
|
.filter_map(|e| {
|
||||||
|
// HACK: Discard empty expected lex errors
|
||||||
|
// TODO: not sure why this happens
|
||||||
|
use chumsky::error::RichReason::*;
|
||||||
|
match e.reason() {
|
||||||
|
ExpectedFound { expected, found: _ } if expected.is_empty() => return None,
|
||||||
|
_ => {}
|
||||||
|
};
|
||||||
|
Some(e.map_token(|c| c.to_string()))
|
||||||
|
})
|
||||||
.chain(parse_errors.into_iter().map(|e| e.map_token(|tok| tok.to_string())))
|
.chain(parse_errors.into_iter().map(|e| e.map_token(|tok| tok.to_string())))
|
||||||
.chain(expr_errors.into_iter().map(|e| e.map_token(|tok| tok.to_string())))
|
.chain(expr_errors.into_iter().map(|e| e.map_token(|tok| tok.to_string())))
|
||||||
.map(|e| e.into_owned())
|
.map(|e| e.into_owned())
|
||||||
.collect();
|
.collect();
|
||||||
|
log::debug!("** Total Errors: {}**", errors.len());
|
||||||
|
log::debug!("[{}]", errors.iter().map(|e| e.to_string()).join("\n"));
|
||||||
if !errors.is_empty() {
|
if !errors.is_empty() {
|
||||||
Err(errors)
|
Err(errors)
|
||||||
} else {
|
} else {
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue