From e9356301fe061d263bb7d0d8b8404ff788b70289 Mon Sep 17 00:00:00 2001 From: hlorenzi Date: Wed, 27 Dec 2023 15:19:31 -0300 Subject: [PATCH] wip --- src/asm/defs/ruledef_map.rs | 4 +- src/asm/matcher/mod.rs | 193 +++-- src/asm/parser/directive.rs | 2 +- src/asm/parser/directive_addr.rs | 4 +- src/asm/parser/directive_align.rs | 4 +- src/asm/parser/directive_bank.rs | 4 +- src/asm/parser/directive_bankdef.rs | 4 +- src/asm/parser/directive_bits.rs | 4 +- src/asm/parser/directive_const.rs | 2 +- src/asm/parser/directive_data.rs | 6 +- src/asm/parser/directive_fn.rs | 8 +- src/asm/parser/directive_if.rs | 16 +- src/asm/parser/directive_include.rs | 4 +- src/asm/parser/directive_labelalign.rs | 4 +- src/asm/parser/directive_noemit.rs | 4 +- src/asm/parser/directive_once.rs | 4 +- src/asm/parser/directive_res.rs | 4 +- src/asm/parser/directive_ruledef.rs | 42 +- src/asm/parser/fields.rs | 6 +- src/asm/parser/instruction.rs | 15 +- src/asm/parser/mod.rs | 92 +-- src/asm/parser/symbol.rs | 8 +- src/asm/resolver/eval_asm.rs | 84 +- src/asm/resolver/instruction.rs | 14 +- src/diagn/span.rs | 13 + src/expr/eval.rs | 27 +- src/expr/expression.rs | 2 +- src/expr/parser.rs | 94 +-- src/syntax/mod.rs | 7 +- src/syntax/token.rs | 25 +- src/syntax/token_walker.rs | 728 ------------------ src/syntax/walker.rs | 586 ++++++++++++++ src/test/expr.rs | 11 +- tests/comment/7.asm | 2 +- tests/comment/8.asm | 2 +- tests/issue193/ok.asm | 12 + tests/rule_arg_glued/err_one_param.asm | 2 - tests/rule_arg_glued/err_one_param_symbol.asm | 7 - tests/rule_arg_glued/err_symbol_unknown.asm | 7 + tests/rule_arg_glued/err_two_params.asm | 2 - .../rule_arg_glued/err_two_params_symbol.asm | 7 - tests/rule_arg_glued/ok_one_param.asm | 5 +- tests/rule_arg_glued/ok_one_param_symbol.asm | 10 + tests/rule_arg_glued/ok_two_params.asm | 7 +- tests/rule_arg_glued/ok_two_params_symbol.asm | 10 + 45 files changed, 974 insertions(+), 1124 deletions(-) delete mode 100644 src/syntax/token_walker.rs create mode 100644 src/syntax/walker.rs create mode 100644 tests/issue193/ok.asm delete mode 100644 tests/rule_arg_glued/err_one_param_symbol.asm create mode 100644 tests/rule_arg_glued/err_symbol_unknown.asm delete mode 100644 tests/rule_arg_glued/err_two_params_symbol.asm create mode 100644 tests/rule_arg_glued/ok_one_param_symbol.asm create mode 100644 tests/rule_arg_glued/ok_two_params_symbol.asm diff --git a/src/asm/defs/ruledef_map.rs b/src/asm/defs/ruledef_map.rs index 7c10d062..bb5e35b8 100644 --- a/src/asm/defs/ruledef_map.rs +++ b/src/asm/defs/ruledef_map.rs @@ -102,7 +102,7 @@ impl RuledefMap pub fn parse_prefix( - walker: &syntax::TokenWalker) + walker: &syntax::Walker) -> RuledefMapPrefix { let mut prefix: RuledefMapPrefix = ['\0'; MAX_PREFIX_SIZE]; @@ -112,7 +112,7 @@ impl RuledefMap while prefix_index < MAX_PREFIX_SIZE { - let token = walker.next_nth(walker_index); + let token = walker.next_nth_token(walker_index); walker_index += 1; if token.kind.is_allowed_pattern_token() diff --git a/src/asm/matcher/mod.rs b/src/asm/matcher/mod.rs index f0b8b4aa..fddc5c61 100644 --- a/src/asm/matcher/mod.rs +++ b/src/asm/matcher/mod.rs @@ -1,11 +1,11 @@ use crate::*; -type WorkingMatches<'tokens> = Vec>; +type WorkingMatches<'src> = Vec>; -type WorkingMatch<'tokens> = - (InstructionMatch, syntax::TokenWalker<'tokens>); +type WorkingMatch<'src> = + (InstructionMatch, syntax::Walker<'src>); pub type InstructionMatches = Vec; @@ -73,7 +73,8 @@ impl InstructionMatchResolution pub struct InstructionArgument { pub kind: InstructionArgumentKind, - pub tokens: Vec, + pub span: diagn::Span, + pub excerpt: String, } @@ -106,7 +107,8 @@ pub fn match_all( let mut matches = match_instr( opts, defs, - &ast_instr.tokens); + ast_instr.span, + &ast_instr.src); if let Err(()) = error_on_no_matches( @@ -155,10 +157,7 @@ pub fn match_all( if opts.debug_iterations { println!(" size: {} = {:?}{}", - ast_instr.tokens.iter() - .map(|t| t.text()) - .collect::>() - .join(""), + ast_instr.src, instr.encoding.size.unwrap(), if instr.encoding_statically_known { " [static]" } else { "" }); } @@ -336,15 +335,19 @@ fn get_match_static_size( /// Runs the instruction-matching algorithm on the given -/// Token slice, and returns the matches. +/// string, and returns the matches. pub fn match_instr( opts: &asm::AssemblyOptions, defs: &asm::ItemDefs, - tokens: &[syntax::Token]) + span: diagn::Span, + src: &str) -> InstructionMatches { let mut working_matches = WorkingMatches::new(); - let mut walker = syntax::TokenWalker::new(tokens); + let mut walker = syntax::Walker::new( + src, + span.file_handle, + span.location().unwrap().0 as usize); if opts.optimize_instruction_matching { @@ -414,10 +417,10 @@ pub fn match_instr( } -fn match_with_ruledef_map<'tokens>( +fn match_with_ruledef_map<'src>( defs: &asm::ItemDefs, - walker: syntax::TokenWalker<'tokens>) - -> WorkingMatches<'tokens> + walker: syntax::Walker<'src>) + -> WorkingMatches<'src> { let mut matches = WorkingMatches::new(); @@ -444,12 +447,12 @@ fn match_with_ruledef_map<'tokens>( } -fn match_with_ruledef<'tokens>( +fn match_with_ruledef<'src>( defs: &asm::ItemDefs, ruledef_ref: util::ItemRef, - walker: &mut syntax::TokenWalker<'tokens>, + walker: &mut syntax::Walker<'src>, needs_consume_all_tokens: bool) - -> WorkingMatches<'tokens> + -> WorkingMatches<'src> { let mut matches = WorkingMatches::new(); @@ -474,14 +477,14 @@ fn match_with_ruledef<'tokens>( } -fn begin_match_with_rule<'tokens>( +fn begin_match_with_rule<'src>( defs: &asm::ItemDefs, ruledef_ref: util::ItemRef, rule_ref: util::ItemRef, rule: &asm::Rule, - mut walker: syntax::TokenWalker<'tokens>, + mut walker: syntax::Walker<'src>, needs_consume_all_tokens: bool) - -> WorkingMatches<'tokens> + -> WorkingMatches<'src> { match_with_rule( defs, @@ -501,14 +504,14 @@ fn begin_match_with_rule<'tokens>( } -fn match_with_rule<'tokens>( +fn match_with_rule<'src>( defs: &asm::ItemDefs, rule: &asm::Rule, - walker: &mut syntax::TokenWalker<'tokens>, + walker: &mut syntax::Walker<'src>, needs_consume_all_tokens: bool, at_pattern_part: usize, match_so_far: &mut InstructionMatch) - -> WorkingMatches<'tokens> + -> WorkingMatches<'src> { for part_index in at_pattern_part..rule.pattern.len() { @@ -518,23 +521,15 @@ fn match_with_rule<'tokens>( { asm::RulePatternPart::Exact(c) => { - if walker.next_is_whitespace() && - !walker.is_whitespace_acknowledged() + if !walker.maybe_expect_char(*c) { return vec![]; } - - if walker.next_partial().to_ascii_lowercase() != *c - { - return vec![]; - } - - walker.advance_partial(); } asm::RulePatternPart::Whitespace => { - if let None = walker.maybe_expect_whitespace() + if walker.next_token().kind != syntax::TokenKind::Whitespace { return vec![]; } @@ -587,62 +582,47 @@ fn match_with_rule<'tokens>( } -fn match_with_expr<'tokens>( +fn match_with_expr<'src>( defs: &asm::ItemDefs, rule: &asm::Rule, - walker: &mut syntax::TokenWalker<'tokens>, + walker: &mut syntax::Walker<'src>, needs_consume_all_tokens: bool, at_pattern_part: usize, match_so_far: &mut InstructionMatch) - -> WorkingMatches<'tokens> + -> WorkingMatches<'src> { - if walker.is_at_partial() - { - match walker.maybe_expect_partial_usize() - { - None => - { - return vec![]; - } - Some(value) => - { - let expr = expr::Value::make_integer(value) - .make_literal(); + let walker_start = walker.next_useful_index(); - match_so_far.args.push(InstructionArgument { - kind: InstructionArgumentKind::Expr(expr), - tokens: Vec::new(), - }); - } - } - } - else - { - let token_start = walker.get_current_token_index(); + let maybe_expr = parse_with_lookahead( + &rule.pattern, + at_pattern_part, + walker, + |walker| expr::parse_optional(walker)); - let maybe_expr = parse_with_lookahead( - &rule.pattern, - at_pattern_part, - walker, - |walker| expr::parse_optional(walker)); + let walker_end = walker.get_cursor_index(); + let walker_start = std::cmp::min(walker_start, walker_end); - let token_end = walker.get_current_token_index(); + let expr = { + match maybe_expr + { + Some(expr) => expr, + None => return vec![], + } + }; - let expr = { - match maybe_expr - { - Some(expr) => expr, - None => return vec![], - } - }; + let span = walker.get_span( + walker_start, + walker_end); - match_so_far.args.push(InstructionArgument { - kind: InstructionArgumentKind::Expr(expr), - tokens: walker.get_cloned_tokens_by_index( - token_start, - token_end), - }); - } + let excerpt = walker.get_excerpt( + walker_start, + walker_end); + + match_so_far.args.push(InstructionArgument { + kind: InstructionArgumentKind::Expr(expr), + span, + excerpt: excerpt.to_string(), + }); match_with_rule( defs, @@ -654,17 +634,18 @@ fn match_with_expr<'tokens>( } -fn match_with_nested_ruledef<'tokens>( +fn match_with_nested_ruledef<'src>( defs: &asm::ItemDefs, nested_ruledef_ref: util::ItemRef, rule: &asm::Rule, - walker: &mut syntax::TokenWalker<'tokens>, + walker: &mut syntax::Walker<'src>, needs_consume_all_tokens: bool, at_pattern_part: usize, match_so_far: &mut InstructionMatch) - -> WorkingMatches<'tokens> + -> WorkingMatches<'src> { - let token_start = walker.get_current_token_index(); + let walker_start = walker.next_useful_index(); + let walker_limit_prev = walker.get_cursor_limit(); let nested_matches = parse_with_lookahead( &rule.pattern, @@ -681,20 +662,28 @@ fn match_with_nested_ruledef<'tokens>( for nested_match in nested_matches { - let mut walker = walker.clone(); - walker.copy_state_from(&nested_match.1); - + let mut walker = nested_match.1; + walker.set_cursor_limit(walker_limit_prev); + let walker_end = walker.get_cursor_index(); + let walker_start = std::cmp::min(walker_start, walker_end); + let mut match_so_far = match_so_far.clone(); + let span = walker.get_span( + walker_start, + walker_end); + + let excerpt = walker.get_excerpt( + walker_start, + walker_end); + match_so_far.args.push(InstructionArgument { kind: InstructionArgumentKind::Nested(nested_match.0), - tokens: walker.get_cloned_tokens_by_index( - token_start, - walker.get_current_token_index()), + span, + excerpt: excerpt.to_string(), }); - // Continue matching the current rule let resumed_matches = match_with_rule( defs, @@ -737,27 +726,29 @@ fn match_with_nested_ruledef<'tokens>( /// In cases where there's no lookahead character, the TokenWalker /// isn't cut off, and the expression-parser is allowed to /// consume as much as it can. -fn parse_with_lookahead<'tokens, F, T>( +fn parse_with_lookahead<'src, F, T>( pattern: &asm::RulePattern, at_pattern_part: usize, - walker: &mut syntax::TokenWalker<'tokens>, + walker: &mut syntax::Walker<'src>, parse_fn: F) -> T - where F: FnOnce(&mut syntax::TokenWalker<'tokens>) -> T + where F: FnOnce(&mut syntax::Walker<'src>) -> T { - let maybe_lookahead = find_lookahead_character( + let maybe_lookahead_char = find_lookahead_char( pattern, at_pattern_part); - if let Some(lookahead) = maybe_lookahead + if let Some(lookahead_char) = maybe_lookahead_char { - let maybe_lookahead_walker = walker - .try_lookahead_until_char_over_nested_parens(lookahead); + let maybe_limit = + walker.find_lookahead_char_index(lookahead_char); - if let Some(mut lookahead_walker) = maybe_lookahead_walker + if let Some(limit) = maybe_limit { - let result = parse_fn(&mut lookahead_walker); - walker.copy_state_from(&lookahead_walker); + let prev_limit = walker.get_cursor_limit(); + walker.set_cursor_limit(limit); + let result = parse_fn(walker); + walker.set_cursor_limit(prev_limit); return result; } } @@ -771,7 +762,7 @@ fn parse_with_lookahead<'tokens, F, T>( /// /// If the next applicable pattern-part is a parameter, /// it returns `None`. -fn find_lookahead_character( +fn find_lookahead_char( pattern: &[asm::RulePatternPart], at_pattern_part: usize) -> Option diff --git a/src/asm/parser/directive.rs b/src/asm/parser/directive.rs index 2b1ec3ad..2c653ee5 100644 --- a/src/asm/parser/directive.rs +++ b/src/asm/parser/directive.rs @@ -3,7 +3,7 @@ use crate::*; pub fn parse( report: &mut diagn::Report, - walker: &mut syntax::TokenWalker) + walker: &mut syntax::Walker) -> Result { let tk_hash = walker.expect(report, syntax::TokenKind::Hash)?; diff --git a/src/asm/parser/directive_addr.rs b/src/asm/parser/directive_addr.rs index 1e071ab8..4d957dbf 100644 --- a/src/asm/parser/directive_addr.rs +++ b/src/asm/parser/directive_addr.rs @@ -1,7 +1,7 @@ use crate::*; -#[derive(Debug)] +#[derive(Clone, Debug)] pub struct AstDirectiveAddr { pub header_span: diagn::Span, @@ -13,7 +13,7 @@ pub struct AstDirectiveAddr pub fn parse( report: &mut diagn::Report, - walker: &mut syntax::TokenWalker, + walker: &mut syntax::Walker, header_span: diagn::Span) -> Result { diff --git a/src/asm/parser/directive_align.rs b/src/asm/parser/directive_align.rs index 9081b967..1ead6aed 100644 --- a/src/asm/parser/directive_align.rs +++ b/src/asm/parser/directive_align.rs @@ -1,7 +1,7 @@ use crate::*; -#[derive(Debug)] +#[derive(Clone, Debug)] pub struct AstDirectiveAlign { pub header_span: diagn::Span, @@ -13,7 +13,7 @@ pub struct AstDirectiveAlign pub fn parse( report: &mut diagn::Report, - walker: &mut syntax::TokenWalker, + walker: &mut syntax::Walker, header_span: diagn::Span) -> Result { diff --git a/src/asm/parser/directive_bank.rs b/src/asm/parser/directive_bank.rs index f0e0b07a..5a093a7c 100644 --- a/src/asm/parser/directive_bank.rs +++ b/src/asm/parser/directive_bank.rs @@ -1,7 +1,7 @@ use crate::*; -#[derive(Debug)] +#[derive(Clone, Debug)] pub struct AstDirectiveBank { pub header_span: diagn::Span, @@ -14,7 +14,7 @@ pub struct AstDirectiveBank pub fn parse( report: &mut diagn::Report, - walker: &mut syntax::TokenWalker, + walker: &mut syntax::Walker, header_span: diagn::Span) -> Result { diff --git a/src/asm/parser/directive_bankdef.rs b/src/asm/parser/directive_bankdef.rs index 061adb82..f91d10c9 100644 --- a/src/asm/parser/directive_bankdef.rs +++ b/src/asm/parser/directive_bankdef.rs @@ -1,7 +1,7 @@ use crate::*; -#[derive(Debug)] +#[derive(Clone, Debug)] pub struct AstDirectiveBankdef { pub header_span: diagn::Span, @@ -22,7 +22,7 @@ pub struct AstDirectiveBankdef pub fn parse( report: &mut diagn::Report, - walker: &mut syntax::TokenWalker, + walker: &mut syntax::Walker, header_span: diagn::Span) -> Result { diff --git a/src/asm/parser/directive_bits.rs b/src/asm/parser/directive_bits.rs index b0a9da28..5987d658 100644 --- a/src/asm/parser/directive_bits.rs +++ b/src/asm/parser/directive_bits.rs @@ -1,7 +1,7 @@ use crate::*; -#[derive(Debug)] +#[derive(Clone, Debug)] pub struct AstDirectiveBits { pub header_span: diagn::Span, @@ -11,7 +11,7 @@ pub struct AstDirectiveBits pub fn parse( report: &mut diagn::Report, - _walker: &mut syntax::TokenWalker, + _walker: &mut syntax::Walker, header_span: diagn::Span) -> Result { diff --git a/src/asm/parser/directive_const.rs b/src/asm/parser/directive_const.rs index 142c7bcb..93f640fa 100644 --- a/src/asm/parser/directive_const.rs +++ b/src/asm/parser/directive_const.rs @@ -3,7 +3,7 @@ use super::*; pub fn parse( report: &mut diagn::Report, - walker: &mut syntax::TokenWalker, + walker: &mut syntax::Walker, _header_span: diagn::Span) -> Result { diff --git a/src/asm/parser/directive_data.rs b/src/asm/parser/directive_data.rs index 03cb3955..42c96863 100644 --- a/src/asm/parser/directive_data.rs +++ b/src/asm/parser/directive_data.rs @@ -1,7 +1,7 @@ use crate::*; -#[derive(Debug)] +#[derive(Clone, Debug)] pub struct AstDirectiveData { pub header_span: diagn::Span, @@ -14,7 +14,7 @@ pub struct AstDirectiveData pub fn parse( report: &mut diagn::Report, - walker: &mut syntax::TokenWalker, + walker: &mut syntax::Walker, elem_size: Option, header_span: diagn::Span) -> Result @@ -30,7 +30,7 @@ pub fn parse( break; } - if walker.next_is_linebreak() + if walker.next_linebreak().is_some() { break; } diff --git a/src/asm/parser/directive_fn.rs b/src/asm/parser/directive_fn.rs index 2ef93f05..c46ebec7 100644 --- a/src/asm/parser/directive_fn.rs +++ b/src/asm/parser/directive_fn.rs @@ -1,7 +1,7 @@ use crate::*; -#[derive(Debug)] +#[derive(Clone, Debug)] pub struct AstDirectiveFn { pub header_span: diagn::Span, @@ -14,7 +14,7 @@ pub struct AstDirectiveFn } -#[derive(Debug)] +#[derive(Clone, Debug)] pub struct AstFnParameter { pub name: String, @@ -23,7 +23,7 @@ pub struct AstFnParameter pub fn parse( report: &mut diagn::Report, - walker: &mut syntax::TokenWalker, + walker: &mut syntax::Walker, header_span: diagn::Span) -> Result { @@ -35,7 +35,7 @@ pub fn parse( let mut params = Vec::new(); while !walker.is_over() && - !walker.next_is(0, syntax::TokenKind::ParenClose) + !walker.next_useful_is(0, syntax::TokenKind::ParenClose) { let tk_param_name = walker.expect(report, syntax::TokenKind::Identifier)?; let param_name = tk_param_name.excerpt.clone().unwrap(); diff --git a/src/asm/parser/directive_if.rs b/src/asm/parser/directive_if.rs index 5011fe33..c19d1459 100644 --- a/src/asm/parser/directive_if.rs +++ b/src/asm/parser/directive_if.rs @@ -1,7 +1,7 @@ use crate::{*, asm::AstTopLevel}; -#[derive(Debug)] +#[derive(Clone, Debug)] pub struct AstDirectiveIf { pub header_span: diagn::Span, @@ -14,7 +14,7 @@ pub struct AstDirectiveIf pub fn parse( report: &mut diagn::Report, - walker: &mut syntax::TokenWalker, + walker: &mut syntax::Walker, header_span: diagn::Span) -> Result { @@ -35,7 +35,7 @@ pub fn parse( fn parse_braced_block( report: &mut diagn::Report, - walker: &mut syntax::TokenWalker) + walker: &mut syntax::Walker) -> Result { walker.expect(report, syntax::TokenKind::BraceOpen)?; @@ -52,20 +52,18 @@ fn parse_braced_block( fn parse_else_blocks( report: &mut diagn::Report, - walker: &mut syntax::TokenWalker) + walker: &mut syntax::Walker) -> Result, ()> { - if !walker.next_is(0, syntax::TokenKind::Hash) || - !walker.next_is(1, syntax::TokenKind::Identifier) + if !walker.next_useful_is(0, syntax::TokenKind::Hash) || + !walker.next_useful_is(1, syntax::TokenKind::Identifier) { return Ok(None); } let directive_name = walker - .next_nth(1) + .next_nth_useful_token(1) .excerpt - .as_ref() - .map(|s| s.as_str()) .unwrap(); if directive_name == "else" diff --git a/src/asm/parser/directive_include.rs b/src/asm/parser/directive_include.rs index 6f982196..a4e08a8c 100644 --- a/src/asm/parser/directive_include.rs +++ b/src/asm/parser/directive_include.rs @@ -1,7 +1,7 @@ use crate::*; -#[derive(Debug)] +#[derive(Clone, Debug)] pub struct AstDirectiveInclude { pub header_span: diagn::Span, @@ -12,7 +12,7 @@ pub struct AstDirectiveInclude pub fn parse( report: &mut diagn::Report, - walker: &mut syntax::TokenWalker, + walker: &mut syntax::Walker, header_span: diagn::Span) -> Result { diff --git a/src/asm/parser/directive_labelalign.rs b/src/asm/parser/directive_labelalign.rs index 13cdf4ec..05b39a59 100644 --- a/src/asm/parser/directive_labelalign.rs +++ b/src/asm/parser/directive_labelalign.rs @@ -1,7 +1,7 @@ use crate::*; -#[derive(Debug)] +#[derive(Clone, Debug)] pub struct AstDirectiveLabelAlign { pub header_span: diagn::Span, @@ -11,7 +11,7 @@ pub struct AstDirectiveLabelAlign pub fn parse( report: &mut diagn::Report, - _walker: &mut syntax::TokenWalker, + _walker: &mut syntax::Walker, header_span: diagn::Span) -> Result { diff --git a/src/asm/parser/directive_noemit.rs b/src/asm/parser/directive_noemit.rs index 54a13dff..d9250bcf 100644 --- a/src/asm/parser/directive_noemit.rs +++ b/src/asm/parser/directive_noemit.rs @@ -1,7 +1,7 @@ use crate::*; -#[derive(Debug)] +#[derive(Clone, Debug)] pub struct AstDirectiveNoEmit { pub header_span: diagn::Span, @@ -11,7 +11,7 @@ pub struct AstDirectiveNoEmit pub fn parse( report: &mut diagn::Report, - _walker: &mut syntax::TokenWalker, + _walker: &mut syntax::Walker, header_span: diagn::Span) -> Result { diff --git a/src/asm/parser/directive_once.rs b/src/asm/parser/directive_once.rs index bcd6bd56..bd425345 100644 --- a/src/asm/parser/directive_once.rs +++ b/src/asm/parser/directive_once.rs @@ -1,7 +1,7 @@ use crate::*; -#[derive(Debug)] +#[derive(Clone, Debug)] pub struct AstDirectiveOnce { pub header_span: diagn::Span, @@ -10,7 +10,7 @@ pub struct AstDirectiveOnce pub fn parse( report: &mut diagn::Report, - walker: &mut syntax::TokenWalker, + walker: &mut syntax::Walker, header_span: diagn::Span) -> Result { diff --git a/src/asm/parser/directive_res.rs b/src/asm/parser/directive_res.rs index ab45d265..ad8dfdb4 100644 --- a/src/asm/parser/directive_res.rs +++ b/src/asm/parser/directive_res.rs @@ -1,7 +1,7 @@ use crate::*; -#[derive(Debug)] +#[derive(Clone, Debug)] pub struct AstDirectiveRes { pub header_span: diagn::Span, @@ -13,7 +13,7 @@ pub struct AstDirectiveRes pub fn parse( report: &mut diagn::Report, - walker: &mut syntax::TokenWalker, + walker: &mut syntax::Walker, header_span: diagn::Span) -> Result { diff --git a/src/asm/parser/directive_ruledef.rs b/src/asm/parser/directive_ruledef.rs index 069daee6..42f353c3 100644 --- a/src/asm/parser/directive_ruledef.rs +++ b/src/asm/parser/directive_ruledef.rs @@ -1,7 +1,7 @@ use crate::*; -#[derive(Debug)] +#[derive(Clone, Debug)] pub struct AstDirectiveRuledef { pub header_span: diagn::Span, @@ -14,7 +14,7 @@ pub struct AstDirectiveRuledef } -#[derive(Debug)] +#[derive(Clone, Debug)] pub struct AstRule { pub pattern_span: diagn::Span, @@ -23,7 +23,7 @@ pub struct AstRule } -#[derive(Debug)] +#[derive(Clone, Debug)] pub enum AstRulePatternPart { Whitespace, @@ -32,7 +32,7 @@ pub enum AstRulePatternPart } -#[derive(Debug)] +#[derive(Clone, Debug)] pub struct AstRuleParameter { pub name_span: diagn::Span, @@ -42,7 +42,7 @@ pub struct AstRuleParameter } -#[derive(Debug)] +#[derive(Clone, Debug)] pub enum AstRuleParameterType { Unspecified, @@ -55,13 +55,13 @@ pub enum AstRuleParameterType pub fn parse( report: &mut diagn::Report, - walker: &mut syntax::TokenWalker, + walker: &mut syntax::Walker, is_subruledef: bool, header_span: diagn::Span) -> Result { let tk_name = walker.maybe_expect(syntax::TokenKind::Identifier); - let name = tk_name.map(|tk| tk.excerpt.clone().unwrap()); + let name = tk_name.clone().map(|tk| tk.excerpt.clone().unwrap()); let name_span = tk_name .map(|tk| tk.span) .unwrap_or_else(|| header_span); @@ -70,7 +70,7 @@ pub fn parse( let mut rules = Vec::new(); - while !walker.next_is(0, syntax::TokenKind::BraceClose) + while !walker.next_useful_is(0, syntax::TokenKind::BraceClose) { let rule = parse_rule( report, @@ -99,7 +99,7 @@ pub fn parse( fn parse_rule( report: &mut diagn::Report, - walker: &mut syntax::TokenWalker, + walker: &mut syntax::Walker, is_subruledef: bool) -> Result { @@ -109,13 +109,15 @@ fn parse_rule( // Discard leading whitespace/indentation - walker.acknowledge_whitespace(); + walker.skip_ignorable(); while !walker.is_over() && - !walker.next_is(0, syntax::TokenKind::HeavyArrowRight) + !walker.next_useful_is(0, syntax::TokenKind::HeavyArrowRight) { - let tk = walker.advance(); + let tk = walker.next_token(); + walker.skip_to_token_end(&tk); + pattern_span = pattern_span.join(tk.span); @@ -145,6 +147,11 @@ fn parse_rule( pattern.push(AstRulePatternPart::Exact(c.to_ascii_lowercase())); } } + + else if tk.kind == syntax::TokenKind::Whitespace + { + pattern.push(AstRulePatternPart::Whitespace); + } else { @@ -154,15 +161,6 @@ fn parse_rule( return Err(()); } - - - // Add a whitespace pattern-part if present between tokens, - // but not at the end before the `=>` - if !walker.next_is(0, syntax::TokenKind::HeavyArrowRight) && - walker.maybe_expect_unacknowledged_whitespace().is_some() - { - pattern.push(AstRulePatternPart::Whitespace); - } } @@ -190,7 +188,7 @@ fn parse_rule( fn parse_rule_parameter( report: &mut diagn::Report, - walker: &mut syntax::TokenWalker) + walker: &mut syntax::Walker) -> Result { let tk_name = walker.expect(report, syntax::TokenKind::Identifier)?; diff --git a/src/asm/parser/fields.rs b/src/asm/parser/fields.rs index 79e13cbf..afe18c6e 100644 --- a/src/asm/parser/fields.rs +++ b/src/asm/parser/fields.rs @@ -19,7 +19,7 @@ pub struct AstField pub fn parse( report: &mut diagn::Report, - walker: &mut syntax::TokenWalker) + walker: &mut syntax::Walker) -> Result { let mut fields = AstFields { @@ -27,7 +27,7 @@ pub fn parse( fields: Vec::new(), }; - while !walker.next_is(0, syntax::TokenKind::BraceClose) + while !walker.next_useful_is(0, syntax::TokenKind::BraceClose) { let deprecated_hash = walker.maybe_expect(syntax::TokenKind::Hash).is_some(); @@ -46,7 +46,7 @@ pub fn parse( let maybe_expr = { - if (deprecated_hash && !walker.next_is_linebreak()) || + if (deprecated_hash && !walker.next_linebreak().is_some()) || walker.maybe_expect(syntax::TokenKind::Equal).is_some() { let expr = expr::parse(report, walker)?; diff --git a/src/asm/parser/instruction.rs b/src/asm/parser/instruction.rs index c9113e45..7ecd9dec 100644 --- a/src/asm/parser/instruction.rs +++ b/src/asm/parser/instruction.rs @@ -1,11 +1,11 @@ use crate::*; -#[derive(Debug)] +#[derive(Clone, Debug)] pub struct AstInstruction { pub span: diagn::Span, - pub tokens: Vec, + pub src: String, pub item_ref: Option>, } @@ -13,17 +13,18 @@ pub struct AstInstruction pub fn parse( report: &mut diagn::Report, - walker: &mut syntax::TokenWalker) + walker: &mut syntax::Walker) -> Result { - let skipped = walker - .skip_until_linebreak_over_nested_braces(); + walker.skip_ignorable(); + + let line = walker.advance_until_linebreak(); walker.expect_linebreak(report)?; Ok(AstInstruction { - span: skipped.get_full_span(), - tokens: skipped.get_cloned_tokens(), + span: line.get_full_span(), + src: line.get_full_excerpt().to_string(), item_ref: None, }) diff --git a/src/asm/parser/mod.rs b/src/asm/parser/mod.rs index 65285f46..b1687def 100644 --- a/src/asm/parser/mod.rs +++ b/src/asm/parser/mod.rs @@ -4,36 +4,24 @@ use crate::*; mod directive; mod directive_addr; -pub use directive_addr::{ - AstDirectiveAddr, -}; +pub use directive_addr::AstDirectiveAddr; mod directive_align; -pub use directive_align::{ - AstDirectiveAlign, -}; +pub use directive_align::AstDirectiveAlign; mod directive_bank; -pub use directive_bank::{ - AstDirectiveBank, -}; +pub use directive_bank::AstDirectiveBank; mod directive_bankdef; -pub use directive_bankdef::{ - AstDirectiveBankdef, -}; +pub use directive_bankdef::AstDirectiveBankdef; mod directive_bits; -pub use directive_bits::{ - AstDirectiveBits, -}; +pub use directive_bits::AstDirectiveBits; mod directive_const; mod directive_data; -pub use directive_data::{ - AstDirectiveData, -}; +pub use directive_data::AstDirectiveData; mod directive_fn; pub use directive_fn::{ @@ -42,34 +30,22 @@ pub use directive_fn::{ }; mod directive_if; -pub use directive_if::{ - AstDirectiveIf, -}; +pub use directive_if::AstDirectiveIf; mod directive_include; -pub use directive_include::{ - AstDirectiveInclude, -}; +pub use directive_include::AstDirectiveInclude; mod directive_labelalign; -pub use directive_labelalign::{ - AstDirectiveLabelAlign, -}; +pub use directive_labelalign::AstDirectiveLabelAlign; mod directive_noemit; -pub use directive_noemit::{ - AstDirectiveNoEmit, -}; +pub use directive_noemit::AstDirectiveNoEmit; mod directive_once; -pub use directive_once::{ - AstDirectiveOnce, -}; +pub use directive_once::AstDirectiveOnce; mod directive_res; -pub use directive_res::{ - AstDirectiveRes, -}; +pub use directive_res::AstDirectiveRes; mod directive_ruledef; pub use directive_ruledef::{ @@ -87,9 +63,7 @@ pub use fields::{ }; mod instruction; -pub use instruction::{ - AstInstruction, -}; +pub use instruction::AstInstruction; mod symbol; pub use symbol::{ @@ -99,7 +73,7 @@ pub use symbol::{ }; -#[derive(Debug)] +#[derive(Clone, Debug)] pub enum AstAny { DirectiveAddr(AstDirectiveAddr), @@ -121,7 +95,7 @@ pub enum AstAny } -#[derive(Debug)] +#[derive(Clone, Debug)] pub struct AstTopLevel { pub nodes: Vec, @@ -180,17 +154,17 @@ pub fn parse_and_resolve_includes( span, root_filename.borrow())?; - let chars = fileserver.get_str( + let src = fileserver.get_str( report, span, file_handle)?; - let tokens = syntax::tokenize( - report, + let mut walker = syntax::Walker::new( + &src, file_handle, - &chars)?; + 0); - let mut root_ast = parse(report, &tokens)?; + let mut root_ast = parse(report, &mut walker)?; // Check presence of an #once directive if root_ast.nodes.iter().any(|n| matches!(n, AstAny::DirectiveOnce(_))) @@ -258,16 +232,14 @@ pub fn parse_and_resolve_includes( pub fn parse( report: &mut diagn::Report, - tokens: &[syntax::Token]) + walker: &mut syntax::Walker) -> Result { - let mut walker = syntax::TokenWalker::new(tokens); - let mut nodes = Vec::new(); while !walker.is_over() { - if let Some(node) = parse_line(report, &mut walker)? + if let Some(node) = parse_line(report, walker)? { nodes.push(node); } @@ -279,15 +251,15 @@ pub fn parse( } -fn parse_nested_toplevel( +pub fn parse_nested_toplevel( report: &mut diagn::Report, - walker: &mut syntax::TokenWalker) + walker: &mut syntax::Walker) -> Result { let mut nodes = Vec::new(); while !walker.is_over() && - !walker.next_is(0, syntax::TokenKind::BraceClose) + !walker.next_useful_is(0, syntax::TokenKind::BraceClose) { if let Some(node) = parse_line(report, walker)? { @@ -303,31 +275,31 @@ fn parse_nested_toplevel( fn parse_line( report: &mut diagn::Report, - walker: &mut syntax::TokenWalker) + walker: &mut syntax::Walker) -> Result, ()> { // Directives (starting with a hash sign) - if walker.next_is(0, syntax::TokenKind::Hash) + if walker.next_useful_is(0, syntax::TokenKind::Hash) { Ok(Some(directive::parse(report, walker)?)) } // Global labels (identifiers followed by colons) - else if walker.next_is(0, syntax::TokenKind::Identifier) && - walker.next_is(1, syntax::TokenKind::Colon) + else if walker.next_useful_is(0, syntax::TokenKind::Identifier) && + walker.next_useful_is(1, syntax::TokenKind::Colon) { Ok(Some(symbol::parse(report, walker)?)) } // Global constants (identifiers followed by equal signs) - else if walker.next_is(0, syntax::TokenKind::Identifier) && - walker.next_is(1, syntax::TokenKind::Equal) + else if walker.next_useful_is(0, syntax::TokenKind::Identifier) && + walker.next_useful_is(1, syntax::TokenKind::Equal) { Ok(Some(symbol::parse(report, walker)?)) } // Local labels or constants (starting with a dot) - else if walker.next_is(0, syntax::TokenKind::Dot) + else if walker.next_useful_is(0, syntax::TokenKind::Dot) { Ok(Some(symbol::parse(report, walker)?)) } diff --git a/src/asm/parser/symbol.rs b/src/asm/parser/symbol.rs index 920fc966..b08323dd 100644 --- a/src/asm/parser/symbol.rs +++ b/src/asm/parser/symbol.rs @@ -1,7 +1,7 @@ use super::*; -#[derive(Debug)] +#[derive(Clone, Debug)] pub struct AstSymbol { pub decl_span: diagn::Span, @@ -14,7 +14,7 @@ pub struct AstSymbol } -#[derive(Debug)] +#[derive(Clone, Debug)] pub enum AstSymbolKind { Constant(AstSymbolConstant), @@ -22,7 +22,7 @@ pub enum AstSymbolKind } -#[derive(Debug)] +#[derive(Clone, Debug)] pub struct AstSymbolConstant { pub expr: expr::Expr, @@ -31,7 +31,7 @@ pub struct AstSymbolConstant pub fn parse( report: &mut diagn::Report, - walker: &mut syntax::TokenWalker) + walker: &mut syntax::Walker) -> Result { let mut decl_span = diagn::Span::new_dummy(); diff --git a/src/asm/resolver/eval_asm.rs b/src/asm/resolver/eval_asm.rs index db5fe739..e706bdcc 100644 --- a/src/asm/resolver/eval_asm.rs +++ b/src/asm/resolver/eval_asm.rs @@ -22,20 +22,17 @@ pub fn eval_asm( let mut result = util::BigInt::new(0, Some(0)); - let ast = asm::parser::parse( - query.report, - query.tokens)?; - - for node in &ast.nodes + for node in &query.ast.nodes { if let asm::AstAny::Instruction(ast_instr) = node { let substs = parse_substitutions( query.report, - &ast_instr.tokens)?; + ast_instr.span, + &ast_instr.src)?; - let new_tokens = perform_substitutions( - &ast_instr.tokens, + let new_excerpt = perform_substitutions( + &ast_instr.src, &substs, query)?; @@ -44,7 +41,8 @@ pub fn eval_asm( let mut matches = asm::matcher::match_instr( opts, defs, - &new_tokens); + ast_instr.span, + &new_excerpt); let attempted_match_excerpt = { @@ -56,10 +54,7 @@ pub fn eval_asm( { Some(format!( "match attempted: `{}`", - new_tokens - .iter() - .map(|t| t.text()) - .collect::())) + new_excerpt)) } }; @@ -152,42 +147,49 @@ pub fn eval_asm( } -struct AsmSubstitution<'a> +struct AsmSubstitution { pub start: usize, pub end: usize, - pub name: &'a str, + pub name: String, pub span: diagn::Span, } -fn parse_substitutions<'tokens>( +fn parse_substitutions<'excerpt>( report: &mut diagn::Report, - tokens: &'tokens [syntax::Token]) - -> Result>, ()> + span: diagn::Span, + excerpt: &'excerpt str) + -> Result, ()> { let mut substs = Vec::new(); - let mut walker = syntax::TokenWalker::new(tokens); + let mut walker = syntax::Walker::new( + excerpt, + span.file_handle, + span.location().unwrap().0 as usize); while !walker.is_over() { - if let Some(_) = walker.maybe_expect(syntax::TokenKind::BraceOpen) - { - let start = walker.get_previous_token_index(); + walker.skip_ignorable(); + if let Some(tk_brace_open) = walker.maybe_expect(syntax::TokenKind::BraceOpen) + { + let start = walker.get_index_at_span_start( + tk_brace_open.span); + let tk_name = walker.expect( report, syntax::TokenKind::Identifier)?; - let name = tk_name.excerpt.as_ref().unwrap(); + let name = tk_name.clone().excerpt.unwrap(); let span = tk_name.span; walker.expect( report, syntax::TokenKind::BraceClose)?; - let end = walker.get_previous_token_index() + 1; + let end = walker.get_cursor_index(); substs.push(AsmSubstitution { start, @@ -198,7 +200,7 @@ fn parse_substitutions<'tokens>( } else { - walker.advance(); + walker.skip_to_token_end(&walker.next_token()); } } @@ -206,26 +208,26 @@ fn parse_substitutions<'tokens>( } -fn perform_substitutions<'tokens>( - tokens: &'tokens [syntax::Token], - substs: &Vec>, +fn perform_substitutions<'src>( + excerpt: &'src str, + substs: &Vec, info: &mut expr::EvalAsmBlockQuery) - -> Result, ()> + -> Result { - let mut result: Vec = Vec::new(); + let mut result = String::new(); let mut copied_up_to = 0; for subst in substs { - while copied_up_to < subst.start + if copied_up_to < subst.start { - result.push(tokens[copied_up_to].clone()); - copied_up_to += 1; + result.push_str(&excerpt[copied_up_to..subst.start]); + copied_up_to = subst.start; } - let token_subst = { - match info.eval_ctx.get_token_subst(subst.name) + let subst_str = { + match info.eval_ctx.get_token_subst(&subst.name) { Some(t) => t, None => @@ -241,20 +243,14 @@ fn perform_substitutions<'tokens>( } }; - for token in token_subst.iter() - { - let mut new_token = token.clone(); - new_token.span = subst.span; - result.push(new_token); - } + result.push_str(&subst_str); copied_up_to += subst.end - subst.start; } - while copied_up_to < tokens.len() + if copied_up_to < excerpt.len() { - result.push(tokens[copied_up_to].clone()); - copied_up_to += 1; + result.push_str(&excerpt[copied_up_to..]); } Ok(result) diff --git a/src/asm/resolver/instruction.rs b/src/asm/resolver/instruction.rs index 857a8432..b256ade0 100644 --- a/src/asm/resolver/instruction.rs +++ b/src/asm/resolver/instruction.rs @@ -69,10 +69,7 @@ pub fn resolve_instruction( if opts.debug_iterations { println!("instr: {} = {:?} [static]", - ast_instr.tokens.iter() - .map(|t| t.text()) - .collect::>() - .join(""), + ast_instr.src, instr.encoding); } @@ -96,10 +93,7 @@ pub fn resolve_instruction( if opts.debug_iterations { println!("instr: {} = {:?}", - ast_instr.tokens.iter() - .map(|t| t.text()) - .collect::>() - .join(""), + ast_instr.src, instr.encoding); } @@ -424,7 +418,7 @@ fn resolve_instruction_match_inner( eval_ctx.set_token_subst( ¶m.name, - arg.tokens.clone()); + arg.excerpt.clone()); } asm::InstructionArgumentKind::Nested(ref nested_match) => @@ -452,7 +446,7 @@ fn resolve_instruction_match_inner( eval_ctx.set_token_subst( ¶m.name, - arg.tokens.clone()); + arg.excerpt.clone()); } } } diff --git a/src/diagn/span.rs b/src/diagn/span.rs index 387cbc7f..c24f5c70 100644 --- a/src/diagn/span.rs +++ b/src/diagn/span.rs @@ -8,6 +8,8 @@ pub type SpanIndex = u32; pub struct Span { pub file_handle: util::FileServerHandle, + + /// Represents byte indices (not UTF-8 char indices) location: (SpanIndex, SpanIndex), } @@ -45,6 +47,17 @@ impl Span Some(self.location) } + + + pub fn length(&self) -> usize + { + if self.location.0 == SpanIndex::MAX + { + return 0; + } + + (self.location.1 - self.location.0) as usize + } pub fn before(&self) -> Span diff --git a/src/expr/eval.rs b/src/expr/eval.rs index 1687e897..50fca707 100644 --- a/src/expr/eval.rs +++ b/src/expr/eval.rs @@ -4,12 +4,12 @@ use crate::*; pub struct EvalContext { locals: std::collections::HashMap, - token_substs: std::collections::HashMap>, + token_substs: std::collections::HashMap, recursion_depth: usize, } -static ASM_HYGIENIZE_PREFIX: &'static str = ":"; +static ASM_HYGIENIZE_PREFIX: &'static str = "__"; impl EvalContext @@ -79,17 +79,17 @@ impl EvalContext pub fn set_token_subst( &mut self, name: S, - tokens: Vec) + excerpt: String) where S: Into { - self.token_substs.insert(name.into(), tokens); + self.token_substs.insert(name.into(), excerpt); } pub fn get_token_subst<'a>( &'a self, name: &str) - -> Option>> + -> Option> { if let Some(t) = self.token_substs.get(name) { @@ -98,14 +98,9 @@ impl EvalContext if let Some(_) = self.locals.get(name) { - return Some(std::borrow::Cow::Owned( - vec![syntax::Token { - span: diagn::Span::new_dummy(), - kind: syntax::TokenKind::Identifier, - excerpt: Some( - EvalContext::hygienize_name_for_asm_subst(name)), - }] - )); + return Some( + std::borrow::Cow::Owned( + EvalContext::hygienize_name_for_asm_subst(name))); } None @@ -255,7 +250,7 @@ pub struct EvalFunctionQueryArgument pub struct EvalAsmBlockQuery<'a> { pub report: &'a mut diagn::Report, - pub tokens: &'a [syntax::Token], + pub ast: &'a asm::AstTopLevel, pub span: diagn::Span, pub eval_ctx: &'a mut EvalContext, } @@ -747,11 +742,11 @@ impl expr::Expr } } - &expr::Expr::Asm(span, ref tokens) => + &expr::Expr::Asm(span, ref ast) => { let mut query = EvalAsmBlockQuery { report, - tokens, + ast, span, eval_ctx: ctx, }; diff --git a/src/expr/expression.rs b/src/expr/expression.rs index f3ff29ce..68ad474a 100644 --- a/src/expr/expression.rs +++ b/src/expr/expression.rs @@ -13,7 +13,7 @@ pub enum Expr SliceShort(diagn::Span, diagn::Span, Box, Box), Block(diagn::Span, Vec), Call(diagn::Span, Box, Vec), - Asm(diagn::Span, Vec), + Asm(diagn::Span, asm::AstTopLevel), } diff --git a/src/expr/parser.rs b/src/expr/parser.rs index c71d9f0e..6a60a197 100644 --- a/src/expr/parser.rs +++ b/src/expr/parser.rs @@ -3,7 +3,7 @@ use crate::*; pub fn parse( report: &mut diagn::Report, - walker: &mut syntax::TokenWalker) + walker: &mut syntax::Walker) -> Result { ExpressionParser::new(report, walker) @@ -12,7 +12,7 @@ pub fn parse( pub fn parse_optional( - walker: &mut syntax::TokenWalker) + walker: &mut syntax::Walker) -> Option { let mut dummy_report = diagn::Report::new(); @@ -20,20 +20,20 @@ pub fn parse_optional( } -struct ExpressionParser<'a, 'tokens: 'a> +struct ExpressionParser<'a, 'src: 'a> { report: &'a mut diagn::Report, - walker: &'a mut syntax::TokenWalker<'tokens>, + walker: &'a mut syntax::Walker<'src>, recursion_depth: usize, } -impl<'a, 'tokens> ExpressionParser<'a, 'tokens> +impl<'a, 'src> ExpressionParser<'a, 'src> { pub fn new( report: &'a mut diagn::Report, - walker: &'a mut syntax::TokenWalker<'tokens>) - -> ExpressionParser<'a, 'tokens> + walker: &'a mut syntax::Walker<'src>) + -> ExpressionParser<'a, 'src> { ExpressionParser { report, @@ -50,7 +50,7 @@ impl<'a, 'tokens> ExpressionParser<'a, 'tokens> self.report.message_with_parents_dedup( diagn::Message::error_span( "expression recursion depth limit reached", - self.walker.get_span_after_prev())); + self.walker.get_cursor_span())); return Err(()); } @@ -78,7 +78,7 @@ impl<'a, 'tokens> ExpressionParser<'a, 'tokens> ops: &[(syntax::TokenKind, expr::UnaryOp)], parse_inner: F) -> Result - where F: Fn(&mut ExpressionParser<'a, 'tokens>) -> Result + where F: Fn(&mut ExpressionParser<'a, 'src>) -> Result { for op in ops { @@ -114,13 +114,13 @@ impl<'a, 'tokens> ExpressionParser<'a, 'tokens> ops: &[(syntax::TokenKind, expr::BinaryOp)], parse_inner: F) -> Result - where F: Fn(&mut ExpressionParser<'a, 'tokens>) -> Result + where F: Fn(&mut ExpressionParser<'a, 'src>) -> Result { let mut lhs = parse_inner(self)?; loop { - if self.walker.next_is_linebreak() + if self.walker.next_linebreak().is_some() { break; } let mut op_match = None; @@ -155,7 +155,7 @@ impl<'a, 'tokens> ExpressionParser<'a, 'tokens> fn parse_right_associative_binary_ops(&mut self, ops: &[(syntax::TokenKind, expr::BinaryOp)], parse_inner: F) -> Result - where F: Fn(&mut ExpressionParser<'a, 'tokens>) -> Result + where F: Fn(&mut ExpressionParser<'a, 'src>) -> Result { let mut lhs = parse_inner(self)?; @@ -328,7 +328,7 @@ impl<'a, 'tokens> ExpressionParser<'a, 'tokens> { let inner = self.parse_slice_short()?; - if self.walker.next_is_linebreak() + if self.walker.next_linebreak().is_some() { return Ok(inner); } let tk_open = match self.walker.maybe_expect(syntax::TokenKind::BracketOpen) @@ -363,7 +363,7 @@ impl<'a, 'tokens> ExpressionParser<'a, 'tokens> { let inner = self.parse_unary()?; - if self.walker.next_is_linebreak() + if self.walker.next_linebreak().is_some() { return Ok(inner); } let tk_grave_span = match self.walker.maybe_expect(syntax::TokenKind::Grave) @@ -397,18 +397,18 @@ impl<'a, 'tokens> ExpressionParser<'a, 'tokens> { let leaf = self.parse_leaf()?; - if self.walker.next_is_linebreak() + if self.walker.next_linebreak().is_some() { return Ok(leaf); } if self.walker.maybe_expect(syntax::TokenKind::ParenOpen).is_none() { return Ok(leaf); } let mut args = Vec::new(); - while !self.walker.next_is(0, syntax::TokenKind::ParenClose) + while !self.walker.next_useful_is(0, syntax::TokenKind::ParenClose) { args.push(self.parse_expr()?); - if self.walker.next_is(0, syntax::TokenKind::ParenClose) + if self.walker.next_useful_is(0, syntax::TokenKind::ParenClose) { break; } self.walker.expect(self.report, syntax::TokenKind::Comma)?; @@ -422,38 +422,38 @@ impl<'a, 'tokens> ExpressionParser<'a, 'tokens> fn parse_leaf(&mut self) -> Result { - if self.walker.next_is(0, syntax::TokenKind::BraceOpen) + if self.walker.next_useful_is(0, syntax::TokenKind::BraceOpen) { self.parse_block() } - else if self.walker.next_is(0, syntax::TokenKind::ParenOpen) + else if self.walker.next_useful_is(0, syntax::TokenKind::ParenOpen) { self.parse_parenthesized() } - else if self.walker.next_is(0, syntax::TokenKind::Identifier) + else if self.walker.next_useful_is(0, syntax::TokenKind::Identifier) { self.parse_variable() } - else if self.walker.next_is(0, syntax::TokenKind::Dot) + else if self.walker.next_useful_is(0, syntax::TokenKind::Dot) { self.parse_variable() } - else if self.walker.next_is(0, syntax::TokenKind::Number) + else if self.walker.next_useful_is(0, syntax::TokenKind::Number) { self.parse_number() } - else if self.walker.next_is(0, syntax::TokenKind::String) + else if self.walker.next_useful_is(0, syntax::TokenKind::String) { self.parse_string() } - else if self.walker.next_is(0, syntax::TokenKind::KeywordAsm) + else if self.walker.next_useful_is(0, syntax::TokenKind::KeywordAsm) { self.parse_asm() } - else if self.walker.next_is(0, syntax::TokenKind::KeywordTrue) + else if self.walker.next_useful_is(0, syntax::TokenKind::KeywordTrue) { self.parse_boolean_true() } - else if self.walker.next_is(0, syntax::TokenKind::KeywordFalse) + else if self.walker.next_useful_is(0, syntax::TokenKind::KeywordFalse) { self.parse_boolean_false() } else { self.report.error_span( "expected expression", - self.walker.get_span_after_prev()); + self.walker.get_cursor_span()); Err(()) } @@ -467,14 +467,14 @@ impl<'a, 'tokens> ExpressionParser<'a, 'tokens> .span; let mut exprs = Vec::new(); - while !self.walker.next_is(0, syntax::TokenKind::BraceClose) + while !self.walker.next_useful_is(0, syntax::TokenKind::BraceClose) { exprs.push(self.parse_expr()?); if self.walker.maybe_expect_linebreak().is_some() { continue; } - if self.walker.next_is(0, syntax::TokenKind::BraceClose) + if self.walker.next_useful_is(0, syntax::TokenKind::BraceClose) { break; } self.walker.expect(self.report, syntax::TokenKind::Comma)?; @@ -500,11 +500,9 @@ impl<'a, 'tokens> ExpressionParser<'a, 'tokens> let mut span = diagn::Span::new_dummy(); let mut hierarchy_level = 0; - self.walker.clear_linebreak(); - loop { - if self.walker.next_is_linebreak() + if self.walker.next_linebreak().is_some() { break; } @@ -523,17 +521,12 @@ impl<'a, 'tokens> ExpressionParser<'a, 'tokens> loop { - if self.walker.next_is_linebreak() - { - break; - } - let tk_name = self.walker.expect(self.report, syntax::TokenKind::Identifier)?; let name = tk_name.excerpt.clone().unwrap(); hierarchy.push(name); span = span.join(tk_name.span); - if self.walker.next_is_linebreak() + if self.walker.next_linebreak().is_some() { break; } @@ -617,19 +610,28 @@ impl<'a, 'tokens> ExpressionParser<'a, 'tokens> fn parse_asm(&mut self) -> Result { - let tk_asm_span = self.walker - .expect(self.report, syntax::TokenKind::KeywordAsm)? - .span; + let tk_asm = self.walker.expect( + self.report, + syntax::TokenKind::KeywordAsm)?; - self.walker.expect(self.report, syntax::TokenKind::BraceOpen)?; + let _tk_brace_open = self.walker.expect( + self.report, + syntax::TokenKind::BraceOpen)?; - let skipped = self.walker.skip_until_token_over_nested_braces(syntax::TokenKind::BraceClose); + let mut inner_walker = self.walker + .advance_until_closing_brace(); - let tk_brace_close = self.walker.expect(self.report, syntax::TokenKind::BraceClose)?; + let ast = asm::parser::parse_nested_toplevel( + self.report, + &mut inner_walker)?; + + let tk_brace_close = self.walker.expect( + self.report, + syntax::TokenKind::BraceClose)?; let expr = expr::Expr::Asm( - tk_asm_span.join(tk_brace_close.span), - skipped.get_cloned_tokens()); + tk_asm.span.join(tk_brace_close.span), + ast); Ok(expr) } diff --git a/src/syntax/mod.rs b/src/syntax/mod.rs index a5063f86..abe67b4e 100644 --- a/src/syntax/mod.rs +++ b/src/syntax/mod.rs @@ -3,13 +3,12 @@ pub use self::token::{ Token, TokenKind, tokenize, + decide_next_token, is_whitespace, }; -mod token_walker; -pub use self::token_walker::{ - TokenWalker, -}; +mod walker; +pub use self::walker::Walker; mod excerpt; pub use self::excerpt::{ diff --git a/src/syntax/token.rs b/src/syntax/token.rs index 0a815747..6261f512 100644 --- a/src/syntax/token.rs +++ b/src/syntax/token.rs @@ -263,14 +263,7 @@ pub fn tokenize( let remaining = &src.get(index..).unwrap(); // Decide what the next token's kind and length are. - let (kind, length) = - check_for_whitespace(remaining).unwrap_or_else(|| - check_for_comment (remaining).unwrap_or_else(|| - check_for_number (remaining).unwrap_or_else(|| - check_for_identifier(remaining).unwrap_or_else(|| - check_for_special (remaining).unwrap_or_else(|| - check_for_string (remaining).unwrap_or_else(|| - (TokenKind::Error, 1))))))); + let (kind, length) = decide_next_token(&remaining); let span = diagn::Span::new( src_file_handle, @@ -315,6 +308,20 @@ pub fn tokenize( } +pub fn decide_next_token( + src: &str) + -> (TokenKind, usize) +{ + check_for_whitespace(src).unwrap_or_else(|| + check_for_comment (src).unwrap_or_else(|| + check_for_number (src).unwrap_or_else(|| + check_for_identifier(src).unwrap_or_else(|| + check_for_special (src).unwrap_or_else(|| + check_for_string (src).unwrap_or_else(|| + (TokenKind::Error, 1))))))) +} + + #[derive(Clone)] struct CharWalker<'a> { @@ -490,7 +497,7 @@ fn check_for_comment(src: &str) -> Option<(TokenKind, usize)> { if walker.ended() { - return None; + break; } else if walker.consume_str(";*") diff --git a/src/syntax/token_walker.rs b/src/syntax/token_walker.rs deleted file mode 100644 index 84d3fed8..00000000 --- a/src/syntax/token_walker.rs +++ /dev/null @@ -1,728 +0,0 @@ -use crate::*; - - -#[derive(Clone)] -pub struct TokenWalker<'tokens> -{ - tokens: &'tokens [syntax::Token], - index: usize, - index_prev: usize, - read_linebreak: bool, - read_whitespace_index: Option, - read_whitespace_acknowledged: bool, - partial_index: usize, - dummy_token: syntax::Token, -} - - -impl<'tokens> TokenWalker<'tokens> -{ - pub fn new(tokens: &'tokens [syntax::Token]) -> TokenWalker<'tokens> - { - let dummy_span = { - if let Some(tk_last) = tokens.last() - { - tk_last.span - } - else - { - diagn::Span::new_dummy() - } - }; - - let dummy_token = syntax::Token { - kind: syntax::TokenKind::LineBreak, - span: dummy_span, - excerpt: None, - }; - - let mut parser = TokenWalker - { - tokens: tokens, - index: 0, - index_prev: 0, - read_linebreak: false, - read_whitespace_index: None, - read_whitespace_acknowledged: true, - partial_index: 0, - dummy_token, - }; - - parser.skip_ignorable(); - parser - } - - - pub fn get_current_token_index(&self) -> usize - { - self.index - } - - - pub fn get_previous_token_index(&self) -> usize - { - self.index_prev - } - - - pub fn get_full_span(&self) -> diagn::Span - { - if self.tokens.len() == 0 - { - diagn::Span::new_dummy() - } - else - { - self.tokens[0].span.join(self.tokens.last().unwrap().span) - } - } - - - pub fn get_cloned_tokens(&self) -> Vec - { - let mut result = Vec::new(); - - for token in self.tokens - { - result.push(token.clone()); - } - - result - } - - - pub fn get_cloned_tokens_by_index(&self, start: usize, end: usize) -> Vec - { - let mut result = Vec::new(); - - for token in &self.tokens[start..end] - { - result.push(token.clone()); - } - - if let Some(last_token) = result.last() - { - if last_token.kind.is_ignorable() - { - result.pop(); - } - } - - result - } - - - pub fn debug_remaining(&self) -> String - { - let mut result = String::new(); - - for i in self.index..self.tokens.len() - { - result.push_str(&self.tokens[i].text()); - } - - result - } - - - pub fn get_next_spans(&self, count: usize) -> diagn::Span - { - if self.index >= self.tokens.len() - { - return diagn::Span::new_dummy(); - } - - let mut span = self.tokens[self.index].span; - - let mut i = 1; - while i <= count && self.index + i < self.tokens.len() - { - span = span.join(self.tokens[self.index + i].span); - i += 1; - } - - span - } - - - pub fn get_span_after_prev(&self) -> diagn::Span - { - if self.index_prev >= self.tokens.len() - { - return diagn::Span::new_dummy(); - } - - self.tokens[self.index_prev].span.after() - } - - - pub fn clone_slice<'b>( - &'b self, - start: usize, - end: usize) - -> TokenWalker<'tokens> - { - TokenWalker::new(&self.tokens[start..end]) - } - - - pub fn skip_until_linebreak_over_nested_braces<'b>( - &'b mut self) - -> TokenWalker<'tokens> - { - let start = self.get_current_token_index(); - let mut brace_nesting = 0; - - while !self.is_over() && - (!self.next_is_linebreak() || brace_nesting > 0) - { - if self.next_is(0, syntax::TokenKind::BraceOpen) - { - brace_nesting += 1; - self.advance(); - continue; - } - - if self.next_is(0, syntax::TokenKind::BraceClose) && brace_nesting > 0 - { - brace_nesting -= 1; - self.advance(); - continue; - } - - if brace_nesting > 0 - { - self.advance(); - continue; - } - - self.advance(); - } - - if self.get_current_token_index() == start - { - self.clone_slice(start, start) - } - else - { - self.clone_slice(start, self.get_previous_token_index() + 1) - } - } - - - pub fn skip_until_token_over_nested_braces<'b>( - &'b mut self, - kind: syntax::TokenKind) - -> TokenWalker<'tokens> - { - let start = self.get_current_token_index(); - let mut brace_nesting = 0; - - while !self.is_over() && - (!self.next_is(0, kind) || brace_nesting > 0) - { - if self.next_is(0, syntax::TokenKind::BraceOpen) - { - brace_nesting += 1; - self.advance(); - continue; - } - - if self.next_is(0, syntax::TokenKind::BraceClose) && brace_nesting > 0 - { - brace_nesting -= 1; - self.advance(); - continue; - } - - if brace_nesting > 0 - { - self.advance(); - continue; - } - - self.advance(); - } - - if self.get_current_token_index() == start - { - self.clone_slice(start, start) - } - else - { - self.clone_slice(start, self.get_previous_token_index() + 1) - } - } - - - pub fn try_lookahead_until_char_over_nested_parens<'b>( - &'b self, - c: char) - -> Option> - { - let mut lookahead = self.clone(); - let start = lookahead.get_current_token_index(); - - let mut paren_nesting = 0; - - loop - { - if lookahead.is_over() - { - break; - } - - if lookahead.next_partial() == c && - paren_nesting == 0 && - lookahead.get_current_token_index() > start - { - break; - } - - if lookahead.next_is(0, syntax::TokenKind::ParenOpen) - { - paren_nesting += 1; - lookahead.advance(); - continue; - } - - if lookahead.next_is(0, syntax::TokenKind::ParenClose) && - paren_nesting > 0 - { - paren_nesting -= 1; - lookahead.advance(); - continue; - } - - if paren_nesting > 0 - { - lookahead.advance(); - continue; - } - - lookahead.advance_partial(); - } - - let end = lookahead.get_previous_token_index() + 1; - - if lookahead.is_at_partial() || start >= end - { - None - } - else - { - let mut new_walker = self.clone(); - new_walker.tokens = &self.tokens[0..end]; - Some(new_walker) - } - } - - - pub fn copy_state_from(&mut self, other: &TokenWalker) - { - self.index = other.index; - self.index_prev = other.index_prev; - self.read_linebreak = other.read_linebreak; - self.read_whitespace_index = other.read_whitespace_index; - self.read_whitespace_acknowledged = other.read_whitespace_acknowledged; - self.partial_index = other.partial_index; - self.skip_ignorable(); - } - - - pub fn is_over(&self) -> bool - { - self.index >= self.tokens.len() - } - - - pub fn skip_ignorable(&mut self) - { - while self.index < self.tokens.len() && - self.tokens[self.index].kind.is_ignorable() - { - if self.tokens[self.index].kind == syntax::TokenKind::LineBreak - { self.read_linebreak = true; } - - if self.tokens[self.index].kind == syntax::TokenKind::Whitespace - { - self.read_whitespace_index = Some(self.index); - self.read_whitespace_acknowledged = false; - } - - self.index += 1; - } - } - - - pub fn advance(&mut self) -> &'tokens syntax::Token - { - if self.is_at_partial() - { panic!("trying to advance TokenWalker at partial token"); } - - self.index_prev = self.index; - - let token = &self.tokens[self.index]; - - if self.index < self.tokens.len() - { self.index += 1; } - - self.read_linebreak = false; - - self.skip_ignorable(); - token - } - - - pub fn advance_partial(&mut self) -> char - { - if self.index >= self.tokens.len() - { return '\0'; } - - let sliced = self.tokens[self.index] - .text() - .get(self.partial_index..) - .unwrap(); - - let mut char_indices = sliced.char_indices(); - let c = char_indices.next().unwrap().1; - - if let Some((index, _)) = char_indices.next() - { - self.partial_index += index; - } - else - { - self.partial_index = 0; - self.advance(); - } - - c - } - - - pub fn skip_until_linebreak(&mut self) - { - while !self.is_over() && !self.next_is_linebreak() - { self.advance(); } - } - - - pub fn next(&self) -> &'tokens syntax::Token - { - &self.tokens[self.index] - } - - - pub fn next_nth(&self, nth: usize) -> &syntax::Token - { - if self.index + nth >= self.tokens.len() - { - return &self.dummy_token; - } - - &self.tokens[self.index + nth] - } - - - pub fn next_partial(&mut self) -> char - { - if self.index >= self.tokens.len() - { return '\0'; } - - if self.tokens[self.index].kind == syntax::TokenKind::Whitespace - { return ' '; } - - let sliced = self.tokens[self.index] - .text() - .get(self.partial_index..) - .unwrap(); - - let mut char_indices = sliced.char_indices(); - char_indices.next().unwrap().1 - } - - - pub fn prev(&self) -> &'tokens syntax::Token - { - &self.tokens[self.index_prev] - } - - - pub fn is_at_partial(&self) -> bool - { - self.partial_index != 0 - } - - - pub fn next_is(&self, mut nth: usize, kind: syntax::TokenKind) -> bool - { - let mut index = self.index; - - while nth > 0 && index < self.tokens.len() - { - nth -= 1; - index += 1; - while index < self.tokens.len() && self.tokens[index].kind.is_ignorable() - { index += 1; } - } - - if index >= self.tokens.len() - { - return false; - } - - self.tokens[index].kind == kind - } - - - pub fn maybe_expect( - &mut self, - kind: syntax::TokenKind) - -> Option<&'tokens syntax::Token> - { - if self.next_is(0, kind) - { - self.acknowledge_whitespace(); - Some(self.advance()) - } - else - { None } - } - - - pub fn expect( - &mut self, - report: &mut diagn::Report, - kind: syntax::TokenKind) - -> Result<&'tokens syntax::Token, ()> - { - match self.maybe_expect(kind) - { - Some(token) => Ok(&token), - None => - { - let descr = format!("expected {}", kind.printable()); - let span = self.tokens[self.index_prev].span.after(); - report.error_span(descr, span); - Err(()) - } - } - } - - - pub fn expect_msg>( - &mut self, - report: &mut diagn::Report, - kind: syntax::TokenKind, - descr: S) - -> Result<&'tokens syntax::Token, ()> - { - match self.maybe_expect(kind) - { - Some(token) => Ok(&token), - None => - { - report.error_span( - descr, - self.tokens[self.index_prev].span.after()); - - Err(()) - } - } - } - - - pub fn acknowledge_whitespace(&mut self) - { - self.read_whitespace_acknowledged = true; - } - - - pub fn is_whitespace_acknowledged(&self) -> bool - { - self.read_whitespace_acknowledged - } - - - pub fn next_is_whitespace(&self) -> bool - { - if self.is_over() - { - true - } - else if let Some(index) = self.read_whitespace_index - { - if index + 1 == self.index - { - true - } - else - { - false - } - } - else - { - false - } - } - - - pub fn maybe_expect_whitespace(&mut self) -> Option<&syntax::Token> - { - if let Some(index) = self.read_whitespace_index - { - self.acknowledge_whitespace(); - Some(&self.tokens[index]) - } - else if self.is_over() - { - Some(&self.dummy_token) - } - else - { None } - } - - - pub fn maybe_expect_unacknowledged_whitespace(&mut self) -> Option<&'tokens syntax::Token> - { - if self.read_whitespace_acknowledged - { - return None; - } - - if let Some(index) = self.read_whitespace_index - { - self.acknowledge_whitespace(); - Some(&self.tokens[index]) - } - else - { None } - } - - - pub fn clear_linebreak(&mut self) - { - self.read_linebreak = false; - } - - - pub fn next_is_linebreak(&self) -> bool - { - self.read_linebreak || self.is_over() - } - - - pub fn maybe_expect_linebreak(&mut self) -> Option<()> - { - if self.next_is_linebreak() - { - self.clear_linebreak(); - Some(()) - } - else - { - None - } - } - - - pub fn expect_linebreak( - &mut self, - report: &mut diagn::Report) - -> Result<(), ()> - { - if self.maybe_expect_linebreak().is_some() - { - Ok(()) - } - else - { - report.error_span( - "expected line break", - self.tokens[self.index_prev].span.after()); - - Err(()) - } - } - - - pub fn expect_linebreak_or( - &mut self, - report: &mut diagn::Report, - kind: syntax::TokenKind) - -> Result<(), ()> - { - if self.maybe_expect(kind).is_some() - { - Ok(()) - } - else if self.maybe_expect_linebreak().is_some() - { - Ok(()) - } - else - { - report.error_span( - "expected line break", - self.tokens[self.index_prev].span.after()); - - Err(()) - } - } - - - pub fn expect_usize( - &mut self, - report: &mut diagn::Report) - -> Result<(&'tokens syntax::Token, usize), ()> - { - let tk = self.expect(report, syntax::TokenKind::Number)?; - - let value = syntax::excerpt_as_usize( - report, - tk.span, - &tk.excerpt.as_ref().unwrap())?; - - Ok((tk, value)) - } - - - pub fn maybe_expect_partial_usize(&mut self) -> Option - { - let mut value: usize = 0; - let mut advance_count: usize = 0; - - while !self.is_over() - { - let c = self.next_partial(); - - let digit = match c.to_digit(10) - { - Some(d) => d, - None => break - }; - - value = match value.checked_mul(10) - { - Some(v) => v, - None => break - }; - - value = match value.checked_add(digit as usize) - { - Some(v) => v, - None => break - }; - - self.advance_partial(); - advance_count += 1; - } - - if advance_count == 0 - { return None; } - - Some(value) - } -} \ No newline at end of file diff --git a/src/syntax/walker.rs b/src/syntax/walker.rs new file mode 100644 index 00000000..ff5001c7 --- /dev/null +++ b/src/syntax/walker.rs @@ -0,0 +1,586 @@ +use crate::*; + + +#[derive(Clone)] +pub struct Walker<'src> +{ + src: &'src str, + file_handle: util::FileServerHandle, + span_offset: usize, + + /// The current byte index into the `src` string. + cursor_index: usize, + /// The walker cannot see any characters past this byte index. + cursor_limit: usize, +} + + +impl<'src> Walker<'src> +{ + pub fn new( + src: &'src str, + src_file_handle: util::FileServerHandle, + src_byte_offset: usize) + -> Walker<'src> + { + let walker = Walker { + src, + file_handle: src_file_handle, + span_offset: src_byte_offset, + + cursor_index: 0, + cursor_limit: src.len(), + }; + + walker + } + + + pub fn slice( + &self, + start_byte_index: usize, + end_byte_index: usize) + -> Walker<'src> + { + let src = &self.src[start_byte_index..end_byte_index]; + + let walker = Walker { + src, + file_handle: self.file_handle, + span_offset: self.span_offset + start_byte_index, + + cursor_index: 0, + cursor_limit: src.len(), + }; + + walker + } + + + pub fn is_over(&self) -> bool + { + self.cursor_index >= self.cursor_limit + } + + + fn advance( + &mut self, + bytes: usize) + { + self.cursor_index += bytes; + } + + + pub fn skip_to_token_end( + &mut self, + token: &syntax::Token) + { + self.cursor_index = + token.span.location().unwrap().1 as usize - + self.span_offset; + } + + + pub fn skip_ignorable(&mut self) + { + loop + { + if self.is_over() + { break; } + + let token = self.token_at(self.cursor_index); + + if !token.kind.is_ignorable() + { break; } + + self.skip_to_token_end(&token); + } + } + + + pub fn get_cursor_index(&self) -> usize + { + self.cursor_index + } + + + pub fn get_index_at_span_start( + &self, + span: diagn::Span) + -> usize + { + span.location().unwrap().0 as usize - self.span_offset + } + + + pub fn next_useful_index(&self) -> usize + { + let token = self.next_useful_token(); + self.get_index_at_span_start(token.span) + } + + + pub fn get_span( + &self, + start_byte_index: usize, + end_byte_index: usize) + -> diagn::Span + { + let start = + (self.span_offset + start_byte_index) as diagn::SpanIndex; + + let end = + (self.span_offset + end_byte_index) as diagn::SpanIndex; + + diagn::Span::new( + self.file_handle, + start, + end) + } + + + pub fn get_cursor_span(&self) -> diagn::Span + { + self.get_span(self.cursor_index, self.cursor_index) + } + + + pub fn get_full_span(&self) -> diagn::Span + { + self.get_span(0, self.cursor_limit) + } + + + pub fn get_excerpt( + &self, + start_byte_index: usize, + end_byte_index: usize) + -> &'src str + { + &self.src[start_byte_index..end_byte_index] + } + + + pub fn get_full_excerpt(&self) -> &'src str + { + &self.src[0..self.cursor_limit] + } + + + pub fn get_cursor_limit( + &self) + -> usize + { + self.cursor_limit + } + + + pub fn set_cursor_limit( + &mut self, + end: usize) + { + self.cursor_limit = end; + } + + + fn char_at( + &self, + byte_index: usize) + -> char + { + if byte_index >= self.cursor_limit + { + '\0' + } + else + { + self.src[byte_index..self.cursor_limit] + .chars() + .next() + .unwrap_or('\0') + } + } + + + pub fn next_char( + &self) + -> char + { + self.char_at(self.cursor_index) + } + + + fn token_at( + &self, + byte_index: usize) + -> syntax::Token + { + if byte_index >= self.cursor_limit + { + let span_index = + (self.span_offset + self.cursor_limit) as diagn::SpanIndex; + + let span = diagn::Span::new( + self.file_handle, + span_index, + span_index); + + return syntax::Token { + kind: syntax::TokenKind::LineBreak, + span, + excerpt: None, + }; + } + + let src_next = &self.src[byte_index..self.cursor_limit]; + let (kind, length) = syntax::decide_next_token(src_next); + + let end = byte_index + length; + + let span = diagn::Span::new( + self.file_handle, + (self.span_offset + byte_index) as diagn::SpanIndex, + (self.span_offset + end) as diagn::SpanIndex); + + let excerpt = { + match kind.needs_excerpt() { + true => Some(self.src[byte_index..end].to_string()), + false => None, + } + }; + + syntax::Token { + kind, + span, + excerpt, + } + } + + + pub fn next_token( + &self) + -> syntax::Token + { + self.token_at(self.cursor_index) + } + + + pub fn next_nth_token( + &self, + mut nth: usize) + -> syntax::Token + { + let mut byte_index = self.cursor_index; + + loop + { + let token = self.token_at(byte_index); + + if nth == 0 + { return token; } + + if byte_index >= self.cursor_limit + { return token; } + + nth -= 1; + byte_index += token.span.length(); + } + } + + + pub fn next_nth_useful_token( + &self, + mut nth: usize) + -> syntax::Token + { + let mut byte_index = self.cursor_index; + + loop + { + let token = self.token_at(byte_index); + + if byte_index >= self.cursor_limit + { return token; } + + if !token.kind.is_ignorable() + { + if nth == 0 + { return token; } + + nth -= 1; + } + + byte_index += token.span.length(); + } + } + + + fn next_useful_token( + &self) + -> syntax::Token + { + self.next_nth_useful_token(0) + } + + + pub fn next_linebreak( + &self) + -> Option + { + let mut byte_index = self.cursor_index; + + loop + { + let token = self.token_at(byte_index); + + if token.kind == syntax::TokenKind::LineBreak + { return Some(token); } + + if !token.kind.is_ignorable() + { return None; } + + byte_index += token.span.length(); + } + } + + + pub fn next_useful_is( + &mut self, + nth: usize, + kind: syntax::TokenKind) + -> bool + { + let token = self.next_nth_useful_token(nth); + token.kind == kind + } + + + pub fn maybe_expect( + &mut self, + kind: syntax::TokenKind) + -> Option + { + let token = self.next_useful_token(); + if token.kind == kind + { + let token = token.clone(); + //self.acknowledge_whitespace(); + self.skip_to_token_end(&token); + Some(token) + } + else + { + None + } + } + + + pub fn expect( + &mut self, + report: &mut diagn::Report, + kind: syntax::TokenKind) + -> Result + { + match self.maybe_expect(kind) + { + Some(token) => Ok(token), + None => + { + report.error_span( + format!("expected {}", kind.printable()), + self.get_cursor_span()); + Err(()) + } + } + } + + + pub fn maybe_expect_char( + &mut self, + wanted_char: char) + -> bool + { + let index = self.next_useful_index(); + + let c = self.char_at(index); + + if c.eq_ignore_ascii_case(&wanted_char) + { + self.cursor_index = + index + + c.len_utf8(); + + true + } + else + { + false + } + } + + + pub fn expect_linebreak( + &mut self, + report: &mut diagn::Report) + -> Result<(), ()> + { + match self.maybe_expect_linebreak() + { + Some(()) => Ok(()), + None => + { + report.error_span( + format!("expected line break"), + self.get_cursor_span()); + Err(()) + } + } + } + + + pub fn maybe_expect_linebreak(&mut self) -> Option<()> + { + if let Some(token) = self.next_linebreak() + { + self.skip_to_token_end(&token); + Some(()) + } + else + { + None + } + } + + + pub fn advance_until_closing_brace( + &mut self) + -> Walker<'src> + { + let start = self.cursor_index; + + let mut brace_nesting = 0; + + while !self.is_over() + { + let c = self.next_char(); + + if c == '{' + { + brace_nesting += 1; + } + else if c == '}' + { + if brace_nesting == 0 + { break; } + + brace_nesting -= 1; + } + + self.advance(c.len_utf8()); + } + + let end = self.cursor_index; + + self.slice(start, end) + } + + + pub fn advance_until_linebreak( + &mut self) + -> Walker<'src> + { + let start = self.cursor_index; + let mut end = self.cursor_index; + + let mut brace_nesting = 0; + + while !self.is_over() + { + let token = self.next_token(); + + if token.kind == syntax::TokenKind::LineBreak && + brace_nesting == 0 + { + break; + } + else if token.kind == syntax::TokenKind::BraceOpen + { + brace_nesting += 1; + } + else if token.kind == syntax::TokenKind::BraceClose + { + if brace_nesting == 0 + { break; } + + brace_nesting -= 1; + } + + self.skip_to_token_end(&token); + + if !token.kind.is_ignorable() + { + end = self.cursor_index; + } + } + + self.slice(start, end) + } + + + pub fn find_lookahead_char_index( + &self, + wanted_char: char) + -> Option + { + let mut byte_index = self.cursor_index; + + let mut seen_tokens = false; + let mut paren_nesting = 0; + let mut brace_nesting = 0; + + while byte_index < self.cursor_limit + { + let c = self.char_at(byte_index); + + if c.eq_ignore_ascii_case(&wanted_char) && + seen_tokens && + paren_nesting == 0 && + brace_nesting == 0 + { + return Some(byte_index); + } + else if c == '(' + { + paren_nesting += 1; + } + else if c == ')' + { + if paren_nesting == 0 + { break; } + + paren_nesting -= 1; + } + else if c == '{' + { + brace_nesting += 1; + } + else if c == '}' + { + if brace_nesting == 0 + { break; } + + brace_nesting -= 1; + } + + + byte_index += c.len_utf8(); + + if !syntax::token::is_whitespace(c) + { + seen_tokens = true; + } + } + + None + } +} \ No newline at end of file diff --git a/src/test/expr.rs b/src/test/expr.rs index 9fbed188..329c85b6 100644 --- a/src/test/expr.rs +++ b/src/test/expr.rs @@ -13,9 +13,7 @@ where S: Into> -> Result { let chars = fileserver.get_str(report, None, file_handle)?; - let tokens = syntax::tokenize(report, file_handle, &chars)?; - - let mut walker = syntax::TokenWalker::new(&tokens); + let mut walker = syntax::Walker::new(&chars, file_handle, 0); let expr = expr::parse(report, &mut walker)?; let expr_value = expr.eval( @@ -69,10 +67,9 @@ fn test_literals() test("0o10a", Fail(("test", 1, "invalid"))); test("0x10g", Fail(("test", 1, "invalid"))); - test("8'5", Fail(("test", 1, "unexpected character"))); - test("8'0x0", Fail(("test", 1, "unexpected character"))); - test("0b8'0x00", Fail(("test", 1, "unexpected character"))); - test("0x8'0x00", Fail(("test", 1, "unexpected character"))); + test("8'5", Pass(expr::Value::make_integer(util::BigInt::new(0x8, None)))); + test("8'0x0", Pass(expr::Value::make_integer(util::BigInt::new(0x8, None)))); + test("8 xxx", Pass(expr::Value::make_integer(util::BigInt::new(0x8, None)))); } diff --git a/tests/comment/7.asm b/tests/comment/7.asm index 00244118..fa8a591d 100644 --- a/tests/comment/7.asm +++ b/tests/comment/7.asm @@ -1 +1 @@ -#d8 0 ;** ; error: unexpected \ No newline at end of file +#d8 12 ;** ; = 0x0c \ No newline at end of file diff --git a/tests/comment/8.asm b/tests/comment/8.asm index 6eccaddc..85feb048 100644 --- a/tests/comment/8.asm +++ b/tests/comment/8.asm @@ -1 +1 @@ -#d8 0 ;* ; error: unexpected \ No newline at end of file +#d8 12 ;* ; = 0x0c \ No newline at end of file diff --git a/tests/issue193/ok.asm b/tests/issue193/ok.asm new file mode 100644 index 00000000..33a07695 --- /dev/null +++ b/tests/issue193/ok.asm @@ -0,0 +1,12 @@ +#ruledef mode +{ + eq => 0xff +} + +#ruledef +{ + b{m: mode} => m + j{m: mode} => asm { b{m} } +} + +jeq ; = 0xff \ No newline at end of file diff --git a/tests/rule_arg_glued/err_one_param.asm b/tests/rule_arg_glued/err_one_param.asm index d7d5bb46..92ae4337 100644 --- a/tests/rule_arg_glued/err_one_param.asm +++ b/tests/rule_arg_glued/err_one_param.asm @@ -3,6 +3,4 @@ ld r{x} => 0x55 @ x`8 } -ld r0xff ; error: no match -ld r0x123 ; error: no match ld 0 ; error: no match \ No newline at end of file diff --git a/tests/rule_arg_glued/err_one_param_symbol.asm b/tests/rule_arg_glued/err_one_param_symbol.asm deleted file mode 100644 index a397b655..00000000 --- a/tests/rule_arg_glued/err_one_param_symbol.asm +++ /dev/null @@ -1,7 +0,0 @@ -#ruledef test -{ - ld r{x} => 0x55 @ x`8 -} - -x = 0 -ld rx ; error: no match \ No newline at end of file diff --git a/tests/rule_arg_glued/err_symbol_unknown.asm b/tests/rule_arg_glued/err_symbol_unknown.asm new file mode 100644 index 00000000..eba7c920 --- /dev/null +++ b/tests/rule_arg_glued/err_symbol_unknown.asm @@ -0,0 +1,7 @@ +#ruledef test +{ + ld r{x} => 0x55 @ x`8 +} + +x = 0x12 +ld ry ; error: failed / note:_:3: within / error: unknown \ No newline at end of file diff --git a/tests/rule_arg_glued/err_two_params.asm b/tests/rule_arg_glued/err_two_params.asm index 41cc895f..ceacc5c4 100644 --- a/tests/rule_arg_glued/err_two_params.asm +++ b/tests/rule_arg_glued/err_two_params.asm @@ -3,6 +3,4 @@ ld r{x}, {y} => 0x55 @ x`8 @ y`8 } -ld r0xff, 0x12 ; error: no match -ld r0x123, 0x12 ; error: no match ld 0, 0x12 ; error: no match \ No newline at end of file diff --git a/tests/rule_arg_glued/err_two_params_symbol.asm b/tests/rule_arg_glued/err_two_params_symbol.asm deleted file mode 100644 index 60cb6751..00000000 --- a/tests/rule_arg_glued/err_two_params_symbol.asm +++ /dev/null @@ -1,7 +0,0 @@ -#ruledef test -{ - ld r{x}, {y} => 0x55 @ x`8 @ y`8 -} - -x = 0 -ld rx, x ; error: no match \ No newline at end of file diff --git a/tests/rule_arg_glued/ok_one_param.asm b/tests/rule_arg_glued/ok_one_param.asm index 1c9e8daf..1419b5a4 100644 --- a/tests/rule_arg_glued/ok_one_param.asm +++ b/tests/rule_arg_glued/ok_one_param.asm @@ -9,5 +9,8 @@ ld r 0 ; = 0x5500 ld r12 ; = 0x550c ld r(6 + 6) ; = 0x550c ld r 6 + 6 ; = 0x550c +ld r6 + 6 ; = 0x550c ld r257 ; = 0x5501 -ld r 0xff ; = 0x55ff \ No newline at end of file +ld r 0xff ; = 0x55ff +ld r0xff ; = 0x55ff +ld r0x123 ; = 0x5523 \ No newline at end of file diff --git a/tests/rule_arg_glued/ok_one_param_symbol.asm b/tests/rule_arg_glued/ok_one_param_symbol.asm new file mode 100644 index 00000000..afae47cf --- /dev/null +++ b/tests/rule_arg_glued/ok_one_param_symbol.asm @@ -0,0 +1,10 @@ +#ruledef test +{ + ld r{x} => 0x55 @ x`8 +} + +x = 0x12 +ld rx ; = 0x5512 +ld r x + 6 ; = 0x5518 +ld r(x + 6) ; = 0x5518 +ld rx + 6 ; = 0x5518 \ No newline at end of file diff --git a/tests/rule_arg_glued/ok_two_params.asm b/tests/rule_arg_glued/ok_two_params.asm index acfce756..600ff24f 100644 --- a/tests/rule_arg_glued/ok_two_params.asm +++ b/tests/rule_arg_glued/ok_two_params.asm @@ -6,5 +6,10 @@ ld r0, 0x12 ; = 0x550012 ld r(0), 0x12 ; = 0x550012 ld r 0, 0x34 ; = 0x550034 +ld r 6 + 6, 0x12 ; = 0x550c12 +ld r(6 + 6), 0x12 ; = 0x550c12 +ld r6 + 6, 0x12 ; = 0x550c12 ld r257, 0x102 ; = 0x550102 -ld r 0xff, 0x12 ; = 0x55ff12 \ No newline at end of file +ld r 0xff, 0x12 ; = 0x55ff12 +ld r0xff, 0x12 ; = 0x55ff12 +ld r0x123, 0x12 ; = 0x552312 \ No newline at end of file diff --git a/tests/rule_arg_glued/ok_two_params_symbol.asm b/tests/rule_arg_glued/ok_two_params_symbol.asm new file mode 100644 index 00000000..018292da --- /dev/null +++ b/tests/rule_arg_glued/ok_two_params_symbol.asm @@ -0,0 +1,10 @@ +#ruledef test +{ + ld r{x}, {y} => 0x55 @ x`8 @ y`8 +} + +x = 0x12 +ld rx, x ; = 0x551212 +ld r x + 6, x ; = 0x551812 +ld r(x + 6), x ; = 0x551812 +ld rx + 6, x ; = 0x551812 \ No newline at end of file