diff --git a/Cargo.lock b/Cargo.lock index cad38830..054a2421 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2,12 +2,6 @@ # It is not intended for manual editing. version = 3 -[[package]] -name = "autocfg" -version = "1.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa" - [[package]] name = "console" version = "0.15.8" @@ -26,6 +20,12 @@ version = "3.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7704b5fdd17b18ae31c4c1da5a2e0305a2bf17b5249300a9ee9ed7b72114c636" +[[package]] +name = "drop_bomb" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9bda8e21c04aca2ae33ffc2fd8c23134f3cac46db123ba97bd9d3f3b8a4a85e1" + [[package]] name = "encode_unicode" version = "0.3.6" @@ -79,12 +79,15 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0717cef1bc8b636c6e1c1bbdefc09e6322da8a9321966e8928ef80d20f7f770f" [[package]] -name = "memoffset" -version = "0.9.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5a634b1c61a95585bd15607c6ab0c4e5b226e695ff2800ba0cdccddf208c406c" +name = "parsing" +version = "0.1.0" dependencies = [ - "autocfg", + "drop_bomb", + "insta", + "lexing", + "position", + "rowan", + "syntax", ] [[package]] @@ -93,13 +96,12 @@ version = "0.1.0" [[package]] name = "rowan" -version = "0.15.15" +version = "0.16.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "32a58fa8a7ccff2aec4f39cc45bf5f985cec7125ab271cf681c279fd00192b49" +checksum = "417a3a9f582e349834051b8a10c8d71ca88da4211e4093528e36b9845f6b5f21" dependencies = [ "countme", "hashbrown", - "memoffset", "rustc-hash", "text-size", ] diff --git a/crates/parsing/Cargo.toml b/crates/parsing/Cargo.toml new file mode 100644 index 00000000..ecd739cf --- /dev/null +++ b/crates/parsing/Cargo.toml @@ -0,0 +1,14 @@ +[package] +name = "parsing" +version = "0.1.0" +edition = "2021" + +[dependencies] +drop_bomb = "0.1.5" +lexing = { version = "0.1.0", path = "../lexing" } +position = { version = "0.1.0", path = "../position" } +rowan = "0.16.1" +syntax = { version = "0.1.0", path = "../syntax" } + +[dev-dependencies] +insta = "1.41.1" diff --git a/crates/parsing/src/builder.rs b/crates/parsing/src/builder.rs new file mode 100644 index 00000000..8665701f --- /dev/null +++ b/crates/parsing/src/builder.rs @@ -0,0 +1,49 @@ +use lexing::Lexed; +use rowan::GreenNodeBuilder; +use syntax::{SyntaxKind, SyntaxNode}; + +use crate::ParseError; + +#[derive(Debug)] +pub(crate) enum Output { + Start { kind: SyntaxKind }, + Token { kind: SyntaxKind }, + Error { message: String }, + Finish, +} + +pub(crate) fn build(lexed: &Lexed<'_>, output: Vec) -> (SyntaxNode, Vec) { + let mut index = 0; + let mut builder = GreenNodeBuilder::new(); + let mut errors = vec![]; + + for event in output { + match event { + Output::Start { kind } => { + if kind != SyntaxKind::Node { + builder.start_node(kind.into()); + } + } + Output::Token { kind } => { + let text = lexed.text(index); + builder.token(kind.into(), text); + index += 1; + } + Output::Error { message } => { + let position = lexed.position(index); + errors.push(ParseError { position, message }); + } + Output::Finish => { + builder.finish_node(); + } + } + if let Some(message) = lexed.error(index) { + let position = lexed.position(index); + let message = format!("lex error: {}", message); + errors.push(ParseError { position, message }); + } + } + + let node = SyntaxNode::new_root(builder.finish()); + (node, errors) +} diff --git a/crates/parsing/src/lib.rs b/crates/parsing/src/lib.rs new file mode 100644 index 00000000..d38b4f33 --- /dev/null +++ b/crates/parsing/src/lib.rs @@ -0,0 +1,19 @@ +use lexing::Lexed; +use position::Position; +use syntax::{SyntaxKind, SyntaxNode}; + +mod builder; +mod parser; + +pub struct ParseError { + pub position: Position, + pub message: String, +} + +pub fn parse(lexed: &Lexed<'_>, tokens: &[SyntaxKind]) -> (SyntaxNode, Vec) { + let mut parser = parser::Parser::new(tokens); + parser::module(&mut parser); + + let output = parser.finish(); + builder::build(lexed, output) +} diff --git a/crates/parsing/src/parser.rs b/crates/parsing/src/parser.rs new file mode 100644 index 00000000..1f78ddd4 --- /dev/null +++ b/crates/parsing/src/parser.rs @@ -0,0 +1,145 @@ +use drop_bomb::DropBomb; +use syntax::SyntaxKind; + +use crate::builder::Output; + +pub(crate) struct Parser<'t> { + index: usize, + tokens: &'t [SyntaxKind], + output: Vec, +} + +impl<'t> Parser<'t> { + pub(crate) fn new(tokens: &'t [SyntaxKind]) -> Parser<'t> { + let index = 0; + let output = vec![]; + Parser { index, tokens, output } + } + + pub(crate) fn finish(self) -> Vec { + self.output + } + + fn consume(&mut self) { + let kind = self.tokens[self.index]; + self.index += 1; + self.output.push(Output::Token { kind }); + } + + fn start(&mut self) -> NodeMarker { + let index = self.output.len(); + self.output.push(Output::Start { kind: SyntaxKind::Node }); + NodeMarker::new(index) + } + + fn error(&mut self, message: String) { + self.output.push(Output::Error { message }); + } + + fn at(&self, kind: SyntaxKind) -> bool { + self.tokens.get(self.index) == Some(&kind) + } + + fn at_fn(&self, predicate: impl Fn(SyntaxKind) -> bool) -> bool { + predicate(*self.tokens.get(self.index).unwrap()) + } + + fn eat(&mut self, kind: SyntaxKind) -> bool { + if !self.at(kind) { + return false; + } + self.consume(); + true + } + + fn expect(&mut self, kind: SyntaxKind) -> bool { + if self.eat(kind) { + return true; + } + self.error(format!("expected {:?}", kind)); + false + } +} + +struct NodeMarker { + index: usize, + bomb: DropBomb, +} + +impl NodeMarker { + fn new(index: usize) -> NodeMarker { + let bomb = DropBomb::new("failed to call end or cancel"); + NodeMarker { index, bomb } + } + + fn end(&mut self, parser: &mut Parser, kind: SyntaxKind) { + self.bomb.defuse(); + match &mut parser.output[self.index] { + Output::Start { kind: marker } => { + *marker = kind; + } + _ => unreachable!(), + } + parser.output.push(Output::Finish); + } + + fn cancel(&mut self, parser: &mut Parser) { + self.bomb.defuse(); + if self.index == parser.output.len() - 1 { + match parser.output.pop() { + Some(Output::Start { kind: SyntaxKind::Node }) => (), + _ => unreachable!(), + } + } + } +} + +fn comment(parser: &mut Parser) { + let mut marker = parser.start(); + let mut parsed = false; + + while parser.at_fn(|k| k.is_whitespace_or_comment()) { + parser.consume(); + parsed = true; + } + + if parsed { + marker.end(parser, SyntaxKind::Comment); + } else { + marker.cancel(parser); + } +} + +fn module_name(parser: &mut Parser) { + let mut marker = parser.start(); + + if parser.at(SyntaxKind::PREFIX) { + parser.consume(); + } + parser.expect(SyntaxKind::UPPER); + + marker.end(parser, SyntaxKind::ModuleName); +} + +pub(crate) fn module(parser: &mut Parser) { + let mut marker = parser.start(); + + module_header(parser); + + marker.end(parser, SyntaxKind::Module); +} + +fn module_header(parser: &mut Parser) { + let mut marker = parser.start(); + + comment(parser); + parser.eat(SyntaxKind::MODULE); + + comment(parser); + module_name(parser); + + comment(parser); + parser.eat(SyntaxKind::WHERE); + + marker.end(parser, SyntaxKind::ModuleHeader); +} diff --git a/crates/parsing/tests/parser.rs b/crates/parsing/tests/parser.rs new file mode 100644 index 00000000..0a45c42e --- /dev/null +++ b/crates/parsing/tests/parser.rs @@ -0,0 +1,18 @@ +macro_rules! parser_tests { + ($($name:ident => $source:expr),+ $(,)?) => { + $( + #[test] + fn $name() { + let lexed = lexing::lex($source); + let tokens = lexed.kinds(); + let (node, _) = parsing::parse(&lexed, &tokens); + insta::assert_debug_snapshot!(node); + } + )+ + }; +} + +parser_tests!( + module_header => include_str!("parser/ModuleHeader.purs"), + module_header_prefixed => include_str!("parser/ModuleHeaderPrefixed.purs"), +); diff --git a/crates/parsing/tests/parser/ModuleHeader.purs b/crates/parsing/tests/parser/ModuleHeader.purs new file mode 100644 index 00000000..6ca9a1fc --- /dev/null +++ b/crates/parsing/tests/parser/ModuleHeader.purs @@ -0,0 +1 @@ +module Main where diff --git a/crates/parsing/tests/parser/ModuleHeaderPrefixed.purs b/crates/parsing/tests/parser/ModuleHeaderPrefixed.purs new file mode 100644 index 00000000..78e1be2c --- /dev/null +++ b/crates/parsing/tests/parser/ModuleHeaderPrefixed.purs @@ -0,0 +1 @@ +module PureScript.Main where diff --git a/crates/parsing/tests/snapshots/parser__module_header.snap b/crates/parsing/tests/snapshots/parser__module_header.snap new file mode 100644 index 00000000..75332b94 --- /dev/null +++ b/crates/parsing/tests/snapshots/parser__module_header.snap @@ -0,0 +1,15 @@ +--- +source: crates/parsing/tests/parser.rs +expression: node +snapshot_kind: text +--- +Module@0..17 + ModuleHeader@0..17 + MODULE@0..6 "module" + Comment@6..7 + WHITESPACE@6..7 " " + ModuleName@7..11 + UPPER@7..11 "Main" + Comment@11..12 + WHITESPACE@11..12 " " + WHERE@12..17 "where" diff --git a/crates/parsing/tests/snapshots/parser__module_header_prefixed.snap b/crates/parsing/tests/snapshots/parser__module_header_prefixed.snap new file mode 100644 index 00000000..af5c5467 --- /dev/null +++ b/crates/parsing/tests/snapshots/parser__module_header_prefixed.snap @@ -0,0 +1,16 @@ +--- +source: crates/parsing/tests/parser.rs +expression: node +snapshot_kind: text +--- +Module@0..28 + ModuleHeader@0..28 + MODULE@0..6 "module" + Comment@6..7 + WHITESPACE@6..7 " " + ModuleName@7..22 + PREFIX@7..18 "PureScript." + UPPER@18..22 "Main" + Comment@22..23 + WHITESPACE@22..23 " " + WHERE@23..28 "where" diff --git a/crates/syntax/Cargo.toml b/crates/syntax/Cargo.toml index f9a74c18..b2af18c1 100644 --- a/crates/syntax/Cargo.toml +++ b/crates/syntax/Cargo.toml @@ -6,5 +6,5 @@ edition = "2021" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] -rowan = "0.15.11" +rowan = "0.16.1" smol_str = "0.2.0" diff --git a/crates/syntax/src/lib.rs b/crates/syntax/src/lib.rs index d13a315e..c05fabc7 100644 --- a/crates/syntax/src/lib.rs +++ b/crates/syntax/src/lib.rs @@ -74,6 +74,13 @@ pub enum SyntaxKind { TRUE, FALSE, + // Nodes + Node, + Comment, + Module, + ModuleHeader, + ModuleName, + // Control LAYOUT_START, LAYOUT_SEPARATOR,