Skip to content

Commit

Permalink
Initial parser implementation
Browse files Browse the repository at this point in the history
  • Loading branch information
purefunctor committed Dec 14, 2024
1 parent ebe9797 commit 545a188
Show file tree
Hide file tree
Showing 12 changed files with 302 additions and 15 deletions.
30 changes: 16 additions & 14 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

14 changes: 14 additions & 0 deletions crates/parsing/Cargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
[package]
name = "parsing"
version = "0.1.0"
edition = "2021"

[dependencies]
drop_bomb = "0.1.5"
lexing = { version = "0.1.0", path = "../lexing" }
position = { version = "0.1.0", path = "../position" }
rowan = "0.16.1"
syntax = { version = "0.1.0", path = "../syntax" }

[dev-dependencies]
insta = "1.41.1"
49 changes: 49 additions & 0 deletions crates/parsing/src/builder.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
use lexing::Lexed;
use rowan::GreenNodeBuilder;
use syntax::{SyntaxKind, SyntaxNode};

use crate::ParseError;

/// Flat event recorded by the parser and later replayed by `build` to
/// construct the syntax tree.
#[derive(Debug)]
pub(crate) enum Output {
/// Opens a node of `kind`. `build` opens nothing when `kind` is
/// `SyntaxKind::Node`.
Start { kind: SyntaxKind },
/// Emits the next token with the given `kind`; `build` takes its text from
/// the lexed input at a running token index.
Token { kind: SyntaxKind },
/// Records a parse error; `build` attaches the position of the current token index.
Error { message: String },
/// Closes the most recently opened node.
Finish,
}

/// Replays the parser's `output` events against `lexed` to build the syntax
/// tree, collecting parse and lex errors along the way.
///
/// Returns the root `SyntaxNode` together with every `ParseError` gathered
/// while replaying, in the order they were encountered.
pub(crate) fn build(lexed: &Lexed<'_>, output: Vec<Output>) -> (SyntaxNode, Vec<ParseError>) {
    // Index of the next token to be emitted; only `Token` events advance it.
    let mut index = 0;
    let mut builder = GreenNodeBuilder::new();
    let mut errors = vec![];
    // Token index whose lex error has already been looked up, if any.
    let mut checked = None;

    for event in output {
        match event {
            Output::Start { kind } => {
                // A `Start` still carrying `SyntaxKind::Node` opens no node.
                if kind != SyntaxKind::Node {
                    builder.start_node(kind.into());
                }
            }
            Output::Token { kind } => {
                let text = lexed.text(index);
                builder.token(kind.into(), text);
                index += 1;
            }
            Output::Error { message } => {
                let position = lexed.position(index);
                errors.push(ParseError { position, message });
            }
            Output::Finish => {
                builder.finish_node();
            }
        }
        // `index` stays put across `Start`, `Error` and `Finish` events, so
        // looking the lex error up after every event would report the same
        // token's error repeatedly. Look each index up at most once.
        if checked != Some(index) {
            checked = Some(index);
            if let Some(message) = lexed.error(index) {
                let position = lexed.position(index);
                let message = format!("lex error: {}", message);
                errors.push(ParseError { position, message });
            }
        }
    }

    let node = SyntaxNode::new_root(builder.finish());
    (node, errors)
}
19 changes: 19 additions & 0 deletions crates/parsing/src/lib.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
use lexing::Lexed;
use position::Position;
use syntax::{SyntaxKind, SyntaxNode};

mod builder;
mod parser;

/// An error produced while parsing, returned alongside the syntax tree by `parse`.
pub struct ParseError {
/// Position in the source that the error is attributed to.
pub position: Position,
/// Human-readable description of the problem.
pub message: String,
}

/// Parses `tokens` as a module and builds its syntax tree.
///
/// The parser first records a flat list of events for `tokens`; those events
/// are then replayed against `lexed` by the builder. Returns the root node
/// together with the errors collected by the builder.
pub fn parse(lexed: &Lexed<'_>, tokens: &[SyntaxKind]) -> (SyntaxNode, Vec<ParseError>) {
    let events = {
        let mut state = parser::Parser::new(tokens);
        parser::module(&mut state);
        state.finish()
    };
    builder::build(lexed, events)
}
145 changes: 145 additions & 0 deletions crates/parsing/src/parser.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,145 @@
use drop_bomb::DropBomb;
use syntax::SyntaxKind;

use crate::builder::Output;

/// Parser state: a cursor over a slice of token kinds plus the list of
/// `Output` events recorded so far.
pub(crate) struct Parser<'t> {
// Index into `tokens` of the token currently being looked at.
index: usize,
// Token kinds being parsed.
tokens: &'t [SyntaxKind],
// Events recorded so far, handed out by `finish`.
output: Vec<Output>,
}

impl<'t> Parser<'t> {
    /// Creates a parser positioned at the first of `tokens` with no recorded events.
    pub(crate) fn new(tokens: &'t [SyntaxKind]) -> Parser<'t> {
        let index = 0;
        let output = vec![];
        Parser { index, tokens, output }
    }

    /// Consumes the parser and returns the events recorded so far.
    pub(crate) fn finish(self) -> Vec<Output> {
        self.output
    }

    /// Records the current token as an `Output::Token` event and advances the cursor.
    ///
    /// # Panics
    ///
    /// Panics if the cursor is already past the last token; callers are
    /// expected to check with `at` or `at_fn` first.
    fn consume(&mut self) {
        let kind = self.tokens[self.index];
        self.index += 1;
        self.output.push(Output::Token { kind });
    }

    /// Records a `Start` event with the placeholder kind `SyntaxKind::Node`
    /// and returns a marker remembering where that event sits in the output.
    fn start(&mut self) -> NodeMarker {
        let index = self.output.len();
        self.output.push(Output::Start { kind: SyntaxKind::Node });
        NodeMarker::new(index)
    }

    /// Records an `Output::Error` event carrying `message`.
    fn error(&mut self, message: String) {
        self.output.push(Output::Error { message });
    }

    /// Returns `true` if the current token is `kind`; `false` at end of input.
    fn at(&self, kind: SyntaxKind) -> bool {
        self.tokens.get(self.index) == Some(&kind)
    }

    /// Returns `true` if there is a current token and it satisfies `predicate`.
    ///
    /// At end of input this returns `false` rather than panicking, so loops
    /// such as `while parser.at_fn(..) { parser.consume() }` terminate when
    /// the tokens run out.
    fn at_fn(&self, predicate: impl Fn(SyntaxKind) -> bool) -> bool {
        self.tokens.get(self.index).is_some_and(|&kind| predicate(kind))
    }

    /// Consumes the current token if it is `kind`, reporting whether it did.
    fn eat(&mut self, kind: SyntaxKind) -> bool {
        if !self.at(kind) {
            return false;
        }
        self.consume();
        true
    }

    /// Like `eat`, but records an "expected ..." error event when the current
    /// token is not `kind`. Nothing is consumed in that case.
    fn expect(&mut self, kind: SyntaxKind) -> bool {
        if self.eat(kind) {
            return true;
        }
        self.error(format!("expected {:?}", kind));
        false
    }
}

/// Handle to a `Start` event recorded by `Parser::start`.
///
/// Must be finished with `end` or `cancel`; both defuse `bomb`.
struct NodeMarker {
// Index of the associated `Start` event within the parser's output.
index: usize,
// Drop bomb created with the message "failed to call end or cancel".
bomb: DropBomb,
}

impl NodeMarker {
    /// Creates a marker for the `Start` event stored at `index` in the
    /// parser's output, armed with a drop bomb.
    fn new(index: usize) -> NodeMarker {
        NodeMarker { index, bomb: DropBomb::new("failed to call end or cancel") }
    }

    /// Completes the node: overwrites the kind of the marked `Start` event
    /// with `kind` and records the matching `Finish` event.
    fn end(&mut self, parser: &mut Parser, kind: SyntaxKind) {
        self.bomb.defuse();
        if let Output::Start { kind: slot } = &mut parser.output[self.index] {
            *slot = kind;
        } else {
            // The marker always points at the `Start` event it was created for.
            unreachable!();
        }
        parser.output.push(Output::Finish);
    }

    /// Abandons the node. If the marked `Start` event is still the last
    /// recorded event it is removed; otherwise the output is left untouched.
    fn cancel(&mut self, parser: &mut Parser) {
        self.bomb.defuse();
        let last = parser.output.len() - 1;
        if self.index != last {
            return;
        }
        let popped = parser.output.pop();
        if !matches!(popped, Some(Output::Start { kind: SyntaxKind::Node })) {
            unreachable!();
        }
    }
}

/// Groups a run of consecutive whitespace/comment tokens under a single
/// `SyntaxKind::Comment` node. When the current token is not whitespace or a
/// comment, the marker is cancelled instead.
fn comment(parser: &mut Parser) {
    let mut marker = parser.start();
    let mut consumed = 0usize;

    loop {
        if !parser.at_fn(|kind| kind.is_whitespace_or_comment()) {
            break;
        }
        parser.consume();
        consumed += 1;
    }

    if consumed == 0 {
        marker.cancel(parser);
    } else {
        marker.end(parser, SyntaxKind::Comment);
    }
}

/// Parses a module name: an optional `PREFIX` token followed by a required
/// `UPPER` token, wrapped in a `SyntaxKind::ModuleName` node.
fn module_name(parser: &mut Parser) {
    let mut name = parser.start();

    // The qualifying prefix is optional, so its absence is not an error.
    parser.eat(SyntaxKind::PREFIX);
    parser.expect(SyntaxKind::UPPER);

    name.end(parser, SyntaxKind::ModuleName);
}

/// Entry point of the grammar: parses the module header and wraps it in a
/// `SyntaxKind::Module` node.
pub(crate) fn module(parser: &mut Parser) {
    let mut root = parser.start();
    module_header(parser);
    root.end(parser, SyntaxKind::Module);
}

/// Parses the module header, `module <name> where`, into a
/// `SyntaxKind::ModuleHeader` node. Whitespace and comments before each
/// element are grouped by `comment`.
///
/// The `module` and `where` keywords are mandatory, so they go through
/// `expect`: a missing keyword records an error event instead of being
/// silently accepted. The shape of the resulting tree is unaffected.
fn module_header(parser: &mut Parser) {
    let mut marker = parser.start();

    comment(parser);
    parser.expect(SyntaxKind::MODULE);

    comment(parser);
    module_name(parser);

    comment(parser);
    parser.expect(SyntaxKind::WHERE);

    marker.end(parser, SyntaxKind::ModuleHeader);
}
18 changes: 18 additions & 0 deletions crates/parsing/tests/parser.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
// Generates one `#[test]` function per `name => source` pair. Each test lexes
// the source, parses the resulting token kinds, and snapshots the debug
// rendering of the syntax tree with insta.
macro_rules! parser_tests {
($($name:ident => $source:expr),+ $(,)?) => {
$(
#[test]
fn $name() {
let lexed = lexing::lex($source);
let tokens = lexed.kinds();
// The returned errors are discarded; only the tree is snapshotted.
let (node, _) = parsing::parse(&lexed, &tokens);
insta::assert_debug_snapshot!(node);
}
)+
};
}

// Each fixture is embedded at compile time from `tests/parser/`.
parser_tests!(
module_header => include_str!("parser/ModuleHeader.purs"),
module_header_prefixed => include_str!("parser/ModuleHeaderPrefixed.purs"),
);
1 change: 1 addition & 0 deletions crates/parsing/tests/parser/ModuleHeader.purs
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
module Main where
1 change: 1 addition & 0 deletions crates/parsing/tests/parser/ModuleHeaderPrefixed.purs
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
module PureScript.Main where
15 changes: 15 additions & 0 deletions crates/parsing/tests/snapshots/parser__module_header.snap
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
---
source: crates/parsing/tests/parser.rs
expression: node
snapshot_kind: text
---
Module@0..17
ModuleHeader@0..17
MODULE@0..6 "module"
Comment@6..7
WHITESPACE@6..7 " "
ModuleName@7..11
UPPER@7..11 "Main"
Comment@11..12
WHITESPACE@11..12 " "
WHERE@12..17 "where"
16 changes: 16 additions & 0 deletions crates/parsing/tests/snapshots/parser__module_header_prefixed.snap
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
---
source: crates/parsing/tests/parser.rs
expression: node
snapshot_kind: text
---
Module@0..28
ModuleHeader@0..28
MODULE@0..6 "module"
Comment@6..7
WHITESPACE@6..7 " "
ModuleName@7..22
PREFIX@7..18 "PureScript."
UPPER@18..22 "Main"
Comment@22..23
WHITESPACE@22..23 " "
WHERE@23..28 "where"
2 changes: 1 addition & 1 deletion crates/syntax/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -6,5 +6,5 @@ edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

[dependencies]
rowan = "0.15.11"
rowan = "0.16.1"
smol_str = "0.2.0"
7 changes: 7 additions & 0 deletions crates/syntax/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,13 @@ pub enum SyntaxKind {
TRUE,
FALSE,

// Nodes
Node,
Comment,
Module,
ModuleHeader,
ModuleName,

// Control
LAYOUT_START,
LAYOUT_SEPARATOR,
Expand Down

0 comments on commit 545a188

Please sign in to comment.