Skip to content

Commit

Permalink
feat: the new parser based on winnow
Browse files Browse the repository at this point in the history
  • Loading branch information
39555 committed Nov 30, 2024
1 parent 2e9447d commit cf02d3b
Show file tree
Hide file tree
Showing 20 changed files with 3,604 additions and 15 deletions.
45 changes: 42 additions & 3 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 3 additions & 0 deletions brush-parser/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -15,11 +15,14 @@ rust-version.workspace = true
bench = false

[features]
# `debug` forwards to winnow's `debug` feature (parser execution tracing).
# It is kept opt-in so that regular builds and the parser benchmarks are not
# run with tracing enabled; turn it on with `--features debug` when needed.
default = []
fuzz-testing = ["dep:arbitrary"]
debug = ["winnow/debug"]

[dependencies]
arbitrary = { version = "1.4.1", optional = true, features = ["derive"] }
cached = "0.54.0"
winnow = { version = "0.6.20", features = ["simd"] }
indenter = "0.3.3"
peg = "0.8.4"
thiserror = "2.0.3"
Expand Down
106 changes: 97 additions & 9 deletions brush-parser/benches/parser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,15 +13,12 @@ mod unix {
.unwrap()
}

fn parse_sample_script() -> brush_parser::ast::Program {
let input = r#"
for f in A B C; do
echo "${f@L}" >&2
done
"#;

/// Parses `input` by delegating to `parse_script` and returns the resulting program.
///
/// The comparison benchmarks in this file use this function for their
/// "old parser" measurements.
fn parse_sample_script(input: &str) -> brush_parser::ast::Program {
parse_script(input)
}
/// Parses `input` with `brush_parser::parse_program` using default parser options.
///
/// # Panics
///
/// Panics if `parse_program` returns an error.
fn parse_sample_script2(input: &str) -> brush_parser::ast::Program {
    let options = brush_parser::ParserOptions::default();
    brush_parser::parse_program(options, input).unwrap()
}

fn benchmark_parsing_script(c: &mut Criterion, script_path: &std::path::Path) {
let contents = std::fs::read_to_string(script_path).unwrap();
Expand All @@ -37,8 +34,13 @@ mod unix {
}

pub(crate) fn criterion_benchmark(c: &mut Criterion) {
let input = r#"
for f in A B C; do
echo "${f@L}" >&2
done
"#;
c.bench_function("parse_sample_script", |b| {
b.iter(|| black_box(parse_sample_script()))
b.iter(|| black_box(parse_sample_script(input)))
});

const POSSIBLE_BASH_COMPLETION_SCRIPT_PATH: &str =
Expand All @@ -50,6 +52,84 @@ mod unix {
benchmark_parsing_script(c, &well_known_complicated_script);
}
}

/// Target of the `compare_parsers` benchmark group.
///
/// Only the uncached comparison is run; the call to the cached comparison is
/// left commented out below.
pub(crate) fn compare_parsers(c: &mut Criterion) {
// Cached comparison is currently disabled:
// compare_parsers_cached(c);
compare_parsers_uncached(c);
}

/// Benchmarks the old and the new parser on inputs that differ on every iteration.
///
/// Each benchmark keeps its own counter, starting at zero and incremented
/// before every generated input, and embeds it in the script text so that no
/// two iterations of a benchmark parse the same string (this is what prevents
/// caching from short-circuiting the measurement).
fn compare_parsers_uncached(c: &mut Criterion) {
    /// Builds the sample script with the number `i` embedded in the `echo` line.
    fn numbered_script(i: usize) -> String {
        format!(
            r#"
for f in A B C; do
echo {i} "${{f@L}}" >&2
done
"#
        )
    }

    let mut group = c.benchmark_group("compare_parsers");

    // Old parser: input generation happens in the setup closure, parsing in the routine.
    let mut old_serial: usize = 0;
    group.bench_function("old_parser_uncached", |b| {
        b.iter_batched(
            || {
                old_serial += 1;
                numbered_script(old_serial)
            },
            |input| black_box(parse_sample_script(input.as_str())),
            criterion::BatchSize::SmallInput,
        )
    });

    // New parser: same input scheme, with a fresh counter.
    let mut new_serial: usize = 0;
    group.bench_function("new_parser_uncached", |b| {
        b.iter_batched(
            || {
                new_serial += 1;
                numbered_script(new_serial)
            },
            |input| {
                let options = brush_parser::ParserOptions::default();
                black_box(brush_parser::parse_program(options, input.as_str()).unwrap())
            },
            criterion::BatchSize::SmallInput,
        )
    });

    group.finish();
}
/// Benchmarks the old parser and `cacheable_parse_program` on one fixed script.
///
/// The same input is parsed on every iteration, in contrast to the uncached
/// comparison, which changes the input each time.
///
/// # Panics
///
/// Panics if `cacheable_parse_program` returns an error for the sample script.
fn compare_parsers_cached(c: &mut Criterion) {
    let input = r#"
for f in A B C; do
echo "${f@L}" >&2
done
"#;
    let mut group = c.benchmark_group("compare_parsers_cached");

    group.bench_function("old_parser_cached", |b| {
        b.iter(|| black_box(parse_sample_script(input)))
    });
    group.bench_function("new_parser_cached", |b| {
        b.iter(|| {
            // A single `black_box` already keeps the result from being optimized
            // away; nesting a second one adds nothing.
            // NOTE(review): `input.to_string()` runs inside the timed closure, so
            // this measurement includes one `String` allocation per iteration.
            black_box(
                brush_parser::cacheable_parse_program(
                    brush_parser::ParserOptions::default(),
                    input.to_string(),
                )
                .unwrap(),
            )
        })
    });
    group.finish();
}
}

#[cfg(unix)]
Expand All @@ -58,8 +138,16 @@ criterion::criterion_group! {
config = criterion::Criterion::default().with_profiler(pprof::criterion::PProfProfiler::new(100, pprof::criterion::Output::Flamegraph(None)));
targets = unix::criterion_benchmark
}

// Criterion group for the parser comparison benchmarks, configured with the
// pprof profiler set to produce flamegraph output.
#[cfg(unix)]
criterion::criterion_group! {
    name = compare_parsers;
    config = criterion::Criterion::default().with_profiler(
        pprof::criterion::PProfProfiler::new(100, pprof::criterion::Output::Flamegraph(None))
    );
    targets = unix::compare_parsers
}

#[cfg(unix)]
criterion::criterion_main!(benches);
criterion::criterion_main!(compare_parsers);

#[cfg(not(unix))]
// No-op entry point: on non-Unix targets the benchmark groups are compiled out,
// so the bench binary only needs an empty `main`.
fn main() {}
10 changes: 8 additions & 2 deletions brush-parser/src/ast.rs
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,12 @@ pub enum SeparatorOperator {
Sequence,
}

impl Default for SeparatorOperator {
fn default() -> Self {
SeparatorOperator::Sequence
}
}

impl Display for SeparatorOperator {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
Expand Down Expand Up @@ -1031,7 +1037,7 @@ impl Display for ExtendedTestExpr {
}

/// A unary predicate usable in an extended test expression.
#[derive(Clone, Debug)]
#[derive(Clone, Copy, Debug)]
#[cfg_attr(feature = "fuzz-testing", derive(arbitrary::Arbitrary))]
#[cfg_attr(test, derive(PartialEq, Eq))]
pub enum UnaryPredicate {
Expand Down Expand Up @@ -1120,7 +1126,7 @@ impl Display for UnaryPredicate {
}

/// A binary predicate usable in an extended test expression.
#[derive(Clone, Debug)]
#[derive(Clone, Copy, Debug)]
#[cfg_attr(feature = "fuzz-testing", derive(arbitrary::Arbitrary))]
#[cfg_attr(test, derive(PartialEq, Eq))]
pub enum BinaryPredicate {
Expand Down
3 changes: 3 additions & 0 deletions brush-parser/src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
//! Implements a tokenizer and parsers for POSIX / bash shell syntax.
// NOTE(review): `#![feature(...)]` is rejected on stable toolchains, so this
// gate makes the crate nightly-only. Nothing visible here uses the `test`
// crate; confirm whether `parser2` needs it, and otherwise gate or drop it.
#![feature(test)]
#![deny(missing_docs)]

pub mod arithmetic;
Expand All @@ -11,8 +12,10 @@ pub mod word;

mod error;
mod parser;
mod parser2;
mod tokenizer;

pub use parser2::{parse_program, cacheable_parse_program};
pub use error::{ParseError, TestCommandParseError, WordParseError};
pub use parser::{parse_tokens, Parser, ParserOptions, SourceInfo};
pub use tokenizer::{tokenize_str, unquote_str, SourcePosition, Token, TokenLocation};
2 changes: 1 addition & 1 deletion brush-parser/src/parser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ use crate::error;
use crate::tokenizer::{Token, TokenEndReason, Tokenizer, TokenizerOptions, Tokens};

/// Options used to control the behavior of the parser.
#[derive(Clone, Eq, Hash, PartialEq)]
#[derive(Debug, Clone, Eq, Hash, PartialEq)]
pub struct ParserOptions {
/// Whether or not to enable extended globbing (a.k.a. `extglob`).
pub enable_extended_globbing: bool,
Expand Down
Loading

0 comments on commit cf02d3b

Please sign in to comment.