Skip to content

Commit

Permalink
Simplify parsers
Browse files Browse the repository at this point in the history
This is intended to make the winnow 0.5 transition easier
  • Loading branch information
epage committed Aug 9, 2023
1 parent a3e8040 commit 51c77ff
Show file tree
Hide file tree
Showing 14 changed files with 242 additions and 285 deletions.
101 changes: 37 additions & 64 deletions gix-actor/src/signature/decode.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,10 @@ pub(crate) mod function {
use bstr::ByteSlice;
use btoi::btoi;
use gix_date::{time::Sign, OffsetInSeconds, SecondsSinceUnixEpoch, Time};
use std::cell::RefCell;
use winnow::{
combinator::alt,
combinator::repeat,
combinator::separated_pair,
combinator::terminated,
error::{AddContext, ParserError},
prelude::*,
Expand All @@ -21,84 +21,57 @@ pub(crate) mod function {
pub fn decode<'a, E: ParserError<&'a [u8]> + AddContext<&'a [u8]>>(
i: &'a [u8],
) -> IResult<&'a [u8], SignatureRef<'a>, E> {
let tzsign = RefCell::new(b'-'); // TODO: there should be no need for this.
let (i, (identity, _, time, _tzsign_count, hours, minutes)) = (
separated_pair(
identity,
b" ",
(|i| {
(
terminated(take_until0(SPACE), take(1usize))
.parse_next(i)
.and_then(|(i, v)| {
btoi::<SecondsSinceUnixEpoch>(v)
.map(|v| (i, v))
.map_err(|_| winnow::error::ErrMode::from_error_kind(i, winnow::error::ErrorKind::Verify))
})
})
.context("<timestamp>"),
alt((
repeat(1.., b"-").map(|_: ()| *tzsign.borrow_mut() = b'-'), // TODO: this should be a non-allocating consumer of consecutive tags
repeat(1.., b"+").map(|_: ()| *tzsign.borrow_mut() = b'+'),
))
.context("+|-"),
(|i| {
take_while(2, AsChar::is_dec_digit).parse_next(i).and_then(|(i, v)| {
btoi::<OffsetInSeconds>(v)
.map(|v| (i, v))
.map_err(|_| winnow::error::ErrMode::from_error_kind(i, winnow::error::ErrorKind::Verify))
})
})
.context("HH"),
(|i| {
.verify_map(|v| btoi::<SecondsSinceUnixEpoch>(v).ok())
.context("<timestamp>"),
alt((
repeat(1.., b"-").map(|_: ()| Sign::Minus),
repeat(1.., b"+").map(|_: ()| Sign::Plus),
))
.context("+|-"),
take_while(2, AsChar::is_dec_digit)
.verify_map(|v| btoi::<OffsetInSeconds>(v).ok())
.context("HH"),
take_while(1..=2, AsChar::is_dec_digit)
.parse_next(i)
.and_then(|(i, v)| {
btoi::<OffsetInSeconds>(v)
.map(|v| (i, v))
.map_err(|_| winnow::error::ErrMode::from_error_kind(i, winnow::error::ErrorKind::Verify))
})
})
.context("MM"),
.verify_map(|v| btoi::<OffsetInSeconds>(v).ok())
.context("MM"),
)
.map(|(time, sign, hours, minutes)| {
let offset = (hours * 3600 + minutes * 60) * if sign == Sign::Minus { -1 } else { 1 };
Time {
seconds: time,
offset,
sign,
}
}),
)
.context("<name> <<email>> <timestamp> <+|-><HHMM>")
.parse_next(i)?;

let tzsign = tzsign.into_inner();
debug_assert!(tzsign == b'-' || tzsign == b'+', "parser assure it's +|- only");
let sign = if tzsign == b'-' { Sign::Minus } else { Sign::Plus }; //
let offset = (hours * 3600 + minutes * 60) * if sign == Sign::Minus { -1 } else { 1 };

Ok((
i,
SignatureRef {
name: identity.name,
email: identity.email,
time: Time {
seconds: time,
offset,
sign,
},
},
))
.context("<name> <<email>> <timestamp> <+|-><HHMM>")
.map(|(identity, time)| SignatureRef {
name: identity.name,
email: identity.email,
time,
})
.parse_next(i)
}

/// Parse an identity from the bytes input `i` (like `name <email>`) using `winnow`.
pub fn identity<'a, E: ParserError<&'a [u8]> + AddContext<&'a [u8]>>(
i: &'a [u8],
) -> IResult<&'a [u8], IdentityRef<'a>, E> {
let (i, (name, email)) = (
(
terminated(take_until0(&b" <"[..]), take(2usize)).context("<name>"),
terminated(take_until0(&b">"[..]), take(1usize)).context("<email>"),
)
.context("<name> <<email>>")
.parse_next(i)?;

Ok((
i,
IdentityRef {
.map(|(name, email): (&[u8], &[u8])| IdentityRef {
name: name.as_bstr(),
email: email.as_bstr(),
},
))
})
.context("<name> <<email>>")
.parse_next(i)
}
}
pub use function::identity;
Expand Down Expand Up @@ -197,7 +170,7 @@ mod tests {
.map_err(to_bstr_err)
.expect_err("parse fails as > is missing")
.to_string(),
"Parse error:\nVerify at: -1215\nin section '<timestamp>', at: abc -1215\nin section '<name> <<email>> <timestamp> <+|-><HHMM>', at: hello <> abc -1215\n"
"Parse error:\nVerify at: abc -1215\nin section '<timestamp>', at: abc -1215\nin section '<name> <<email>> <timestamp> <+|-><HHMM>', at: hello <> abc -1215\n"
);
}
}
Expand Down
84 changes: 38 additions & 46 deletions gix-object/src/commit/decode.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,13 @@ use std::borrow::Cow;
use smallvec::SmallVec;
use winnow::{
combinator::alt,
combinator::preceded,
combinator::repeat,
combinator::terminated,
combinator::{eof, opt},
combinator::{eof, opt, rest},
error::{AddContext, ParserError},
prelude::*,
token::{tag, take_till1},
token::take_till1,
};

use crate::{parse, parse::NL, BStr, ByteSlice, CommitRef};
Expand All @@ -21,52 +22,43 @@ pub fn message<'a, E: ParserError<&'a [u8]> + AddContext<&'a [u8]>>(i: &'a [u8])
.map(|err: E| err.add_context(i, "newline + <message>")),
);
}
let (i, _) = tag(NL)
preceded(NL, rest.map(ByteSlice::as_bstr))
.context("a newline separates headers from the message")
.parse_next(i)?;
Ok((&[], i.as_bstr()))
.parse_next(i)
}

pub fn commit<'a, E: ParserError<&'a [u8]> + AddContext<&'a [u8]>>(i: &'a [u8]) -> IResult<&'a [u8], CommitRef<'_>, E> {
let (i, tree) = (|i| parse::header_field(i, b"tree", parse::hex_hash))
.context("tree <40 lowercase hex char>")
.parse_next(i)?;
let (i, parents): (_, Vec<_>) = repeat(0.., |i| parse::header_field(i, b"parent", parse::hex_hash))
.context("zero or more 'parent <40 lowercase hex char>'")
.parse_next(i)?;
let (i, author) = (|i| parse::header_field(i, b"author", parse::signature))
.context("author <signature>")
.parse_next(i)?;
let (i, committer) = (|i| parse::header_field(i, b"committer", parse::signature))
.context("committer <signature>")
.parse_next(i)?;
let (i, encoding) = opt(|i| parse::header_field(i, b"encoding", take_till1(NL)))
.context("encoding <encoding>")
.parse_next(i)?;
let (i, extra_headers) = repeat(
0..,
alt((
parse::any_header_field_multi_line.map(|(k, o)| (k.as_bstr(), Cow::Owned(o))),
|i| {
parse::any_header_field(i, take_till1(NL))
.map(|(i, (k, o))| (i, (k.as_bstr(), Cow::Borrowed(o.as_bstr()))))
},
)),
pub fn commit<'a, E: ParserError<&'a [u8]> + AddContext<&'a [u8]>>(i: &'a [u8]) -> IResult<&'a [u8], CommitRef<'a>, E> {
(
(|i| parse::header_field(i, b"tree", parse::hex_hash)).context("tree <40 lowercase hex char>"),
repeat(0.., |i| parse::header_field(i, b"parent", parse::hex_hash))
.map(|p: Vec<_>| p)
.context("zero or more 'parent <40 lowercase hex char>'"),
(|i| parse::header_field(i, b"author", parse::signature)).context("author <signature>"),
(|i| parse::header_field(i, b"committer", parse::signature)).context("committer <signature>"),
opt(|i| parse::header_field(i, b"encoding", take_till1(NL))).context("encoding <encoding>"),
repeat(
0..,
alt((
parse::any_header_field_multi_line.map(|(k, o)| (k.as_bstr(), Cow::Owned(o))),
|i| {
parse::any_header_field(i, take_till1(NL))
.map(|(i, (k, o))| (i, (k.as_bstr(), Cow::Borrowed(o.as_bstr()))))
},
)),
)
.context("<field> <single-line|multi-line>"),
terminated(message, eof),
)
.context("<field> <single-line|multi-line>")
.parse_next(i)?;
let (i, message) = terminated(message, eof).parse_next(i)?;

Ok((
i,
CommitRef {
tree,
parents: SmallVec::from(parents),
author,
committer,
encoding: encoding.map(ByteSlice::as_bstr),
message,
extra_headers,
},
))
.map(
|(tree, parents, author, committer, encoding, extra_headers, message)| CommitRef {
tree,
parents: SmallVec::from(parents),
author,
committer,
encoding: encoding.map(ByteSlice::as_bstr),
message,
extra_headers,
},
)
.parse_next(i)
}
7 changes: 5 additions & 2 deletions gix-object/src/commit/message/body.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@ use std::ops::Deref;

use winnow::{
combinator::eof,
combinator::rest,
combinator::separated_pair,
combinator::terminated,
error::{ErrorKind, ParserError},
prelude::*,
Expand Down Expand Up @@ -33,11 +35,12 @@ pub struct TrailerRef<'a> {
}

fn parse_single_line_trailer<'a, E: ParserError<&'a [u8]>>(i: &'a [u8]) -> IResult<&'a [u8], (&'a BStr, &'a BStr), E> {
let (value, token) = terminated(take_until1(b":".as_ref()), b": ").parse_next(i.trim_end())?;
let (i, (token, value)) = separated_pair(take_until1(b":".as_ref()), b": ", rest).parse_next(i.trim_end())?;

if token.trim_end().len() != token.len() || value.trim_start().len() != value.len() {
Err(winnow::error::ErrMode::from_error_kind(i, ErrorKind::Fail).cut())
} else {
Ok((&[], (token.as_bstr(), value.as_bstr())))
Ok((i, (token.as_bstr(), value.as_bstr())))
}
}

Expand Down
49 changes: 23 additions & 26 deletions gix-object/src/commit/message/decode.rs
Original file line number Diff line number Diff line change
@@ -1,49 +1,46 @@
use winnow::{
combinator::alt, combinator::eof, combinator::terminated, error::ParserError, prelude::*, token::take_till1,
combinator::alt, combinator::eof, combinator::preceded, combinator::rest, combinator::terminated,
error::ParserError, prelude::*, stream::Offset, token::take_till1,
};

use crate::bstr::{BStr, ByteSlice};

pub(crate) fn newline<'a, E: ParserError<&'a [u8]>>(i: &'a [u8]) -> IResult<&'a [u8], &'a [u8], E> {
alt((b"\r\n", b"\n")).parse_next(i)
alt((b"\n", b"\r\n")).parse_next(i)
}

fn subject_and_body<'a, E: ParserError<&'a [u8]>>(i: &'a [u8]) -> IResult<&'a [u8], (&'a BStr, Option<&'a BStr>), E> {
let mut c = i;
let mut consumed_bytes = 0;
while !c.is_empty() {
c = match take_till1::<_, _, E>(|c| c == b'\n' || c == b'\r').parse_next(c) {
Ok((i1, segment)) => {
consumed_bytes += segment.len();
match (newline::<E>, newline::<E>).parse_next(i1) {
Ok((body, _)) => {
return Ok((
&[],
(
i[0usize..consumed_bytes].as_bstr(),
(!body.is_empty()).then(|| body.as_bstr()),
),
));
fn subject_and_body<'a, E: ParserError<&'a [u8]>>(
mut i: &'a [u8],
) -> IResult<&'a [u8], (&'a BStr, Option<&'a BStr>), E> {
let start = i;
while !i.is_empty() {
match take_till1::<_, _, E>(|c| c == b'\n' || c == b'\r').parse_next(i) {
Ok((next, _)) => {
let consumed_bytes = next.offset_from(start);
match preceded((newline::<E>, newline::<E>), rest).parse_next(next) {
Ok((next, body)) => {
let body = (!body.is_empty()).then(|| body.as_bstr());
return Ok((next, (start[0usize..consumed_bytes].as_bstr(), body)));
}
Err(_) => match i1.get(1..) {
Err(_) => match next.get(1..) {
Some(next) => {
consumed_bytes += 1;
next
i = next;
}
None => break,
},
}
}
Err(_) => match c.get(1..) {
Err(_) => match i.get(1..) {
Some(next) => {
consumed_bytes += 1;
next
i = next;
}
None => break,
},
};
}
}
Ok((&[], (i.as_bstr(), None)))

i = start;
rest.map(|r: &[u8]| (r.as_bstr(), None)).parse_next(i)
}

/// Returns title and body, without separator
Expand Down
Loading

0 comments on commit 51c77ff

Please sign in to comment.