From 51c77ff4ebd07af0b5bac767e1c2318483351cc3 Mon Sep 17 00:00:00 2001 From: Ed Page Date: Wed, 2 Aug 2023 14:31:23 -0500 Subject: [PATCH] Simplify parsers This is intended to make the winnow 0.5 transition easier --- gix-actor/src/signature/decode.rs | 101 ++++++--------- gix-object/src/commit/decode.rs | 84 ++++++------ gix-object/src/commit/message/body.rs | 7 +- gix-object/src/commit/message/decode.rs | 49 ++++--- gix-object/src/parse.rs | 37 +++--- gix-object/src/tag/decode.rs | 121 ++++++++---------- gix-object/src/tree/ref_iter.rs | 25 ++-- gix-ref/src/parse.rs | 2 +- gix-ref/src/store/file/log/line.rs | 17 +-- .../src/store/file/loose/reference/decode.rs | 9 +- gix-ref/src/store/packed/buffer.rs | 12 +- gix-ref/src/store/packed/decode.rs | 42 +++--- gix-ref/src/store/packed/find.rs | 7 +- gix-ref/src/store/packed/iter.rs | 14 +- 14 files changed, 242 insertions(+), 285 deletions(-) diff --git a/gix-actor/src/signature/decode.rs b/gix-actor/src/signature/decode.rs index 3a76cdde8e9..3c6c6eabde1 100644 --- a/gix-actor/src/signature/decode.rs +++ b/gix-actor/src/signature/decode.rs @@ -2,10 +2,10 @@ pub(crate) mod function { use bstr::ByteSlice; use btoi::btoi; use gix_date::{time::Sign, OffsetInSeconds, SecondsSinceUnixEpoch, Time}; - use std::cell::RefCell; use winnow::{ combinator::alt, combinator::repeat, + combinator::separated_pair, combinator::terminated, error::{AddContext, ParserError}, prelude::*, @@ -21,84 +21,57 @@ pub(crate) mod function { pub fn decode<'a, E: ParserError<&'a [u8]> + AddContext<&'a [u8]>>( i: &'a [u8], ) -> IResult<&'a [u8], SignatureRef<'a>, E> { - let tzsign = RefCell::new(b'-'); // TODO: there should be no need for this. - let (i, (identity, _, time, _tzsign_count, hours, minutes)) = ( + separated_pair( identity, b" ", - (|i| { + ( terminated(take_until0(SPACE), take(1usize)) - .parse_next(i) - .and_then(|(i, v)| { - btoi::(v) - .map(|v| (i, v)) - .map_err(|_| winnow::error::ErrMode::from_error_kind(i, winnow::error::ErrorKind::Verify)) - }) - }) - .context(""), - alt(( - repeat(1.., b"-").map(|_: ()| *tzsign.borrow_mut() = b'-'), // TODO: this should be a non-allocating consumer of consecutive tags - repeat(1.., b"+").map(|_: ()| *tzsign.borrow_mut() = b'+'), - )) - .context("+|-"), - (|i| { - take_while(2, AsChar::is_dec_digit).parse_next(i).and_then(|(i, v)| { - btoi::(v) - .map(|v| (i, v)) - .map_err(|_| winnow::error::ErrMode::from_error_kind(i, winnow::error::ErrorKind::Verify)) - }) - }) - .context("HH"), - (|i| { + .verify_map(|v| btoi::(v).ok()) + .context(""), + alt(( + repeat(1.., b"-").map(|_: ()| Sign::Minus), + repeat(1.., b"+").map(|_: ()| Sign::Plus), + )) + .context("+|-"), + take_while(2, AsChar::is_dec_digit) + .verify_map(|v| btoi::(v).ok()) + .context("HH"), take_while(1..=2, AsChar::is_dec_digit) - .parse_next(i) - .and_then(|(i, v)| { - btoi::(v) - .map(|v| (i, v)) - .map_err(|_| winnow::error::ErrMode::from_error_kind(i, winnow::error::ErrorKind::Verify)) - }) - }) - .context("MM"), + .verify_map(|v| btoi::(v).ok()) + .context("MM"), + ) + .map(|(time, sign, hours, minutes)| { + let offset = (hours * 3600 + minutes * 60) * if sign == Sign::Minus { -1 } else { 1 }; + Time { + seconds: time, + offset, + sign, + } + }), ) - .context(" <> <+|->") - .parse_next(i)?; - - let tzsign = tzsign.into_inner(); - debug_assert!(tzsign == b'-' || tzsign == b'+', "parser assure it's +|- only"); - let sign = if tzsign == b'-' { Sign::Minus } else { Sign::Plus }; // - let offset = (hours * 3600 + minutes * 60) * if sign == Sign::Minus { -1 } else { 1 }; - - Ok(( - i, - SignatureRef { - name: identity.name, - email: identity.email, - time: Time { - seconds: time, - offset, - sign, - }, - }, - )) + .context(" <> <+|->") + .map(|(identity, time)| SignatureRef { + name: identity.name, + email: identity.email, + time, + }) + .parse_next(i) } /// Parse an identity from the bytes input `i` (like `name `) using `nom`. pub fn identity<'a, E: ParserError<&'a [u8]> + AddContext<&'a [u8]>>( i: &'a [u8], ) -> IResult<&'a [u8], IdentityRef<'a>, E> { - let (i, (name, email)) = ( + ( terminated(take_until0(&b" <"[..]), take(2usize)).context(""), terminated(take_until0(&b">"[..]), take(1usize)).context(""), ) - .context(" <>") - .parse_next(i)?; - - Ok(( - i, - IdentityRef { + .map(|(name, email): (&[u8], &[u8])| IdentityRef { name: name.as_bstr(), email: email.as_bstr(), - }, - )) + }) + .context(" <>") + .parse_next(i) } } pub use function::identity; @@ -197,7 +170,7 @@ mod tests { .map_err(to_bstr_err) .expect_err("parse fails as > is missing") .to_string(), - "Parse error:\nVerify at: -1215\nin section '', at: abc -1215\nin section ' <> <+|->', at: hello <> abc -1215\n" + "Parse error:\nVerify at: abc -1215\nin section '', at: abc -1215\nin section ' <> <+|->', at: hello <> abc -1215\n" ); } } diff --git a/gix-object/src/commit/decode.rs b/gix-object/src/commit/decode.rs index 9f669c81107..b7bdd5fb244 100644 --- a/gix-object/src/commit/decode.rs +++ b/gix-object/src/commit/decode.rs @@ -3,12 +3,13 @@ use std::borrow::Cow; use smallvec::SmallVec; use winnow::{ combinator::alt, + combinator::preceded, combinator::repeat, combinator::terminated, - combinator::{eof, opt}, + combinator::{eof, opt, rest}, error::{AddContext, ParserError}, prelude::*, - token::{tag, take_till1}, + token::take_till1, }; use crate::{parse, parse::NL, BStr, ByteSlice, CommitRef}; @@ -21,52 +22,43 @@ pub fn message<'a, E: ParserError<&'a [u8]> + AddContext<&'a [u8]>>(i: &'a [u8]) .map(|err: E| err.add_context(i, "newline + ")), ); } - let (i, _) = tag(NL) + preceded(NL, rest.map(ByteSlice::as_bstr)) .context("a newline separates headers from the message") - .parse_next(i)?; - Ok((&[], i.as_bstr())) + .parse_next(i) } -pub fn commit<'a, E: ParserError<&'a [u8]> + AddContext<&'a [u8]>>(i: &'a [u8]) -> IResult<&'a [u8], CommitRef<'_>, E> { - let (i, tree) = (|i| parse::header_field(i, b"tree", parse::hex_hash)) - .context("tree <40 lowercase hex char>") - .parse_next(i)?; - let (i, parents): (_, Vec<_>) = repeat(0.., |i| parse::header_field(i, b"parent", parse::hex_hash)) - .context("zero or more 'parent <40 lowercase hex char>'") - .parse_next(i)?; - let (i, author) = (|i| parse::header_field(i, b"author", parse::signature)) - .context("author ") - .parse_next(i)?; - let (i, committer) = (|i| parse::header_field(i, b"committer", parse::signature)) - .context("committer ") - .parse_next(i)?; - let (i, encoding) = opt(|i| parse::header_field(i, b"encoding", take_till1(NL))) - .context("encoding ") - .parse_next(i)?; - let (i, extra_headers) = repeat( - 0.., - alt(( - parse::any_header_field_multi_line.map(|(k, o)| (k.as_bstr(), Cow::Owned(o))), - |i| { - parse::any_header_field(i, take_till1(NL)) - .map(|(i, (k, o))| (i, (k.as_bstr(), Cow::Borrowed(o.as_bstr())))) - }, - )), +pub fn commit<'a, E: ParserError<&'a [u8]> + AddContext<&'a [u8]>>(i: &'a [u8]) -> IResult<&'a [u8], CommitRef<'a>, E> { + ( + (|i| parse::header_field(i, b"tree", parse::hex_hash)).context("tree <40 lowercase hex char>"), + repeat(0.., |i| parse::header_field(i, b"parent", parse::hex_hash)) + .map(|p: Vec<_>| p) + .context("zero or more 'parent <40 lowercase hex char>'"), + (|i| parse::header_field(i, b"author", parse::signature)).context("author "), + (|i| parse::header_field(i, b"committer", parse::signature)).context("committer "), + opt(|i| parse::header_field(i, b"encoding", take_till1(NL))).context("encoding "), + repeat( + 0.., + alt(( + parse::any_header_field_multi_line.map(|(k, o)| (k.as_bstr(), Cow::Owned(o))), + |i| { + parse::any_header_field(i, take_till1(NL)) + .map(|(i, (k, o))| (i, (k.as_bstr(), Cow::Borrowed(o.as_bstr())))) + }, + )), + ) + .context(" "), + terminated(message, eof), ) - .context(" ") - .parse_next(i)?; - let (i, message) = terminated(message, eof).parse_next(i)?; - - Ok(( - i, - CommitRef { - tree, - parents: SmallVec::from(parents), - author, - committer, - encoding: encoding.map(ByteSlice::as_bstr), - message, - extra_headers, - }, - )) + .map( + |(tree, parents, author, committer, encoding, extra_headers, message)| CommitRef { + tree, + parents: SmallVec::from(parents), + author, + committer, + encoding: encoding.map(ByteSlice::as_bstr), + message, + extra_headers, + }, + ) + .parse_next(i) } diff --git a/gix-object/src/commit/message/body.rs b/gix-object/src/commit/message/body.rs index 02eebc58099..b0e45fdcf82 100644 --- a/gix-object/src/commit/message/body.rs +++ b/gix-object/src/commit/message/body.rs @@ -2,6 +2,8 @@ use std::ops::Deref; use winnow::{ combinator::eof, + combinator::rest, + combinator::separated_pair, combinator::terminated, error::{ErrorKind, ParserError}, prelude::*, @@ -33,11 +35,12 @@ pub struct TrailerRef<'a> { } fn parse_single_line_trailer<'a, E: ParserError<&'a [u8]>>(i: &'a [u8]) -> IResult<&'a [u8], (&'a BStr, &'a BStr), E> { - let (value, token) = terminated(take_until1(b":".as_ref()), b": ").parse_next(i.trim_end())?; + let (i, (token, value)) = separated_pair(take_until1(b":".as_ref()), b": ", rest).parse_next(i.trim_end())?; + if token.trim_end().len() != token.len() || value.trim_start().len() != value.len() { Err(winnow::error::ErrMode::from_error_kind(i, ErrorKind::Fail).cut()) } else { - Ok((&[], (token.as_bstr(), value.as_bstr()))) + Ok((i, (token.as_bstr(), value.as_bstr()))) } } diff --git a/gix-object/src/commit/message/decode.rs b/gix-object/src/commit/message/decode.rs index 930d7dfbed1..ced664ea921 100644 --- a/gix-object/src/commit/message/decode.rs +++ b/gix-object/src/commit/message/decode.rs @@ -1,49 +1,46 @@ use winnow::{ - combinator::alt, combinator::eof, combinator::terminated, error::ParserError, prelude::*, token::take_till1, + combinator::alt, combinator::eof, combinator::preceded, combinator::rest, combinator::terminated, + error::ParserError, prelude::*, stream::Offset, token::take_till1, }; use crate::bstr::{BStr, ByteSlice}; pub(crate) fn newline<'a, E: ParserError<&'a [u8]>>(i: &'a [u8]) -> IResult<&'a [u8], &'a [u8], E> { - alt((b"\r\n", b"\n")).parse_next(i) + alt((b"\n", b"\r\n")).parse_next(i) } -fn subject_and_body<'a, E: ParserError<&'a [u8]>>(i: &'a [u8]) -> IResult<&'a [u8], (&'a BStr, Option<&'a BStr>), E> { - let mut c = i; - let mut consumed_bytes = 0; - while !c.is_empty() { - c = match take_till1::<_, _, E>(|c| c == b'\n' || c == b'\r').parse_next(c) { - Ok((i1, segment)) => { - consumed_bytes += segment.len(); - match (newline::, newline::).parse_next(i1) { - Ok((body, _)) => { - return Ok(( - &[], - ( - i[0usize..consumed_bytes].as_bstr(), - (!body.is_empty()).then(|| body.as_bstr()), - ), - )); +fn subject_and_body<'a, E: ParserError<&'a [u8]>>( + mut i: &'a [u8], +) -> IResult<&'a [u8], (&'a BStr, Option<&'a BStr>), E> { + let start = i; + while !i.is_empty() { + match take_till1::<_, _, E>(|c| c == b'\n' || c == b'\r').parse_next(i) { + Ok((next, _)) => { + let consumed_bytes = next.offset_from(start); + match preceded((newline::, newline::), rest).parse_next(next) { + Ok((next, body)) => { + let body = (!body.is_empty()).then(|| body.as_bstr()); + return Ok((next, (start[0usize..consumed_bytes].as_bstr(), body))); } - Err(_) => match i1.get(1..) { + Err(_) => match next.get(1..) { Some(next) => { - consumed_bytes += 1; - next + i = next; } None => break, }, } } - Err(_) => match c.get(1..) { + Err(_) => match i.get(1..) { Some(next) => { - consumed_bytes += 1; - next + i = next; } None => break, }, - }; + } } - Ok((&[], (i.as_bstr(), None))) + + i = start; + rest.map(|r: &[u8]| (r.as_bstr(), None)).parse_next(i) } /// Returns title and body, without separator diff --git a/gix-object/src/parse.rs b/gix-object/src/parse.rs index 014ebf3f61e..17dc10f8847 100644 --- a/gix-object/src/parse.rs +++ b/gix-object/src/parse.rs @@ -1,6 +1,5 @@ use bstr::{BStr, BString, ByteVec}; use winnow::{ - combinator::peek, combinator::repeat, combinator::{preceded, terminated}, error::{AddContext, ParserError}, @@ -18,30 +17,28 @@ const SPACE_OR_NL: &[u8] = b" \n"; pub(crate) fn any_header_field_multi_line<'a, E: ParserError<&'a [u8]> + AddContext<&'a [u8]>>( i: &'a [u8], ) -> IResult<&'a [u8], (&'a [u8], BString), E> { - let (i, (k, o)) = peek(( + ( terminated(take_till1(SPACE_OR_NL), SPACE), ( take_till1(NL), NL, repeat(1.., terminated((SPACE, take_until0(NL)), NL)).map(|()| ()), ) - .recognize(), - )) - .context("name ") - .parse_next(i)?; - assert!(!o.is_empty(), "we have parsed more than one value here"); - let end = &o[o.len() - 1] as *const u8 as usize; - let start_input = &i[0] as *const u8 as usize; - - let bytes = o[..o.len() - 1].as_bstr(); - let mut out = BString::from(Vec::with_capacity(bytes.len())); - let mut lines = bytes.lines(); - out.push_str(lines.next().expect("first line")); - for line in lines { - out.push(b'\n'); - out.push_str(&line[1..]); // cut leading space - } - Ok((&i[end - start_input + 1..], (k, out))) + .recognize() + .map(|o: &[u8]| { + let bytes = o.as_bstr(); + let mut out = BString::from(Vec::with_capacity(bytes.len())); + let mut lines = bytes.lines(); + out.push_str(lines.next().expect("first line")); + for line in lines { + out.push(b'\n'); + out.push_str(&line[1..]); // cut leading space + } + out + }), + ) + .context("name ") + .parse_next(i) } pub(crate) fn header_field<'a, T, E: ParserError<&'a [u8]>>( @@ -68,8 +65,8 @@ pub fn hex_hash<'a, E: ParserError<&'a [u8]>>(i: &'a [u8]) -> IResult<&'a [u8], gix_hash::Kind::shortest().len_in_hex()..=gix_hash::Kind::longest().len_in_hex(), is_hex_digit_lc, ) + .map(ByteSlice::as_bstr) .parse_next(i) - .map(|(i, hex)| (i, hex.as_bstr())) } pub(crate) fn signature<'a, E: ParserError<&'a [u8]> + AddContext<&'a [u8]>>( diff --git a/gix-object/src/tag/decode.rs b/gix-object/src/tag/decode.rs index 34654a59994..2bce63ac58e 100644 --- a/gix-object/src/tag/decode.rs +++ b/gix-object/src/tag/decode.rs @@ -1,45 +1,38 @@ use winnow::{ combinator::alt, + combinator::delimited, + combinator::rest, combinator::{eof, opt}, combinator::{preceded, terminated}, error::{AddContext, ParserError}, prelude::*, stream::AsChar, - token::{tag, take_until0, take_while}, + token::{take_until0, take_while}, }; use crate::{parse, parse::NL, BStr, ByteSlice, TagRef}; pub fn git_tag<'a, E: ParserError<&'a [u8]> + AddContext<&'a [u8]>>(i: &'a [u8]) -> IResult<&[u8], TagRef<'a>, E> { - let (i, target) = (|i| parse::header_field(i, b"object", parse::hex_hash)) - .context("object <40 lowercase hex char>") - .parse_next(i)?; - - let (i, kind) = (|i| parse::header_field(i, b"type", take_while(1.., AsChar::is_alpha))) - .context("type ") - .parse_next(i)?; - let kind = crate::Kind::from_bytes(kind) - .map_err(|_| winnow::error::ErrMode::from_error_kind(i, winnow::error::ErrorKind::Verify))?; - - let (i, tag_version) = (|i| parse::header_field(i, b"tag", take_while(1.., |b| b != NL[0]))) - .context("tag ") - .parse_next(i)?; - - let (i, signature) = opt(|i| parse::header_field(i, b"tagger", parse::signature)) - .context("tagger ") - .parse_next(i)?; - let (i, (message, pgp_signature)) = terminated(message, eof).parse_next(i)?; - Ok(( - i, - TagRef { - target, - name: tag_version.as_bstr(), - target_kind: kind, - message, - tagger: signature, - pgp_signature, - }, - )) + ( + (|i| parse::header_field(i, b"object", parse::hex_hash)).context("object <40 lowercase hex char>"), + (|i| parse::header_field(i, b"type", take_while(1.., AsChar::is_alpha))) + .verify_map(|kind| crate::Kind::from_bytes(kind).ok()) + .context("type "), + (|i| parse::header_field(i, b"tag", take_while(1.., |b| b != NL[0]))).context("tag "), + opt(|i| parse::header_field(i, b"tagger", parse::signature)).context("tagger "), + terminated(message, eof), + ) + .map( + |(target, kind, tag_version, signature, (message, pgp_signature))| TagRef { + target, + name: tag_version.as_bstr(), + target_kind: kind, + message, + tagger: signature, + pgp_signature, + }, + ) + .parse_next(i) } pub fn message<'a, E: ParserError<&'a [u8]>>(i: &'a [u8]) -> IResult<&'a [u8], (&'a BStr, Option<&'a BStr>), E> { @@ -47,45 +40,35 @@ pub fn message<'a, E: ParserError<&'a [u8]>>(i: &'a [u8]) -> IResult<&'a [u8], ( const PGP_SIGNATURE_END: &[u8] = b"-----END PGP SIGNATURE-----"; if i.is_empty() { - return Ok((i, (i.as_bstr(), None))); - } - let (i, _) = tag(NL).parse_next(i)?; - fn all_to_end<'a, E: ParserError<&'a [u8]>>(i: &'a [u8]) -> IResult<&'a [u8], (&'a [u8], &'a [u8]), E> { - if i.is_empty() { - // Empty message. That's OK. - return Ok((&[], (&[], &[]))); - } - // an empty signature message signals that there is none - the function signature is needed - // to work with 'alt(…)'. PGP signatures are never empty - Ok((&[], (i, &[]))) + return Ok((i, (b"".as_bstr(), None))); } - let (i, (message, signature)) = alt(( - ( - take_until0(PGP_SIGNATURE_BEGIN), - preceded( - NL, - ( - &PGP_SIGNATURE_BEGIN[1..], - take_until0(PGP_SIGNATURE_END), - PGP_SIGNATURE_END, - take_while(0.., |_| true), - ) - .recognize(), + delimited( + NL, + alt(( + ( + take_until0(PGP_SIGNATURE_BEGIN), + preceded( + NL, + ( + &PGP_SIGNATURE_BEGIN[1..], + take_until0(PGP_SIGNATURE_END), + PGP_SIGNATURE_END, + rest, + ) + .recognize() + .map(|signature: &[u8]| { + if signature.is_empty() { + None + } else { + Some(signature.as_bstr()) + } + }), + ), ), - ), - all_to_end, - )) - .parse_next(i)?; - let (i, _) = opt(NL).parse_next(i)?; - Ok(( - i, - ( - message.as_bstr(), - if signature.is_empty() { - None - } else { - Some(signature.as_bstr()) - }, - ), - )) + rest.map(|rest: &[u8]| (rest, None)), + )), + opt(NL), + ) + .map(|(message, signature)| (message.as_bstr(), signature)) + .parse_next(i) } diff --git a/gix-object/src/tree/ref_iter.rs b/gix-object/src/tree/ref_iter.rs index 54fc4def591..953d38032f3 100644 --- a/gix-object/src/tree/ref_iter.rs +++ b/gix-object/src/tree/ref_iter.rs @@ -161,24 +161,23 @@ mod decode { } pub fn entry<'a, E: ParserError<&'a [u8]>>(i: &'a [u8]) -> IResult<&[u8], EntryRef<'_>, E> { - let (i, mode) = terminated(take_while(5..=6, AsChar::is_dec_digit), SPACE).parse_next(i)?; - let mode = tree::EntryMode::try_from(mode) - .map_err(|invalid| winnow::error::ErrMode::from_error_kind(invalid, winnow::error::ErrorKind::Verify))?; - let (i, filename) = terminated(take_while(1.., |b| b != NULL[0]), NULL).parse_next(i)?; - let (i, oid) = take(20u8).parse_next(i)?; // TODO: make this compatible with other hash lengths - - Ok(( - i, - EntryRef { + ( + terminated(take_while(5..=6, AsChar::is_dec_digit), SPACE) + .verify_map(|mode| tree::EntryMode::try_from(mode).ok()), + terminated(take_while(1.., |b| b != NULL[0]), NULL), + take(20u8), + ) + .map(|(mode, filename, oid): (_, &[u8], _)| EntryRef { mode, filename: filename.as_bstr(), oid: gix_hash::oid::try_from_bytes(oid).expect("we counted exactly 20 bytes"), - }, - )) + }) + .parse_next(i) } pub fn tree<'a, E: ParserError<&'a [u8]>>(i: &'a [u8]) -> IResult<&'a [u8], TreeRef<'a>, E> { - let (i, entries) = terminated(repeat(0.., entry), eof).parse_next(i)?; - Ok((i, TreeRef { entries })) + terminated(repeat(0.., entry), eof) + .map(|entries| TreeRef { entries }) + .parse_next(i) } } diff --git a/gix-ref/src/parse.rs b/gix-ref/src/parse.rs index 9c528502ea2..3a75b426800 100644 --- a/gix-ref/src/parse.rs +++ b/gix-ref/src/parse.rs @@ -13,8 +13,8 @@ pub fn hex_hash<'a, E: ParserError<&'a [u8]>>(i: &'a [u8]) -> IResult<&'a [u8], gix_hash::Kind::shortest().len_in_hex()..=gix_hash::Kind::longest().len_in_hex(), is_hex_digit_lc, ) + .map(ByteSlice::as_bstr) .parse_next(i) - .map(|(i, hex)| (i, hex.as_bstr())) } pub fn newline<'a, E: ParserError<&'a [u8]>>(i: &'a [u8]) -> IResult<&'a [u8], &'a [u8], E> { diff --git a/gix-ref/src/store/file/log/line.rs b/gix-ref/src/store/file/log/line.rs index af06b40471e..662d469dab3 100644 --- a/gix-ref/src/store/file/log/line.rs +++ b/gix-ref/src/store/file/log/line.rs @@ -80,6 +80,7 @@ pub mod decode { combinator::fail, combinator::opt, combinator::preceded, + combinator::rest, combinator::terminated, error::{AddContext, ParserError}, prelude::*, @@ -129,16 +130,16 @@ pub mod decode { fn message<'a, E: ParserError<&'a [u8]>>(i: &'a [u8]) -> IResult<&'a [u8], &'a BStr, E> { if i.is_empty() { - Ok((&[], i.as_bstr())) + rest.map(ByteSlice::as_bstr).parse_next(i) } else { terminated(take_while(0.., |c| c != b'\n'), opt(b'\n')) + .map(ByteSlice::as_bstr) .parse_next(i) - .map(|(i, o)| (i, o.as_bstr())) } } fn one<'a, E: ParserError<&'a [u8]> + AddContext<&'a [u8]>>(bytes: &'a [u8]) -> IResult<&[u8], LineRef<'a>, E> { - let (i, ((old, new, signature), message)) = ( + ( ( terminated(hex_hash, b" ").context(""), terminated(hex_hash, b" ").context(""), @@ -152,17 +153,13 @@ pub mod decode { fail.context("log message must be separated from signature with whitespace"), )), ) - .parse_next(bytes)?; - - Ok(( - i, - LineRef { + .map(|((old, new, signature), message)| LineRef { previous_oid: old, new_oid: new, signature, message, - }, - )) + }) + .parse_next(bytes) } #[cfg(test)] diff --git a/gix-ref/src/store/file/loose/reference/decode.rs b/gix-ref/src/store/file/loose/reference/decode.rs index 4af33471f39..8e2c5a05290 100644 --- a/gix-ref/src/store/file/loose/reference/decode.rs +++ b/gix-ref/src/store/file/loose/reference/decode.rs @@ -65,15 +65,14 @@ impl Reference { } } -fn parse(bytes: &[u8]) -> IResult<&[u8], MaybeUnsafeState> { - let is_space = |b: u8| b == b' '; - if let (path, Some(_ref_prefix)) = opt(terminated("ref: ", take_while(0.., is_space))).parse_next(bytes)? { +fn parse(i: &[u8]) -> IResult<&[u8], MaybeUnsafeState> { + if let (i, Some(_ref_prefix)) = opt(terminated("ref: ", take_while(0.., b' '))).parse_next(i)? { terminated(take_while(0.., |b| b != b'\r' && b != b'\n'), opt(newline)) .map(|path| MaybeUnsafeState::UnvalidatedPath(path.into())) - .parse_next(path) + .parse_next(i) } else { terminated(hex_hash, opt(newline)) .map(|hex| MaybeUnsafeState::Id(ObjectId::from_hex(hex).expect("prior validation"))) - .parse_next(bytes) + .parse_next(i) } } diff --git a/gix-ref/src/store/packed/buffer.rs b/gix-ref/src/store/packed/buffer.rs index 6786e4a9f66..27b7b19fe5d 100644 --- a/gix-ref/src/store/packed/buffer.rs +++ b/gix-ref/src/store/packed/buffer.rs @@ -20,6 +20,8 @@ pub mod open { use std::path::PathBuf; use memmap2::Mmap; + use winnow::prelude::*; + use winnow::stream::Offset; use crate::store_impl::packed; @@ -45,10 +47,12 @@ pub mod open { }; let (offset, sorted) = { - let data = backing.as_ref(); - if *data.first().unwrap_or(&b' ') == b'#' { - let (records, header) = packed::decode::header::<()>(data).map_err(|_| Error::HeaderParsing)?; - let offset = records.as_ptr() as usize - data.as_ptr() as usize; + let input = backing.as_ref(); + if *input.first().unwrap_or(&b' ') == b'#' { + let (input, header) = packed::decode::header::<()> + .parse_next(input) + .map_err(|_| Error::HeaderParsing)?; + let offset = input.offset_from(backing.as_ref()); (offset, header.sorted) } else { (0, false) diff --git a/gix-ref/src/store/packed/decode.rs b/gix-ref/src/store/packed/decode.rs index 0b9a86aa53c..618a5a4874c 100644 --- a/gix-ref/src/store/packed/decode.rs +++ b/gix-ref/src/store/packed/decode.rs @@ -50,30 +50,34 @@ pub fn header<'a, E>(input: &'a [u8]) -> IResult<&'a [u8], Header, E> where E: ParserError<&'a [u8]>, { - let (rest, traits) = preceded(b"# pack-refs with: ", until_newline).parse_next(input)?; - - let mut peeled = Peeled::Unspecified; - let mut sorted = false; - for token in traits.as_bstr().split_str(b" ") { - if token == b"fully-peeled" { - peeled = Peeled::Fully; - } else if token == b"peeled" { - peeled = Peeled::Partial; - } else if token == b"sorted" { - sorted = true; - } - } - - Ok((rest, Header { peeled, sorted })) + preceded(b"# pack-refs with: ", until_newline) + .map(|traits| { + let mut peeled = Peeled::Unspecified; + let mut sorted = false; + for token in traits.as_bstr().split_str(b" ") { + if token == b"fully-peeled" { + peeled = Peeled::Fully; + } else if token == b"peeled" { + peeled = Peeled::Partial; + } else if token == b"sorted" { + sorted = true; + } + } + Header { peeled, sorted } + }) + .parse_next(input) } pub fn reference<'a, E: ParserError<&'a [u8]> + FromExternalError<&'a [u8], crate::name::Error>>( input: &'a [u8], ) -> IResult<&'a [u8], packed::Reference<'a>, E> { - let (input, (target, name)) = - (terminated(hex_hash, b" "), until_newline.try_map(TryInto::try_into)).parse_next(input)?; - let (rest, object) = opt(delimited(b"^", hex_hash, newline)).parse_next(input)?; - Ok((rest, packed::Reference { name, target, object })) + ( + terminated(hex_hash, b" "), + until_newline.try_map(TryInto::try_into), + opt(delimited(b"^", hex_hash, newline)), + ) + .map(|(target, name, object)| packed::Reference { name, target, object }) + .parse_next(input) } #[cfg(test)] diff --git a/gix-ref/src/store/packed/find.rs b/gix-ref/src/store/packed/find.rs index 8c1dcb5b2cc..86c3c9363f1 100644 --- a/gix-ref/src/store/packed/find.rs +++ b/gix-ref/src/store/packed/find.rs @@ -1,6 +1,7 @@ use std::convert::TryInto; use gix_object::bstr::{BStr, BString, ByteSlice}; +use winnow::prelude::*; use crate::{store_impl::packed, FullNameRef, PartialNameRef}; @@ -41,7 +42,8 @@ impl packed::Buffer { pub(crate) fn try_find_full_name(&self, name: &FullNameRef) -> Result>, Error> { match self.binary_search_by(name.as_bstr()) { Ok(line_start) => Ok(Some( - packed::decode::reference::<()>(&self.as_ref()[line_start..]) + packed::decode::reference::<()> + .parse_next(&self.as_ref()[line_start..]) .map_err(|_| Error::Parse)? .1, )), @@ -91,7 +93,8 @@ impl packed::Buffer { a.binary_search_by_key(&full_name.as_ref(), |b: &u8| { let ofs = b as *const u8 as usize - a.as_ptr() as usize; let line = &a[search_start_of_record(ofs)..]; - packed::decode::reference::<()>(line) + packed::decode::reference::<()> + .parse_next(line) .map(|(_rest, r)| r.name.as_bstr().as_bytes()) .map_err(|err| { encountered_parse_failure = true; diff --git a/gix-ref/src/store/packed/iter.rs b/gix-ref/src/store/packed/iter.rs index d9c49956b4f..8b14f90b83a 100644 --- a/gix-ref/src/store/packed/iter.rs +++ b/gix-ref/src/store/packed/iter.rs @@ -1,4 +1,7 @@ use gix_object::bstr::{BString, ByteSlice}; +use winnow::combinator::preceded; +use winnow::combinator::rest; +use winnow::prelude::*; use crate::store_impl::{packed, packed::decode}; @@ -29,7 +32,7 @@ impl<'a> Iterator for packed::Iter<'a> { return None; } - match decode::reference::<()>(self.cursor) { + match decode::reference::<()>.parse_next(self.cursor) { Ok((rest, reference)) => { self.cursor = rest; self.current_line += 1; @@ -82,9 +85,12 @@ impl<'a> packed::Iter<'a> { current_line: 1, }) } else if packed[0] == b'#' { - let (refs, _header) = decode::header::<()>(packed).map_err(|_| Error::Header { - invalid_first_line: packed.lines().next().unwrap_or(packed).into(), - })?; + let input = packed; + let (_, refs) = preceded(decode::header::<()>, rest) + .parse_next(input) + .map_err(|_| Error::Header { + invalid_first_line: packed.lines().next().unwrap_or(packed).into(), + })?; Ok(packed::Iter { cursor: refs, prefix,