Skip to content

Commit

Permalink
feat: match pathspecs just like git does.
Browse files Browse the repository at this point in the history
This is important for selecting files on disk
  • Loading branch information
Byron committed Aug 12, 2023
1 parent 2b48686 commit 451a2c2
Show file tree
Hide file tree
Showing 19 changed files with 1,035 additions and 22 deletions.
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions gix-pathspec/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ doctest = false

[dependencies]
gix-glob = { version = "^0.10.2", path = "../gix-glob" }
gix-path = { version = "^0.8.4", path = "../gix-path" }
gix-attributes = { version = "^0.16.0", path = "../gix-attributes" }

bstr = { version = "1.3.0", default-features = false, features = ["std"]}
Expand Down
75 changes: 69 additions & 6 deletions gix-pathspec/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,18 +4,60 @@
#![forbid(unsafe_code)]

use bitflags::bitflags;
use bstr::BString;
use bstr::{BStr, BString, ByteSlice};
use std::path::PathBuf;

/// Types related to [Pattern::normalize()](super::Pattern::normalize()).
pub mod normalize {
use std::path::PathBuf;

/// The error returned by [Pattern::normalize()](super::Pattern::normalize()).
#[derive(Debug, thiserror::Error)]
#[allow(missing_docs)]
pub enum Error {
/// The pathspec's path was absolute, but `worktree_path` could not be stripped from its front.
#[error("The path '{}' is not inside of the worktree '{}'", path.display(), worktree_path.display())]
AbsolutePathOutsideOfWorktree { path: PathBuf, worktree_path: PathBuf },
/// Resolving the relative components of `path` failed, which this error reports as the path leaving the repository.
#[error("The path '{}' leaves the repository", path.display())]
OutsideOfWorktree { path: PathBuf },
}
}

mod pattern;

///
pub mod search;

///
pub mod parse;

/// A list of pathspec patterns, possibly from a file.
///
/// Pathspecs are generally relative to the root of the repository.
#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone, Default)]
pub struct Search {
/// Patterns and their associated data in the order they were loaded in or specified,
/// the line number in its source file or its sequence number (_`(pattern, value, line_number)`_).
///
/// During matching, this order is reversed.
pub patterns: Vec<gix_glob::search::pattern::Mapping<search::Spec>>,

/// The path from which the patterns were read, or `None` if the patterns
/// don't originate in a file on disk.
pub source: Option<PathBuf>,

/// If `true`, this means all `patterns` are exclude patterns. This means that if there is no match
/// (which would exclude an item), we would actually match it for lack of exclusion.
all_patterns_are_excluded: bool,
}

/// The output of a pathspec [parsing][parse()] operation. It can be used to match against one or more paths.
#[derive(Default, PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone)]
pub struct Pattern {
/// The path part of a pathspec.
/// The path part of a pathspec, which is typically a path possibly mixed with glob patterns.
/// Note that it might be an empty string as well.
///
/// `:(top,literal,icase,attr,exclude)some/path` would yield `some/path`.
pub path: BString,
/// For example, `:(top,literal,icase,attr,exclude)some/path` would yield `some/path`.
path: BString,
/// All magic signatures that were included in the pathspec.
pub signature: MagicSignature,
/// The search mode of the pathspec.
Expand All @@ -24,17 +66,35 @@ pub struct Pattern {
///
/// `:(attr:a=one b=):path` would yield attribute `a` and `b`.
pub attributes: Vec<gix_attributes::Assignment>,
/// If `true`, this was the special `:` spec which acts like `null`
/// If `true`, we are a special Nil pattern and always match.
nil: bool,
/// The length of bytes in `path` that belong to the prefix, which will always be matched case-insensitively.
/// That way, even though pathspecs are applied from the top, we can emulate having changed directory into
/// a specific sub-directory in a case-sensitive file-system.
/// Is set by [Pattern::normalize()].
prefix_len: usize,
}

impl Pattern {
/// Returns `true` if this seems to be a pathspec that indicates that 'there is no pathspec'.
///
/// Note that such a spec is `:`.
pub fn is_null(&self) -> bool {
pub fn is_nil(&self) -> bool {
self.nil
}

/// Return the prefix-portion of the `path` of this spec, a directory.
/// It can be empty if there is no prefix.
///
/// A prefix is effectively the CWD seen as relative to the working tree.
///
/// The returned slice consists of the first `prefix_len` bytes of `path`.
pub fn prefix(&self) -> &BStr {
self.path[..self.prefix_len].as_bstr()
}

/// Return the path of this spec, typically used for matching.
///
/// This is the entire stored path, so the bytes returned by `prefix()` are found at its start.
pub fn path(&self) -> &BStr {
self.path.as_ref()
}
}

bitflags! {
Expand All @@ -47,6 +107,9 @@ bitflags! {
const ICASE = 1 << 1;
/// Excludes the matching patterns from the previous results
const EXCLUDE = 1 << 2;
/// The pattern must match a directory, and not a file.
/// This is equivalent to how it's handled in `gix-glob`
const MUST_BE_DIR = 1 << 3;
}
}

Expand Down
38 changes: 33 additions & 5 deletions gix-pathspec/src/parse.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,11 @@ pub struct Defaults {
/// The default signature.
pub signature: MagicSignature,
/// The default search-mode.
///
/// Note that even if it's [`MatchMode::Literal`], the pathspecs will be parsed as usual, but matched verbatim afterwards.
pub search_mode: MatchMode,
/// If set, the pathspec will not be parsed but used verbatim. Implies [`MatchMode::Literal`] for `search_mode`.
pub literal: bool,
}

/// The error returned by [parse()][crate::parse()].
Expand Down Expand Up @@ -43,10 +47,20 @@ pub enum Error {

impl Pattern {
/// Try to parse a path-spec pattern from the given `input` bytes.
pub fn from_bytes(input: &[u8], Defaults { signature, search_mode }: Defaults) -> Result<Self, Error> {
pub fn from_bytes(
input: &[u8],
Defaults {
signature,
search_mode,
literal,
}: Defaults,
) -> Result<Self, Error> {
if input.is_empty() {
return Err(Error::EmptyString);
}
if literal {
return Ok(Self::from_literal(input, signature));
}
if input.as_bstr() == ":" {
return Ok(Pattern {
nil: true,
Expand All @@ -55,11 +69,9 @@ impl Pattern {
}

let mut p = Pattern {
path: BString::default(),
signature,
search_mode,
attributes: Vec::new(),
nil: false,
..Default::default()
};

let mut cursor = 0;
Expand All @@ -72,9 +84,25 @@ impl Pattern {
}
}

p.path = BString::from(&input[cursor..]);
let mut path = &input[cursor..];
if path.last() == Some(&b'/') {
p.signature |= MagicSignature::MUST_BE_DIR;
path = &path[..path.len() - 1];
}
p.path = path.into();
Ok(p)
}

/// Take `input` literally without parsing anything. This will also set our mode to `literal` to allow this pathspec to match `input` verbatim, and
/// use `default_signature` as magic signature.
pub fn from_literal(input: &[u8], default_signature: MagicSignature) -> Self {
Pattern {
path: input.into(),
signature: default_signature,
search_mode: MatchMode::Literal,
..Default::default()
}
}
}

fn parse_short_keywords(input: &[u8], cursor: &mut usize) -> Result<MagicSignature, Error> {
Expand Down
154 changes: 154 additions & 0 deletions gix-pathspec/src/pattern.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,154 @@
use crate::{normalize, MagicSignature, MatchMode, Pattern};
use bstr::{BString, ByteSlice, ByteVec};
use std::path::{Component, Path, PathBuf};

/// Mutation
impl Pattern {
    /// Normalize the pattern's path by assuring it's relative to the root of the working tree, and contains
    /// no relative path components. Further, it assures that `/` are used as path separator.
    ///
    /// If `self.path` is a relative path, `prefix` will be put in front of it, unless `prefix` is empty or
    /// `self.signature` already contains `TOP`.
    /// If `self.path` is an absolute path, we will use `root` to make it worktree relative if possible.
    ///
    /// `prefix` can be empty, we will still normalize this pathspec to resolve relative path components, and
    /// it is assumed not to contain any relative path components, e.g. '', 'a', 'a/b' are valid.
    /// `root` is the absolute path to the root of either the worktree or the repository's `git_dir`.
    ///
    /// Besides replacing the stored path, this also records how many leading bytes of the new path belong
    /// to `prefix`. If the normalized path is exactly `.`, the previously stored prefix length is left as is.
    ///
    /// # Errors
    ///
    /// * [`normalize::Error::AbsolutePathOutsideOfWorktree`] if the path is absolute and `root` cannot be
    ///   stripped from its front.
    /// * [`normalize::Error::OutsideOfWorktree`] if resolving the relative components of the (possibly
    ///   prefixed) path fails.
    pub fn normalize(&mut self, prefix: &Path, root: &Path) -> Result<(), normalize::Error> {
        // Compute how many trailing components of the prefix are consumed by `..` components in `path`.
        fn prefix_components_to_subtract(path: &Path) -> usize {
            // One past the index of the last `..` component, or `None` if `path` contains no `..`.
            let parent_component_end_bound = path.components().enumerate().fold(None::<usize>, |acc, (idx, c)| {
                matches!(c, Component::ParentDir).then_some(idx + 1).or(acc)
            });
            // Balance of upward (`..`) against downward (normal) steps up to and including the last `..`.
            let count = path
                .components()
                .take(parent_component_end_bound.unwrap_or(0))
                .map(|c| match c {
                    Component::ParentDir => 1_isize,
                    Component::Normal(_) => -1,
                    _ => 0,
                })
                .sum::<isize>();
            // The conversion fails exactly for a negative balance, which like zero means nothing is subtracted.
            usize::try_from(count).unwrap_or(0)
        }

        let mut path = gix_path::from_bstr(self.path.as_ref());
        let mut num_prefix_components = 0;
        if gix_path::is_absolute(path.as_ref()) {
            let rela_path = match path.strip_prefix(root) {
                Ok(path) => path,
                Err(_) => {
                    return Err(normalize::Error::AbsolutePathOutsideOfWorktree {
                        path: path.into_owned(),
                        worktree_path: root.into(),
                    })
                }
            };
            path = rela_path.to_owned().into();
        } else if !prefix.as_os_str().is_empty() && !self.signature.contains(MagicSignature::TOP) {
            debug_assert_eq!(
                prefix
                    .components()
                    .filter(|c| matches!(c, Component::Normal(_)))
                    .count(),
                prefix.components().count(),
                "BUG: prefixes must not have relative path components, or calculations here will be wrong so pattern won't match"
            );
            // Only those prefix components that survive the `..` components of the pattern count as prefix.
            num_prefix_components = prefix
                .components()
                .count()
                .saturating_sub(prefix_components_to_subtract(path.as_ref()));
            path = prefix.join(path).into();
        }

        let assure_path_cannot_break_out_upwards = Path::new("");
        let path = match gix_path::normalize(path.as_ref(), assure_path_cannot_break_out_upwards) {
            Some(path) => path,
            None => {
                return Err(normalize::Error::OutsideOfWorktree {
                    path: path.into_owned(),
                })
            }
        };

        self.path = if path == Path::new(".") {
            BString::from(".")
        } else {
            let cleaned = PathBuf::from_iter(path.components().filter(|c| !matches!(c, Component::CurDir)));
            let mut out = gix_path::to_unix_separators_on_windows(gix_path::into_bstr(cleaned)).into_owned();
            self.prefix_len = {
                // Temporarily terminate a directory pattern with `/` so its last component has a slash to find.
                if self.signature.contains(MagicSignature::MUST_BE_DIR) {
                    out.push(b'/');
                }
                // The prefix ends at the offset of the slash that follows its last component, or at 0 if
                // there is no such slash.
                let len = out
                    .find_iter(b"/")
                    .take(num_prefix_components)
                    .last()
                    .unwrap_or_default();
                if self.signature.contains(MagicSignature::MUST_BE_DIR) {
                    out.pop();
                }
                len
            };
            out
        };

        Ok(())
    }
}

/// Access
impl Pattern {
    /// Tell whether this pathspec carries the `EXCLUDE` magic signature, in which case a match removes an item
    /// from the result set instead of adding it.
    pub fn is_excluded(&self) -> bool {
        self.signature.contains(MagicSignature::EXCLUDE)
    }

    /// Serialize this pattern in the long form `:(keyword,…)path`, which is meant to yield the same pattern
    /// when parsed back.
    ///
    /// The nil pattern is written as `:`. Otherwise the keywords are emitted in the order `top`, `exclude`,
    /// `icase`, the search mode (`literal` or `glob`, nothing for shell globs) and finally `attr:` followed by
    /// the space-separated attribute assignments, in which each `,` is escaped as `\,`.
    /// A trailing `/` is appended to the path if the pattern must match a directory.
    ///
    /// Note that the prefix length recorded by [`Pattern::normalize()`] is not part of the output.
    pub fn to_bstring(&self) -> BString {
        if self.is_nil() {
            return ":".into();
        }

        // Collect everything that goes between the parentheses, to be joined by `,` later.
        let mut keywords = Vec::<String>::new();
        for (flag, keyword) in [
            (MagicSignature::TOP, "top"),
            (MagicSignature::EXCLUDE, "exclude"),
            (MagicSignature::ICASE, "icase"),
        ] {
            if self.signature.contains(flag) {
                keywords.push(keyword.into());
            }
        }
        match self.search_mode {
            MatchMode::ShellGlob => {}
            MatchMode::Literal => keywords.push("literal".into()),
            MatchMode::PathAwareGlob => keywords.push("glob".into()),
        }
        if !self.attributes.is_empty() {
            let assignments: Vec<String> = self
                .attributes
                .iter()
                .map(|attr| attr.as_ref().to_string().replace(',', "\\,"))
                .collect();
            keywords.push(format!("attr:{}", assignments.join(" ")));
        }

        let mut out: BString = ":(".into();
        out.push_str(keywords.join(","));
        out.push(b')');
        out.extend_from_slice(&self.path);
        if self.signature.contains(MagicSignature::MUST_BE_DIR) {
            out.push(b'/');
        }
        out
    }
}

/// Formats a pattern by way of the byte string produced by `Pattern::to_bstring()`.
impl std::fmt::Display for Pattern {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
// Serialize first, then let the resulting byte string do the actual formatting.
self.to_bstring().fmt(f)
}
}
Loading

0 comments on commit 451a2c2

Please sign in to comment.