From 451a2c2c92b41b3365f389784474cc2824e6205c Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Sun, 6 Aug 2023 16:02:01 +0200 Subject: [PATCH] feat: match pathspecs just like `git` does. This is important for selecting files on disk --- Cargo.lock | 1 + gix-pathspec/Cargo.toml | 1 + gix-pathspec/src/lib.rs | 75 ++++++- gix-pathspec/src/parse.rs | 38 +++- gix-pathspec/src/pattern.rs | 154 +++++++++++++++ gix-pathspec/src/search/init.rs | 67 +++++++ gix-pathspec/src/search/mod.rs | 147 ++++++++++++++ .../generated-archives/match_baseline.tar.xz | 3 + .../match_baseline_dirs.tar.xz | 3 + .../match_baseline_files.tar.xz | 3 + .../generated-archives/parse_baseline.tar.xz | 4 +- .../tests/fixtures/match_baseline_dirs.sh | 84 ++++++++ .../tests/fixtures/match_baseline_files.sh | 79 ++++++++ gix-pathspec/tests/fixtures/parse_baseline.sh | 12 ++ gix-pathspec/tests/normalize/mod.rs | 100 ++++++++++ gix-pathspec/tests/parse/mod.rs | 14 +- gix-pathspec/tests/parse/valid.rs | 84 +++++++- gix-pathspec/tests/pathspec.rs | 2 + gix-pathspec/tests/search/mod.rs | 186 ++++++++++++++++++ 19 files changed, 1035 insertions(+), 22 deletions(-) create mode 100644 gix-pathspec/src/pattern.rs create mode 100644 gix-pathspec/src/search/init.rs create mode 100644 gix-pathspec/src/search/mod.rs create mode 100644 gix-pathspec/tests/fixtures/generated-archives/match_baseline.tar.xz create mode 100644 gix-pathspec/tests/fixtures/generated-archives/match_baseline_dirs.tar.xz create mode 100644 gix-pathspec/tests/fixtures/generated-archives/match_baseline_files.tar.xz create mode 100644 gix-pathspec/tests/fixtures/match_baseline_dirs.sh create mode 100644 gix-pathspec/tests/fixtures/match_baseline_files.sh create mode 100644 gix-pathspec/tests/normalize/mod.rs create mode 100644 gix-pathspec/tests/search/mod.rs diff --git a/Cargo.lock b/Cargo.lock index 9596a44b49d..b60ef6b1922 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1969,6 +1969,7 @@ dependencies = [ "bstr", "gix-attributes 0.16.0", 
"gix-glob 0.10.2", + "gix-path 0.8.4", "gix-testtools", "once_cell", "thiserror", diff --git a/gix-pathspec/Cargo.toml b/gix-pathspec/Cargo.toml index 8fda40e46ba..80c57cf6292 100644 --- a/gix-pathspec/Cargo.toml +++ b/gix-pathspec/Cargo.toml @@ -13,6 +13,7 @@ doctest = false [dependencies] gix-glob = { version = "^0.10.2", path = "../gix-glob" } +gix-path = { version = "^0.8.4", path = "../gix-path" } gix-attributes = { version = "^0.16.0", path = "../gix-attributes" } bstr = { version = "1.3.0", default-features = false, features = ["std"]} diff --git a/gix-pathspec/src/lib.rs b/gix-pathspec/src/lib.rs index 7fea3052d32..18c9faaef0c 100644 --- a/gix-pathspec/src/lib.rs +++ b/gix-pathspec/src/lib.rs @@ -4,18 +4,60 @@ #![forbid(unsafe_code)] use bitflags::bitflags; -use bstr::BString; +use bstr::{BStr, BString, ByteSlice}; +use std::path::PathBuf; + +/// +pub mod normalize { + use std::path::PathBuf; + + /// The error returned by [Pattern::normalize()](super::Pattern::normalize()). + #[derive(Debug, thiserror::Error)] + #[allow(missing_docs)] + pub enum Error { + #[error("The path '{}' is not inside of the worktree '{}'", path.display(), worktree_path.display())] + AbsolutePathOutsideOfWorktree { path: PathBuf, worktree_path: PathBuf }, + #[error("The path '{}' leaves the repository", path.display())] + OutsideOfWorktree { path: PathBuf }, + } +} + +mod pattern; + +/// +pub mod search; /// pub mod parse; +/// A list of pathspec patterns, possibly from a file. +/// +/// Pathspecs are generally relative to the root of the repository. +#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone, Default)] +pub struct Search { + /// Patterns and their associated data in the order they were loaded in or specified, + /// the line number in its source file or its sequence number (_`(pattern, value, line_number)`_). + /// + /// During matching, patterns are tried front to back; [`Search::from_specs()`] moves exclude patterns to the front. 
+ pub patterns: Vec>, + + /// The path from which the patterns were read, or `None` if the patterns + /// don't originate in a file on disk. + pub source: Option, + + /// If `true`, this means all `patterns` are exclude patterns. This means that if there is no match + /// (which would exclude an item), we would actually match it for lack of exclusion. + all_patterns_are_excluded: bool, +} + /// The output of a pathspec [parsing][parse()] operation. It can be used to match against a one or more paths. #[derive(Default, PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone)] pub struct Pattern { - /// The path part of a pathspec. + /// The path part of a pathspec, which is typically a path possibly mixed with glob patterns. + /// Note that it might be an empty string as well. /// - /// `:(top,literal,icase,attr,exclude)some/path` would yield `some/path`. - pub path: BString, + /// For example, `:(top,literal,icase,attr,exclude)some/path` would yield `some/path`. + path: BString, /// All magic signatures that were included in the pathspec. pub signature: MagicSignature, /// The search mode of the pathspec. @@ -24,17 +66,35 @@ pub struct Pattern { /// /// `:(attr:a=one b=):path` would yield attribute `a` and `b`. pub attributes: Vec, - /// If `true`, this was the special `:` spec which acts like `null` + /// If `true`, we are a special Nil pattern and always match. nil: bool, + /// The length of bytes in `path` that belong to the prefix, which will always be matched case-sensitively. + /// That way, even though pathspecs are applied from the top, we can emulate having changed directory into + /// a specific sub-directory in a case-sensitive file-system. + /// Is set by [Pattern::normalize()]. + prefix_len: usize, } impl Pattern { /// Returns `true` if this seems to be a pathspec that indicates that 'there is no pathspec'. /// /// Note that such a spec is `:`. 
- pub fn is_null(&self) -> bool { + pub fn is_nil(&self) -> bool { self.nil } + + /// Return the prefix-portion of the `path` of this spec, a directory. + /// It can be empty if there is no prefix. + /// + /// A prefix is effectively the CWD seen as relative to the working tree. + pub fn prefix(&self) -> &BStr { + self.path[..self.prefix_len].as_bstr() + } + + /// Return the path of this spec, typically used for matching. + pub fn path(&self) -> &BStr { + self.path.as_ref() + } } bitflags! { @@ -47,6 +107,9 @@ bitflags! { const ICASE = 1 << 1; /// Excludes the matching patterns from the previous results const EXCLUDE = 1 << 2; + /// The pattern must match a directory, and not a file. + /// This is equivalent to how it's handled in `gix-glob` + const MUST_BE_DIR = 1 << 3; } } diff --git a/gix-pathspec/src/parse.rs b/gix-pathspec/src/parse.rs index 2315b026543..feac6b95a1a 100644 --- a/gix-pathspec/src/parse.rs +++ b/gix-pathspec/src/parse.rs @@ -12,7 +12,11 @@ pub struct Defaults { /// The default signature. pub signature: MagicSignature, /// The default search-mode. + /// + /// Note that even if it's [`MatchMode::Literal`], the pathspecs will be parsed as usual, but matched verbatim afterwards. pub search_mode: MatchMode, + /// If set, the pathspec will not be parsed but used verbatim. Implies [`MatchMode::Literal`] for `search_mode`. + pub literal: bool, } /// The error returned by [parse()][crate::parse()]. @@ -43,10 +47,20 @@ pub enum Error { impl Pattern { /// Try to parse a path-spec pattern from the given `input` bytes. 
- pub fn from_bytes(input: &[u8], Defaults { signature, search_mode }: Defaults) -> Result { + pub fn from_bytes( + input: &[u8], + Defaults { + signature, + search_mode, + literal, + }: Defaults, + ) -> Result { if input.is_empty() { return Err(Error::EmptyString); } + if literal { + return Ok(Self::from_literal(input, signature)); + } if input.as_bstr() == ":" { return Ok(Pattern { nil: true, @@ -55,11 +69,9 @@ impl Pattern { } let mut p = Pattern { - path: BString::default(), signature, search_mode, - attributes: Vec::new(), - nil: false, + ..Default::default() }; let mut cursor = 0; @@ -72,9 +84,25 @@ impl Pattern { } } - p.path = BString::from(&input[cursor..]); + let mut path = &input[cursor..]; + if path.last() == Some(&b'/') { + p.signature |= MagicSignature::MUST_BE_DIR; + path = &path[..path.len() - 1]; + } + p.path = path.into(); Ok(p) } + + /// Take `input` literally without parsing anything. This will also set our mode to `literal` to allow this pathspec to match `input` verbatim, and + /// use `default_signature` as magic signature. + pub fn from_literal(input: &[u8], default_signature: MagicSignature) -> Self { + Pattern { + path: input.into(), + signature: default_signature, + search_mode: MatchMode::Literal, + ..Default::default() + } + } } fn parse_short_keywords(input: &[u8], cursor: &mut usize) -> Result { diff --git a/gix-pathspec/src/pattern.rs b/gix-pathspec/src/pattern.rs new file mode 100644 index 00000000000..eeabd8f0c9d --- /dev/null +++ b/gix-pathspec/src/pattern.rs @@ -0,0 +1,154 @@ +use crate::{normalize, MagicSignature, MatchMode, Pattern}; +use bstr::{BString, ByteSlice, ByteVec}; +use std::path::{Component, Path, PathBuf}; + +/// Mutation +impl Pattern { + /// Normalize the pattern's path by assuring it's relative to the root of the working tree, and contains + /// no relative path components. Further, it assures that `/` are used as path separator. 
+ /// + /// If `self.path` is a relative path, `prefix` will be put in front of it unless `self.signature` already indicates `TOP`. + /// If `self.path` is an absolute path, we will use `root` to make it worktree relative if possible. + /// + /// `prefix` can be empty, we will still normalize this pathspec to resolve relative path components, and + /// it is assumed not to contain any relative path components, e.g. '', 'a', 'a/b' are valid. + /// `root` is the absolute path to the root of either the worktree or the repository's `git_dir`. + pub fn normalize(&mut self, prefix: &Path, root: &Path) -> Result<(), normalize::Error> { + fn prefix_components_to_subtract(path: &Path) -> usize { + let parent_component_end_bound = path.components().enumerate().fold(None::, |acc, (idx, c)| { + matches!(c, Component::ParentDir).then_some(idx + 1).or(acc) + }); + let count = path + .components() + .take(parent_component_end_bound.unwrap_or(0)) + .map(|c| match c { + Component::ParentDir => 1_isize, + Component::Normal(_) => -1, + _ => 0, + }) + .sum::(); + (count > 0).then_some(count as usize).unwrap_or_default() + } + + let mut path = gix_path::from_bstr(self.path.as_ref()); + let mut num_prefix_components = 0; + if gix_path::is_absolute(path.as_ref()) { + let rela_path = match path.strip_prefix(root) { + Ok(path) => path, + Err(_) => { + return Err(normalize::Error::AbsolutePathOutsideOfWorktree { + path: path.into_owned(), + worktree_path: root.into(), + }) + } + }; + path = rela_path.to_owned().into(); + } else if !prefix.as_os_str().is_empty() && !self.signature.contains(MagicSignature::TOP) { + debug_assert_eq!( + prefix + .components() + .filter(|c| matches!(c, Component::Normal(_))) + .count(), + prefix.components().count(), + "BUG: prefixes must not have relative path components, or calculations here will be wrong so pattern won't match" + ); + num_prefix_components = prefix + .components() + .count() + 
.saturating_sub(prefix_components_to_subtract(path.as_ref())); + path = prefix.join(path).into(); + } + + let assure_path_cannot_break_out_upwards = Path::new(""); + let path = match gix_path::normalize(path.as_ref(), assure_path_cannot_break_out_upwards) { + Some(path) => path, + None => { + return Err(normalize::Error::OutsideOfWorktree { + path: path.into_owned(), + }) + } + }; + + self.path = if path == Path::new(".") { + BString::from(".") + } else { + let cleaned = PathBuf::from_iter(path.components().filter(|c| !matches!(c, Component::CurDir))); + let mut out = gix_path::to_unix_separators_on_windows(gix_path::into_bstr(cleaned)).into_owned(); + self.prefix_len = { + if self.signature.contains(MagicSignature::MUST_BE_DIR) { + out.push(b'/'); + } + let len = out + .find_iter(b"/") + .take(num_prefix_components) + .last() + .unwrap_or_default(); + if self.signature.contains(MagicSignature::MUST_BE_DIR) { + out.pop(); + } + len + }; + out + }; + + Ok(()) + } +} + +/// Access +impl Pattern { + /// Return `true` if this pathspec is negated, which means it will exclude an item from the result set instead of including it. + pub fn is_excluded(&self) -> bool { + self.signature.contains(MagicSignature::EXCLUDE) + } + + /// Translate ourselves to a long display format, that when parsed back will yield the same pattern. 
+ /// + /// Note that the prefix length set by [`Pattern::normalize()`] is not part of the output. + pub fn to_bstring(&self) -> BString { + if self.is_nil() { + ":".into() + } else { + let mut buf: BString = ":(".into(); + if self.signature.contains(MagicSignature::TOP) { + buf.push_str("top,"); + } + if self.signature.contains(MagicSignature::EXCLUDE) { + buf.push_str("exclude,"); + } + if self.signature.contains(MagicSignature::ICASE) { + buf.push_str("icase,"); + } + match self.search_mode { + MatchMode::ShellGlob => {} + MatchMode::Literal => buf.push_str("literal,"), + MatchMode::PathAwareGlob => buf.push_str("glob,"), + } + if self.attributes.is_empty() { + if buf.last() == Some(&b',') { + buf.pop(); + } + } else { + buf.push_str("attr:"); + for attr in &self.attributes { + let attr = attr.as_ref().to_string().replace(',', "\\,"); + buf.push_str(&attr); + buf.push(b' '); + } + buf.pop(); // trailing ' ' + } + buf.push(b')'); + buf.extend_from_slice(&self.path); + if self.signature.contains(MagicSignature::MUST_BE_DIR) { + buf.push(b'/'); + } + buf + } + } +} + +impl std::fmt::Display for Pattern { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + self.to_bstring().fmt(f) + } +} diff --git a/gix-pathspec/src/search/init.rs b/gix-pathspec/src/search/init.rs new file mode 100644 index 00000000000..9016490de93 --- /dev/null +++ b/gix-pathspec/src/search/init.rs @@ -0,0 +1,67 @@ +use crate::search::Spec; +use crate::{MagicSignature, Pattern, Search}; +use std::path::Path; + +/// Create a new specification to support matches from `pathspec`, [normalizing](Pattern::normalize()) it with `prefix` and `root`. 
+fn mapping_from_pattern( + mut pathspec: Pattern, + prefix: &Path, + root: &Path, + sequence_number: usize, +) -> Result, crate::normalize::Error> { + pathspec.normalize(prefix, root)?; + let mut match_all = pathspec.is_nil(); + let glob = { + let mut g = gix_glob::Pattern::from_bytes_without_negation(&pathspec.path).unwrap_or_else(|| { + match_all = true; + gix_glob::Pattern { + text: Default::default(), + mode: gix_glob::pattern::Mode::empty(), + first_wildcard_pos: None, + } + }); + g.mode |= gix_glob::pattern::Mode::ABSOLUTE; + if pathspec.signature.contains(MagicSignature::MUST_BE_DIR) { + g.mode |= gix_glob::pattern::Mode::MUST_BE_DIR; + } + g + }; + + Ok(gix_glob::search::pattern::Mapping { + pattern: glob, + value: Spec { match_all, pathspec }, + sequence_number, + }) +} + +/// Lifecycle +impl Search { + /// Create a search from ready-made `pathspecs`, and [normalize](Pattern::normalize()) them with `prefix` and `root`. + /// `root` is the absolute path to the worktree root, if available, or the `git_dir` in case of bare repositories. + pub fn from_specs( + pathspecs: impl IntoIterator, + prefix: Option<&std::path::Path>, + root: &std::path::Path, + ) -> Result { + let prefix = prefix.unwrap_or(std::path::Path::new("")); + let mut patterns = pathspecs + .into_iter() + .enumerate() + .map(|(idx, pattern)| mapping_from_pattern(pattern, prefix, root, idx)) + .collect::, _>>()?; + + // Excludes should always happen first so we know a match is authoritative (otherwise we could find a non-excluding match first). 
+ patterns.sort_by(|a, b| { + a.value + .pathspec + .is_excluded() + .cmp(&b.value.pathspec.is_excluded()) + .reverse() + }); + Ok(Search { + all_patterns_are_excluded: patterns.iter().all(|s| s.value.pathspec.is_excluded()), + patterns, + source: None, + }) + } +} diff --git a/gix-pathspec/src/search/mod.rs b/gix-pathspec/src/search/mod.rs new file mode 100644 index 00000000000..a8171826e33 --- /dev/null +++ b/gix-pathspec/src/search/mod.rs @@ -0,0 +1,147 @@ +use crate::{MagicSignature, MatchMode, Pattern, Search}; +use bstr::{BStr, BString, ByteSlice}; +use gix_glob::pattern::Case; + +/// A specification specifically for matching, which helps to deal with the special case of empty pathspecs. +/// +/// The latter shouldn't be ignored, but instead have to match everything, while respecting their attributes like before, like +/// 'exclude'. +#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone, Default)] +pub struct Spec { + /// If true, this pattern will unconditionally match everything + pub match_all: bool, + /// The path specification itself. + pub pathspec: Pattern, +} + +/// Describes the pathspec that matched a path during a search. +#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone)] +pub struct Match<'a> { + /// The matching search specification, which contains the pathspec as well. + pub pattern: &'a Spec, + /// The number of the sequence the matching pathspec was in, or the line of pathspec file it was read from if [Search::source] is not `None`. + pub sequence_number: usize, +} + +mod init; + +impl Match<'_> { + /// Return `true` if the pathspec that matched was negative, which excludes this item from the set. + pub fn is_excluded(&self) -> bool { + self.pattern.pathspec.is_excluded() + } +} + +impl Search { + /// Return the first [`Match`] of `relative_path`, or `None`. + /// `is_dir` is true if `relative_path` is a directory. 
+ /// + /// Note that `relative_path` is expected to be starting at the same root as is assumed for this pattern, see [`Pattern::normalize()`]. + // TODO: support for attributes + pub fn pattern_matching_relative_path<'a>( + &self, + relative_path: impl Into<&'a BStr>, + is_dir: Option, + ) -> Option> { + let relative_path = relative_path.into(); + let basename_not_important = None; + let res = self.patterns.iter().find_map(|mapping| { + let ignore_case = mapping.value.pathspec.signature.contains(MagicSignature::ICASE); + let prefix = mapping.value.pathspec.prefix(); + if ignore_case && !prefix.is_empty() { + let pattern_requirement_is_met = relative_path + .get(prefix.len()) + .map_or_else(|| is_dir.unwrap_or(false), |b| *b == b'/'); + if !pattern_requirement_is_met + || relative_path.get(..prefix.len()).map(ByteSlice::as_bstr) != Some(prefix) + { + return None; + } + } + let case = if ignore_case { Case::Fold } else { Case::Sensitive }; + let mut is_match = mapping.value.match_all; + if !is_match { + is_match = if mapping.pattern.first_wildcard_pos.is_none() { + self.match_verbatim(mapping, relative_path, is_dir, case) + } else { + let wildmatch_mode = match mapping.value.pathspec.search_mode { + MatchMode::ShellGlob => Some(gix_glob::wildmatch::Mode::empty()), + MatchMode::Literal => None, + MatchMode::PathAwareGlob => Some(gix_glob::wildmatch::Mode::NO_MATCH_SLASH_LITERAL), + }; + match wildmatch_mode { + Some(wildmatch_mode) => { + let is_match = mapping.pattern.matches_repo_relative_path( + relative_path, + basename_not_important, + is_dir, + case, + wildmatch_mode, + ); + if !is_match { + self.match_verbatim(mapping, relative_path, is_dir, case) + } else { + true + } + } + None => self.match_verbatim(mapping, relative_path, is_dir, case), + } + } + } + is_match.then_some(Match { + pattern: &mapping.value, + sequence_number: mapping.sequence_number, + }) + }); + + if res.is_none() && self.all_patterns_are_excluded { + static MATCH_ALL_STAND_IN: Spec = Spec { 
+ pathspec: Pattern { + path: BString::new(Vec::new()), + signature: MagicSignature::empty(), + search_mode: MatchMode::ShellGlob, + attributes: Vec::new(), + prefix_len: 0, + nil: true, + }, + match_all: true, + }; + Some(Match { + pattern: &MATCH_ALL_STAND_IN, + sequence_number: self.patterns.len(), + }) + } else { + res + } + } + + fn match_verbatim( + &self, + mapping: &gix_glob::search::pattern::Mapping, + relative_path: &BStr, + is_dir: Option, + case: Case, + ) -> bool { + let pattern_len = mapping.value.pathspec.path.len(); + let mut relative_path_ends_with_slash_at_pattern_len = false; + let match_is_allowed = relative_path.get(pattern_len).map_or_else( + || relative_path.len() == pattern_len, + |b| { + relative_path_ends_with_slash_at_pattern_len = *b == b'/'; + relative_path_ends_with_slash_at_pattern_len + }, + ); + let pattern_requirement_is_met = !mapping.pattern.mode.contains(gix_glob::pattern::Mode::MUST_BE_DIR) + || (relative_path_ends_with_slash_at_pattern_len || is_dir.unwrap_or(false)); + + if match_is_allowed && pattern_requirement_is_met { + let dir_or_file = &relative_path[..mapping.value.pathspec.path.len()]; + match case { + Case::Sensitive => mapping.value.pathspec.path == dir_or_file, + Case::Fold => mapping.value.pathspec.path.eq_ignore_ascii_case(dir_or_file), + } + } else { + false + } + } +} diff --git a/gix-pathspec/tests/fixtures/generated-archives/match_baseline.tar.xz b/gix-pathspec/tests/fixtures/generated-archives/match_baseline.tar.xz new file mode 100644 index 00000000000..582b241f7d6 --- /dev/null +++ b/gix-pathspec/tests/fixtures/generated-archives/match_baseline.tar.xz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0ba46801dfab65bcc77c8ce0dc9144d77fd142be847f3340af6ae8fcba44c82d +size 16928 diff --git a/gix-pathspec/tests/fixtures/generated-archives/match_baseline_dirs.tar.xz b/gix-pathspec/tests/fixtures/generated-archives/match_baseline_dirs.tar.xz new file mode 100644 index 
00000000000..69a08dc52b7 --- /dev/null +++ b/gix-pathspec/tests/fixtures/generated-archives/match_baseline_dirs.tar.xz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dfea98320a4b5e336579a0e4df9d8504fd7c870915196e8e2a0d0cae86eaf71e +size 15996 diff --git a/gix-pathspec/tests/fixtures/generated-archives/match_baseline_files.tar.xz b/gix-pathspec/tests/fixtures/generated-archives/match_baseline_files.tar.xz new file mode 100644 index 00000000000..843cc09bfa1 --- /dev/null +++ b/gix-pathspec/tests/fixtures/generated-archives/match_baseline_files.tar.xz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1472e9e4b00d6a417f9be99df435747a139098a0c5b09897955de61792531de2 +size 10388 diff --git a/gix-pathspec/tests/fixtures/generated-archives/parse_baseline.tar.xz b/gix-pathspec/tests/fixtures/generated-archives/parse_baseline.tar.xz index c2c9aa9e4cd..0e0f6304456 100644 --- a/gix-pathspec/tests/fixtures/generated-archives/parse_baseline.tar.xz +++ b/gix-pathspec/tests/fixtures/generated-archives/parse_baseline.tar.xz @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c2360c31f4aeed0014364202790747a2feb1c7a9555246cd02fe08fb76a41c3e -size 9504 +oid sha256:123da727b33802a3a2dc80e948b07cbf133c2d66129693f4896cba2f6b92b02f +size 9528 diff --git a/gix-pathspec/tests/fixtures/match_baseline_dirs.sh b/gix-pathspec/tests/fixtures/match_baseline_dirs.sh new file mode 100644 index 00000000000..34e006ddf4d --- /dev/null +++ b/gix-pathspec/tests/fixtures/match_baseline_dirs.sh @@ -0,0 +1,84 @@ +#!/bin/bash +set -eu -o pipefail + +git init; +git init sub +(cd sub + : >empty + git add empty + git commit -m init-for-submodule +) + +git init parent +(cd parent + function baseline() { + local args="" + + for arg in "$@"; do + if [[ $arg == *" "* ]]; then + echo "BUG: Argument '$arg' contains a space - we use that for separating pathspecs right now" >&2 + exit 1 + fi + args="$args -c submodule.active=$arg" + done + + { + echo 
"$@" + git $args submodule + echo -n ';' + } >> baseline.git + } + + for p in a bb dir/b dir/bb dir/nested/c cc; do + git submodule add ../sub $p + git config --unset submodule.$p.active + done + git commit -m "init" + + git submodule > paths + + baseline ':' + baseline ':!' + baseline 'a' + baseline ':(icase)A' + baseline ':(icase,exclude)A' + baseline ':(icase,exclude)*/B*' + baseline ':(icase,exclude)*/B?' + baseline 'di' + baseline 'di?' + baseline 'di?/' + baseline 'dir*' + baseline 'dir/*' + baseline ':(glob)dir*' + baseline ':(glob,icase,exclude)dir*' + baseline ':(glob)dir/*' + baseline 'dir' + baseline 'dir/' + baseline ':(literal)dir' + baseline ':(literal)dir/' + baseline 'dir/nested' + baseline 'dir/nested/' + baseline ':(exclude)dir/' + baseline ':(icase)DIR' + baseline ':(icase)DIR/' + baseline ':!a' + baseline ':' ':!bb' + baseline ':!bb' + baseline 'a/' + baseline 'bb' + baseline 'dir/b' + baseline 'dir/b/' + # ["dir/b"] == [] + baseline '*/b/' git-inconsistency + baseline '*b' + baseline '*b*' + baseline '*b?' + baseline '*/b' + baseline '*/b?' + baseline '*/b*' + baseline '*c' + baseline '*/c' + baseline ':(glob)**/c' + baseline ':(glob)**/c?' + baseline ':(glob)**/c*' +) diff --git a/gix-pathspec/tests/fixtures/match_baseline_files.sh b/gix-pathspec/tests/fixtures/match_baseline_files.sh new file mode 100644 index 00000000000..7d1a4e88aa5 --- /dev/null +++ b/gix-pathspec/tests/fixtures/match_baseline_files.sh @@ -0,0 +1,79 @@ +#!/bin/bash +set -eu -o pipefail + +git init; + +function baseline() { + for arg in "$@"; do + if [[ $arg == *" "* ]]; then + echo "BUG: Argument '$arg' contains a space - we use that for separating pathspecs right now" >&2 + exit 1 + fi + done + + { + echo "$@" + git ls-files "$@" + echo -n ';' + } >> baseline.git +} + +: >goo +: >'g[o][o]' +: >bar +mkdir sub && :>sub/bar +git add . && git commit -m init +# this is to avoid issues on windows, which might not be able to manifest these files. 
+git -c core.protectNTFS=false update-index --add --cacheinfo 100644 "$(git rev-parse HEAD:goo)" "g*" +git update-index --add --cacheinfo 100644 "$(git rev-parse HEAD:goo)" "!a" +for p in bar bAr BAR foo/bar foo/bAr foo/BAR fOo/bar fOo/bAr fOo/BAR FOO/bar FOO/bAr FOO/BAR; do + git -c core.ignoreCase=false update-index --add --cacheinfo 100644 "$(git rev-parse HEAD:goo)" "$p" +done + +git ls-files > paths + +baseline ':(literal)g*' +baseline 'sub/' +baseline 'sub' +baseline 'sub/*' +baseline 'sub*' +baseline ':(literal)g*' +baseline ':(glob)g*' +baseline ':(exclude,literal)g*' +baseline 'g*' +baseline ':(exclude)g*' +baseline ':(literal)?*' +baseline ':(exclude,literal)?*' +baseline '?*' +baseline ':(exclude)?*' +baseline 'g[o][o]' +# ["g[o][o]", "goo"] == ["g[o][o]"] +baseline ':(icase)G[O][o]' git-inconsistency +baseline ':(literal)g[o][o]' +baseline ':(literal,icase)G[o][O]' +baseline ':(glob)g[o][o]' +# ["g[o][o]", "goo"] == ["g[o][o]"] +baseline ':(glob,icase)g[o][O]' git-inconsistency +baseline ':(glob)**/bar' +baseline ':(literal)**/bar' +baseline '**/bar' +baseline '*/bar' +baseline ':(glob)*bar' +baseline ':(glob)**bar' +baseline '*bar' +baseline '*bar*' +baseline 'bar' +baseline 'bar/' +baseline 'sub/bar/' +baseline 'sub/bar' +baseline '!a' +baseline '?a' +baseline 'foo/' +baseline 'foo' +baseline 'foo/*' +baseline 'foo*' +baseline ':(icase)foo/' +baseline ':(icase)foo' +baseline ':(icase)foo/*' +baseline ':(icase)foo*' +baseline ':(icase)foo/bar' diff --git a/gix-pathspec/tests/fixtures/parse_baseline.sh b/gix-pathspec/tests/fixtures/parse_baseline.sh index 276513e7564..56525e9ebec 100755 --- a/gix-pathspec/tests/fixtures/parse_baseline.sh +++ b/gix-pathspec/tests/fixtures/parse_baseline.sh @@ -48,8 +48,11 @@ baseline ':! 
some/path' # short_signatures baseline ':/some/path' +baseline '://some/path' baseline ':^some/path' +baseline ':^^some/path' baseline ':!some/path' +baseline ':!!some/path' baseline ':/!some/path' baseline ':!/^/:some/path' @@ -144,3 +147,12 @@ baseline ':(top' # glob_and_literal_keywords_present baseline ':(glob,literal)some/path' +# trailing slash +baseline ':(glob,literal)some/path/' +baseline 'some/path/' +baseline 'path/' + +baseline 'a/b/' +baseline 'a/' +baseline '!a' +baseline '\!a' diff --git a/gix-pathspec/tests/normalize/mod.rs b/gix-pathspec/tests/normalize/mod.rs new file mode 100644 index 00000000000..0602fb7f9f7 --- /dev/null +++ b/gix-pathspec/tests/normalize/mod.rs @@ -0,0 +1,100 @@ +use std::path::Path; + +#[test] +fn removes_relative_path_components() -> crate::Result { + for (input_path, expected_path, expected_prefix) in [ + ("c", "a/b/c", "a/b"), + ("../c", "a/c", "a"), + ("../b/c", "a/b/c", "a"), // this is a feature - prefix components once consumed by .. are lost + ("../c/d", "a/c/d", "a"), + ("../../c/d", "c/d", ""), + ("../../c/d/", "c/d", ""), + ("./c", "a/b/c", "a/b"), + ("../../c", "c", ""), + ("../..", ".", ""), + ("../././c", "a/c", "a"), + ("././/./c", "a/b/c", "a/b"), + ("././/./c/", "a/b/c", "a/b"), + ("././/./../c/d/", "a/c/d", "a"), + ] { + let spec = normalized_spec(input_path, "a/b", "")?; + assert_eq!(spec.path(), expected_path); + assert_eq!(spec.prefix(), expected_prefix, "{input_path} -> {expected_path}"); + } + Ok(()) +} + +#[test] +fn single_dot_is_special_and_directory_is_implied_without_trailing_slash() -> crate::Result { + for (input_path, expected) in [(".", "."), ("./", ".")] { + let spec = normalized_spec(input_path, "", "/repo")?; + assert_eq!(spec.path(), expected); + assert_eq!(spec.prefix(), ""); + } + Ok(()) +} + +#[test] +fn absolute_path_made_relative() -> crate::Result { + for (input_path, expected) in [("/repo/a", "a"), ("/repo/a/..//.///b", "b")] { + let spec = normalized_spec(input_path, "", "/repo")?; 
+ assert_eq!(spec.path(), expected); + } + Ok(()) +} + +#[test] +fn relative_top_patterns_ignore_the_prefix() -> crate::Result { + let spec = normalized_spec(":(top)c", "a/b", "")?; + assert_eq!(spec.path(), "c"); + assert_eq!(spec.prefix(), ""); + Ok(()) +} + +#[test] +fn absolute_top_patterns_ignore_the_prefix_but_are_made_relative() -> crate::Result { + let spec = normalized_spec(":(top)/a/b", "prefix-ignored", "/a")?; + assert_eq!(spec.path(), "b"); + assert_eq!(spec.prefix(), ""); + Ok(()) +} + +#[test] +fn relative_path_breaks_out_of_working_tree() { + let err = normalized_spec("../a", "", "").unwrap_err(); + assert_eq!(err.to_string(), "The path '../a' leaves the repository"); + let err = normalized_spec("../../b", "a", "").unwrap_err(); + assert_eq!( + err.to_string(), + format!( + "The path '{}' leaves the repository", + if cfg!(windows) { "a\\../../b" } else { "a/../../b" } + ) + ); +} + +#[test] +fn absolute_path_breaks_out_of_working_tree() { + let err = normalized_spec("/path/to/repo/..///./a", "", "/path/to/repo").unwrap_err(); + assert_eq!(err.to_string(), "The path '..///./a' leaves the repository"); + let err = normalized_spec("/path/to/repo/../../../dev", "", "/path/to/repo").unwrap_err(); + assert_eq!(err.to_string(), "The path '../../../dev' leaves the repository"); +} + +#[test] +fn absolute_path_escapes_worktree() { + assert_eq!( + normalized_spec("/dev", "", "/path/to/repo").unwrap_err().to_string(), + "The path '/dev' is not inside of the worktree '/path/to/repo'" + ); +} + +fn normalized_spec( + path: &str, + prefix: &str, + root: &str, +) -> Result { + let mut spec = gix_pathspec::parse(path.as_bytes(), Default::default()).expect("valid"); + spec.normalize(Path::new(prefix), Path::new(root))?; + Ok(spec) +} diff --git a/gix-pathspec/tests/parse/mod.rs b/gix-pathspec/tests/parse/mod.rs index 7fc4eebff3a..f735243120c 100644 --- a/gix-pathspec/tests/parse/mod.rs +++ b/gix-pathspec/tests/parse/mod.rs @@ -13,7 +13,17 @@ fn baseline() { 
res.is_ok(), *exit_code == 0, "{pattern:?} disagrees with baseline: {res:?}" - ) + ); + if let Ok(pat) = res { + let actual = pat.to_bstring(); + assert_eq!( + pat, + gix_pathspec::parse(actual.as_ref(), Default::default()).expect("still valid"), + "{pattern} != {actual}: display must roundtrip into actual pattern" + ); + } + let p = gix_pathspec::Pattern::from_literal(pattern, Default::default()); + assert!(matches!(p.search_mode, MatchMode::Literal)); } } @@ -32,7 +42,7 @@ struct NormalizedPattern { impl From for NormalizedPattern { fn from(p: Pattern) -> Self { NormalizedPattern { - path: p.path, + path: p.path().to_owned(), signature: p.signature, search_mode: p.search_mode, attributes: p diff --git a/gix-pathspec/tests/parse/valid.rs b/gix-pathspec/tests/parse/valid.rs index f6728178f67..11bba7d4397 100644 --- a/gix-pathspec/tests/parse/valid.rs +++ b/gix-pathspec/tests/parse/valid.rs @@ -17,11 +17,57 @@ fn repeated_matcher_keywords() { check_valid_inputs(input); } +#[test] +fn glob_negations_are_always_literal() { + check_valid_inputs([("!a", pat_with_path("!a")), ("\\!a", pat_with_path("\\!a"))]); +} + +#[test] +fn literal_default_prevents_parsing() { + let pattern = gix_pathspec::parse( + ":".as_bytes(), + gix_pathspec::parse::Defaults { + signature: MagicSignature::EXCLUDE, + search_mode: MatchMode::PathAwareGlob, + literal: true, + }, + ) + .expect("valid"); + assert!(!pattern.is_nil()); + assert_eq!(pattern.path(), ":"); + assert!(matches!(pattern.search_mode, MatchMode::Literal)); + + let input = ":(literal)f[o][o]"; + let pattern = gix_pathspec::parse( + input.as_bytes(), + gix_pathspec::parse::Defaults { + signature: MagicSignature::TOP, + search_mode: MatchMode::Literal, + literal: true, + }, + ) + .expect("valid"); + assert_eq!(pattern.path(), input, "no parsing happens at all"); + assert!(matches!(pattern.search_mode, MatchMode::Literal)); + + let pattern = gix_pathspec::parse( + input.as_bytes(), + gix_pathspec::parse::Defaults { + signature: 
MagicSignature::TOP, + search_mode: MatchMode::Literal, + literal: false, + }, + ) + .expect("valid"); + assert_eq!(pattern.path(), "f[o][o]", "in literal default mode, we still parse"); + assert!(matches!(pattern.search_mode, MatchMode::Literal)); +} + #[test] fn there_is_no_pathspec_pathspec() { check_against_baseline(":"); let pattern = gix_pathspec::parse(":".as_bytes(), Default::default()).expect("valid"); - assert!(pattern.is_null()); + assert!(pattern.is_nil()); let actual: NormalizedPattern = pattern.into(); assert_eq!(actual, pat_with_path("")); @@ -30,11 +76,12 @@ fn there_is_no_pathspec_pathspec() { ":".as_bytes(), gix_pathspec::parse::Defaults { signature: MagicSignature::EXCLUDE, - search_mode: MatchMode::Literal, + search_mode: MatchMode::PathAwareGlob, + literal: false, }, ) .expect("valid"); - assert!(pattern.is_null()); + assert!(pattern.is_nil()); } #[test] @@ -42,13 +89,14 @@ fn defaults_are_used() -> crate::Result { let defaults = gix_pathspec::parse::Defaults { signature: MagicSignature::EXCLUDE, search_mode: MatchMode::Literal, + literal: false, }; let p = gix_pathspec::parse(".".as_bytes(), defaults)?; - assert_eq!(p.path, "."); + assert_eq!(p.path(), "."); assert_eq!(p.signature, defaults.signature); assert_eq!(p.search_mode, defaults.search_mode); assert!(p.attributes.is_empty()); - assert!(!p.is_null()); + assert!(!p.is_nil()); Ok(()) } @@ -81,6 +129,10 @@ fn whitespace_in_pathspec() { ":! 
some/path", pat_with_path_and_sig(" some/path", MagicSignature::EXCLUDE), ), + ( + ":!!some/path", + pat_with_path_and_sig("some/path", MagicSignature::EXCLUDE), + ), ]; check_valid_inputs(inputs) @@ -111,6 +163,14 @@ fn short_signatures() { check_valid_inputs(inputs) } +#[test] +fn trailing_slash_is_turned_into_magic_signature_and_removed() { + check_valid_inputs([ + ("a/b/", pat_with_path_and_sig("a/b", MagicSignature::MUST_BE_DIR)), + ("a/", pat_with_path_and_sig("a", MagicSignature::MUST_BE_DIR)), + ]); +} + #[test] fn signatures_and_searchmodes() { let inputs = vec![ @@ -134,11 +194,21 @@ fn signatures_and_searchmodes() { ), ( ":(top,literal,icase,attr,exclude)some/path", - pat("some/path", MagicSignature::all(), MatchMode::Literal, vec![]), + pat( + "some/path", + MagicSignature::TOP | MagicSignature::EXCLUDE | MagicSignature::ICASE, + MatchMode::Literal, + vec![], + ), ), ( ":(top,glob,icase,attr,exclude)some/path", - pat("some/path", MagicSignature::all(), MatchMode::PathAwareGlob, vec![]), + pat( + "some/path", + MagicSignature::TOP | MagicSignature::EXCLUDE | MagicSignature::ICASE, + MatchMode::PathAwareGlob, + vec![], + ), ), ]; diff --git a/gix-pathspec/tests/pathspec.rs b/gix-pathspec/tests/pathspec.rs index fdac47f9e25..d83fefbcb2e 100644 --- a/gix-pathspec/tests/pathspec.rs +++ b/gix-pathspec/tests/pathspec.rs @@ -1,3 +1,5 @@ pub use gix_testtools::Result; +mod normalize; mod parse; +mod search; diff --git a/gix-pathspec/tests/search/mod.rs b/gix-pathspec/tests/search/mod.rs new file mode 100644 index 00000000000..d62e7443559 --- /dev/null +++ b/gix-pathspec/tests/search/mod.rs @@ -0,0 +1,186 @@ +use std::path::Path; + +#[test] +fn directories() -> crate::Result { + baseline::run("directory", true, baseline::directories) +} + +#[test] +fn prefixes_are_always_case_insensitive() -> crate::Result { + let path = gix_testtools::scripted_fixture_read_only("match_baseline_files.sh")?.join("paths"); + let items = baseline::parse_paths(path)?; + + for (spec, 
prefix, expected) in [ + (":(icase)bar", "FOO", &["FOO/BAR", "FOO/bAr", "FOO/bar"] as &[_]), + (":(icase)bar", "F", &[]), + (":(icase)bar", "FO", &[]), + (":(icase)../bar", "fOo", &["BAR", "bAr", "bar"]), + ("../bar", "fOo", &["bar"]), + (":(icase)../bar", "fO", &["BAR", "bAr", "bar"]), // prefixes are virtual, and don't have to exist at all. + ( + ":(icase)../foo/bar", + "FOO", + &[ + "FOO/BAR", "FOO/bAr", "FOO/bar", "fOo/BAR", "fOo/bAr", "fOo/bar", "foo/BAR", "foo/bAr", "foo/bar", + ], + ), + ("../foo/bar", "FOO", &["foo/bar"]), + ( + ":(icase)../foo/../fOo/bar", + "FOO", + &[ + "FOO/BAR", "FOO/bAr", "FOO/bar", "fOo/BAR", "fOo/bAr", "fOo/bar", "foo/BAR", "foo/bAr", "foo/bar", + ], + ), + ] { + let search = gix_pathspec::Search::from_specs( + gix_pathspec::parse(spec.as_bytes(), Default::default()), + Some(Path::new(prefix)), + Path::new(""), + )?; + let actual: Vec<_> = items + .iter() + .filter(|relative_path| { + search + .pattern_matching_relative_path(relative_path.as_str(), Some(false)) + .is_some() + }) + .collect(); + assert_eq!(actual, expected, "{spec} {prefix}"); + } + Ok(()) +} + +#[test] +fn files() -> crate::Result { + baseline::run("file", false, baseline::files) +} + +mod baseline { + use bstr::{BString, ByteSlice}; + use std::path::{Path, PathBuf}; + + pub fn run( + name: &str, + items_are_dirs: bool, + init: impl FnOnce() -> crate::Result<(Vec, Vec)>, + ) -> crate::Result { + let (items, expected) = init()?; + let tests = expected.len(); + for expected in expected { + let search = gix_pathspec::Search::from_specs(expected.pathspecs, None, Path::new(""))?; + let actual: Vec<_> = items + .iter() + .filter(|path| { + search + .pattern_matching_relative_path(path.as_str(), Some(items_are_dirs)) + .map_or(false, |m| !m.is_excluded()) + }) + .cloned() + .collect(); + let matches_expectation = actual == expected.matches; + assert_eq!( + matches_expectation, + expected.is_consistent, + "{} - {actual:?} == {:?}", + search + .patterns + .iter() + .map(|p| 
format!("{}", p.value.pathspec)) + .collect::>() + .join(", "), + expected.matches + ); + } + eprintln!("{tests} {name} matches OK"); + Ok(()) + } + + #[derive(Debug)] + pub struct Expected { + pub pathspecs: Vec, + pub matches: Vec, + /// If true, this means that the baseline is different from what we get, and that our solution is consistent with the rules. + pub is_consistent: bool, + } + + pub fn parse_paths(path: PathBuf) -> std::io::Result> { + let buf = std::fs::read(path)?; + Ok(buf.lines().map(BString::from).map(|s| s.to_string()).collect()) + } + + fn parse_blocks(input: &[u8], parse_matches: impl Fn(&[u8]) -> Vec) -> Vec { + input + .split(|b| *b == b';') + .filter(|b| !b.is_empty()) + .map(move |block| { + let mut lines = block.lines(); + let mut is_inconsistent = false; + let pathspecs = lines + .next() + .expect("pathspec") + .split(|b| *b == b' ') + .filter(|spec| { + is_inconsistent = spec.as_bstr() == "git-inconsistency"; + !is_inconsistent + }) + .filter_map(|s| (!s.trim().is_empty()).then(|| s.trim())) + .map(|pathspec| gix_pathspec::parse(pathspec, Default::default()).expect("valid pathspec")) + .collect(); + Expected { + pathspecs, + matches: parse_matches(lines.as_bytes()), + is_consistent: !is_inconsistent, + } + }) + .collect() + } + + mod submodule { + use bstr::ByteSlice; + + pub fn matches_from_status(input: &[u8]) -> impl Iterator + '_ { + input.lines().map(|line| { + let matches = line[0] == b' '; + assert_eq!(!matches, line[0] == b'-'); + let mut tokens = line[1..].split(|b| *b == b' ').skip(1); + let path = tokens.next().expect("path").to_str().expect("valid UTF-8"); + (matches, path.to_owned()) + }) + } + + pub fn parse_expected(input: &[u8]) -> Vec { + super::parse_blocks(input, |block| { + matches_from_status(block) + .filter_map(|(matches, module_path)| matches.then_some(module_path)) + .collect() + }) + } + } + + mod files { + use bstr::{BString, ByteSlice}; + pub fn parse_expected(input: &[u8]) -> Vec { + 
super::parse_blocks(input, |block| { + block.lines().map(BString::from).map(|s| s.to_string()).collect() + }) + } + } + + pub fn directories() -> crate::Result<(Vec, Vec)> { + let root = gix_testtools::scripted_fixture_read_only("match_baseline_dirs.sh")?.join("parent"); + let buf = std::fs::read(root.join("paths"))?; + let items = submodule::matches_from_status(&buf) + .map(|(_matches, path)| path) + .collect(); + let expected = submodule::parse_expected(&std::fs::read(root.join("baseline.git"))?); + Ok((items, expected)) + } + + pub fn files() -> crate::Result<(Vec, Vec)> { + let root = gix_testtools::scripted_fixture_read_only("match_baseline_files.sh")?; + let items = parse_paths(root.join("paths"))?; + let expected = files::parse_expected(&std::fs::read(root.join("baseline.git"))?); + Ok((items, expected)) + } +}