diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml index a89335759..c2b52ce07 100644 --- a/.github/workflows/publish.yml +++ b/.github/workflows/publish.yml @@ -22,4 +22,4 @@ jobs: - name: Publish to crates.io env: CARGO_REGISTRY_TOKEN: ${{ secrets.CARGO_REGISTRY_TOKEN }} - run: for p in xvc-logging xvc-test-helper xvc-walker xvc-ecs xvc-config xvc-core xvc-storage xvc-file xvc-pipelines xvc ; do cargo publish --package $p ; done + run: for p in xvc-logging xvc-test-helper xvc-walker xvc-ecs xvc-config xvc-core xvc-storage xvc-file xvc-pipelines xvc ; do cargo publish --package $p ; done diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml index bfa3b31bd..175b3e68a 100644 --- a/.github/workflows/rust.yml +++ b/.github/workflows/rust.yml @@ -28,7 +28,7 @@ jobs: - build: nightly rust: nightly # rust: nightly-2024-01-01 - test-args: --features test-ci --no-fail-fast + test-args: --features test-ci # --no-fail-fast ## for submitters other than me, I'll add another job here. 
# test-args: --no-fail-fast # test-args: --all-features @@ -67,6 +67,7 @@ jobs: brew install tree brew install lsd brew install python3 + brew install minio/stable/mc - name: Git config for automated Git tests run: git config --global user.name 'Xvc Rabbit' && git config --global user.email 'rabbit@xvc.dev' && git config --global init.defaultBranch main @@ -105,6 +106,9 @@ jobs: - name: Check if xvc is in PATH run: tree $GITHUB_WORKSPACE && xvc --help + - name: Run Current Dev Tests + run: $GITHUB_WORKSPACE/run-tests.zsh + - name: Test if: matrix.coverage uses: actions-rs/cargo@v1 diff --git a/CHANGELOG.md b/CHANGELOG.md index 7fce520fe..d89997404 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,19 @@ +# Xvc Changelog + ## Unreleased +## 0.6.11 (2024-09-04) + +- Bump dependencies +- Replace globset with fast-glob for memory usage +- Remove --details option from xvc check-ignore +- Fixed xvc check-ignore to work with supplied paths +- Fixed loading store targets from xvc file list +- Directory targets in various commands don't require / at the end when they only exist in the cache +- Removed some duplicate tests from ignore +- Minio tests now use mc instead of s3cmd +- Add a step to run a subset of tests in CI for faster feedback + ## 0.6.10 (2024-08-04) - PR: diff --git a/book/src/ref/xvc-check-ignore.md b/book/src/ref/xvc-check-ignore.md index 039a7d5dc..62c072a5f 100644 --- a/book/src/ref/xvc-check-ignore.md +++ b/book/src/ref/xvc-check-ignore.md @@ -4,7 +4,7 @@ Check whether a path is ignored or whitelisted by Xvc. -## Synopsis +## Synopsis ```console $ xvc check-ignore --help @@ -17,9 +17,6 @@ Arguments: Targets to check. If no targets are provided, they are read from stdin Options: - -d, --details - Show the exclude patterns along with each target path. 
A series of lines are printed in this format: :: - --ignore-filename Filename that contains ignore rules @@ -27,9 +24,6 @@ Options: [default: .xvcignore] - -n, --non-matching - Include the target paths which don’t match any pattern in the --details list. All fields in each line, except for , will be empty. Has no effect without --details - -h, --help Print help (see a summary with '-h') @@ -37,33 +31,67 @@ Options: ## Examples +```console +$ git init +... +$ xvc init +``` + +You can add files and directories to be ignored by Xvc to `.xvcignore` files. + +```console +$ zsh -cl "echo 'my-dir/my-file' >> .xvcignore" +``` + By default it checks the files supplied from `stdin`. -```shell -$ xvc check-ignore +```console +$ zsh -cl 'echo my-dir/my-file | xvc check-ignore' +[IGNORE] [CWD]/my-dir/my-file + +``` + +The `.xvcignore` file format is identical to [`.gitignore` file format](https://git-scm.com/docs/gitignore). + +```console +$ cat .xvcignore + +# Add patterns of files xvc should ignore, which could improve +# the performance. +# It's in the same format as .gitignore files. + +.DS_Store my-dir/my-file + ``` -If you supply paths from the CLI, they are checked instead. +If you supply paths from the CLI, they are checked against the ignore rules in `.xvcignore`. -```shell +```console $ xvc check-ignore my-dir/my-file another-dir/another-file +[IGNORE] [CWD]/my-dir/my-file +[NO MATCH] [CWD]/another-dir/another-file + ``` -If you're looking which `.xvcignore` file ignores (or whitelists) a certain path, you can use `--details`. +You can also add whitelist patterns to `.xvcignore` files. -```shell -$ xvc check-ignore --details my-dir/my-file another-dir/another-file +```console +$ zsh -cl "echo '!another-dir/*' >> .xvcignore" ``` -`.xvcignore` file format is identical to [`.gitignore` file format](https://git-scm.com/docs/gitignore). -This utility can be used to check any other ignore rules in other files as well. 
-You can specify an alternative ignore filename with `--ignore-filename` option. -The below command is identical to `git check-ignore` and should give the same results. +```console +$ xvc check-ignore my-dir/my-file another-dir/another-file +[IGNORE] [CWD]/my-dir/my-file +[WHITELIST] [CWD]/another-dir/another-file -```shell -$ xvc check-ignore --ignore-filename .gitignore ``` +This utility can be used to check any other ignore rules in other files as well. +You can specify an alternative ignore filename with `--ignore-filename` option. +The below command is identical to `git check-ignore` and should give the same results. +```console +$ xvc check-ignore --ignore-filename .gitignore +``` diff --git a/book/src/ref/xvc-file-list.md b/book/src/ref/xvc-file-list.md index 1df07397d..5b415b7e2 100644 --- a/book/src/ref/xvc-file-list.md +++ b/book/src/ref/xvc-file-list.md @@ -168,7 +168,7 @@ Total #: 30 Workspace Size: 51195 Cached Size: 0 ``` -By default the command hides dotfiles. If you also want to show them, you can use `--show-dot-files`/`-a` flag. +By default the command hides dotfiles. If you also want to show them, you can use `--show-dot-files`/`-a` flag. ```console $ xvc file list --sort name-asc --show-dot-files @@ -209,7 +209,7 @@ Total #: 32 Workspace Size: 51443 Cached Size: 0 ``` -You can also hide the summary below the list to get only the list of files. +You can also hide the summary below the list to get only the list of files. ```console $ xvc file list --sort name-asc --no-summary @@ -276,7 +276,7 @@ If you add another set of files as hardlinks to the cached copies, it will print the second letter as `H`. ```console -$ xvc file track dir-0002 --recheck-method hardlink +$ xvc file track dir-0002/ --recheck-method hardlink $ xvc file list dir-0002 FH 2005 [..] 
447933dc 447933dc dir-0002/file-0005.bin diff --git a/book/src/ref/xvc-storage-new-local.md b/book/src/ref/xvc-storage-new-local.md index 9869f51af..e5dc4911e 100644 --- a/book/src/ref/xvc-storage-new-local.md +++ b/book/src/ref/xvc-storage-new-local.md @@ -117,7 +117,7 @@ $ xvc file remove --from-storage backup dir-0001/ `--name NAME` is not checked to be unique but you should use unique storage names to refer them later. -`--path PATH` should be accessible for writing and shouldn't already exist. +`--path PATH` should be accessible for writing and shouldn't already exist. ## Technical Details diff --git a/book/src/ref/xvc-storage-new-minio.md b/book/src/ref/xvc-storage-new-minio.md index 5110776a7..d2688adde 100644 --- a/book/src/ref/xvc-storage-new-minio.md +++ b/book/src/ref/xvc-storage-new-minio.md @@ -78,7 +78,7 @@ $ xvc file track dir-0001 You can define a storage bucket as storage and begin to use it. ```console,ignore -$ xvc storage new minio --name backup --endpoint http://emresult.com:9000 --bucket-name one --region us-east-1 --storage-prefix xvc +$ xvc storage new minio --name backup --endpoint http://e1.xvc.dev:9000 --bucket-name xvc-tests --region us-east-1 --storage-prefix xvc ``` @@ -147,7 +147,6 @@ You may need to consider this when you have servers running in exact URLs. If you have a `http://minio.example.com:9001` as a Minio server, you may want to supply `http://example.com:9001` as the endpoint, and `minio` as the bucket name to form the correct URL. This behavior may change in the future. - ## Technical Details This command requires Xvc to be compiled with `minio` feature, which is _on_ by default. @@ -162,5 +161,3 @@ A file that's found in `.xvc/{{HASH_PREFIX}}/{{CACHE_PATH}}` is saved to `http:/ `{{REPO_ID}}` is the unique identifier for the repository created during `xvc init`. Hence if you use a common storage for different Xvc projects, their files are kept under different directories. There is no inter-project deduplication. 
- - diff --git a/book/src/ref/xvc-storage-new-rsync.md b/book/src/ref/xvc-storage-new-rsync.md index 6bb2af925..4535fb3a7 100644 --- a/book/src/ref/xvc-storage-new-rsync.md +++ b/book/src/ref/xvc-storage-new-rsync.md @@ -69,7 +69,7 @@ $ xvc file track dir-0001 You can define a storage bucket as storage and begin to use it. ```console -$ xvc storage new rsync --name backup --host one.emresult.com --user iex --storage-dir /tmp/xvc-backup/ +$ xvc storage new rsync --name backup --host e1.xvc.dev --user iex --storage-dir /tmp/xvc-backup/ ``` diff --git a/config/Cargo.toml b/config/Cargo.toml index e768e7804..57e2c258c 100644 --- a/config/Cargo.toml +++ b/config/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "xvc-config" -version = "0.6.10" +version = "0.6.11" edition = "2021" description = "Xvc configuration management" authors = ["Emre Şahin "] @@ -16,8 +16,8 @@ name = "xvc_config" crate-type = ["rlib"] [dependencies] -xvc-logging = { version = "0.6.10", path = "../logging" } -xvc-walker = { version = "0.6.10", path = "../walker" } +xvc-logging = { version = "0.6.11", path = "../logging" } +xvc-walker = { version = "0.6.11", path = "../walker" } ## Cli and config @@ -33,7 +33,7 @@ crossbeam-channel = "^0.5" crossbeam = "^0.8" ## File system -walkdir = "^2.4" +walkdir = "^2.5" ## Logging and errors thiserror = "^1.0" @@ -45,4 +45,4 @@ fern = { version = "^0.6", features = ["colored"] } regex = "^1.10" strum = "^0.26" strum_macros = "^0.26" -lazy_static = "^1.4" +lazy_static = "^1.5" diff --git a/core/Cargo.toml b/core/Cargo.toml index 80ca436ce..3190d1763 100644 --- a/core/Cargo.toml +++ b/core/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "xvc-core" -version = "0.6.10" +version = "0.6.11" edition = "2021" description = "Xvc core for common elements for all commands" authors = ["Emre Şahin "] @@ -16,13 +16,13 @@ name = "xvc_core" crate-type = ["rlib"] [dependencies] -xvc-config = { version = "0.6.10", path = "../config" } -xvc-logging = { version = "0.6.10", path = 
"../logging" } -xvc-ecs = { version = "0.6.10", path = "../ecs" } -xvc-walker = { version = "0.6.10", path = "../walker" } +xvc-config = { version = "0.6.11", path = "../config" } +xvc-logging = { version = "0.6.11", path = "../logging" } +xvc-ecs = { version = "0.6.11", path = "../ecs" } +xvc-walker = { version = "0.6.11", path = "../walker" } ## Cli and config -clap = { version = "^4.4", features = ["derive"] } +clap = { version = "^4.5", features = ["derive"] } directories-next = "2.0" ## Hashing @@ -38,20 +38,20 @@ serde = { version = "^1.0", features = ["derive"] } serde_yaml = "^0.9" serde_json = "^1.0" rmp = "^0.8" -rmp-serde = "1.1.2" +rmp-serde = "1.3.0" toml = "^0.8" ## Network reqwest = { version = "^0.11", features = ["blocking", "json", "gzip"] } ## Parallelization -rayon = "^1.8" +rayon = "^1.10" crossbeam-channel = "^0.5" crossbeam = "^0.8" ## File system jwalk = "^0.8" -walkdir = "^2.4" +walkdir = "^2.5" relative-path = { version = "^1.9", features = ["serde"] } path-absolutize = "^3.1" glob = "^0.3" @@ -60,6 +60,7 @@ glob = "^0.3" thiserror = "^1.0" anyhow = "^1.0" log = "^0.4" +peak_alloc = "^0.2" ### meta-logging-in-format is required for sled: https://github.com/spacejam/sled/issues/1384 fern = { version = "^0.6", features = ["colored"] } @@ -71,8 +72,8 @@ paste = "1.0" regex = "^1.10" strum = "^0.26" strum_macros = "^0.26" -lazy_static = "^1.4" -uuid = { version = "^1.6", features = ["serde", "v4", "fast-rng"] } +lazy_static = "^1.5" +uuid = { version = "^1.10", features = ["serde", "v4", "fast-rng"] } hex = { version = "^0.4", features = ["serde"] } cached = "^0.53" derive_more = "^0.99" @@ -80,6 +81,6 @@ itertools = "^0.13" [dev-dependencies] -xvc-test-helper = { version = "0.6.10", path = "../test_helper/" } -proptest = "^1.4" +xvc-test-helper = { version = "0.6.11", path = "../test_helper/" } +proptest = "^1.5" test-case = "^3.3" diff --git a/core/src/check_ignore/mod.rs b/core/src/check_ignore/mod.rs index ceb313c62..b0bf6bb92 100644 --- 
a/core/src/check_ignore/mod.rs +++ b/core/src/check_ignore/mod.rs @@ -1,6 +1,5 @@ //! xvc check ignore CLI handling module use crate::error::Result; -use crate::util::expand_globs_to_paths; use crate::util::xvcignore::COMMON_IGNORE_PATTERNS; use crate::{XvcPath, XvcRoot, XVCIGNORE_FILENAME}; use clap::Parser; @@ -8,9 +7,8 @@ use clap::Parser; use log::trace; use std::io::BufRead; use std::path::{Path, PathBuf}; -use xvc_config::{UpdateFromXvcConfig, XvcConfig}; -use xvc_logging::{output, XvcOutputSender}; -use xvc_walker::{build_ignore_rules, check_ignore, IgnoreRules, MatchResult, WalkOptions}; +use xvc_logging::{output, watch, XvcOutputSender}; +use xvc_walker::{build_ignore_patterns, IgnoreRules, MatchResult, WalkOptions}; // DIFFERENCES from DVC // merged --all and --details, they are the same now @@ -19,10 +17,6 @@ use xvc_walker::{build_ignore_rules, check_ignore, IgnoreRules, MatchResult, Wal #[command()] /// Check whether xvcignore files ignore a path in an XVC repository pub struct CheckIgnoreCLI { - #[arg(short, long, alias = "all")] - /// Show the exclude patterns along with each target path. - /// A series of lines are printed in this format: :: - details: bool, #[arg( long, default_value = XVCIGNORE_FILENAME, @@ -31,30 +25,13 @@ pub struct CheckIgnoreCLI { /// /// This can be set to .gitignore to test whether Git and Xvc work the same way. ignore_filename: String, - #[arg(short, long)] - /// Include the target paths which don’t match any pattern in the --details list. - /// All fields in each line, except for , will be empty. Has no effect without --details. - non_matching: bool, + #[arg()] /// Targets to check. /// If no targets are provided, they are read from stdin. 
targets: Vec, } -impl UpdateFromXvcConfig for CheckIgnoreCLI { - fn update_from_conf(self, conf: &XvcConfig) -> xvc_config::error::Result> { - let details = self.details || conf.get_bool("check_ignore.details")?.option; - let non_matching = self.non_matching || conf.get_bool("check_ignore.non_matching")?.option; - let ignore_filename = self.ignore_filename.clone(); - Ok(Box::new(Self { - details, - non_matching, - ignore_filename, - targets: self.targets.clone(), - })) - } -} - /// # `xvc check_ignore` /// /// Check whether paths are ignored by Xvc or not. @@ -73,29 +50,32 @@ pub fn cmd_check_ignore( opts: CheckIgnoreCLI, ) -> Result<()> { let conf = xvc_root.config(); - let opts = opts.update_from_conf(conf)?; let current_dir = conf.current_dir()?; let walk_options = WalkOptions { ignore_filename: Some(opts.ignore_filename.clone()), include_dirs: true, }; - let initial_rules = IgnoreRules::try_from_patterns(xvc_root, COMMON_IGNORE_PATTERNS)?; - let ignore_rules = build_ignore_rules( - initial_rules, - current_dir, + + let ignore_rules = build_ignore_patterns( + COMMON_IGNORE_PATTERNS, + xvc_root, &walk_options.ignore_filename.unwrap_or_default(), )?; - if !opts.targets.is_empty() { - let path_bufs = expand_globs_to_paths(current_dir, &opts.targets)?; - let mut xvc_paths = Vec::::new(); - for p in path_bufs { - xvc_paths.push(XvcPath::new(xvc_root, current_dir, &p)?); - } - check_ignore_paths(xvc_root, &opts, &ignore_rules, &xvc_paths) + watch!(ignore_rules); + watch!(opts.targets); + + if !opts.targets.is_empty() { + let xvc_paths = opts + .targets + .iter() + .map(|p| XvcPath::new(xvc_root, current_dir, &PathBuf::from(p))) + .collect::>>()?; + watch!(xvc_paths); + check_ignore_paths(xvc_root, &ignore_rules, &xvc_paths) } else { - check_ignore_stdin(input, output_snd, xvc_root, &opts, &ignore_rules) + check_ignore_stdin(input, output_snd, xvc_root, &ignore_rules) } } @@ -103,38 +83,39 @@ fn check_ignore_stdin( input: R, output_snd: &XvcOutputSender, xvc_root: 
&XvcRoot, - opts: &CheckIgnoreCLI, ignore_rules: &IgnoreRules, ) -> Result<()> { let conf = xvc_root.config(); let current_dir = conf.current_dir()?; let mut buffer = String::new(); let lines_iter = input.lines(); - lines_iter.map_while(|line| { - if let Ok(line) = line{ - XvcPath::new(xvc_root, current_dir, &PathBuf::from(line)).ok() - } else { - None - }}).for_each(|xvc_path| { - let absolute_path = xvc_path.to_absolute_path(xvc_root); - let res = check_ignore_line(ignore_rules, &absolute_path, opts.non_matching); - if !res.trim().is_empty() { - output!(output_snd, "{}", res); - } - buffer.clear(); - }); + lines_iter + .map_while(|line| { + if let Ok(line) = line { + XvcPath::new(xvc_root, current_dir, &PathBuf::from(line)).ok() + } else { + None + } + }) + .for_each(|xvc_path| { + let absolute_path = xvc_path.to_absolute_path(xvc_root); + let res = check_ignore_line(ignore_rules, &absolute_path); + if !res.trim().is_empty() { + output!(output_snd, "{}", res); + } + buffer.clear(); + }); Ok(()) } fn check_ignore_paths( xvc_root: &XvcRoot, - opts: &CheckIgnoreCLI, ignore_rules: &IgnoreRules, xvc_paths: &[XvcPath], ) -> Result<()> { for path in xvc_paths { let absolute_path = path.to_absolute_path(xvc_root); - let output = check_ignore_line(ignore_rules, &absolute_path, opts.non_matching); + let output = check_ignore_line(ignore_rules, &absolute_path); trace!("output: {}", output); println!("{}", output) } @@ -144,18 +125,10 @@ fn check_ignore_paths( /// Check whether the records match to the full_path. It reports the details if /// set true. Non_matching inverts the reporting. 
-fn check_ignore_line( - ignore_rules: &IgnoreRules, - absolute_path: &Path, - show_no_match: bool, -) -> String { - match check_ignore(ignore_rules, absolute_path) { +fn check_ignore_line(ignore_rules: &IgnoreRules, absolute_path: &Path) -> String { + match ignore_rules.check(absolute_path) { MatchResult::NoMatch => { - if show_no_match { - format!("[NO MATCH] {}", absolute_path.to_string_lossy()) - } else { - String::new() - } + format!("[NO MATCH] {}", absolute_path.to_string_lossy()) } MatchResult::Ignore => { format!("[IGNORE] {}", absolute_path.to_string_lossy()) diff --git a/core/src/error.rs b/core/src/error.rs index f821dc3ea..2f9bdab72 100644 --- a/core/src/error.rs +++ b/core/src/error.rs @@ -130,11 +130,6 @@ pub enum Error { #[from] source: relative_path::FromPathError, }, - #[error("Glob error: {source}")] - GlobSetError { - #[from] - source: xvc_walker::globset::Error, - }, #[error("Cannot find parent path")] CannotFindParentPath { path: PathBuf }, diff --git a/core/src/lib.rs b/core/src/lib.rs index 207280777..3a8efe036 100755 --- a/core/src/lib.rs +++ b/core/src/lib.rs @@ -222,7 +222,10 @@ default = "default" default_params_file = "params.yaml" # Number of command processes to run concurrently process_pool_size = 4 -# + +[check-ignore] +# Show details by default +details = false "##, guid = guid, diff --git a/core/src/types/xvcdigest/xvc_metadata_digest.rs b/core/src/types/xvcdigest/xvc_metadata_digest.rs index 47148b04b..f9bd6959e 100644 --- a/core/src/types/xvcdigest/xvc_metadata_digest.rs +++ b/core/src/types/xvcdigest/xvc_metadata_digest.rs @@ -39,11 +39,7 @@ impl XvcMetadataDigest { 0u64 }; - let size = if let Some(size) = xvc_metadata.size { - size - } else { - 0u64 - }; + let size = xvc_metadata.size.unwrap_or_default(); let mut bytes: [u8; 32] = [0; 32]; bytes[..8].clone_from_slice(&ft.to_le_bytes()); diff --git a/core/src/util/file.rs b/core/src/util/file.rs index 36d833dfa..47f00a296 100644 --- a/core/src/util/file.rs +++ 
b/core/src/util/file.rs @@ -11,20 +11,27 @@ use std::os::unix::fs as unix_fs; use std::os::windows::fs as windows_fs; use std::path::{Path, PathBuf}; +use std::sync::{Arc, RwLock}; use xvc_logging::watch; -use xvc_walker::{IgnoreRules, PathMetadata, WalkOptions}; +use xvc_walker::{IgnoreRules, PathMetadata, SharedIgnoreRules, WalkOptions}; use crate::error::Error; use crate::error::Result; use crate::CHANNEL_BOUND; + use crossbeam_channel::{bounded, Receiver, Sender}; + + + use crate::types::{xvcpath::XvcPath, xvcroot::XvcRoot}; use super::pmp::XvcPathMetadataProvider; -use super::xvcignore::walk_parallel; +use super::xvcignore::{walk_parallel, COMMON_IGNORE_PATTERNS}; use super::XvcPathMetadataMap; +/// +/// /// A parallel directory walker. /// It starts from `start_dir` and sends [PathMetadata] by traversing all child directories. /// It uses [xvc_walker::walk_parallel] after building an empty [IgnoreRules]. @@ -33,19 +40,18 @@ use super::XvcPathMetadataMap; /// It doesn't check any ignore files. /// It even returns `.git` and `.xvc` directory contents. pub fn path_metadata_channel(sender: Sender>, start_dir: &Path) -> Result<()> { - let initial_rules = IgnoreRules::empty(start_dir); let walk_options = WalkOptions { ignore_filename: None, include_dirs: true, }; + let ignore_rules = Arc::new(RwLock::new(IgnoreRules::empty(start_dir, None))); let (w_sender, w_receiver) = bounded(CHANNEL_BOUND); - let (ignore_sender, _ignore_receiver) = bounded(CHANNEL_BOUND); - xvc_walker::walk_parallel( - initial_rules, + + xvc_walker::walk_parallel::walk_parallel( + ignore_rules, start_dir, walk_options, w_sender, - ignore_sender, )?; for pm in w_receiver { sender.send(Ok(pm?))?; @@ -73,8 +79,8 @@ pub fn pipe_filter_path_errors( /// NOTE: /// This function only returns a snapshot of the repository. /// If you want to handle events after this initial snapshot, see [xvc_walker::notify::make_watcher]. 
-pub fn all_paths_and_metadata(xvc_root: &XvcRoot) -> (XvcPathMetadataMap, IgnoreRules) { - walk_parallel(xvc_root, true).unwrap() +pub fn all_paths_and_metadata(xvc_root: &XvcRoot) -> (XvcPathMetadataMap, SharedIgnoreRules) { + walk_parallel(xvc_root, COMMON_IGNORE_PATTERNS, true).unwrap() } /// Returns a compiled [glob::Pattern] by prepending it with `pipeline_rundir`. diff --git a/core/src/util/git.rs b/core/src/util/git.rs index c3258d4a0..5f738c4ca 100644 --- a/core/src/util/git.rs +++ b/core/src/util/git.rs @@ -1,11 +1,13 @@ //! Git operations use std::path::{Path, PathBuf}; -use xvc_walker::{build_ignore_rules, AbsolutePath, IgnoreRules}; +use xvc_walker::{build_ignore_patterns, AbsolutePath, IgnoreRules}; use crate::error::Result; use crate::GIT_DIR; +use super::xvcignore::COMMON_IGNORE_PATTERNS; + /// Check whether a path is inside a Git repository. /// It returns `None` if not, otherwise returns the closest directory with `.git`. /// It works by checking `.git` directories in parents, until no more parent left. @@ -27,9 +29,11 @@ pub fn inside_git(path: &Path) -> Option { /// Returns [xvc_walker::IgnoreRules] for `.gitignore` /// It's used to check whether a path is already ignored by Git. 
pub fn build_gitignore(git_root: &AbsolutePath) -> Result { - let initial_rules = IgnoreRules::empty(git_root); - - let rules = build_ignore_rules(initial_rules, git_root, ".gitignore")?; + let rules = build_ignore_patterns( + COMMON_IGNORE_PATTERNS, + git_root, + ".gitignore".to_owned().as_ref(), + )?; Ok(rules) } @@ -41,7 +45,6 @@ mod test { use test_case::test_case; use xvc_logging::watch; use xvc_test_helper::*; - use xvc_walker::check_ignore; use xvc_walker::MatchResult as M; #[test_case("myfile.txt" , ".gitignore", "/myfile.txt" => matches M::Ignore ; "myfile.txt")] @@ -50,28 +53,29 @@ mod test { #[test_case("mydir/myfile.txt" , ".gitignore", "" => matches M::NoMatch ; "non ignore")] #[test_case("mydir/myfile.txt" , ".gitignore", "mydir/**" => matches M::Ignore ; "ignore dir star 2")] #[test_case("mydir/myfile.txt" , ".gitignore", "mydir/*" => matches M::Ignore ; "ignore dir star")] - #[test_case("mydir/yourdir/myfile.txt" , "mydir/.gitignore", "yourdir/**" => matches M::Ignore ; "ignore deep dir star 2")] #[test_case("mydir/yourdir/myfile.txt" , "mydir/.gitignore", "yourdir/*" => matches M::Ignore ; "ignore deep dir star")] - #[test_case("mydir/myfile.txt" , "hebelep/.gitignore", "hebelep/myfile.txt" => matches M::NoMatch ; "non ignore from dir")] + #[test_case("mydir/yourdir/myfile.txt" , "mydir/.gitignore", "yourdir/**" => matches M::Ignore ; "ignore deep dir star 2")] + #[test_case("mydir/myfile.txt" , "another-dir/.gitignore", "another-dir/myfile.txt" => matches M::NoMatch ; "non ignore from dir")] fn test_gitignore(path: &str, gitignore_path: &str, ignore_line: &str) -> M { test_logging(log::LevelFilter::Trace); let git_root = temp_git_dir(); + watch!(git_root); let path = git_root.join(PathBuf::from(path)); + watch!(path); let gitignore_path = git_root.join(PathBuf::from(gitignore_path)); + watch!(gitignore_path); if let Some(ignore_dir) = gitignore_path.parent() { + watch!(ignore_dir); fs::create_dir_all(ignore_dir).unwrap(); + 
watch!(ignore_dir.exists()); } fs::write(&gitignore_path, format!("{}\n", ignore_line)).unwrap(); + watch!(gitignore_path.exists()); - let gitignore = build_ignore_rules( - IgnoreRules::empty(&git_root), - gitignore_path.parent().unwrap(), - ".gitignore", - ) - .unwrap(); + let gitignore = build_ignore_patterns("", &git_root, ".gitignore").unwrap(); watch!(gitignore); - check_ignore(&gitignore, &path) + gitignore.check(&path) } } diff --git a/core/src/util/mod.rs b/core/src/util/mod.rs index 0af4d545a..ad93e05c4 100644 --- a/core/src/util/mod.rs +++ b/core/src/util/mod.rs @@ -7,55 +7,19 @@ pub mod store; pub mod xvcignore; use std::collections::HashMap; -use std::fmt::Display; use std::io::{self, Read}; -use std::path::{Path, PathBuf}; use std::thread::sleep; use std::time::Duration; use crossbeam_channel::{bounded, Receiver}; -use glob::glob; use xvc_logging::watch; -use crate::error::{Error, Result}; +use crate::error::Result; use crate::{XvcMetadata, XvcPath, CHANNEL_BOUND}; /// A hashmap to store [XvcMetadata] for [XvcPath] pub type XvcPathMetadataMap = HashMap; -/// Expands the given glob `targets` to paths under `current_dir`. -/// It uses [glob::glob] to travers and expand the paths. -/// WARNING: -/// This function doesn't consider any ignore rules in traversal. -/// It may be better to use `xvc_walker::walk_parallel` first and -/// [crate::util::file::glob_paths] to filter the paths. 
-pub fn expand_globs_to_paths(current_dir: &Path, targets: &[T]) -> Result> -where - T: AsRef + Display, -{ - let current_dir = current_dir.to_str().ok_or(Error::UnicodeError { - cause: current_dir.as_os_str().to_os_string(), - })?; - let mut paths = Vec::::new(); - for t in targets { - let glob_t = format!("{}/{}", current_dir, t); - match glob(&glob_t) { - Ok(glob_path_it) => { - for p in glob_path_it { - match p { - Ok(path) => paths.push(path), - Err(source) => { - Error::GlobError { source }.error(); - } - } - } - } - Err(source) => return Err(Error::GlobPatternError { source }.error()), - } - } - Ok(paths) -} - /// Converts stdin input to a channel. /// /// It works by creating a thread inside. diff --git a/core/src/util/pmp.rs b/core/src/util/pmp.rs index 80718d9e2..659fc6711 100644 --- a/core/src/util/pmp.rs +++ b/core/src/util/pmp.rs @@ -6,14 +6,13 @@ use std::sync::{Arc, Mutex, RwLock}; use std::thread::{self, JoinHandle}; use std::time::Duration; use xvc_logging::{error, uwr, watch, XvcOutputSender}; -use xvc_walker::{build_ignore_rules, make_watcher, IgnoreRules, MatchResult, PathEvent}; +use xvc_walker::{build_ignore_patterns, make_watcher, IgnoreRules, MatchResult, PathEvent}; use crate::error::Error; use crate::error::Result; use crate::util::xvcignore::COMMON_IGNORE_PATTERNS; use crate::{XvcFileType, XVCIGNORE_FILENAME}; use crossbeam_channel::{bounded, RecvError, Select, Sender}; -use xvc_walker::check_ignore; use crate::types::{xvcpath::XvcPath, xvcroot::XvcRoot}; use crate::XvcMetadata; @@ -36,8 +35,8 @@ pub struct XvcPathMetadataProvider { impl XvcPathMetadataProvider { /// Create a new PathMetadataProvider pub fn new(output_sender: &XvcOutputSender, xvc_root: &XvcRoot) -> Result { - let initial_rules = IgnoreRules::try_from_patterns(xvc_root, COMMON_IGNORE_PATTERNS)?; - let ignore_rules = build_ignore_rules(initial_rules, xvc_root, XVCIGNORE_FILENAME)?; + let ignore_rules = + build_ignore_patterns(COMMON_IGNORE_PATTERNS, xvc_root, 
XVCIGNORE_FILENAME)?; let path_map = Arc::new(RwLock::new(HashMap::new())); let (watcher, event_receiver) = make_watcher(ignore_rules.clone())?; @@ -194,10 +193,7 @@ impl XvcPathMetadataProvider { for entry in glob::glob(glob)? { match entry { Ok(entry) => { - if matches!( - check_ignore(&self.ignore_rules, &entry), - MatchResult::Ignore - ) { + if matches!(&self.ignore_rules.check(&entry), MatchResult::Ignore) { continue; } else { let xvc_path = XvcPath::new(&self.xvc_root, &self.xvc_root, &entry)?; diff --git a/core/src/util/xvcignore.rs b/core/src/util/xvcignore.rs index 90a79e44b..9d9119eec 100644 --- a/core/src/util/xvcignore.rs +++ b/core/src/util/xvcignore.rs @@ -8,9 +8,9 @@ use crossbeam_channel::{bounded, Sender}; use std::sync::{Arc, RwLock}; use std::thread; -use xvc_logging::{warn, XvcOutputSender}; -use xvc_walker::Result as XvcWalkerResult; +use xvc_logging::{warn, watch, XvcOutputSender}; use xvc_walker::{self, IgnoreRules, PathMetadata, WalkOptions}; +use xvc_walker::{Result as XvcWalkerResult, SharedIgnoreRules}; /// We ignore `.git` directories even we are not using `.git` pub const COMMON_IGNORE_PATTERNS: &str = ".xvc\n.git\n"; @@ -26,27 +26,29 @@ pub const COMMON_IGNORE_PATTERNS: &str = ".xvc\n.git\n"; /// /// - `xvc_root`: The root structure for Xvc /// - `include_dirs`: Whether to include directories themselves. -/// If `false`, only the actual files in the repository are listed. +/// If `false`, only the actual files in the repository are listed. /// /// ## Returns /// /// - `XvcPathMetadataMap`: A hash map of files. Keys are [XvcPath], values are their -/// [XvcMetadata]. +/// [XvcMetadata]. /// - `IgnoreRules`: The rules that were produced while reading the directories. -/// This is returned here to prevent a second traversal for ignores. +/// This is returned here to prevent a second traversal for ignores. 
pub fn walk_serial( output_snd: &XvcOutputSender, xvc_root: &XvcRoot, include_dirs: bool, ) -> Result<(XvcPathMetadataMap, IgnoreRules)> { - // We assume ignore_src is among the directories created - let initial_rules = IgnoreRules::try_from_patterns(xvc_root, COMMON_IGNORE_PATTERNS)?; let walk_options = WalkOptions { - ignore_filename: Some(XVCIGNORE_FILENAME.to_string()), + ignore_filename: Some(XVCIGNORE_FILENAME.to_owned()), include_dirs, }; - let (res_paths, ignore_rules) = - xvc_walker::walk_serial(output_snd, initial_rules, xvc_root, &walk_options)?; + let (res_paths, ignore_rules) = xvc_walker::walk_serial::walk_serial( + output_snd, + COMMON_IGNORE_PATTERNS, + xvc_root, + &walk_options, + )?; let pmp: XvcPathMetadataMap = res_paths .iter() .filter_map(|pm| { @@ -78,56 +80,41 @@ pub fn walk_serial( /// /// - `xvc_root`: The root structure for Xvc /// - `include_dirs`: Whether to include directories themselves. -/// If `false`, only the actual files in the repository are listed. +/// If `false`, only the actual files in the repository are listed. /// /// ## Returns /// /// - `XvcPathMetadataMap`: A hash map of files. Keys are [XvcPath], values are their -/// [XvcMetadata]. +/// [XvcMetadata]. /// - `IgnoreRules`: The rules that were produced while reading the directories. -/// This is returned here to prevent a second traversal for ignores. +/// This is returned here to prevent a second traversal for ignores. 
pub fn walk_parallel( xvc_root: &XvcRoot, + global_ignore_rules: &str, include_dirs: bool, -) -> Result<(XvcPathMetadataMap, IgnoreRules)> { +) -> Result<(XvcPathMetadataMap, SharedIgnoreRules)> { let (sender, receiver) = bounded::<(XvcPath, XvcMetadata)>(CHANNEL_BOUND); - let (ignore_sender, ignore_receiver) = bounded::>(CHANNEL_BOUND); - - walk_channel( + watch!(sender); + let ignore_rules = Arc::new(RwLock::new(IgnoreRules::from_global_patterns( xvc_root, - COMMON_IGNORE_PATTERNS, - Some(XVCIGNORE_FILENAME.to_string()), - include_dirs, - sender, - ignore_sender, - )?; + Some(XVCIGNORE_FILENAME), + global_ignore_rules, + ))); - let pusher = thread::spawn(move || { + watch!(ignore_rules); + + walk_channel(xvc_root, ignore_rules.clone(), include_dirs, sender)?; + + let pmm = thread::spawn(move || { let mut pmm = XvcPathMetadataMap::new(); for (path, md) in receiver.iter() { + watch!(path); pmm.insert(path, md); } pmm - }); - - let mut ignore_rules = IgnoreRules::empty(xvc_root); - let ignore_rules_thread = thread::spawn(move || { - for ignore_rule in ignore_receiver { - if let Ok(ignore_rule) = ignore_rule { - assert!(ignore_rules.root == ignore_rule.root); - ignore_rules.merge_with(&ignore_rule).unwrap(); - } else { - warn!("Error while collecting ignore rules"); - } - } - ignore_rules - }); - - let pmm = pusher.join().map_err(|e| Error::FSWalkerError { - error: format!("{:?}", e), - })?; - - let ignore_rules = ignore_rules_thread.join()?; + }) + .join() + .map_err(Error::from)?; Ok((pmm, ignore_rules)) } @@ -145,7 +132,7 @@ pub fn walk_parallel( /// - `xvc_root`: The repository root /// - `initial_patterns`: A set of patterns arranged similar to an `.xvcignore` (`.gitignore`) content. /// - `ignore_filename`: The name of the ignore files to be loaded for ignore rules. -/// (ex: `.xvcignore`, `.ignore`, or `.gitignore`) +/// (ex: `.xvcignore`, `.ignore`, or `.gitignore`) /// - `include_dirs`: Whether to send directory records themselves. 
/// If `false`, only the files in directories are sent. /// - `xpm_upstream`: The channel this function sends the paths and metadata. @@ -162,28 +149,18 @@ pub fn walk_parallel( /// These overlapping rules can be merged with [merge_ignores]. pub fn walk_channel( xvc_root: &XvcRoot, - initial_patterns: &str, - ignore_filename: Option, + ignore_rules: SharedIgnoreRules, include_dirs: bool, xpm_upstream: Sender<(XvcPath, XvcMetadata)>, - ignore_upstream: Sender>, ) -> Result<()> { - let initial_rules = IgnoreRules::try_from_patterns(xvc_root, initial_patterns)?; let walk_options = WalkOptions { - ignore_filename, + ignore_filename: ignore_rules.read()?.ignore_filename.clone(), include_dirs, }; let (path_sender, path_receiver) = bounded::>(CHANNEL_BOUND); - let (ignore_sender, ignore_receiver) = - bounded::>>>(CHANNEL_BOUND); - xvc_walker::walk_parallel( - initial_rules, - xvc_root, - walk_options, - path_sender, - ignore_sender, - )?; + xvc_walker::walk_parallel::walk_parallel(ignore_rules, xvc_root, walk_options, path_sender)?; + crossbeam::scope(|s| { s.spawn(|_| { for result in path_receiver { @@ -211,28 +188,6 @@ pub fn walk_channel( } } }); - - s.spawn(|_| { - for ignore_rule in ignore_receiver { - match ignore_rule { - Ok(ir) => { - ir.read() - .map(|ir| { - ignore_upstream - .send(Ok(ir.clone())) - .map_err(|e| { - Error::from(e).warn(); - }) - .unwrap_or_default(); - }) - .unwrap_or_default(); - } - Err(e) => { - e.warn(); - } - } - } - }); }) .map_err(Error::from)?; Ok(()) diff --git a/ecs/Cargo.toml b/ecs/Cargo.toml index 3be065b20..938b7b819 100644 --- a/ecs/Cargo.toml +++ b/ecs/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "xvc-ecs" -version = "0.6.10" +version = "0.6.11" edition = "2021" description = "Entity-Component System for Xvc" authors = ["Emre Şahin "] @@ -16,16 +16,16 @@ name = "xvc_ecs" crate-type = ["rlib"] [dependencies] -xvc-logging = { version = "0.6.10", path = "../logging" } +xvc-logging = { version = "0.6.11", path = "../logging" } ## 
Serialization serde = { version = "^1.0", features = ["derive"] } serde_json = "^1.0" rmp = "^0.8" -rmp-serde = "1.1.2" +rmp-serde = "1.3.0" ## Parallelization -rayon = "^1.8" +rayon = "^1.10" crossbeam-channel = "^0.5" ## Logging and errors @@ -35,7 +35,7 @@ fern = { version = "^0.6", features = ["colored"] } thiserror = "^1.0" ## Misc -lazy_static = "^1.4" +lazy_static = "^1.5" rand = "^0.8" diff --git a/ecs/src/ecs/xvcstore.rs b/ecs/src/ecs/xvcstore.rs index 2c2186456..bb7b504ea 100644 --- a/ecs/src/ecs/xvcstore.rs +++ b/ecs/src/ecs/xvcstore.rs @@ -274,6 +274,21 @@ where HStore::from(s) } + /// Runs `predicate` for all elements and returns true if one of them is true. + /// + /// `predicate` must be a function or closure that returns `bool`. + pub fn any(&self, predicate: F) -> bool + where + F: Fn(&XvcEntity, &T) -> bool, + { + for (e, v) in self.map.iter() { + if predicate(e, v) { + return true; + } + } + false + } + /// Returns the first element of the map /// /// This is useful when there is only one element after [Self::filter] diff --git a/file/Cargo.toml b/file/Cargo.toml index cf3f138e0..552ab8611 100644 --- a/file/Cargo.toml +++ b/file/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "xvc-file" -version = "0.6.10" +version = "0.6.11" edition = "2021" description = "File tracking, versioning, upload and download functions for Xvc" authors = ["Emre Şahin "] @@ -21,16 +21,16 @@ test = true bench = true [dependencies] -xvc-logging = { version = "0.6.10", path = "../logging" } -xvc-config = { version = "0.6.10", path = "../config" } -xvc-core = { version = "0.6.10", path = "../core" } -xvc-ecs = { version = "0.6.10", path = "../ecs" } -xvc-walker = { version = "0.6.10", path = "../walker" } -xvc-storage = { version = "0.6.10", path = "../storage", default-features = false } +xvc-logging = { version = "0.6.11", path = "../logging" } +xvc-config = { version = "0.6.11", path = "../config" } +xvc-core = { version = "0.6.11", path = "../core" } +xvc-ecs = { version = 
"0.6.11", path = "../ecs" } +xvc-walker = { version = "0.6.11", path = "../walker" } +xvc-storage = { version = "0.6.11", path = "../storage", default-features = false } ## Cli and config -clap = { version = "^4.4", features = ["derive"] } +clap = { version = "^4.5", features = ["derive"] } directories-next = "2.0" ## Hashing @@ -46,18 +46,18 @@ serde = { version = "^1.0", features = ["derive"] } serde_yaml = "^0.9" serde_json = "^1.0" rmp = "^0.8" -rmp-serde = "1.1.2" +rmp-serde = "1.3.0" toml = "^0.8" ## Parallelization -rayon = "^1.8" +rayon = "^1.10" crossbeam-channel = "^0.5" crossbeam = "^0.8" dashmap = { version = "^6.0", features = ["serde", "rayon"] } ## File system jwalk = "^0.8" -walkdir = "^2.4" +walkdir = "^2.5" reflink = { version = "^0.1", optional = true } relative-path = { version = "^1.9", features = ["serde"] } path-absolutize = "^3.1" @@ -67,6 +67,7 @@ glob = "^0.3" thiserror = "^1.0" log = "^0.4" anyhow = "^1.0" +peak_alloc = "^0.2" ### meta-logging-in-format is required for sled: https://github.com/spacejam/sled/issues/1384 fern = { version = "^0.6", features = ["colored"] } @@ -81,8 +82,8 @@ regex = "^1.10" chrono = "^0.4" strum = "^0.26" strum_macros = "^0.26" -lazy_static = "^1.4" -uuid = { version = "^1.6", features = ["serde", "v4", "fast-rng"] } +lazy_static = "^1.5" +uuid = { version = "^1.10", features = ["serde", "v4", "fast-rng"] } hex = { version = "^0.4", features = ["serde"] } url = { version = "^2.5", features = ["serde"] } itertools = "^0.13" @@ -94,5 +95,5 @@ default = ["reflink"] reflink = ["dep:reflink"] [dev-dependencies] -xvc-test-helper = { version = "0.6.10", path = "../test_helper/" } +xvc-test-helper = { version = "0.6.11", path = "../test_helper/" } shellfn = "^0.1" diff --git a/file/src/bring/mod.rs b/file/src/bring/mod.rs index 56208d465..5cdb51075 100644 --- a/file/src/bring/mod.rs +++ b/file/src/bring/mod.rs @@ -3,8 +3,8 @@ //! - [BringCLI] defines the command line options. //! //! 
- [cmd_bring] is the entry point for the command. -//! Uses [fetch] and [crate::recheck::cmd_recheck] to bring the file and copy/link it to the -//! workspace. +//! Uses [fetch] and [crate::recheck::cmd_recheck] to bring the file and copy/link it to the +//! workspace. use crate::common::{load_targets_from_store, move_to_cache}; @@ -65,7 +65,7 @@ pub fn fetch(output_snd: &XvcOutputSender, xvc_root: &XvcRoot, opts: &BringCLI) let storage = get_storage_record(output_snd, xvc_root, &opts.storage)?; let current_dir = xvc_root.config().current_dir()?; - let targets = load_targets_from_store(xvc_root, current_dir, &opts.targets)?; + let targets = load_targets_from_store(output_snd, xvc_root, current_dir, &opts.targets)?; let force = opts.force; watch!(targets); diff --git a/file/src/carry_in/mod.rs b/file/src/carry_in/mod.rs index 46ce65e06..b14931a7f 100644 --- a/file/src/carry_in/mod.rs +++ b/file/src/carry_in/mod.rs @@ -113,7 +113,7 @@ pub fn cmd_carry_in( let opts = cli_opts.update_from_conf(conf)?; watch!(opts); let current_dir = conf.current_dir()?; - let targets = load_targets_from_store(xvc_root, current_dir, &opts.targets)?; + let targets = load_targets_from_store(output_snd, xvc_root, current_dir, &opts.targets)?; watch!(targets); let stored_xvc_path_store = xvc_root.load_store::()?; diff --git a/file/src/common/gitignore.rs b/file/src/common/gitignore.rs index 8ea79be10..3c85b9e00 100644 --- a/file/src/common/gitignore.rs +++ b/file/src/common/gitignore.rs @@ -9,10 +9,10 @@ use std::io::Write; use std::thread::JoinHandle; use xvc_core::util::git::build_gitignore; -use crate::Result; +use crate::{Result, CHANNEL_CAPACITY}; use xvc_core::{XvcPath, XvcRoot}; use xvc_logging::{debug, error, info, uwr, XvcOutputSender}; -use xvc_walker::{check_ignore, AbsolutePath, IgnoreRules, MatchResult}; +use xvc_walker::{AbsolutePath, IgnoreRules, MatchResult}; /// Used to signal ignored files and directories to the ignore handler pub enum IgnoreOperation { @@ -39,7 +39,7 @@ 
pub fn make_ignore_handler( output_snd: &XvcOutputSender, xvc_root: &XvcRoot, ) -> Result<(Sender, JoinHandle<()>)> { - let (sender, receiver) = crossbeam_channel::unbounded(); + let (sender, receiver) = crossbeam_channel::bounded(CHANNEL_CAPACITY); let output_snd = output_snd.clone(); let xvc_root = xvc_root.absolute_path().clone(); @@ -55,7 +55,7 @@ pub fn make_ignore_handler( let path = dir.to_absolute_path(&xvc_root).to_path_buf(); if !ignore_dirs.contains(&dir) - && matches!(check_ignore(&gitignore, &path), MatchResult::NoMatch) + && matches!(gitignore.check(&path), MatchResult::NoMatch) { ignore_dirs.push(dir); } @@ -63,7 +63,7 @@ pub fn make_ignore_handler( IgnoreOperation::IgnoreFile { file } => { let path = file.to_absolute_path(&xvc_root).to_path_buf(); if !ignore_files.contains(&file) - && matches!(check_ignore(&gitignore, &path), MatchResult::NoMatch) + && matches!(gitignore.check(&path), MatchResult::NoMatch) { ignore_files.push(file); } @@ -75,10 +75,12 @@ pub fn make_ignore_handler( } } debug!(output_snd, "Writing directories to .gitignore"); + uwr!( update_dir_gitignores(&xvc_root, &gitignore, &ignore_dirs), output_snd ); + // Load again to get ignored directories let gitignore = build_gitignore(&xvc_root).unwrap(); debug!(output_snd, "Writing files to .gitignore"); @@ -111,7 +113,7 @@ pub fn update_dir_gitignores( xvc_root.join(format!("{}/", dir)) }; - let ignore_res = check_ignore(current_gitignore, &abs_path); + let ignore_res = current_gitignore.check(&abs_path); match ignore_res { MatchResult::Ignore => { @@ -176,7 +178,7 @@ pub fn update_file_gitignores( files: &[XvcPath], ) -> Result<()> { // Filter already ignored files - let files: Vec = files.iter().filter_map(|f| match check_ignore(current_gitignore, &f.to_absolute_path(xvc_root)) { + let files: Vec = files.iter().filter_map(|f| match current_gitignore.check(&f.to_absolute_path(xvc_root)) { MatchResult::NoMatch => { Some(f.clone()) } diff --git a/file/src/common/mod.rs 
b/file/src/common/mod.rs index 49eedb973..617eae0f6 100644 --- a/file/src/common/mod.rs +++ b/file/src/common/mod.rs @@ -28,7 +28,7 @@ use xvc_logging::{error, info, uwr, warn, watch, XvcOutputSender}; use xvc_ecs::{persist, HStore, Storable, XvcStore}; -use xvc_walker::{AbsolutePath, Error as XvcWalkerError, Glob, GlobSetBuilder, PathSync}; +use xvc_walker::{AbsolutePath, Glob, PathSync}; use self::gitignore::IgnoreOp; @@ -93,12 +93,13 @@ pub fn pipe_path_digest( /// /// If `targets` is `None`, all paths in the store are returned. pub fn load_targets_from_store( + output_snd: &XvcOutputSender, xvc_root: &XvcRoot, current_dir: &AbsolutePath, targets: &Option>, ) -> Result> { - let store: XvcStore = xvc_root.load_store()?; - filter_targets_from_store(xvc_root, &store, current_dir, targets) + let xvc_path_store: XvcStore = xvc_root.load_store()?; + filter_targets_from_store(output_snd, xvc_root, &xvc_path_store, current_dir, targets) } /// Filters the paths in the store by given globs. @@ -107,8 +108,9 @@ pub fn load_targets_from_store( /// /// If `current_dir` is not the root, all targets are prefixed with it. pub fn filter_targets_from_store( + output_snd: &XvcOutputSender, xvc_root: &XvcRoot, - store: &XvcStore, + xvc_path_store: &XvcStore, current_dir: &AbsolutePath, targets: &Option>, ) -> Result> { @@ -124,8 +126,9 @@ pub fn filter_targets_from_store( }; return filter_targets_from_store( + output_snd, xvc_root, - store, + xvc_path_store, xvc_root.absolute_path(), &Some(targets), ); @@ -133,43 +136,115 @@ pub fn filter_targets_from_store( watch!(targets); - let hstore = HStore::::from(store); if let Some(targets) = targets { - let paths = filter_paths_by_globs(&hstore, targets.as_slice())?; + let paths = + filter_paths_by_globs(output_snd, xvc_root, xvc_path_store, targets.as_slice())?; watch!(paths); Ok(paths) } else { - Ok(hstore) + Ok(xvc_path_store.into()) } } /// Filter a set of paths by a set of globs. 
The globs are compiled into a /// GlobSet and paths are checked against the set. -pub fn filter_paths_by_globs(paths: &HStore, globs: &[String]) -> Result> { - let mut glob_matcher = GlobSetBuilder::new(); - globs.iter().for_each(|t| { - watch!(t); - if t.ends_with('/') { - glob_matcher.add(Glob::new(&format!("{t}**")).expect("Error in glob: {t}**")); - } else { - glob_matcher.add(Glob::new(&format!("{t}/**")).expect("Error in glob: {t}/**")); - } - glob_matcher.add(Glob::new(t).expect("Error in glob: {t}")); - }); - let glob_matcher = glob_matcher.build().map_err(XvcWalkerError::from)?; +/// +/// If a target ends with /, it's considered a directory and all its children are also selected. +pub fn filter_paths_by_globs( + output_snd: &XvcOutputSender, + xvc_root: &XvcRoot, + paths: &XvcStore, + globs: &[String], +) -> Result> { + watch!(globs); + if globs.is_empty() { + return Ok(paths.into()); + } - let paths = paths - .filter(|_, p| { - let str_path = &p.as_relative_path().as_str(); + // Ensure directories end with / + let globs = globs + .iter() + .map(|g| { + watch!(g); + if !g.ends_with('/') && !g.contains('*') { + let slashed = format!("{g}/"); + watch!(slashed); + // We don't track directories. Instead we look for files that start with the directory. + if paths.any(|_, p| p.as_str().starts_with(&slashed)) { + slashed + } else { + g.clone() + } + } else { + g.clone() + } + }) + .collect::>(); - glob_matcher.is_match(str_path) + watch!(globs); + let mut glob_matcher = build_glob_matcher(output_snd, xvc_root, &globs)?; + watch!(glob_matcher); + let paths = paths + .iter() + .filter_map(|(e, p)| { + if glob_matcher.is_match(p.as_str()) { + Some((*e, p.clone())) + } else { + None + } }) - .cloned(); + .collect(); watch!(paths); Ok(paths) } +/// Builds a glob matcher based on the provided directory and glob patterns. +/// +/// # Arguments +/// +/// * `output_snd`: A sender for output messages. +/// * `dir`: The directory to which the glob patterns will be applied. 
+/// * `globs`: A slice of glob patterns as strings. +/// +/// # Returns +/// +/// * `Result`: A `Result` that contains the `Glob` matcher if successful, or an error if not. +/// +/// # Errors +/// +/// This function will return an error if any of the glob patterns are invalid. +/// + +pub fn build_glob_matcher( + output_snd: &XvcOutputSender, + dir: &Path, + globs: &[String], +) -> Result { + let mut glob_matcher = Glob::default(); + globs.iter().for_each(|t| { + watch!(t); + if t.ends_with('/') { + if !glob_matcher.add(&format!("{t}**")) { + error!(output_snd, "Error in glob: {t}"); + } + } else if !t.contains('*') { + let abs_target = dir.join(Path::new(t)); + watch!(abs_target); + if abs_target.is_dir() { + if !glob_matcher.add(&format!("{t}/**")) { + error!(output_snd, "Error in glob: {t}") + } + } else if !glob_matcher.add(t) { + error!(output_snd, "Error in glob: {t}") + } + } else if !glob_matcher.add(t) { + error!(output_snd, "Error in glob: {t}") + } + }); + Ok(glob_matcher) +} + /// Converts targets to a map of XvcPaths and their metadata. It walks the file /// system with [`all_paths_and_metadata`]. This is aimed towards `xvc file /// track`, `xvc file hash` and similar commands where we work with the existing @@ -186,6 +261,7 @@ pub fn filter_paths_by_globs(paths: &HStore, globs: &[String]) -> Resul /// repositories. pub fn targets_from_disk( + output_snd: &XvcOutputSender, xvc_root: &XvcRoot, current_dir: &AbsolutePath, targets: &Option>, @@ -202,30 +278,26 @@ pub fn targets_from_disk( Some(targets) => targets.iter().map(|t| format!("{cwd}{t}")).collect(), None => vec![cwd.to_string()], }; - - return targets_from_disk(xvc_root, xvc_root.absolute_path(), &Some(targets)); + watch!(targets); + return targets_from_disk( + output_snd, + xvc_root, + xvc_root.absolute_path(), + &Some(targets), + ); } + // FIXME: If there are no globs/directories in the targets, no need to retrieve all the paths + // here. 
let (all_paths, _) = all_paths_and_metadata(xvc_root); watch!(all_paths); if let Some(targets) = targets { - let mut glob_matcher = GlobSetBuilder::new(); - targets.iter().for_each(|t| { - if t.ends_with('/') { - glob_matcher.add(Glob::new(&format!("{t}**")).expect("Error in glob: {t}**")); - } else if !t.contains('*') { - let abs_target = current_dir.join(Path::new(t)); - if abs_target.is_dir() { - glob_matcher.add(Glob::new(&format!("{t}/**")).expect("Error in glob: {t}/**")); - } else { - glob_matcher.add(Glob::new(t).expect("Error in glob: {t}")); - } - } else { - glob_matcher.add(Glob::new(t).expect("Error in glob: {t}")); - } - }); - let glob_matcher = glob_matcher.build().map_err(XvcWalkerError::from)?; + if targets.is_empty() { + return Ok(XvcPathMetadataMap::new()); + } + + let mut glob_matcher = build_glob_matcher(output_snd, xvc_root, targets)?; watch!(glob_matcher); Ok(all_paths .into_iter() @@ -342,8 +414,12 @@ pub fn recheck_from_cache( Ok(()) } -#[cfg(feature="reflink")] -fn reflink(output_snd: &XvcOutputSender, cache_path: AbsolutePath, path: AbsolutePath) -> Result<()> { +#[cfg(feature = "reflink")] +fn reflink( + output_snd: &XvcOutputSender, + cache_path: AbsolutePath, + path: AbsolutePath, +) -> Result<()> { match reflink::reflink(&cache_path, &path) { Ok(_) => { info!(output_snd, "[REFLINK] {} -> {}", cache_path, path); @@ -359,22 +435,31 @@ fn reflink(output_snd: &XvcOutputSender, cache_path: AbsolutePath, path: Absolut } } -fn copy_file(output_snd: &XvcOutputSender, cache_path: AbsolutePath, path: AbsolutePath) -> Result<()> { - watch!("Before copy"); - watch!(&cache_path); - watch!(&path); - fs::copy(&cache_path, &path)?; - info!(output_snd, "[COPY] {} -> {}", cache_path, path); - let mut perm = path.metadata()?.permissions(); - watch!(&perm); - perm.set_readonly(false); - watch!(&perm); - fs::set_permissions(&path, perm)?; +fn copy_file( + output_snd: &XvcOutputSender, + cache_path: AbsolutePath, + path: AbsolutePath, +) -> Result<()> { + 
watch!("Before copy"); + watch!(&cache_path); + watch!(&path); + fs::copy(&cache_path, &path)?; + info!(output_snd, "[COPY] {} -> {}", cache_path, path); + let mut perm = path.metadata()?.permissions(); + watch!(&perm); + // FIXME: Fix the clippy warning in the following line + perm.set_readonly(false); + watch!(&perm); + fs::set_permissions(&path, perm)?; Ok(()) } -#[cfg(not(feature="reflink"))] -fn reflink(output_snd: &XvcOutputSender, cache_path: AbsolutePath, path: AbsolutePath) -> Result<()> { +#[cfg(not(feature = "reflink"))] +fn reflink( + output_snd: &XvcOutputSender, + cache_path: AbsolutePath, + path: AbsolutePath, +) -> Result<()> { warn!( output_snd, "Xvc isn't compiled with reflink support. Copying the file." diff --git a/file/src/copy/mod.rs b/file/src/copy/mod.rs index 9bfb48461..a12bc326c 100644 --- a/file/src/copy/mod.rs +++ b/file/src/copy/mod.rs @@ -60,6 +60,7 @@ pub struct CopyCLI { } pub(crate) fn get_source_path_metadata( + output_snd: &XvcOutputSender, xvc_root: &XvcRoot, stored_xvc_path_store: &XvcStore, stored_xvc_metadata_store: &XvcStore, @@ -76,6 +77,7 @@ pub(crate) fn get_source_path_metadata( let current_dir = xvc_root.config().current_dir()?; let all_sources = filter_targets_from_store( + output_snd, xvc_root, stored_xvc_path_store, current_dir, @@ -327,6 +329,7 @@ pub fn cmd_copy(output_snd: &XvcOutputSender, xvc_root: &XvcRoot, opts: CopyCLI) let stored_metadata_store = xvc_root.load_store::()?; let stored_xvc_path_store = xvc_root.load_store::()?; let (source_xvc_paths, source_metadata) = get_source_path_metadata( + output_snd, xvc_root, &stored_xvc_path_store, &stored_metadata_store, diff --git a/file/src/list/mod.rs b/file/src/list/mod.rs index e4f45aa2c..25887ae7b 100644 --- a/file/src/list/mod.rs +++ b/file/src/list/mod.rs @@ -192,12 +192,12 @@ impl ListRow { watch!(&path_prefix); let name = if let Some(ap) = path_match.actual_path { watch!(ap); - ap.strip_prefix(&path_prefix.to_string_lossy().to_string()) + 
ap.strip_prefix(path_prefix.to_string_lossy().as_ref()) .map_err(|e| Error::RelativeStripPrefixError { e })? .to_string() } else if let Some(rp) = path_match.recorded_path { watch!(rp); - rp.strip_prefix(&path_prefix.to_string_lossy().to_string()) + rp.strip_prefix(path_prefix.to_string_lossy().as_ref()) .map_err(|e| Error::RelativeStripPrefixError { e })? .to_string() } else { @@ -522,6 +522,7 @@ impl UpdateFromXvcConfig for ListCLI { /// - =: Recorded and actual file have the same timestamp /// - >: Cached file is newer, xvc recheck to update the file /// - <: File is newer, xvc carry-in to update the cache +/// /// TODO: - I: File is ignored pub fn cmd_list(output_snd: &XvcOutputSender, xvc_root: &XvcRoot, cli_opts: ListCLI) -> Result<()> { @@ -530,7 +531,10 @@ pub fn cmd_list(output_snd: &XvcOutputSender, xvc_root: &XvcRoot, cli_opts: List let current_dir = conf.current_dir()?; - let all_from_disk = targets_from_disk(xvc_root, current_dir, &opts.targets)?; + // If targets are directories on disk, make sure they end with / + + let all_from_disk = targets_from_disk(output_snd, xvc_root, current_dir, &opts.targets)?; + watch!(&all_from_disk); let from_disk = if opts.show_dot_files { all_from_disk } else { @@ -548,7 +552,7 @@ pub fn cmd_list(output_snd: &XvcOutputSender, xvc_root: &XvcRoot, cli_opts: List }; watch!(from_disk); - let from_store = load_targets_from_store(xvc_root, current_dir, &opts.targets)?; + let from_store = load_targets_from_store(output_snd, xvc_root, current_dir, &opts.targets)?; watch!(from_store); let stored_xvc_metadata = xvc_root.load_store::()?; let stored_recheck_method = xvc_root.load_store::()?; diff --git a/file/src/mv/mod.rs b/file/src/mv/mod.rs index b1c87a107..acc9cdcb7 100644 --- a/file/src/mv/mod.rs +++ b/file/src/mv/mod.rs @@ -163,6 +163,7 @@ pub fn cmd_move(output_snd: &XvcOutputSender, xvc_root: &XvcRoot, opts: MoveCLI) let stored_metadata_store = xvc_root.load_store::()?; let stored_xvc_path_store = xvc_root.load_store::()?; 
let (source_xvc_paths, source_metadata) = get_source_path_metadata( + output_snd, xvc_root, &stored_xvc_path_store, &stored_metadata_store, diff --git a/file/src/recheck/mod.rs b/file/src/recheck/mod.rs index 37d9c07b2..f5046f35a 100644 --- a/file/src/recheck/mod.rs +++ b/file/src/recheck/mod.rs @@ -94,15 +94,20 @@ pub fn cmd_recheck( let opts = cli_opts.update_from_conf(conf)?; let current_dir = conf.current_dir()?; - let targets = load_targets_from_store(xvc_root, current_dir, &opts.targets)?; + let targets = load_targets_from_store(output_snd, xvc_root, current_dir, &opts.targets)?; watch!(targets); let stored_xvc_path_store = xvc_root.load_store::()?; + watch!(stored_xvc_path_store); let stored_xvc_metadata_store = xvc_root.load_store::()?; + watch!(stored_xvc_metadata_store); let target_files = only_file_targets(&stored_xvc_metadata_store, &targets)?; + watch!(target_files); let target_xvc_path_metadata_map = xvc_path_metadata_map_from_disk(xvc_root, &target_files); + watch!(target_xvc_path_metadata_map); let stored_recheck_method_store = xvc_root.load_store::()?; + watch!(stored_recheck_method_store); let stored_content_digest_store = xvc_root.load_store::()?; let entities: HashSet = target_files.keys().copied().collect(); let default_recheck_method = RecheckMethod::from_conf(xvc_root.config()); diff --git a/file/src/remove/mod.rs b/file/src/remove/mod.rs index f4699aaa4..3a3e00b94 100644 --- a/file/src/remove/mod.rs +++ b/file/src/remove/mod.rs @@ -68,8 +68,13 @@ pub fn cmd_remove(output_snd: &XvcOutputSender, xvc_root: &XvcRoot, opts: Remove let all_paths = xvc_root.load_store()?; let all_content_digests = xvc_root.load_store()?; - let remove_targets = - filter_targets_from_store(xvc_root, &all_paths, current_dir, &Some(opts.targets))?; + let remove_targets = filter_targets_from_store( + output_snd, + xvc_root, + &all_paths, + current_dir, + &Some(opts.targets), + )?; let all_cache_paths = cache_paths_for_xvc_paths(output_snd, &all_paths, 
&all_content_digests)?; diff --git a/file/src/send/mod.rs b/file/src/send/mod.rs index 6c71246a6..2fef1700f 100644 --- a/file/src/send/mod.rs +++ b/file/src/send/mod.rs @@ -37,7 +37,7 @@ pub fn cmd_send(output_snd: &XvcOutputSender, xvc_root: &XvcRoot, opts: SendCLI) let remote = get_storage_record(output_snd, xvc_root, &opts.storage)?; watch!(remote); let current_dir = xvc_root.config().current_dir()?; - let targets = load_targets_from_store(xvc_root, current_dir, &opts.targets)?; + let targets = load_targets_from_store(output_snd, xvc_root, current_dir, &opts.targets)?; watch!(targets); let target_file_xvc_metadata = xvc_root diff --git a/file/src/share/mod.rs b/file/src/share/mod.rs index 8079b32e9..f7ac131da 100644 --- a/file/src/share/mod.rs +++ b/file/src/share/mod.rs @@ -33,7 +33,8 @@ pub fn cmd_share(output_snd: &XvcOutputSender, xvc_root: &XvcRoot, opts: ShareCL let storage = get_storage_record(output_snd, xvc_root, &opts.storage)?; watch!(storage); let current_dir = xvc_root.config().current_dir()?; - let targets = load_targets_from_store(xvc_root, current_dir, &Some(vec![opts.target]))?; + let targets = + load_targets_from_store(output_snd, xvc_root, current_dir, &Some(vec![opts.target]))?; watch!(targets); let target_file_xvc_metadata = xvc_root diff --git a/file/src/track/mod.rs b/file/src/track/mod.rs index d26f9295f..794ed8925 100644 --- a/file/src/track/mod.rs +++ b/file/src/track/mod.rs @@ -32,6 +32,7 @@ use xvc_core::RecheckMethod; use xvc_core::XvcPath; use xvc_ecs::{HStore, XvcEntity}; + /// Add files for tracking with Xvc #[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, From, Parser)] #[command(rename_all = "kebab-case")] @@ -117,7 +118,7 @@ pub fn cmd_track( let conf = xvc_root.config(); let opts = cli_opts.update_from_conf(conf)?; let current_dir = conf.current_dir()?; - let targets = targets_from_disk(xvc_root, current_dir, &opts.targets)?; + let targets = targets_from_disk(output_snd, xvc_root, current_dir, &opts.targets)?; 
watch!(targets); let requested_recheck_method = opts.recheck_method; let text_or_binary = opts.text_or_binary.unwrap_or_default(); @@ -226,9 +227,12 @@ pub fn cmd_track( update_dir_gitignores(xvc_root, ¤t_gitignore, &dir_targets)?; // We reload gitignores here to make sure we ignore the given dirs + let current_gitignore = build_gitignore(xvc_root)?; + update_file_gitignores(xvc_root, ¤t_gitignore, &file_targets)?; + if !opts.no_commit { let current_xvc_path_store = xvc_root.load_store::()?; @@ -285,5 +289,6 @@ pub fn cmd_track( opts.force, )?; } + Ok(()) } diff --git a/file/src/untrack/mod.rs b/file/src/untrack/mod.rs index 0e7c45a66..99081605e 100644 --- a/file/src/untrack/mod.rs +++ b/file/src/untrack/mod.rs @@ -43,8 +43,14 @@ pub fn cmd_untrack( let current_dir = xvc_root.config().current_dir()?; let all_paths = xvc_root.load_store()?; let all_content_digests = xvc_root.load_store()?; - let untrack_targets = - filter_targets_from_store(xvc_root, &all_paths, current_dir, &Some(opts.targets))?; + + let untrack_targets = filter_targets_from_store( + output_snd, + xvc_root, + &all_paths, + current_dir, + &Some(opts.targets), + )?; let all_cache_paths = cache_paths_for_xvc_paths(output_snd, &all_paths, &all_content_digests)?; diff --git a/lib/Cargo.toml b/lib/Cargo.toml index 78636363b..bf05bebd6 100644 --- a/lib/Cargo.toml +++ b/lib/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "xvc" -version = "0.6.10" +version = "0.6.11" edition = "2021" description = "An MLOps tool to manage data files and pipelines on top of Git" authors = ["Emre Şahin "] @@ -20,16 +20,17 @@ name = "xvc" path = "src/main.rs" [dependencies] -xvc-config = { version = "0.6.10", path = "../config" } -xvc-core = { version = "0.6.10", path = "../core" } -xvc-logging = { version = "0.6.10", path = "../logging" } -xvc-ecs = { version = "0.6.10", path = "../ecs" } -xvc-file = { version = "0.6.10", path = "../file", default-features = false } -xvc-pipeline = { version = "0.6.10", path = "../pipeline" } 
-xvc-walker = { version = "0.6.10", path = "../walker" } +xvc-config = { version = "0.6.11", path = "../config" } +xvc-core = { version = "0.6.11", path = "../core" } +xvc-logging = { version = "0.6.11", path = "../logging" } +xvc-ecs = { version = "0.6.11", path = "../ecs" } +xvc-file = { version = "0.6.11", path = "../file", default-features = false } +xvc-pipeline = { version = "0.6.11", path = "../pipeline" } +xvc-walker = { version = "0.6.11", path = "../walker" } +xvc-storage = { version = "0.6.11", path = "../storage", default-features = false } + ## Cli and config -clap = { version = "^4.4", features = ["derive", "cargo"] } -xvc-storage = { version = "0.6.10", path = "../storage", default-features = false } +clap = { version = "^4.5", features = ["derive", "cargo"] } directories-next = "2.0" ## Hashing @@ -45,17 +46,16 @@ serde = { version = "^1.0", features = ["derive"] } serde_yaml = "^0.9" serde_json = "^1.0" rmp = "^0.8" -rmp-serde = "1.1.2" +rmp-serde = "1.3.0" toml = "^0.8" ## Parallelization -rayon = "^1.8" +rayon = "^1.10" crossbeam-channel = "^0.5" crossbeam = "^0.8" ## File system subprocess = "^0.2" -# reflink = { version = "^0.1", optional = true } relative-path = { version = "^1.9", features = ["serde"] } path-absolutize = "^3.1" glob = "^0.3" @@ -69,7 +69,7 @@ fern = { version = "^0.6", features = ["colored"] } anyhow = "^1.0" ## UI -comfy-table = "7.1.0" +comfy-table = "7.1.1" ## macros paste = "1.0" @@ -79,8 +79,8 @@ regex = "^1.10" chrono = "^0.4" strum = "^0.26" strum_macros = "^0.26" -lazy_static = "^1.4" -uuid = { version = "^1.6", features = ["serde", "v4", "fast-rng"] } +lazy_static = "^1.5" +uuid = { version = "^1.10", features = ["serde", "v4", "fast-rng"] } hex = { version = "^0.4", features = ["serde"] } url = { version = "^2.5", features = ["serde"] } git-version = "^0.3" @@ -88,7 +88,7 @@ git-version = "^0.3" ## Mostly for testing assert_cmd = "^2.0" assert_fs = "^1.1" -predicates = "^3.0" +predicates = "^3.1" [features] @@ 
-125,12 +125,12 @@ assert_cmd = "^2.0" assert_fs = "^1.1" escargot = "^0.5" fs_extra = "^1.3" -globset = "^0.4" +fast-glob = "^0.3" jwalk = "^0.8" -predicates = "^3.0" -proptest = "^1.4" +predicates = "^3.1" +proptest = "^1.5" shellfn = "^0.1" test-case = "^3.3" trycmd = "^0.15" which = "^6.0" -xvc-test-helper = { version = "0.6.10", path = "../test_helper/" } +xvc-test-helper = { version = "0.6.11", path = "../test_helper/" } diff --git a/lib/src/cli/mod.rs b/lib/src/cli/mod.rs index 3b82af6f1..f77184501 100644 --- a/lib/src/cli/mod.rs +++ b/lib/src/cli/mod.rs @@ -167,7 +167,7 @@ impl XvcCLI { } } -// Implement FromStr for XvcCLI +// Implement FromStr for XvcCLI impl FromStr for XvcCLI { type Err = Error; @@ -212,7 +212,6 @@ pub fn run(args: &[&str]) -> Result { /// Run the supplied command within the optional [XvcRoot]. If xvc_root is None, it will be tried /// to be loaded from `cli_opts.workdir`. pub fn dispatch_with_root(cli_opts: cli::XvcCLI, xvc_root_opt: XvcRootOpt) -> Result { - // XvcRoot should be kept per repository and shouldn't change directory across runs assert!( xvc_root_opt.as_ref().is_none() @@ -309,22 +308,22 @@ pub fn dispatch_with_root(cli_opts: cli::XvcCLI, xvc_root_opt: XvcRootOpt) -> Re Some(xvc_root) } - XvcSubCommand::Aliases(opts) => { + XvcSubCommand::Aliases(opts) => { aliases::run(&output_snd, opts)?; xvc_root_opt - }, + } // following commands can only be run inside a repository - XvcSubCommand::Root(opts) => { root::run( - &output_snd, + XvcSubCommand::Root(opts) => { + root::run( + &output_snd, + xvc_root_opt + .as_ref() + .ok_or_else(|| Error::RequiresXvcRepository)?, + opts, + )?; xvc_root_opt - .as_ref() - .ok_or_else(|| Error::RequiresXvcRepository)?, - opts, - )?; - xvc_root_opt - }, - + } XvcSubCommand::File(opts) => { file::run(&output_snd, xvc_root_opt.as_ref(), opts)?; @@ -342,7 +341,6 @@ pub fn dispatch_with_root(cli_opts: cli::XvcCLI, xvc_root_opt: XvcRootOpt) -> Re )?; xvc_root_opt - } XvcSubCommand::CheckIgnore(opts) 
=> { @@ -357,7 +355,6 @@ pub fn dispatch_with_root(cli_opts: cli::XvcCLI, xvc_root_opt: XvcRootOpt) -> Re )?; xvc_root_opt - } XvcSubCommand::Storage(opts) => { @@ -372,7 +369,6 @@ pub fn dispatch_with_root(cli_opts: cli::XvcCLI, xvc_root_opt: XvcRootOpt) -> Re xvc_root_opt } - }; watch!("Before handle_git_automation"); @@ -414,8 +410,6 @@ pub fn dispatch_with_root(cli_opts: cli::XvcCLI, xvc_root_opt: XvcRootOpt) -> Re .unwrap(); xvc_root_opt - - } /// Dispatch commands to respective functions in the API @@ -454,7 +448,6 @@ pub fn dispatch(cli_opts: cli::XvcCLI) -> Result { }; dispatch_with_root(cli_opts, xvc_root_opt) - } fn get_xvc_config_params(cli_opts: &XvcCLI) -> XvcConfigParams { diff --git a/lib/src/git.rs b/lib/src/git.rs index 2e739eeb3..a1ee653f3 100644 --- a/lib/src/git.rs +++ b/lib/src/git.rs @@ -233,3 +233,16 @@ pub fn git_auto_stage( debug!(output_snd, "Staging .xvc/ to git: {res_git_add}"); Ok(()) } + +pub fn git_ignored(output_snd: &XvcOutputSender, + git_command: &str, + xvc_root_str: &str, + path: &str) -> Result { + let command_res = exec_git(git_command, xvc_root_str, &["check-ignore", path])?; + + if command_res.trim().is_empty() { + Ok(false) + } else { + Ok(true) + } +} diff --git a/lib/src/init/mod.rs b/lib/src/init/mod.rs index e10c3309c..7ec06c225 100644 --- a/lib/src/init/mod.rs +++ b/lib/src/init/mod.rs @@ -39,6 +39,7 @@ pub struct InitCLI { /// # Arguments /// /// - `xvc_root_opt`: Optional [xvc_core::XvcRoot] +/// /// It's an error to reinit inside an Xvc repository (with `Some(xvc_root)`) normally. /// It's possible to force reinit with `opts.force` though. 
/// diff --git a/lib/tests/common/mod.rs b/lib/tests/common/mod.rs index 7fc9003c0..882793745 100644 --- a/lib/tests/common/mod.rs +++ b/lib/tests/common/mod.rs @@ -61,7 +61,7 @@ pub fn run_xvc(cwd: Option<&Path>, args: &[&str], verbosity: XvcVerbosity) -> Re } pub fn example_project_url() -> Result { - Ok(format!("http://one.emresult.com/~iex/{EXAMPLE_PROJECT_NAME}.tgz").to_string()) + Ok("http://e1.xvc.dev/example-xvc.tgz".to_string()) } pub fn example_project_template_path() -> Result { @@ -91,7 +91,7 @@ pub fn download_example_project() -> Result<()> { .arg("xzf") .arg(curl_output_filename) .arg("--directory") - .arg(&env::temp_dir().to_string_lossy().to_string()) + .arg(env::temp_dir().to_string_lossy().as_ref()) .output()?; if !tar_output.status.success() { diff --git a/lib/tests/test_core_util_file_walker.rs b/lib/tests/test_core_util_file_walker.rs index 25e29967c..6e4aeb1dd 100644 --- a/lib/tests/test_core_util_file_walker.rs +++ b/lib/tests/test_core_util_file_walker.rs @@ -2,12 +2,12 @@ mod common; use common::*; use log::LevelFilter; use xvc_core::{ - util::xvcignore::{walk_parallel, walk_serial}, + util::xvcignore::{walk_parallel, walk_serial, COMMON_IGNORE_PATTERNS}, XvcPath, }; use xvc_test_helper::test_logging; -use std::{path::Path, time::Duration}; +use std::{collections::HashSet, path::Path, time::Duration}; use xvc_logging::watch; @@ -22,32 +22,23 @@ fn test_walk() -> Result<()> { let (pmp1, _) = walk_serial(&output_sender, &xvc_root, true)?; - assert!(!pmp1.is_empty()); + let path_set1 = HashSet::::from_iter(pmp1.keys().cloned()); + assert!(!path_set1.is_empty()); // Test skip list for skipped in [".dvc", ".xvc", ".git"] { let xp = XvcPath::new(&xvc_root, xvc_root.absolute_path(), Path::new(skipped))?; - assert!(!pmp1.contains_key(&xp), "Result Contains {:?}", xp) + assert!(!path_set1.contains(&xp), "Result Contains {:?}", xp) } - common::test_logging(LevelFilter::Trace); - let (pmp2, _) = walk_parallel(&xvc_root, true)?; - - let mut diff1 = 
Vec::<&XvcPath>::new(); - for k in pmp1.keys() { - if !pmp2.contains_key(k) { - diff1.push(k); - } - } + let (pmp2, _) = walk_parallel(&xvc_root, COMMON_IGNORE_PATTERNS, true)?; + let path_set2 = HashSet::::from_iter(pmp2.keys().cloned()); + assert!(!path_set2.is_empty()); + let diff1: HashSet<&XvcPath> = path_set1.difference(&path_set2).collect(); watch!(diff1); - let mut diff2 = Vec::<&XvcPath>::new(); - for k in pmp2.keys() { - if !pmp1.contains_key(k) { - diff2.push(k); - } - } + let diff2: HashSet<&XvcPath> = path_set2.difference(&path_set1).collect(); watch!(diff2); diff --git a/lib/tests/test_core_util_notify.rs b/lib/tests/test_core_util_notify.rs index 22cc0ccaf..5afefecfe 100644 --- a/lib/tests/test_core_util_notify.rs +++ b/lib/tests/test_core_util_notify.rs @@ -4,6 +4,7 @@ use assert_fs::prelude::{FileTouch, FileWriteBin, PathChild}; use assert_fs::TempDir; use common::*; use std::env; +use std::ffi::OsString; use std::fs::remove_file; use std::path::PathBuf; use std::thread::{self, sleep}; @@ -16,7 +17,7 @@ use xvc_core::util::xvcignore::COMMON_IGNORE_PATTERNS; use xvc_core::XVCIGNORE_FILENAME; use xvc_walker::notify::{make_polling_watcher, PathEvent}; -use xvc_walker::{walk_serial, IgnoreRules, WalkOptions}; +use xvc_walker::{walk_serial, WalkOptions}; #[test] fn test_notify() -> Result<()> { @@ -24,9 +25,8 @@ fn test_notify() -> Result<()> { env::set_current_dir(&temp_dir)?; watch!(temp_dir); test_logging(log::LevelFilter::Trace); - let initial_rules = IgnoreRules::try_from_patterns(&temp_dir, COMMON_IGNORE_PATTERNS)?; let walk_options = WalkOptions { - ignore_filename: Some(XVCIGNORE_FILENAME.to_string()), + ignore_filename: Some(XVCIGNORE_FILENAME.to_owned()), include_dirs: true, }; let (created_paths_snd, created_paths_rec) = crossbeam_channel::unbounded(); @@ -48,7 +48,7 @@ fn test_notify() -> Result<()> { let (output_sender, output_receiver) = crossbeam_channel::unbounded(); let (initial_paths, all_rules) = - walk_serial(&output_sender, 
initial_rules, &temp_dir, &walk_options)?; + walk_serial(&output_sender, COMMON_IGNORE_PATTERNS, &temp_dir, &walk_options)?; watch!(all_rules); assert!(output_receiver.is_empty()); let (watcher, receiver) = make_polling_watcher(all_rules)?; diff --git a/lib/tests/test_file_list.rs b/lib/tests/test_file_list.rs index c408cff8f..537457268 100644 --- a/lib/tests/test_file_list.rs +++ b/lib/tests/test_file_list.rs @@ -6,7 +6,6 @@ use std::path::Path; use log::LevelFilter; use xvc::error::Result; -use xvc::watch; use xvc_config::XvcVerbosity; use xvc_test_helper::{create_directory_tree, test_logging}; use xvc_walker::AbsolutePath; @@ -55,13 +54,9 @@ fn test_file_list() -> Result<()> { common::run_xvc(Some(&xvc_root), &c, XvcVerbosity::Trace) }; - watch!("begin"); let list_all = x(&["list", "--format", "{{name}}", "--show-dot-files"])?; - watch!(list_all); - let count_all = list_all.trim().lines().count(); - watch!(count_all); // There must be 33 elements in total. 6 x 5: directories, 1 for .gitignore, // 1 for .xvcignore, another line for the summary. assert!(count_all == 33); diff --git a/lib/tests/test_file_track_issue_104.rs b/lib/tests/test_file_track_issue_104.rs index 8dfd735bf..b7d1d46c4 100644 --- a/lib/tests/test_file_track_issue_104.rs +++ b/lib/tests/test_file_track_issue_104.rs @@ -16,7 +16,7 @@ fn create_directory_hierarchy() -> Result { Ok(temp_dir) } -/// When a directory is added to projects, its child files are also ignored. +/// When a directory is added to projects, its child files should also be ignored.
/// #[test] fn test_file_track_issue_104() -> Result<()> { @@ -37,6 +37,10 @@ fn test_file_track_issue_104() -> Result<()> { let root_gitignore = fs::read_to_string(xvc_root.join(Path::new(".gitignore")))?; watch!(root_gitignore); let dir_ignore = xvc_root.join(Path::new("dir-0001/.gitignore")); + watch!(dir_ignore); + + watch!(std::fs::read_dir(dir_1)?); + assert!(!dir_ignore.exists()); assert!( diff --git a/lib/tests/test_storage_new_generic_rsync.rs b/lib/tests/test_storage_new_generic_rsync.rs index f42df3512..8eff19d37 100644 --- a/lib/tests/test_storage_new_generic_rsync.rs +++ b/lib/tests/test_storage_new_generic_rsync.rs @@ -37,7 +37,7 @@ fn test_storage_new_generic_rsync() -> Result<()> { "/tmp/{}/", common::random_dir_name("xvc-storage", Some(111)) ); - let test_host = "xvc-test@one.emresult.com"; + let test_host = "iex@e1.xvc.dev"; let url = test_host.to_string(); let local_test_dir = env::temp_dir().join(common::random_dir_name("xvc-storage-copy", None)); diff --git a/lib/tests/test_storage_new_minio.rs b/lib/tests/test_storage_new_minio.rs index 72814cfbe..1bf4f1754 100644 --- a/lib/tests/test_storage_new_minio.rs +++ b/lib/tests/test_storage_new_minio.rs @@ -10,108 +10,6 @@ use xvc_config::XvcVerbosity; use xvc_core::XvcRoot; use xvc_test_helper::{create_directory_tree, generate_filled_file}; -fn write_s3cmd_config(access_key: &str, secret_key: &str) -> Result { - let config_file_name = env::temp_dir().join(format!( - "{}.cfg", - common::random_dir_name("minio-config", None) - )); - let config = format!( - r#"[default] -access_key = {access_key} -access_token = -add_encoding_exts = -add_headers = -bucket_location = us-east-1 -ca_certs_file = -cache_file = -check_ssl_certificate = True -check_ssl_hostname = True -cloudfront_host = cloudfront.amazonaws.com -connection_max_age = 5 -connection_pooling = True -content_disposition = -content_type = -default_mime_type = binary/octet-stream -delay_updates = False -delete_after = False -delete_after_fetch = 
False -delete_removed = False -dry_run = False -enable_multipart = True -encoding = UTF-8 -encrypt = False -expiry_date = -expiry_days = -expiry_prefix = -follow_symlinks = False -force = False -get_continue = False -gpg_command = None -gpg_decrypt = %(gpg_command)s -d --verbose --no-use-agent --batch --yes --passphrase-fd %(passphrase_fd)s -o %(output_file)s %(input_file)s -gpg_encrypt = %(gpg_command)s -c --verbose --no-use-agent --batch --yes --passphrase-fd %(passphrase_fd)s -o %(output_file)s %(input_file)s -gpg_passphrase = -guess_mime_type = True -host_base = one.emresult.com:9000 -host_bucket = one.emresult.com:9000 -human_readable_sizes = False -invalidate_default_index_on_cf = False -invalidate_default_index_root_on_cf = True -invalidate_on_cf = False -kms_key = -limit = -1 -limitrate = 0 -list_allow_unordered = False -list_md5 = False -log_target_prefix = -long_listing = False -max_delete = -1 -mime_type = -multipart_chunk_size_mb = 15 -multipart_copy_chunk_size_mb = 1024 -multipart_max_chunks = 10000 -preserve_attrs = True -progress_meter = True -proxy_host = -proxy_port = 0 -public_url_use_https = False -put_continue = False -recursive = False -recv_chunk = 65536 -reduced_redundancy = False -requester_pays = False -restore_days = 1 -restore_priority = Standard -secret_key = {secret_key} -send_chunk = 65536 -server_side_encryption = False -signature_v2 = False -signurl_use_https = False -simpledb_host = sdb.amazonaws.com -skip_existing = False -socket_timeout = 300 -ssl_client_cert_file = -ssl_client_key_file = -stats = False -stop_on_error = False -storage_class = -throttle_max = 100 -upload_id = -urlencoding_mode = normal -use_http_expect = False -use_https = False -use_mime_magic = True -verbosity = WARNING -website_endpoint = http://%(bucket)s.s3-website-%(location)s.amazonaws.com/ -website_error = -website_index = index.html -"# - ); - - fs::write(&config_file_name, config)?; - - Ok(config_file_name.to_string_lossy().to_string()) -} - fn 
create_directory_hierarchy() -> Result { let temp_dir: XvcRoot = run_in_temp_xvc_dir()?; // for checking the content hash @@ -124,6 +22,25 @@ fn create_directory_hierarchy() -> Result { Ok(temp_dir) } +fn mc_config(alias: &str, endpoint: &str, access_key: &str, secret_key: &str) -> String { + format!( + r##" +{{ + "version": "10", + "aliases": {{ + "{alias}": {{ + "url": "{endpoint}", + "accessKey": "{access_key}", + "secretKey": "{secret_key}", + "api": "s3v4", + "path": "auto" + }} + }} +}} +"## + ) +} + fn sh(cmd: String) -> String { watch!(cmd); Exec::shell(cmd).capture().unwrap().stdout_str() @@ -134,30 +51,34 @@ fn sh(cmd: String) -> String { fn test_storage_new_minio() -> Result<()> { common::test_logging(LevelFilter::Trace); let xvc_root = create_directory_hierarchy()?; - let endpoint = "http://emresult.com:9000"; - let bucket_name = "one"; + let endpoint = "http://e1.xvc.dev:9000"; + let bucket_name = "xvctests"; let storage_prefix = common::random_dir_name("xvc", None); let region = "us-east-1"; let local_test_dir = env::temp_dir().join(common::random_dir_name("xvc-storage-copy", None)); let access_key = env::var("MINIO_ACCESS_KEY_ID")?; let secret_key = env::var("MINIO_SECRET_ACCESS_KEY")?; + let alias_name = "xvc"; + let mc_config = mc_config( + alias_name, + endpoint, + access_key.as_str(), + secret_key.as_str(), + ); + let mc_config_dir = xvc_root.xvc_dir().join(".mc"); + fs::create_dir_all(&mc_config_dir)?; + let mc_config_file = mc_config_dir.join("config.json"); + fs::write(mc_config_file, mc_config)?; - let config_file_name = write_s3cmd_config(&access_key, &secret_key)?; - watch!(config_file_name); - - let s3cmd = |cmd: &str, append: &str| -> String { - let sh_cmd = format!("s3cmd --config {config_file_name} {cmd} {append}"); + let mc = |cmd: &str, append: &str| -> String { + let sh_cmd = format!("mc --config-dir {mc_config_dir} {cmd} {alias_name} {append}"); sh(sh_cmd) }; let x = |cmd: &[&str]| -> Result { - common::run_xvc(Some(&xvc_root), cmd, 
XvcVerbosity::Warn) + common::run_xvc(Some(&xvc_root), cmd, XvcVerbosity::Trace) }; - let create_bucket_res = s3cmd(&format!("mb {bucket_name}"), ""); - - watch!(create_bucket_res); - let out = x(&[ "storage", "new", @@ -176,7 +97,7 @@ fn test_storage_new_minio() -> Result<()> { watch!(out); - let mc_bucket_list = s3cmd("ls xvc", &format!("| rg {bucket_name}")); + let mc_bucket_list = mc("ls", &format!("| rg {bucket_name}")); watch!(mc_bucket_list); assert!(!mc_bucket_list.is_empty()); @@ -196,13 +117,14 @@ fn test_storage_new_minio() -> Result<()> { let push_result = x(&["file", "send", "--to", "minio-storage", the_file])?; watch!(push_result); - let file_list = s3cmd(&format!("ls -r s3://one/{storage_prefix}"), "| rg 0.bin"); + let file_list = mc("ls -r ", &format!("| rg {bucket_name}/{storage_prefix}")); watch!(file_list); // The file should be in: // - storage_dir/REPO_ID/b3/ABCD...123/0.bin - let n_storage_files_after = file_list.lines().count(); + // Remove guid file from the count + let n_storage_files_after = file_list.lines().count() - 1; assert!( n_storage_files_before + 1 == n_storage_files_after, @@ -214,6 +136,7 @@ fn test_storage_new_minio() -> Result<()> { // remove all cache // let cache_dir = xvc_root.xvc_dir().join("b3"); + sh(format!("chmod -R +w {}", cache_dir.to_string_lossy())); sh(format!("rm -rf {}", cache_dir.to_string_lossy())); let fetch_result = x(&["file", "bring", "--no-recheck", "--from", "minio-storage"])?; @@ -223,15 +146,22 @@ fn test_storage_new_minio() -> Result<()> { let n_local_files_after_fetch = jwalk::WalkDir::new(&cache_dir) .into_iter() .filter(|f| { + watch!(f); f.as_ref() .map(|f| f.file_type().is_file()) .unwrap_or_else(|_| false) }) .count(); - assert!(n_storage_files_after == n_local_files_after_fetch); + assert!( + n_storage_files_after == n_local_files_after_fetch, + "{} - {}", + n_storage_files_after, + n_local_files_after_fetch + ); let cache_dir = xvc_root.xvc_dir().join("b3"); + sh(format!("chmod -R +w {}", 
cache_dir.to_string_lossy())); sh(format!("rm -rf {}", cache_dir.to_string_lossy())); fs::remove_file(the_file)?; @@ -247,7 +177,12 @@ fn test_storage_new_minio() -> Result<()> { }) .count(); - assert!(n_storage_files_after == n_local_files_after_pull); + assert!( + n_storage_files_after == n_local_files_after_pull, + "{} - {}", + n_storage_files_after, + n_local_files_after_pull + ); assert!(PathBuf::from(the_file).exists()); // Set remote specific passwords and remove general ones diff --git a/lib/tests/test_storage_new_rsync.rs b/lib/tests/test_storage_new_rsync.rs index 0786cd93d..1412fdd9f 100644 --- a/lib/tests/test_storage_new_rsync.rs +++ b/lib/tests/test_storage_new_rsync.rs @@ -37,8 +37,8 @@ fn test_storage_new_rsync() -> Result<()> { "/tmp/{}/", common::random_dir_name("xvc-storage", Some(111)) ); - let test_user = "xvc-test"; - let test_host = "one.emresult.com"; + let test_user = "iex"; + let test_host = "e1.xvc.dev"; let url = format!("{test_user}@{test_host}"); let local_test_dir = env::temp_dir().join(common::random_dir_name("xvc-storage-copy", None)); diff --git a/lib/tests/test_walker_parallel.rs b/lib/tests/test_walker_parallel.rs index 2c75e73db..1769b5f79 100644 --- a/lib/tests/test_walker_parallel.rs +++ b/lib/tests/test_walker_parallel.rs @@ -1,9 +1,10 @@ use std::{ + ffi::OsString, fs, path::{Path, PathBuf}, + sync::{Arc, RwLock}, }; -use globset::Glob; use log::LevelFilter; use xvc_walker::*; @@ -45,17 +46,14 @@ fn new_dir_with_ignores( initial_patterns: &str, ) -> Result { let patterns = create_patterns(root, dir, initial_patterns); - let mut initialized = IgnoreRules::empty(&PathBuf::from(root)); + let initialized = IgnoreRules::empty(&PathBuf::from(root), Some(".gitignore")); watch!(patterns); - initialized.update(patterns).unwrap(); + initialized.add_patterns(patterns).unwrap(); Ok(initialized) } -fn create_patterns(root: &str, dir: Option<&str>, patterns: &str) -> Vec> { +fn create_patterns(root: &str, dir: Option<&str>, patterns: 
&str) -> Vec { xvc_walker::content_to_patterns(Path::new(root), dir.map(Path::new), patterns) - .into_iter() - .map(|pat_res_g| pat_res_g.map(|res_g| res_g.unwrap())) - .collect() } #[test_case("", "" => it contains "dir-0002/file-0001.bin" ; "t3733909666")] @@ -76,26 +74,20 @@ fn test_walk_parallel(ignore_src: &str, ignore_content: &str) -> Vec { test_logging(LevelFilter::Trace); let root = create_directory_hierarchy(true).unwrap(); let (path_sender, path_receiver) = crossbeam_channel::unbounded(); - let (ignore_sender, _ignore_receiver) = crossbeam_channel::unbounded(); // We assume ignore_src is among the directories created fs::write( format!("{}/{ignore_src}.gitignore", root.to_string_lossy()), ignore_content, ) .unwrap(); - let initial_rules = new_dir_with_ignores(root.to_string_lossy().as_ref(), None, "").unwrap(); + let initial_rules = Arc::new(RwLock::new( + new_dir_with_ignores(root.to_string_lossy().as_ref(), None, "").unwrap(), + )); let walk_options = WalkOptions { - ignore_filename: Some(".gitignore".to_string()), + ignore_filename: Some(".gitignore".to_owned()), include_dirs: true, }; - walk_parallel( - initial_rules, - &root, - walk_options, - path_sender, - ignore_sender, - ) - .unwrap(); + walk_parallel(initial_rules, &root, walk_options, path_sender).unwrap(); let paths = path_receiver .iter() .filter_map(|e| match e { diff --git a/lib/tests/test_walker_serial.rs b/lib/tests/test_walker_serial.rs index 4e1fe6f36..211fcee02 100644 --- a/lib/tests/test_walker_serial.rs +++ b/lib/tests/test_walker_serial.rs @@ -1,9 +1,5 @@ -use std::{ - fs, - path::{Path, PathBuf}, -}; +use std::{fs, path::PathBuf}; -use globset::Glob; use xvc_walker::*; use test_case::test_case; @@ -13,27 +9,6 @@ use xvc_logging::watch; use xvc_test_helper::*; use xvc_walker::AbsolutePath; -fn new_dir_with_ignores( - root: &str, - dir: Option<&str>, - initial_patterns: &str, -) -> Result { - let patterns = create_patterns(root, dir, initial_patterns); - let mut initialized = 
IgnoreRules::empty(&PathBuf::from(root)); - watch!(patterns); - initialized.update(patterns).unwrap(); - watch!(initialized.ignore_patterns.read().unwrap()); - watch!(initialized.whitelist_patterns.read().unwrap()); - Ok(initialized) -} - -fn create_patterns(root: &str, dir: Option<&str>, patterns: &str) -> Vec> { - xvc_walker::content_to_patterns(Path::new(root), dir.map(Path::new), patterns) - .into_iter() - .map(|pat_res_g| pat_res_g.map(|res_g| res_g.unwrap())) - .collect() -} - // TODO: Patterns shouldn't have / prefix, but an appropriate PathKind #[test_case(true => matches Ok(_); "this is to refresh the dir for each test run")] // This builds a directory hierarchy to run the tests @@ -82,14 +57,12 @@ fn test_walk_serial(ignore_src: &str, ignore_content: &str) -> Vec { ignore_content, ) .unwrap(); - let initial_rules = new_dir_with_ignores(root.to_string_lossy().as_ref(), None, "").unwrap(); let walk_options = WalkOptions { - ignore_filename: Some(".gitignore".to_string()), + ignore_filename: Some(".gitignore".to_owned()), include_dirs: true, }; let (output_sender, output_receiver) = crossbeam_channel::unbounded(); - let (res_paths, ignore_rules) = - walk_serial(&output_sender, initial_rules, &root, &walk_options).unwrap(); + let (res_paths, ignore_rules) = walk_serial(&output_sender, "", &root, &walk_options).unwrap(); watch!(ignore_rules.ignore_patterns.read().unwrap()); watch!(ignore_rules.whitelist_patterns.read().unwrap()); watch!(output_receiver); diff --git a/logging/Cargo.toml b/logging/Cargo.toml index a928aa413..797ba00cb 100644 --- a/logging/Cargo.toml +++ b/logging/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "xvc-logging" -version = "0.6.10" +version = "0.6.11" edition = "2021" description = "Logging crate for Xvc" authors = ["Emre Şahin "] @@ -18,6 +18,7 @@ crate-type = ["rlib"] [dependencies] ## Logging and errors +peak_alloc = "^0.2" log = "^0.4" ### meta-logging-in-format is required for sled: https://github.com/spacejam/sled/issues/1384 # We 
don't use sled anymore, keep the above note for future reference diff --git a/logging/src/lib.rs b/logging/src/lib.rs index 5e5f16424..4a5a8a56e 100755 --- a/logging/src/lib.rs +++ b/logging/src/lib.rs @@ -11,6 +11,7 @@ use std::fmt::Display; use std::path::Path; use std::sync::Once; + /// Debugging macro to print the given expression and its value, with the module, function and line number #[macro_export] macro_rules! watch { diff --git a/logging/src/logging/mod.rs b/logging/src/logging/mod.rs deleted file mode 100644 index db224278f..000000000 --- a/logging/src/logging/mod.rs +++ /dev/null @@ -1,56 +0,0 @@ -use log::LevelFilter; -use log::{error, info}; -use std::env; -use std::path::Path; -use std::sync::Once; - -// Logging - -static INIT: Once = Once::new(); - -pub fn setup_logging(term_level: Option, file_level: Option) { - INIT.call_once(|| init_logging(term_level, file_level)); -} - -fn init_logging(term_level: Option, file_level: Option) { - let logfilename = &format!( - "{}/xvc-{}.log", - env::temp_dir().to_string_lossy(), - chrono::Local::now().format("%Y%m%d-%H%M%S") - ); - - let logfile = Path::new(&logfilename); - - let mut dispatch = fern::Dispatch::new().format(|out, message, record| { - out.finish(format_args!( - "[{}][{}:{}] {}", - record.level(), - // chrono::Local::now().format("[%H:%M:%S]"), - // record.target(), - record.file().get_or_insert("None"), - record.line().get_or_insert(0), - message - )) - }); - - if let Some(level) = term_level { - dispatch = dispatch.level(level).chain(std::io::stderr()); - } - - match dispatch.apply() { - Ok(_) => { - if let Some(level) = term_level { - debug!("Terminal logger enabled with level: {:?}", level); - }; - if let Some(level) = file_level { - debug!( - "File logger enabled with level: {:?} to {:?}", - level, logfile - ); - }; - } - Err(err) => { - error!("Error enabling logger: {:?}", err); - } - }; -} diff --git a/pipeline/Cargo.toml b/pipeline/Cargo.toml index c7cfd3ecb..23f4d8816 100644 --- 
a/pipeline/Cargo.toml +++ b/pipeline/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "xvc-pipeline" -version = "0.6.10" +version = "0.6.11" edition = "2021" description = "Xvc data pipeline management" authors = ["Emre Şahin "] @@ -19,15 +19,15 @@ default = [] bundled-sqlite = ["rusqlite/bundled"] [dependencies] -xvc-config = { version = "0.6.10", path = "../config" } -xvc-core = { version = "0.6.10", path = "../core" } -xvc-ecs = { version = "0.6.10", path = "../ecs" } -xvc-logging = { version = "0.6.10", path = "../logging" } -xvc-walker = { version = "0.6.10", path = "../walker" } -xvc-file = { version = "0.6.10", path = "../file", default-features = false } +xvc-config = { version = "0.6.11", path = "../config" } +xvc-core = { version = "0.6.11", path = "../core" } +xvc-ecs = { version = "0.6.11", path = "../ecs" } +xvc-logging = { version = "0.6.11", path = "../logging" } +xvc-walker = { version = "0.6.11", path = "../walker" } +xvc-file = { version = "0.6.11", path = "../file", default-features = false } ## Cli and config -clap = { version = "^4.4", features = ["derive"] } +clap = { version = "^4.5", features = ["derive"] } directories-next = "2.0" ## Hashing @@ -43,14 +43,14 @@ serde = { version = "^1.0", features = ["derive"] } serde_yaml = "^0.9" serde_json = "^1.0" rmp = "^0.8" -rmp-serde = "1.1.2" +rmp-serde = "1.3.0" toml = "^0.8" ## Caching cached = "^0.53" ## Parallelization -rayon = "^1.8" +rayon = "^1.10" crossbeam-channel = "^0.5" crossbeam = "^0.8" @@ -76,7 +76,7 @@ fallible-iterator = "^0.3" reqwest = { version = "^0.11", features = ["blocking", "json", "gzip"] } ## UI -comfy-table = "7.1.0" +comfy-table = "7.1.1" ## Graphs petgraph = "^0.6" @@ -90,13 +90,13 @@ regex = "^1.10" chrono = "^0.4" strum = "^0.26" strum_macros = "^0.26" -lazy_static = "^1.4" -uuid = { version = "^1.6", features = ["serde", "v4", "fast-rng"] } +lazy_static = "^1.5" +uuid = { version = "^1.10", features = ["serde", "v4", "fast-rng"] } hex = { version = "^0.4", features = 
["serde"] } url = { version = "^2.5", features = ["serde"] } itertools = "^0.13" derive_more = "^0.99" [dev-dependencies] -xvc-test-helper = { version = "0.6.10", path = "../test_helper/" } +xvc-test-helper = { version = "0.6.11", path = "../test_helper/" } test-case = "^3.3" diff --git a/run-tests.zsh b/run-tests.zsh new file mode 100755 index 000000000..f84e8cece --- /dev/null +++ b/run-tests.zsh @@ -0,0 +1,3 @@ +# XVC_TRYCMD_TESTS=storage,file,pipeline,core,start TRYCMD=overwrite cargo test -p xvc --test z_test_docs + +cargo test --features test-ci -p xvc --test test_storage_new_minio diff --git a/storage/Cargo.toml b/storage/Cargo.toml index 73d1132fa..78fa2e7d3 100644 --- a/storage/Cargo.toml +++ b/storage/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "xvc-storage" -version = "0.6.10" +version = "0.6.11" edition = "2021" description = "Xvc remote and local storage management" authors = ["Emre Şahin "] @@ -16,14 +16,14 @@ name = "xvc_storage" crate-type = ["rlib"] [dependencies] -xvc-logging = { version = "0.6.10", path = "../logging" } -xvc-config = { version = "0.6.10", path = "../config" } -xvc-core = { version = "0.6.10", path = "../core" } -xvc-ecs = { version = "0.6.10", path = "../ecs" } -xvc-walker = { version = "0.6.10", path = "../walker" } +xvc-logging = { version = "0.6.11", path = "../logging" } +xvc-config = { version = "0.6.11", path = "../config" } +xvc-core = { version = "0.6.11", path = "../core" } +xvc-ecs = { version = "0.6.11", path = "../ecs" } +xvc-walker = { version = "0.6.11", path = "../walker" } ## Cli and config -clap = { version = "^4.4", features = ["derive"] } +clap = { version = "^4.5", features = ["derive"] } directories-next = "2.0" ## Hashing @@ -39,17 +39,17 @@ serde = { version = "^1.0", features = ["derive"] } serde_yaml = "^0.9" serde_json = "^1.0" rmp = "^0.8" -rmp-serde = "1.1.2" +rmp-serde = "1.3.0" toml = "^0.8" ## Parallelization -rayon = "^1.8" +rayon = "^1.10" crossbeam-channel = "^0.5" crossbeam = "^0.8" ## File 
system jwalk = "^0.8" -walkdir = "^2.4" +walkdir = "^2.5" relative-path = { version = "^1.9", features = ["serde"] } path-absolutize = "^3.1" glob = "^0.3" @@ -73,17 +73,17 @@ regex = "^1.10" strum = "^0.26" strum_macros = "^0.26" lazy_static = "^1.5" -uuid = { version = "^1.6", features = ["serde", "v4", "fast-rng"] } +uuid = { version = "^1.10", features = ["serde", "v4", "fast-rng"] } hex = { version = "^0.4", features = ["serde"] } url = { version = "^2.5", features = ["serde"] } itertools = "^0.13" derive_more = "^0.99" -tempfile = "^3.9" +tempfile = "^3.11" ## Networking & Async -tokio = { version = "^1.35", optional = true, features = ["rt-multi-thread"] } +tokio = { version = "^1.39", optional = true, features = ["rt-multi-thread"] } rust-s3 = { version = "^0.34", optional = true } futures = { version = "^0.3", optional = true } @@ -104,7 +104,7 @@ bundled-openssl = ["openssl/vendored"] [dev-dependencies] -xvc-test-helper = { version = "0.6.10", path = "../test_helper/" } +xvc-test-helper = { version = "0.6.11", path = "../test_helper/" } shellfn = "^0.1" [package.metadata.cargo-udeps.ignore] diff --git a/test_helper/Cargo.toml b/test_helper/Cargo.toml index 0bd4cc62f..7bab90fca 100644 --- a/test_helper/Cargo.toml +++ b/test_helper/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "xvc-test-helper" -version = "0.6.10" +version = "0.6.11" edition = "2021" description = "Unit test helper functions for Xvc" authors = ["Emre Şahin "] @@ -20,9 +20,9 @@ path = "src/main.rs" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] -xvc-logging = { version = "0.6.10", path = "../logging/" } +xvc-logging = { version = "0.6.11", path = "../logging/" } rand = "^0.8" log = "^0.4" anyhow = "^1.0" -clap = { version = "^4.4", features = ["derive"] } +clap = { version = "^4.5", features = ["derive"] } diff --git a/walker/Cargo.toml b/walker/Cargo.toml index 7eb5c786f..0eecee147 100644 --- a/walker/Cargo.toml +++ 
b/walker/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "xvc-walker" -version = "0.6.10" +version = "0.6.11" edition = "2021" description = "Xvc parallel file system walker with ignore features" authors = ["Emre Şahin "] @@ -16,15 +16,17 @@ name = "xvc_walker" crate-type = ["rlib"] [dependencies] -xvc-logging = { version = "0.6.10", path = "../logging" } -globset = "^0.4" +xvc-logging = { version = "0.6.11", path = "../logging" } +fast-glob = "^0.3" ## Parallelization crossbeam-channel = "^0.5" crossbeam = "^0.8" +rayon = "^1.10" ## File system notify = "6.1.1" +jwalk = "^0.8" dashmap = "^6" @@ -32,13 +34,14 @@ dashmap = "^6" thiserror = "^1.0" anyhow = "^1.0" log = "^0.4" +peak_alloc = "^0.2" ## Misc itertools = "^0.13" regex = "^1.10" [dev-dependencies] -xvc-test-helper = { path = "../test_helper/", version = "0.6.10" } +xvc-test-helper = { path = "../test_helper/", version = "0.6.11" } test-case = "^3.3" [package.metadata.cargo-udeps.ignore] diff --git a/walker/src/error.rs b/walker/src/error.rs index fa4f92de1..8fffe1e8a 100644 --- a/walker/src/error.rs +++ b/walker/src/error.rs @@ -18,12 +18,6 @@ pub enum Error { #[error("Ignore rules poisoned")] LockPoisonError { t: String, cause: String }, - #[error("Glob error: {source}")] - GlobError { - #[from] - source: globset::Error, - }, - #[error("File System Notify Error: {source:?}")] NotifyError { #[from] diff --git a/walker/src/ignore_rules.rs b/walker/src/ignore_rules.rs index 9db7d5b8b..5ae2f280b 100644 --- a/walker/src/ignore_rules.rs +++ b/walker/src/ignore_rules.rs @@ -1,11 +1,13 @@ //! Ignore patterns for a directory and its child directories. 
+use crate::{Result, Source}; use std::path::{Path, PathBuf}; use std::sync::{Arc, RwLock}; -use globset::GlobSet; -use itertools::Itertools; +use crate::pattern::{MatchResult, Pattern}; +use rayon::prelude::*; -use crate::{build_globset, content_to_patterns, GlobPattern, PatternEffect, Result}; +use fast_glob::glob_match; +use xvc_logging::watch; /// Complete set of ignore rules for a directory and its child directories. #[derive(Debug, Clone)] @@ -14,117 +16,201 @@ pub struct IgnoreRules { /// Typically this is the root directory of Git or Xvc repository. pub root: PathBuf, + /// The name of the ignore file (e.g. `.xvcignore`, `.gitignore`) to be loaded for ignore rules. + pub ignore_filename: Option, + /// All ignore patterns collected from ignore files or specified in code. - pub ignore_patterns: Arc>>, + pub ignore_patterns: Arc>>, /// All whitelist patterns collected from ignore files or specified in code - pub whitelist_patterns: Arc>>, - - /// Compiled [GlobSet] for whitelisted paths. - pub whitelist_set: Arc>, - - /// Compiled [GlobSet] for ignored paths. - pub ignore_set: Arc>, + pub whitelist_patterns: Arc>>, } +/// IgnoreRules shared across threads. +pub type SharedIgnoreRules = Arc>; + impl IgnoreRules { /// An empty set of ignore rules that neither ignores nor whitelists any path. - pub fn empty(dir: &Path) -> Self { + pub fn empty(dir: &Path, ignore_filename: Option<&str>) -> Self { IgnoreRules { root: PathBuf::from(dir), - ignore_patterns: Arc::new(RwLock::new(Vec::::new())), - whitelist_patterns: Arc::new(RwLock::new(Vec::::new())), - ignore_set: Arc::new(RwLock::new(GlobSet::empty())), - whitelist_set: Arc::new(RwLock::new(GlobSet::empty())), + ignore_filename: ignore_filename.map(|s| s.to_string()), + ignore_patterns: Arc::new(RwLock::new(Vec::::new())), + whitelist_patterns: Arc::new(RwLock::new(Vec::::new())), } } - /// Compiles patterns as [Source::Global] and initializes the elements. 
- pub fn try_from_patterns(root: &Path, patterns: &str) -> Result { - let patterns = content_to_patterns(root, None, patterns) - .into_iter() - .map(|pat_res_g| pat_res_g.map(|res_g| res_g.unwrap())) - .collect(); - let mut initialized = Self::empty(&PathBuf::from(root)); - - initialized.update(patterns)?; - Ok(initialized) + pub fn from_global_patterns( + ignore_root: &Path, + ignore_filename: Option<&str>, + given: &str, + ) -> Self { + let mut given_patterns = Vec::::new(); + // Add given patterns to ignore_patterns + for line in given.lines() { + let pattern = Pattern::new(Source::Global, line); + given_patterns.push(pattern); + } + IgnoreRules::from_patterns(ignore_root, ignore_filename, given_patterns) } - /// Adds `new_patterns` to the list of patterns and recompiles ignore and - /// whitelist [GlobSet]s. - pub fn update(&mut self, new_patterns: Vec) -> Result<()> { - let (new_ignore_patterns, new_whitelist_patterns): (Vec<_>, Vec<_>) = new_patterns - .into_iter() - .partition(|p| matches!(p.effect, PatternEffect::Ignore)); - self.update_ignore(&new_ignore_patterns)?; - self.update_whitelist(&new_whitelist_patterns)?; - Ok(()) + /// Constructs a new `IgnoreRules` instance from a vector of patterns and a root path. + /// + /// This function separates the patterns into ignore patterns and whitelist patterns + /// based on their `PatternEffect`. It then stores these patterns and the root path + /// in a new `IgnoreRules` instance. + /// + /// # Arguments + /// + /// * `patterns` - A vector of `Pattern` instances to be used for creating the `IgnoreRules`. + /// * `ignore_root` - A reference to the root path for the ignore rules. + /// + /// # Returns + /// + /// A new `IgnoreRules` instance containing the given patterns and root path. 
+ pub fn from_patterns( + ignore_root: &Path, + ignore_filename: Option<&str>, + mut patterns: Vec, + ) -> Self { + let mut ignore_patterns = Vec::new(); + let mut whitelist_patterns = Vec::new(); + patterns + .drain(0..patterns.len()) + .for_each(|pattern| match pattern.effect { + crate::PatternEffect::Ignore => ignore_patterns.push(pattern), + crate::PatternEffect::Whitelist => whitelist_patterns.push(pattern), + }); + IgnoreRules { + root: PathBuf::from(ignore_root), + ignore_filename: ignore_filename.map(|s| s.to_string()), + ignore_patterns: Arc::new(RwLock::new(ignore_patterns)), + whitelist_patterns: Arc::new(RwLock::new(whitelist_patterns)), + } } - /// Merge with other ignore rules, extending this one's patterns and rebuilding glob sets. - pub fn merge_with(&mut self, other: &IgnoreRules) -> Result<()> { - assert_eq!(self.root, other.root); + /// Checks if a given path matches any of the whitelist or ignore patterns. + /// + /// The function first checks if the path matches any of the whitelist patterns. + /// If a match is found, it returns `MatchResult::Whitelist`. + /// + /// If the path does not match any of the whitelist patterns, the function then checks + /// if the path matches any of the ignore patterns. If a match is found, it returns + /// `MatchResult::Ignore`. + /// + /// If the path does not match any of the whitelist or ignore patterns, the function + /// returns `MatchResult::NoMatch`. + /// + /// # Arguments + /// + /// * `path` - A reference to the path to check. + /// + /// # Returns + /// + /// * `MatchResult::Whitelist` if the path matches a whitelist pattern. + /// * `MatchResult::Ignore` if the path matches an ignore pattern. + /// * `MatchResult::NoMatch` if the path does not match any pattern. 
+ pub fn check(&self, path: &Path) -> MatchResult { + let is_abs = path.is_absolute(); + // strip_prefix eats the final slash, and ends_with behave differently than str, so we work + // around here + let path_str = path.to_string_lossy(); + let final_slash = path_str.ends_with('/'); + + let path = if is_abs { + if final_slash { + format!( + "/{}/", + path.strip_prefix(&self.root) + .expect("path must be within root") + .to_string_lossy() + ) + } else { + format!( + "/{}", + path.strip_prefix(&self.root) + .expect("path must be within root") + .to_string_lossy() + ) + } + } else { + path_str.to_string() + }; - self.update_ignore(&other.ignore_patterns.read().unwrap())?; - self.update_whitelist(&other.whitelist_patterns.read().unwrap())?; - Ok(()) - } - - fn update_whitelist(&mut self, new_whitelist_patterns: &[GlobPattern]) -> Result<()> { - assert!(new_whitelist_patterns - .iter() - .all(|p| matches!(p.effect, PatternEffect::Whitelist))); { - let mut whitelist_patterns = self.whitelist_patterns.write()?; - - *whitelist_patterns = whitelist_patterns - .iter() - .chain(new_whitelist_patterns.iter()) - .unique() - .cloned() - .collect(); + let whitelist_patterns = self.whitelist_patterns.read().unwrap(); + if let Some(p) = whitelist_patterns + .par_iter() + .find_any(|pattern| glob_match(&pattern.glob, &path)) + { + watch!(p); + return MatchResult::Whitelist; + } } { - let whitelist_globs = self - .whitelist_patterns - .read()? - .iter() - .map(|g| g.pattern.clone()) - .collect(); - let whitelist_set = build_globset(whitelist_globs)?; - *self.whitelist_set.write()? 
= whitelist_set; + let ignore_patterns = self.ignore_patterns.read().unwrap(); + if let Some(p) = ignore_patterns + .par_iter() + .find_any(|pattern| glob_match(&pattern.glob, &path)) + { + watch!(p); + return MatchResult::Ignore; + } } - Ok(()) + MatchResult::NoMatch } - fn update_ignore(&mut self, new_ignore_patterns: &[GlobPattern]) -> Result<()> { - assert!(new_ignore_patterns - .iter() - .all(|p| matches!(p.effect, PatternEffect::Ignore))); + + /// Merges the ignore and whitelist patterns of another `IgnoreRules` instance into this one. + /// + /// This function locks the ignore and whitelist patterns of both `IgnoreRules` instances, + /// drains the patterns from the other instance, and pushes them into this instance. + /// The other instance is left empty after this operation. + /// + /// # Arguments + /// + /// * `other` - A reference to the other `IgnoreRules` instance to merge with. + /// + /// # Returns + /// + /// * `Ok(())` if the merge operation was successful. + /// * `Err` if the merge operation failed. + /// + /// # Panics + /// + /// This function will panic if the roots of the two `IgnoreRules` instances are not equal. + pub fn merge_with(&self, other: &IgnoreRules) -> Result<()> { + assert_eq!(self.root, other.root); + { - let mut ignore_patterns = self.ignore_patterns.write()?; - - *ignore_patterns = ignore_patterns - .iter() - .chain(new_ignore_patterns.iter()) - .unique() - .cloned() - .collect(); + let mut ignore_patterns = self.ignore_patterns.write().unwrap(); + let mut other_ignore_patterns = other.ignore_patterns.write().unwrap(); + let len = other_ignore_patterns.len(); + other_ignore_patterns + .drain(0..len) + .for_each(|p| ignore_patterns.push(p)); } { - let ignore_globs = self - .ignore_patterns - .read()? - .iter() - .map(|g| g.pattern.clone()) - .collect(); - let ignore_set = build_globset(ignore_globs)?; - *self.ignore_set.write()? 
= ignore_set; + let mut whitelist_patterns = self.whitelist_patterns.write().unwrap(); + let mut other_whitelist_patterns = other.whitelist_patterns.write().unwrap(); + let len = other_whitelist_patterns.len(); + other_whitelist_patterns + .drain(0..len) + .for_each(|p| whitelist_patterns.push(p)); } Ok(()) } + /// Adds a list of patterns to the current ignore rules. + /// + /// # Arguments + /// + /// * `patterns` - A vector of patterns to be added to the ignore rules. + + pub fn add_patterns(&self, patterns: Vec) -> Result<()> { + let other = IgnoreRules::from_patterns(&self.root, None, patterns); + self.merge_with(&other) + } } diff --git a/walker/src/lib.rs b/walker/src/lib.rs index f2b49efa4..e87a222cb 100755 --- a/walker/src/lib.rs +++ b/walker/src/lib.rs @@ -12,30 +12,40 @@ pub mod abspath; pub mod error; pub mod ignore_rules; pub mod notify; +pub mod pattern; pub mod sync; +pub mod walk_parallel; +pub mod walk_serial; + +pub use pattern::MatchResult; +pub use pattern::PathKind; +pub use pattern::Pattern; +pub use pattern::PatternEffect; +pub use pattern::PatternRelativity; +pub use pattern::Source; + +pub use walk_parallel::walk_parallel; +pub use walk_serial::walk_serial; pub use abspath::AbsolutePath; -use crossbeam::queue::SegQueue; pub use error::{Error, Result}; + pub use ignore_rules::IgnoreRules; +pub use ignore_rules::SharedIgnoreRules; + pub use notify::make_watcher; +use std::ffi::OsStr; pub use std::hash::Hash; -use std::sync::Arc; -use std::sync::Mutex; -use std::sync::RwLock; pub use sync::{PathSync, PathSyncSingleton}; -use xvc_logging::debug; use xvc_logging::warn; -use xvc_logging::XvcOutputSender; pub use notify::PathEvent; pub use notify::RecommendedWatcher; use xvc_logging::watch; -use crossbeam_channel::Sender; // use glob::{MatchOptions, Pattern, PatternError}; -pub use globset::{self, Glob, GlobSet, GlobSetBuilder}; +pub use fast_glob::Glob; use std::{ ffi::OsString, fmt::Debug, @@ -56,124 +66,6 @@ pub struct PathMetadata { pub 
metadata: Metadata, } -/// Show whether a path matches to a glob rule -#[derive(Debug, Clone)] -pub enum MatchResult { - /// There is no match between glob(s) and path - NoMatch, - /// Path matches to ignored glob(s) - Ignore, - /// Path matches to whitelisted glob(s) - Whitelist, -} - -/// Is the pattern matches anywhere or only relative to a directory? -#[derive(Debug, Clone, Hash, PartialEq, Eq)] -pub enum PatternRelativity { - /// Match the path regardless of the directory prefix - Anywhere, - /// Match the path if it only starts with `directory` - RelativeTo { - /// The directory that the pattern must have as prefix to be considered a match - directory: String, - }, -} - -/// Is the path only a directory, or could it be directory or file? -#[derive(Debug, Clone, Hash, PartialEq, Eq)] -pub enum PathKind { - /// Path matches to directory or file - Any, - /// Path matches only to directory - Directory, -} - -/// Is this pattern a ignore or whitelist patter? -#[derive(Debug, Clone, Eq, PartialEq, Hash)] -pub enum PatternEffect { - /// This is an ignore pattern - Ignore, - /// This is a whitelist pattern - Whitelist, -} - -/// Do we get this pattern from a file (.gitignore, .xvcignore, ...) or specify it directly in -/// code? -#[derive(Debug, Clone, Hash, PartialEq, Eq)] -pub enum Source { - /// Pattern is obtained from file - File { - /// Path of the pattern file - path: PathBuf, - /// (1-based) line number the pattern retrieved - line: usize, - }, - /// Pattern is globally defined in code - Global, -} - -/// Pattern is generic and could be an instance of String, Glob, Regex or any other object. -/// The type is evolved by compiling. -/// A pattern can start its life as `Pattern` and can be compiled into `Pattern` or -/// `Pattern`. 
-#[derive(Debug, Clone, Eq, PartialEq, Hash)] -pub struct Pattern -where - T: PartialEq + Hash, -{ - /// The pattern type - pub pattern: T, - /// The original string that defines the pattern - original: String, - /// Where did we get this pattern? - source: Source, - /// Is this ignore or whitelist pattern? - effect: PatternEffect, - /// Does it have an implied prefix? - relativity: PatternRelativity, - /// Is the path a directory or anything? - path_kind: PathKind, -} - -impl Pattern { - /// Runs a function (like `compile`) on `pattern` to get a new pattern. - pub fn map(self, f: F) -> Pattern - where - U: PartialEq + Hash, - F: FnOnce(T) -> U, - { - Pattern:: { - pattern: f(self.pattern), - original: self.original, - source: self.source, - effect: self.effect, - relativity: self.relativity, - path_kind: self.path_kind, - } - } -} - -impl Pattern> { - /// Convert from `Pattern>` to `Result>` to get the result from - /// [Self::map] - fn transpose(self) -> Result> { - match self.pattern { - Ok(p) => Ok(Pattern:: { - pattern: p, - original: self.original, - source: self.source, - effect: self.effect, - relativity: self.relativity, - path_kind: self.path_kind, - }), - Err(e) => Err(e), - } - } -} - -/// One of the concrete types that can represent a pattern. -type GlobPattern = Pattern; - /// What's the ignore file name and should we add directories to the result? #[derive(Debug, Clone)] pub struct WalkOptions { @@ -191,7 +83,7 @@ impl WalkOptions { /// directories in results. pub fn gitignore() -> Self { Self { - ignore_filename: Some(".gitignore".to_owned()), + ignore_filename: Some(".gitignore".into()), include_dirs: true, } } @@ -200,7 +92,7 @@ impl WalkOptions { /// directories in results. 
pub fn xvcignore() -> Self { Self { - ignore_filename: Some(".xvcignore".to_owned()), + ignore_filename: Some(".xvcignore".into()), include_dirs: true, } } @@ -223,394 +115,61 @@ impl WalkOptions { } } -fn walk_parallel_inner( - ignore_rules: Arc>, - dir: &Path, - walk_options: WalkOptions, - path_sender: Sender>, - ignore_sender: Sender>>>, -) -> Result> { - let child_paths: Vec = directory_list(dir)? - .into_iter() - .filter_map(|pm_res| match pm_res { - Ok(pm) => Some(pm), - Err(e) => { - path_sender - .send(Err(e)) - .expect("Channel error in walk_parallel"); - None - } - }) - .collect(); - - let dir_with_ignores = if let Some(ignore_filename) = walk_options.ignore_filename.clone() { - let ignore_filename = OsString::from(ignore_filename); - if let Some(ignore_path_metadata) = child_paths - .iter() - .find(|pm| pm.path.file_name() == Some(&ignore_filename)) - { - let ignore_path = dir.join(&ignore_path_metadata.path); - let new_patterns = clear_glob_errors( - &path_sender, - patterns_from_file(&ignore_rules.read()?.root, &ignore_path)?, - ); - watch!(new_patterns); - ignore_rules.write()?.update(new_patterns)?; - watch!(ignore_rules); - ignore_sender.send(Ok(ignore_rules.clone()))?; - ignore_rules - } else { - ignore_rules - } - } else { - ignore_rules - }; +/// Build the ignore rules with the given directory +pub fn build_ignore_patterns( + given: &str, + ignore_root: &Path, + ignore_filename: &str, +) -> Result { + watch!(ignore_filename); + watch!(ignore_root); - let mut child_dirs = Vec::::new(); - watch!(child_paths); + let ignore_rules = IgnoreRules::from_global_patterns(ignore_root, Some(ignore_filename), given); - for child_path in child_paths { - match check_ignore(&(*dir_with_ignores.read()?), child_path.path.as_ref()) { - MatchResult::NoMatch | MatchResult::Whitelist => { - watch!(child_path.path); - if child_path.metadata.is_dir() { - if walk_options.include_dirs { - path_sender.send(Ok(child_path.clone()))?; + let dirs_under = |p: &Path| -> Vec { + 
p.read_dir() + .unwrap() + .filter_map(|p| { + if let Ok(p) = p { + if p.path().is_dir() { + Some(p.path()) + } else { + None } - child_dirs.push(child_path); } else { - path_sender.send(Ok(child_path.clone()))?; - } - } - // We can return anyhow! error here to notice the user that the path is ignored - MatchResult::Ignore => { - watch!(child_path.path); - } - } - } - - Ok(child_dirs) -} - -/// Walk all child paths under `dir` and send non-ignored paths to `path_sender`. -/// Newly found ignore rules are sent through `ignore_sender`. -/// The ignore file name (`.xvcignore`, `.gitignore`, `.ignore`, ...) is set by `walk_options`. -/// -/// It lists elements of a directory, then creates a new crossbeam scope for each child directory and -/// calls itself recursively. It may not be feasible for small directories to create threads. -pub fn walk_parallel( - ignore_rules: IgnoreRules, - dir: &Path, - walk_options: WalkOptions, - path_sender: Sender>, - ignore_sender: Sender>>>, -) -> Result<()> { - let dir_queue = Arc::new(SegQueue::::new()); - - let ignore_rules = Arc::new(RwLock::new(ignore_rules.clone())); - - let child_dirs = walk_parallel_inner( - ignore_rules.clone(), - dir, - walk_options.clone(), - path_sender.clone(), - ignore_sender.clone(), - )?; - - child_dirs.into_iter().for_each(|pm| { - dir_queue.push(pm); - }); - - if dir_queue.is_empty() { - return Ok(()); - } - - crossbeam::scope(|s| { - for thread_i in 0..MAX_THREADS_PARALLEL_WALK { - let path_sender = path_sender.clone(); - let ignore_sender = ignore_sender.clone(); - let walk_options = walk_options.clone(); - let ignore_rules = ignore_rules.clone(); - let dir_queue = dir_queue.clone(); - - s.spawn(move |_| { - watch!(path_sender); - watch!(ignore_sender); - while let Some(pm) = dir_queue.pop() { - let child_dirs = walk_parallel_inner( - ignore_rules.clone(), - &pm.path, - walk_options.clone(), - path_sender.clone(), - ignore_sender.clone(), - ) - .unwrap_or_else(|e| { - path_sender - .send(Err(e)) - 
.expect("Channel error in walk_parallel"); - Vec::::new() - }); - for child_dir in child_dirs { - dir_queue.push(child_dir); - } - } - watch!("End of thread {}", thread_i); - }); - } - }) - .expect("Error in crossbeam scope in walk_parallel"); - - watch!("End of walk_parallel"); - - Ok(()) -} - -/// Walk `dir` with `walk_options`, with the given _initial_ `ignore_rules`. -/// Note that ignore rules are expanded with the rules given in the `ignore_filename` in -/// `walk_options`. -/// The result is added to given `res_paths` to reduce the number of memory inits for vec. -/// -/// It collects all [`PathMetadata`] of the child paths. -/// Filters paths with the rules found in child directories and the given `ignore_rules`. -pub fn walk_serial( - output_snd: &XvcOutputSender, - ignore_rules: IgnoreRules, - dir: &Path, - walk_options: &WalkOptions, -) -> Result<(Vec, IgnoreRules)> { - let ignore_filename = walk_options.ignore_filename.clone().map(OsString::from); - let ignore_rules = Arc::new(Mutex::new(ignore_rules.clone())); - let dir_stack = crossbeam::queue::SegQueue::new(); - let res_paths = Arc::new(Mutex::new(Vec::::new())); - - dir_stack.push(dir.to_path_buf()); - - let get_child_paths = |dir: &Path| -> Result> { - Ok(directory_list(dir)? - .into_iter() - .filter_map(|pm_res| match pm_res { - Ok(pm) => Some(pm), - Err(e) => { - warn!(output_snd, "{}", e); None } }) - .collect()) + .filter_map(|p| match ignore_rules.check(&p) { + MatchResult::NoMatch | MatchResult::Whitelist => Some(p), + MatchResult::Ignore => None, + }) + .collect() }; - let update_ignore_rules = |child_paths: &Vec| -> Result<()> { - if let Some(ref ignore_filename) = &ignore_filename { - watch!(ignore_filename); - if let Some(ignore_path_metadata) = child_paths - .iter() - .find(|pm| pm.path.file_name() == Some(ignore_filename)) - { - let ignore_path = dir.join(&ignore_path_metadata.path); - let new_patterns: Vec = - patterns_from_file(&ignore_rules.lock()?.root, &ignore_path)? 
- .into_iter() - .filter_map(|res_p| match res_p.pattern { - Ok(_) => Some(res_p.map(|p| p.unwrap())), - Err(e) => { - warn!(output_snd, "{}", e); - None - } - }) - .collect(); + let mut dir_stack: Vec = vec![ignore_root.to_path_buf()]; - ignore_rules.lock()?.update(new_patterns)?; - } - } - Ok(()) - }; + let ignore_fn = ignore_rules.ignore_filename.as_deref().unwrap(); - let filter_child_paths = |child_paths: &Vec| -> Result<()> { - for child_path in child_paths { - watch!(child_path.path); - let ignore_res = check_ignore(&(*ignore_rules.lock()?), child_path.path.as_ref()); - watch!(ignore_res); - match ignore_res { - MatchResult::NoMatch | MatchResult::Whitelist => { - watch!(child_path); - if child_path.metadata.is_dir() { - watch!("here"); - if walk_options.include_dirs { - watch!("here2"); - res_paths.lock()?.push(child_path.clone()); - } - watch!("here3"); - dir_stack.push(child_path.path.clone()); - watch!("here4"); - } else { - watch!("here5"); - res_paths.lock()?.push(child_path.clone()); - watch!("here6"); - } - } - // We can return anyhow! 
error here to notice the user that the path is ignored - MatchResult::Ignore => { - debug!(output_snd, "Ignored: {:?}", child_path.path); - } - } - watch!(child_path); - } - Ok(()) - }; - - while let Some(dir) = { dir_stack.pop().clone() } { + while let Some(dir) = dir_stack.pop() { watch!(dir); - let dir = dir.clone(); - watch!(dir); - let child_paths = get_child_paths(&dir)?; - watch!(child_paths); - update_ignore_rules(&child_paths)?; - filter_child_paths(&child_paths)?; - } - - let res_paths: Vec = res_paths.lock()?.clone(); - let ignore_rules = ignore_rules.lock()?.clone(); - - Ok((res_paths, ignore_rules)) -} - -/// Just build the ignore rules with the given directory -pub fn build_ignore_rules( - given: IgnoreRules, - dir: &Path, - ignore_filename: &str, -) -> Result { - let elements = dir - .read_dir() - .map_err(|e| anyhow!("Error reading directory: {:?}, {:?}", dir, e))?; - - let mut child_dirs = Vec::::new(); - let ignore_fn = OsString::from(ignore_filename); - xvc_logging::watch!(ignore_fn); - let ignore_root = given.root.clone(); - xvc_logging::watch!(ignore_root); - let mut ignore_rules = given; - let mut new_patterns: Option> = None; - - for entry in elements { - match entry { - Ok(entry) => { - if entry.path().is_dir() { - xvc_logging::watch!(entry.path()); - child_dirs.push(entry.path()); - } - if entry.file_name() == ignore_fn && entry.path().exists() { - let ignore_path = entry.path(); - watch!(ignore_path); - new_patterns = Some( - patterns_from_file(&ignore_root, &ignore_path)? 
- .into_iter() - .filter_map(|p| match p.transpose() { - Ok(p) => Some(p), - Err(e) => { - warn!("{:?}", e); - None - } - }) - .collect(), - ); - } - } - Err(e) => { - warn!("{}", e); - } - } - } - - if let Some(new_patterns) = new_patterns { - ignore_rules.update(new_patterns)?; - } - - for child_dir in child_dirs { - match check_ignore(&ignore_rules, &child_dir) { - MatchResult::NoMatch | MatchResult::Whitelist => { - ignore_rules = build_ignore_rules(ignore_rules, &child_dir, ignore_filename)?; - } - MatchResult::Ignore => {} + let ignore_filename = dir.join(ignore_fn); + watch!(ignore_filename); + if ignore_filename.is_file() { + let ignore_content = fs::read_to_string(&ignore_filename)?; + let new_patterns = + content_to_patterns(ignore_root, Some(&ignore_filename), &ignore_content); + ignore_rules.add_patterns(new_patterns)?; } + let mut new_dirs = dirs_under(&dir); + watch!(new_dirs); + dir_stack.append(&mut new_dirs); + watch!(dir_stack); } Ok(ignore_rules) } -fn clear_glob_errors( - sender: &Sender>, - new_patterns: Vec>>, -) -> Vec> { - let new_glob_patterns: Vec> = new_patterns - .into_iter() - .filter_map(|p| match p.transpose() { - Ok(p) => Some(p), - Err(e) => { - sender - .send(Err(Error::from(anyhow!("Error in glob pattern: {:?}", e)))) - .expect("Error in channel"); - None - } - }) - .collect(); - new_glob_patterns -} - -fn transform_pattern_for_glob(pattern: Pattern) -> Pattern { - let anything_anywhere = |p| format!("**/{p}"); - let anything_relative = |p, directory| format!("{directory}/**/{p}"); - let directory_anywhere = |p| format!("**{p}/**"); - let directory_relative = |p, directory| format!("{directory}/**/{p}/**"); - - let transformed_pattern = match (&pattern.path_kind, &pattern.relativity) { - (PathKind::Any, PatternRelativity::Anywhere) => anything_anywhere(pattern.pattern), - (PathKind::Any, PatternRelativity::RelativeTo { directory }) => { - anything_relative(pattern.pattern, directory) - } - (PathKind::Directory, 
PatternRelativity::Anywhere) => directory_anywhere(pattern.pattern), - (PathKind::Directory, PatternRelativity::RelativeTo { directory }) => { - directory_relative(pattern.pattern, directory) - } - }; - - Pattern { - pattern: transformed_pattern, - ..pattern - } -} - -fn build_globset(patterns: Vec) -> Result { - let mut gs_builder = GlobSetBuilder::new(); - - for p in patterns { - gs_builder.add(p.clone()); - } - gs_builder - .build() - .map_err(|e| anyhow!("Error building glob set: {:?}", e).into()) -} - -fn patterns_from_file( - ignore_root: &Path, - ignore_path: &Path, -) -> Result>>> { - watch!(ignore_root); - watch!(ignore_path); - let content = fs::read_to_string(ignore_path).with_context(|| { - format!( - "Cannot read file: {:?}\n - If the file is present, it may be an encoding issue. Please check if it's UTF-8 encoded.", - ignore_path - ) - })?; - watch!(&content); - Ok(content_to_patterns( - ignore_root, - Some(ignore_path), - &content, - )) -} - /// convert a set of rules in `content` to glob patterns. /// patterns may come from `source`. /// the root directory of all search is in `ignore_root`. @@ -618,8 +177,8 @@ pub fn content_to_patterns( ignore_root: &Path, source: Option<&Path>, content: &str, -) -> Vec>> { - let patterns: Vec>> = content +) -> Vec { + let patterns: Vec = content .lines() .enumerate() // A line starting with # serves as a comment. Put a backslash ("\") in front of the first hash for patterns that begin with a hash. @@ -648,141 +207,27 @@ pub fn content_to_patterns( }, ) }) - .map(|(line, source)| build_pattern(source, line)) - .map(transform_pattern_for_glob) - .map(|pc| pc.map(|s| Glob::new(&s).map_err(Error::from))) + .map(|(line, source)| Pattern::new(source, line)) .collect(); patterns } -fn build_pattern(source: Source, original: &str) -> Pattern { - let current_dir = match &source { - Source::Global => "".to_string(), - Source::File { path, .. 
} => { - let path = path - .parent() - .expect("Pattern source file doesn't have parent") - .to_string_lossy() - .to_string(); - if path.starts_with('/') { - path - } else { - format!("/{path}") - } +pub fn update_ignore_rules(dir: &Path, ignore_rules: &IgnoreRules) -> Result<()> { + if let Some(ref ignore_filename) = ignore_rules.ignore_filename { + let ignore_root = &ignore_rules.root; + let ignore_path = dir.join(ignore_filename); + if ignore_path.is_file() { + let new_patterns: Vec = { + let content = fs::read_to_string(&ignore_path)?; + content_to_patterns(ignore_root, Some(ignore_path).as_deref(), &content) + }; + + ignore_rules.add_patterns(new_patterns)?; } - }; - - // if Pattern starts with ! it's whitelist, if ends with / it's dir only, if it contains - // non final slash, it should be considered under the current dir only, otherwise it - // matches - - let begin_exclamation = original.starts_with('!'); - let mut line = if begin_exclamation || original.starts_with(r"\!") { - original[1..].to_owned() - } else { - original.to_owned() - }; - - // TODO: We should handle filenames with trailing spaces better, with regex match and removing - // the \\ from the name - if !line.ends_with("\\ ") { - line = line.trim_end().to_string(); - } - - let end_slash = line.ends_with('/'); - if end_slash { - line = line[..line.len() - 1].to_string() - } - - let begin_slash = line.starts_with('/'); - let non_final_slash = if !line.is_empty() { - line[..line.len() - 1].chars().any(|c| c == '/') - } else { - false - }; - - if begin_slash { - line = line[1..].to_string(); - } - - let current_dir = if current_dir.ends_with('/') { - ¤t_dir[..current_dir.len() - 1] - } else { - ¤t_dir - }; - - let effect = if begin_exclamation { - PatternEffect::Whitelist - } else { - PatternEffect::Ignore - }; - - let path_kind = if end_slash { - PathKind::Directory - } else { - PathKind::Any - }; - - let relativity = if non_final_slash { - PatternRelativity::RelativeTo { - directory: 
current_dir.to_owned(), - } - } else { - PatternRelativity::Anywhere - }; - - Pattern:: { - pattern: line, - original: original.to_owned(), - source, - effect, - relativity, - path_kind, - } -} - -/// Check whether `path` is whitelisted or ignored with `ignore_rules` -pub fn check_ignore(ignore_rules: &IgnoreRules, path: &Path) -> MatchResult { - let is_abs = path.is_absolute(); - watch!(is_abs); - // strip_prefix eats the final slash, and ends_with behave differently than str, so we work - // around here - let path_str = path.to_string_lossy(); - watch!(path_str); - let final_slash = path_str.ends_with('/'); - watch!(final_slash); - - let path = if is_abs { - if final_slash { - format!( - "/{}/", - path.strip_prefix(&ignore_rules.root) - .expect("path must be within root") - .to_string_lossy() - ) - } else { - format!( - "/{}", - path.strip_prefix(&ignore_rules.root) - .expect("path must be within root") - .to_string_lossy() - ) - } - } else { - path_str.to_string() - }; - - watch!(path); - if ignore_rules.whitelist_set.read().unwrap().is_match(&path) { - MatchResult::Whitelist - } else if ignore_rules.ignore_set.read().unwrap().is_match(&path) { - MatchResult::Ignore - } else { - MatchResult::NoMatch } + Ok(()) } - /// Return all childs of a directory regardless of any ignore rules /// If there is an error to obtain the metadata, error is added to the element instead pub fn directory_list(dir: &Path) -> Result>> { @@ -839,7 +284,7 @@ mod tests { #[test_case("myfile*" => matches PatternEffect::Ignore ; "t3367706249")] #[test_case("myfile/" => matches PatternEffect::Ignore ; "t1204466627")] fn test_pattern_effect(line: &str) -> PatternEffect { - let pat = build_pattern(Source::Global, line); + let pat = Pattern::new(Source::Global, line); pat.effect } @@ -863,7 +308,7 @@ mod tests { path: PathBuf::from(dir).join(".gitignore"), line: 1, }; - let pat = build_pattern(source, line); + let pat = Pattern::new(source, line); pat.relativity } @@ -882,7 +327,7 @@ mod tests 
{ path: PathBuf::from(dir).join(".gitignore"), line: 1, }; - let pat = build_pattern(source, line); + let pat = Pattern::new(source, line); pat.path_kind } @@ -899,11 +344,8 @@ mod tests { patterns.len() } - fn create_patterns(root: &str, dir: Option<&str>, patterns: &str) -> Vec> { + fn create_patterns(root: &str, dir: Option<&str>, patterns: &str) -> Vec { content_to_patterns(Path::new(root), dir.map(Path::new), patterns) - .into_iter() - .map(|pat_res_g| pat_res_g.map(|res_g| res_g.unwrap())) - .collect() } fn new_dir_with_ignores( @@ -912,9 +354,9 @@ mod tests { initial_patterns: &str, ) -> Result { let patterns = create_patterns(root, dir, initial_patterns); - let mut initialized = IgnoreRules::empty(&PathBuf::from(root)); + let initialized = IgnoreRules::empty(&PathBuf::from(root), None); - initialized.update(patterns)?; + initialized.add_patterns(patterns)?; Ok(initialized) } @@ -942,29 +384,59 @@ mod tests { path: PathBuf::from(format!("{dir}/.gitignore")), line: 1, }; - let pattern = build_pattern(source, pattern); + let pattern = Pattern::new(source, pattern); pattern.relativity } - - #[test_case("", "myfile" => "myfile" ; "t1142345310")] - #[test_case("", "/myfile" => "myfile" ; "t1427001291")] - #[test_case("", "myfile/" => "myfile" ; "t789151905")] - #[test_case("", "mydir/myfile" => "mydir/myfile" ; "t21199018162")] - #[test_case("", "myfile.*" => "myfile.*" ; "t31199018162")] - #[test_case("", "mydir/**.*" => "mydir/**.*" ; "t41199018162")] - #[test_case("dir", "myfile" => "myfile" ; "t1242345310")] - #[test_case("dir", "/myfile" => "myfile" ; "t3427001291")] - #[test_case("dir", "myfile/" => "myfile" ; "t759151905")] - #[test_case("dir", "mydir/myfile" => "mydir/myfile" ; "t21199018562")] - #[test_case("dir", "/my/file.*" => "my/file.*" ; "t61199018162")] - #[test_case("dir", "/mydir/**.*" => "mydir/**.*" ; "t47199018162")] + // ---- tests::test_pattern_line::t1242345310 stdout ---- + // thread 'tests::test_pattern_line::t1242345310' panicked at 
walker/src/lib.rs:391:5: + // assertion `left == right` failed + // left: "myfile" + // right: "**/myfile" + // + // ---- tests::test_pattern_line::t1142345310 stdout ---- + // thread 'tests::test_pattern_line::t1142345310' panicked at walker/src/lib.rs:391:5: + // assertion `left == right` failed + // left: "myfile" + // right: "**/myfile" + // note: run with `RUST_BACKTRACE=1` environment variable to display a backtrace + // + // ---- tests::test_pattern_line::t1427001291 stdout ---- + // thread 'tests::test_pattern_line::t1427001291' panicked at walker/src/lib.rs:391:5: + // assertion `left == right` failed + // left: "myfile" + // right: "/**/myfile" + // + // ---- tests::test_pattern_line::t21199018562 stdout ---- + // thread 'tests::test_pattern_line::t21199018562' panicked at walker/src/lib.rs:391:5: + // assertion `left == right` failed + // left: "mydir/myfile" + // right: "/**/mydir/myfile" + // + // ---- tests::test_pattern_line::t21199018162 stdout ---- + // thread 'tests::test_pattern_line::t21199018162' panicked at walker/src/lib.rs:391:5: + // assertion `left == right` failed + // left: "mydir/myfile" + // right: "/**/mydir/myfile" + // + // ---- tests::test_pattern_line::t31199018162 stdout ---- + // thread 'tests::test_pattern_line::t31199018162' panicked at walker/src/lib.rs:391:5: + // assertion `left == right` failed + // left: "myfile.*" + // right: "**/myfile.*" + // + #[test_case("dir", "myfile" => "**/myfile" ; "t1242345310")] + #[test_case("dir", "/myfile" => "/**/myfile" ; "t3427001291")] + #[test_case("dir", "myfile/" => "**/myfile/**" ; "t759151905")] + #[test_case("dir", "mydir/myfile" => "/**/mydir/myfile" ; "t21199018562")] + #[test_case("dir", "/my/file.*" => "/**/my/file.*" ; "t61199018162")] + #[test_case("dir", "/mydir/**.*" => "/**/mydir/**.*" ; "t47199018162")] fn test_pattern_line(dir: &str, pattern: &str) -> String { let source = Source::File { path: PathBuf::from(format!("{dir}.gitignore")), line: 1, }; - let pattern = 
build_pattern(source, pattern); - pattern.pattern + let pattern = Pattern::new(source, pattern); + pattern.glob } // Blank file tests @@ -1046,7 +518,7 @@ mod tests { let dwi = new_dir_with_ignores(root.to_str().unwrap(), Some(&source_file), contents).unwrap(); - check_ignore(&dwi, &path) + dwi.check(&path) } // TODO: Patterns shouldn't have / prefix, but an appropriate PathKind diff --git a/walker/src/notify.rs b/walker/src/notify.rs index fed0ca152..92098895c 100644 --- a/walker/src/notify.rs +++ b/walker/src/notify.rs @@ -5,7 +5,6 @@ //! It defines [PathEvent] as a simple version of [notify::EventKind]. //! It defines [PathEventHandler] that handles events from [notify::EventHandler]. use crate::{ - check_ignore, error::{Error, Result}, IgnoreRules, MatchResult, }; @@ -89,7 +88,7 @@ impl EventHandler for PathEventHandler { impl PathEventHandler { fn write_event(&mut self, path: PathBuf) { - match check_ignore(&self.ignore_rules, &path) { + match self.ignore_rules.check(&path) { MatchResult::Whitelist | MatchResult::NoMatch => { if let Ok(metadata) = path.metadata() { self.sender @@ -111,7 +110,7 @@ impl PathEventHandler { } fn create_event(&mut self, path: PathBuf) { - match check_ignore(&self.ignore_rules, &path) { + match self.ignore_rules.check(&path) { MatchResult::Whitelist | MatchResult::NoMatch => { if let Ok(metadata) = path.metadata() { self.sender @@ -133,7 +132,7 @@ impl PathEventHandler { } fn remove_event(&mut self, path: PathBuf) { - match check_ignore(&self.ignore_rules, &path) { + match self.ignore_rules.check(&path) { MatchResult::Whitelist | MatchResult::NoMatch => { self.sender .send(Some(PathEvent::Delete { path })) diff --git a/walker/src/pattern.rs b/walker/src/pattern.rs new file mode 100644 index 000000000..0feaa0602 --- /dev/null +++ b/walker/src/pattern.rs @@ -0,0 +1,219 @@ +//! Pattern describes a single line in an ignore file and its semantics +//! 
It is used to match a path with the given pattern +use crate::sync; +pub use error::{Error, Result}; +pub use ignore_rules::IgnoreRules; +pub use std::hash::Hash; +pub use sync::{PathSync, PathSyncSingleton}; + +pub use crate::notify::{make_watcher, PathEvent, RecommendedWatcher}; + +// use glob::{MatchOptions, Pattern, PatternError}; +pub use fast_glob::Glob; +use std::{fmt::Debug, path::PathBuf}; + +use crate::error; +use crate::ignore_rules; + +/// Show whether a path matches to a glob rule +#[derive(Debug, Clone)] +pub enum MatchResult { + /// There is no match between glob(s) and path + NoMatch, + /// Path matches to ignored glob(s) + Ignore, + /// Path matches to whitelisted glob(s) + Whitelist, +} + +/// Is the pattern matches anywhere or only relative to a directory? +#[derive(Debug, Clone, Hash, PartialEq, Eq)] +pub enum PatternRelativity { + /// Match the path regardless of the directory prefix + Anywhere, + /// Match the path if it only starts with `directory` + RelativeTo { + /// The directory that the pattern must have as prefix to be considered a match + directory: String, + }, +} + +/// Is the path only a directory, or could it be directory or file? +#[derive(Debug, Clone, Hash, PartialEq, Eq)] +pub enum PathKind { + /// Path matches to directory or file + Any, + /// Path matches only to directory + Directory, +} + +/// Is this pattern a ignore or whitelist pattern? +#[derive(Debug, Clone, Eq, PartialEq, Hash)] +pub enum PatternEffect { + /// This is an ignore pattern + Ignore, + /// This is a whitelist pattern + Whitelist, +} + +/// Do we get this pattern from a file (.gitignore, .xvcignore, ...) or specify it directly in +/// code? 
+#[derive(Debug, Clone, Hash, PartialEq, Eq)] +pub enum Source { + /// Pattern is globally defined in code + Global, + + /// Pattern is obtained from file + File { + /// Path of the pattern file + path: PathBuf, + /// (1-based) line number the pattern retrieved + line: usize, + }, + + /// Pattern is from CLI + CommandLine { current_dir: PathBuf }, +} + +/// Pattern is generic and could be an instance of String, Glob, Regex or any other object. +/// The type is evolved by compiling. +/// A pattern can start its life as `Pattern` and can be compiled into `Pattern` or +/// `Pattern`. +#[derive(Debug)] +pub struct Pattern { + /// The pattern type + pub glob: String, + /// The original string that defines the pattern + pub original: String, + /// Where did we get this pattern? + pub source: Source, + /// Is this ignore or whitelist pattern? + pub effect: PatternEffect, + /// Does it have an implied prefix? + pub relativity: PatternRelativity, + /// Is the path a directory or anything? + pub path_kind: PathKind, +} + +impl Pattern { + pub fn new(source: Source, original: &str) -> Self { + let original = original.to_owned(); + let current_dir = match &source { + Source::Global => "".to_string(), + Source::File { path, .. } => { + let path = path + .parent() + .expect("Pattern source file doesn't have parent") + .to_string_lossy() + .to_string(); + if path.starts_with('/') { + path + } else { + format!("/{path}") + } + } + Source::CommandLine { current_dir } => current_dir.to_string_lossy().to_string(), + }; + + // if Pattern starts with ! 
it's whitelist, if ends with / it's dir only, if it contains + non final slash, it should be considered under the current dir only, otherwise it + matches + + let begin_exclamation = original.starts_with('!'); + let mut line = if begin_exclamation || original.starts_with(r"\!") { + original[1..].to_owned() + } else { + original.to_owned() + }; + + // TODO: We should handle filenames with trailing spaces better, with regex match and removing + // the \\ from the name + if !line.ends_with("\\ ") { + line = line.trim_end().to_string(); + } + + let end_slash = line.ends_with('/'); + if end_slash { + line = line[..line.len() - 1].to_string() + } + + let begin_slash = line.starts_with('/'); + let non_final_slash = if !line.is_empty() { + line[..line.len() - 1].chars().any(|c| c == '/') + } else { + false + }; + + if begin_slash { + line = line[1..].to_string(); + } + + let current_dir = if current_dir.ends_with('/') { + &current_dir[..current_dir.len() - 1] + } else { + &current_dir + }; + + let effect = if begin_exclamation { + PatternEffect::Whitelist + } else { + PatternEffect::Ignore + }; + + let path_kind = if end_slash { + PathKind::Directory + } else { + PathKind::Any + }; + + let relativity = if non_final_slash { + PatternRelativity::RelativeTo { + directory: current_dir.to_owned(), + } + } else { + PatternRelativity::Anywhere + }; + + let glob = transform_pattern_for_glob(&line, relativity.clone(), path_kind.clone()); + + Pattern { + glob, + original, + source, + effect, + relativity, + path_kind, + } + } +} + +fn transform_pattern_for_glob( + original: &str, + relativity: PatternRelativity, + path_kind: PathKind, +) -> String { + let anything_anywhere = |p| format!("**/{p}"); + let anything_relative = |p, directory| format!("{directory}/**/{p}"); + let directory_anywhere = |p| format!("**/{p}/**"); + let directory_relative = |p, directory| format!("{directory}/**/{p}/**"); + + let transformed_pattern = match (path_kind,
PatternRelativity::Anywhere) => anything_anywhere(original), + (PathKind::Any, PatternRelativity::RelativeTo { directory }) => { + anything_relative(original, directory) + } + (PathKind::Directory, PatternRelativity::Anywhere) => directory_anywhere(original), + (PathKind::Directory, PatternRelativity::RelativeTo { directory }) => { + directory_relative(original, directory) + } + }; + + transformed_pattern +} + +pub fn build_pattern_list(patterns: Vec<String>, source: Source) -> Vec<Pattern> { + patterns + .iter() + .map(|p| Pattern::new(source.clone(), p)) + .collect() +} diff --git a/walker/src/walk_parallel.rs b/walker/src/walk_parallel.rs new file mode 100644 index 000000000..6b921ba2c --- /dev/null +++ b/walker/src/walk_parallel.rs @@ -0,0 +1,138 @@ +use std::{ + path::Path, + sync::{Arc, Mutex}, +}; + +use crossbeam::queue::SegQueue; +use crossbeam_channel::Sender; +use xvc_logging::{uwr, watch}; + +use crate::{ + directory_list, update_ignore_rules, IgnoreRules, MatchResult, PathMetadata, Result, + SharedIgnoreRules, WalkOptions, MAX_THREADS_PARALLEL_WALK, +}; + +fn walk_parallel_inner( + ignore_rules: SharedIgnoreRules, + dir: &Path, + walk_options: WalkOptions, + path_sender: Sender<Result<PathMetadata>>, +) -> Result<Vec<PathMetadata>> { + update_ignore_rules(dir, &ignore_rules.write().unwrap())?; + + Ok(directory_list(dir)? + .drain(..) + .filter_map(|pm_res| match pm_res { + Ok(pm) => { + watch!(pm); + Some(pm) + } + Err(e) => { + path_sender + .send(Err(e)) + .expect("Channel error in walk_parallel"); + None + } + }) + .filter_map(|pm| { + let ignore_res = ignore_rules.read().unwrap().check(pm.path.as_ref()); + watch!(ignore_res); + match ignore_res { + MatchResult::NoMatch | MatchResult::Whitelist => { + // If the path is a file, don't send it to caller, just send it to the channel. + // If the path is a directory, send it to the channel if `include_dirs` is true. + // The caller expects a list of directories to recurse into.
+ + if pm.metadata.is_file() || pm.metadata.is_symlink() { + path_sender + .send(Ok(pm.clone())) + .expect("Channel error in walk_parallel"); + None + } else if pm.metadata.is_dir() { + path_sender + .send(Ok(pm.clone())) + .expect("Channel error in walk_parallel"); + + if walk_options.include_dirs { + Some(pm) + } else { + None + } + } else { + None + } + } + + MatchResult::Ignore => { + watch!(pm.path); + None + } + } + }) + .collect::<Vec<_>>()) +} + +/// Walk all child paths under `dir` and send non-ignored paths to `path_sender`. +/// Newly found ignore rules are sent through `ignore_sender`. +/// The ignore file name (`.xvcignore`, `.gitignore`, `.ignore`, ...) is set by `walk_options`. +/// +/// It lists elements of a directory, then creates a new crossbeam scope for each child directory and +/// calls itself recursively. It may not be feasible for small directories to create threads. +pub fn walk_parallel( + ignore_rules: SharedIgnoreRules, + dir: &Path, + walk_options: WalkOptions, + path_sender: Sender<Result<PathMetadata>>, +) -> Result<()> { + let dir_queue = Arc::new(SegQueue::<PathMetadata>::new()); + + let child_dirs = walk_parallel_inner( + ignore_rules.clone(), + dir, + walk_options.clone(), + path_sender.clone(), + )?; + watch!(child_dirs); + + child_dirs.into_iter().for_each(|pm| { + dir_queue.push(pm); + }); + + watch!(dir_queue); + + if dir_queue.is_empty() { + return Ok(()); + } + + crossbeam::scope(|s| { + for thread_i in 0..MAX_THREADS_PARALLEL_WALK { + let path_sender = path_sender.clone(); + let walk_options = walk_options.clone(); + let ignore_rules = ignore_rules.clone(); + let dir_queue = dir_queue.clone(); + + s.spawn(move |_| { + watch!(path_sender); + while let Some(pm) = dir_queue.pop() { + let child_dirs = walk_parallel_inner( + ignore_rules.clone(), + &pm.path, + walk_options.clone(), + path_sender.clone(), + ) + .unwrap(); + + for child_dir in child_dirs { + dir_queue.push(child_dir); + } + } + watch!("End of thread {}", thread_i); + }); + } + }) + .expect("Error in
crossbeam scope in walk_parallel"); + + watch!("End of walk_parallel"); + + Ok(()) +} diff --git a/walker/src/walk_serial.rs b/walker/src/walk_serial.rs new file mode 100644 index 000000000..bedb66b2c --- /dev/null +++ b/walker/src/walk_serial.rs @@ -0,0 +1,70 @@ +//! Serial directory walker without parallelization +//! See [`walk_parallel`] for parallel version. +use std::path::Path; + +use xvc_logging::{debug, warn, XvcOutputSender}; + +use crate::{ + directory_list, pattern::MatchResult, update_ignore_rules, IgnoreRules, PathMetadata, Result, + WalkOptions, +}; + +/// Walk `dir` with `walk_options`, with the given _initial_ `ignore_rules`. +/// Note that ignore rules are expanded with the rules given in the `ignore_filename` in +/// `walk_options`. +/// The result is added to given `res_paths` to reduce the number of memory inits for vec. +/// +/// It collects all [`PathMetadata`] of the child paths. +/// Filters paths with the rules found in child directories and the given `ignore_rules`. +pub fn walk_serial( + output_snd: &XvcOutputSender, + global_ignore_rules: &str, + dir: &Path, + walk_options: &WalkOptions, +) -> Result<(Vec<PathMetadata>, IgnoreRules)> { + let ignore_rules = IgnoreRules::from_global_patterns( + dir, + walk_options.ignore_filename.as_deref(), + global_ignore_rules, + ); + + let mut dir_stack = Vec::new(); + + dir_stack.push(dir.to_path_buf()); + + let get_child_paths = |dir: &Path| -> Result<Vec<PathMetadata>> { + Ok(directory_list(dir)?
+ .into_iter() + .filter_map(|pm_res| match pm_res { + Ok(pm) => Some(pm), + Err(e) => { + warn!(output_snd, "{}", e); + None + } + }) + .collect()) + }; + + let mut res_paths = Vec::new(); + while let Some(dir) = dir_stack.pop() { + update_ignore_rules(&dir, &ignore_rules)?; + + res_paths.extend(get_child_paths(&dir)?.drain(..).filter_map(|p| { + let ignore_result = ignore_rules.check(p.path.as_ref()); + match ignore_result { + MatchResult::NoMatch | MatchResult::Whitelist => { + if p.metadata.is_dir() { + dir_stack.push(p.path.clone()); + } + Some(p) + } + MatchResult::Ignore => { + debug!(output_snd, "Ignored: {:?}", p.path); + None + } + } + })); + } + + Ok((res_paths, ignore_rules)) +}