diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml index 175b3e68a..106a6b5fd 100644 --- a/.github/workflows/rust.yml +++ b/.github/workflows/rust.yml @@ -19,21 +19,16 @@ jobs: strategy: matrix: - build: [nightly] + build: [stable] include: - # - build: stable - # benches: true - # - build: beta - # rust: beta - - build: nightly - rust: nightly + - build: stable + rust: stable # rust: nightly-2024-01-01 test-args: --features test-ci # --no-fail-fast ## for submitters other than me, I'll add another job here. - # test-args: --no-fail-fast - # test-args: --all-features # benches: true coverage: true + env: AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }} AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }} @@ -52,17 +47,14 @@ jobs: # We don't run xvc-storage and how-to tests here XVC_TRYCMD_TESTS: core,file,pipeline,intro,start,storage XVC_TRYCMD_STORAGE_TESTS: minio,generic,local, + steps: - name: Checkout uses: actions/checkout@v1 - # - name: Install required packages - # - # run: sudo apt install -y inotify-tools ripgrep s3cmd - - name: Install required packages run: | - brew install --head s3cmd # 2.3.0 has a bug with Python 3.12 + brew install s3cmd brew install ripgrep brew install tree brew install lsd @@ -72,73 +64,74 @@ jobs: - name: Git config for automated Git tests run: git config --global user.name 'Xvc Rabbit' && git config --global user.email 'rabbit@xvc.dev' && git config --global init.defaultBranch main - - name: Write the private key file for one.emresult.com connection + - name: Write the private key file for e1.xvc.dev connection run: mkdir -p $HOME/.ssh/ && echo "${XVC_TEST_ONE_EMRESULT_COM_KEY}" > $HOME/.ssh/id_rsa ; chmod 600 ~/.ssh/id_rsa - - name: Write an ssh config to allow connection - run: echo 'Host *' >> $HOME/.ssh/config ; echo ' StrictHostKeyChecking no' >> $HOME/.ssh/config ; chmod 400 $HOME/.ssh/config - - - name: List .ssh contents - run: ls -R $HOME/.ssh ; cat $HOME/.ssh/id_rsa ; cat $HOME/.ssh/config + 
- name: Write an ssh config to allow connection without noise + run: echo 'Host *' >> $HOME/.ssh/config ; echo ' StrictHostKeyChecking no' >> $HOME/.ssh/config ; echo ' LogLevel ERROR' >> $HOME/.ssh/config ; chmod 400 $HOME/.ssh/config + # - name: List .ssh contents + # run: ls -R $HOME/.ssh ; cat $HOME/.ssh/id_rsa ; cat $HOME/.ssh/config + # - name: Install rust - uses: actions-rs/toolchain@v1 + uses: dtolnay/rust-toolchain@master with: toolchain: ${{ matrix.rust || 'stable' }} - profile: minimal components: llvm-tools-preview - override: true - name: Build debug - uses: actions-rs/cargo@v1 - with: - command: build - args: ${{ matrix.build-args }} + run: cargo build ${{ matrix.build-args }} env: RUSTFLAGS: "-A dead_code" - name: Add xvc to PATH run: echo "${GITHUB_WORKSPACE}/target/debug" >> $GITHUB_PATH - - name: Print contents of $GITHUB_WORKSPACE - run: tree $GITHUB_WORKSPACE + # - name: Print contents of $GITHUB_WORKSPACE + # run: tree $GITHUB_WORKSPACE - name: Check if xvc is in PATH - run: tree $GITHUB_WORKSPACE && xvc --help + # run: tree $GITHUB_WORKSPACE && xvc --version + run: xvc --version + + # - name: Test + # if: matrix.coverage + # run: cargo test ${{ matrix.test-args }} + # env: + # CARGO_INCREMENTAL: "0" + # # To debug the output when commands fail + # TRYCMD: "dump" + # # RUSTFLAGS: "-Ccodegen-units=1 -Cinline-threshold=0 -Clink-dead-code -Coverflow-checks=off" + # RUSTFLAGS: "-Cinstrument-coverage" + # # RUSTDOCFLAGS: "-Ccodegen-units=1 -Cinline-threshold=0 -Clink-dead-code -Coverflow-checks=off" + # LLVM_PROFILE_FILE: "${TMPDIR}/xvc-%p-%m.profraw" + # + - name: Install cargo-llvm-cov + if: matrix.coverage + uses: taiki-e/install-action@cargo-llvm-cov - name: Run Current Dev Tests run: $GITHUB_WORKSPACE/run-tests.zsh - - name: Test + - name: Test and Coverage + id: coverage if: matrix.coverage - uses: actions-rs/cargo@v1 - with: - command: test - args: ${{ matrix.test-args }} + run: cargo llvm-cov ${{ matrix.test-args }} --workspace --lcov 
--output-path lcov.info env: - CARGO_INCREMENTAL: "0" # To debug the output when commands fail TRYCMD: "dump" - RUSTFLAGS: "-Zprofile -Ccodegen-units=1 -Cinline-threshold=0 -Clink-dead-code -Coverflow-checks=off" - RUSTDOCFLAGS: "-Zprofile -Ccodegen-units=1 -Cinline-threshold=0 -Clink-dead-code -Coverflow-checks=off" - - name: Coverage - id: coverage - if: matrix.coverage - uses: actions-rs/grcov@v0.1 - - name: Test all benches - if: matrix.benches - uses: actions-rs/cargo@v1 - with: - command: test - args: --benches ${{ matrix.features }} - name: Upload to codecov.io if: matrix.coverage - uses: codecov/codecov-action@v2 + uses: codecov/codecov-action@v3 with: token: ${{ secrets.CODECOV_TOKEN }} files: ${{ steps.coverage.outputs.report }} + - name: Test all benches + if: matrix.benches + run: cargo test --benches ${{ matrix.features }} + deploy-linux: name: deploy-linux # needs: [coverage] @@ -147,31 +140,28 @@ jobs: strategy: matrix: target: [x86_64-unknown-linux-gnu] + steps: - name: Checkout uses: actions/checkout@v1 + - name: Install rust - uses: actions-rs/toolchain@v1 + uses: dtolnay/rust-toolchain@master with: toolchain: stable - profile: minimal - override: true - target: ${{ matrix.target }} + targets: ${{ matrix.target }} - name: Build target - uses: actions-rs/cargo@v1 - with: - use-cross: false - command: build - args: --release --target ${{ matrix.target }} --features=bundled-openssl + run: cargo build --release --target ${{ matrix.target }} --features=bundled-openssl - name: Package shell: bash run: | #strip target/${{ matrix.target }}/release/xvc cd target/${{ matrix.target }}/release - tar czvf ../../../xvc-${{ github.ref_name}}-${{ matrix.target }}.tar.gz xvc + tar czvf ../../../xvc-${{ github.ref_name }}-${{ matrix.target }}.tar.gz xvc cd - + - name: Publish uses: softprops/action-gh-release@v1 # TODO: if any of the build step fails, the release should be deleted. 
@@ -191,20 +181,15 @@ jobs: steps: - name: Checkout uses: actions/checkout@v1 + - name: Install rust - uses: actions-rs/toolchain@v1 + uses: dtolnay/rust-toolchain@master with: toolchain: stable - profile: minimal - override: true - target: ${{ matrix.target }} + targets: ${{ matrix.target }} - name: Build target - uses: actions-rs/cargo@v1 - with: - use-cross: false - command: build - args: --release --target ${{ matrix.target }} --features bundled-sqlite + run: cargo build --release --target ${{ matrix.target }} --features bundled-sqlite - name: Package shell: bash @@ -213,6 +198,7 @@ jobs: cd target/${{ matrix.target }}/release tar czvf ../../../xvc-${{ github.ref_name}}-${{ matrix.target }}.tar.gz xvc cd - + - name: Publish uses: softprops/action-gh-release@v1 # TODO: if any of the build step fails, the release should be deleted. @@ -220,6 +206,7 @@ jobs: files: "xvc*" env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + deploy-macos: name: deploy-macos # needs: [coverage] @@ -228,23 +215,19 @@ jobs: strategy: matrix: target: [x86_64-apple-darwin] + steps: - name: Checkout uses: actions/checkout@v1 + - name: Install rust - uses: actions-rs/toolchain@v1 + uses: dtolnay/rust-toolchain@master with: toolchain: stable - profile: minimal - override: true - target: ${{ matrix.target }} + targets: ${{ matrix.target }} - name: Build target - uses: actions-rs/cargo@v1 - with: - use-cross: false - command: build - args: --release --target ${{ matrix.target }} + run: cargo build --release --target ${{ matrix.target }} - name: Package shell: bash @@ -253,6 +236,7 @@ jobs: cd target/${{ matrix.target }}/release tar czvf ../../../xvc-${{ github.ref_name}}-${{ matrix.target }}.tar.gz xvc cd - + - name: Publish uses: softprops/action-gh-release@v1 # TODO: if any of the build step fails, the release should be deleted. 
diff --git a/.gitignore b/.gitignore index d576b81b5..c2b510c79 100644 --- a/.gitignore +++ b/.gitignore @@ -17,3 +17,6 @@ Cargo.lock flamegraph.svg workflow_tests/docs lib/docs +# Ignore all profiling files +*.profraw +lcov.info diff --git a/CHANGELOG.md b/CHANGELOG.md index d89997404..d561b2983 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,9 +1,20 @@ # Xvc Changelog -## Unreleased +## 0.6.12 (2024-11-30) + +- PR: +- Add --include-git-files option to xvc file track and xvc file list commands +- Don't track and list Git-tracked files by default +- Add ListFormat::empty for default xvc file list format +- Expose types from `xvc::file::list` to be used in GUI +- Refactor `xvc file list` command handler for Xvc GUI +- Began to use dtolnay/rust-toolchain for Github Actions +- Began to use taiki-e/install-action@cargo-llvm-cov for codecov +- Fixed cache permissions issue ## 0.6.11 (2024-09-04) +- PR: - Bump dependencies - Replace globset with fast-glob for memory usage - Remove --details option from xvc check-ignore diff --git a/book/src/ref/xvc-file-carry-in.md b/book/src/ref/xvc-file-carry-in.md index c87f03ec4..538159795 100644 --- a/book/src/ref/xvc-file-carry-in.md +++ b/book/src/ref/xvc-file-carry-in.md @@ -52,6 +52,7 @@ $ xvc file track data.txt $ xvc file list data.txt FC 19 [..] c85f3e81 c85f3e81 data.txt + Total #: 1 Workspace Size: 19 Cached Size: 19 @@ -67,6 +68,7 @@ Oh, deetee, my, deetee $ xvc file list data.txt FC 23 [..] c85f3e81 e37c686a data.txt + Total #: 1 Workspace Size: 23 Cached Size: 19 @@ -79,6 +81,7 @@ $ xvc file carry-in data.txt $ xvc file list data.txt FC 23 [..] e37c686a e37c686a data.txt + Total #: 1 Workspace Size: 23 Cached Size: 23 diff --git a/book/src/ref/xvc-file-copy.md b/book/src/ref/xvc-file-copy.md index 6f0c59c19..5b3210f02 100644 --- a/book/src/ref/xvc-file-copy.md +++ b/book/src/ref/xvc-file-copy.md @@ -83,12 +83,14 @@ Note that, multiple copies of the same content don't add up to the cache size. 
```console $ xvc file list data.txt FC 19 [..] c85f3e81 c85f3e81 data.txt + Total #: 1 Workspace Size: 19 Cached Size: 19 $ xvc file list 'data*' FC 19 [..] c85f3e81 c85f3e81 data2.txt FC 19 [..] c85f3e81 c85f3e81 data.txt + Total #: 2 Workspace Size: 38 Cached Size: 19 @@ -115,6 +117,7 @@ $ xvc file list another-set/ FH 19 [..] c85f3e81 c85f3e81 another-set/data3.txt FH 19 [..] c85f3e81 c85f3e81 another-set/data2.txt FH 19 [..] c85f3e81 c85f3e81 another-set/data.txt + Total #: 3 Workspace Size: 57 Cached Size: 19 @@ -169,7 +172,8 @@ FH 19 [..] c85f3e81 c85f3e81 another-set/data3.txt FH 19 [..] c85f3e81 c85f3e81 another-set/data2.txt FH 19 [..] c85f3e81 c85f3e81 another-set/data.txt DX 160 [..] another-set -Total #: 9 Workspace Size: [..] Cached Size: 19 + +Total #: 9 Workspace Size: [..] Cached Size: 19 ``` diff --git a/book/src/ref/xvc-file-list.md b/book/src/ref/xvc-file-list.md index 5b415b7e2..ddcc80b17 100644 --- a/book/src/ref/xvc-file-list.md +++ b/book/src/ref/xvc-file-list.md @@ -58,6 +58,11 @@ Options: If not supplied, hides dot files like .gitignore and .xvcignore + --include-git-files + List files tracked by Git. + + By default, Xvc doesn't list files tracked by Git. Supply this option to list them. + -h, --help Print help (see a summary with '-h') @@ -163,17 +168,40 @@ FX 2002 [..] 7e807161 dir-0005/file-0002.bin FX 2003 [..] d2432259 dir-0005/file-0003.bin FX 2004 [..] 63535612 dir-0005/file-0004.bin FX 2005 [..] 447933dc dir-0005/file-0005.bin + Total #: 30 Workspace Size: 51195 Cached Size: 0 ``` -By default the command hides dotfiles. If you also want to show them, you can use `--show-dot-files`/`-a` flag. +This command doesn't list Git-tracked files by default. If you want to list them, use `--include-git-files` flag. 
+```console +$ zsh -c 'echo "#!/bin/bash" > my-git-tracked-script.sh' +$ git add my-git-tracked-script.sh +$ git commit -m "Added a script" +[main [..]] Added a script + 1 file changed, 1 insertion(+) + create mode 100644 my-git-tracked-script.sh + +$ xvc file list 'my-git-tracked-script.sh' + +Total #: 0 Workspace Size: 0 Cached Size: 0 + + +$ xvc file list --include-git-files 'my-git-tracked-script.sh' +FX 12 [..] 6ecb3ffc my-git-tracked-script.sh + +Total #: 1 Workspace Size: 12 Cached Size: 0 + + +``` + +By default the command hides dotfiles too. If you also want to show them, you can use `--show-dot-files`/`-a` flag. If you want to show dotfiles also tracked by git, you may use `--show-dot-files` and `--include-git-files` together. ```console -$ xvc file list --sort name-asc --show-dot-files -FX [..] [..] [..] .gitignore -FX [..] [..] [..] .xvcignore +$ xvc file list --sort name-asc --show-dot-files --include-git-files +FX 107 [..] ce9fcf30 .gitignore +FX 141 [..] 3054b812 .xvcignore DX 224 [..] dir-0001 FX 2001 [..] 1953f05d dir-0001/file-0001.bin FX 2002 [..] 7e807161 dir-0001/file-0002.bin @@ -204,7 +232,9 @@ FX 2002 [..] 7e807161 dir-0005/file-0002.bin FX 2003 [..] d2432259 dir-0005/file-0003.bin FX 2004 [..] 63535612 dir-0005/file-0004.bin FX 2005 [..] 447933dc dir-0005/file-0005.bin -Total #: 32 Workspace Size: 51443 Cached Size: 0 +FX 12 [..] 6ecb3ffc my-git-tracked-script.sh + +Total #: 33 Workspace Size: 51455 Cached Size: 0 ``` @@ -267,6 +297,7 @@ FC 2004 [..] 63535612 63535612 dir-0001/file-0004.bin FC 2003 [..] d2432259 d2432259 dir-0001/file-0003.bin FC 2002 [..] 7e807161 7e807161 dir-0001/file-0002.bin FC 2001 [..] 1953f05d 1953f05d dir-0001/file-0001.bin + Total #: 5 Workspace Size: 10015 Cached Size: 10015 @@ -284,6 +315,7 @@ FH 2004 [..] 63535612 63535612 dir-0002/file-0004.bin FH 2003 [..] d2432259 d2432259 dir-0002/file-0003.bin FH 2002 [..] 7e807161 7e807161 dir-0002/file-0002.bin FH 2001 [..] 
1953f05d 1953f05d dir-0002/file-0001.bin + Total #: 5 Workspace Size: 10015 Cached Size: 10015 @@ -305,6 +337,7 @@ SS [..] 63535612 dir-0003/file-0004.bin SS [..] d2432259 dir-0003/file-0003.bin SS [..] 7e807161 dir-0003/file-0002.bin SS [..] 1953f05d dir-0003/file-0001.bin + Total #: 5 Workspace Size: [..] Cached Size: 10015 @@ -323,6 +356,7 @@ FX 2001 [..] 1953f05d dir-0004/file-0001.bin SS [..] 1953f05d dir-0003/file-0001.bin FH 2[..] 1953f05d 1953f05d dir-0002/file-0001.bin FC 2[..] 1953f05d 1953f05d dir-0001/file-0001.bin + Total #: 5 Workspace Size: [..] Cached Size: 2001 @@ -345,6 +379,7 @@ FH [..] 7e807161 7e807161 dir-0002/file-0002.bin FH [..] 1953f05d 1953f05d dir-0002/file-0001.bin FC [..] 7e807161 7e807161 dir-0001/file-0002.bin FC [..] 1953f05d 1953f05d dir-0001/file-0001.bin + Total #: 10 Workspace Size: [..] Cached Size: 4003 @@ -364,6 +399,7 @@ FC 2004 [..] 63535612 63535612 dir-0001/file-0004.bin FC 2003 [..] d2432259 d2432259 dir-0001/file-0003.bin FC 2002 [..] 7e807161 7e807161 dir-0001/file-0002.bin FC 2001 [..] 1953f05d 1953f05d dir-0001/file-0001.bin + Total #: 5 Workspace Size: 10015 Cached Size: 10015 @@ -376,6 +412,7 @@ FC 2002 [..] 7e807161 7e807161 dir-0001/file-0002.bin FC 2003 [..] d2432259 d2432259 dir-0001/file-0003.bin FC 2004 [..] 63535612 63535612 dir-0001/file-0004.bin FC 2005 [..] 447933dc 447933dc dir-0001/file-0005.bin + Total #: 5 Workspace Size: 10015 Cached Size: 10015 @@ -398,7 +435,8 @@ $ xvc file list --format '{{asz}} {{name}}' --sort size-desc dir-0001/ 2003 dir-0001/file-0003.bin 2002 dir-0001/file-0002.bin 2001 dir-0001/file-0001.bin -Total #: 5 Workspace Size: 10015 Cached Size: 10015 + +Total #: 5 Workspace Size: 10015 Cached Size: [..] 
``` @@ -412,6 +450,7 @@ $ xvc file list --format '{{acd8}} {{rcd8}} {{name}}' --sort ts-asc dir-0001 d2432259 d2432259 dir-0001/file-0003.bin 63535612 63535612 dir-0001/file-0004.bin 447933dc 447933dc dir-0001/file-0005.bin + Total #: 5 Workspace Size: 10015 Cached Size: 10015 @@ -434,7 +473,8 @@ $ xvc file list --format '{{cst}} {{name}}' dir-0001/ = dir-0001/file-0002.bin = dir-0001/file-0001.bin X dir-0001/a-new-file.bin -Total #: 6 Workspace Size: 10115 Cached Size: 10015 + +Total #: 6 Workspace Size: 10115 Cached Size: 0 ``` diff --git a/book/src/ref/xvc-file-move.md b/book/src/ref/xvc-file-move.md index 1c5a82f43..96d455f47 100644 --- a/book/src/ref/xvc-file-move.md +++ b/book/src/ref/xvc-file-move.md @@ -106,6 +106,7 @@ $ xvc file move d*.txt another-set/ --as hardlink $ xvc file list another-set/ FH [..] c85f3e81 c85f3e81 another-set/data5.txt FH [..] c85f3e81 c85f3e81 another-set/data4.txt + Total #: 2 Workspace Size: 38 Cached Size: 19 @@ -122,6 +123,7 @@ $ xvc file list XH c85f3e81 data6.txt FH 19 [..] c85f3e81 c85f3e81 another-set/data4.txt DX 96 [..] another-set + Total #: 3 Workspace Size: 115 Cached Size: 19 diff --git a/book/src/ref/xvc-file-recheck.md b/book/src/ref/xvc-file-recheck.md index f0c2b1a39..59b1d54a2 100644 --- a/book/src/ref/xvc-file-recheck.md +++ b/book/src/ref/xvc-file-recheck.md @@ -96,7 +96,7 @@ You can track and recheck complete directories $ xvc file track dir-0002/ $ rm -rf dir-0002/ $ xvc -v file recheck dir-0002/ -$ ls -l dir-0002/ +$ lsd -l dir-0002/ total 24 -rw-rw-rw-[..] file-0001.bin -rw-rw-rw-[..] file-0002.bin diff --git a/book/src/ref/xvc-file-remove.md b/book/src/ref/xvc-file-remove.md index 3bb971ea1..f4892f068 100644 --- a/book/src/ref/xvc-file-remove.md +++ b/book/src/ref/xvc-file-remove.md @@ -58,6 +58,7 @@ $ xvc file track 'd*.txt' $ xvc file list FC [..] c85f3e81 c85f3e81 data.txt + Total #: 1 Workspace Size: 19 Cached Size: 19 @@ -116,6 +117,7 @@ $ xvc file carry-in --force data.txt $ xvc file list FC [..] 
c85f3e81 c85f3e81 data.txt + Total #: 1 Workspace Size: 19 Cached Size: 19 @@ -153,6 +155,7 @@ $ tree .xvc/b3/ $ xvc file list FC [..] 6602cff6 6602cff6 data.txt + Total #: 1 Workspace Size: 19 Cached Size: 19 @@ -230,6 +233,7 @@ You can use this command to remove cached files from (remote) storages as well. ```console $ xvc-test-helper generate-random-file --seed 2 data.txt + $ xvc file carry-in data.txt $ xvc storage new local --name local-storage --path '../local-storage' @@ -275,6 +279,7 @@ $ xvc file copy data.txt data2.txt --as symlink $ xvc file list SS [..] [..] 4a2e9d7c data2.txt FC 1024 [..] 4a2e9d7c 4a2e9d7c data.txt + Total #: 2 Workspace Size: [..] Cached Size: 1024 diff --git a/book/src/ref/xvc-file-track.md b/book/src/ref/xvc-file-track.md index 51df7d9d3..49c844b73 100644 --- a/book/src/ref/xvc-file-track.md +++ b/book/src/ref/xvc-file-track.md @@ -28,6 +28,11 @@ Options: --text-or-binary Calculate digests as text or binary file without checking contents, or by automatically. (Default: auto) + --include-git-files + Include git tracked files as well. (Default: false) + + Xvc doesn't track files that are already tracked by git by default. You can set files.track.include-git to true in the configuration file to change this behavior. + --force Add targets even if they are already tracked @@ -175,6 +180,7 @@ $ xvc file list dir-0004/ FS [..] ab361981 ab361981 dir-0004/file-0003.bin FS [..] 493eeb65 493eeb65 dir-0004/file-0002.bin FS [..] e517d6b9 e517d6b9 dir-0004/file-0001.bin + Total #: 3 Workspace Size: 6006 Cached Size: 6006 diff --git a/book/src/ref/xvc-file-untrack.md b/book/src/ref/xvc-file-untrack.md index 27d76abeb..4ff8e0301 100644 --- a/book/src/ref/xvc-file-untrack.md +++ b/book/src/ref/xvc-file-untrack.md @@ -35,6 +35,7 @@ $ xvc file track 'd*.txt' $ xvc file list FC 19 [..] 
c85f3e81 c85f3e81 data.txt + Total #: 1 Workspace Size: 19 Cached Size: 19 diff --git a/config/Cargo.toml b/config/Cargo.toml index 57e2c258c..b075db3cc 100644 --- a/config/Cargo.toml +++ b/config/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "xvc-config" -version = "0.6.11" +version = "0.6.12" edition = "2021" description = "Xvc configuration management" authors = ["Emre Şahin "] @@ -16,8 +16,8 @@ name = "xvc_config" crate-type = ["rlib"] [dependencies] -xvc-logging = { version = "0.6.11", path = "../logging" } -xvc-walker = { version = "0.6.11", path = "../walker" } +xvc-logging = { version = "0.6.12", path = "../logging" } +xvc-walker = { version = "0.6.12", path = "../walker" } ## Cli and config @@ -39,7 +39,7 @@ walkdir = "^2.5" thiserror = "^1.0" log = "^0.4" ### meta-logging-in-format is required for sled: https://github.com/spacejam/sled/issues/1384 -fern = { version = "^0.6", features = ["colored"] } +fern = { version = "^0.7", features = ["colored"] } ## Misc regex = "^1.10" diff --git a/config/src/config_params.rs b/config/src/config_params.rs new file mode 100644 index 000000000..322714307 --- /dev/null +++ b/config/src/config_params.rs @@ -0,0 +1,92 @@ +//! Specify configuration sources +use xvc_walker::AbsolutePath; + +/// How should we initialize the configuration? +/// +/// It's possible to ignore certain sources by supplying `None` to their values here. +#[derive(Debug, Clone)] +pub struct XvcConfigParams { + /// The default configuration for the project. + /// It should contain all default values as a TOML document. + /// Xvc produces this in [xvc_core::default_configuration]. + pub default_configuration: String, + /// The directory where the application runs. + /// This can be set by various Options. + /// It affects how paths are handled in general. + pub current_dir: AbsolutePath, + /// Should we include system configuration? + /// If `true`, it's read from [SYSTEM_CONFIG_DIRS]. 
+ pub include_system_config: bool, + /// Should the user's (home) config be included. + /// If `true`, it's read from [USER_CONFIG_DIRS]. + pub include_user_config: bool, + /// Where should we load the project's (public) configuration? + /// It's loaded in [XvcRootInner::new] + /// TODO: Add an option to ignore this + pub project_config_path: Option, + /// Where should we load the project's (private) configuration? + /// It's loaded in [XvcRootInner::new] + /// TODO: Add an option to ignore this + pub local_config_path: Option, + /// Should we include configuration from the environment. + /// If `true`, look for all variables in the form + /// + /// `XVC_group.key=value` + /// + /// from the environment and put them into the configuration. + pub include_environment_config: bool, + /// Command line configuration + pub command_line_config: Option>, +} + +impl XvcConfigParams { + /// Create a new blank config params + pub fn new(default_configuration: String, current_dir: AbsolutePath) -> Self { + Self { + default_configuration, + current_dir, + include_system_config: true, + include_user_config: true, + project_config_path: None, + local_config_path: None, + include_environment_config: true, + command_line_config: None, + } + } + + /// Update include_system_config value + pub fn include_system_config(mut self, include_system_config: bool) -> Self { + self.include_system_config = include_system_config; + self + } + + /// Update include_user_config value + pub fn include_user_config(mut self, include_user_config: bool) -> Self { + self.include_user_config = include_user_config; + self + } + + /// Update project config path + pub fn project_config_path(mut self, project_config_path: Option) -> Self { + self.project_config_path = project_config_path; + self + } + + /// Update local config path + pub fn local_config_path(mut self, local_config_path: Option) -> Self { + self.local_config_path = local_config_path; + self + } + + /// Whether to include environment variables in
the configuration + pub fn include_environment_config(mut self, include_environment_config: bool) -> Self { + self.include_environment_config = include_environment_config; + self + } + + /// Command line config from key=value definitions + pub fn command_line_config(mut self, command_line_config: Option>) -> Self { + self.command_line_config = command_line_config; + self + } +} diff --git a/config/src/lib.rs b/config/src/lib.rs index 960023c29..c0791ef8f 100755 --- a/config/src/lib.rs +++ b/config/src/lib.rs @@ -25,7 +25,11 @@ //! #![warn(missing_docs)] #![forbid(unsafe_code)] +pub mod config_params; pub mod error; + +pub use config_params::XvcConfigParams; + use directories_next::{BaseDirs, ProjectDirs, UserDirs}; use lazy_static::lazy_static; use regex::Regex; @@ -151,44 +155,6 @@ pub struct XvcConfigMap { pub map: HashMap, } -/// How should we initialize the configuration? -/// -/// It's possible to ignore certain sources by supplying `None` to their values here. -#[derive(Debug, Clone)] -pub struct XvcConfigParams { - /// The default configuration for the project. - /// It should contain all default values as a TOML document. - /// Xvc produces this in [xvc_core::default_configuration]. - pub default_configuration: String, - /// The directory where the application runs. - /// This can be set by various Options. - /// It affects how paths are handled in general. - pub current_dir: AbsolutePath, - /// Should we include system configuration? - /// If `true`, it's read from [SYSTEM_CONFIG_DIRS]. - pub include_system_config: bool, - /// Should the user's (home) config be included. - /// If `true`, it's read from [USER_CONFIG_DIRS]. - pub include_user_config: bool, - /// Where should we load the project's (public) configuration? - /// It's loaded in [XvcRootInner::new] - /// TODO: Add a option to ignore this - pub project_config_path: Option, - /// Where should we load the project's (private) configuration? 
- /// It's loaded in [XvcRootInner::new] - /// TODO: Add a option to ignore this - pub local_config_path: Option, - /// Should we include configuration from the environment. - /// If `true`, look for all variables in the form - /// - /// `XVC_group.key=value` - /// - /// from the environment and put them into the configuration. - pub include_environment_config: bool, - /// Command line configuration - pub command_line_config: Option>, -} - /// Keeps track of all Xvc configuration. /// /// It's created by [XvcRoot] using the options from [XvcConfigInitParams]. diff --git a/core/Cargo.toml b/core/Cargo.toml index 3190d1763..940ee4b00 100644 --- a/core/Cargo.toml +++ b/core/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "xvc-core" -version = "0.6.11" +version = "0.6.12" edition = "2021" description = "Xvc core for common elements for all commands" authors = ["Emre Şahin "] @@ -16,10 +16,10 @@ name = "xvc_core" crate-type = ["rlib"] [dependencies] -xvc-config = { version = "0.6.11", path = "../config" } -xvc-logging = { version = "0.6.11", path = "../logging" } -xvc-ecs = { version = "0.6.11", path = "../ecs" } -xvc-walker = { version = "0.6.11", path = "../walker" } +xvc-config = { version = "0.6.12", path = "../config" } +xvc-logging = { version = "0.6.12", path = "../logging" } +xvc-ecs = { version = "0.6.12", path = "../ecs" } +xvc-walker = { version = "0.6.12", path = "../walker" } ## Cli and config clap = { version = "^4.5", features = ["derive"] } @@ -42,7 +42,7 @@ rmp-serde = "1.3.0" toml = "^0.8" ## Network -reqwest = { version = "^0.11", features = ["blocking", "json", "gzip"] } +reqwest = { version = "^0.12", features = ["blocking", "json", "gzip"] } ## Parallelization rayon = "^1.10" @@ -50,19 +50,21 @@ crossbeam-channel = "^0.5" crossbeam = "^0.8" ## File system +subprocess = "^0.2" jwalk = "^0.8" walkdir = "^2.5" relative-path = { version = "^1.9", features = ["serde"] } path-absolutize = "^3.1" glob = "^0.3" +which = "^7.0" ## Logging and errors -thiserror 
= "^1.0" +thiserror = "^2.0" anyhow = "^1.0" log = "^0.4" peak_alloc = "^0.2" ### meta-logging-in-format is required for sled: https://github.com/spacejam/sled/issues/1384 -fern = { version = "^0.6", features = ["colored"] } +fern = { version = "^0.7", features = ["colored"] } ## macros @@ -75,12 +77,12 @@ strum_macros = "^0.26" lazy_static = "^1.5" uuid = { version = "^1.10", features = ["serde", "v4", "fast-rng"] } hex = { version = "^0.4", features = ["serde"] } -cached = "^0.53" -derive_more = "^0.99" +cached = "^0.54" +derive_more = { version = "^1.0", features = ["full"] } itertools = "^0.13" [dev-dependencies] -xvc-test-helper = { version = "0.6.11", path = "../test_helper/" } +xvc-test-helper = { version = "0.6.12", path = "../test_helper/" } proptest = "^1.5" test-case = "^3.3" diff --git a/core/src/error.rs b/core/src/error.rs index 2f9bdab72..f23ed9f13 100644 --- a/core/src/error.rs +++ b/core/src/error.rs @@ -123,6 +123,9 @@ pub enum Error { source: reqwest::Error, }, + #[error("Git Process Error: \nSTDOUT: {stdout}\nSTDERR: {stderr}")] + GitProcessError { stdout: String, stderr: String }, + #[error("Crossbeam Send Error for Type: {t:?} {cause:?}")] CrossbeamSendError { t: String, cause: String }, #[error("Relative Path Conversion Error: {source}")] @@ -131,6 +134,18 @@ pub enum Error { source: relative_path::FromPathError, }, + #[error("Cannot Find Executable: {source}")] + WhichError { + #[from] + source: which::Error, + }, + + #[error("Process Exec Error: {source}")] + ProcessExecError { + #[from] + source: subprocess::PopenError, + }, + #[error("Cannot find parent path")] CannotFindParentPath { path: PathBuf }, diff --git a/core/src/lib.rs b/core/src/lib.rs index 3a8efe036..0b1425b4c 100755 --- a/core/src/lib.rs +++ b/core/src/lib.rs @@ -44,6 +44,11 @@ pub use error::Error; pub use error::Result; pub use util::file::{all_paths_and_metadata, dir_includes, glob_includes, glob_paths}; +pub use util::git::{ + build_gitignore, exec_git, 
get_absolute_git_command, get_git_tracked_files, git_auto_commit, + git_auto_stage, git_checkout_ref, git_ignored, handle_git_automation, inside_git, + stash_user_staged_files, unstash_user_staged_files, +}; pub use util::pmp::XvcPathMetadataProvider; pub use util::XvcPathMetadataMap; @@ -157,6 +162,9 @@ text_or_binary = "auto" # Note that some of the operations are implemented in parallel by default, and this option affects some heavier operations. no_parallel = false +# Track files that are tracked by Git. +include_git_files = {include_git_files} + [file.list] # Format for `xvc file list` rows. You can reorder or remove columns. @@ -199,6 +207,9 @@ no_summary = false # List files recursively always. recursive = false +# List files tracked by Git. +include_git_files = {include_git_files} + [file.carry-in] # Carry-in the files to cache always, even if they are already present. force = false @@ -230,5 +241,6 @@ details = false "##, guid = guid, use_git = use_git, + include_git_files = !use_git ) } diff --git a/core/src/util/git.rs b/core/src/util/git.rs index 5f738c4ca..fe88c7d31 100644 --- a/core/src/util/git.rs +++ b/core/src/util/git.rs @@ -1,13 +1,18 @@ -//! Git operations -use std::path::{Path, PathBuf}; +//! Git operations for Xvc repositories +use std::{ffi::OsString, path::PathBuf, str::FromStr}; + +use crate::XvcRoot; +use subprocess::Exec; +use xvc_logging::{debug, watch, XvcOutputSender}; + +use crate::{Error, Result}; +use std::path::Path; use xvc_walker::{build_ignore_patterns, AbsolutePath, IgnoreRules}; -use crate::error::Result; use crate::GIT_DIR; use super::xvcignore::COMMON_IGNORE_PATTERNS; - /// Check whether a path is inside a Git repository. /// It returns `None` if not, otherwise returns the closest directory with `.git`. /// It works by checking `.git` directories in parents, until no more parent left. 
@@ -38,6 +43,259 @@ pub fn build_gitignore(git_root: &AbsolutePath) -> Result { Ok(rules) } +/// Find the absolute path to the git executable to run +/// TODO: This must be cached. It makes a which request every time a command runs +pub fn get_absolute_git_command(git_command: &str) -> Result { + let git_cmd_path = PathBuf::from(git_command); + let git_cmd = if git_cmd_path.is_absolute() { + git_command.to_string() + } else { + let cmd_path = which::which(git_command)?; + cmd_path.to_string_lossy().to_string() + }; + Ok(git_cmd) +} + +/// Run a git command with a specific git binary +pub fn exec_git(git_command: &str, xvc_directory: &str, args_str_vec: &[&str]) -> Result { + let mut args = vec!["-C", xvc_directory]; + args.extend(args_str_vec); + let args: Vec = args + .iter() + .map(|s| OsString::from_str(s).unwrap()) + .collect(); + watch!(args); + let proc_res = Exec::cmd(git_command).args(&args).capture()?; + + match proc_res.exit_status { + subprocess::ExitStatus::Exited(0) => Ok(proc_res.stdout_str()), + subprocess::ExitStatus::Exited(_) => Err(Error::GitProcessError { + stdout: proc_res.stdout_str(), + stderr: proc_res.stderr_str(), + }), + subprocess::ExitStatus::Signaled(_) + | subprocess::ExitStatus::Other(_) + | subprocess::ExitStatus::Undetermined => Err(Error::GitProcessError { + stdout: proc_res.stdout_str(), + stderr: proc_res.stderr_str(), + }), + } +} + +/// Get files tracked by git +/// +/// NOTE: Assumptions for this function: +/// - No submodules +pub fn get_git_tracked_files(git_command: &str, xvc_directory: &str) -> Result> { + let git_ls_files_out = exec_git(git_command, xvc_directory, &["ls-files", "--full-name"])?; + watch!(git_ls_files_out); + let git_ls_files_out = git_ls_files_out + .lines() + .map(|s| s.to_string()) + .collect::>(); + Ok(git_ls_files_out) +} + +/// Stash user's staged files to avoid committing them before auto-commit +pub fn stash_user_staged_files( + output_snd: &XvcOutputSender, + git_command: &str, + xvc_directory: 
&str, +) -> Result { + // Do we have user staged files? + let git_diff_staged_out = exec_git( + git_command, + xvc_directory, + &["diff", "--name-only", "--cached"], + )?; + + watch!(git_diff_staged_out); + + // If so stash them + if !git_diff_staged_out.trim().is_empty() { + debug!( + output_snd, + "Stashing user staged files: {git_diff_staged_out}" + ); + let stash_out = exec_git(git_command, xvc_directory, &["stash", "push", "--staged"])?; + debug!(output_snd, "Stashed user staged files: {stash_out}"); + } + + Ok(git_diff_staged_out) +} + +/// Unstash user's staged files after auto-commit +pub fn unstash_user_staged_files( + output_snd: &XvcOutputSender, + git_command: &str, + xvc_directory: &str, +) -> Result<()> { + let res_git_stash_pop = exec_git(git_command, xvc_directory, &["stash", "pop", "--index"])?; + debug!( + output_snd, + "Unstashed user staged files: {res_git_stash_pop}" + ); + Ok(()) +} + +/// Checkout a git branch or tag before running an Xvc command +pub fn git_checkout_ref( + output_snd: &XvcOutputSender, + xvc_root: &XvcRoot, + from_ref: String, +) -> Result<()> { + let xvc_directory = xvc_root.as_path().to_str().unwrap(); + let git_command_option = xvc_root.config().get_str("git.command")?.option; + let git_command = get_absolute_git_command(&git_command_option)?; + + let git_diff_staged_out = stash_user_staged_files(output_snd, &git_command, xvc_directory)?; + exec_git(&git_command, xvc_directory, &["checkout", &from_ref])?; + + if !git_diff_staged_out.trim().is_empty() { + debug!("Unstashing user staged files: {git_diff_staged_out}"); + unstash_user_staged_files(output_snd, &git_command, xvc_directory)?; + } + Ok(()) +} + +/// This receives `xvc_root` ownership because as a final operation, it must drop the root to +/// record the last entity counter before commit. 
+pub fn handle_git_automation( + output_snd: &XvcOutputSender, + xvc_root: &XvcRoot, + to_branch: Option<&str>, + xvc_cmd: &str, +) -> Result<()> { + let xvc_root_dir = xvc_root.as_path().to_path_buf(); + let xvc_root_str = xvc_root_dir.to_str().unwrap(); + let use_git = xvc_root.config().get_bool("git.use_git")?.option; + let auto_commit = xvc_root.config().get_bool("git.auto_commit")?.option; + let auto_stage = xvc_root.config().get_bool("git.auto_stage")?.option; + let git_command_str = xvc_root.config().get_str("git.command")?.option; + let git_command = get_absolute_git_command(&git_command_str)?; + let xvc_dir = xvc_root.xvc_dir().clone(); + let xvc_dir_str = xvc_dir.to_str().unwrap(); + + if use_git { + if auto_commit { + git_auto_commit( + output_snd, + &git_command, + xvc_root_str, + xvc_dir_str, + xvc_cmd, + to_branch, + )?; + } else if auto_stage { + git_auto_stage(output_snd, &git_command, xvc_root_str, xvc_dir_str)?; + } + } + + Ok(()) +} + +/// Commit `.xvc` directory after Xvc operations +pub fn git_auto_commit( + output_snd: &XvcOutputSender, + git_command: &str, + xvc_root_str: &str, + xvc_dir_str: &str, + xvc_cmd: &str, + to_branch: Option<&str>, +) -> Result<()> { + debug!(output_snd, "Using Git: {git_command}"); + + let git_diff_staged_out = stash_user_staged_files(output_snd, git_command, xvc_root_str)?; + + if let Some(branch) = to_branch { + debug!(output_snd, "Checking out branch {branch}"); + exec_git(git_command, xvc_root_str, &["checkout", "-b", branch])?; + } + + // Add and commit `.xvc` + match exec_git( + git_command, + xvc_root_str, + // We check the output of the git add command to see if there were any files added. + // "--verbose" is required to get the output we need. 
+ &[ + "add", + "--verbose", + xvc_dir_str, + "*.gitignore", + "*.xvcignore", + ], + ) { + Ok(git_add_output) => { + watch!(git_add_output); + if git_add_output.trim().is_empty() { + debug!(output_snd, "No files to commit"); + return Ok(()); + } else { + match exec_git( + git_command, + xvc_root_str, + &[ + "commit", + "-m", + &format!("Xvc auto-commit after '{xvc_cmd}'"), + ], + ) { + Ok(res_git_commit) => { + debug!(output_snd, "Committing .xvc/ to git: {res_git_commit}"); + } + Err(e) => { + debug!(output_snd, "Error committing .xvc/ to git: {e}"); + return Err(e); + } + } + } + } + Err(e) => { + debug!(output_snd, "Error adding .xvc/ to git: {e}"); + return Err(e); + } + } + + // Pop the stash if there were files we stashed + + if !git_diff_staged_out.trim().is_empty() { + debug!( + output_snd, + "Unstashing user staged files: {git_diff_staged_out}" + ); + unstash_user_staged_files(output_snd, git_command, xvc_root_str)?; + } + Ok(()) +} + +/// runs `git add .xvc *.gitignore *.xvcignore` to stage the files after Xvc operations +pub fn git_auto_stage( + output_snd: &XvcOutputSender, + git_command: &str, + xvc_root_str: &str, + xvc_dir_str: &str, +) -> Result<()> { + let res_git_add = exec_git( + git_command, + xvc_root_str, + &["add", xvc_dir_str, "*.gitignore", "*.xvcignore"], + )?; + debug!(output_snd, "Staging .xvc/ to git: {res_git_add}"); + Ok(()) +} + +/// Run `git check-ignore` to check if a path is ignored by Git +pub fn git_ignored(git_command: &str, xvc_root_str: &str, path: &str) -> Result { + let command_res = exec_git(git_command, xvc_root_str, &["check-ignore", path])?; + + if command_res.trim().is_empty() { + Ok(false) + } else { + Ok(true) + } +} + #[cfg(test)] mod test { use super::*; diff --git a/ecs/Cargo.toml b/ecs/Cargo.toml index 938b7b819..94acd14de 100644 --- a/ecs/Cargo.toml +++ b/ecs/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "xvc-ecs" -version = "0.6.11" +version = "0.6.12" edition = "2021" description = "Entity-Component System for 
Xvc" authors = ["Emre Şahin "] @@ -16,7 +16,7 @@ name = "xvc_ecs" crate-type = ["rlib"] [dependencies] -xvc-logging = { version = "0.6.11", path = "../logging" } +xvc-logging = { version = "0.6.12", path = "../logging" } ## Serialization serde = { version = "^1.0", features = ["derive"] } @@ -31,7 +31,7 @@ crossbeam-channel = "^0.5" ## Logging and errors log = "^0.4" ### meta-logging-in-format is required for sled: https://github.com/spacejam/sled/issues/1384 -fern = { version = "^0.6", features = ["colored"] } +fern = { version = "^0.7", features = ["colored"] } thiserror = "^1.0" ## Misc diff --git a/file/Cargo.toml b/file/Cargo.toml index 552ab8611..39c473755 100644 --- a/file/Cargo.toml +++ b/file/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "xvc-file" -version = "0.6.11" +version = "0.6.12" edition = "2021" description = "File tracking, versioning, upload and download functions for Xvc" authors = ["Emre Şahin "] @@ -21,12 +21,12 @@ test = true bench = true [dependencies] -xvc-logging = { version = "0.6.11", path = "../logging" } -xvc-config = { version = "0.6.11", path = "../config" } -xvc-core = { version = "0.6.11", path = "../core" } -xvc-ecs = { version = "0.6.11", path = "../ecs" } -xvc-walker = { version = "0.6.11", path = "../walker" } -xvc-storage = { version = "0.6.11", path = "../storage", default-features = false } +xvc-logging = { version = "0.6.12", path = "../logging" } +xvc-config = { version = "0.6.12", path = "../config" } +xvc-core = { version = "0.6.12", path = "../core" } +xvc-ecs = { version = "0.6.12", path = "../ecs" } +xvc-walker = { version = "0.6.12", path = "../walker" } +xvc-storage = { version = "0.6.12", path = "../storage", default-features = false } ## Cli and config @@ -69,7 +69,7 @@ log = "^0.4" anyhow = "^1.0" peak_alloc = "^0.2" ### meta-logging-in-format is required for sled: https://github.com/spacejam/sled/issues/1384 -fern = { version = "^0.6", features = ["colored"] } +fern = { version = "^0.7", features = ["colored"] } 
## macros @@ -87,13 +87,13 @@ uuid = { version = "^1.10", features = ["serde", "v4", "fast-rng"] } hex = { version = "^0.4", features = ["serde"] } url = { version = "^2.5", features = ["serde"] } itertools = "^0.13" -derive_more = "^0.99" -parse-size = "^1.0" +derive_more = { version = "^1.0", features = ["full"] } +parse-size = "^1.1" [features] default = ["reflink"] reflink = ["dep:reflink"] [dev-dependencies] -xvc-test-helper = { version = "0.6.11", path = "../test_helper/" } +xvc-test-helper = { version = "0.6.12", path = "../test_helper/" } shellfn = "^0.1" diff --git a/file/src/carry_in/mod.rs b/file/src/carry_in/mod.rs index b14931a7f..05c7fefbc 100644 --- a/file/src/carry_in/mod.rs +++ b/file/src/carry_in/mod.rs @@ -22,7 +22,7 @@ use crate::common::compare::{diff_content_digest, diff_text_or_binary, diff_xvc_ use crate::common::gitignore::make_ignore_handler; use crate::common::{ load_targets_from_store, move_xvc_path_to_cache, only_file_targets, recheck_from_cache, - xvc_path_metadata_map_from_disk, + set_writable, xvc_path_metadata_map_from_disk, }; use crate::common::{update_store_records, FileTextOrBinary}; use crate::error::Result; @@ -241,8 +241,6 @@ pub fn carry_in( watch!(ignore_writer); watch!(ignore_thread); - // TODO: Remove this when we set unix permissions in platform dependent fashion - #[allow(clippy::permissions_set_readonly_false)] let copy_path_to_cache_and_recheck = |xe, xp| { let cache_path = uwo!(cache_paths.get(xe).cloned(), output_snd); watch!(cache_path); @@ -251,20 +249,10 @@ pub fn carry_in( if abs_cache_path.exists() { if force { let cache_dir = uwo!(abs_cache_path.parent(), output_snd); - watch!(cache_dir); - let mut dir_perm = uwr!(cache_dir.metadata(), output_snd).permissions(); - watch!(dir_perm); - dir_perm.set_readonly(false); - watch!(dir_perm); - uwr!(fs::set_permissions(cache_dir, dir_perm), output_snd); - watch!(cache_dir); - let mut file_perm = - uwr!(abs_cache_path.as_path().metadata(), output_snd).permissions(); - 
watch!(file_perm); - watch!(abs_cache_path); - watch!(file_perm); - file_perm.set_readonly(false); - uwr!(fs::set_permissions(&abs_cache_path, file_perm), output_snd); + uwr!(set_writable(cache_dir), output_snd); + watch!(cache_dir.metadata().unwrap().permissions()); + uwr!(set_writable(&abs_cache_path), output_snd); + watch!(abs_cache_path.metadata().unwrap().permissions()); /* let mut dir_perm = cache_dir.metadata()?.permissions(); */ /* dir_perm.set_readonly(true); */ uwr!(fs::remove_file(&abs_cache_path), output_snd); diff --git a/file/src/common/mod.rs b/file/src/common/mod.rs index 617eae0f6..dc5f4b6b2 100644 --- a/file/src/common/mod.rs +++ b/file/src/common/mod.rs @@ -2,6 +2,7 @@ pub mod compare; pub mod gitignore; +use std::collections::{HashMap, HashSet}; use std::fs::{self}; use std::{ @@ -9,6 +10,9 @@ use std::{ path::{Path, PathBuf}, }; +#[cfg(unix)] +use std::os::unix::fs::PermissionsExt; + use crate::common::gitignore::IgnoreOperation; use crate::error::{Error, Result}; use crossbeam_channel::{Receiver, Sender}; @@ -18,16 +22,18 @@ use serde::{Deserialize, Serialize}; use xvc_config::{conf, FromConfigKey}; use xvc_core::types::xvcpath::XvcCachePath; use xvc_core::util::file::make_symlink; -use xvc_core::HashAlgorithm; +use xvc_core::util::xvcignore::COMMON_IGNORE_PATTERNS; use xvc_core::{ all_paths_and_metadata, apply_diff, ContentDigest, DiffStore, RecheckMethod, TextOrBinary, XvcFileType, XvcMetadata, XvcPath, XvcPathMetadataMap, XvcRoot, }; +use xvc_core::{get_absolute_git_command, get_git_tracked_files, HashAlgorithm}; use xvc_ecs::ecs::event::EventLog; use xvc_logging::{error, info, uwr, warn, watch, XvcOutputSender}; use xvc_ecs::{persist, HStore, Storable, XvcStore}; +use xvc_walker::walk_serial::path_metadata_map_from_file_targets; use xvc_walker::{AbsolutePath, Glob, PathSync}; use self::gitignore::IgnoreOp; @@ -265,6 +271,7 @@ pub fn targets_from_disk( xvc_root: &XvcRoot, current_dir: &AbsolutePath, targets: &Option>, + filter_git_paths: 
bool, ) -> Result { watch!(current_dir); watch!(xvc_root.absolute_path()); @@ -274,6 +281,13 @@ pub fn targets_from_disk( .strip_prefix(xvc_root.absolute_path())? .to_str() .unwrap(); + + let cwd = if cwd.ends_with('/') { + cwd.to_owned() + } else { + format!("{cwd}/") + }; + let targets = match targets { Some(targets) => targets.iter().map(|t| format!("{cwd}{t}")).collect(), None => vec![cwd.to_string()], @@ -284,15 +298,91 @@ pub fn targets_from_disk( xvc_root, xvc_root.absolute_path(), &Some(targets), + filter_git_paths, ); } - // FIXME: If there are no globs/directories in the targets, no need to retrieve all the paths + + let has_globs_or_dirs = targets + .as_ref() + .map(|targets| { + targets.iter().any(|t| { + t.contains('*') || t.ends_with('/') || t.contains('/') || PathBuf::from(t).is_dir() + }) + }) + // None means all paths + .unwrap_or(true); + // If there are no globs/directories in the targets, no need to retrieve all the paths // here. - let (all_paths, _) = all_paths_and_metadata(xvc_root); + + let all_paths = if has_globs_or_dirs { + all_paths_and_metadata(xvc_root).0 + } else { + // FIXME: Move this to a function + let (pmm, _) = path_metadata_map_from_file_targets( + output_snd, + COMMON_IGNORE_PATTERNS, + xvc_root, + // This should be ok as we checked empty condition on has_globs_or_dirs + targets.clone().unwrap(), + &xvc_walker::WalkOptions::xvcignore(), + )?; + let mut xpmm = HashMap::new(); + + pmm.into_iter().for_each(|pm| { + let md: XvcMetadata = XvcMetadata::from(pm.metadata); + let rxp = XvcPath::new(xvc_root, xvc_root.absolute_path(), &pm.path); + match rxp { + Ok(xvc_path) => { + xpmm.insert(xvc_path, md); + } + Err(e) => { + e.warn(); + } + } + }); + xpmm + }; watch!(all_paths); + // Return false when the path is a git path + + let git_files: HashSet = if filter_git_paths { + let git_command_str = xvc_root.config().get_str("git.command")?.option; + let git_command = get_absolute_git_command(&git_command_str)?; + get_git_tracked_files( 
+ &git_command, + xvc_root + .absolute_path() + .to_str() + .expect("xvc_root must have a path"), + )? + .into_iter() + .collect() + } else { + HashSet::new() + }; + + let mut git_path_filter: Box bool> = if filter_git_paths { + Box::new(|p: &XvcPath| { + let path_str = p.as_str(); + let path_str = path_str + .strip_prefix( + xvc_root + .absolute_path() + .to_str() + .expect("xvc_root must have a path"), + ) + .unwrap_or(path_str); + !git_files.contains(path_str) + }) + } else { + Box::new(|_p: &XvcPath| true) + }; if let Some(targets) = targets { + // FIXME: Is this a bug? When targets is empty, we can return all files. + // Targets should be None to return all paths but what about we pass Some([])? + if targets.is_empty() { return Ok(XvcPathMetadataMap::new()); } @@ -301,10 +391,14 @@ pub fn targets_from_disk( watch!(glob_matcher); Ok(all_paths .into_iter() + .filter(|(p, _)| git_path_filter(p)) .filter(|(p, _)| glob_matcher.is_match(p.as_str())) .collect()) } else { - Ok(all_paths) + Ok(all_paths + .into_iter() + .filter(|(p, _)| git_path_filter(p)) + .collect()) } } @@ -386,8 +480,6 @@ pub fn recheck_from_cache( watch!(path); watch!(recheck_method); - // TODO: Remove this when we set unix permissions in platform dependent fashion - #[allow(clippy::permissions_set_readonly_false)] match recheck_method { RecheckMethod::Copy => { copy_file(output_snd, cache_path, path)?; @@ -440,17 +532,51 @@ fn copy_file( cache_path: AbsolutePath, path: AbsolutePath, ) -> Result<()> { - watch!("Before copy"); - watch!(&cache_path); - watch!(&path); fs::copy(&cache_path, &path)?; + set_writable(&path)?; info!(output_snd, "[COPY] {} -> {}", cache_path, path); + Ok(()) +} + +#[cfg(not(unix))] +pub fn set_writable(path: &Path) -> Result<()> { let mut perm = path.metadata()?.permissions(); watch!(&perm); - // FIXME: Fix the clippy warning in the following line perm.set_readonly(false); watch!(&perm); - fs::set_permissions(&path, perm)?; + fs::set_permissions(path, perm)?; + Ok(()) +} 
+ +#[cfg(not(unix))] +pub fn set_readonly(path: &Path) -> Result<()> { + let mut perm = path.metadata()?.permissions(); + watch!(&perm); + perm.set_readonly(true); + watch!(&perm); + fs::set_permissions(path, perm)?; + Ok(()) +} + +/// Set a path to user writable on unix systems. +#[cfg(unix)] +pub fn set_writable(path: &Path) -> Result<()> { + let mut permissions = path.metadata()?.permissions(); + let mode = permissions.mode(); + let new_mode = mode | 0o200; + permissions.set_mode(new_mode); + fs::set_permissions(path, permissions)?; + Ok(()) +} + +/// Set a path to readonly on unix systems. +#[cfg(unix)] +pub fn set_readonly(path: &Path) -> Result<()> { + let mut permissions = path.metadata()?.permissions(); + let mode = permissions.mode(); + let new_mode = mode & !0o200; + permissions.set_mode(new_mode); + fs::set_permissions(path, permissions)?; Ok(()) } diff --git a/file/src/list/mod.rs b/file/src/list/mod.rs index 25887ae7b..6c5050143 100644 --- a/file/src/list/mod.rs +++ b/file/src/list/mod.rs @@ -10,7 +10,6 @@ use anyhow::anyhow; use chrono; use clap::Parser; -use std::cell::RefCell; use std::collections::{HashMap, HashSet}; use std::fmt::{Display, Formatter}; use std::path::Path; @@ -25,39 +24,67 @@ use xvc_core::{ use xvc_ecs::XvcEntity; use xvc_logging::{error, output, watch, XvcOutputSender}; +/// Format specifier for file list columns #[derive(Debug, Clone, EnumString, EnumDisplay, PartialEq, Eq)] -enum ListColumn { +pub enum ListColumn { + /// Column for the actual content digest (base64 encoded). #[strum(serialize = "acd64")] ActualContentDigest64, + + /// Column for the actual content digest (base8 encoded). #[strum(serialize = "acd8")] ActualContentDigest8, + + /// Column for the actual file type. #[strum(serialize = "aft")] ActualFileType, + + /// Column for the actual size of the file. #[strum(serialize = "asz")] ActualSize, + + /// Column for the actual timestamp of the file. 
#[strum(serialize = "ats")] ActualTimestamp, + + /// Column for the name of the file. #[strum(serialize = "name")] Name, + + /// Column for the cache status of the file. #[strum(serialize = "cst")] CacheStatus, + + /// Column for the recorded recheck method. #[strum(serialize = "rrm")] RecordedRecheckMethod, + + /// Column for the recorded content digest (base64 encoded). #[strum(serialize = "rcd64")] RecordedContentDigest64, + + /// Column for the recorded content digest (base8 encoded). #[strum(serialize = "rcd8")] RecordedContentDigest8, + + /// Column for the recorded size of the file. #[strum(serialize = "rsz")] RecordedSize, + + /// Column for the recorded timestamp of the file. #[strum(serialize = "rts")] RecordedTimestamp, + + /// Column for a literal string value. #[strum(disabled)] Literal(String), } +/// Represents the format of a list, including the columns to be displayed. #[derive(Debug, Clone, PartialEq, Eq)] -struct ListFormat { - columns: Vec, +pub struct ListFormat { + /// A vector of [ListColumn] enums representing the columns in the table. 
+ pub columns: Vec, } impl FromStr for ListFormat { @@ -81,49 +108,72 @@ impl FromStr for ListFormat { conf!(ListFormat, "file.list.format"); +/// Specify how to sort file list #[derive(Debug, Copy, Clone, EnumString, EnumDisplay, PartialEq, Eq)] -enum ListSortCriteria { +pub enum ListSortCriteria { #[strum(serialize = "none")] + /// No sorting None, #[strum(serialize = "name-asc")] + /// Sort by name in ascending order NameAsc, #[strum(serialize = "name-desc")] + /// Sort by name in descending order NameDesc, #[strum(serialize = "size-asc")] + /// Sort by size in ascending order SizeAsc, #[strum(serialize = "size-desc")] + /// Sort by size in descending order SizeDesc, #[strum(serialize = "t-asc", serialize = "timestamp-asc", serialize = "ts-asc")] + /// Sort by timestamp in ascending order TimestampAsc, #[strum( serialize = "t-desc", serialize = "timestamp-desc", serialize = "ts-desc" )] + /// Sort by timestamp in descending order TimestampDesc, } conf!(ListSortCriteria, "file.list.sort"); -#[derive(Debug, Clone)] -struct ListRow { - actual_content_digest_str: String, - actual_size: u64, - actual_size_str: String, - actual_timestamp: SystemTime, - actual_timestamp_str: String, - actual_file_type: String, - - name: String, - cache_status: String, - - recorded_recheck_method: String, - recorded_content_digest_str: String, - recorded_size: u64, - recorded_size_str: String, - // This can be used as a separate field to sort in the future +/// A single item in the list output +#[derive(Debug, Clone, PartialEq)] +pub struct ListRow { + /// The actual (on-disk) content digest of the file + pub actual_content_digest_str: String, + /// The actual (on-disk) file size + pub actual_size: u64, + /// The actual (on-disk) file size as a string + pub actual_size_str: String, + /// The actual (on-disk) file modification timestamp + pub actual_timestamp: SystemTime, + /// The actual (on-disk) file modification timestamp as a string + pub actual_timestamp_str: String, + /// The 
actual (on-disk) file type + pub actual_file_type: String, + + /// The basename of the file + pub name: String, + /// The cache status of the file + pub cache_status: String, + + /// The recheck method used to link to the cached file + pub recorded_recheck_method: String, + /// The recorded content digest of the file + pub recorded_content_digest_str: String, + /// The recorded size of the file + pub recorded_size: u64, + /// The recorded size of the file as a string + pub recorded_size_str: String, + /// The recorded timestamp of the file + // FIXME: This can be used as a separate field to sort in the future #[allow(dead_code)] - recorded_timestamp: SystemTime, - recorded_timestamp_str: String, + pub recorded_timestamp: SystemTime, + /// The recorded timestamp of the file as a string + pub recorded_timestamp_str: String, } impl ListRow { @@ -303,108 +353,145 @@ struct PathMatch { recorded_recheck_method: Option, } -#[derive(Debug, Clone)] -struct ListRows { - format: ListFormat, - sort_criteria: ListSortCriteria, - rows: RefCell>, +/// All rows of the file list and its format and sorting criteria +#[derive(Debug, Clone, PartialEq)] +pub struct ListRows { + /// How to format the file row. See [ListColumn] for the available columns. + pub format: ListFormat, + /// How to sort the list. See [ListSortCriteria] for the available criteria. 
+ pub sort_criteria: ListSortCriteria, + /// All elements of the file list + pub rows: Vec, } impl ListRows { + /// Create a new table with the specified params and sort it pub fn new(format: ListFormat, sort_criteria: ListSortCriteria, rows: Vec) -> Self { - Self { + let mut s = Self { format, sort_criteria, - rows: RefCell::new(rows), + rows, + }; + sort_list_rows(&mut s); + s + } + + /// Create an empty table without any rows, format or sorting criteria + pub fn empty() -> Self { + Self { + format: ListFormat { columns: vec![] }, + sort_criteria: ListSortCriteria::None, + rows: vec![], } } - fn build_row(&self, row: &ListRow) -> String { - let mut output = String::new(); - for column in &self.format.columns { - match column { - ListColumn::RecordedRecheckMethod => output.push_str(&row.recorded_recheck_method), - ListColumn::ActualFileType => output.push_str(&row.actual_file_type), - ListColumn::ActualSize => output.push_str(&row.actual_size_str), - ListColumn::ActualContentDigest64 => { - output.push_str(&row.actual_content_digest_str) - } - ListColumn::ActualContentDigest8 => { - output.push_str(if row.actual_content_digest_str.len() >= 8 { - &row.actual_content_digest_str[..8] - } else { - &row.actual_content_digest_str - }) - } - ListColumn::ActualTimestamp => output.push_str(&row.actual_timestamp_str), - ListColumn::Name => output.push_str(&row.name), - ListColumn::RecordedSize => output.push_str(&row.recorded_size_str), - ListColumn::RecordedContentDigest64 => { - output.push_str(&row.recorded_content_digest_str) - } - ListColumn::RecordedContentDigest8 => { - output.push_str(if row.recorded_content_digest_str.len() >= 8 { - &row.recorded_content_digest_str[..8] - } else { - &row.recorded_content_digest_str - }) - } - ListColumn::RecordedTimestamp => output.push_str(&row.recorded_timestamp_str), - ListColumn::CacheStatus => output.push_str(&row.cache_status), - ListColumn::Literal(literal) => output.push_str(literal), + /// Number if file lines in the table 
+ pub fn total_lines(&self) -> usize { + self.rows.len() + } + + /// Total size of the files in the table + pub fn total_actual_size(&self) -> u64 { + self.rows.iter().fold(0u64, |tot, r| tot + r.actual_size) + } + + /// Total size of the recorded files in the table + pub fn total_cached_size(&self) -> u64 { + let mut cached_sizes = HashMap::::new(); + self.rows.iter().for_each(|r| { + if !r.recorded_content_digest_str.trim().is_empty() { + cached_sizes.insert(r.recorded_content_digest_str.to_string(), r.recorded_size); } - } - output + }); + + cached_sizes.values().sum() } +} - pub fn build_table(&self, print_summary: bool) -> String { - let mut output = String::new(); - let row_cmp = |a: &ListRow, b: &ListRow| match self.sort_criteria { - ListSortCriteria::NameAsc => a.name.cmp(&b.name), - ListSortCriteria::NameDesc => b.name.cmp(&a.name), - ListSortCriteria::SizeAsc => a.actual_size.cmp(&b.actual_size), - ListSortCriteria::SizeDesc => b.actual_size.cmp(&a.actual_size), - ListSortCriteria::TimestampAsc => a.actual_timestamp.cmp(&b.actual_timestamp), - ListSortCriteria::TimestampDesc => b.actual_timestamp.cmp(&a.actual_timestamp), - ListSortCriteria::None => std::cmp::Ordering::Equal, - }; - if self.sort_criteria != ListSortCriteria::None { - self.rows.borrow_mut().sort_unstable_by(row_cmp) +/// Print a single row from the given element and the format +pub fn build_row(row: &ListRow, format: &ListFormat) -> String { + let mut output = String::new(); + for column in &format.columns { + match column { + ListColumn::RecordedRecheckMethod => output.push_str(&row.recorded_recheck_method), + ListColumn::ActualFileType => output.push_str(&row.actual_file_type), + ListColumn::ActualSize => output.push_str(&row.actual_size_str), + ListColumn::ActualContentDigest64 => output.push_str(&row.actual_content_digest_str), + ListColumn::ActualContentDigest8 => { + output.push_str(if row.actual_content_digest_str.len() >= 8 { + &row.actual_content_digest_str[..8] + } else { + 
&row.actual_content_digest_str + }) + } + ListColumn::ActualTimestamp => output.push_str(&row.actual_timestamp_str), + ListColumn::Name => output.push_str(&row.name), + ListColumn::RecordedSize => output.push_str(&row.recorded_size_str), + ListColumn::RecordedContentDigest64 => { + output.push_str(&row.recorded_content_digest_str) + } + ListColumn::RecordedContentDigest8 => { + output.push_str(if row.recorded_content_digest_str.len() >= 8 { + &row.recorded_content_digest_str[..8] + } else { + &row.recorded_content_digest_str + }) + } + ListColumn::RecordedTimestamp => output.push_str(&row.recorded_timestamp_str), + ListColumn::CacheStatus => output.push_str(&row.cache_status), + ListColumn::Literal(literal) => output.push_str(literal), } + } + output +} - for row in self.rows.borrow().iter() { - let row_str = self.build_row(row); - output.push_str(&row_str); - output.push('\n'); - } +/// Fn type to decouple the build_row function from the build_table function +type BuildRowFn = Box String>; - if print_summary { - let total_lines = self.rows.borrow().len(); - let total_actual_size = format_size(Some( - self.rows - .borrow() - .iter() - .fold(0u64, |tot, r| tot + r.actual_size), - )); - let mut cached_sizes = HashMap::::new(); - self.rows.borrow().iter().for_each(|r| { - if !r.recorded_content_digest_str.trim().is_empty() { - cached_sizes.insert(r.recorded_content_digest_str.to_string(), r.recorded_size); - } - }); +/// Build a table from the list of rows +pub fn build_table(list_rows: &ListRows, build_row: BuildRowFn) -> String { + let mut output = String::new(); - let total_cached_size = format_size(Some(cached_sizes.values().sum())); - output.push_str( - &format!("Total #: {total_lines} Workspace Size: {total_actual_size} Cached Size: {total_cached_size}\n"), - ) - } - output + let format = &list_rows.format; + for row in list_rows.rows.iter() { + let row_str = build_row(row, format); + output.push_str(&row_str); + output.push('\n'); } + + output +} + +fn 
add_summary_line(list_rows: &ListRows) -> String { + let total_lines = list_rows.total_lines(); + let total_actual_size = format_size(Some(list_rows.total_actual_size())); + let total_cached_size = format_size(Some(list_rows.total_cached_size())); + + // TODO: Add a format string to this output similar to files + format!("Total #: {total_lines} Workspace Size: {total_actual_size} Cached Size: {total_cached_size}\n") +} + +fn sort_list_rows(list_rows: &mut ListRows) { + let row_cmp = match list_rows.sort_criteria { + ListSortCriteria::NameAsc => |a: &ListRow, b: &ListRow| a.name.cmp(&b.name), + ListSortCriteria::NameDesc => |a: &ListRow, b: &ListRow| b.name.cmp(&a.name), + ListSortCriteria::SizeAsc => |a: &ListRow, b: &ListRow| a.actual_size.cmp(&b.actual_size), + ListSortCriteria::SizeDesc => |a: &ListRow, b: &ListRow| b.actual_size.cmp(&a.actual_size), + ListSortCriteria::TimestampAsc => { + |a: &ListRow, b: &ListRow| a.actual_timestamp.cmp(&b.actual_timestamp) + } + ListSortCriteria::TimestampDesc => { + |a: &ListRow, b: &ListRow| b.actual_timestamp.cmp(&a.actual_timestamp) + } + ListSortCriteria::None => |_: &ListRow, _: &ListRow| std::cmp::Ordering::Equal, + }; + + list_rows.rows.sort_unstable_by(row_cmp); } impl Display for ListRows { fn fmt(&self, f: &mut Formatter) -> std::fmt::Result { - write!(f, "{}", self.build_table(true))?; + write!(f, "{}", build_table(self, Box::new(build_row)))?; Ok(()) } } @@ -453,32 +540,38 @@ pub struct ListCLI { /// /// The default format can be set with file.list.format in the config file. #[arg(long, short = 'f', verbatim_doc_comment)] - format: Option, + pub format: Option, /// Sort criteria. /// /// It can be one of none (default), name-asc, name-desc, size-asc, size-desc, ts-asc, ts-desc. /// /// The default option can be set with file.list.sort in the config file. #[arg(long, short = 's')] - sort: Option, + pub sort: Option, /// Don't show total number and size of the listed files. 
/// /// The default option can be set with file.list.no_summary in the config file. #[arg(long)] - no_summary: bool, + pub no_summary: bool, /// Don't hide dot files /// /// If not supplied, hides dot files like .gitignore and .xvcignore #[arg(long, short = 'a')] - show_dot_files: bool, + pub show_dot_files: bool, + + /// List files tracked by Git. + /// + /// By default, Xvc doesn't list files tracked by Git. Supply this option to list them. + #[arg(long)] + pub include_git_files: bool, /// Files/directories to list. /// /// If not supplied, lists all files under the current directory. #[arg()] - targets: Option>, + pub targets: Option>, } impl UpdateFromXvcConfig for ListCLI { @@ -494,9 +587,13 @@ impl UpdateFromXvcConfig for ListCLI { let sort_criteria = self .sort .unwrap_or_else(|| ListSortCriteria::from_conf(conf)); + let include_git_files = + self.include_git_files || conf.get_bool("file.list.include_git_files")?.option; + Ok(Box::new(Self { no_summary, show_dot_files, + include_git_files, format: Some(format), sort: Some(sort_criteria), ..self @@ -526,47 +623,194 @@ impl UpdateFromXvcConfig for ListCLI { /// TODO: - I: File is ignored pub fn cmd_list(output_snd: &XvcOutputSender, xvc_root: &XvcRoot, cli_opts: ListCLI) -> Result<()> { + // FIXME: `opts` shouldn't be sent to the inner function, but we cannot make sure that it's + // updated from the config files in callers. A refactoring is good here. let conf = xvc_root.config(); let opts = cli_opts.update_from_conf(conf)?; + let no_summary = opts.no_summary; + let list_rows = cmd_list_inner(output_snd, xvc_root, &opts)?; + + // TODO: All output should be produced in a central location with implemented traits. 
+ // [ListRows] could receive no_summary when it's built and implement Display + output!( + output_snd, + "{}", + build_table(&list_rows, Box::new(build_row)) + ); + if !no_summary { + output!(output_snd, "{}", add_summary_line(&list_rows)); + } - let current_dir = conf.current_dir()?; + Ok(()) +} - // If targets are directories on disk, make sure they end with / +/// The actual implementation moved here to get the listed elements separately to be used in +/// desktop and server +pub fn cmd_list_inner( + output_snd: &XvcOutputSender, + xvc_root: &XvcRoot, + opts: &ListCLI, +) -> Result { + let conf = xvc_root.config(); - let all_from_disk = targets_from_disk(output_snd, xvc_root, current_dir, &opts.targets)?; + let current_dir = conf.current_dir()?; + let filter_git_paths = !opts.include_git_files; + + let all_from_disk = targets_from_disk( + output_snd, + xvc_root, + current_dir, + &opts.targets, + filter_git_paths, + )?; watch!(&all_from_disk); - let from_disk = if opts.show_dot_files { - all_from_disk - } else { - all_from_disk - .into_iter() - .filter_map(|(path, md)| { - let path_str = path.to_string(); - if path_str.starts_with('.') || path_str.contains("./") { - None - } else { - Some((path, md)) - } - }) - .collect() - }; + let from_disk = filter_dot_files(all_from_disk, opts.show_dot_files); watch!(from_disk); + let from_store = load_targets_from_store(output_snd, xvc_root, current_dir, &opts.targets)?; watch!(from_store); + let stored_xvc_metadata = xvc_root.load_store::()?; let stored_recheck_method = xvc_root.load_store::()?; + let matches = match_store_and_disk_paths( + from_disk, + from_store, + stored_xvc_metadata, + stored_recheck_method, + ); + watch!(matches); + + let matches = if opts.format.as_ref().unwrap().columns.iter().any(|c| { + *c == ListColumn::RecordedContentDigest64 || *c == ListColumn::RecordedContentDigest8 + }) { + fill_recorded_content_digests(xvc_root, matches)? 
+ } else { + matches + }; + + let matches = + if opts.format.as_ref().unwrap().columns.iter().any(|c| { + *c == ListColumn::ActualContentDigest64 || *c == ListColumn::ActualContentDigest8 + }) { + let algorithm = HashAlgorithm::from_conf(conf); + fill_actual_content_digests(output_snd, xvc_root, algorithm, matches)? + } else { + matches + }; + + let path_prefix = current_dir.strip_prefix(xvc_root.absolute_path())?; + + let rows = build_rows_from_matches(output_snd, matches, path_prefix); + let format = opts + .format + .clone() + .expect("Option must be filled at this point"); + let sort_criteria = opts.sort.expect("Option must be filled at this point"); + + let list_rows = ListRows::new(format, sort_criteria, rows); + Ok(list_rows) +} + +fn build_rows_from_matches( + output_snd: &XvcOutputSender, + matches: Vec, + path_prefix: &Path, +) -> Vec { + matches + .into_iter() + .filter_map(|pm| match ListRow::new(path_prefix, pm) { + Ok(lr) => Some(lr), + Err(e) => { + error!(output_snd, "{}", e); + None + } + }) + .collect() +} + +fn fill_actual_content_digests( + output_snd: &XvcOutputSender, + xvc_root: &XvcRoot, + algorithm: HashAlgorithm, + matches: Vec, +) -> Result> { + let text_or_binary_store = xvc_root.load_store::()?; + Ok(matches + .into_iter() + .filter_map(|pm| { + if pm + .actual_path + .as_deref() + .and(pm.actual_metadata.map(|md| md.is_file())) + == Some(true) + { + let actual_path = pm.actual_path.as_ref().unwrap(); + let path = actual_path.to_absolute_path(xvc_root); + let text_or_binary = if let Some(xvc_entity) = pm.xvc_entity { + text_or_binary_store + .get(&xvc_entity) + .copied() + .unwrap_or_default() + } else { + FileTextOrBinary::default() + }; + + match ContentDigest::new(&path, algorithm, text_or_binary.as_inner()) { + Ok(digest) => Some(PathMatch { + actual_digest: Some(digest), + ..pm + }), + Err(e) => { + error!(output_snd, "{}", e); + None + } + } + } else { + Some(pm) + } + }) + .collect()) +} + +fn fill_recorded_content_digests( + 
xvc_root: &std::sync::Arc, + matches: Vec, +) -> Result> { + let content_digest_store = xvc_root.load_store::()?; + let matches: Vec = matches + .into_iter() + .map(|pm| { + if let Some(xvc_entity) = pm.xvc_entity { + let digest = content_digest_store.get(&xvc_entity).cloned(); + PathMatch { + recorded_digest: digest, + ..pm + } + } else { + pm + } + }) + .collect(); + Ok(matches) +} + +/// There are four groups of paths: +/// 1. Paths that are in the store and on disk and have identical metadata +/// 2. Paths that are in the store and on disk but have different metadata +/// 3. Paths that are in the store but not on disk +/// 4. Paths that are on disk but not in the store +fn match_store_and_disk_paths( + from_disk: HashMap, + from_store: xvc_ecs::HStore, + stored_xvc_metadata: xvc_ecs::XvcStore, + stored_recheck_method: xvc_ecs::XvcStore, +) -> Vec { // Now match actual and recorded paths let mut matches = Vec::::new(); - // There are four groups of paths: - // 1. Paths that are in the store and on disk and have identical metadata - // 2. Paths that are in the store and on disk but have different metadata - // 3. Paths that are in the store but not on disk - // 4. Paths that are on disk but not in the store - let mut found_entities = HashSet::::new(); for (disk_xvc_path, disk_xvc_md) in from_disk { @@ -631,89 +875,26 @@ pub fn cmd_list(output_snd: &XvcOutputSender, xvc_root: &XvcRoot, cli_opts: List }; matches.push(pm); } + matches +} - watch!(matches); - - // Now fill in the digests if needed. - // We use rec content digest to identify cache paths and calculate cache - // size. So we always load and fill these values. 
- let content_digest_store = xvc_root.load_store::()?; - let matches: Vec = matches - .into_iter() - .map(|pm| { - if let Some(xvc_entity) = pm.xvc_entity { - let digest = content_digest_store.get(&xvc_entity).cloned(); - PathMatch { - recorded_digest: digest, - ..pm +fn filter_dot_files( + all_from_disk: HashMap, + show_dot_files: bool, +) -> HashMap { + if show_dot_files { + all_from_disk + } else { + all_from_disk + .into_iter() + .filter_map(|(path, md)| { + let path_str = path.to_string(); + if path_str.starts_with('.') || path_str.contains("./") { + None + } else { + Some((path, md)) } - } else { - pm - } - }) - .collect(); - - // Do not calculate actual content hashes if it's not requested in the - // format string. - let matches = - if opts.format.as_ref().unwrap().columns.iter().any(|c| { - *c == ListColumn::ActualContentDigest64 || *c == ListColumn::ActualContentDigest8 - }) { - let algorithm = HashAlgorithm::from_conf(conf); - let text_or_binary_store = xvc_root.load_store::()?; - matches - .into_iter() - .filter_map(|pm| { - if pm - .actual_path - .as_deref() - .and(pm.actual_metadata.map(|md| md.is_file())) - == Some(true) - { - let actual_path = pm.actual_path.as_ref().unwrap(); - let path = actual_path.to_absolute_path(xvc_root); - let text_or_binary = if let Some(xvc_entity) = pm.xvc_entity { - text_or_binary_store - .get(&xvc_entity) - .copied() - .unwrap_or_default() - } else { - FileTextOrBinary::default() - }; - - match ContentDigest::new(&path, algorithm, text_or_binary.as_inner()) { - Ok(digest) => Some(PathMatch { - actual_digest: Some(digest), - ..pm - }), - Err(e) => { - error!(output_snd, "{}", e); - None - } - } - } else { - Some(pm) - } - }) - .collect() - } else { - matches - }; - - let path_prefix = current_dir.strip_prefix(xvc_root.absolute_path())?; - - let rows = matches - .into_iter() - .filter_map(|pm| match ListRow::new(path_prefix, pm) { - Ok(lr) => Some(lr), - Err(e) => { - error!(output_snd, "{}", e); - None - } - }) - 
.collect(); - - let list_rows = ListRows::new(opts.format.unwrap(), opts.sort.unwrap(), rows); - output!(output_snd, "{}", list_rows.build_table(!opts.no_summary)); - Ok(()) + }) + .collect() + } } diff --git a/file/src/recheck/mod.rs b/file/src/recheck/mod.rs index f5046f35a..2e4398aff 100644 --- a/file/src/recheck/mod.rs +++ b/file/src/recheck/mod.rs @@ -30,7 +30,7 @@ use xvc_logging::{error, info, uwr, warn, watch, XvcOutputSender}; /// /// - If the workspace copy is missing. /// - If the workspace copy is not changed but the user wants to change recheck method. (e.g. from copy -/// to symlink.) +/// to symlink.) /// - If the `--force` is set. /// /// If the workspace copy of a file is changed, this command doesn't overwrite it by default. Set diff --git a/file/src/track/mod.rs b/file/src/track/mod.rs index 794ed8925..dcdb8ec3b 100644 --- a/file/src/track/mod.rs +++ b/file/src/track/mod.rs @@ -32,7 +32,6 @@ use xvc_core::RecheckMethod; use xvc_core::XvcPath; use xvc_ecs::{HStore, XvcEntity}; - /// Add files for tracking with Xvc #[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, From, Parser)] #[command(rename_all = "kebab-case")] @@ -50,9 +49,19 @@ pub struct TrackCLI { /// auto) #[arg(long)] text_or_binary: Option, + + /// Include git tracked files as well. (Default: false) + /// + /// Xvc doesn't track files that are already tracked by git by default. + /// You can set files.track.include-git to true in the configuration file to + /// change this behavior. 
+ #[arg(long)] + include_git_files: bool, + /// Add targets even if they are already tracked #[arg(long)] force: bool, + /// Don't use parallelism #[arg(long)] no_parallel: bool, @@ -76,6 +85,8 @@ impl UpdateFromXvcConfig for TrackCLI { || Some(FileTextOrBinary::from_conf(conf)), |v| Some(v.to_owned()), ); + let include_git_files = + self.include_git_files || conf.get_bool("file.track.include_git_files")?.option; Ok(Box::new(Self { targets: self.targets.clone(), @@ -84,6 +95,7 @@ impl UpdateFromXvcConfig for TrackCLI { force, no_parallel, text_or_binary, + include_git_files, })) } } @@ -118,7 +130,14 @@ pub fn cmd_track( let conf = xvc_root.config(); let opts = cli_opts.update_from_conf(conf)?; let current_dir = conf.current_dir()?; - let targets = targets_from_disk(output_snd, xvc_root, current_dir, &opts.targets)?; + let filter_git_files = !opts.include_git_files; + let targets = targets_from_disk( + output_snd, + xvc_root, + current_dir, + &opts.targets, + filter_git_files, + )?; watch!(targets); let requested_recheck_method = opts.recheck_method; let text_or_binary = opts.text_or_binary.unwrap_or_default(); @@ -232,7 +251,6 @@ pub fn cmd_track( update_file_gitignores(xvc_root, ¤t_gitignore, &file_targets)?; - if !opts.no_commit { let current_xvc_path_store = xvc_root.load_store::()?; diff --git a/lib/Cargo.toml b/lib/Cargo.toml index bf05bebd6..54f6b2100 100644 --- a/lib/Cargo.toml +++ b/lib/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "xvc" -version = "0.6.11" +version = "0.6.12" edition = "2021" description = "An MLOps tool to manage data files and pipelines on top of Git" authors = ["Emre Şahin "] @@ -20,14 +20,14 @@ name = "xvc" path = "src/main.rs" [dependencies] -xvc-config = { version = "0.6.11", path = "../config" } -xvc-core = { version = "0.6.11", path = "../core" } -xvc-logging = { version = "0.6.11", path = "../logging" } -xvc-ecs = { version = "0.6.11", path = "../ecs" } -xvc-file = { version = "0.6.11", path = "../file", default-features = false } 
-xvc-pipeline = { version = "0.6.11", path = "../pipeline" } -xvc-walker = { version = "0.6.11", path = "../walker" } -xvc-storage = { version = "0.6.11", path = "../storage", default-features = false } +xvc-config = { version = "0.6.12", path = "../config" } +xvc-core = { version = "0.6.12", path = "../core" } +xvc-logging = { version = "0.6.12", path = "../logging" } +xvc-ecs = { version = "0.6.12", path = "../ecs" } +xvc-file = { version = "0.6.12", path = "../file", default-features = false } +xvc-pipeline = { version = "0.6.12", path = "../pipeline" } +xvc-walker = { version = "0.6.12", path = "../walker" } +xvc-storage = { version = "0.6.12", path = "../storage", default-features = false } ## Cli and config clap = { version = "^4.5", features = ["derive", "cargo"] } @@ -59,13 +59,12 @@ subprocess = "^0.2" relative-path = { version = "^1.9", features = ["serde"] } path-absolutize = "^3.1" glob = "^0.3" -which = "^6.0" ## Logging and errors -thiserror = "^1.0" +thiserror = "^2.0" log = "^0.4" ### meta-logging-in-format is required for sled: https://github.com/spacejam/sled/issues/1384 -fern = { version = "^0.6", features = ["colored"] } +fern = { version = "^0.7", features = ["colored"] } anyhow = "^1.0" ## UI @@ -125,6 +124,7 @@ assert_cmd = "^2.0" assert_fs = "^1.1" escargot = "^0.5" fs_extra = "^1.3" +# NOTE: 0.4 removes Glob struct and this should be handled with care fast-glob = "^0.3" jwalk = "^0.8" predicates = "^3.1" @@ -132,5 +132,5 @@ proptest = "^1.5" shellfn = "^0.1" test-case = "^3.3" trycmd = "^0.15" -which = "^6.0" -xvc-test-helper = { version = "0.6.11", path = "../test_helper/" } +which = "^7.0" +xvc-test-helper = { version = "0.6.12", path = "../test_helper/" } diff --git a/lib/src/api.rs b/lib/src/api.rs index 388bc4dca..4e881c3f9 100644 --- a/lib/src/api.rs +++ b/lib/src/api.rs @@ -19,8 +19,8 @@ pub use xvc_logging::warn; pub use xvc_logging::watch; pub use xvc_config::XvcConfig; -pub use xvc_config::XvcConfigParams; pub use 
xvc_config::XvcConfigOptionSource; +pub use xvc_config::XvcConfigParams; pub use xvc_core::AbsolutePath; @@ -64,12 +64,3 @@ pub use xvc_pipeline::cmd_step_new as pipeline_step_new; pub use xvc_pipeline::cmd_step_output as pipeline_step_output; pub use xvc_pipeline::cmd_step_show as pipeline_step_show; pub use xvc_pipeline::cmd_step_update as pipeline_step_update; - -pub use crate::git::exec_git; -pub use crate::git::get_absolute_git_command; -pub use crate::git::git_auto_commit; -pub use crate::git::git_auto_stage; -pub use crate::git::git_checkout_ref; -pub use crate::git::handle_git_automation; -pub use crate::git::stash_user_staged_files; -pub use crate::git::unstash_user_staged_files; diff --git a/lib/src/cli/mod.rs b/lib/src/cli/mod.rs index f77184501..169ebc67f 100644 --- a/lib/src/cli/mod.rs +++ b/lib/src/cli/mod.rs @@ -5,11 +5,12 @@ use std::ffi::OsString; use std::path::PathBuf; use std::str::FromStr; -use crate::git_checkout_ref; -use crate::handle_git_automation; use crate::init; use crate::XvcRootOpt; +use xvc_core::git_checkout_ref; +use xvc_core::handle_git_automation; + use clap::Parser; use crossbeam::thread; use crossbeam_channel::bounded; @@ -17,12 +18,13 @@ use log::LevelFilter; use std::io; use xvc_core::types::xvcroot::load_xvc_root; use xvc_core::types::xvcroot::XvcRootInner; +use xvc_logging::XvcOutputSender; use xvc_logging::{debug, error, uwr, XvcOutputLine}; use xvc_config::{XvcConfigParams, XvcVerbosity}; use xvc_core::aliases; use xvc_core::check_ignore; -use xvc_core::default_project_config; +pub use xvc_core::default_project_config; use xvc_core::root; use xvc_core::CHANNEL_BOUND; use xvc_file as file; @@ -117,7 +119,7 @@ pub struct XvcCLI { impl XvcCLI { /// Parse the given elements with [clap::Parser::parse_from] and merge them to set /// [XvcCLI::command_string]. 
- pub fn from_str_slice(args: &[&str]) -> Result { + pub fn from_str_slice(args: &[&str]) -> Result { let command_string = args.join(" "); let parsed = Self::parse_from(args); Ok(Self { @@ -128,7 +130,7 @@ impl XvcCLI { /// Parse the given elements with [clap::Parser::parse_from] and merge them to set /// [XvcCLI::command_string]. - pub fn from_string_slice(args: &[String]) -> Result { + pub fn from_string_slice(args: &[String]) -> Result { let command_string = args.join(" "); let parsed = Self::parse_from(args); Ok(Self { @@ -139,7 +141,7 @@ impl XvcCLI { /// Parse the command line from the result of [`std::env::args_os`]. /// This updates [XvcCLI::command_string] with the command line. - pub fn from_args_os(args_os: ArgsOs) -> Result { + pub fn from_args_os(args_os: ArgsOs) -> Result { let args: Vec = args_os.collect(); let args: Vec = args .iter() @@ -302,6 +304,7 @@ pub fn dispatch_with_root(cli_opts: cli::XvcCLI, xvc_root_opt: XvcRootOpt) -> Re } } let xvc_root_opt_res = s.spawn(move |_| -> Result { + // FIXME: Use command matcher below instead of this let xvc_root_opt = match cli_opts.command { XvcSubCommand::Init(opts) => { let xvc_root = init::run(xvc_root_opt.as_ref(), opts)?; @@ -450,7 +453,8 @@ pub fn dispatch(cli_opts: cli::XvcCLI) -> Result { dispatch_with_root(cli_opts, xvc_root_opt) } -fn get_xvc_config_params(cli_opts: &XvcCLI) -> XvcConfigParams { +/// Decide configuration sources from CLI options +pub fn get_xvc_config_params(cli_opts: &XvcCLI) -> XvcConfigParams { XvcConfigParams { current_dir: AbsolutePath::from(&cli_opts.workdir), include_system_config: !cli_opts.no_system_config, @@ -463,7 +467,8 @@ fn get_xvc_config_params(cli_opts: &XvcCLI) -> XvcConfigParams { } } -fn get_term_log_level(verbosity: XvcVerbosity) -> LevelFilter { +/// Convert verbosity to log level +pub fn get_term_log_level(verbosity: XvcVerbosity) -> LevelFilter { match verbosity { XvcVerbosity::Quiet => LevelFilter::Off, XvcVerbosity::Default => LevelFilter::Error, @@ 
-474,7 +479,8 @@ fn get_term_log_level(verbosity: XvcVerbosity) -> LevelFilter { } } -fn get_verbosity(cli_opts: &XvcCLI) -> XvcVerbosity { +/// Convert verbosity value to XvcVerbosity +pub fn get_verbosity(cli_opts: &XvcCLI) -> XvcVerbosity { if cli_opts.quiet { XvcVerbosity::Quiet } else { @@ -487,3 +493,187 @@ fn get_verbosity(cli_opts: &XvcCLI) -> XvcVerbosity { } } } + +/// Collect all output from the channel in a string and return +// FIXME: Maybe move to xvc-logging +pub fn collect_output( + output_rcv: &crossbeam_channel::Receiver>, + term_log_level: LevelFilter, +) -> String { + let mut output_str = String::new(); + while let Ok(Some(output_line)) = output_rcv.recv() { + // output_str.push_str(&output_line); + match term_log_level { + LevelFilter::Off => match output_line { + XvcOutputLine::Output(_) => {} + XvcOutputLine::Info(_) => {} + XvcOutputLine::Warn(_) => {} + XvcOutputLine::Error(_) => {} + XvcOutputLine::Panic(m) => output_str.push_str(&format!("[PANIC] {}", m)), + XvcOutputLine::Tick(_) => todo!(), + XvcOutputLine::Debug(_) => {} + }, + LevelFilter::Error => match output_line { + XvcOutputLine::Output(m) => output_str.push_str(&m), + XvcOutputLine::Info(_) => {} + XvcOutputLine::Warn(_) => {} + XvcOutputLine::Error(m) => output_str.push_str(&format!("[ERROR] {}", m)), + XvcOutputLine::Panic(m) => output_str.push_str(&format!("[PANIC] {}", m)), + XvcOutputLine::Tick(_) => todo!(), + XvcOutputLine::Debug(_) => {} + }, + LevelFilter::Warn => match output_line { + XvcOutputLine::Output(m) => output_str.push_str(&m), + XvcOutputLine::Warn(m) => output_str.push_str(&format!("[WARN] {}", m)), + XvcOutputLine::Error(m) => output_str.push_str(&format!("[ERROR] {}", m)), + XvcOutputLine::Panic(m) => output_str.push_str(&format!("[PANIC] {}", m)), + XvcOutputLine::Info(_) => {} + XvcOutputLine::Tick(_) => todo!(), + XvcOutputLine::Debug(_) => {} + }, + LevelFilter::Info => match output_line { + XvcOutputLine::Output(m) => output_str.push_str(&m), + 
XvcOutputLine::Info(m) => output_str.push_str(&format!("[INFO] {}", m)), + XvcOutputLine::Warn(m) => output_str.push_str(&format!("[WARN] {}", m)), + XvcOutputLine::Error(m) => output_str.push_str(&format!("[ERROR] {}", m)), + XvcOutputLine::Panic(m) => output_str.push_str(&format!("[PANIC] {}", m)), + XvcOutputLine::Tick(_) => todo!(), + XvcOutputLine::Debug(_) => {} + }, + LevelFilter::Debug => match output_line { + XvcOutputLine::Output(m) => output_str.push_str(&m), + XvcOutputLine::Info(m) => output_str.push_str(&format!("[INFO] {}", m)), + XvcOutputLine::Warn(m) => output_str.push_str(&format!("[WARN] {}", m)), + XvcOutputLine::Error(m) => output_str.push_str(&format!("[ERROR] {}", m)), + XvcOutputLine::Panic(m) => output_str.push_str(&format!("[PANIC] {}", m)), + XvcOutputLine::Debug(m) => output_str.push_str(&format!("[DEBUG] {}", m)), + XvcOutputLine::Tick(_) => todo!(), + }, + LevelFilter::Trace => match output_line { + XvcOutputLine::Output(m) => output_str.push_str(&m), + XvcOutputLine::Info(m) => output_str.push_str(&format!("[INFO] {}", m)), + XvcOutputLine::Warn(m) => output_str.push_str(&format!("[WARN] {}", m)), + XvcOutputLine::Error(m) => output_str.push_str(&format!("[ERROR] {}", m)), + XvcOutputLine::Debug(m) => output_str.push_str(&format!("[DEBUG] {}", m)), + XvcOutputLine::Panic(m) => output_str.push_str(&format!("[PANIC] {}", m)), + XvcOutputLine::Tick(_) => todo!(), + }, + } + } + output_str +} + +/// Run the given command and return the modified [XvcRoot] +pub fn command_matcher( + cli_opts: XvcCLI, + xvc_root_opt: XvcRootOpt, + output_snd: XvcOutputSender, +) -> Result { + { + let res_xvc_root_opt: Result = match cli_opts.command { + XvcSubCommand::Init(opts) => { + let use_git = !opts.no_git; + let xvc_root = init::run(xvc_root_opt.as_ref(), opts)?; + + if use_git { + handle_git_automation( + &output_snd, + &xvc_root, + cli_opts.to_branch.as_deref(), + &cli_opts.command_string, + )?; + } + Ok(Some(xvc_root)) + } + + 
XvcSubCommand::Aliases(opts) => { + aliases::run(&output_snd, opts)?; + Ok(xvc_root_opt) + } + + // following commands can only be run inside a repository + XvcSubCommand::Root(opts) => { + root::run( + &output_snd, + xvc_root_opt + .as_ref() + .ok_or_else(|| Error::RequiresXvcRepository)?, + opts, + )?; + + Ok(xvc_root_opt) + } + + XvcSubCommand::File(opts) => { + file::run(&output_snd, xvc_root_opt.as_ref(), opts)?; + Ok(xvc_root_opt) + } + + XvcSubCommand::Pipeline(opts) => { + // FIXME: We can replace this stdin with another channel + let stdin = io::stdin(); + let input = stdin.lock(); + pipeline::cmd_pipeline( + input, + &output_snd, + xvc_root_opt.as_ref().ok_or(Error::RequiresXvcRepository)?, + opts, + )?; + Ok(xvc_root_opt) + } + + XvcSubCommand::CheckIgnore(opts) => { + // FIXME: We can replace this stdin with another channel + let stdin = io::stdin(); + let input = stdin.lock(); + + check_ignore::cmd_check_ignore( + input, + &output_snd, + xvc_root_opt.as_ref().ok_or(Error::RequiresXvcRepository)?, + opts, + )?; + + Ok(xvc_root_opt) + } + + XvcSubCommand::Storage(opts) => { + let stdin = io::stdin(); + let input = stdin.lock(); + storage::cmd_storage( + input, + &output_snd, + xvc_root_opt.as_ref().ok_or(Error::RequiresXvcRepository)?, + opts, + )?; + + Ok(xvc_root_opt) + } + }; + + let xvc_root_opt = match res_xvc_root_opt { + Ok(xvc_root_opt) => xvc_root_opt, + Err(e) => { + error!(&output_snd, "{}", e); + None + } + }; + + if let Some(ref xvc_root) = xvc_root_opt { + if !cli_opts.skip_git { + xvc_root.record(); + handle_git_automation( + &output_snd, + xvc_root, + cli_opts.to_branch.as_deref(), + &cli_opts.command_string, + // FIXME: Handle this error more gracefully + ) + .unwrap(); + } + } + + assert!(xvc_root_opt.is_some()); + Ok(xvc_root_opt) + } +} diff --git a/lib/src/error.rs b/lib/src/error.rs index 32ba5192b..80f2ee40c 100644 --- a/lib/src/error.rs +++ b/lib/src/error.rs @@ -75,11 +75,6 @@ pub enum Error { #[error("Process Error - stdout: 
{stdout}\nstderr: {stderr}")] ProcessError { stdout: String, stderr: String }, - #[error("Process Exec Error: {source}")] - ProcessExecError { - #[from] - source: subprocess::PopenError, - }, #[error("[E1004] Json Serialization Error: {source}")] JsonError { #[from] @@ -112,14 +107,6 @@ pub enum Error { #[from] source: ParseIntError, }, - #[error("Cannot Find Executable: {source}")] - WhichError { - #[from] - source: which::Error, - }, - - #[error("Git Process Error: \nSTDOUT: {stdout}\nSTDERR: {stderr}")] - GitProcessError { stdout: String, stderr: String }, #[error("Fixture Error: {source}")] FixtureError { diff --git a/lib/src/git.rs b/lib/src/git.rs deleted file mode 100644 index a1ee653f3..000000000 --- a/lib/src/git.rs +++ /dev/null @@ -1,248 +0,0 @@ -//! Git operations for Xvc repository commands -use std::{ffi::OsString, path::PathBuf, str::FromStr}; - -use subprocess::Exec; -use xvc_core::XvcRoot; -use xvc_logging::{debug, watch, XvcOutputSender}; - -use crate::{Error, Result}; - -/// Find the absolute path to the git executable to run -pub fn get_absolute_git_command(git_command: &str) -> Result { - let git_cmd_path = PathBuf::from(git_command); - let git_cmd = if git_cmd_path.is_absolute() { - git_command.to_string() - } else { - let cmd_path = which::which(git_command)?; - cmd_path.to_string_lossy().to_string() - }; - Ok(git_cmd) -} - -/// Run a git command with a specific git binary -pub fn exec_git(git_command: &str, xvc_directory: &str, args_str_vec: &[&str]) -> Result { - let mut args = vec!["-C", xvc_directory]; - args.extend(args_str_vec); - let args: Vec = args - .iter() - .map(|s| OsString::from_str(s).unwrap()) - .collect(); - watch!(args); - let proc_res = Exec::cmd(git_command).args(&args).capture()?; - - match proc_res.exit_status { - subprocess::ExitStatus::Exited(0) => Ok(proc_res.stdout_str()), - subprocess::ExitStatus::Exited(_) => Err(Error::GitProcessError { - stdout: proc_res.stdout_str(), - stderr: proc_res.stderr_str(), - }), - 
subprocess::ExitStatus::Signaled(_) - | subprocess::ExitStatus::Other(_) - | subprocess::ExitStatus::Undetermined => Err(Error::GitProcessError { - stdout: proc_res.stdout_str(), - stderr: proc_res.stderr_str(), - }), - } -} - -/// Stash user's staged files to avoid committing them before auto-commit -pub fn stash_user_staged_files( - output_snd: &XvcOutputSender, - git_command: &str, - xvc_directory: &str, -) -> Result { - // Do we have user staged files? - let git_diff_staged_out = exec_git( - git_command, - xvc_directory, - &["diff", "--name-only", "--cached"], - )?; - - watch!(git_diff_staged_out); - - // If so stash them - if !git_diff_staged_out.trim().is_empty() { - debug!( - output_snd, - "Stashing user staged files: {git_diff_staged_out}" - ); - let stash_out = exec_git(git_command, xvc_directory, &["stash", "push", "--staged"])?; - debug!(output_snd, "Stashed user staged files: {stash_out}"); - } - - Ok(git_diff_staged_out) -} - -/// Unstash user's staged files after auto-commit -pub fn unstash_user_staged_files( - output_snd: &XvcOutputSender, - git_command: &str, - xvc_directory: &str, -) -> Result<()> { - let res_git_stash_pop = exec_git(git_command, xvc_directory, &["stash", "pop", "--index"])?; - debug!( - output_snd, - "Unstashed user staged files: {res_git_stash_pop}" - ); - Ok(()) -} - -/// Checkout a git branch or tag before running an Xvc command -pub fn git_checkout_ref( - output_snd: &XvcOutputSender, - xvc_root: &XvcRoot, - from_ref: String, -) -> Result<()> { - let xvc_directory = xvc_root.as_path().to_str().unwrap(); - let git_command_option = xvc_root.config().get_str("git.command")?.option; - let git_command = get_absolute_git_command(&git_command_option)?; - - let git_diff_staged_out = stash_user_staged_files(output_snd, &git_command, xvc_directory)?; - exec_git(&git_command, xvc_directory, &["checkout", &from_ref])?; - - if !git_diff_staged_out.trim().is_empty() { - debug!("Unstashing user staged files: {git_diff_staged_out}"); - 
unstash_user_staged_files(output_snd, &git_command, xvc_directory)?; - } - Ok(()) -} - -/// This receives `xvc_root` ownership because as a final operation, it must drop the root to -/// record the last entity counter before commit. -pub fn handle_git_automation( - output_snd: &XvcOutputSender, - xvc_root: &XvcRoot, - to_branch: Option<&str>, - xvc_cmd: &str, -) -> Result<()> { - let xvc_root_dir = xvc_root.as_path().to_path_buf(); - let xvc_root_str = xvc_root_dir.to_str().unwrap(); - let use_git = xvc_root.config().get_bool("git.use_git")?.option; - let auto_commit = xvc_root.config().get_bool("git.auto_commit")?.option; - let auto_stage = xvc_root.config().get_bool("git.auto_stage")?.option; - let git_command_str = xvc_root.config().get_str("git.command")?.option; - let git_command = get_absolute_git_command(&git_command_str)?; - let xvc_dir = xvc_root.xvc_dir().clone(); - let xvc_dir_str = xvc_dir.to_str().unwrap(); - - if use_git { - if auto_commit { - git_auto_commit( - output_snd, - &git_command, - xvc_root_str, - xvc_dir_str, - xvc_cmd, - to_branch, - )?; - } else if auto_stage { - git_auto_stage(output_snd, &git_command, xvc_root_str, xvc_dir_str)?; - } - } - - Ok(()) -} - -/// Commit `.xvc` directory after Xvc operations -pub fn git_auto_commit( - output_snd: &XvcOutputSender, - git_command: &str, - xvc_root_str: &str, - xvc_dir_str: &str, - xvc_cmd: &str, - to_branch: Option<&str>, -) -> Result<()> { - debug!(output_snd, "Using Git: {git_command}"); - - let git_diff_staged_out = stash_user_staged_files(output_snd, git_command, xvc_root_str)?; - - if let Some(branch) = to_branch { - debug!(output_snd, "Checking out branch {branch}"); - exec_git(git_command, xvc_root_str, &["checkout", "-b", branch])?; - } - - // Add and commit `.xvc` - match exec_git( - git_command, - xvc_root_str, - // We check the output of the git add command to see if there were any files added. - // "--verbose" is required to get the output we need. 
- &[ - "add", - "--verbose", - xvc_dir_str, - "*.gitignore", - "*.xvcignore", - ], - ) { - Ok(git_add_output) => { - watch!(git_add_output); - if git_add_output.trim().is_empty() { - debug!(output_snd, "No files to commit"); - return Ok(()); - } else { - match exec_git( - git_command, - xvc_root_str, - &[ - "commit", - "-m", - &format!("Xvc auto-commit after '{xvc_cmd}'"), - ], - ) { - Ok(res_git_commit) => { - debug!(output_snd, "Committing .xvc/ to git: {res_git_commit}"); - } - Err(e) => { - debug!(output_snd, "Error committing .xvc/ to git: {e}"); - return Err(e); - } - } - } - } - Err(e) => { - debug!(output_snd, "Error adding .xvc/ to git: {e}"); - return Err(e); - } - } - - // Pop the stash if there were files we stashed - - if !git_diff_staged_out.trim().is_empty() { - debug!( - output_snd, - "Unstashing user staged files: {git_diff_staged_out}" - ); - unstash_user_staged_files(output_snd, git_command, xvc_root_str)?; - } - Ok(()) -} - -/// runs `git add .xvc *.gitignore *.xvcignore` to stage the files after Xvc operations -pub fn git_auto_stage( - output_snd: &XvcOutputSender, - git_command: &str, - xvc_root_str: &str, - xvc_dir_str: &str, -) -> Result<()> { - let res_git_add = exec_git( - git_command, - xvc_root_str, - &["add", xvc_dir_str, "*.gitignore", "*.xvcignore"], - )?; - debug!(output_snd, "Staging .xvc/ to git: {res_git_add}"); - Ok(()) -} - -pub fn git_ignored(output_snd: &XvcOutputSender, - git_command: &str, - xvc_root_str: &str, - path: &str) -> Result { - let command_res = exec_git(git_command, xvc_root_str, &["check-ignore", path])?; - - if command_res.trim().is_empty() { - Ok(false) - } else { - Ok(true) - } -} diff --git a/lib/src/lib.rs b/lib/src/lib.rs index bbe8c8680..788f4f639 100755 --- a/lib/src/lib.rs +++ b/lib/src/lib.rs @@ -3,7 +3,6 @@ //! 
The main dispatching functions for the entire Xvc CLI pub mod cli; pub mod error; -pub mod git; pub mod init; pub mod api; diff --git a/lib/tests/test_core_util_file_pmp.rs b/lib/tests/test_core_util_file_pmp.rs index 9655791d0..4f60a26d4 100644 --- a/lib/tests/test_core_util_file_pmp.rs +++ b/lib/tests/test_core_util_file_pmp.rs @@ -52,6 +52,7 @@ fn test_pmp() -> Result<()> { watch!(xmd1_real); sleep(Duration::from_millis(100)); let xmd1 = pmp.get(&xpath1); + watch!(xmd1); assert!(xmd1.is_some()); assert!(xmd1.unwrap().is_file()); assert!(xmd1.unwrap().size == Some(new_size as u64), "{:?}", xmd1); diff --git a/lib/tests/test_core_util_notify.rs b/lib/tests/test_core_util_notify.rs index 5afefecfe..7b19f708c 100644 --- a/lib/tests/test_core_util_notify.rs +++ b/lib/tests/test_core_util_notify.rs @@ -4,7 +4,6 @@ use assert_fs::prelude::{FileTouch, FileWriteBin, PathChild}; use assert_fs::TempDir; use common::*; use std::env; -use std::ffi::OsString; use std::fs::remove_file; use std::path::PathBuf; use std::thread::{self, sleep}; @@ -47,8 +46,12 @@ fn test_notify() -> Result<()> { let (output_sender, output_receiver) = crossbeam_channel::unbounded(); - let (initial_paths, all_rules) = - walk_serial(&output_sender, COMMON_IGNORE_PATTERNS, &temp_dir, &walk_options)?; + let (initial_paths, all_rules) = walk_serial( + &output_sender, + COMMON_IGNORE_PATTERNS, + &temp_dir, + &walk_options, + )?; watch!(all_rules); assert!(output_receiver.is_empty()); let (watcher, receiver) = make_polling_watcher(all_rules)?; diff --git a/lib/tests/test_file_list.rs b/lib/tests/test_file_list.rs index 537457268..387fe3a00 100644 --- a/lib/tests/test_file_list.rs +++ b/lib/tests/test_file_list.rs @@ -54,22 +54,36 @@ fn test_file_list() -> Result<()> { common::run_xvc(Some(&xvc_root), &c, XvcVerbosity::Trace) }; - let list_all = x(&["list", "--format", "{{name}}", "--show-dot-files"])?; + let count_lines = |s: &str| s.trim().lines().filter(|l| !l.trim().is_empty()).count(); + let 
list_no_dots = x(&["list", "--format", "{{name}}"])?; + let count_no_dots = count_lines(&list_no_dots); + // There must be 31 elements in total. 6 x 5: directories, another line for the summary. The + // blank line between them is not counted by count_lines. + assert!(count_no_dots == 31, "count_no_dots: {}", count_no_dots); - let count_all = list_all.trim().lines().count(); - // There must be 33 elements in total. 6 x 5: directories, 1 for .gitignore, - // 1 for .xvcignore, another line for the summary. - assert!(count_all == 33); + let list_all = x(&[ + "list", + "--format", + "{{name}}", + "--show-dot-files", + "--include-git-files", + ])?; - let list_no_dots = x(&["list", "--format", "{{name}}"])?; - let count_no_dots = list_no_dots.trim().lines().count(); - // There must be 31 elements in total. 6 x 5: directories another line for the summary. - assert!(count_no_dots == 31); + let count_all = count_lines(&list_all); + // There must be 33 elements in total. 6 x 5: directories, 1 for .gitignore, 1 for .xvcignore, + // another line for the summary. The blank line before the summary is not counted. + assert!(count_all == 33, "count_all: {}", count_all); let list_no_dots_no_summary = x(&["list", "--format", "{{name}}", "--no-summary"])?; - let count_no_dots_no_summary = list_no_dots_no_summary.trim().lines().count(); - // There must be 31 elements in total. 6 x 5: directories another line for the summary. - assert!(count_no_dots_no_summary == 30); + let count_no_dots_no_summary = count_lines(&list_no_dots_no_summary); + + // There must be 30 elements in total. 6 x 5: directories, without the summary line. 
+ assert!( + count_no_dots_no_summary == 30, + "count_no_dots_no_summary: {} {}", + count_no_dots_no_summary, + list_no_dots_no_summary + ); // test all sort options diff --git a/lib/tests/test_walker_parallel.rs b/lib/tests/test_walker_parallel.rs index 1769b5f79..418752a0c 100644 --- a/lib/tests/test_walker_parallel.rs +++ b/lib/tests/test_walker_parallel.rs @@ -1,5 +1,4 @@ use std::{ - ffi::OsString, fs, path::{Path, PathBuf}, sync::{Arc, RwLock}, diff --git a/logging/Cargo.toml b/logging/Cargo.toml index 797ba00cb..87bccd077 100644 --- a/logging/Cargo.toml +++ b/logging/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "xvc-logging" -version = "0.6.11" +version = "0.6.12" edition = "2021" description = "Logging crate for Xvc" authors = ["Emre Şahin "] @@ -22,6 +22,6 @@ peak_alloc = "^0.2" log = "^0.4" ### meta-logging-in-format is required for sled: https://github.com/spacejam/sled/issues/1384 # We don't use sled anymore, keep the above note for future reference -fern = { version = "^0.6", features = ["colored"] } +fern = { version = "^0.7", features = ["colored"] } crossbeam-channel = "^0.5" diff --git a/pipeline/Cargo.toml b/pipeline/Cargo.toml index 23f4d8816..5ab6ea990 100644 --- a/pipeline/Cargo.toml +++ b/pipeline/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "xvc-pipeline" -version = "0.6.11" +version = "0.6.12" edition = "2021" description = "Xvc data pipeline management" authors = ["Emre Şahin "] @@ -19,12 +19,12 @@ default = [] bundled-sqlite = ["rusqlite/bundled"] [dependencies] -xvc-config = { version = "0.6.11", path = "../config" } -xvc-core = { version = "0.6.11", path = "../core" } -xvc-ecs = { version = "0.6.11", path = "../ecs" } -xvc-logging = { version = "0.6.11", path = "../logging" } -xvc-walker = { version = "0.6.11", path = "../walker" } -xvc-file = { version = "0.6.11", path = "../file", default-features = false } +xvc-config = { version = "0.6.12", path = "../config" } +xvc-core = { version = "0.6.12", path = "../core" } +xvc-ecs = { version = 
"0.6.12", path = "../ecs" } +xvc-logging = { version = "0.6.12", path = "../logging" } +xvc-walker = { version = "0.6.12", path = "../walker" } +xvc-file = { version = "0.6.12", path = "../file", default-features = false } ## Cli and config clap = { version = "^4.5", features = ["derive"] } @@ -65,7 +65,7 @@ log = "^0.4" anyhow = "^1.0" ## meta-logging-in-format is required for sled: https://github.com/spacejam/sled/issues/1384 -fern = { version = "^0.6", features = ["colored", "meta-logging-in-format"] } +fern = { version = "^0.7", features = ["colored", "meta-logging-in-format"] } ## sqlite ## TODO: Add feature flags for sqlite when bundling needed @@ -95,8 +95,9 @@ uuid = { version = "^1.10", features = ["serde", "v4", "fast-rng"] } hex = { version = "^0.4", features = ["serde"] } url = { version = "^2.5", features = ["serde"] } itertools = "^0.13" -derive_more = "^0.99" +# FIXME: Use features selectively +derive_more = { version = "^1.0", features = ["full"] } [dev-dependencies] -xvc-test-helper = { version = "0.6.11", path = "../test_helper/" } +xvc-test-helper = { version = "0.6.12", path = "../test_helper/" } test-case = "^3.3" diff --git a/pipeline/src/pipeline/mod.rs b/pipeline/src/pipeline/mod.rs index 68c576138..53622a8e3 100644 --- a/pipeline/src/pipeline/mod.rs +++ b/pipeline/src/pipeline/mod.rs @@ -769,10 +769,7 @@ fn s_no_need_to_run_f_run_never<'a>( s: &DoneWithoutRunningState, params: StepStateParams<'a>, ) -> StateTransition<'a> { - output!( - params.output_snd, - "[NEVER] [{}]", params.step.name - ); + output!(params.output_snd, "[NEVER] [{}]", params.step.name); Ok((s.keep_done(), params)) } @@ -780,10 +777,7 @@ fn s_no_need_to_run_f_diffs_not_changed<'a>( s: &DoneWithoutRunningState, params: StepStateParams<'a>, ) -> StateTransition<'a> { - output!( - params.output_snd, - "[SKIP] [{}]", params.step.name - ); + output!(params.output_snd, "[SKIP] [{}]", params.step.name); Ok((s.keep_done(), params)) } diff --git a/run-tests.zsh b/run-tests.zsh 
index 2f22bb882..2ac15be92 100755 --- a/run-tests.zsh +++ b/run-tests.zsh @@ -1,3 +1,3 @@ -# XVC_TRYCMD_TESTS=storage,file,pipeline,core,start TRYCMD=overwrite cargo test -p xvc --test z_test_docs +LLVM_PROFILE_FILE="${TMPDIR}/xvc-%p-%m.profraw" CARGO_INCREMENTAL=0 RUSTFLAGS="-Cinstrument-coverage" XVC_TRYCMD_TESTS=storage,file,pipeline,core,start TRYCMD=overwrite cargo llvm-cov --features test-ci --lcov --output-path lcov.info -p xvc --test z_test_docs # cargo test --features test-ci -p xvc --test test_storage_new_minio diff --git a/storage/Cargo.toml b/storage/Cargo.toml index 78fa2e7d3..96f91bab9 100644 --- a/storage/Cargo.toml +++ b/storage/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "xvc-storage" -version = "0.6.11" +version = "0.6.12" edition = "2021" description = "Xvc remote and local storage management" authors = ["Emre Şahin "] @@ -16,11 +16,11 @@ name = "xvc_storage" crate-type = ["rlib"] [dependencies] -xvc-logging = { version = "0.6.11", path = "../logging" } -xvc-config = { version = "0.6.11", path = "../config" } -xvc-core = { version = "0.6.11", path = "../core" } -xvc-ecs = { version = "0.6.11", path = "../ecs" } -xvc-walker = { version = "0.6.11", path = "../walker" } +xvc-logging = { version = "0.6.12", path = "../logging" } +xvc-config = { version = "0.6.12", path = "../config" } +xvc-core = { version = "0.6.12", path = "../core" } +xvc-ecs = { version = "0.6.12", path = "../ecs" } +xvc-walker = { version = "0.6.12", path = "../walker" } ## Cli and config clap = { version = "^4.5", features = ["derive"] } @@ -53,14 +53,14 @@ walkdir = "^2.5" relative-path = { version = "^1.9", features = ["serde"] } path-absolutize = "^3.1" glob = "^0.3" -which = "^6.0" +which = "^7.0" ## Logging and errors thiserror = "^1.0" log = "^0.4" anyhow = "^1.0" ### meta-logging-in-format is required for sled: https://github.com/spacejam/sled/issues/1384 -fern = { version = "^0.6", features = ["colored"] } +fern = { version = "^0.7", features = ["colored"] } ## macros 
@@ -77,14 +77,15 @@ uuid = { version = "^1.10", features = ["serde", "v4", "fast-rng"] } hex = { version = "^0.4", features = ["serde"] } url = { version = "^2.5", features = ["serde"] } itertools = "^0.13" -derive_more = "^0.99" +# FIXME: Use features selectively +derive_more = { version = "^1.0", features = ["full"] } tempfile = "^3.11" ## Networking & Async tokio = { version = "^1.39", optional = true, features = ["rt-multi-thread"] } -rust-s3 = { version = "^0.34", optional = true } +rust-s3 = { version = "^0.35", optional = true } futures = { version = "^0.3", optional = true } # On Linux we use "vendored" feature and on Windows we don't use that feature. @@ -104,7 +105,7 @@ bundled-openssl = ["openssl/vendored"] [dev-dependencies] -xvc-test-helper = { version = "0.6.11", path = "../test_helper/" } +xvc-test-helper = { version = "0.6.12", path = "../test_helper/" } shellfn = "^0.1" [package.metadata.cargo-udeps.ignore] diff --git a/storage/src/storage/async_common.rs b/storage/src/storage/async_common.rs index 8e5066907..63891e294 100644 --- a/storage/src/storage/async_common.rs +++ b/storage/src/storage/async_common.rs @@ -1,4 +1,4 @@ -//! Home for async operations for S3 compatible storage services. +//! Home for async operations for S3 compatible storage services. use std::fs; use std::str::FromStr; @@ -12,8 +12,8 @@ use xvc_core::XvcCachePath; use xvc_core::XvcRoot; use xvc_logging::error; use xvc_logging::info; -use xvc_logging::watch; use xvc_logging::output; +use xvc_logging::watch; use xvc_logging::XvcOutputSender; use crate::Error; @@ -32,15 +32,15 @@ use super::XvcStorageTempDir; use super::XVC_STORAGE_GUID_FILENAME; /// Operations for S3 compatible storage services. Each service implements functions in this trait -/// for xvc file send and xvc file bring commands to work with the common functions. +/// for xvc file send and xvc file bring commands to work with the common functions. 
pub(crate) trait XvcS3StorageOperations { /// Prefix within the storage bucket if you want to separate Xvc files from the rest of the - /// bucket. + /// bucket. fn storage_prefix(&self) -> String; - /// GUID for the storage. This is generated when the storage is first initialized. + /// GUID for the storage. This is generated when the storage is first initialized. fn guid(&self) -> &XvcStorageGuid; /// Get the bucket for the storage - fn get_bucket(&self) -> Result; + fn get_bucket(&self) -> Result>; /// Get the credentials for the fn credentials(&self) -> Result; /// Name of the bucket @@ -172,7 +172,12 @@ pub(crate) trait XvcS3StorageOperations { match res_response { Ok(_) => { - info!(output_snd, "{} -> {}", abs_cache_path, storage_path.as_str()); + info!( + output_snd, + "{} -> {}", + abs_cache_path, + storage_path.as_str() + ); copied_paths.push(storage_path); watch!(copied_paths.len()); } @@ -211,7 +216,12 @@ pub(crate) trait XvcS3StorageOperations { match response_data_stream { Ok(mut response) => { - info!(output_snd, "{} -> {}", storage_path.as_str(), abs_cache_path); + info!( + output_snd, + "{} -> {}", + storage_path.as_str(), + abs_cache_path + ); let mut async_cache_path = tokio::fs::File::create(&abs_cache_path).await?; while let Some(chunk) = response.bytes().next().await { async_cache_path.write_all(&chunk?).await?; @@ -276,7 +286,9 @@ pub(crate) trait XvcS3StorageOperations { let expiration_seconds = duration.as_secs() as u32; let path = self.build_storage_path(path); - let signed_url = bucket.presign_get(path.as_str(), expiration_seconds, None).await?; + let signed_url = bucket + .presign_get(path.as_str(), expiration_seconds, None) + .await?; info!(output, "[SHARED] {}", path.as_str()); output!(output, "{}", signed_url); Ok(super::XvcStorageExpiringShareEvent { diff --git a/storage/src/storage/digital_ocean.rs b/storage/src/storage/digital_ocean.rs index 32cea6ec5..50368b1b3 100644 --- a/storage/src/storage/digital_ocean.rs +++ 
b/storage/src/storage/digital_ocean.rs @@ -124,7 +124,7 @@ impl XvcS3StorageOperations for XvcDigitalOceanStorage { .map_err(|e| e.into()) } - fn get_bucket(&self) -> Result { + fn get_bucket(&self) -> Result> { // We'll just put guid file to endpoint/bucket/prefix/XVC_GUID_FILENAME let credentials = self.credentials()?; let region: Region = self.region.parse().expect("Cannot parse region name"); diff --git a/storage/src/storage/gcs.rs b/storage/src/storage/gcs.rs index 3ea5a429d..9754326bf 100644 --- a/storage/src/storage/gcs.rs +++ b/storage/src/storage/gcs.rs @@ -109,7 +109,7 @@ impl XvcS3StorageOperations for XvcGcsStorage { fn guid(&self) -> &XvcStorageGuid { &self.guid } - fn get_bucket(&self) -> Result { + fn get_bucket(&self) -> Result> { let credentials = self.credentials()?; let region = Region::Custom { region: self.region.to_owned(), diff --git a/storage/src/storage/minio.rs b/storage/src/storage/minio.rs index bc26f3ba7..7abe2acaf 100644 --- a/storage/src/storage/minio.rs +++ b/storage/src/storage/minio.rs @@ -117,7 +117,7 @@ impl XvcS3StorageOperations for XvcMinioStorage { &self.guid } - fn get_bucket(&self) -> Result { + fn get_bucket(&self) -> Result> { // We'll just put guid file to endpoint/bucket/prefix/XVC_GUID_FILENAME let credentials = self.credentials()?; let region = Region::Custom { diff --git a/storage/src/storage/mod.rs b/storage/src/storage/mod.rs index c2b145d70..3bd13da9e 100644 --- a/storage/src/storage/mod.rs +++ b/storage/src/storage/mod.rs @@ -18,7 +18,7 @@ pub mod s3; #[cfg(feature = "wasabi")] pub mod wasabi; -use std::{fmt::Display, str::FromStr, time::Duration}; +use std::{str::FromStr, time::Duration}; use derive_more::Display; pub use event::{ @@ -74,7 +74,7 @@ pub enum XvcStorage { } persist!(XvcStorage, "storage"); -impl Display for XvcStorage { +impl std::fmt::Display for XvcStorage { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { XvcStorage::Local(lr) => { @@ -497,7 +497,10 @@ pub fn 
get_storage_record( error!(output_snd, "Cannot find remote {}", identifier); } if storage_store.len() > 1 { - error!(output_snd, "Ambiguous remote identifier: {} Please use Storage GUID.", identifier); + error!( + output_snd, + "Ambiguous remote identifier: {} Please use Storage GUID.", identifier + ); } let (_, storage) = diff --git a/storage/src/storage/r2.rs b/storage/src/storage/r2.rs index abdef9a4b..5438c8125 100644 --- a/storage/src/storage/r2.rs +++ b/storage/src/storage/r2.rs @@ -144,7 +144,7 @@ impl XvcS3StorageOperations for XvcR2Storage { .map_err(|e| e.into()) } - fn get_bucket(&self) -> Result { + fn get_bucket(&self) -> Result> { // We'll just put guid file to endpoint/bucket/prefix/XVC_GUID_FILENAME let credentials = self.credentials()?; let region = Region::R2 { diff --git a/storage/src/storage/s3.rs b/storage/src/storage/s3.rs index 3d3d893df..226a145f2 100644 --- a/storage/src/storage/s3.rs +++ b/storage/src/storage/s3.rs @@ -124,7 +124,7 @@ impl XvcS3StorageOperations for XvcS3Storage { &self.guid } - fn get_bucket(&self) -> Result { + fn get_bucket(&self) -> Result> { // We'll just put guid file to endpoint/bucket/prefix/XVC_GUID_FILENAME let credentials = self.credentials()?; let region: Region = self.region.parse().expect("Cannot parse region name"); diff --git a/storage/src/storage/wasabi.rs b/storage/src/storage/wasabi.rs index 639d18cb7..27ab9f34d 100644 --- a/storage/src/storage/wasabi.rs +++ b/storage/src/storage/wasabi.rs @@ -133,7 +133,7 @@ impl XvcS3StorageOperations for XvcWasabiStorage { self.endpoint.clone() } - fn get_bucket(&self) -> Result { + fn get_bucket(&self) -> Result> { // We'll just put guid file to endpoint/bucket/prefix/XVC_GUID_FILENAME let credentials = self.credentials()?; let region: Region = Region::Custom { diff --git a/test_helper/Cargo.toml b/test_helper/Cargo.toml index 7bab90fca..9a1517dbe 100644 --- a/test_helper/Cargo.toml +++ b/test_helper/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "xvc-test-helper" 
-version = "0.6.11" +version = "0.6.12" edition = "2021" description = "Unit test helper functions for Xvc" authors = ["Emre Şahin "] @@ -20,7 +20,7 @@ path = "src/main.rs" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] -xvc-logging = { version = "0.6.11", path = "../logging/" } +xvc-logging = { version = "0.6.12", path = "../logging/" } rand = "^0.8" log = "^0.4" diff --git a/walker/Cargo.toml b/walker/Cargo.toml index 0eecee147..68a22d639 100644 --- a/walker/Cargo.toml +++ b/walker/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "xvc-walker" -version = "0.6.11" +version = "0.6.12" edition = "2021" description = "Xvc parallel file system walker with ignore features" authors = ["Emre Şahin "] @@ -16,7 +16,8 @@ name = "xvc_walker" crate-type = ["rlib"] [dependencies] -xvc-logging = { version = "0.6.11", path = "../logging" } +xvc-logging = { version = "0.6.12", path = "../logging" } +# NOTE: 0.4 removes Glob struct and this should be handled with care fast-glob = "^0.3" ## Parallelization @@ -25,7 +26,7 @@ crossbeam = "^0.8" rayon = "^1.10" ## File system -notify = "6.1.1" +notify = "7.0" jwalk = "^0.8" dashmap = "^6" @@ -41,7 +42,7 @@ itertools = "^0.13" regex = "^1.10" [dev-dependencies] -xvc-test-helper = { path = "../test_helper/", version = "0.6.11" } +xvc-test-helper = { path = "../test_helper/", version = "0.6.12" } test-case = "^3.3" [package.metadata.cargo-udeps.ignore] diff --git a/walker/src/ignore_rules.rs b/walker/src/ignore_rules.rs index 5ae2f280b..c7572e938 100644 --- a/walker/src/ignore_rules.rs +++ b/walker/src/ignore_rules.rs @@ -40,6 +40,7 @@ impl IgnoreRules { } } + /// Constructs a new `IgnoreRules` instance from a given set of global ignore patterns. 
pub fn from_global_patterns( ignore_root: &Path, ignore_filename: Option<&str>, diff --git a/walker/src/lib.rs b/walker/src/lib.rs index e87a222cb..60c60743a 100755 --- a/walker/src/lib.rs +++ b/walker/src/lib.rs @@ -34,7 +34,6 @@ pub use ignore_rules::IgnoreRules; pub use ignore_rules::SharedIgnoreRules; pub use notify::make_watcher; -use std::ffi::OsStr; pub use std::hash::Hash; pub use sync::{PathSync, PathSyncSingleton}; use xvc_logging::warn; @@ -42,18 +41,17 @@ use xvc_logging::warn; pub use notify::PathEvent; pub use notify::RecommendedWatcher; +pub use fast_glob::Glob; + use xvc_logging::watch; -// use glob::{MatchOptions, Pattern, PatternError}; -pub use fast_glob::Glob; use std::{ - ffi::OsString, fmt::Debug, fs::{self, Metadata}, path::{Path, PathBuf}, }; -use anyhow::{anyhow, Context}; +use anyhow::anyhow; static MAX_THREADS_PARALLEL_WALK: usize = 8; @@ -213,6 +211,10 @@ pub fn content_to_patterns( patterns } +/// Updates the ignore rules from a given directory. +/// +/// Gets ignore filename from the ignore rules, concatenates it with the directory path and reads +/// the file if it exists. Then updates the ignore rules with the new patterns. 
pub fn update_ignore_rules(dir: &Path, ignore_rules: &IgnoreRules) -> Result<()> { if let Some(ref ignore_filename) = ignore_rules.ignore_filename { let ignore_root = &ignore_rules.root; diff --git a/walker/src/pattern.rs b/walker/src/pattern.rs index 0feaa0602..f114a938f 100644 --- a/walker/src/pattern.rs +++ b/walker/src/pattern.rs @@ -8,8 +8,6 @@ pub use sync::{PathSync, PathSyncSingleton}; pub use crate::notify::{make_watcher, PathEvent, RecommendedWatcher}; -// use glob::{MatchOptions, Pattern, PatternError}; -pub use fast_glob::Glob; use std::{fmt::Debug, path::PathBuf}; use crate::error; @@ -72,7 +70,10 @@ pub enum Source { }, /// Pattern is from CLI - CommandLine { current_dir: PathBuf }, + CommandLine { + /// Current directory + current_dir: PathBuf, + }, } /// Pattern is generic and could be an instance of String, Glob, Regex or any other object. @@ -96,6 +97,7 @@ pub struct Pattern { } impl Pattern { + /// Create a new pattern from a string and its source pub fn new(source: Source, original: &str) -> Self { let original = original.to_owned(); let current_dir = match &source { @@ -197,7 +199,7 @@ fn transform_pattern_for_glob( let directory_anywhere = |p| format!("**/{p}/**"); let directory_relative = |p, directory| format!("{directory}/**/{p}/**"); - let transformed_pattern = match (path_kind, relativity) { + match (path_kind, relativity) { (PathKind::Any, PatternRelativity::Anywhere) => anything_anywhere(original), (PathKind::Any, PatternRelativity::RelativeTo { directory }) => { anything_relative(original, directory) @@ -206,11 +208,10 @@ fn transform_pattern_for_glob( (PathKind::Directory, PatternRelativity::RelativeTo { directory }) => { directory_relative(original, directory) } - }; - - transformed_pattern + } } +/// Build a list of patterns from a list of strings pub fn build_pattern_list(patterns: Vec, source: Source) -> Vec { patterns .iter() diff --git a/walker/src/walk_parallel.rs b/walker/src/walk_parallel.rs index 6b921ba2c..5e73ab9c0 100644 
--- a/walker/src/walk_parallel.rs +++ b/walker/src/walk_parallel.rs @@ -1,14 +1,15 @@ +//! Parallel walk implementation use std::{ path::Path, - sync::{Arc, Mutex}, + sync::Arc, }; use crossbeam::queue::SegQueue; use crossbeam_channel::Sender; -use xvc_logging::{uwr, watch}; +use xvc_logging::watch; use crate::{ - directory_list, update_ignore_rules, IgnoreRules, MatchResult, PathMetadata, Result, + directory_list, update_ignore_rules, MatchResult, PathMetadata, Result, SharedIgnoreRules, WalkOptions, MAX_THREADS_PARALLEL_WALK, }; diff --git a/walker/src/walk_serial.rs b/walker/src/walk_serial.rs index bedb66b2c..d02eea529 100644 --- a/walker/src/walk_serial.rs +++ b/walker/src/walk_serial.rs @@ -1,12 +1,12 @@ //! Serial directory walker without parallelization //! See [`walk_parallel`] for parallel version. -use std::path::Path; +use std::path::{Path, PathBuf}; -use xvc_logging::{debug, warn, XvcOutputSender}; +use xvc_logging::{debug, error, warn, XvcOutputSender}; use crate::{ - directory_list, pattern::MatchResult, update_ignore_rules, IgnoreRules, PathMetadata, Result, - WalkOptions, + build_ignore_patterns, directory_list, pattern::MatchResult, update_ignore_rules, IgnoreRules, + PathMetadata, Result, WalkOptions, }; /// Walk `dir` with `walk_options`, with the given _initial_ `ignore_rules`. @@ -47,6 +47,8 @@ pub fn walk_serial( let mut res_paths = Vec::new(); while let Some(dir) = dir_stack.pop() { + // TODO: Keep ignore rules in a single file in the root. Most of the time, we don't need to + // read the ignore rules in all over the repository. 
update_ignore_rules(&dir, &ignore_rules)?; res_paths.extend(get_child_paths(&dir)?.drain(..).filter_map(|p| { @@ -68,3 +70,51 @@ pub fn walk_serial( Ok((res_paths, ignore_rules)) } + +/// Return path metadata only for the listed files to avoid recursing directories +pub fn path_metadata_map_from_file_targets( + output_snd: &XvcOutputSender, + global_ignore_rules: &str, + ignore_root: &Path, + targets: Vec, + walk_options: &WalkOptions, +) -> Result<(Vec, IgnoreRules)> { + let ignore_filename = walk_options.ignore_filename.as_deref(); + let ignore_rules = ignore_filename + .map(|ignore_filename| { + build_ignore_patterns(global_ignore_rules, ignore_root, ignore_filename) + }) + .unwrap_or_else(|| { + Ok(IgnoreRules::from_global_patterns( + ignore_root, + None, + global_ignore_rules, + )) + })?; + + let mut res_paths = Vec::new(); + + res_paths.extend(targets.into_iter().filter_map(|target| { + let path: PathBuf = target.into(); + let ignore_result = ignore_rules.check(&path); + match ignore_result { + MatchResult::NoMatch | MatchResult::Whitelist => { + let md_res = path.metadata(); + match md_res { + Ok(metadata) => Some(PathMetadata { path, metadata }), + Err(e) => { + error!(output_snd, "{}", e); + None + } + } + } + + MatchResult::Ignore => { + warn!(output_snd, "Ignored: {:?}", path); + None + } + } + })); + + Ok((res_paths, ignore_rules)) +}