diff --git a/.github/workflows/cd-deploy-nodes-gcp.yml b/.github/workflows/cd-deploy-nodes-gcp.yml index 0b167ceb63c..bf6b9cb5842 100644 --- a/.github/workflows/cd-deploy-nodes-gcp.yml +++ b/.github/workflows/cd-deploy-nodes-gcp.yml @@ -181,7 +181,7 @@ jobs: if: ${{ !cancelled() && !failure() && ((github.event_name == 'push' && github.ref_name == 'main') || github.event_name == 'release') }} steps: - - uses: actions/checkout@v4.1.0 + - uses: actions/checkout@v4.1.1 with: persist-credentials: false @@ -283,7 +283,7 @@ jobs: if: github.event_name == 'workflow_dispatch' steps: - - uses: actions/checkout@v4.1.0 + - uses: actions/checkout@v4.1.1 with: persist-credentials: false diff --git a/.github/workflows/chore-delete-gcp-resources.yml b/.github/workflows/chore-delete-gcp-resources.yml index 439ce8a2ec0..19ed00db56d 100644 --- a/.github/workflows/chore-delete-gcp-resources.yml +++ b/.github/workflows/chore-delete-gcp-resources.yml @@ -30,7 +30,7 @@ jobs: contents: 'read' id-token: 'write' steps: - - uses: actions/checkout@v4.1.0 + - uses: actions/checkout@v4.1.1 with: persist-credentials: false @@ -239,7 +239,7 @@ jobs: contents: 'read' id-token: 'write' steps: - - uses: actions/checkout@v4.1.0 + - uses: actions/checkout@v4.1.1 with: persist-credentials: false diff --git a/.github/workflows/ci-build-crates.patch.yml b/.github/workflows/ci-build-crates.patch.yml index b99adfc4cfe..e173752dd06 100644 --- a/.github/workflows/ci-build-crates.patch.yml +++ b/.github/workflows/ci-build-crates.patch.yml @@ -23,7 +23,7 @@ jobs: outputs: matrix: ${{ steps.set-matrix.outputs.matrix }} steps: - - uses: actions/checkout@v4.1.0 + - uses: actions/checkout@v4.1.1 # Setup Rust with stable toolchain and minimal profile - name: Setup Rust diff --git a/.github/workflows/ci-build-crates.yml b/.github/workflows/ci-build-crates.yml index e12f6031cb8..77ea4b44e2c 100644 --- a/.github/workflows/ci-build-crates.yml +++ b/.github/workflows/ci-build-crates.yml @@ -50,7 +50,7 @@ jobs: outputs: matrix: ${{ steps.set-matrix.outputs.matrix }} steps: - - uses: actions/checkout@v4.1.0 + - uses: actions/checkout@v4.1.1 - uses: r7kamura/rust-problem-matchers@v1.4.0 # Setup Rust with stable toolchain and minimal profile @@ -99,14 +99,17 @@ jobs: build: name: Build ${{ matrix.crate }} crate + timeout-minutes: 90 needs: [ matrix, check-matrix ] runs-on: ubuntu-latest strategy: + # avoid rate-limit errors by only launching a few of these jobs at a time + max-parallel: 2 fail-fast: true matrix: ${{ fromJson(needs.matrix.outputs.matrix) }} steps: - - uses: actions/checkout@v4.1.0 + - uses: actions/checkout@v4.1.1 with: persist-credentials: false - uses: r7kamura/rust-problem-matchers@v1.4.0 diff --git a/.github/workflows/ci-coverage.yml b/.github/workflows/ci-coverage.yml index 0c913be1ad0..aea08fbba4f 100644 --- a/.github/workflows/ci-coverage.yml +++ b/.github/workflows/ci-coverage.yml @@ -57,7 +57,7 @@ jobs: runs-on: ubuntu-latest-xl steps: - - uses: actions/checkout@v4.1.0 + - uses: actions/checkout@v4.1.1 with: persist-credentials: false diff --git a/.github/workflows/ci-lint.yml b/.github/workflows/ci-lint.yml index 1e22a99a0f2..02881a0280e 100644 --- a/.github/workflows/ci-lint.yml +++ b/.github/workflows/ci-lint.yml @@ -30,14 +30,14 @@ jobs: rust: ${{ steps.changed-files-rust.outputs.any_changed == 'true' }} workflows: ${{ steps.changed-files-workflows.outputs.any_changed == 'true' }} steps: - - uses: actions/checkout@v4.1.0 + - uses: actions/checkout@v4.1.1 with: persist-credentials: false fetch-depth: 0 - name: Rust files 
id: changed-files-rust - uses: tj-actions/changed-files@v39.2.1 + uses: tj-actions/changed-files@v39.2.3 with: files: | **/*.rs @@ -49,7 +49,7 @@ jobs: - name: Workflow files id: changed-files-workflows - uses: tj-actions/changed-files@v39.2.1 + uses: tj-actions/changed-files@v39.2.3 with: files: | .github/workflows/*.yml @@ -62,7 +62,7 @@ jobs: if: ${{ needs.changed-files.outputs.rust == 'true' }} steps: - - uses: actions/checkout@v4.1.0 + - uses: actions/checkout@v4.1.1 with: persist-credentials: false @@ -112,7 +112,7 @@ jobs: if: ${{ needs.changed-files.outputs.rust == 'true' }} steps: - - uses: actions/checkout@v4.1.0 + - uses: actions/checkout@v4.1.1 with: persist-credentials: false - uses: r7kamura/rust-problem-matchers@v1.4.0 @@ -142,7 +142,7 @@ jobs: needs: changed-files if: ${{ needs.changed-files.outputs.workflows == 'true' }} steps: - - uses: actions/checkout@v4.1.0 + - uses: actions/checkout@v4.1.1 - name: actionlint uses: reviewdog/action-actionlint@v1.39.1 with: @@ -155,7 +155,7 @@ jobs: runs-on: ubuntu-latest needs: changed-files steps: - - uses: actions/checkout@v4.1.0 + - uses: actions/checkout@v4.1.1 - uses: plettich/action-codespell@master with: github_token: ${{ secrets.github_token }} diff --git a/.github/workflows/ci-unit-tests-docker.yml b/.github/workflows/ci-unit-tests-docker.yml index c8395365963..5655fb6ad23 100644 --- a/.github/workflows/ci-unit-tests-docker.yml +++ b/.github/workflows/ci-unit-tests-docker.yml @@ -98,6 +98,7 @@ jobs: # - We activate the gRPC feature to avoid recompiling `zebrad`, but we don't actually run any gRPC tests. test-all: name: Test all + timeout-minutes: 180 runs-on: ubuntu-latest-xl needs: build steps: @@ -135,6 +136,7 @@ jobs: # (We activate the test features to avoid recompiling dependencies, but we don't actually run any gRPC tests.) test-fake-activation-heights: name: Test with fake activation heights + timeout-minutes: 60 runs-on: ubuntu-latest needs: build steps: @@ -157,6 +159,7 @@ jobs: # Test that Zebra syncs and checkpoints a few thousand blocks from an empty state. test-empty-sync: name: Test checkpoint sync from empty state + timeout-minutes: 60 runs-on: ubuntu-latest needs: build steps: @@ -178,6 +181,7 @@ jobs: # Test launching lightwalletd with an empty lightwalletd and Zebra state. 
test-lightwalletd-integration: name: Test integration with lightwalletd + timeout-minutes: 60 runs-on: ubuntu-latest needs: build steps: diff --git a/.github/workflows/ci-unit-tests-os.yml b/.github/workflows/ci-unit-tests-os.yml index 695454e6090..3f5d764336e 100644 --- a/.github/workflows/ci-unit-tests-os.yml +++ b/.github/workflows/ci-unit-tests-os.yml @@ -92,7 +92,7 @@ jobs: rust: beta steps: - - uses: actions/checkout@v4.1.0 + - uses: actions/checkout@v4.1.1 with: persist-credentials: false - uses: r7kamura/rust-problem-matchers@v1.4.0 @@ -204,7 +204,7 @@ jobs: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v4.1.0 + - uses: actions/checkout@v4.1.1 with: persist-credentials: false - uses: r7kamura/rust-problem-matchers@v1.4.0 @@ -226,7 +226,7 @@ jobs: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v4.1.0 + - uses: actions/checkout@v4.1.1 with: persist-credentials: false - uses: r7kamura/rust-problem-matchers@v1.4.0 @@ -269,7 +269,7 @@ jobs: continue-on-error: ${{ matrix.checks == 'advisories' }} steps: - - uses: actions/checkout@v4.1.0 + - uses: actions/checkout@v4.1.1 with: persist-credentials: false - uses: r7kamura/rust-problem-matchers@v1.4.0 @@ -290,7 +290,7 @@ jobs: steps: - name: Checkout git repository - uses: actions/checkout@v4.1.0 + uses: actions/checkout@v4.1.1 with: persist-credentials: false - uses: r7kamura/rust-problem-matchers@v1.4.0 diff --git a/.github/workflows/docs-deploy-firebase.yml b/.github/workflows/docs-deploy-firebase.yml index 078c2e6a1fe..806211131c1 100644 --- a/.github/workflows/docs-deploy-firebase.yml +++ b/.github/workflows/docs-deploy-firebase.yml @@ -72,7 +72,7 @@ jobs: pull-requests: write steps: - name: Checkout the source code - uses: actions/checkout@v4.1.0 + uses: actions/checkout@v4.1.1 with: persist-credentials: false @@ -125,7 +125,7 @@ jobs: pull-requests: write steps: - name: Checkout the source code - uses: actions/checkout@v4.1.0 + uses: actions/checkout@v4.1.1 with: persist-credentials: false @@ -181,7 +181,7 @@ jobs: pull-requests: write steps: - name: Checkout the source code - uses: actions/checkout@v4.1.0 + uses: actions/checkout@v4.1.1 with: persist-credentials: false diff --git a/.github/workflows/docs-dockerhub-description.yml b/.github/workflows/docs-dockerhub-description.yml index 275a0e7b5fd..e20b126646f 100644 --- a/.github/workflows/docs-dockerhub-description.yml +++ b/.github/workflows/docs-dockerhub-description.yml @@ -17,7 +17,7 @@ jobs: dockerHubDescription: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v4.1.0 + - uses: actions/checkout@v4.1.1 with: persist-credentials: false diff --git a/.github/workflows/manual-zcashd-deploy.yml b/.github/workflows/manual-zcashd-deploy.yml index 5ea57ef7652..11ace8f39e1 100644 --- a/.github/workflows/manual-zcashd-deploy.yml +++ b/.github/workflows/manual-zcashd-deploy.yml @@ -22,7 +22,7 @@ jobs: id-token: 'write' steps: - - uses: actions/checkout@v4.1.0 + - uses: actions/checkout@v4.1.1 with: persist-credentials: false diff --git a/.github/workflows/release-crates-io.yml b/.github/workflows/release-crates-io.yml index 860f3ba3cb9..8546ee774db 100644 --- a/.github/workflows/release-crates-io.yml +++ b/.github/workflows/release-crates-io.yml @@ -70,7 +70,7 @@ jobs: - uses: r7kamura/rust-problem-matchers@v1.4.0 - name: Checkout git repository - uses: actions/checkout@v4.1.0 + uses: actions/checkout@v4.1.1 with: persist-credentials: false diff --git a/.github/workflows/sub-build-docker-image.yml b/.github/workflows/sub-build-docker-image.yml index 
6356563a22e..1b929324619 100644 --- a/.github/workflows/sub-build-docker-image.yml +++ b/.github/workflows/sub-build-docker-image.yml @@ -66,7 +66,7 @@ jobs: contents: 'read' id-token: 'write' steps: - - uses: actions/checkout@v4.1.0 + - uses: actions/checkout@v4.1.1 with: persist-credentials: false - uses: r7kamura/rust-problem-matchers@v1.4.0 diff --git a/.github/workflows/sub-build-lightwalletd.yml b/.github/workflows/sub-build-lightwalletd.yml index adce75ed7b7..513b4bd1cd9 100644 --- a/.github/workflows/sub-build-lightwalletd.yml +++ b/.github/workflows/sub-build-lightwalletd.yml @@ -56,14 +56,14 @@ jobs: id-token: 'write' steps: - - uses: actions/checkout@v4.1.0 + - uses: actions/checkout@v4.1.1 with: # Note: check service.proto when modifying lightwalletd repo repository: zcash/lightwalletd ref: 'master' persist-credentials: false - - uses: actions/checkout@v4.1.0 + - uses: actions/checkout@v4.1.1 with: path: zebra persist-credentials: false diff --git a/.github/workflows/sub-deploy-integration-tests-gcp.yml b/.github/workflows/sub-deploy-integration-tests-gcp.yml index e718eaa43af..fd5dd6c4394 100644 --- a/.github/workflows/sub-deploy-integration-tests-gcp.yml +++ b/.github/workflows/sub-deploy-integration-tests-gcp.yml @@ -104,153 +104,21 @@ env: CACHED_STATE_UPDATE_LIMIT: 576 jobs: - # set up and launch the test, if it doesn't use any cached state - # each test runs one of the *-with/without-cached-state job series, and skips the other - launch-without-cached-state: - name: Launch ${{ inputs.test_id }} test - if: ${{ !inputs.needs_zebra_state }} - runs-on: zfnd-runners - permissions: - contents: 'read' - id-token: 'write' - steps: - - uses: actions/checkout@v4.1.0 - with: - persist-credentials: false - fetch-depth: '2' - - uses: r7kamura/rust-problem-matchers@v1.4.0 - - - name: Inject slug/short variables - uses: rlespinasse/github-slug-action@v4 - with: - short-length: 7 - - # Makes the Zcash network name lowercase. - # - # Labels in GCP are required to be in lowercase, but the blockchain network - # uses sentence case, so we need to downcase ${{ inputs.network }}. - # - # Passes ${{ inputs.network }} to subsequent steps using $NETWORK env variable. 
- - name: Downcase network name for labels - run: | - NETWORK_CAPS="${{ inputs.network }}" - echo "NETWORK=${NETWORK_CAPS,,}" >> "$GITHUB_ENV" - - # Install our SSH secret - - name: Install private SSH key - uses: shimataro/ssh-key-action@v2.5.1 - with: - key: ${{ secrets.GCP_SSH_PRIVATE_KEY }} - name: google_compute_engine - known_hosts: unnecessary - - - name: Generate public SSH key - run: | - sudo apt-get update && sudo apt-get -qq install -y --no-install-recommends openssh-client - ssh-keygen -y -f ~/.ssh/google_compute_engine > ~/.ssh/google_compute_engine.pub - - # Setup gcloud CLI - - name: Authenticate to Google Cloud - id: auth - uses: google-github-actions/auth@v1.1.1 - with: - retries: '3' - workload_identity_provider: '${{ vars.GCP_WIF }}' - service_account: '${{ vars.GCP_DEPLOYMENTS_SA }}' - - - name: Set up Cloud SDK - uses: google-github-actions/setup-gcloud@v1.1.1 - - # Create a Compute Engine virtual machine - - name: Create ${{ inputs.test_id }} GCP compute instance - id: create-instance - run: | - gcloud compute instances create-with-container "${{ inputs.test_id }}-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }}" \ - --boot-disk-size 50GB \ - --boot-disk-type pd-ssd \ - --image-project=cos-cloud \ - --image-family=cos-stable \ - --create-disk=name="${{ inputs.test_id }}-${{ env.GITHUB_SHA_SHORT }}",device-name="${{ inputs.test_id }}-${{ env.GITHUB_SHA_SHORT }}",size=400GB,type=pd-ssd \ - --container-image=gcr.io/google-containers/busybox \ - --machine-type ${{ vars.GCP_LARGE_MACHINE }} \ - --network-interface=subnet=${{ vars.GCP_SUBNETWORK }} \ - --scopes cloud-platform \ - --metadata=google-monitoring-enabled=TRUE,google-logging-enabled=TRUE \ - --metadata-from-file=startup-script=.github/workflows/scripts/gcp-vm-startup-script.sh \ - --labels=app=${{ inputs.app_name }},environment=test,network=${NETWORK},github_ref=${{ env.GITHUB_REF_SLUG_URL }},test=${{ inputs.test_id }} \ - --tags ${{ inputs.app_name }} \ - --zone ${{ vars.GCP_ZONE }} - - # Format the mounted disk if the test doesn't use a cached state. 
- - name: Format ${{ inputs.test_id }} volume - shell: /usr/bin/bash -exo pipefail {0} - run: | - gcloud compute ssh ${{ inputs.test_id }}-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }} \ - --zone ${{ vars.GCP_ZONE }} \ - --ssh-flag="-o ServerAliveInterval=5" \ - --ssh-flag="-o ConnectionAttempts=20" \ - --ssh-flag="-o ConnectTimeout=5" \ - --command=' \ - set -ex; - # Extract the correct disk name based on the device-name - DISK_NAME=$(ls -l /dev/disk/by-id | grep -oE "google-${{ inputs.test_id }}-${{ env.GITHUB_SHA_SHORT }} -> ../../[^ ]+" | grep -oE "/[^/]+$" | cut -c 2-); - sudo mkfs.ext4 -v /dev/$DISK_NAME \ - ' - - # Launch the test without any cached state - - name: Launch ${{ inputs.test_id }} test - id: launch-test - shell: /usr/bin/bash -exo pipefail {0} - run: | - gcloud compute ssh ${{ inputs.test_id }}-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }} \ - --zone ${{ vars.GCP_ZONE }} \ - --ssh-flag="-o ServerAliveInterval=5" \ - --ssh-flag="-o ConnectionAttempts=20" \ - --ssh-flag="-o ConnectTimeout=5" \ - --command=' \ - set -ex; - # Extract the correct disk name based on the device-name - export DISK_NAME=$(ls -l /dev/disk/by-id | grep -oE "google-${{ inputs.test_id }}-${{ env.GITHUB_SHA_SHORT }} -> ../../[^ ]+" | grep -oE "/[^/]+$" | cut -c 2-); \ - - sudo docker run \ - --name ${{ inputs.test_id }} \ - --tty \ - --detach \ - ${{ inputs.test_variables }} \ - --mount type=volume,volume-driver=local,volume-opt=device=/dev/$DISK_NAME,volume-opt=type=ext4,dst=${{ inputs.root_state_path }}/${{ inputs.zebra_state_dir }} \ - ${{ vars.GAR_BASE }}/${{ vars.CI_IMAGE_NAME }}:sha-${{ env.GITHUB_SHA_SHORT }} \ - ' - - # Show debug logs if previous job failed - - name: Show debug logs if previous job failed - if: ${{ failure() }} - shell: /usr/bin/bash -exo pipefail {0} - run: | - gcloud compute ssh ${{ inputs.test_id }}-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }} \ - --zone ${{ vars.GCP_ZONE }} \ - --ssh-flag="-o ServerAliveInterval=5" \ - --ssh-flag="-o ConnectionAttempts=20" \ - --ssh-flag="-o ConnectTimeout=5" \ - --command=' \ - lsblk; - sudo lsof /dev/$DISK_NAME; - sudo dmesg; - sudo journalctl -b \ - ' - - # set up and launch the test, if it uses cached state - # each test runs one of the *-with/without-cached-state job series, and skips the other - launch-with-cached-state: - name: Launch ${{ inputs.test_id }} test - if: ${{ inputs.needs_zebra_state }} + # Show all the test logs, then follow the logs of the test we just launched, until it finishes. + # Then check the result of the test. + # + # If `inputs.is_long_test` is `true`, the timeout is 5 days, otherwise it's 3 hours. + test-result: + name: Run ${{ inputs.test_id }} test runs-on: zfnd-runners + timeout-minutes: ${{ inputs.is_long_test && 7200 || 180 }} outputs: cached_disk_name: ${{ steps.get-disk-name.outputs.cached_disk_name }} permissions: contents: 'read' id-token: 'write' steps: - - uses: actions/checkout@v4.1.0 + - uses: actions/checkout@v4.1.1 with: persist-credentials: false fetch-depth: '2' @@ -268,7 +136,7 @@ jobs: # Install our SSH secret - name: Install private SSH key - uses: shimataro/ssh-key-action@v2.5.1 + uses: shimataro/ssh-key-action@v2.6.1 with: key: ${{ secrets.GCP_SSH_PRIVATE_KEY }} name: google_compute_engine @@ -314,6 +182,7 @@ jobs: # TODO: move this script into a file, and call it from sub-find-cached-disks.yml as well. 
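The `timeout-minutes` expression in the moved `test-result` job above uses the `&& ||` idiom, because GitHub Actions expressions have no ternary operator. A minimal sketch of how it evaluates (values copied from the hunk, comments are mine):

```yaml
# `a && b || c` evaluates to `b` when `a` is truthy, otherwise to `c`.
# This emulation is only safe because 7200 is always truthy; a falsy
# middle operand (0, '', false) would incorrectly fall through to `|| c`.
timeout-minutes: ${{ inputs.is_long_test && 7200 || 180 }} # 5 days or 3 hours
```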
- name: Find ${{ inputs.test_id }} cached state disk id: get-disk-name + if: ${{ inputs.needs_zebra_state || inputs.needs_lwd_state }} run: | set -x LOCAL_STATE_VERSION=$(grep -oE "DATABASE_FORMAT_VERSION: .* [0-9]+" "$GITHUB_WORKSPACE/zebra-state/src/constants.rs" | grep -oE "[0-9]+" | tail -n1) @@ -381,18 +250,21 @@ jobs: echo "STATE_VERSION=$LOCAL_STATE_VERSION" >> "$GITHUB_ENV" echo "CACHED_DISK_NAME=$CACHED_DISK_NAME" >> "$GITHUB_ENV" + echo "DISK_OPTION=image=$CACHED_DISK_NAME," >> "$GITHUB_ENV" # Create a Compute Engine virtual machine and attach a cached state disk using the # $CACHED_DISK_NAME variable as the source image to populate the disk cached state + # if the test needs it. - name: Create ${{ inputs.test_id }} GCP compute instance id: create-instance + shell: /usr/bin/bash -x {0} run: | gcloud compute instances create-with-container "${{ inputs.test_id }}-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }}" \ --boot-disk-size 50GB \ --boot-disk-type pd-ssd \ --image-project=cos-cloud \ --image-family=cos-stable \ - --create-disk=image=${{ env.CACHED_DISK_NAME }},name="${{ inputs.test_id }}-${{ env.GITHUB_SHA_SHORT }}",device-name="${{ inputs.test_id }}-${{ env.GITHUB_SHA_SHORT }}",size=400GB,type=pd-ssd \ + --create-disk=${DISK_OPTION}name="${{ inputs.test_id }}-${{ env.GITHUB_SHA_SHORT }}",device-name="${{ inputs.test_id }}-${{ env.GITHUB_SHA_SHORT }}",size=400GB,type=pd-ssd \ --container-image=gcr.io/google-containers/busybox \ --machine-type ${{ vars.GCP_LARGE_MACHINE }} \ --network-interface=subnet=${{ vars.GCP_SUBNETWORK }} \ @@ -403,29 +275,10 @@ jobs: --tags ${{ inputs.app_name }} \ --zone ${{ vars.GCP_ZONE }} - # Launch the test with the previously created Zebra-only cached state. - # Each test runs one of the "Launch test" steps, and skips the other. - # - # SSH into the just created VM, and create a Docker container to run the incoming test - # from ${{ inputs.test_id }}, then mount the sudo docker volume created in the previous job. - # - # The disk mounted in the VM is located at /dev/$DISK_NAME, we mount the root `/` of this disk to the docker - # container in one path: - # - /var/cache/zebrad-cache -> ${{ inputs.root_state_path }}/${{ inputs.zebra_state_dir }} -> $ZEBRA_CACHED_STATE_DIR - # - # This path must match the variable used by the tests in Rust, which are also set in - # `ci-unit-tests-docker.yml` to be able to run this tests. - # - # Although we're mounting the disk root, Zebra will only respect the values from - # $ZEBRA_CACHED_STATE_DIR. The inputs like ${{ inputs.zebra_state_dir }} are only used - # to match that variable paths. - - name: Launch ${{ inputs.test_id }} test - # This step only runs for tests that just read or write a Zebra state. - # - # lightwalletd-full-sync reads Zebra and writes lwd, so it is handled specially. - # TODO: we should find a better logic for this use cases - if: ${{ (inputs.needs_zebra_state && !inputs.needs_lwd_state) && inputs.test_id != 'lwd-full-sync' }} - shell: /usr/bin/bash -exo pipefail {0} + # Format the mounted disk if the test doesn't use a cached state. 
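The `DISK_OPTION` variable written to `$GITHUB_ENV` above is what lets a single `create-with-container` command serve both the cached-disk and empty-disk cases. A simplified sketch of the flag composition, with placeholder names instead of the workflow's templated values:

```bash
# When the find step ran, DISK_OPTION holds "image=NAME," (note the trailing
# comma); when it was skipped, the variable is empty and expands to nothing,
# so gcloud creates a blank disk that the next step formats with mkfs.ext4.
DISK_OPTION="image=zebrad-cache-example,"
gcloud compute instances create-with-container "example-instance" \
  --create-disk=${DISK_OPTION}name="example-disk",size=400GB,type=pd-ssd
```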
+ - name: Format ${{ inputs.test_id }} volume + if: ${{ !inputs.needs_zebra_state && !inputs.needs_lwd_state }} + shell: /usr/bin/bash -ex {0} run: | gcloud compute ssh ${{ inputs.test_id }}-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }} \ --zone ${{ vars.GCP_ZONE }} \ @@ -435,36 +288,14 @@ jobs: --command=' \ set -ex; # Extract the correct disk name based on the device-name - export DISK_NAME=$(ls -l /dev/disk/by-id | grep -oE "google-${{ inputs.test_id }}-${{ env.GITHUB_SHA_SHORT }} -> ../../[^ ]+" | grep -oE "/[^/]+$" | cut -c 2-); \ - - sudo docker run \ - --name ${{ inputs.test_id }} \ - --tty \ - --detach \ - ${{ inputs.test_variables }} \ - --mount type=volume,volume-driver=local,volume-opt=device=/dev/$DISK_NAME,volume-opt=type=ext4,dst=${{ inputs.root_state_path }}/${{ inputs.zebra_state_dir }} \ - ${{ vars.GAR_BASE }}/${{ vars.CI_IMAGE_NAME }}:sha-${{ env.GITHUB_SHA_SHORT }} \ - ' - - # Show debug logs if previous job failed - - name: Show debug logs if previous job failed - if: ${{ failure() && (inputs.needs_zebra_state && !inputs.needs_lwd_state) && inputs.test_id != 'lwd-full-sync' }} - shell: /usr/bin/bash -exo pipefail {0} - run: | - gcloud compute ssh ${{ inputs.test_id }}-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }} \ - --zone ${{ vars.GCP_ZONE }} \ - --ssh-flag="-o ServerAliveInterval=5" \ - --ssh-flag="-o ConnectionAttempts=20" \ - --ssh-flag="-o ConnectTimeout=5" \ - --command=' \ - lsblk; - sudo lsof /dev/$DISK_NAME; - sudo dmesg; - sudo journalctl -b \ + DISK_NAME=$(ls -l /dev/disk/by-id | grep -oE "google-${{ inputs.test_id }}-${{ env.GITHUB_SHA_SHORT }} -> ../../[^ ]+" | grep -oE "/[^/]+$" | cut -c 2-); + sudo mkfs.ext4 -v /dev/$DISK_NAME \ ' - # Launch the test with the previously created Lightwalletd and Zebra cached state. - # Each test runs one of the "Launch test" steps, and skips the other. + # Launch the test with the previously created disk or cached state. + # + # This step uses a $MOUNT_FLAGS variable to mount the disk to the docker container. + # If the test needs Lightwalletd state, we add the Lightwalletd state mount to the $MOUNT_FLAGS variable. # # SSH into the just created VM, and create a Docker container to run the incoming test # from ${{ inputs.test_id }}, then mount the sudo docker volume created in the previous job. @@ -473,8 +304,8 @@ jobs: # VM and to the container might require more steps in this workflow, and additional # considerations. # - # The disk mounted in the VM is located at /dev/$DISK_NAME, we want the root `/` of this disk to be - # available in the docker container at two different paths: + # The disk mounted in the VM is located at /dev/$DISK_NAME, we mount the root `/` of this disk to the docker + # container, and might have two different paths (if lightwalletd state is needed): # - /var/cache/zebrad-cache -> ${{ inputs.root_state_path }}/${{ inputs.zebra_state_dir }} -> $ZEBRA_CACHED_STATE_DIR # - /var/cache/lwd-cache -> ${{ inputs.root_state_path }}/${{ inputs.lwd_state_dir }} -> $LIGHTWALLETD_DATA_DIR # @@ -484,19 +315,16 @@ jobs: # subdirectories for their data. (But Zebra, lightwalletd, and the test harness must not # delete the whole cache directory.) # - # This paths must match the variables used by the tests in Rust, which are also set in + # These paths must match the variables used by the tests in Rust, which are also set in # `ci-unit-tests-docker.yml` to be able to run this tests. 
# # Although we're mounting the disk root to both directories, Zebra and Lightwalletd # will only respect the values from $ZEBRA_CACHED_STATE_DIR and $LIGHTWALLETD_DATA_DIR, - the inputs like ${{ inputs.lwd_state_dir }} are only used to match those variables paths. + the inputs like ${{ inputs.zebra_state_dir }} and ${{ inputs.lwd_state_dir }} + # are only used to match those variables' paths. - name: Launch ${{ inputs.test_id }} test - # This step only runs for tests that read or write Lightwalletd and Zebra states. - # - # lightwalletd-full-sync reads Zebra and writes lwd, so it is handled specially. - # TODO: we should find a better logic for this use cases - if: ${{ (inputs.needs_zebra_state && inputs.needs_lwd_state) || inputs.test_id == 'lwd-full-sync' }} - shell: /usr/bin/bash -exo pipefail {0} + id: launch-test + shell: /usr/bin/bash -x {0} run: | gcloud compute ssh ${{ inputs.test_id }}-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }} \ --zone ${{ vars.GCP_ZONE }} \ @@ -504,24 +332,31 @@ jobs: --ssh-flag="-o ConnectionAttempts=20" \ --ssh-flag="-o ConnectTimeout=5" \ --command=' \ - set -ex; + # Extract the correct disk name based on the device-name - export DISK_NAME=$(ls -l /dev/disk/by-id | grep -oE "google-${{ inputs.test_id }}-${{ env.GITHUB_SHA_SHORT }} -> ../../[^ ]+" | grep -oE "/[^/]+$" | cut -c 2-); \ + DISK_NAME=$(ls -l /dev/disk/by-id | grep -oE "google-${{ inputs.test_id }}-${{ env.GITHUB_SHA_SHORT }} -> ../../[^ ]+" | grep -oE "/[^/]+$" | cut -c 2-) + + MOUNT_FLAGS="--mount type=volume,volume-driver=local,volume-opt=device=/dev/$DISK_NAME,volume-opt=type=ext4,dst=${{ inputs.root_state_path }}/${{ inputs.zebra_state_dir }}" + + # Check if we need to mount for Lightwalletd state + # lightwalletd-full-sync reads Zebra and writes lwd, so it is handled specially. + if [[ "${{ inputs.needs_lwd_state }}" == "true" || "${{ inputs.test_id }}" == "lwd-full-sync" ]]; then + MOUNT_FLAGS="$MOUNT_FLAGS --mount type=volume,volume-driver=local,volume-opt=device=/dev/$DISK_NAME,volume-opt=type=ext4,dst=${{ inputs.root_state_path }}/${{ inputs.lwd_state_dir }}" + fi sudo docker run \ --name ${{ inputs.test_id }} \ --tty \ --detach \ ${{ inputs.test_variables }} \ - --mount type=volume,volume-driver=local,volume-opt=device=/dev/$DISK_NAME,volume-opt=type=ext4,dst=${{ inputs.root_state_path }}/${{ inputs.zebra_state_dir }} \ - --mount type=volume,volume-driver=local,volume-opt=device=/dev/$DISK_NAME,volume-opt=type=ext4,dst=${{ inputs.root_state_path }}/${{ inputs.lwd_state_dir }} \ + ${MOUNT_FLAGS} \ ${{ vars.GAR_BASE }}/${{ vars.CI_IMAGE_NAME }}:sha-${{ env.GITHUB_SHA_SHORT }} \ ' # Show debug logs if previous job failed - name: Show debug logs if previous job failed - if: ${{ failure() && (inputs.needs_zebra_state && inputs.needs_lwd_state) || inputs.test_id == 'lwd-full-sync' }} - shell: /usr/bin/bash -exo pipefail {0} + if: ${{ failure() }} + shell: /usr/bin/bash -x {0} run: | gcloud compute ssh ${{ inputs.test_id }}-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }} \ --zone ${{ vars.GCP_ZONE }} \ --ssh-flag="-o ServerAliveInterval=5" \ --ssh-flag="-o ConnectionAttempts=20" \ --ssh-flag="-o ConnectTimeout=5" \ --command=' \ lsblk; sudo lsof /dev/$DISK_NAME; sudo dmesg; sudo journalctl -b \ ' - # Show all the test logs, then follow the logs of the test we just launched, until it finishes. - # Then check the result of the test. - # - # If `inputs.is_long_test` is `true`, the timeout is 5 days, otherwise it's 3 hours. - test-result: - name: Run ${{ inputs.test_id }} test - # We run exactly one of without-cached-state or with-cached-state, and we always skip the other one.
- needs: [ launch-with-cached-state, launch-without-cached-state ] - # If the previous job fails, we also want to run and fail this job, - # so that the branch protection rule fails in Mergify and GitHub. - if: ${{ !cancelled() }} - timeout-minutes: ${{ inputs.is_long_test && 7200 || 180 }} - runs-on: zfnd-runners - permissions: - contents: 'read' - id-token: 'write' - steps: - - uses: actions/checkout@v4.1.0 - with: - persist-credentials: false - fetch-depth: '2' - - - name: Inject slug/short variables - uses: rlespinasse/github-slug-action@v4 - with: - short-length: 7 - - # Install our SSH secret - - name: Install private SSH key - uses: shimataro/ssh-key-action@v2.5.1 - with: - key: ${{ secrets.GCP_SSH_PRIVATE_KEY }} - name: google_compute_engine - known_hosts: unnecessary - - - name: Generate public SSH key - run: | - sudo apt-get update && sudo apt-get -qq install -y --no-install-recommends openssh-client - ssh-keygen -y -f ~/.ssh/google_compute_engine > ~/.ssh/google_compute_engine.pub - - # Setup gcloud CLI - - name: Authenticate to Google Cloud - id: auth - uses: google-github-actions/auth@v1.1.1 - with: - retries: '3' - workload_identity_provider: '${{ vars.GCP_WIF }}' - service_account: '${{ vars.GCP_DEPLOYMENTS_SA }}' - - - name: Set up Cloud SDK - uses: google-github-actions/setup-gcloud@v1.1.1 - # Show all the logs since the container launched, # following until we see zebrad startup messages. # @@ -600,7 +383,7 @@ jobs: # # Errors in the tests are caught by the final test status job. - name: Check startup logs for ${{ inputs.test_id }} - shell: /usr/bin/bash -exo pipefail {0} + shell: /usr/bin/bash -x {0} run: | gcloud compute ssh ${{ inputs.test_id }}-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }} \ --zone ${{ vars.GCP_ZONE }} \ @@ -608,10 +391,6 @@ jobs: --ssh-flag="-o ConnectionAttempts=20" \ --ssh-flag="-o ConnectTimeout=5" \ --command=' \ - trap "" PIPE; - - # Temporarily disable "set -e" to handle the broken pipe error gracefully - set +e; sudo docker logs \ --tail all \ --follow \ @@ -633,7 +412,7 @@ jobs: # with that status. # (`docker wait` can also wait for multiple containers, but we only ever wait for a single container.) - name: Result of ${{ inputs.test_id }} test - shell: /usr/bin/bash -exo pipefail {0} + shell: /usr/bin/bash -x {0} run: | gcloud compute ssh ${{ inputs.test_id }}-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }} \ --zone ${{ vars.GCP_ZONE }} \ @@ -641,10 +420,6 @@ jobs: --ssh-flag="-o ConnectionAttempts=20" \ --ssh-flag="-o ConnectTimeout=5" \ --command=' \ - trap "" PIPE; - - # Temporarily disable "set -e" to handle the broken pipe error gracefully - set +e; sudo docker logs \ --tail all \ --follow \ @@ -653,7 +428,6 @@ jobs: grep --max-count=1 --extended-regexp --color=always \ "test result: .*ok.* [1-9][0-9]* passed.*finished in"; LOGS_EXIT_STATUS=$?; - set -e; EXIT_STATUS=$(sudo docker wait ${{ inputs.test_id }} || echo "Error retrieving exit status"); echo "sudo docker exit status: $EXIT_STATUS"; @@ -672,7 +446,7 @@ jobs: create-state-image: name: Create ${{ inputs.test_id }} cached state image runs-on: ubuntu-latest - needs: [ test-result, launch-with-cached-state ] + needs: [ test-result ] # We run exactly one of without-cached-state or with-cached-state, and we always skip the other one. # Normally, if a job is skipped, all the jobs that depend on it are also skipped. # So we need to override the default success() check to make this job run. 
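The "override the default success() check" comment above points at a GitHub Actions subtlety: a job with no `if:` gets an implicit `success()`, which also skips the job when anything in `needs` was skipped. The actual `if:` line sits outside this hunk's context, so this is a hedged sketch of the pattern the comment describes:

```yaml
create-state-image:
  needs: [ test-result ]
  # `!cancelled()` keeps this job eligible even when a needed job was
  # skipped, and the result check narrows it to runs where the test passed.
  if: ${{ !cancelled() && needs.test-result.result == 'success' }}
```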
@@ -681,7 +455,7 @@ jobs: contents: 'read' id-token: 'write' steps: - - uses: actions/checkout@v4.1.0 + - uses: actions/checkout@v4.1.1 with: persist-credentials: false fetch-depth: '2' @@ -711,7 +485,7 @@ jobs: # Install our SSH secret - name: Install private SSH key - uses: shimataro/ssh-key-action@v2.5.1 + uses: shimataro/ssh-key-action@v2.6.1 with: key: ${{ secrets.GCP_SSH_PRIVATE_KEY }} name: google_compute_engine @@ -779,7 +553,7 @@ jobs: # Passes the versions to subsequent steps using the $INITIAL_DISK_DB_VERSION, # $RUNNING_DB_VERSION, and $DB_VERSION_SUMMARY env variables. - name: Get database versions from logs - shell: /usr/bin/bash -exo pipefail {0} + shell: /usr/bin/bash -x {0} run: | INITIAL_DISK_DB_VERSION="" RUNNING_DB_VERSION="" @@ -869,7 +643,7 @@ jobs: # # Passes the sync height to subsequent steps using the $SYNC_HEIGHT env variable. - name: Get sync height from logs - shell: /usr/bin/bash -exo pipefail {0} + shell: /usr/bin/bash -x {0} run: | SYNC_HEIGHT="" @@ -917,7 +691,7 @@ jobs: - name: Get original cached state height from google cloud run: | ORIGINAL_HEIGHT="0" - ORIGINAL_DISK_NAME="${{ format('{0}', needs.launch-with-cached-state.outputs.cached_disk_name) }}" + ORIGINAL_DISK_NAME="${{ format('{0}', needs.test-result.outputs.cached_disk_name) }}" if [[ -n "$ORIGINAL_DISK_NAME" ]]; then ORIGINAL_HEIGHT=$(gcloud compute images list --filter="status=READY AND name=$ORIGINAL_DISK_NAME" --format="value(labels.height)") @@ -988,7 +762,7 @@ jobs: contents: 'read' id-token: 'write' steps: - - uses: actions/checkout@v4.1.0 + - uses: actions/checkout@v4.1.1 with: persist-credentials: false fetch-depth: '2' diff --git a/.github/workflows/sub-find-cached-disks.yml b/.github/workflows/sub-find-cached-disks.yml index 03181c18711..3d0182c45a8 100644 --- a/.github/workflows/sub-find-cached-disks.yml +++ b/.github/workflows/sub-find-cached-disks.yml @@ -30,7 +30,7 @@ jobs: contents: 'read' id-token: 'write' steps: - - uses: actions/checkout@v4.1.0 + - uses: actions/checkout@v4.1.1 with: persist-credentials: false fetch-depth: 0 diff --git a/.github/workflows/sub-test-zebra-config.yml b/.github/workflows/sub-test-zebra-config.yml index d8e856f0748..a7288d5f733 100644 --- a/.github/workflows/sub-test-zebra-config.yml +++ b/.github/workflows/sub-test-zebra-config.yml @@ -34,7 +34,7 @@ jobs: timeout-minutes: 15 runs-on: ubuntu-latest steps: - - uses: actions/checkout@v4.1.0 + - uses: actions/checkout@v4.1.1 with: persist-credentials: false diff --git a/book/src/dev/state-db-upgrades.md b/book/src/dev/state-db-upgrades.md index 6bd6aeaddbd..db5e4bce868 100644 --- a/book/src/dev/state-db-upgrades.md +++ b/book/src/dev/state-db-upgrades.md @@ -40,6 +40,28 @@ This means that: If there is an upgrade failure, it can panic and tell the user to delete their cached state and re-launch Zebra. +### Performance Constraints + +Some column family access patterns can lead to very poor performance. + +Known performance issues include: +- using an iterator on a column family which also deletes keys +- creating large numbers of iterators +- holding an iterator for a long time + +See the performance notes and bug reports in: +- https://github.com/facebook/rocksdb/wiki/Iterator#iterating-upper-bound-and-lower-bound +- https://tracker.ceph.com/issues/55324 +- https://jira.mariadb.org/browse/MDEV-19670 + +But we need to use iterators for some operations, so our alternatives are (in preferred order): +1. Minimise the number of keys we delete, and how often we delete them +2. 
Avoid using iterators on column families where we delete keys +3. If we must use iterators on those column families, set read bounds to minimise the amount of deleted data that is read + +Currently only UTXOs require key deletion, and only `utxo_loc_by_transparent_addr_loc` requires +deletion and iterators. + ### Implementation Steps - [ ] update the [database format](https://github.com/ZcashFoundation/zebra/blob/main/book/src/dev/state-db-upgrades.md#current) in the Zebra docs @@ -87,7 +109,7 @@ We use the following rocksdb column families: | *Sprout* | | | | | `sprout_nullifiers` | `sprout::Nullifier` | `()` | Create | | `sprout_anchors` | `sprout::tree::Root` | `sprout::NoteCommitmentTree` | Create | -| `sprout_note_commitment_tree` | `block::Height` | `sprout::NoteCommitmentTree` | Delete | +| `sprout_note_commitment_tree` | `()` | `sprout::NoteCommitmentTree` | Update | | *Sapling* | | | | | `sapling_nullifiers` | `sapling::Nullifier` | `()` | Create | | `sapling_anchors` | `sapling::tree::Root` | `()` | Create | @@ -99,7 +121,7 @@ We use the following rocksdb column families: | `orchard_note_commitment_tree` | `block::Height` | `orchard::NoteCommitmentTree` | Create | | `orchard_note_commitment_subtree` | `block::Height` | `NoteCommitmentSubtreeData` | Create | | *Chain* | | | | -| `history_tree` | `block::Height` | `NonEmptyHistoryTree` | Delete | +| `history_tree` | `()` | `NonEmptyHistoryTree` | Update | | `tip_chain_value_pool` | `()` | `ValueBalance` | Update | Zcash structures are encoded using `ZcashSerialize`/`ZcashDeserialize`. @@ -131,6 +153,7 @@ Amounts: Derived Formats: - `*::NoteCommitmentTree`: `bincode` using `serde` + - stored note commitment trees always have cached roots - `NonEmptyHistoryTree`: `bincode` using `serde`, using `zcash_history`'s `serde` implementation diff --git a/zebra-state/src/constants.rs b/zebra-state/src/constants.rs index b5a06851638..af232e0dbfe 100644 --- a/zebra-state/src/constants.rs +++ b/zebra-state/src/constants.rs @@ -49,11 +49,11 @@ pub(crate) const DATABASE_FORMAT_VERSION: u64 = 25; /// - adding new column families, /// - changing the format of a column family in a compatible way, or /// - breaking changes with compatibility code in all supported Zebra versions. -pub(crate) const DATABASE_FORMAT_MINOR_VERSION: u64 = 2; +pub(crate) const DATABASE_FORMAT_MINOR_VERSION: u64 = 3; /// The database format patch version, incremented each time the on-disk database format has a /// significant format compatibility fix. -pub(crate) const DATABASE_FORMAT_PATCH_VERSION: u64 = 2; +pub(crate) const DATABASE_FORMAT_PATCH_VERSION: u64 = 0; /// Returns the highest database version that modifies the subtree index format. 
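Returning to the iterator guidance added to `state-db-upgrades.md` above: the third alternative, read bounds, maps directly onto RocksDB's `ReadOptions`. A sketch against the plain `rocksdb` crate rather than Zebra's `DiskDb` wrapper, with hypothetical key bounds:

```rust
use rocksdb::{IteratorMode, ReadOptions, DB};

// Scan one column family without reading past the range we care about.
// Bounds let RocksDB stop early instead of skipping over long runs of
// tombstones left behind by deleted keys.
fn bounded_scan(db: &DB) -> Result<(), rocksdb::Error> {
    let cf = db
        .cf_handle("utxo_loc_by_transparent_addr_loc")
        .expect("column family was opened with the database");

    let mut opts = ReadOptions::default();
    opts.set_iterate_lower_bound(b"lower-key".to_vec()); // hypothetical bounds
    opts.set_iterate_upper_bound(b"upper-key".to_vec());

    for entry in db.iterator_cf_opt(cf, opts, IteratorMode::Start) {
        let (_key, _value) = entry?;
        // process one entry, then drop the iterator promptly
    }
    Ok(())
}
```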
/// diff --git a/zebra-state/src/lib.rs b/zebra-state/src/lib.rs index d1076c68b95..ad2cec55207 100644 --- a/zebra-state/src/lib.rs +++ b/zebra-state/src/lib.rs @@ -66,7 +66,7 @@ pub use response::GetBlockTemplateChainInfo; pub use service::{ arbitrary::{populated_state, CHAIN_TIP_UPDATE_WAIT_LIMIT}, chain_tip::{ChainTipBlock, ChainTipSender}, - finalized_state::MAX_ON_DISK_HEIGHT, + finalized_state::{DiskWriteBatch, MAX_ON_DISK_HEIGHT}, init_test, init_test_services, ReadStateService, }; diff --git a/zebra-state/src/service/check/tests/anchors.rs b/zebra-state/src/service/check/tests/anchors.rs index d96c8b0410b..09d33b29190 100644 --- a/zebra-state/src/service/check/tests/anchors.rs +++ b/zebra-state/src/service/check/tests/anchors.rs @@ -6,8 +6,9 @@ use zebra_chain::{ amount::Amount, block::{Block, Height}, primitives::Groth16Proof, + sapling, serialization::ZcashDeserializeInto, - sprout::JoinSplit, + sprout::{self, JoinSplit}, transaction::{JoinSplitData, LockTime, Transaction, UnminedTx}, }; @@ -18,7 +19,7 @@ use crate::{ write::validate_and_commit_non_finalized, }, tests::setup::{new_state_with_mainnet_genesis, transaction_v4_from_coinbase}, - SemanticallyVerifiedBlock, ValidateContextError, + DiskWriteBatch, SemanticallyVerifiedBlock, ValidateContextError, }; // Sprout @@ -31,12 +32,22 @@ fn check_sprout_anchors() { let (finalized_state, mut non_finalized_state, _genesis) = new_state_with_mainnet_genesis(); - // Bootstrap a block at height == 1. + // Delete the empty anchor from the database + let mut batch = DiskWriteBatch::new(); + batch.delete_sprout_anchor( + &finalized_state, + &sprout::tree::NoteCommitmentTree::default().root(), + ); + finalized_state + .write_batch(batch) + .expect("unexpected I/O error"); + + // Create a block at height 1. let block_1 = zebra_test::vectors::BLOCK_MAINNET_1_BYTES .zcash_deserialize_into::() .expect("block should deserialize"); - // Bootstrap a block just before the first Sprout anchors. + // Create a block just before the first Sprout anchors. let block_395 = zebra_test::vectors::BLOCK_MAINNET_395_BYTES .zcash_deserialize_into::() .expect("block should deserialize"); @@ -44,7 +55,7 @@ fn check_sprout_anchors() { // Add initial transactions to [`block_1`]. let block_1 = prepare_sprout_block(block_1, block_395); - // Bootstrap a block at height == 2 that references the Sprout note commitment tree state + // Create a block at height == 2 that references the Sprout note commitment tree state // from [`block_1`]. let block_2 = zebra_test::vectors::BLOCK_MAINNET_2_BYTES .zcash_deserialize_into::() @@ -74,10 +85,13 @@ fn check_sprout_anchors() { ) }); - assert!(matches!( - check_unmined_tx_anchors_result, - Err(ValidateContextError::UnknownSproutAnchor { .. }) - )); + assert!( + matches!( + check_unmined_tx_anchors_result, + Err(ValidateContextError::UnknownSproutAnchor { .. }), + ), + "unexpected result: {check_unmined_tx_anchors_result:?}", + ); // Validate and commit [`block_1`]. This will add an anchor referencing the // empty note commitment tree to the state. 
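The `DiskWriteBatch` export added to `zebra-state/src/lib.rs` above is what lets this test delete an anchor straight from the finalized state; the matching visibility change is in the `finalized_state.rs` hunk further down. A minimal sketch of that cfg-gated re-export pattern, with a placeholder module body:

```rust
mod disk_db {
    /// Stand-in for the real RocksDB write batch wrapper.
    #[derive(Default)]
    pub struct DiskWriteBatch;
}

// Public in unit tests and `proptest-impl` builds, so tests can construct
// raw write batches like the anchor deletions in this file...
#[cfg(any(test, feature = "proptest-impl"))]
pub use disk_db::DiskWriteBatch;

// ...but crate-private in normal builds.
#[cfg(not(any(test, feature = "proptest-impl")))]
#[allow(unused_imports)] // used elsewhere in the real crate
use disk_db::DiskWriteBatch;
```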
@@ -182,7 +196,17 @@ fn check_sapling_anchors() { let (finalized_state, mut non_finalized_state, _genesis) = new_state_with_mainnet_genesis(); - // Bootstrap a block at height == 1 that has the first Sapling note commitments + // Delete the empty anchor from the database + let mut batch = DiskWriteBatch::new(); + batch.delete_sapling_anchor( + &finalized_state, + &sapling::tree::NoteCommitmentTree::default().root(), + ); + finalized_state + .write_batch(batch) + .expect("unexpected I/O error"); + + // Create a block at height 1 that has the first Sapling note commitments let mut block1 = zebra_test::vectors::BLOCK_MAINNET_1_BYTES .zcash_deserialize_into::() .expect("block should deserialize"); @@ -227,7 +251,7 @@ fn check_sapling_anchors() { let block1 = Arc::new(block1).prepare(); - // Bootstrap a block at height == 2 that references the Sapling note commitment tree state + // Create a block at height == 2 that references the Sapling note commitment tree state // from earlier block let mut block2 = zebra_test::vectors::BLOCK_MAINNET_2_BYTES .zcash_deserialize_into::() @@ -315,3 +339,5 @@ fn check_sapling_anchors() { Ok(()) ); } + +// TODO: create a test for orchard anchors diff --git a/zebra-state/src/service/finalized_state.rs b/zebra-state/src/service/finalized_state.rs index e092f0610b3..fde5c414c28 100644 --- a/zebra-state/src/service/finalized_state.rs +++ b/zebra-state/src/service/finalized_state.rs @@ -42,7 +42,10 @@ pub use disk_format::{OutputIndex, OutputLocation, TransactionLocation, MAX_ON_D pub(super) use zebra_db::ZebraDb; -pub(super) use disk_db::DiskWriteBatch; +#[cfg(any(test, feature = "proptest-impl"))] +pub use disk_db::DiskWriteBatch; +#[cfg(not(any(test, feature = "proptest-impl")))] +use disk_db::DiskWriteBatch; /// The finalized part of the chain state, stored in the db. /// @@ -88,14 +91,33 @@ pub struct FinalizedState { } impl FinalizedState { - /// Returns an on-disk database instance for `config` and `network`. + /// Returns an on-disk database instance for `config`, `network`, and `elastic_db`. /// If there is no existing database, creates a new database on disk. pub fn new( config: &Config, network: Network, #[cfg(feature = "elasticsearch")] elastic_db: Option, ) -> Self { - let db = ZebraDb::new(config, network, false); + Self::new_with_debug( + config, + network, + false, + #[cfg(feature = "elasticsearch")] + elastic_db, + ) + } + + /// Returns an on-disk database instance with the supplied production and debug settings. + /// If there is no existing database, creates a new database on disk. + /// + /// This method is intended for use in tests. + pub(crate) fn new_with_debug( + config: &Config, + network: Network, + debug_skip_format_upgrades: bool, + #[cfg(feature = "elasticsearch")] elastic_db: Option, + ) -> Self { + let db = ZebraDb::new(config, network, debug_skip_format_upgrades); #[cfg(feature = "elasticsearch")] let new_state = Self { diff --git a/zebra-state/src/service/finalized_state/disk_db.rs b/zebra-state/src/service/finalized_state/disk_db.rs index 3772fb7a789..9ae009f6dcd 100644 --- a/zebra-state/src/service/finalized_state/disk_db.rs +++ b/zebra-state/src/service/finalized_state/disk_db.rs @@ -98,7 +98,7 @@ pub struct DiskDb { // and make them accessible via read-only methods #[must_use = "batches must be written to the database"] #[derive(Default)] -pub(crate) struct DiskWriteBatch { +pub struct DiskWriteBatch { /// The inner RocksDB write batch. 
batch: rocksdb::WriteBatch, } diff --git a/zebra-state/src/service/finalized_state/disk_format/tests/snapshots/empty_column_families@mainnet_0.snap b/zebra-state/src/service/finalized_state/disk_format/tests/snapshots/empty_column_families@mainnet_0.snap index 4b37e3baef3..3c333a9fc43 100644 --- a/zebra-state/src/service/finalized_state/disk_format/tests/snapshots/empty_column_families@mainnet_0.snap +++ b/zebra-state/src/service/finalized_state/disk_format/tests/snapshots/empty_column_families@mainnet_0.snap @@ -5,13 +5,10 @@ expression: empty_column_families [ "balance_by_transparent_addr: no entries", "history_tree: no entries", - "orchard_anchors: no entries", "orchard_note_commitment_subtree: no entries", "orchard_nullifiers: no entries", - "sapling_anchors: no entries", "sapling_note_commitment_subtree: no entries", "sapling_nullifiers: no entries", - "sprout_anchors: no entries", "sprout_nullifiers: no entries", "tip_chain_value_pool: no entries", "tx_loc_by_transparent_addr_loc: no entries", diff --git a/zebra-state/src/service/finalized_state/disk_format/tests/snapshots/empty_column_families@testnet_0.snap b/zebra-state/src/service/finalized_state/disk_format/tests/snapshots/empty_column_families@testnet_0.snap index 4b37e3baef3..3c333a9fc43 100644 --- a/zebra-state/src/service/finalized_state/disk_format/tests/snapshots/empty_column_families@testnet_0.snap +++ b/zebra-state/src/service/finalized_state/disk_format/tests/snapshots/empty_column_families@testnet_0.snap @@ -5,13 +5,10 @@ expression: empty_column_families [ "balance_by_transparent_addr: no entries", "history_tree: no entries", - "orchard_anchors: no entries", "orchard_note_commitment_subtree: no entries", "orchard_nullifiers: no entries", - "sapling_anchors: no entries", "sapling_note_commitment_subtree: no entries", "sapling_nullifiers: no entries", - "sprout_anchors: no entries", "sprout_nullifiers: no entries", "tip_chain_value_pool: no entries", "tx_loc_by_transparent_addr_loc: no entries", diff --git a/zebra-state/src/service/finalized_state/disk_format/tests/snapshots/orchard_anchors_raw_data@mainnet_0.snap b/zebra-state/src/service/finalized_state/disk_format/tests/snapshots/orchard_anchors_raw_data@mainnet_0.snap new file mode 100644 index 00000000000..6ff419bc322 --- /dev/null +++ b/zebra-state/src/service/finalized_state/disk_format/tests/snapshots/orchard_anchors_raw_data@mainnet_0.snap @@ -0,0 +1,10 @@ +--- +source: zebra-state/src/service/finalized_state/disk_format/tests/snapshot.rs +expression: cf_data +--- +[ + KV( + k: "ae2935f1dfd8a24aed7c70df7de3a668eb7a49b1319880dde2bbd9031ae5d82f", + v: "", + ), +] diff --git a/zebra-state/src/service/finalized_state/disk_format/tests/snapshots/orchard_anchors_raw_data@testnet_0.snap b/zebra-state/src/service/finalized_state/disk_format/tests/snapshots/orchard_anchors_raw_data@testnet_0.snap new file mode 100644 index 00000000000..6ff419bc322 --- /dev/null +++ b/zebra-state/src/service/finalized_state/disk_format/tests/snapshots/orchard_anchors_raw_data@testnet_0.snap @@ -0,0 +1,10 @@ +--- +source: zebra-state/src/service/finalized_state/disk_format/tests/snapshot.rs +expression: cf_data +--- +[ + KV( + k: "ae2935f1dfd8a24aed7c70df7de3a668eb7a49b1319880dde2bbd9031ae5d82f", + v: "", + ), +] diff --git a/zebra-state/src/service/finalized_state/disk_format/tests/snapshots/sapling_anchors_raw_data@mainnet_0.snap b/zebra-state/src/service/finalized_state/disk_format/tests/snapshots/sapling_anchors_raw_data@mainnet_0.snap new file mode 100644 index 
00000000000..fec72b61b35 --- /dev/null +++ b/zebra-state/src/service/finalized_state/disk_format/tests/snapshots/sapling_anchors_raw_data@mainnet_0.snap @@ -0,0 +1,10 @@ +--- +source: zebra-state/src/service/finalized_state/disk_format/tests/snapshot.rs +expression: cf_data +--- +[ + KV( + k: "fbc2f4300c01f0b7820d00e3347c8da4ee614674376cbc45359daa54f9b5493e", + v: "", + ), +] diff --git a/zebra-state/src/service/finalized_state/disk_format/tests/snapshots/sapling_anchors_raw_data@testnet_0.snap b/zebra-state/src/service/finalized_state/disk_format/tests/snapshots/sapling_anchors_raw_data@testnet_0.snap new file mode 100644 index 00000000000..fec72b61b35 --- /dev/null +++ b/zebra-state/src/service/finalized_state/disk_format/tests/snapshots/sapling_anchors_raw_data@testnet_0.snap @@ -0,0 +1,10 @@ +--- +source: zebra-state/src/service/finalized_state/disk_format/tests/snapshot.rs +expression: cf_data +--- +[ + KV( + k: "fbc2f4300c01f0b7820d00e3347c8da4ee614674376cbc45359daa54f9b5493e", + v: "", + ), +] diff --git a/zebra-state/src/service/finalized_state/disk_format/tests/snapshots/sprout_anchors_raw_data@mainnet_0.snap b/zebra-state/src/service/finalized_state/disk_format/tests/snapshots/sprout_anchors_raw_data@mainnet_0.snap new file mode 100644 index 00000000000..57a94746e00 --- /dev/null +++ b/zebra-state/src/service/finalized_state/disk_format/tests/snapshots/sprout_anchors_raw_data@mainnet_0.snap @@ -0,0 +1,10 @@ +--- +source: zebra-state/src/service/finalized_state/disk_format/tests/snapshot.rs +expression: cf_data +--- +[ + KV( + k: "d7c612c817793191a1e68652121876d6b3bde40f4fa52bc314145ce6e5cdd259", + v: "0001d7c612c817793191a1e68652121876d6b3bde40f4fa52bc314145ce6e5cdd259", + ), +] diff --git a/zebra-state/src/service/finalized_state/disk_format/tests/snapshots/sprout_anchors_raw_data@testnet_0.snap b/zebra-state/src/service/finalized_state/disk_format/tests/snapshots/sprout_anchors_raw_data@testnet_0.snap new file mode 100644 index 00000000000..57a94746e00 --- /dev/null +++ b/zebra-state/src/service/finalized_state/disk_format/tests/snapshots/sprout_anchors_raw_data@testnet_0.snap @@ -0,0 +1,10 @@ +--- +source: zebra-state/src/service/finalized_state/disk_format/tests/snapshot.rs +expression: cf_data +--- +[ + KV( + k: "d7c612c817793191a1e68652121876d6b3bde40f4fa52bc314145ce6e5cdd259", + v: "0001d7c612c817793191a1e68652121876d6b3bde40f4fa52bc314145ce6e5cdd259", + ), +] diff --git a/zebra-state/src/service/finalized_state/disk_format/tests/snapshots/sprout_note_commitment_tree_raw_data@mainnet_0.snap b/zebra-state/src/service/finalized_state/disk_format/tests/snapshots/sprout_note_commitment_tree_raw_data@mainnet_0.snap index 6d9892d5d65..f48489ff12e 100644 --- a/zebra-state/src/service/finalized_state/disk_format/tests/snapshots/sprout_note_commitment_tree_raw_data@mainnet_0.snap +++ b/zebra-state/src/service/finalized_state/disk_format/tests/snapshots/sprout_note_commitment_tree_raw_data@mainnet_0.snap @@ -4,7 +4,7 @@ expression: cf_data --- [ KV( - k: "000000", + k: "", v: "0001d7c612c817793191a1e68652121876d6b3bde40f4fa52bc314145ce6e5cdd259", ), ] diff --git a/zebra-state/src/service/finalized_state/disk_format/tests/snapshots/sprout_note_commitment_tree_raw_data@mainnet_1.snap b/zebra-state/src/service/finalized_state/disk_format/tests/snapshots/sprout_note_commitment_tree_raw_data@mainnet_1.snap index c8264029db2..f48489ff12e 100644 --- a/zebra-state/src/service/finalized_state/disk_format/tests/snapshots/sprout_note_commitment_tree_raw_data@mainnet_1.snap +++ 
b/zebra-state/src/service/finalized_state/disk_format/tests/snapshots/sprout_note_commitment_tree_raw_data@mainnet_1.snap @@ -4,7 +4,7 @@ expression: cf_data --- [ KV( - k: "000001", + k: "", v: "0001d7c612c817793191a1e68652121876d6b3bde40f4fa52bc314145ce6e5cdd259", ), ] diff --git a/zebra-state/src/service/finalized_state/disk_format/tests/snapshots/sprout_note_commitment_tree_raw_data@mainnet_2.snap b/zebra-state/src/service/finalized_state/disk_format/tests/snapshots/sprout_note_commitment_tree_raw_data@mainnet_2.snap index 2fa029f60bb..f48489ff12e 100644 --- a/zebra-state/src/service/finalized_state/disk_format/tests/snapshots/sprout_note_commitment_tree_raw_data@mainnet_2.snap +++ b/zebra-state/src/service/finalized_state/disk_format/tests/snapshots/sprout_note_commitment_tree_raw_data@mainnet_2.snap @@ -4,7 +4,7 @@ expression: cf_data --- [ KV( - k: "000002", + k: "", v: "0001d7c612c817793191a1e68652121876d6b3bde40f4fa52bc314145ce6e5cdd259", ), ] diff --git a/zebra-state/src/service/finalized_state/disk_format/tests/snapshots/sprout_note_commitment_tree_raw_data@testnet_0.snap b/zebra-state/src/service/finalized_state/disk_format/tests/snapshots/sprout_note_commitment_tree_raw_data@testnet_0.snap index 6d9892d5d65..f48489ff12e 100644 --- a/zebra-state/src/service/finalized_state/disk_format/tests/snapshots/sprout_note_commitment_tree_raw_data@testnet_0.snap +++ b/zebra-state/src/service/finalized_state/disk_format/tests/snapshots/sprout_note_commitment_tree_raw_data@testnet_0.snap @@ -4,7 +4,7 @@ expression: cf_data --- [ KV( - k: "000000", + k: "", v: "0001d7c612c817793191a1e68652121876d6b3bde40f4fa52bc314145ce6e5cdd259", ), ] diff --git a/zebra-state/src/service/finalized_state/disk_format/tests/snapshots/sprout_note_commitment_tree_raw_data@testnet_1.snap b/zebra-state/src/service/finalized_state/disk_format/tests/snapshots/sprout_note_commitment_tree_raw_data@testnet_1.snap index c8264029db2..f48489ff12e 100644 --- a/zebra-state/src/service/finalized_state/disk_format/tests/snapshots/sprout_note_commitment_tree_raw_data@testnet_1.snap +++ b/zebra-state/src/service/finalized_state/disk_format/tests/snapshots/sprout_note_commitment_tree_raw_data@testnet_1.snap @@ -4,7 +4,7 @@ expression: cf_data --- [ KV( - k: "000001", + k: "", v: "0001d7c612c817793191a1e68652121876d6b3bde40f4fa52bc314145ce6e5cdd259", ), ] diff --git a/zebra-state/src/service/finalized_state/disk_format/tests/snapshots/sprout_note_commitment_tree_raw_data@testnet_2.snap b/zebra-state/src/service/finalized_state/disk_format/tests/snapshots/sprout_note_commitment_tree_raw_data@testnet_2.snap index 2fa029f60bb..f48489ff12e 100644 --- a/zebra-state/src/service/finalized_state/disk_format/tests/snapshots/sprout_note_commitment_tree_raw_data@testnet_2.snap +++ b/zebra-state/src/service/finalized_state/disk_format/tests/snapshots/sprout_note_commitment_tree_raw_data@testnet_2.snap @@ -4,7 +4,7 @@ expression: cf_data --- [ KV( - k: "000002", + k: "", v: "0001d7c612c817793191a1e68652121876d6b3bde40f4fa52bc314145ce6e5cdd259", ), ] diff --git a/zebra-state/src/service/finalized_state/disk_format/upgrade.rs b/zebra-state/src/service/finalized_state/disk_format/upgrade.rs index ee8c050f391..86a04553c50 100644 --- a/zebra-state/src/service/finalized_state/disk_format/upgrade.rs +++ b/zebra-state/src/service/finalized_state/disk_format/upgrade.rs @@ -29,6 +29,8 @@ use crate::{ }; pub(crate) mod add_subtrees; +pub(crate) mod cache_genesis_roots; +pub(crate) mod fix_tree_key_type; /// The kind of database format change or validity 
check we're performing. #[derive(Clone, Debug, Eq, PartialEq)] @@ -541,6 +543,32 @@ impl DbFormatChange { timer.finish(module_path!(), line!(), "add subtrees upgrade"); } + // Sprout & history tree key formats, and cached genesis tree roots database upgrades. + + let version_for_tree_keys_and_caches = + Version::parse("25.3.0").expect("Hardcoded version string should be valid."); + + // Check if we need to do the upgrade. + if older_disk_version < &version_for_tree_keys_and_caches { + let timer = CodeTimer::start(); + + // It shouldn't matter what order these are run in. + cache_genesis_roots::run(initial_tip_height, db, cancel_receiver)?; + fix_tree_key_type::run(initial_tip_height, db, cancel_receiver)?; + + // Before marking the state as upgraded, check that the upgrade completed successfully. + cache_genesis_roots::detailed_check(db, cancel_receiver)? + .expect("database format is valid after upgrade"); + fix_tree_key_type::detailed_check(db, cancel_receiver)? + .expect("database format is valid after upgrade"); + + // Mark the database as upgraded. Zebra won't repeat the upgrade anymore once the + // database is marked, so the upgrade MUST be complete at this point. + Self::mark_as_upgraded_to(&version_for_tree_keys_and_caches, config, network); + + timer.finish(module_path!(), line!(), "tree keys and caches upgrade"); + } + // # New Upgrades Usually Go Here // // New code goes above this comment! @@ -571,6 +599,9 @@ impl DbFormatChange { // upgrade, they would accidentally break compatibility with older Zebra cached states.) results.push(add_subtrees::subtree_format_calculation_pre_checks(db)); + results.push(cache_genesis_roots::quick_check(db)); + results.push(fix_tree_key_type::quick_check(db)); + // The work is done in the functions we just called. timer.finish(module_path!(), line!(), "format_validity_checks_quick()"); @@ -602,6 +633,8 @@ impl DbFormatChange { db, cancel_receiver, )?); + results.push(cache_genesis_roots::detailed_check(db, cancel_receiver)?); + results.push(fix_tree_key_type::detailed_check(db, cancel_receiver)?); // The work is done in the functions we just called. timer.finish(module_path!(), line!(), "format_validity_checks_detailed()"); diff --git a/zebra-state/src/service/finalized_state/disk_format/upgrade/cache_genesis_roots.rs b/zebra-state/src/service/finalized_state/disk_format/upgrade/cache_genesis_roots.rs new file mode 100644 index 00000000000..57fcacb9d5b --- /dev/null +++ b/zebra-state/src/service/finalized_state/disk_format/upgrade/cache_genesis_roots.rs @@ -0,0 +1,181 @@ +//! Updating the genesis note commitment trees to cache their roots. +//! +//! This reduces CPU usage when the genesis tree roots are used for transaction validation. +//! Since mempool transactions are cheap to create, this is a potential remote denial of service. + +use std::sync::mpsc; + +use zebra_chain::{block::Height, sprout}; + +use crate::service::finalized_state::{disk_db::DiskWriteBatch, ZebraDb}; + +use super::CancelFormatChange; + +/// Runs the disk format upgrade that caches the genesis note commitment tree roots. +/// +/// Returns `Ok` if the upgrade completed, and `Err` if it was cancelled. +/// +/// # Panics +/// +/// If the state is empty.
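Stepping back to the `upgrade.rs` gate above: comparing the on-disk version against a hardcoded `25.3.0` makes the migration a one-shot. A sketch of the comparison with the `semver` crate (the helper name is mine, not Zebra's):

```rust
use semver::Version;

// Runs at most once per cached state: after a successful upgrade,
// `mark_as_upgraded_to` records 25.3.0 on disk, so every later launch
// compares 25.3.0 < 25.3.0 and skips the branch entirely.
fn needs_tree_key_upgrade(older_disk_version: &Version) -> bool {
    let gate = Version::parse("25.3.0").expect("hardcoded version string should be valid");
    *older_disk_version < gate
}
```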
+#[allow(clippy::unwrap_in_result)] +#[instrument(skip(upgrade_db, cancel_receiver))] +pub fn run( + _initial_tip_height: Height, + upgrade_db: &ZebraDb, + cancel_receiver: &mpsc::Receiver, +) -> Result<(), CancelFormatChange> { + let sprout_genesis_tree = sprout::tree::NoteCommitmentTree::default(); + let sprout_tip_tree = upgrade_db.sprout_tree_for_tip(); + + let sapling_genesis_tree = upgrade_db + .sapling_tree_by_height(&Height(0)) + .expect("caller has checked for genesis block"); + let orchard_genesis_tree = upgrade_db + .orchard_tree_by_height(&Height(0)) + .expect("caller has checked for genesis block"); + + // Writing the trees back to the database automatically caches their roots. + let mut batch = DiskWriteBatch::new(); + + // Fix the cached root of the Sprout genesis tree in its anchors column family. + + // It's ok to write the genesis tree to the tip tree index, because it's overwritten by + // the actual tip before the batch is written to the database. + batch.update_sprout_tree(upgrade_db, &sprout_genesis_tree); + // This method makes sure the sprout tip tree has a cached root, even if it's the genesis tree. + batch.update_sprout_tree(upgrade_db, &sprout_tip_tree); + + batch.create_sapling_tree(upgrade_db, &Height(0), &sapling_genesis_tree); + batch.create_orchard_tree(upgrade_db, &Height(0), &orchard_genesis_tree); + + // Return before we write if the upgrade is cancelled. + if !matches!(cancel_receiver.try_recv(), Err(mpsc::TryRecvError::Empty)) { + return Err(CancelFormatChange); + } + + upgrade_db + .write_batch(batch) + .expect("updating tree cached roots should always succeed"); + + Ok(()) +} + +/// Quickly check that the genesis trees and sprout tip tree have cached roots. +/// +/// This allows us to fail the upgrade quickly in tests and during development, +/// rather than waiting to see if it failed. +/// +/// # Panics +/// +/// If the state is empty. +pub fn quick_check(db: &ZebraDb) -> Result<(), String> { + // An empty database doesn't have any trees, so its format is trivially correct. + if db.is_empty() { + return Ok(()); + } + + let sprout_genesis_tree = sprout::tree::NoteCommitmentTree::default(); + let sprout_genesis_tree = db + .sprout_tree_by_anchor(&sprout_genesis_tree.root()) + .expect("just checked for genesis block"); + let sprout_tip_tree = db.sprout_tree_for_tip(); + + let sapling_genesis_tree = db + .sapling_tree_by_height(&Height(0)) + .expect("just checked for genesis block"); + let orchard_genesis_tree = db + .orchard_tree_by_height(&Height(0)) + .expect("just checked for genesis block"); + + // Check the entire format before returning any errors. + let sprout_result = sprout_genesis_tree + .cached_root() + .ok_or("no cached root in sprout genesis tree"); + let sprout_tip_result = sprout_tip_tree + .cached_root() + .ok_or("no cached root in sprout tip tree"); + + let sapling_result = sapling_genesis_tree + .cached_root() + .ok_or("no cached root in sapling genesis tree"); + let orchard_result = orchard_genesis_tree + .cached_root() + .ok_or("no cached root in orchard genesis tree"); + + if sprout_result.is_err() + || sprout_tip_result.is_err() + || sapling_result.is_err() + || orchard_result.is_err() + { + let err = Err(format!( + "missing cached genesis root: sprout: {sprout_result:?}, {sprout_tip_result:?} \ + sapling: {sapling_result:?}, orchard: {orchard_result:?}" + )); + warn!(?err); + return err; + } + + Ok(()) +} + +/// Detailed check that all trees have cached roots. +/// +/// # Panics +/// +/// If the state is empty. 
+pub fn detailed_check( + db: &ZebraDb, + cancel_receiver: &mpsc::Receiver, +) -> Result, CancelFormatChange> { + // This is redundant in some code paths, but not in others. But it's quick anyway. + // Check the entire format before returning any errors. + let mut result = quick_check(db); + + for (root, tree) in db.sprout_trees_full_map() { + // Return early if the format check is cancelled. + if !matches!(cancel_receiver.try_recv(), Err(mpsc::TryRecvError::Empty)) { + return Err(CancelFormatChange); + } + + if tree.cached_root().is_none() { + result = Err(format!( + "found un-cached sprout tree root after running genesis tree root fix \ + {root:?}" + )); + error!(?result); + } + } + + for (height, tree) in db.sapling_tree_by_height_range(..) { + // Return early if the format check is cancelled. + if !matches!(cancel_receiver.try_recv(), Err(mpsc::TryRecvError::Empty)) { + return Err(CancelFormatChange); + } + + if tree.cached_root().is_none() { + result = Err(format!( + "found un-cached sapling tree root after running genesis tree root fix \ + {height:?}" + )); + error!(?result); + } + } + + for (height, tree) in db.orchard_tree_by_height_range(..) { + // Return early if the format check is cancelled. + if !matches!(cancel_receiver.try_recv(), Err(mpsc::TryRecvError::Empty)) { + return Err(CancelFormatChange); + } + + if tree.cached_root().is_none() { + result = Err(format!( + "found un-cached orchard tree root after running genesis tree root fix \ + {height:?}" + )); + error!(?result); + } + } + + Ok(result) +} diff --git a/zebra-state/src/service/finalized_state/disk_format/upgrade/fix_tree_key_type.rs b/zebra-state/src/service/finalized_state/disk_format/upgrade/fix_tree_key_type.rs new file mode 100644 index 00000000000..069d9cb4c2b --- /dev/null +++ b/zebra-state/src/service/finalized_state/disk_format/upgrade/fix_tree_key_type.rs @@ -0,0 +1,138 @@ +//! Updating the sprout and history tree key type from `Height` to the empty key `()`. +//! +//! This avoids a potential concurrency bug, and a known database performance issue. + +use std::sync::{mpsc, Arc}; + +use zebra_chain::{block::Height, history_tree::HistoryTree, sprout}; + +use crate::service::finalized_state::{ + disk_db::DiskWriteBatch, disk_format::MAX_ON_DISK_HEIGHT, ZebraDb, +}; + +use super::CancelFormatChange; + +/// Runs disk format upgrade for changing the sprout and history tree key types. +/// +/// Returns `Ok` if the upgrade completed, and `Err` if it was cancelled. +#[allow(clippy::unwrap_in_result)] +#[instrument(skip(upgrade_db, cancel_receiver))] +pub fn run( + _initial_tip_height: Height, + upgrade_db: &ZebraDb, + cancel_receiver: &mpsc::Receiver, +) -> Result<(), CancelFormatChange> { + let sprout_tip_tree = upgrade_db.sprout_tree_for_tip(); + let history_tip_tree = upgrade_db.history_tree(); + + // Writing the trees back to the database automatically updates their format. + let mut batch = DiskWriteBatch::new(); + + // Delete the previous `Height` tip key format, which is now a duplicate. + // It's ok to do a full delete, because the trees are restored before the batch is written. + batch.delete_range_sprout_tree(upgrade_db, &Height(0), &MAX_ON_DISK_HEIGHT); + batch.delete_range_history_tree(upgrade_db, &Height(0), &MAX_ON_DISK_HEIGHT); + + // Update the sprout tip key format in the database. + batch.update_sprout_tree(upgrade_db, &sprout_tip_tree); + batch.update_history_tree(upgrade_db, &history_tip_tree); + + // Return before we write if the upgrade is cancelled. 
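Both upgrade `run` functions finish the same way: one final non-blocking poll of the cancellation channel, and only then the batch write. A minimal, self-contained sketch of that pattern (the `Cancelled` type and function names below are illustrative, not Zebra's):

```rust
use std::sync::mpsc;

/// Error returned when a long-running task notices a cancel signal.
#[derive(Debug)]
struct Cancelled;

/// Runs one unit of work, bailing out if the cancel channel has been used:
/// a received message or a disconnected sender both mean "stop", so work
/// only continues while `try_recv()` reports an empty but connected channel.
fn checked_step(cancel_receiver: &mpsc::Receiver<()>) -> Result<(), Cancelled> {
    if !matches!(cancel_receiver.try_recv(), Err(mpsc::TryRecvError::Empty)) {
        return Err(Cancelled);
    }
    // ... do one unit of work, e.g. write a database batch ...
    Ok(())
}

fn main() {
    let (cancel_sender, cancel_receiver) = mpsc::channel();
    assert!(checked_step(&cancel_receiver).is_ok());

    // Sending a message (or dropping the sender) makes the next check bail out.
    cancel_sender.send(()).unwrap();
    assert!(checked_step(&cancel_receiver).is_err());
}
```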
+ // Return before we write if the upgrade is cancelled. + if !matches!(cancel_receiver.try_recv(), Err(mpsc::TryRecvError::Empty)) { + return Err(CancelFormatChange); + } + + upgrade_db + .write_batch(batch) + .expect("updating tree key formats should always succeed"); + + Ok(()) +} + +/// Quickly check that the sprout and history tip trees have updated key formats. +/// +/// # Panics +/// +/// If the state is empty. +pub fn quick_check(db: &ZebraDb) -> Result<(), String> { + // Check the entire format before returning any errors. + let mut result = Ok(()); + + let mut prev_key = None; + let mut prev_tree: Option<Arc<sprout::tree::NoteCommitmentTree>> = None; + + for (key, tree) in db.sprout_trees_full_tip() { + // The tip tree should be indexed by `()` (which serializes to an empty array). + if !key.raw_bytes().is_empty() { + result = Err(format!( + "found incorrect sprout tree key format after running key format upgrade \ key: {key:?}, tree: {:?}", + tree.root() + )); + error!(?result); + } + + // There should only be one tip tree in this column family. + if prev_tree.is_some() { + result = Err(format!( + "found duplicate sprout trees after running key format upgrade\n\ key: {key:?}, tree: {:?}\n\ prev key: {prev_key:?}, prev_tree: {:?}\n\ ", + tree.root(), + prev_tree.unwrap().root(), + )); + error!(?result); + } + + prev_key = Some(key); + prev_tree = Some(tree); + } + + let mut prev_key = None; + let mut prev_tree: Option<Arc<HistoryTree>> = None; + + for (key, tree) in db.history_trees_full_tip() { + // The tip tree should be indexed by `()` (which serializes to an empty array). + if !key.raw_bytes().is_empty() { + result = Err(format!( + "found incorrect history tree key format after running key format upgrade \ key: {key:?}, tree: {:?}", + tree.hash() + )); + error!(?result); + } + + // There should only be one tip tree in this column family. + if prev_tree.is_some() { + result = Err(format!( + "found duplicate history trees after running key format upgrade\n\ key: {key:?}, tree: {:?}\n\ prev key: {prev_key:?}, prev_tree: {:?}\n\ ", + tree.hash(), + prev_tree.unwrap().hash(), + )); + error!(?result); + } + + prev_key = Some(key); + prev_tree = Some(tree); + } + + result +} + +/// Detailed check that the sprout and history tip trees have updated key formats. +/// This is currently the same as the quick check. +/// +/// # Panics +/// +/// If the state is empty. +pub fn detailed_check( + db: &ZebraDb, + _cancel_receiver: &mpsc::Receiver<CancelFormatChange>, +) -> Result<Result<(), String>, CancelFormatChange> { + // This upgrade only changes two key-value pairs, so checking it is always quick. + Ok(quick_check(db)) +} diff --git a/zebra-state/src/service/finalized_state/zebra_db/block.rs b/zebra-state/src/service/finalized_state/zebra_db/block.rs index 85959849985..0a1a49e72cb 100644 --- a/zebra-state/src/service/finalized_state/zebra_db/block.rs +++ b/zebra-state/src/service/finalized_state/zebra_db/block.rs @@ -24,7 +24,6 @@ use zebra_chain::{ parameters::{Network, GENESIS_PREVIOUS_BLOCK_HASH}, sapling, serialization::TrustedPreallocate, - sprout, transaction::{self, Transaction}, transparent, value_balance::ValueBalance, @@ -456,10 +455,18 @@ impl DiskWriteBatch { prev_note_commitment_trees: Option<NoteCommitmentTrees>, ) -> Result<(), BoxError> { let db = &zebra_db.db; - // Commit block and transaction data. - // (Transaction indexes, note commitments, and UTXOs are committed later.) + // Commit block, transaction, and note commitment tree data.
self.prepare_block_header_and_transaction_data_batch(db, &finalized.verified)?; + // The consensus rules are silent on shielded transactions in the genesis block, + // because there aren't any in the mainnet or testnet genesis blocks. + // So this means the genesis anchor is the same as the empty anchor, + // which is already present from height 1 to the first shielded transaction. + // + // In Zebra we include the nullifiers and note commitments in the genesis block because it simplifies our code. + self.prepare_shielded_transaction_batch(db, &finalized.verified)?; + self.prepare_trees_batch(zebra_db, finalized, prev_note_commitment_trees)?; + // # Consensus // // > A transaction MUST NOT spend an output of the genesis block coinbase transaction. @@ -467,34 +474,30 @@ impl DiskWriteBatch { // // https://zips.z.cash/protocol/protocol.pdf#txnconsensus // - // By returning early, Zebra commits the genesis block and transaction data, - // but it ignores the genesis UTXO and value pool updates. - if self.prepare_genesis_batch(db, finalized) { - return Ok(()); + // So we ignore the genesis UTXO, transparent address index, and value pool updates + // for the genesis block. This also ignores genesis shielded value pool updates, but there + // aren't any of those on mainnet or testnet. + if !finalized.verified.height.is_min() { + // Commit transaction indexes + self.prepare_transparent_transaction_batch( + db, + network, + &finalized.verified, + &new_outputs_by_out_loc, + &spent_utxos_by_outpoint, + &spent_utxos_by_out_loc, + address_balances, + )?; + + // Commit UTXOs and value pools + self.prepare_chain_value_pools_batch( + db, + &finalized.verified, + spent_utxos_by_outpoint, + value_pool, + )?; } - // Commit transaction indexes - self.prepare_transparent_transaction_batch( - db, - network, - &finalized.verified, - &new_outputs_by_out_loc, - &spent_utxos_by_outpoint, - &spent_utxos_by_out_loc, - address_balances, - )?; - self.prepare_shielded_transaction_batch(db, &finalized.verified)?; - - self.prepare_trees_batch(zebra_db, finalized, prev_note_commitment_trees)?; - - // Commit UTXOs and value pools - self.prepare_chain_value_pools_batch( - db, - &finalized.verified, - spent_utxos_by_outpoint, - value_pool, - )?; - // The block has passed contextual validation, so update the metrics block_precommit_metrics( &finalized.verified.block, @@ -560,72 +563,4 @@ impl DiskWriteBatch { Ok(()) } - - /// If `finalized.block` is a genesis block, prepares a database batch that finishes - /// initializing the database, and returns `true` without actually writing anything. - /// - /// Since the genesis block's transactions are skipped, the returned genesis batch should be - /// written to the database immediately. - /// - /// If `finalized.block` is not a genesis block, does nothing. - /// - /// # Panics - /// - /// If `finalized.block` is a genesis block, and a note commitment tree in `finalized` doesn't - /// match its corresponding empty tree. - pub fn prepare_genesis_batch( - &mut self, - db: &DiskDb, - finalized: &SemanticallyVerifiedBlockWithTrees, - ) -> bool { - if finalized.verified.block.header.previous_block_hash == GENESIS_PREVIOUS_BLOCK_HASH { - assert_eq!( - *finalized.treestate.note_commitment_trees.sprout, - sprout::tree::NoteCommitmentTree::default(), - "The Sprout tree in the finalized block must match the empty Sprout tree." 
- ); - assert_eq!( - *finalized.treestate.note_commitment_trees.sapling, - sapling::tree::NoteCommitmentTree::default(), - "The Sapling tree in the finalized block must match the empty Sapling tree." - ); - assert_eq!( - *finalized.treestate.note_commitment_trees.orchard, - orchard::tree::NoteCommitmentTree::default(), - "The Orchard tree in the finalized block must match the empty Orchard tree." - ); - - // We want to store the trees of the genesis block together with their roots, and since - // the trees cache the roots after their computation, we trigger the computation. - // - // At the time of writing this comment, the roots are precomputed before this function - // is called, so the roots should already be cached. - finalized.treestate.note_commitment_trees.sprout.root(); - finalized.treestate.note_commitment_trees.sapling.root(); - finalized.treestate.note_commitment_trees.orchard.root(); - - // Insert the empty note commitment trees. Note that these can't be used too early - // (e.g. the Orchard tree before Nu5 activates) since the block validation will make - // sure only appropriate transactions are allowed in a block. - self.zs_insert( - &db.cf_handle("sprout_note_commitment_tree").unwrap(), - finalized.verified.height, - finalized.treestate.note_commitment_trees.sprout.clone(), - ); - self.zs_insert( - &db.cf_handle("sapling_note_commitment_tree").unwrap(), - finalized.verified.height, - finalized.treestate.note_commitment_trees.sapling.clone(), - ); - self.zs_insert( - &db.cf_handle("orchard_note_commitment_tree").unwrap(), - finalized.verified.height, - finalized.treestate.note_commitment_trees.orchard.clone(), - ); - - true - } else { - false - } - } } diff --git a/zebra-state/src/service/finalized_state/zebra_db/block/tests/snapshots/sprout_trees@mainnet_0.snap b/zebra-state/src/service/finalized_state/zebra_db/block/tests/snapshots/sprout_trees@mainnet_0.snap index fc004eddd5a..438e0809a21 100644 --- a/zebra-state/src/service/finalized_state/zebra_db/block/tests/snapshots/sprout_trees@mainnet_0.snap +++ b/zebra-state/src/service/finalized_state/zebra_db/block/tests/snapshots/sprout_trees@mainnet_0.snap @@ -2,4 +2,11 @@ source: zebra-state/src/service/finalized_state/zebra_db/block/tests/snapshot.rs expression: stored_sprout_trees --- -{} +{ + Root((215, 198, 18, 200, 23, 121, 49, 145, 161, 230, 134, 82, 18, 24, 118, 214, 179, 189, 228, 15, 79, 165, 43, 195, 20, 20, 92, 230, 229, 205, 210, 89)): NoteCommitmentTree( + inner: Frontier( + frontier: None, + ), + cached_root: Some(Root((215, 198, 18, 200, 23, 121, 49, 145, 161, 230, 134, 82, 18, 24, 118, 214, 179, 189, 228, 15, 79, 165, 43, 195, 20, 20, 92, 230, 229, 205, 210, 89))), + ), +} diff --git a/zebra-state/src/service/finalized_state/zebra_db/block/tests/snapshots/sprout_trees@testnet_0.snap b/zebra-state/src/service/finalized_state/zebra_db/block/tests/snapshots/sprout_trees@testnet_0.snap index fc004eddd5a..438e0809a21 100644 --- a/zebra-state/src/service/finalized_state/zebra_db/block/tests/snapshots/sprout_trees@testnet_0.snap +++ b/zebra-state/src/service/finalized_state/zebra_db/block/tests/snapshots/sprout_trees@testnet_0.snap @@ -2,4 +2,11 @@ source: zebra-state/src/service/finalized_state/zebra_db/block/tests/snapshot.rs expression: stored_sprout_trees --- -{} +{ + Root((215, 198, 18, 200, 23, 121, 49, 145, 161, 230, 134, 82, 18, 24, 118, 214, 179, 189, 228, 15, 79, 165, 43, 195, 20, 20, 92, 230, 229, 205, 210, 89)): NoteCommitmentTree( + inner: Frontier( + frontier: None, + ), + cached_root: Some(Root((215, 
198, 18, 200, 23, 121, 49, 145, 161, 230, 134, 82, 18, 24, 118, 214, 179, 189, 228, 15, 79, 165, 43, 195, 20, 20, 92, 230, 229, 205, 210, 89))), + ), +} diff --git a/zebra-state/src/service/finalized_state/zebra_db/chain.rs b/zebra-state/src/service/finalized_state/zebra_db/chain.rs index 1e31741b0a0..49649c6c6b9 100644 --- a/zebra-state/src/service/finalized_state/zebra_db/chain.rs +++ b/zebra-state/src/service/finalized_state/zebra_db/chain.rs @@ -14,19 +14,22 @@ use std::{borrow::Borrow, collections::HashMap, sync::Arc}; use zebra_chain::{ - amount::NonNegative, history_tree::HistoryTree, transparent, value_balance::ValueBalance, + amount::NonNegative, block::Height, history_tree::HistoryTree, transparent, + value_balance::ValueBalance, }; use crate::{ - request::SemanticallyVerifiedBlockWithTrees, service::finalized_state::{ disk_db::{DiskDb, DiskWriteBatch, ReadDisk, WriteDisk}, + disk_format::RawBytes, zebra_db::ZebraDb, }, BoxError, SemanticallyVerifiedBlock, }; impl ZebraDb { + // History tree methods + /// Returns the ZIP-221 history tree of the finalized tip. /// /// If history trees have not been activated yet (pre-Heartwood), or the state is empty, @@ -36,22 +39,9 @@ impl ZebraDb { return Arc::<HistoryTree>::default(); } - // # Performance - // - // Using `zs_last_key_value()` on this column family significantly reduces sync performance - // (#7618). But it seems to work for other column families. This is probably because - // `zs_delete()` is also used on the same column family: - // - // - // - // See also the performance notes in: - // - // - // This bug will be fixed by PR #7392, because it changes this column family to update the - // existing key, rather than deleting old keys. let history_tree_cf = self.db.cf_handle("history_tree").unwrap(); - // # Forwards Compatibility + // # Backwards Compatibility // // This code can read the column family format in 1.2.0 and earlier (tip height key), // and after PR #7392 is merged (empty key). The height-based code can be removed when // // # Concurrency // - // There is only one tree in this column family, which is atomically updated by a block - // write batch (database transaction). If this update runs between the height read and - // the tree read, the height will be wrong, and the tree will be missing. - // That could cause consensus bugs. + // There is only one entry in this column family, which is atomically updated by a block + // write batch (database transaction). If we used a height as the key in this column family, + // any updates between reading the tip height and reading the tree could cause panics. // - // Instead, try reading the new empty-key format (from PR #7392) first, - // then read the old format if needed. - // - // See ticket #7581 for more details. - // - // TODO: this concurrency bug will be permanently fixed in PR #7392, - // by changing the block update to overwrite the tree, rather than deleting it. + // So we use the empty key `()`. Since the key has a constant value, we will always read + // the latest tree. let mut history_tree: Option<Arc<HistoryTree>> = self.db.zs_get(&history_tree_cf, &()); if history_tree.is_none() { @@ -84,6 +68,18 @@ history_tree.unwrap_or_default() }
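The reader above tries the new empty-key format first, then falls back to the legacy height-keyed format. A simplified sketch of that lookup, using an in-memory map in place of the RocksDB column family (the 3-byte big-endian height encoding matches the `k: "000001"` keys in the snapshots above; the other names are illustrative assumptions):

```rust
use std::collections::BTreeMap;

/// A stand-in for a RocksDB column family: raw key bytes -> serialized tree.
type ColumnFamily = BTreeMap<Vec<u8>, String>;

/// Reads the tip tree: try the new empty key first, then fall back to the
/// legacy format that indexed the tree by the 3-byte big-endian tip height.
fn tip_tree(cf: &ColumnFamily, legacy_tip_height: u32) -> Option<&String> {
    // New format (state 25.3.0 onwards): a single entry under the empty key,
    // overwritten atomically by every block write batch.
    if let Some(tree) = cf.get(&Vec::new()) {
        return Some(tree);
    }
    // Legacy format: one entry per height; read the one at the tip.
    cf.get(&legacy_tip_height.to_be_bytes()[1..].to_vec())
}

fn main() {
    let mut cf = ColumnFamily::new();

    // A legacy database stores the tip tree under its height key.
    cf.insert(1_000_000u32.to_be_bytes()[1..].to_vec(), "legacy tree".into());
    assert_eq!(tip_tree(&cf, 1_000_000), Some(&"legacy tree".to_string()));

    // After the upgrade, the same tree lives under the empty key,
    // so the constant-key read always sees the latest tree.
    cf.insert(Vec::new(), "upgraded tree".into());
    assert_eq!(tip_tree(&cf, 1_000_000), Some(&"upgraded tree".to_string()));
}
```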
+ /// Returns all the history tip trees. + /// We only store the history tree for the tip, so this method is mainly used in tests. + pub fn history_trees_full_tip( + &self, + ) -> impl Iterator<Item = (RawBytes, Arc<HistoryTree>)> + '_ { + let history_tree_cf = self.db.cf_handle("history_tree").unwrap(); + + self.db.zs_range_iter(&history_tree_cf, ..) + } + + // Value pool methods + /// Returns the stored `ValueBalance` for the best chain at the finalized tip height. pub fn finalized_value_pool(&self) -> ValueBalance<NonNegative> { let value_pool_cf = self.db.cf_handle("tip_chain_value_pool").unwrap(); @@ -94,43 +90,31 @@ } impl DiskWriteBatch { - /// Prepare a database batch containing the history tree updates - /// from `finalized.block`, and return it (without actually writing anything). - /// - /// If this method returns an error, it will be propagated, - /// and the batch should not be written to the database. - /// - /// # Errors - /// - /// - Returns any errors from updating the history tree - #[allow(clippy::unwrap_in_result)] - pub fn prepare_history_batch( - &mut self, - db: &DiskDb, - finalized: &SemanticallyVerifiedBlockWithTrees, - ) -> Result<(), BoxError> { - let history_tree_cf = db.cf_handle("history_tree").unwrap(); + // History tree methods - let height = finalized.verified.height; + /// Updates the history tree for the tip, if it is not empty. + pub fn update_history_tree(&mut self, zebra_db: &ZebraDb, tree: &HistoryTree) { + let history_tree_cf = zebra_db.db.cf_handle("history_tree").unwrap(); - // Update the tree in state - let current_tip_height = height - 1; - if let Some(h) = current_tip_height { - self.zs_delete(&history_tree_cf, h); + if let Some(tree) = tree.as_ref().as_ref() { + self.zs_insert(&history_tree_cf, (), tree); } + } - // TODO: if we ever need concurrent read-only access to the history tree, - // store it by `()`, not height. - // Otherwise, the ReadStateService could access a height - // that was just deleted by a concurrent StateService write. - // This requires a database version update. - if let Some(history_tree) = finalized.treestate.history_tree.as_ref().as_ref() { - self.zs_insert(&history_tree_cf, height, history_tree); - } + /// Legacy method: Deletes the range of history trees at the given [`Height`]s. + /// Doesn't delete the upper bound. + /// + /// From state format 25.3.0 onwards, the history trees are indexed by an empty key, + /// so this delete only affects legacy height-indexed entries, and is a no-op on an + /// upgraded database. + pub fn delete_range_history_tree(&mut self, zebra_db: &ZebraDb, from: &Height, to: &Height) { + let history_tree_cf = zebra_db.db.cf_handle("history_tree").unwrap(); - Ok(()) + // TODO: convert zs_delete_range() to take std::ops::RangeBounds + self.zs_delete_range(&history_tree_cf, from, to); } + // Value pool methods + /// Prepare a database batch containing the chain value pool update from `finalized.block`, /// and return it (without actually writing anything).
/// diff --git a/zebra-state/src/service/finalized_state/zebra_db/metrics.rs b/zebra-state/src/service/finalized_state/zebra_db/metrics.rs index 9f102ec80ed..75b342e2cdc 100644 --- a/zebra-state/src/service/finalized_state/zebra_db/metrics.rs +++ b/zebra-state/src/service/finalized_state/zebra_db/metrics.rs @@ -21,25 +21,9 @@ pub(crate) fn block_precommit_metrics(block: &Block, hash: block::Hash, height: .flat_map(|t| t.outputs().iter()) .count(); - let sprout_nullifier_count = block - .transactions - .iter() - .flat_map(|t| t.sprout_nullifiers()) - .count(); - - let sapling_nullifier_count = block - .transactions - .iter() - .flat_map(|t| t.sapling_nullifiers()) - .count(); - - // Work around a compiler panic (ICE) with flat_map(): - // https://github.com/rust-lang/rust/issues/105044 - let orchard_nullifier_count: usize = block - .transactions - .iter() - .map(|t| t.orchard_nullifiers().count()) - .sum(); + let sprout_nullifier_count = block.sprout_nullifiers().count(); + let sapling_nullifier_count = block.sapling_nullifiers().count(); + let orchard_nullifier_count = block.orchard_nullifiers().count(); tracing::debug!( ?hash, @@ -50,7 +34,8 @@ pub(crate) fn block_precommit_metrics(block: &Block, hash: block::Hash, height: sprout_nullifier_count, sapling_nullifier_count, orchard_nullifier_count, - "preparing to commit finalized block" + "preparing to commit finalized {:?}block", + if height.is_min() { "genesis " } else { "" } ); metrics::counter!("state.finalized.block.count", 1); @@ -60,14 +45,7 @@ pub(crate) fn block_precommit_metrics(block: &Block, hash: block::Hash, height: "state.finalized.cumulative.transactions", transaction_count as u64 ); - metrics::counter!( - "state.finalized.cumulative.transparent_prevouts", - transparent_prevout_count as u64 - ); - metrics::counter!( - "state.finalized.cumulative.transparent_newouts", - transparent_newout_count as u64 - ); + metrics::counter!( "state.finalized.cumulative.sprout_nullifiers", sprout_nullifier_count as u64 @@ -80,4 +58,16 @@ pub(crate) fn block_precommit_metrics(block: &Block, hash: block::Hash, height: "state.finalized.cumulative.orchard_nullifiers", orchard_nullifier_count as u64 ); + + // The outputs from the genesis block can't be spent, so we skip them here. + if !height.is_min() { + metrics::counter!( + "state.finalized.cumulative.transparent_prevouts", + transparent_prevout_count as u64 + ); + metrics::counter!( + "state.finalized.cumulative.transparent_newouts", + transparent_newout_count as u64 + ); + } } diff --git a/zebra-state/src/service/finalized_state/zebra_db/shielded.rs b/zebra-state/src/service/finalized_state/zebra_db/shielded.rs index 58d9e43a6c1..a8e76931b76 100644 --- a/zebra-state/src/service/finalized_state/zebra_db/shielded.rs +++ b/zebra-state/src/service/finalized_state/zebra_db/shielded.rs @@ -30,6 +30,7 @@ use crate::{ request::SemanticallyVerifiedBlockWithTrees, service::finalized_state::{ disk_db::{DiskDb, DiskWriteBatch, ReadDisk, WriteDisk}, + disk_format::RawBytes, zebra_db::ZebraDb, }, BoxError, SemanticallyVerifiedBlock, @@ -61,7 +62,7 @@ impl ZebraDb { } /// Returns `true` if the finalized state contains `sprout_anchor`. 
- #[allow(unused)] + #[allow(dead_code)] pub fn contains_sprout_anchor(&self, sprout_anchor: &sprout::tree::Root) -> bool { let sprout_anchors = self.db.cf_handle("sprout_anchors").unwrap(); self.db.zs_contains(&sprout_anchors, &sprout_anchor) @@ -88,17 +89,9 @@ impl ZebraDb { return Arc::<sprout::tree::NoteCommitmentTree>::default(); } - // # Performance - // - // Using `zs_last_key_value()` on this column family significantly reduces sync performance - // (#7618). This is probably because `zs_delete()` is also used on the same column family. - // See the comment in `ZebraDb::history_tree()` for details. - // - // This bug will be fixed by PR #7392, because it changes this column family to update the - // existing key, rather than deleting old keys. let sprout_tree_cf = self.db.cf_handle("sprout_note_commitment_tree").unwrap(); - // # Forwards Compatibility + // # Backwards Compatibility // // This code can read the column family format in 1.2.0 and earlier (tip height key), // and after PR #7392 is merged (empty key). The height-based code can be removed when // // # Concurrency // - // There is only one tree in this column family, which is atomically updated by a block - // write batch (database transaction). If this update runs between the height read and - // the tree read, the height will be wrong, and the tree will be missing. - // That could cause consensus bugs. + // There is only one entry in this column family, which is atomically updated by a block + // write batch (database transaction). If we used a height as the key in this column family, + // any updates between reading the tip height and reading the tree could cause panics. // - // See the comment in `ZebraDb::history_tree()` for details. - // - // TODO: this concurrency bug will be permanently fixed in PR #7392, - // by changing the block update to overwrite the tree, rather than deleting it. + // So we use the empty key `()`. Since the key has a constant value, we will always read + // the latest tree. let mut sprout_tree: Option<Arc<sprout::tree::NoteCommitmentTree>> = self.db.zs_get(&sprout_tree_cf, &()); @@ -147,7 +137,7 @@ /// Returns all the Sprout note commitment trees in the database. /// /// Calling this method can load a lot of data into RAM, and delay block commit transactions. - #[allow(dead_code, clippy::unwrap_in_result)] + #[allow(dead_code)] pub fn sprout_trees_full_map( &self, ) -> HashMap<sprout::tree::Root, Arc<sprout::tree::NoteCommitmentTree>> { @@ -157,6 +147,15 @@ .zs_items_in_range_unordered(&sprout_anchors_handle, ..) } + /// Returns all the Sprout note commitment tip trees. + /// We only store the sprout tree for the tip, so this method is mainly used in tests. + pub fn sprout_trees_full_tip( + &self, + ) -> impl Iterator<Item = (RawBytes, Arc<sprout::tree::NoteCommitmentTree>)> + '_ { + let sprout_trees = self.db.cf_handle("sprout_note_commitment_tree").unwrap(); + self.db.zs_range_iter(&sprout_trees, ..) + } + // # Sapling trees /// Returns the Sapling note commitment tree of the finalized tip or the empty tree if the state @@ -200,7 +199,6 @@ } /// Returns the Sapling note commitment trees in the supplied range, in increasing height order. - #[allow(clippy::unwrap_in_result)] pub fn sapling_tree_by_height_range<R>( &self, range: R, @@ -213,7 +211,6 @@ } /// Returns the Sapling note commitment trees in the reversed range, in decreasing height order. - #[allow(clippy::unwrap_in_result)] pub fn sapling_tree_by_reversed_height_range<R>( &self, range: R, @@ -258,7 +255,6 @@ /// /// This method is specifically designed for the `z_getsubtreesbyindex` state request.
/// It might not work for other RPCs or state checks. - #[allow(clippy::unwrap_in_result)] pub fn sapling_subtree_list_by_index_for_rpc( &self, start_index: NoteCommitmentSubtreeIndex, @@ -373,7 +369,6 @@ } /// Returns the Orchard note commitment trees in the supplied range, in increasing height order. - #[allow(clippy::unwrap_in_result)] pub fn orchard_tree_by_height_range<R>( &self, range: R, @@ -386,7 +381,6 @@ } /// Returns the Orchard note commitment trees in the reversed range, in decreasing height order. - #[allow(clippy::unwrap_in_result)] pub fn orchard_tree_by_reversed_height_range<R>( &self, range: R, @@ -431,7 +425,6 @@ /// /// This method is specifically designed for the `z_getsubtreesbyindex` state request. /// It might not work for other RPCs or state checks. - #[allow(clippy::unwrap_in_result)] pub fn orchard_subtree_list_by_index_for_rpc( &self, start_index: NoteCommitmentSubtreeIndex, @@ -589,74 +582,112 @@ finalized: &SemanticallyVerifiedBlockWithTrees, prev_note_commitment_trees: Option<NoteCommitmentTrees>, ) -> Result<(), BoxError> { - let db = &zebra_db.db; - - let sprout_anchors = db.cf_handle("sprout_anchors").unwrap(); - let sapling_anchors = db.cf_handle("sapling_anchors").unwrap(); - let orchard_anchors = db.cf_handle("orchard_anchors").unwrap(); - - let sprout_tree_cf = db.cf_handle("sprout_note_commitment_tree").unwrap(); - let sapling_tree_cf = db.cf_handle("sapling_note_commitment_tree").unwrap(); - let orchard_tree_cf = db.cf_handle("orchard_note_commitment_tree").unwrap(); - let height = finalized.verified.height; let trees = finalized.treestate.note_commitment_trees.clone(); - // Use the cached values that were previously calculated in parallel. - let sprout_root = trees.sprout.root(); - let sapling_root = trees.sapling.root(); - let orchard_root = trees.orchard.root(); - - // Index the new anchors. - // Note: if the root hasn't changed, we write the same value again. - self.zs_insert(&sprout_anchors, sprout_root, &trees.sprout); - self.zs_insert(&sapling_anchors, sapling_root, ()); - self.zs_insert(&orchard_anchors, orchard_root, ()); - - // Delete the previously stored Sprout note commitment tree. - let current_tip_height = height - 1; - if let Some(h) = current_tip_height { - self.zs_delete(&sprout_tree_cf, h); + let prev_sprout_tree = prev_note_commitment_trees.as_ref().map_or_else( + || zebra_db.sprout_tree_for_tip(), + |prev_trees| prev_trees.sprout.clone(), + ); + let prev_sapling_tree = prev_note_commitment_trees.as_ref().map_or_else( + || zebra_db.sapling_tree_for_tip(), + |prev_trees| prev_trees.sapling.clone(), + ); + let prev_orchard_tree = prev_note_commitment_trees.as_ref().map_or_else( + || zebra_db.orchard_tree_for_tip(), + |prev_trees| prev_trees.orchard.clone(), + ); + + // Update the Sprout tree and store its anchor only if it has changed + if height.is_min() || prev_sprout_tree != trees.sprout { + self.update_sprout_tree(zebra_db, &trees.sprout) } - // TODO: if we ever need concurrent read-only access to the sprout tree, - // store it by `()`, not height. Otherwise, the ReadStateService could - // access a height that was just deleted by a concurrent StateService - // write. This requires a database version update. - self.zs_insert(&sprout_tree_cf, height, trees.sprout); - - // Store the Sapling tree only if it is not already present at the previous height.
- if height.is_min() - || prev_note_commitment_trees.as_ref().map_or_else( - || zebra_db.sapling_tree_for_tip(), - |trees| trees.sapling.clone(), - ) != trees.sapling - { - self.zs_insert(&sapling_tree_cf, height, trees.sapling); - } + // Store the Sapling tree, anchor, and any new subtrees only if they have changed + if height.is_min() || prev_sapling_tree != trees.sapling { + self.create_sapling_tree(zebra_db, &height, &trees.sapling); - // Store the Orchard tree only if it is not already present at the previous height. - if height.is_min() - || prev_note_commitment_trees - .map_or_else(|| zebra_db.orchard_tree_for_tip(), |trees| trees.orchard) - != trees.orchard - { - self.zs_insert(&orchard_tree_cf, height, trees.orchard); + if let Some(subtree) = trees.sapling_subtree { + self.insert_sapling_subtree(zebra_db, &subtree); + } } - if let Some(subtree) = trees.sapling_subtree { - self.insert_sapling_subtree(zebra_db, &subtree); - } + // Store the Orchard tree, anchor, and any new subtrees only if they have changed + if height.is_min() || prev_orchard_tree != trees.orchard { + self.create_orchard_tree(zebra_db, &height, &trees.orchard); - if let Some(subtree) = trees.orchard_subtree { - self.insert_orchard_subtree(zebra_db, &subtree); + if let Some(subtree) = trees.orchard_subtree { + self.insert_orchard_subtree(zebra_db, &subtree); + } } - self.prepare_history_batch(db, finalized) + self.update_history_tree(zebra_db, &finalized.treestate.history_tree); + + Ok(()) + } + + // Sprout tree methods + + /// Updates the Sprout note commitment tree for the tip, and the Sprout anchors. + pub fn update_sprout_tree( + &mut self, + zebra_db: &ZebraDb, + tree: &sprout::tree::NoteCommitmentTree, + ) { + let sprout_anchors = zebra_db.db.cf_handle("sprout_anchors").unwrap(); + let sprout_tree_cf = zebra_db + .db + .cf_handle("sprout_note_commitment_tree") + .unwrap(); + + // Sprout lookups need all previous trees by their anchors. + // The root must be calculated first, so it is cached in the database. + self.zs_insert(&sprout_anchors, tree.root(), tree); + self.zs_insert(&sprout_tree_cf, (), tree); + } + + /// Legacy method: Deletes the range of Sprout note commitment trees at the given [`Height`]s. + /// Doesn't delete anchors from the anchor index. Doesn't delete the upper bound. + /// + /// From state format 25.3.0 onwards, the Sprout trees are indexed by an empty key, + /// so this delete only affects legacy height-indexed entries, and is a no-op on an + /// upgraded database. + pub fn delete_range_sprout_tree(&mut self, zebra_db: &ZebraDb, from: &Height, to: &Height) { + let sprout_tree_cf = zebra_db + .db + .cf_handle("sprout_note_commitment_tree") + .unwrap(); + + // TODO: convert zs_delete_range() to take std::ops::RangeBounds + self.zs_delete_range(&sprout_tree_cf, from, to); + } + + /// Deletes the given Sprout note commitment tree `anchor`. + #[allow(dead_code)] + pub fn delete_sprout_anchor(&mut self, zebra_db: &ZebraDb, anchor: &sprout::tree::Root) { + let sprout_anchors = zebra_db.db.cf_handle("sprout_anchors").unwrap(); + self.zs_delete(&sprout_anchors, anchor); } // Sapling tree methods + /// Inserts or overwrites the Sapling note commitment tree at the given [`Height`], + /// and the Sapling anchors.
+ pub fn create_sapling_tree( + &mut self, + zebra_db: &ZebraDb, + height: &Height, + tree: &sapling::tree::NoteCommitmentTree, + ) { + let sapling_anchors = zebra_db.db.cf_handle("sapling_anchors").unwrap(); + let sapling_tree_cf = zebra_db + .db + .cf_handle("sapling_note_commitment_tree") + .unwrap(); + + self.zs_insert(&sapling_anchors, tree.root(), ()); + self.zs_insert(&sapling_tree_cf, height, tree); + } + /// Inserts the Sapling note commitment subtree into the batch. pub fn insert_sapling_subtree( &mut self, @@ -679,7 +710,8 @@ impl DiskWriteBatch { self.zs_delete(&sapling_tree_cf, height); } - /// Deletes the range of Sapling note commitment trees at the given [`Height`]s. Doesn't delete the upper bound. + /// Deletes the range of Sapling note commitment trees at the given [`Height`]s. + /// Doesn't delete anchors from the anchor index. Doesn't delete the upper bound. #[allow(dead_code)] pub fn delete_range_sapling_tree(&mut self, zebra_db: &ZebraDb, from: &Height, to: &Height) { let sapling_tree_cf = zebra_db @@ -691,6 +723,13 @@ impl DiskWriteBatch { self.zs_delete_range(&sapling_tree_cf, from, to); } + /// Deletes the given Sapling note commitment tree `anchor`. + #[allow(dead_code)] + pub fn delete_sapling_anchor(&mut self, zebra_db: &ZebraDb, anchor: &sapling::tree::Root) { + let sapling_anchors = zebra_db.db.cf_handle("sapling_anchors").unwrap(); + self.zs_delete(&sapling_anchors, anchor); + } + /// Deletes the range of Sapling subtrees at the given [`NoteCommitmentSubtreeIndex`]es. /// Doesn't delete the upper bound. pub fn delete_range_sapling_subtree( @@ -710,6 +749,24 @@ impl DiskWriteBatch { // Orchard tree methods + /// Inserts or overwrites the Orchard note commitment tree at the given [`Height`], + /// and the Orchard anchors. + pub fn create_orchard_tree( + &mut self, + zebra_db: &ZebraDb, + height: &Height, + tree: &orchard::tree::NoteCommitmentTree, + ) { + let orchard_anchors = zebra_db.db.cf_handle("orchard_anchors").unwrap(); + let orchard_tree_cf = zebra_db + .db + .cf_handle("orchard_note_commitment_tree") + .unwrap(); + + self.zs_insert(&orchard_anchors, tree.root(), ()); + self.zs_insert(&orchard_tree_cf, height, tree); + } + /// Inserts the Orchard note commitment subtree into the batch. pub fn insert_orchard_subtree( &mut self, @@ -732,7 +789,8 @@ impl DiskWriteBatch { self.zs_delete(&orchard_tree_cf, height); } - /// Deletes the range of Orchard note commitment trees at the given [`Height`]s. Doesn't delete the upper bound. + /// Deletes the range of Orchard note commitment trees at the given [`Height`]s. + /// Doesn't delete anchors from the anchor index. Doesn't delete the upper bound. #[allow(dead_code)] pub fn delete_range_orchard_tree(&mut self, zebra_db: &ZebraDb, from: &Height, to: &Height) { let orchard_tree_cf = zebra_db @@ -744,6 +802,13 @@ impl DiskWriteBatch { self.zs_delete_range(&orchard_tree_cf, from, to); } + /// Deletes the given Orchard note commitment tree `anchor`. + #[allow(dead_code)] + pub fn delete_orchard_anchor(&mut self, zebra_db: &ZebraDb, anchor: &orchard::tree::Root) { + let orchard_anchors = zebra_db.db.cf_handle("orchard_anchors").unwrap(); + self.zs_delete(&orchard_anchors, anchor); + } + /// Deletes the range of Orchard subtrees at the given [`NoteCommitmentSubtreeIndex`]es. /// Doesn't delete the upper bound. 
pub fn delete_range_orchard_subtree( diff --git a/zebra-state/src/tests/setup.rs b/zebra-state/src/tests/setup.rs index 296ee10a0e1..1432e72f368 100644 --- a/zebra-state/src/tests/setup.rs +++ b/zebra-state/src/tests/setup.rs @@ -92,9 +92,11 @@ pub(crate) fn new_state_with_mainnet_genesis( let config = Config::ephemeral(); let network = Mainnet; - let mut finalized_state = FinalizedState::new( + let mut finalized_state = FinalizedState::new_with_debug( &config, network, + // The tests that use this setup function also commit invalid blocks to the state. + true, #[cfg(feature = "elasticsearch")] None, ); diff --git a/zebra-test/src/command.rs b/zebra-test/src/command.rs index d65d438307f..18a529fe32d 100644 --- a/zebra-test/src/command.rs +++ b/zebra-test/src/command.rs @@ -1,6 +1,7 @@ //! Launching test commands for Zebra integration and acceptance tests. use std::{ + collections::HashSet, convert::Infallible as NoDir, fmt::{self, Debug, Write as _}, io::{BufRead, BufReader, ErrorKind, Read, Write as _}, @@ -25,7 +26,7 @@ mod arguments; pub mod to_regex; pub use self::arguments::Arguments; -use self::to_regex::{CollectRegexSet, ToRegexSet}; +use self::to_regex::{CollectRegexSet, RegexSetExt, ToRegexSet}; /// A super-trait for [`Iterator`] + [`Debug`]. pub trait IteratorDebug: Iterator + Debug {} @@ -781,7 +782,7 @@ impl TestChild { self } - /// Checks each line of the child's stdout against `success_regex`, + /// Checks each line of the child's stdout against any regex in `success_regex`, /// and returns the first matching line. Prints all stdout lines. /// /// Kills the child on error, or after the configured timeout has elapsed. @@ -815,7 +816,7 @@ impl TestChild { } } - /// Checks each line of the child's stderr against `success_regex`, + /// Checks each line of the child's stderr against any regex in `success_regex`, /// and returns the first matching line. Prints all stderr lines to stdout. /// /// Kills the child on error, or after the configured timeout has elapsed. @@ -847,6 +848,96 @@ impl TestChild { } } + /// Checks each line of the child's stdout, until it finds every regex in `unordered_regexes`, + /// and returns all lines matched by any regex, until each regex has been matched at least once. + /// If the output finishes or the command times out before all regexes are matched, returns an error with + /// a list of unmatched regexes. Prints all stdout lines. + /// + /// Kills the child on error, or after the configured timeout has elapsed. + /// See [`Self::expect_line_matching_regex_set`] for details. 
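The loop implemented by this method (and its stderr twin below) is easier to see in isolation: take the next line that matches any pattern in the set, remove the matched pattern indexes from the unmatched set, and stop once every pattern has matched at least once. A standalone sketch over an in-memory line source, using the `regex` crate's `RegexSet` directly (error handling simplified):

```rust
use std::collections::HashSet;

use regex::RegexSet; // regex = "1"

/// Consumes `lines` until every pattern in `set` has matched at least once.
/// Returns the matched lines, or the still-unmatched patterns on failure.
fn match_all_unordered<'a>(
    set: &RegexSet,
    lines: impl Iterator<Item = &'a str>,
) -> Result<Vec<String>, Vec<String>> {
    let mut unmatched: HashSet<usize> = (0..set.len()).collect();
    let mut matched_lines = Vec::new();

    for line in lines {
        let matches: HashSet<usize> = set.matches(line).iter().collect();
        if !matches.is_empty() {
            matched_lines.push(line.to_string());
            unmatched = &unmatched - &matches;
            if unmatched.is_empty() {
                return Ok(matched_lines);
            }
        }
    }

    // The line source finished before every pattern matched: report the
    // leftover patterns, like the "Unmatched regexes:" error section above.
    let patterns = set.patterns();
    Err(unmatched.iter().map(|&i| patterns[i].clone()).collect())
}

fn main() {
    let set = RegexSet::new(["state upgrade", "RPC endpoint"]).unwrap();
    let log = ["starting", "Opened RPC endpoint at 127.0.0.1", "state upgrade done"];
    assert!(match_all_unordered(&set, log.into_iter()).is_ok());
}
```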
+ // + // TODO: these methods could block if stderr is full and stdout is waiting for stderr to be read + #[instrument(skip(self))] + #[allow(clippy::unwrap_in_result)] + pub fn expect_stdout_line_matches_all_unordered<RegexList>( + &mut self, + unordered_regexes: RegexList, + ) -> Result<Vec<String>> + where + RegexList: IntoIterator + Debug, + RegexList::Item: ToRegexSet, + { + let regex_list = unordered_regexes.collect_regex_set()?; + + let mut unmatched_indexes: HashSet<usize> = (0..regex_list.len()).collect(); + let mut matched_lines = Vec::new(); + + while !unmatched_indexes.is_empty() { + let line = self + .expect_stdout_line_matches(regex_list.clone()) + .map_err(|err| { + let unmatched_regexes = regex_list.patterns_for_indexes(&unmatched_indexes); + + err.with_section(|| { + format!("{unmatched_regexes:#?}").header("Unmatched regexes:") + }) + .with_section(|| format!("{matched_lines:#?}").header("Matched lines:")) + })?; + + let matched_indices: HashSet<usize> = regex_list.matches(&line).iter().collect(); + unmatched_indexes = &unmatched_indexes - &matched_indices; + + matched_lines.push(line); + } + + Ok(matched_lines) + } + + /// Checks each line of the child's stderr, until it finds every regex in `unordered_regexes`, + /// and returns all lines matched by any regex, until each regex has been matched at least once. + /// If the output finishes or the command times out before all regexes are matched, returns an error with + /// a list of unmatched regexes. Prints all stderr lines. + /// + /// Kills the child on error, or after the configured timeout has elapsed. + /// See [`Self::expect_line_matching_regex_set`] for details. + // + // TODO: these methods could block if stdout is full and stderr is waiting for stdout to be read + #[instrument(skip(self))] + #[allow(clippy::unwrap_in_result)] + pub fn expect_stderr_line_matches_all_unordered<RegexList>( + &mut self, + unordered_regexes: RegexList, + ) -> Result<Vec<String>> + where + RegexList: IntoIterator + Debug, + RegexList::Item: ToRegexSet, + { + let regex_list = unordered_regexes.collect_regex_set()?; + + let mut unmatched_indexes: HashSet<usize> = (0..regex_list.len()).collect(); + let mut matched_lines = Vec::new(); + + while !unmatched_indexes.is_empty() { + let line = self + .expect_stderr_line_matches(regex_list.clone()) + .map_err(|err| { + let unmatched_regexes = regex_list.patterns_for_indexes(&unmatched_indexes); + + err.with_section(|| { + format!("{unmatched_regexes:#?}").header("Unmatched regexes:") + }) + .with_section(|| format!("{matched_lines:#?}").header("Matched lines:")) + })?; + + let matched_indices: HashSet<usize> = regex_list.matches(&line).iter().collect(); + unmatched_indexes = &unmatched_indexes - &matched_indices; + + matched_lines.push(line); + } + + Ok(matched_lines) + } + /// Checks each line of the child's stdout against `success_regex`, /// and returns the first matching line. Does not print any output. /// diff --git a/zebra-test/src/command/to_regex.rs b/zebra-test/src/command/to_regex.rs index 66e00c874e0..0d362394f1f 100644 --- a/zebra-test/src/command/to_regex.rs +++ b/zebra-test/src/command/to_regex.rs @@ -1,6 +1,6 @@ //! Convenience traits for converting to [`Regex`] and [`RegexSet`]. -use std::iter; +use std::{collections::HashSet, iter}; use itertools::Itertools; use regex::{Error, Regex, RegexBuilder, RegexSet, RegexSetBuilder}; @@ -151,3 +151,20 @@ RegexSet::new(regexes) } } + +/// A trait for getting additional information from a [`RegexSet`]. +pub trait RegexSetExt { + /// Returns the regex patterns for the supplied `indexes`.
+ fn patterns_for_indexes(&self, indexes: &HashSet<usize>) -> Vec<String>; +} + +impl RegexSetExt for RegexSet { + fn patterns_for_indexes(&self, indexes: &HashSet<usize>) -> Vec<String> { + self.patterns() + .iter() + .enumerate() + .filter(|(index, _regex)| indexes.contains(index)) + .map(|(_index, regex)| regex.to_string()) + .collect() + } +} diff --git a/zebrad/tests/acceptance.rs b/zebrad/tests/acceptance.rs index 0c40df31a50..c51bdc98949 100644 --- a/zebrad/tests/acceptance.rs +++ b/zebrad/tests/acceptance.rs @@ -190,7 +190,9 @@ use common::{ test_type::TestType::{self, *}, }; -use crate::common::cached_state::{wait_for_state_version_message, wait_for_state_version_upgrade}; +use crate::common::cached_state::{ + wait_for_state_version_message, wait_for_state_version_upgrade, DATABASE_FORMAT_UPGRADE_IS_LONG, +}; /// The maximum amount of time that we allow the creation of a future to block the `tokio` executor. /// @@ -1847,18 +1849,36 @@ fn lightwalletd_integration_test(test_type: TestType) -> Result<()> { zebrad.expect_stdout_line_matches("loaded Zebra state cache .*tip.*=.*None")?; } - // Launch lightwalletd, if needed - let lightwalletd_and_port = if test_type.launches_lightwalletd() { + // Wait for the state to upgrade and the RPC port, if the upgrade is short. + // + // If incompletely upgraded states get written to the CI cache, + // change DATABASE_FORMAT_UPGRADE_IS_LONG to true. + if test_type.launches_lightwalletd() && !DATABASE_FORMAT_UPGRADE_IS_LONG { tracing::info!( ?test_type, ?zebra_rpc_address, "waiting for zebrad to open its RPC port..." ); - zebrad.expect_stdout_line_matches(format!( - "Opened RPC endpoint at {}", - zebra_rpc_address.expect("lightwalletd test must have RPC port") - ))?; + wait_for_state_version_upgrade( + &mut zebrad, + &state_version_message, + database_format_version_in_code(), + [format!( + "Opened RPC endpoint at {}", + zebra_rpc_address.expect("lightwalletd test must have RPC port") + )], + )?; + } else { + wait_for_state_version_upgrade( + &mut zebrad, + &state_version_message, + database_format_version_in_code(), + None, + )?; + } + // Launch lightwalletd, if needed + let lightwalletd_and_port = if test_type.launches_lightwalletd() { tracing::info!( ?zebra_rpc_address, "launching lightwalletd connected to zebrad", ); @@ -1957,17 +1977,17 @@ fn lightwalletd_integration_test(test_type: TestType) -> Result<()> { use_internet_connection, )?; - // Before we write a cached state image, wait for a database upgrade. - // - // TODO: this line will hang if the state upgrade finishes before zebra is synced. - // But that is unlikely with the 25.2 upgrade, because it takes 20+ minutes. - // If it happens for a later upgrade, this code can be moved earlier in the test, - // as long as all the cached states are version 25.2.2 or later. - wait_for_state_version_upgrade( - &mut zebrad, - &state_version_message, - database_format_version_in_code(), - )?; + // Wait for the state to upgrade, if the upgrade is long. + // If this line hangs, change DATABASE_FORMAT_UPGRADE_IS_LONG to false, + // or combine "wait for sync" with "wait for state version upgrade".
+ if DATABASE_FORMAT_UPGRADE_IS_LONG { + wait_for_state_version_upgrade( + &mut zebrad, + &state_version_message, + database_format_version_in_code(), + None, + )?; + } (zebrad, Some(lightwalletd)) } @@ -1984,17 +2004,16 @@ fn lightwalletd_integration_test(test_type: TestType) -> Result<()> { tracing::info!(?test_type, "waiting for zebrad to sync to the tip"); zebrad.expect_stdout_line_matches(SYNC_FINISHED_REGEX)?; - // Before we write a cached state image, wait for a database upgrade. - // - // TODO: this line will hang if the state upgrade finishes before zebra is synced. - // But that is unlikely with the 25.2 upgrade, because it takes 20+ minutes. - // If it happens for a later upgrade, this code can be moved earlier in the test, - // as long as all the cached states are version 25.2.2 or later. - wait_for_state_version_upgrade( - &mut zebrad, - &state_version_message, - database_format_version_in_code(), - )?; + // Wait for the state to upgrade, if the upgrade is long. + // If this line hangs, change DATABASE_FORMAT_UPGRADE_IS_LONG to false. + if DATABASE_FORMAT_UPGRADE_IS_LONG { + wait_for_state_version_upgrade( + &mut zebrad, + &state_version_message, + database_format_version_in_code(), + None, + )?; + } (zebrad, None) } @@ -2719,6 +2738,15 @@ async fn fully_synced_rpc_z_getsubtreesbyindex_snapshot_test() -> Result<()> { // Store the state version message so we can wait for the upgrade later if needed. let state_version_message = wait_for_state_version_message(&mut zebrad)?; + // It doesn't matter how long the state version upgrade takes, + // because the sync finished regex is repeated every minute. + wait_for_state_version_upgrade( + &mut zebrad, + &state_version_message, + database_format_version_in_code(), + None, + )?; + // Wait for zebrad to load the full cached blockchain. zebrad.expect_stdout_line_matches(SYNC_FINISHED_REGEX)?; @@ -2758,18 +2786,6 @@ async fn fully_synced_rpc_z_getsubtreesbyindex_snapshot_test() -> Result<()> { ), ]; - // Before we write a cached state image, wait for a database upgrade. - // - // TODO: this line will hang if the state upgrade finishes before zebra is synced. - // But that is unlikely with the 25.2 upgrade, because it takes 20+ minutes. - // If it happens for a later upgrade, this code can be moved earlier in the test, - // as long as all the cached states are version 25.2.2 or later. - wait_for_state_version_upgrade( - &mut zebrad, - &state_version_message, - database_format_version_in_code(), - )?; - for i in zcashd_test_vectors { let res = client.call("z_getsubtreesbyindex", i.1).await?; let body = res.bytes().await; diff --git a/zebrad/tests/common/cached_state.rs b/zebrad/tests/common/cached_state.rs index 588d889b562..b893024c9ad 100644 --- a/zebrad/tests/common/cached_state.rs +++ b/zebrad/tests/common/cached_state.rs @@ -6,6 +6,7 @@ #![allow(dead_code)] use std::{ + iter, path::{Path, PathBuf}, time::Duration, }; @@ -39,6 +40,16 @@ pub const ZEBRA_CACHED_STATE_DIR: &str = "ZEBRA_CACHED_STATE_DIR"; /// but long enough that it doesn't impact performance. pub const DATABASE_FORMAT_CHECK_INTERVAL: Duration = Duration::from_secs(5 * 60); +/// Is the current state version upgrade longer than the typical CI update sync time? +/// This is the time it takes Zebra to sync from a previously cached state to the current tip. +/// +/// If this is set to `false`, but the state upgrades finish after zebrad is synced, +/// incomplete upgrades will be written to the cached state. 
+/// +/// If this is set to `true`, but the state upgrades finish before zebrad is synced, +/// some tests will hang. +pub const DATABASE_FORMAT_UPGRADE_IS_LONG: bool = false; + /// Type alias for a boxed state service. pub type BoxStateService = BoxService<zebra_state::Request, zebra_state::Response, zebra_state::BoxError>; @@ -64,6 +75,7 @@ pub fn wait_for_state_version_message(zebrad: &mut TestChild<PathBuf>) -> Result<String> pub fn wait_for_state_version_upgrade( zebrad: &mut TestChild<PathBuf>, state_version_message: &str, required_version: Version, + extra_required_log_regexes: impl IntoIterator<Item = String> + std::fmt::Debug, ) -> Result<()> { if state_version_message.contains("launching upgrade task") { tracing::info!( zebrad = ?zebrad.cmd, %state_version_message, %required_version, + ?extra_required_log_regexes, "waiting for zebrad state upgrade..." ); - let upgrade_message = zebrad.expect_stdout_line_matches(&format!( + let upgrade_pattern = format!( "marked database format as upgraded.*format_upgrade_version.*=.*{required_version}" - ))?; + ); + let extra_required_log_regexes = extra_required_log_regexes.into_iter(); + let required_logs: Vec<String> = iter::once(upgrade_pattern) + .chain(extra_required_log_regexes) + .collect(); + + let upgrade_messages = zebrad.expect_stdout_line_matches_all_unordered(&required_logs)?; tracing::info!( zebrad = ?zebrad.cmd, %state_version_message, %required_version, - %upgrade_message, + ?required_logs, + ?upgrade_messages, "zebrad state has been upgraded" ); } diff --git a/zebrad/tests/common/checkpoints.rs b/zebrad/tests/common/checkpoints.rs index c43b5ca7e92..a1aa1dbb43c 100644 --- a/zebrad/tests/common/checkpoints.rs +++ b/zebrad/tests/common/checkpoints.rs @@ -87,12 +87,19 @@ pub async fn run(network: Network) -> Result<()> { // Before we write a cached state image, wait for a database upgrade. // + // It is ok if the logs are in the wrong order and the test sometimes fails, + // because testnet is unreliable anyway. + // // TODO: this line will hang if the state upgrade is slower than the RPC server spawn. // But that is unlikely, because both 25.1 and 25.2 are quick on testnet. + // + // TODO: combine this check with the CHECKPOINT_VERIFIER_REGEX and RPC endpoint checks. + // This is tricky because we need to get the last checkpoint log. wait_for_state_version_upgrade( &mut zebrad, &state_version_message, database_format_version_in_code(), + None, )?; } @@ -105,6 +112,7 @@ ); let last_checkpoint = zebrad.expect_stdout_line_matches(CHECKPOINT_VERIFIER_REGEX)?; + // TODO: do this with a regex? let (_prefix, last_checkpoint) = last_checkpoint .split_once("max_checkpoint_height") diff --git a/zebrad/tests/common/lightwalletd/wallet_grpc_test.rs b/zebrad/tests/common/lightwalletd/wallet_grpc_test.rs index 2d7d2d06b87..3030f1c63ba 100644 --- a/zebrad/tests/common/lightwalletd/wallet_grpc_test.rs +++ b/zebrad/tests/common/lightwalletd/wallet_grpc_test.rs @@ -43,10 +43,13 @@ use zebra_chain::{ parameters::NetworkUpgrade::{Nu5, Sapling}, serialization::ZcashDeserializeInto, }; -use zebra_state::latest_version_for_adding_subtrees; +use zebra_state::database_format_version_in_code; use crate::common::{ - cached_state::{wait_for_state_version_message, wait_for_state_version_upgrade}, + cached_state::{ + wait_for_state_version_message, wait_for_state_version_upgrade, + DATABASE_FORMAT_UPGRADE_IS_LONG, + }, launch::spawn_zebrad_for_rpc, lightwalletd::{ can_spawn_lightwalletd_for_rpc, spawn_lightwalletd_for_rpc, @@ -107,7 +110,22 @@ ?zebra_rpc_address, "launched zebrad, waiting for zebrad to open its RPC port..."
); - zebrad.expect_stdout_line_matches(&format!("Opened RPC endpoint at {zebra_rpc_address}"))?; + + // Wait for the state to upgrade, if the upgrade is short. + // + // If incompletely upgraded states get written to the CI cache, + // change DATABASE_FORMAT_UPGRADE_IS_LONG to true. + // + // If this line hangs, move it before the RPC port check. + // (The RPC port is usually much faster than even a quick state upgrade.) + if !DATABASE_FORMAT_UPGRADE_IS_LONG { + wait_for_state_version_upgrade( + &mut zebrad, + &state_version_message, + database_format_version_in_code(), + [format!("Opened RPC endpoint at {zebra_rpc_address}")], + )?; + } tracing::info!( ?zebra_rpc_address, @@ -135,6 +153,17 @@ pub async fn run() -> Result<()> { use_internet_connection, )?; + // Wait for the state to upgrade, if the upgrade is long. + // If this line hangs, change DATABASE_FORMAT_UPGRADE_IS_LONG to false. + if DATABASE_FORMAT_UPGRADE_IS_LONG { + wait_for_state_version_upgrade( + &mut zebrad, + &state_version_message, + database_format_version_in_code(), + None, + )?; + } + tracing::info!( ?lightwalletd_rpc_port, "connecting gRPC client to lightwalletd...", @@ -384,18 +413,6 @@ pub async fn run() -> Result<()> { zebrad::application::user_agent() ); - // Before we call `z_getsubtreesbyindex`, we might need to wait for a database upgrade. - // - // TODO: this line will hang if the state upgrade finishes before the subtree tests start. - // But that is unlikely with the 25.2 upgrade, because it takes 20+ minutes. - // If it happens for a later upgrade, this code can be moved earlier in the test, - // as long as all the cached states are version 25.2.2 or later. - wait_for_state_version_upgrade( - &mut zebrad, - &state_version_message, - latest_version_for_adding_subtrees(), - )?; - // Call `z_getsubtreesbyindex` separately for... // ... Sapling.
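For reference, the RPC these test vectors exercise takes a pool name, a start index, and an optional limit on the number of returned subtrees. A hedged sketch of calling it by hand against a local zebrad RPC port (the address and the exact response fields are assumptions; zebrad must already be serving the RPC):

```rust
// [dependencies]
// reqwest = { version = "0.11", features = ["json"] }
// serde_json = "1"
// tokio = { version = "1", features = ["full"] }

use serde_json::{json, Value};

#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
    // Assumed local RPC address; use the port zebrad logs as its RPC endpoint.
    let url = "http://127.0.0.1:8232";

    // z_getsubtreesbyindex params: pool ("sapling" or "orchard"),
    // start index, and an optional limit.
    let request = json!({
        "jsonrpc": "2.0",
        "id": 1,
        "method": "z_getsubtreesbyindex",
        "params": ["sapling", 0, 2],
    });

    let response: Value = reqwest::Client::new()
        .post(url)
        .json(&request)
        .send()
        .await?
        .json()
        .await?;

    println!("{}", serde_json::to_string_pretty(&response["result"])?);
    Ok(())
}
```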