From b017f8cb9d0bca6a89f69c84f0c4e158fe89ff77 Mon Sep 17 00:00:00 2001 From: Robert Pirtle Date: Thu, 17 Oct 2024 11:36:54 -0700 Subject: [PATCH 1/2] ci: update internal testnet reset workflow --- .../scripts/exit-standby-all-chain-nodes.sh | 25 ------ .../scripts/put-all-chain-nodes-on-standby.sh | 27 ------- .../workflows/cd-internal-testnet-manual.yml | 54 ------------- .github/workflows/cd-internal-testnet.yml | 59 +++----------- .../workflows/cd-reset-internal-testnet.yml | 79 ------------------- .github/workflows/cd-start-chain.yml | 77 ------------------ 6 files changed, 13 insertions(+), 308 deletions(-) delete mode 100755 .github/scripts/exit-standby-all-chain-nodes.sh delete mode 100755 .github/scripts/put-all-chain-nodes-on-standby.sh delete mode 100644 .github/workflows/cd-internal-testnet-manual.yml delete mode 100644 .github/workflows/cd-reset-internal-testnet.yml delete mode 100644 .github/workflows/cd-start-chain.yml diff --git a/.github/scripts/exit-standby-all-chain-nodes.sh b/.github/scripts/exit-standby-all-chain-nodes.sh deleted file mode 100755 index 3fad215555..0000000000 --- a/.github/scripts/exit-standby-all-chain-nodes.sh +++ /dev/null @@ -1,25 +0,0 @@ -#!/bin/bash -set -x - -# get all the node's ec2 instance ids for the specified chain id -chain_node_instance_ids=$(aws ec2 describe-instances --filters "Name=tag:KavaChainId,Values=$CHAIN_ID" | jq -r '[.Reservations | .[] | .Instances | .[] | .InstanceId] | join(" ")') - -for chain_node_instance_id in ${chain_node_instance_ids} -do - autoscaling_group_state=$(aws autoscaling describe-auto-scaling-instances --instance-ids "$chain_node_instance_id" | jq -r '[.AutoScalingInstances | .[].LifecycleState] | join(" ")') - # Possible states: https://docs.aws.amazon.com/autoscaling/ec2/userguide/ec2-auto-scaling-lifecycle.html - case "$autoscaling_group_state" in - Standby) - # exit standby to make the node elgible for reciving - # requests from the target groups for the public load balancer - 
autoscaling_group_name=$(aws autoscaling describe-auto-scaling-instances --instance-ids "$chain_node_instance_id" | jq -r '[.AutoScalingInstances | .[].AutoScalingGroupName] | join(" ")') - - aws autoscaling exit-standby \ - --instance-ids "$chain_node_instance_id" \ - --auto-scaling-group-name "$autoscaling_group_name" - ;; - *) - echo "instance ($chain_node_instance_id) not in an elgible state ($autoscaling_group_state) for exiting standby, skipping" - ;; - esac -done diff --git a/.github/scripts/put-all-chain-nodes-on-standby.sh b/.github/scripts/put-all-chain-nodes-on-standby.sh deleted file mode 100755 index 591e64d0c7..0000000000 --- a/.github/scripts/put-all-chain-nodes-on-standby.sh +++ /dev/null @@ -1,27 +0,0 @@ -#!/bin/bash -set -x - -# get all the node's ec2 instance ids for the specified chain id -chain_node_instance_ids=$(aws ec2 describe-instances --filters "Name=tag:KavaChainId,Values=$CHAIN_ID" | jq -r '[.Reservations | .[] | .Instances | .[] | .InstanceId] | join(" ")') - -for chain_node_instance_id in ${chain_node_instance_ids} -do - autoscaling_group_state=$(aws autoscaling describe-auto-scaling-instances --instance-ids "$chain_node_instance_id" | jq -r '[.AutoScalingInstances | .[].LifecycleState] | join(" ")') - # Possible states: https://docs.aws.amazon.com/autoscaling/ec2/userguide/ec2-auto-scaling-lifecycle.html - case "$autoscaling_group_state" in - InService) - # place the nodes on standby so they won't get terminated - # by the autoscaling group during the time - # they are offline for a deploy / upgrade - autoscaling_group_name=$(aws autoscaling describe-auto-scaling-instances --instance-ids "$chain_node_instance_id" | jq -r '[.AutoScalingInstances | .[].AutoScalingGroupName] | join(" ")') - - aws autoscaling enter-standby \ - --instance-ids "$chain_node_instance_id" \ - --auto-scaling-group-name "$autoscaling_group_name" \ - --should-decrement-desired-capacity - ;; - *) - echo "instance ($chain_node_instance_id) not in an elgible state 
($autoscaling_group_state) for going on standby, skipping" - ;; - esac -done diff --git a/.github/workflows/cd-internal-testnet-manual.yml b/.github/workflows/cd-internal-testnet-manual.yml deleted file mode 100644 index bcb84cf95b..0000000000 --- a/.github/workflows/cd-internal-testnet-manual.yml +++ /dev/null @@ -1,54 +0,0 @@ -name: Manual Deployment (Internal Testnet) -# allow to be triggered manually -on: workflow_dispatch - -jobs: - # in order: - # enter standby (prevents autoscaling group from killing node during deploy) - # stop kava - # take ebs + zfs snapshots - # download updated binary and genesis - # reset application database state (only done on internal testnet) - reset-chain-to-zero-state: - uses: ./.github/workflows/cd-reset-internal-testnet.yml - with: - aws-region: us-east-1 - chain-id: kava_2221-17000 - ssm-document-name: kava-testnet-internal-node-update - playbook-name: reset-internal-testnet-playbook.yml - playbook-infrastructure-branch: master - secrets: inherit - - # start kava with new binary and genesis state on api, peer and seed nodes, place nodes in service once they start and are synched to live - start-chain-api: - uses: ./.github/workflows/cd-start-chain.yml - with: - aws-region: us-east-1 - chain-id: kava_2221-17000 - ssm-document-name: kava-testnet-internal-node-update - playbook-name: start-chain-api-playbook.yml - playbook-infrastructure-branch: master - secrets: inherit - needs: [reset-chain-to-zero-state] - - # setup test and development accounts and balances, deploy contracts by calling the chain's api - seed-chain-state: - uses: ./.github/workflows/cd-seed-chain.yml - with: - chain-api-url: https://rpc.app.internal.testnet.us-east.production.kava.io:443 - chain-id: kava_2221-17000 - seed-script-filename: seed-internal-testnet.sh - erc20-deployer-network-name: internal_testnet - genesis_validator_addresses: "kavavaloper1xcgtffvv2yeqmgs3yz4gv29kgjrj8usxrnrlwp kavavaloper1w66m9hdzwgd6uc8g93zqkcumgwzrpcw958sh3s" - 
kava_version_filepath: ./ci/env/kava-internal-testnet/KAVA.VERSION - secrets: inherit - needs: [start-chain-api] - post-pipeline-metrics: - uses: ./.github/workflows/metric-pipeline.yml - if: always() # always run so we metric failures and successes - with: - aws-region: us-east-1 - metric-name: kava.deploys.testnet.internal - namespace: Kava/ContinuousDeployment - secrets: inherit - needs: [seed-chain-state] diff --git a/.github/workflows/cd-internal-testnet.yml b/.github/workflows/cd-internal-testnet.yml index ed06faf437..0a66b20f8c 100644 --- a/.github/workflows/cd-internal-testnet.yml +++ b/.github/workflows/cd-internal-testnet.yml @@ -1,61 +1,27 @@ name: Continuous Deployment (Internal Testnet) -# run after every successful CI job of new commits to the master branch -# if deploy version or config has changed + +## run after every successful CI job of new commits to the master branch, or on manual dispatch on: workflow_run: workflows: [Continuous Integration (Kava Master)] types: - completed + workflow_dispatch: jobs: - changed_files: - runs-on: ubuntu-latest - # define output for first job forwarding output of changedInternalTestnetConfig job - outputs: - changedInternalTestnetConfig: ${{ steps.changed-internal-testnet-config.outputs.any_changed }} - steps: - - uses: actions/checkout@v4 - with: - fetch-depth: 0 # OR "2" -> To retrieve the preceding commit. 
- name: Get all changed internal testnet files - id: changed-internal-testnet-config - uses: tj-actions/changed-files@v42 - with: - # Avoid using single or double quotes for multiline patterns - files: | - ci/env/kava-internal-testnet/** - - # in order: - # enter standby (prevents autoscaling group from killing node during deploy) - # stop kava - # take ebs + zfs snapshots - # download updated binary and genesis - # reset application database state (only done on internal testnet) - reset-chain-to-zero-state: - needs: [changed_files] - # only start cd pipeline if last ci run was successful - if: ${{ github.event.workflow_run.conclusion == 'success' && needs.changed_files.outputs.changedInternalTestnetConfig == 'true' }} - uses: ./.github/workflows/cd-reset-internal-testnet.yml + reset-and-restart-chain: + # only start cd pipeline if last ci run was successful (or if this is being manually dispatched) + if: | + github.event_name == 'workflow_dispatch' || + (github.event_name == 'workflow_run' && github.event.workflow_run.conclusion == 'success') + uses: ./.github/workflows/cd-reset-network.yml with: aws-region: us-east-1 chain-id: kava_2221-17000 - ssm-document-name: kava-testnet-internal-node-update - playbook-name: reset-internal-testnet-playbook.yml + ssm-document-name: kava-internal-testnet-instance-update playbook-infrastructure-branch: master secrets: inherit - # start kava with new binary and genesis state on api, peer and seed nodes, place nodes in service once they start and are synched to live - start-chain-api: - uses: ./.github/workflows/cd-start-chain.yml - with: - aws-region: us-east-1 - chain-id: kava_2221-17000 - ssm-document-name: kava-testnet-internal-node-update - playbook-name: start-chain-api-playbook.yml - playbook-infrastructure-branch: master - secrets: inherit - needs: [reset-chain-to-zero-state] - # setup test and development accounts and balances, deploy contracts by calling the chain's api seed-chain-state: uses:
./.github/workflows/cd-seed-chain.yml @@ -65,9 +31,10 @@ jobs: seed-script-filename: seed-internal-testnet.sh erc20-deployer-network-name: internal_testnet genesis_validator_addresses: "kavavaloper1xcgtffvv2yeqmgs3yz4gv29kgjrj8usxrnrlwp kavavaloper1w66m9hdzwgd6uc8g93zqkcumgwzrpcw958sh3s" - kava_version_filepath: ./ci/env/kava-internal-testnet/KAVA.VERSION + kava_version_filepath: ./ci/env/kava-internal-testnet/KAVA.VERSION secrets: inherit - needs: [start-chain-api] + needs: [reset-and-restart-chain] + post-pipeline-metrics: uses: ./.github/workflows/metric-pipeline.yml if: always() # always run so we metric failures and successes diff --git a/.github/workflows/cd-reset-internal-testnet.yml b/.github/workflows/cd-reset-internal-testnet.yml deleted file mode 100644 index 738fd4c395..0000000000 --- a/.github/workflows/cd-reset-internal-testnet.yml +++ /dev/null @@ -1,79 +0,0 @@ -name: Reset Internal Testnet - -on: - workflow_call: - inputs: - chain-id: - required: true - type: string - aws-region: - required: true - type: string - ssm-document-name: - required: true - type: string - playbook-name: - required: true - type: string - playbook-infrastructure-branch: - required: true - type: string - secrets: - CI_AWS_KEY_ID: - required: true - CI_AWS_KEY_SECRET: - required: true - KAVA_PRIVATE_GITHUB_ACCESS_TOKEN: - required: true - -# in order: -# enter standby (prevents autoscaling group from killing node during deploy) -# stop kava -# download updated binary and genesis -# reset application database state (only done on internal testnet) -jobs: - place-chain-nodes-on-standby: - runs-on: ubuntu-latest - steps: - - name: checkout repo from current commit - uses: actions/checkout@v4 - - name: take the chain offline - run: bash ${GITHUB_WORKSPACE}/.github/scripts/put-all-chain-nodes-on-standby.sh - env: - CHAIN_ID: ${{ inputs.chain-id }} - AWS_REGION: ${{ inputs.aws-region }} - AWS_ACCESS_KEY_ID: ${{ secrets.CI_AWS_KEY_ID }} - AWS_SECRET_ACCESS_KEY: ${{ 
secrets.CI_AWS_KEY_SECRET }} - - name: checkout infrastructure repo - uses: actions/checkout@v4 - with: - repository: Kava-Labs/infrastructure - token: ${{ secrets.KAVA_PRIVATE_GITHUB_ACCESS_TOKEN }} - path: infrastructure - ref: master - - name: Set up Go - uses: actions/setup-go@v4 - with: - go-version-file: go.mod - - name: build kava node updater - run: cd infrastructure/cli/kava-node-updater && make install && cd ../../../ - - name: run reset playbook on all chain nodes - run: | - kava-node-updater \ - --debug \ - --max-retries=2 \ - --aws-ssm-document-name=$SSM_DOCUMENT_NAME \ - --infrastructure-git-pointer=$PLAYBOOK_INFRASTRUCTURE_BRANCH \ - --update-playbook-filename=$PLAYBOOK_NAME \ - --chain-id=$CHAIN_ID \ - --max-upgrade-batch-size=0 \ - --wait-for-node-sync-after-upgrade=false - env: - SSM_DOCUMENT_NAME: ${{ inputs.ssm-document-name }} - PLAYBOOK_NAME: ${{ inputs.playbook-name }} - CHAIN_ID: ${{ inputs.chain-id }} - AWS_REGION: ${{ inputs.aws-region }} - AWS_ACCESS_KEY_ID: ${{ secrets.CI_AWS_KEY_ID }} - AWS_SECRET_ACCESS_KEY: ${{ secrets.CI_AWS_KEY_SECRET }} - AWS_SDK_LOAD_CONFIG: 1 - PLAYBOOK_INFRASTRUCTURE_BRANCH: ${{ inputs.playbook-infrastructure-branch }} diff --git a/.github/workflows/cd-start-chain.yml b/.github/workflows/cd-start-chain.yml deleted file mode 100644 index 4e9644f387..0000000000 --- a/.github/workflows/cd-start-chain.yml +++ /dev/null @@ -1,77 +0,0 @@ -name: Start Chain - -on: - workflow_call: - inputs: - chain-id: - required: true - type: string - aws-region: - required: true - type: string - ssm-document-name: - required: true - type: string - playbook-name: - required: true - type: string - playbook-infrastructure-branch: - required: true - type: string - secrets: - CI_AWS_KEY_ID: - required: true - CI_AWS_KEY_SECRET: - required: true - KAVA_PRIVATE_GITHUB_ACCESS_TOKEN: - required: true - -jobs: - # start kava, allow nodes to start processing requests from users once they are synced to live - serve-traffic: - runs-on: 
ubuntu-latest - steps: - - name: checkout repo from current commit - uses: actions/checkout@v4 - - name: take the chain offline - run: bash ${GITHUB_WORKSPACE}/.github/scripts/put-all-chain-nodes-on-standby.sh - env: - CHAIN_ID: ${{ inputs.chain-id }} - AWS_REGION: ${{ inputs.aws-region }} - AWS_ACCESS_KEY_ID: ${{ secrets.CI_AWS_KEY_ID }} - AWS_SECRET_ACCESS_KEY: ${{ secrets.CI_AWS_KEY_SECRET }} - - name: checkout infrastructure repo - uses: actions/checkout@v4 - with: - repository: Kava-Labs/infrastructure - token: ${{ secrets.KAVA_PRIVATE_GITHUB_ACCESS_TOKEN }} - path: infrastructure - ref: master - - name: Set up Go - uses: actions/setup-go@v4 - with: - go-version-file: go.mod - - name: build kava node updater - run: cd infrastructure/cli/kava-node-updater && make install && cd ../../../ - - name: run start-chain playbook on all chain nodes - run: | - kava-node-updater \ - --debug \ - --max-retries=2 \ - --aws-ssm-document-name=$SSM_DOCUMENT_NAME \ - --infrastructure-git-pointer=$PLAYBOOK_INFRASTRUCTURE_BRANCH \ - --update-playbook-filename=$PLAYBOOK_NAME \ - --chain-id=$CHAIN_ID \ - --max-upgrade-batch-size=0 \ - --wait-for-node-sync-after-upgrade=true - env: - SSM_DOCUMENT_NAME: ${{ inputs.ssm-document-name }} - PLAYBOOK_NAME: ${{ inputs.playbook-name }} - CHAIN_ID: ${{ inputs.chain-id }} - AWS_REGION: ${{ inputs.aws-region }} - AWS_ACCESS_KEY_ID: ${{ secrets.CI_AWS_KEY_ID }} - AWS_SECRET_ACCESS_KEY: ${{ secrets.CI_AWS_KEY_SECRET }} - AWS_SDK_LOAD_CONFIG: 1 - PLAYBOOK_INFRASTRUCTURE_BRANCH: ${{ inputs.playbook-infrastructure-branch }} - - name: bring the chain online - run: bash ${GITHUB_WORKSPACE}/.github/scripts/exit-standby-all-chain-nodes.sh From 1a9f4d2717ba4970c85474d0a97d744ba34cdb09 Mon Sep 17 00:00:00 2001 From: Robert Pirtle Date: Thu, 17 Oct 2024 13:50:06 -0700 Subject: [PATCH 2/2] on merge to master, only redeploy when files changed --- .github/workflows/cd-internal-testnet.yml | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 
deletion(-) diff --git a/.github/workflows/cd-internal-testnet.yml b/.github/workflows/cd-internal-testnet.yml index 0a66b20f8c..149ae0b30e 100644 --- a/.github/workflows/cd-internal-testnet.yml +++ b/.github/workflows/cd-internal-testnet.yml @@ -9,11 +9,29 @@ on: workflow_dispatch: jobs: + # when not manually dispatched, we only want merges to master that change the relevant files + # to trigger the network reset + changed_files: + runs-on: ubuntu-latest + outputs: + changedInternalTestnetConfig: ${{ steps.changed-internal-testnet-config.outputs.any_changed }} + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 0 # OR "2" -> To retrieve the preceding commit. + - name: Get all changed internal testnet files + id: changed-internal-testnet-config + uses: tj-actions/changed-files@v42 + with: + # Avoid using single or double quotes for multiline patterns + files: | + ci/env/kava-internal-testnet/** + reset-and-restart-chain: # only start cd pipeline if last ci run was successful (or if this is being manually dispatched) if: | github.event_name == 'workflow_dispatch' || - (github.event_name == 'workflow_run' && github.event.workflow_run.conclusion == 'success') + (github.event_name == 'workflow_run' && github.event.workflow_run.conclusion == 'success' && needs.changed_files.outputs.changedInternalTestnetConfig == 'true') uses: ./.github/workflows/cd-reset-network.yml with: aws-region: us-east-1 @@ -21,6 +39,7 @@ jobs: ssm-document-name: kava-internal-testnet-instance-update playbook-infrastructure-branch: master secrets: inherit + needs: [changed_files] # setup test and development accounts and balances, deploy contracts by calling the chain's api seed-chain-state: