diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS new file mode 100644 index 0000000..63361f2 --- /dev/null +++ b/.github/CODEOWNERS @@ -0,0 +1 @@ +* @Sage-Bionetworks/sagebio-it @Sage-Bionetworks/Agora-Admin diff --git a/.github/workflows/main.yaml b/.github/workflows/main.yaml new file mode 100644 index 0000000..28ec7aa --- /dev/null +++ b/.github/workflows/main.yaml @@ -0,0 +1,35 @@ +name: main + +on: + pull_request: + branches: ['*'] + push: + branches: ['develop', 'staging', 'prod' ] + +jobs: + tests: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: pre-commit/action@v3.0.0 + + deploy: + if: ${{ github.event_name == 'push' }} + needs: ["tests"] + # self hosted runner labels are setup in github to match branch names + runs-on: [self-hosted, "${{ github.ref_name }}"] + # variables in context environments are setup in github to match branch names + environment: + name: ${{ github.ref_name }} + + steps: + - uses: actions/checkout@v4 + - name: Import Synapse Data + run: ./import-data.sh $BRANCH $SYNAPSE_USERNAME $SYNAPSE_PASSWORD $DB_HOST $DB_USER $DB_PASS + env: + BRANCH: ${{ github.ref_name }} + SYNAPSE_USERNAME: ${{ secrets.SYNAPSE_USERNAME }} + SYNAPSE_PASSWORD: ${{ secrets.SYNAPSE_PASSWORD }} + DB_HOST: ${{ secrets.DB_HOST }} + DB_USER: ${{ secrets.DB_USER }} + DB_PASS: ${{ secrets.DB_PASS }} diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 5ba9a71..d74508a 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,19 +1,19 @@ repos: -- repo: https://github.com/pre-commit/pre-commit-hooks - rev: v4.5.0 - hooks: - - id: end-of-file-fixer - - id: trailing-whitespace -- repo: https://github.com/adrienverge/yamllint - rev: v1.33.0 - hooks: - - id: yamllint -- repo: https://github.com/Lucas-C/pre-commit-hooks - rev: v1.5.4 - hooks: - - id: remove-tabs -- repo: https://github.com/sirosen/check-jsonschema - rev: 0.27.0 - hooks: - - id: check-github-workflows - - id: check-github-actions + - repo: 
https://github.com/pre-commit/pre-commit-hooks + rev: v4.5.0 + hooks: + - id: end-of-file-fixer + - id: trailing-whitespace + - repo: https://github.com/adrienverge/yamllint + rev: v1.33.0 + hooks: + - id: yamllint + - repo: https://github.com/Lucas-C/pre-commit-hooks + rev: v1.5.4 + hooks: + - id: remove-tabs + - repo: https://github.com/sirosen/check-jsonschema + rev: 0.27.0 + hooks: + - id: check-github-workflows + - id: check-github-actions diff --git a/.travis.yml b/.travis.yml deleted file mode 100644 index 78db586..0000000 --- a/.travis.yml +++ /dev/null @@ -1,21 +0,0 @@ -language: python -python: 3.8 -cache: pip -fast_finish: true - -branches: - only: - - develop - - staging - - prod - -before_install: - - openssl aes-256-cbc -K $encrypted_3307e78034e0_key -iv $encrypted_3307e78034e0_iv -in agora-ci-develop.pem.enc -out ~/.ssh/agora-ci-develop.pem -d - - openssl aes-256-cbc -K $encrypted_76f307832d54_key -iv $encrypted_76f307832d54_iv -in agora-ci-prod.pem.enc -out ~/.ssh/agora-ci-staging.pem -d - - openssl aes-256-cbc -K $encrypted_76f307832d54_key -iv $encrypted_76f307832d54_iv -in agora-ci-prod.pem.enc -out ~/.ssh/agora-ci-prod.pem -d - - eval "$(ssh-agent -s)" - - chmod 600 ~/.ssh/*.pem - - ssh-add ~/.ssh/agora-ci-$TRAVIS_BRANCH.pem - -script: - - ./updatedb.sh || travis_terminate 1 diff --git a/README.md b/README.md index 042bdb4..605c340 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,6 @@ # Overview -Agora Data Manager is a tool that loads the JSON files into Agora's document database instances in our AWS environments. +Agora Data Manager is a tool that loads the JSON files into Agora's document database +instances in our AWS environments. # Purpose This project allows Agora maintainers to update the Agora database with @@ -10,39 +11,64 @@ self-service update. ![alt text][db_update] -# Worflow +# Workflow To deploy an updated data version to the Agora development database 1. Increment `data-version` in `data-manifest.json` on the `develop` branch. 
2. Commit the change -3. The [CI system](https://travis-ci.org/Sage-Bionetworks/agora-data-manager) automatically updates the dev DB +3. The Github action CI system automatically updates the dev DB To deploy an updated data version to the Agora staging database: 1. Merge the data-version update from the dev branch to the staging branch. -2. The [CI system](https://travis-ci.org/Sage-Bionetworks/agora-data-manager) automatically updates the staging DB +2. The Github action CI system automatically updates the staging DB To deploy an updated data version to the Agora production database: 1. Merge the data-version update from the staging branch to the production branch. -2. The [CI system](https://travis-ci.org/Sage-Bionetworks/agora-data-manager) automatically updates the production DB +2. The Github action CI system automatically updates the production DB # Setup -The following environment variables need to be setup for the scripts to deploy database updates: +## Secrets -| Variable | Description | Example | -|----------------------|-----------------------------------|---------------------------------------------------------------------------| -| BASTIAN_HOST_develop | The bastian host | ec2-10-11-12-13.compute-1.amazonaws.com | -| DB_HOST_develop | The database host | dbcluster-mr0a782pfjnk.cluster-ctcayu3de2lt.us-east-1.docdb.amazonaws.com | -| DB_USER_develop | The database user | dbuser | -| DB_PASS_develop | The database password | supersecret | -| SYNAPSE_USERNAME | The Synapse service user | syn-service-user | -| SYNAPSE_PASSWORD | The Synapse service user password | supersecret | +The following secrets need to be set up in Github for the scripts to deploy database updates: -__Note__: The variables containing `_develop` postfix corresponds to the branch. -To deploy to a prod environment a prod branch is require along with a variable -containing a `_prod` prefix (i.e. 
BASTIAN_HOST_prod) +Global secrets: +| Variable | Description | Example | +|----------------------|-----------------------------------|-----------------------------| +| SYNAPSE_USERNAME | The Synapse service user | syn-service-user | +| SYNAPSE_PASSWORD | The Synapse service user password | supersecret | -[db_update]: diagram1.png "update diagram" + +Context specific secrets for each environment that corresponds to a git branch (develop/staging/prod): + +| Variable | Description | Example | +|-----------|-----------------------------|---------------------------------------------------------------------------| +| DB_HOST | The database host | dbcluster-mr0a782pfjnk.cluster-ctcayu3de2lt.us-east-1.docdb.amazonaws.com | +| DB_USER | The database user | dbuser | +| DB_PASS | The database password | supersecret | + + +![alt text][github_secrets] + + +## Self hosted runners + +[agora2-infra] repository deploys a bastian host in AWS for each environment which has access to +the databases. We manually configure a [Github self-hosted runners] for each bastian host; +a label is applied to each runner to match the corresponding deployment branch name (develop/staging/prod). +Each runner corresponds to an environment which corresponds to a git branch. The update is +executed from these runners. When a push happens on a branch (e.g. develop), the update +is executed on the `agora-bastian-develop` runner which in turn updates the development database. 
+ + +![alt text][self_hosted_runners] + + +[db_update]: agora-db-update.drawio.png "update diagram" +[github_secrets]: github_secrets.png "github secrets screen" +[self_hosted_runners]: self-hosted-runners.png "self hosted runners" +[agora2-infra]: https://github.com/Sage-Bionetworks/agora2-infra "agora2-infra repository" +[Github self-hosted runners]: https://docs.github.com/en/actions/hosting-your-own-runners/managing-self-hosted-runners/about-self-hosted-runners#about-self-hosted-runners diff --git a/agora-ci-develop.pem.enc b/agora-ci-develop.pem.enc deleted file mode 100644 index 92d6134..0000000 Binary files a/agora-ci-develop.pem.enc and /dev/null differ diff --git a/agora-ci-prod.pem.enc b/agora-ci-prod.pem.enc deleted file mode 100644 index cae6810..0000000 Binary files a/agora-ci-prod.pem.enc and /dev/null differ diff --git a/agora-db-update.drawio.png b/agora-db-update.drawio.png new file mode 100644 index 0000000..88d8819 Binary files /dev/null and b/agora-db-update.drawio.png differ diff --git a/diagram1.png b/diagram1.png deleted file mode 100644 index 0053688..0000000 Binary files a/diagram1.png and /dev/null differ diff --git a/github_secrets.png b/github_secrets.png new file mode 100644 index 0000000..0144203 Binary files /dev/null and b/github_secrets.png differ diff --git a/import-data.sh b/import-data.sh index c30b310..767d592 100755 --- a/import-data.sh +++ b/import-data.sh @@ -5,7 +5,7 @@ #!/bin/bash set -e -TRAVIS_BRANCH=$1 +BRANCH=$1 SYNAPSE_USERNAME=$2 SYNAPSE_PASSWORD=$3 DB_HOST=$4 @@ -13,17 +13,17 @@ DB_USER=$5 DB_PASS=$6 CURRENT_DIR=$(pwd) -PARENT_DIR="$(dirname "$CURRENT_DIR")" -TMP_DIR=/tmp -WORKING_DIR=$TMP_DIR/work +WORKING_DIR=$CURRENT_DIR DATA_DIR=$WORKING_DIR/data TEAM_IMAGES_DIR=$DATA_DIR/team_images +mkdir -p $TEAM_IMAGES_DIR + # Version key/value should be on his own line DATA_VERSION=$(cat $WORKING_DIR/data-manifest.json | grep data-version | head -1 | awk -F: '{ print $2 }' | sed 's/[",]//g' | tr -d '[[:space:]]') 
DATA_MANIFEST_ID=$(cat $WORKING_DIR/data-manifest.json | grep data-manifest-id | head -1 | awk -F: '{ print $2 }' | sed 's/[",]//g' | tr -d '[[:space:]]') TEAM_IMAGES_ID=$(cat $WORKING_DIR/data-manifest.json | grep team-images-id | head -1 | awk -F: '{ print $2 }' | sed 's/[",]//g' | tr -d '[[:space:]]') -echo "$TRAVIS_BRANCH branch, DATA_VERSION = $DATA_VERSION, manifest id = $DATA_MANIFEST_ID" +echo "$BRANCH branch, DATA_VERSION = $DATA_VERSION, manifest id = $DATA_MANIFEST_ID" # Download the manifest file from synapse synapse -u $SYNAPSE_USERNAME -p $SYNAPSE_PASSWORD get --downloadLocation $DATA_DIR -v $DATA_VERSION $DATA_MANIFEST_ID diff --git a/self-hosted-runners.png b/self-hosted-runners.png new file mode 100644 index 0000000..8b0b147 Binary files /dev/null and b/self-hosted-runners.png differ diff --git a/updatedb.sh b/updatedb.sh deleted file mode 100755 index 0ba3578..0000000 --- a/updatedb.sh +++ /dev/null @@ -1,36 +0,0 @@ -# Update agora db from a build machine by running an import script -# on a bastian host -#!/bin/bash -set -e - -# double interpolate vars from travis -eval export "BASTIAN_HOST=\$BASTIAN_HOST_$TRAVIS_BRANCH" -eval export "DB_HOST=\$DB_HOST_$TRAVIS_BRANCH" -eval export "DB_USER=\$DB_USER_$TRAVIS_BRANCH" -eval export "DB_PASS=\$DB_PASS_$TRAVIS_BRANCH" - -# Escape chars in env vars -q_mid=\'\\\'\' -SYNAPSE_USERNAME_ESC="'${SYNAPSE_USERNAME//\'/$q_mid}'" -SYNAPSE_PASSWORD_ESC="'${SYNAPSE_PASSWORD//\'/$q_mid}'" -DB_USER_ESC="'${DB_USER//\'/$q_mid}'" -DB_PASS_ESC="'${DB_PASS//\'/$q_mid}'" - -# bastian configs -BASTIAN_USER="ec2-user" -BASTIAN_SSH_KEY="~/.ssh/agora-ci-$TRAVIS_BRANCH.pem" - -# set SSH configuration -echo -e "Host $BASTIAN_HOST\n\tStrictHostKeyChecking no\n" >> ~/.ssh/config - -# clean up from previous builds -ssh -i $BASTIAN_SSH_KEY $BASTIAN_USER@$BASTIAN_HOST "rm -rf /tmp/work" - -# create directories for data -ssh -i $BASTIAN_SSH_KEY $BASTIAN_USER@$BASTIAN_HOST "mkdir -p /tmp/work/data/team_images" - -# setup script on 
bastian -scp -i $BASTIAN_SSH_KEY import-data.sh data-manifest.json create-indexes.js $BASTIAN_USER@$BASTIAN_HOST:/tmp/work/. - -# run import on bastian -ssh -i $BASTIAN_SSH_KEY $BASTIAN_USER@$BASTIAN_HOST "bash /tmp/work/import-data.sh $TRAVIS_BRANCH $SYNAPSE_USERNAME_ESC $SYNAPSE_PASSWORD_ESC $DB_HOST $DB_USER_ESC $DB_PASS_ESC"