Skip to content

Commit

Permalink
+ provision dedicated bench-runner
Browse files Browse the repository at this point in the history
  • Loading branch information
kvs96 committed May 23, 2024
1 parent b5cdff7 commit dbc7f79
Show file tree
Hide file tree
Showing 3 changed files with 242 additions and 7 deletions.
142 changes: 135 additions & 7 deletions .github/workflows/benchmarks.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,14 +12,105 @@ on:
- ignore
- commit
- pull_request
# Instance type for the benchmark runner; the jobs below pass it to the
# AWS offering lookup and to Terraform as `instance_type`.
instance_type:
  description: "Instance type to provision for benchmarking."
  default: c6i.metal
  required: true
# Disk size for the runner instance; forwarded to Terraform as
# `instance_disk_size`. Typed as a number so that a non-numeric value is
# rejected when the workflow is dispatched instead of failing later in
# `terraform apply`.
instance_disk_size:
  description: Disk size (in GB) for the instance.
  required: true
  type: number
  default: 100

# All runs share one concurrency group; a newly started run cancels the
# run that is still in progress.
concurrency:
  cancel-in-progress: true
  group: benchmarks

# Workflow-level environment shared by every job.
env:
  TERM: xterm-256color
  CARGO_TERM_COLOR: always

jobs:
# Provisions the dedicated benchmark runner with Terraform.
#
# Flow: log in to AWS -> request a runner registration token -> pick the first
# region that offers the requested instance type -> `terraform apply`.
# The selected region is exported as a job output so that `destroy-runner`
# can tear the instance down in the same region.
provision-runner:
  runs-on: ubuntu-latest
  outputs:
    available_region: ${{ steps.get-region.outputs.available_region }}
  steps:
    - name: AWS Login
      uses: aws-actions/configure-aws-credentials@v4
      with:
        aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
        aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
        aws-region: us-west-2

    - name: Generate Runner Registration Token
      id: get-runner-token
      env:
        # Passed through the environment so the secret is not spliced into the script text.
        RUNNER_TOKEN: ${{ secrets.RUNNER_TOKEN }}
      run: |
        # -f: fail on HTTP errors instead of parsing an error payload as if it were a token.
        RESPONSE=$(curl -fsSL -X POST \
          -H "Accept: application/vnd.github+json" \
          -H "Authorization: Bearer $RUNNER_TOKEN" \
          -H "X-GitHub-Api-Version: 2022-11-28" \
          "https://api.github.com/repos/${{ github.repository }}/actions/runners/registration-token")
        # `// empty` turns a missing/null token into an empty string rather than the literal "null".
        TOKEN=$(echo "$RESPONSE" | jq -r '.token // empty')
        if [ -z "$TOKEN" ]; then
          echo "Failed to obtain a runner registration token"
          exit 1
        fi
        # Mask the token before it is written anywhere.
        echo "::add-mask::$TOKEN"
        echo "REGISTER_TOKEN=$TOKEN" >> "$GITHUB_ENV"

    - name: Checkout
      uses: actions/checkout@v4

    - name: Check Capacity
      id: get-region
      env:
        INSTANCE_TYPE: ${{ inputs.instance_type }}
      run: |
        regions=(us-east-1 us-east-2 us-west-1 us-west-2 ap-south-1 ap-southeast-1 ap-southeast-2 ap-southeast-3 ap-northeast-1 ap-northeast-2 ap-northeast-3 ca-central-1 eu-central-1 eu-west-1 eu-west-2 eu-west-3 eu-north-1 sa-east-1)
        for region in "${regions[@]}"
        do
          # A failing lookup in one region (e.g. a region that is not enabled for
          # the account) must not abort the whole search under `bash -e`;
          # treat it as "not offered here" and keep going.
          offering=$(aws ec2 describe-instance-type-offerings \
            --location-type availability-zone \
            --filters "Name=instance-type,Values=$INSTANCE_TYPE" \
            --region "$region" \
            --query "InstanceTypeOfferings[?InstanceType=='$INSTANCE_TYPE'].InstanceType" \
            --output text) || offering=""
          if [ -n "$offering" ]; then
            # First match wins: publish it for later steps (env) and later jobs (output).
            echo "AVAILABLE_REGION=$region" >> "$GITHUB_ENV"
            echo "available_region=$region" >> "$GITHUB_OUTPUT"
            break
          fi
        done

    - name: Check Region
      env:
        INSTANCE_TYPE: ${{ inputs.instance_type }}
      run: |
        # AVAILABLE_REGION is only set when the previous step found an offering.
        if [ -z "$AVAILABLE_REGION" ]; then
          echo "No available regions for instance type $INSTANCE_TYPE"
          exit 1
        fi

    - name: Setup Terraform
      uses: hashicorp/setup-terraform@v3

    - name: Terraform Apply
      timeout-minutes: 30
      env:
        # Terraform reads `TF_VAR_<name>` as input variable `<name>`; this keeps the
        # registration token and user-supplied inputs off the command line.
        TF_VAR_aws_region: ${{ env.AVAILABLE_REGION }}
        TF_VAR_instance_type: ${{ inputs.instance_type }}
        TF_VAR_instance_disk_size: ${{ inputs.instance_disk_size }}
        TF_VAR_registration_token: ${{ env.REGISTER_TOKEN }}
        TF_VAR_github_run_id: ${{ github.run_id }}
      run: |
        cd terraform/bench-runner
        terraform init
        terraform apply -auto-approve

    # NOTE(review): `@master` is a mutable ref; consider pinning this action to a
    # release tag or commit SHA since it receives secrets.
    - name: Notify on Failure
      if: failure()
      uses: appleboy/telegram-action@master
      with:
        to: ${{ secrets.TELEGRAM_DEVOPS_CHAT }}
        token: ${{ secrets.TELEGRAM_DEVOPS_TOKEN }}
        format: markdown
        disable_web_page_preview: true
        message: |
          *Status*: 🔥
          *Problem*: Issue with server provisioning - bench-runner-${{ github.run_id }}
          *Details*: Check [Benchmark Run](https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }})
benchmarks:
runs-on: bench
needs: provision-runner
runs-on: [bench]
permissions:
contents: write
pull-requests: write
Expand All @@ -36,6 +127,9 @@ jobs:
run: |
sudo apt update
sudo apt install -y git clang curl libssl-dev llvm libudev-dev cmake wabt protobuf-compiler wget bzip2
curl -o wasm-opt-linux-x64.tar.gz -L `curl -s https://api.github.com/repos/WebAssembly/binaryen/releases/latest | jq -r '.assets[] | select(.name | contains("x86_64-linux")) | .browser_download_url' |grep -v sha256`
tar xzf wasm-opt-linux-x64.tar.gz && sudo mv binaryen-version_*/bin/* /usr/local/bin/
wasm-opt --version
- name: Run all benchmarks
run: |
Expand All @@ -50,12 +144,6 @@ jobs:
# generate code for lightweight scheduler that is used in gtest and other crates
./scripts/weight-dump.sh
# Cleanup step: `always()` makes it run regardless of the outcome of the
# preceding steps, so `cargo clean` is executed after every run.
- name: Clear target directory
if: ${{ always() }}
run: |
# clear the target directory because our benchmarking machine is not ephemeral
cargo clean
- name: "ACTIONS: Upload artifact with benchmarking errors (if exist)"
if: ${{ always() }}
uses: actions/upload-artifact@v4
Expand Down Expand Up @@ -104,3 +192,43 @@ jobs:
labels: |
A0-pleasereview
A4-insubstantial
# Tears down the benchmark runner created by `provision-runner`.
# `if: always()` keeps this job running even when provisioning or the
# benchmarks themselves failed, so the instance is not left behind.
destroy-runner:
  if: always()
  needs: [provision-runner, benchmarks]
  runs-on: ubuntu-latest
  steps:
    - name: AWS Login
      uses: aws-actions/configure-aws-credentials@v4
      with:
        aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
        aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
        aws-region: us-west-2

    # Only the Terraform configuration is needed here.
    - name: Checkout
      uses: actions/checkout@v4
      with:
        sparse-checkout: terraform

    - name: Setup Terraform
      uses: hashicorp/setup-terraform@v3

    - name: Terraform Destroy
      timeout-minutes: 60
      env:
        # Region chosen by `provision-runner`. If provisioning failed before a
        # region was selected the output is empty, so fall back explicitly to
        # us-west-2 (the default of the Terraform `aws_region` variable).
        TF_VAR_aws_region: ${{ needs.provision-runner.outputs.available_region || 'us-west-2' }}
        TF_VAR_instance_type: ${{ inputs.instance_type }}
        TF_VAR_instance_disk_size: ${{ inputs.instance_disk_size }}
        TF_VAR_github_run_id: ${{ github.run_id }}
        # `registration_token` is intentionally not passed: REGISTER_TOKEN is written
        # to GITHUB_ENV inside `provision-runner` and is not visible in this job, so
        # the variable keeps its empty default, exactly as before.
      run: |
        cd terraform/bench-runner
        terraform init
        terraform destroy -auto-approve

    # NOTE(review): `@master` is a mutable ref; consider pinning this action to a
    # release tag or commit SHA since it receives secrets.
    - name: Notify on Failure
      if: failure()
      uses: appleboy/telegram-action@master
      with:
        to: ${{ secrets.TELEGRAM_DEVOPS_CHAT }}
        token: ${{ secrets.TELEGRAM_DEVOPS_TOKEN }}
        format: markdown
        disable_web_page_preview: true
        message: |
          *Status*: 🔥
          *Problem*: Issue with server destroying - bench-runner-${{ github.run_id }}
          *Details*: Check [Benchmark Run](https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }})
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -10,3 +10,4 @@ weight-dumps/
.idea
.log
*.meta.txt
.terraform*
106 changes: 106 additions & 0 deletions terraform/bench-runner/main.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
# Remote state for the bench runner is kept in an S3 bucket.
# NOTE(review): the state key is a fixed path, so every workflow run operates
# on the same state object — confirm that runs can never overlap.
terraform {
  backend "s3" {
    bucket = "gear-terraform"
    key    = "bench-runner/terraform.tfstate"
    region = "us-west-2"
  }
}

# Region used by the AWS provider below.
variable "aws_region" {
  default = "us-west-2"
  type    = string
}
# Instance type of the runner; consumed by `aws_instance.bench_runner`.
variable "instance_type" {
  default = "t3.micro"
  type    = string
}
# Size of the runner's root volume; consumed by `root_block_device.volume_size`.
# Declared as a number (it was a string with a numeric default) so that a
# non-numeric value is rejected when variables are evaluated instead of relying
# on implicit string-to-number conversion. Values passed via `-var` or
# `TF_VAR_instance_disk_size` are still accepted as before.
variable "instance_disk_size" {
  type    = number
  default = 30
}
# Runner registration token that is interpolated into the instance's boot
# script (`--token` argument of `config.sh`).
# Marked sensitive so Terraform redacts it, and values derived from it, in
# plan/apply output.
variable "registration_token" {
  type      = string
  default   = ""
  sensitive = true
}
# Workflow run id; used as the suffix of the instance's `Name` tag.
variable "github_run_id" {
  default = ""
  type    = string
}

# AWS provider, pointed at the region supplied through `aws_region`.
provider "aws" {
  region = var.aws_region
}

# Newest image whose name matches the Ubuntu 22.04 (jammy) amd64 server
# pattern, restricted to owner account 099720109477
# (presumably Canonical — verify).
data "aws_ami" "ubuntu" {
  owners      = ["099720109477"]
  most_recent = true

  filter {
    name   = "name"
    values = ["ubuntu/images/hvm-ssd/ubuntu-jammy-22.04-amd64-server-*"]
  }
}

# The default VPC of the selected region.
data "aws_vpc" "default" {
  default = true
}

# The security group named "default" inside the default VPC.
data "aws_security_group" "default" {
  filter {
    name   = "group-name"
    values = ["default"]
  }

  vpc_id = data.aws_vpc.default.id
}

# All subnets that belong to the default VPC.
data "aws_subnets" "default" {
  filter {
    values = [data.aws_vpc.default.id]
    name   = "vpc-id"
  }
}

# The benchmark runner instance.
#
# It is launched from the Ubuntu image looked up above, placed in the first
# subnet of the default VPC with the default security group, and tagged
# `bench-runner-<github_run_id>`.
resource "aws_instance" "bench_runner" {
  instance_type = var.instance_type
  ami           = data.aws_ami.ubuntu.id

  # NOTE(review): assumes a key pair named "root" exists in whichever region
  # `aws_region` selects — confirm.
  key_name = "root"

  vpc_security_group_ids = [data.aws_security_group.default.id]

  # NOTE(review): takes the first subnet returned for the default VPC; nothing
  # here checks that its availability zone offers `instance_type` — confirm.
  subnet_id = data.aws_subnets.default.ids[0]

  tags = {
    Name = "bench-runner-${var.github_run_id}"
  }

  root_block_device {
    delete_on_termination = true
    volume_size           = var.instance_disk_size
    volume_type           = "gp3"
  }

  timeouts {
    delete = "60m"
    create = "30m"
  }

  # Boot script, in order:
  #   1. turns transparent hugepages off and sets nr_hugepages to 0;
  #   2. creates /runner owned by the `ubuntu` user;
  #   3. installs jq and docker, and adds `ubuntu` to the docker group;
  #   4. as `ubuntu`: downloads the latest actions/runner release, registers it
  #      as `bench-runner` with the single label `bench` against
  #      gear-tech/gear using `registration_token`, then starts it.
  # The script text is kept unchanged; every line carries the same indentation
  # so `<<-` strips it uniformly.
  user_data = <<-EOF
    #!/bin/bash
    echo never > /sys/kernel/mm/transparent_hugepage/enabled
    echo never > /sys/kernel/mm/transparent_hugepage/defrag
    echo 0 > /proc/sys/vm/nr_hugepages
    mkdir /runner
    chown ubuntu:ubuntu -R /runner
    apt update
    apt install -y jq docker.io
    systemctl enable --now docker
    usermod -aG docker ubuntu
    sudo -u ubuntu -i bash -c "
    cd /runner &&
    curl -o actions-runner-linux-x64.tar.gz -L `curl -s https://api.github.com/repos/actions/runner/releases/latest | jq -r '.assets[] | select(.name | contains(\"actions-runner-linux-x64\")) | .browser_download_url'` &&
    tar xzf actions-runner-linux-x64.tar.gz &&
    sudo ./bin/installdependencies.sh &&
    ./config.sh --name bench-runner --runnergroup default --no-default-labels --labels bench --replace --work _work --url https://github.com/gear-tech/gear --token ${var.registration_token} &&
    ./run.sh
    "
    EOF
}

0 comments on commit dbc7f79

Please sign in to comment.