# Training and Validation e2e with Single Server (workflow run #89)

# Workflow identity and trigger. The workflow only starts on a manual
# dispatch; the single optional input selects which models to train.
name: Training and Validation e2e with Single Server
on:
  workflow_dispatch:
    inputs:
      target_models:
        # Each comma-separated entry has three slash-separated parts, as
        # spelled out in the description text below.
        description: >-
          Comma Separated List of Models to Train.
          Format: model_type/feature_type/trainer_name
        required: false
        default: 'AbsPower/BPFOnly/SGDRegressorTrainer,AbsPower/BPFOnly/ExponentialRegressionTrainer,AbsPower/BPFOnly/LogarithmicRegressionTrainer'
# Workflow-level token permissions; every scope listed is granted write access.
permissions:
  # The Validate job commits and pushes generated reports to the repository.
  contents: write
  packages: write
  pull-requests: write
  repository-projects: write
# Three jobs run in sequence: Create-runner -> Validate -> Cleanup.
jobs:
  # Calls the repository's reusable create_equinix_runner workflow
  # (presumably provisions the self-hosted runner used by Validate - confirm).
  Create-runner:
    name: "Create Runner"
    uses: ./.github/workflows/create_equinix_runner.yml
    secrets: inherit

  # Sets up the environment with Ansible, trains the models, validates each
  # requested model, and commits the reports under docs/train-validate-e2e/.
  Validate:
    name: "Validate"
    needs: Create-runner
    runs-on: self-hosted
    # NOTE(review): presumably set so that a failure here does not stop the
    # Cleanup job from running - confirm.
    continue-on-error: true
    outputs:
      # Consumed by the Cleanup job as its runner_name input.
      runner-name: ${{ runner.name }}
    steps:
      - name: Checkout code
        uses: actions/checkout@v3

      - name: Run Setup Runner Action
        uses: ./.github/actions/setup-action

      - name: Run Setup Playbooks
        run: |
          cd "${GITHUB_WORKSPACE}/ansible"
          echo "Create VM"
          ansible-playbook -i inventory.yml kvm_playbook.yml
          echo "Install SSH tunnel"
          ansible-playbook ssh_tunnel_playbook.yml
          echo "Install Prometheus"
          ansible-playbook -i inventory.yml metrics_playbook.yml
          echo "Install Node Exporter"
          ansible-playbook -i inventory.yml node_exporter_playbook.yml -vvv
          echo "Verify node-exporter"
          # Diagnostics only: "|| true" keeps a failed check from failing the step.
          sudo systemctl status node_exporter || true
          sudo ss -tuln | grep 9100 || true
          curl -s localhost:9100/metrics | grep collector || true
          echo "Install Kepler"
          ansible-playbook -i inventory.yml -vvv kepler_playbook.yml -e "target_host=localhost"
          echo "Create ssh tunnel"
          ansible-playbook -i inventory.yml ssh_tunnel_playbook.yml
          echo "Install Model Server"
          ansible-playbook -i inventory.yml -vvv model_server_playbook.yml

      - name: Run Trainer Action
        uses: ./.github/actions/train-action
        with:
          # The validation step below reads train_logs.txt from this directory.
          model_export_path: /tmp/trained-equinix-models

      - name: Checkout code
        uses: actions/checkout@v3

      - name: Run Validation Playbooks
        run: |
          cd "${GITHUB_WORKSPACE}/ansible"
          echo "Pass Trained Models to VM"
          ansible-playbook -i inventory.yml -v deploy_http_model_server.yml
          # TARGET_MODELS comes from the step env so the user-supplied input is
          # never expanded into the script text itself.
          IFS=',' read -r -a models <<< "$TARGET_MODELS"
          for model in "${models[@]}"
          do
            echo "Running Model Server Playbook: $model"
            model_url="http://localhost:8080/${model}_0.zip"
            echo "Model exists with sufficient accuracy: $model_url"
            ansible-playbook -i inventory.yml -vvv model_server_restart.yml \
              -e "node_components_init_url=$model_url"
            echo "Run validation test"
            ansible-playbook -vvv kepler_validator.yml
            echo "Validation Finished"
            # Move this model's validator output into its own report directory.
            mkdir -p "/tmp/reports/${model}"
            mv /tmp/validator-*/* "/tmp/reports/${model}"
          done
          export DATE_STR=$(date +%Y-%m-%d_%H-%M-%S)
          cd "${GITHUB_WORKSPACE}"
          mkdir -p "docs/train-validate-e2e/${DATE_STR}"
          mv /tmp/reports/* "docs/train-validate-e2e/${DATE_STR}"
          mv /tmp/trained-equinix-models/train_logs.txt "docs/train-validate-e2e/${DATE_STR}"
          git config user.email "dependabot[bot]@users.noreply.github.com"
          git config user.name "dependabot[bot]"
          git add docs/*
          git commit -m "Add train-validate-e2e single server for ${DATE_STR}" -s
          git push
        # NOTE(review): env is attached to this step; the original nesting was
        # lost, so confirm it was not meant to be job-level.
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
          TARGET_MODELS: ${{ github.event.inputs.target_models }}

  # Calls the reusable clean_equinix_runner workflow with the name of the
  # runner that executed the Validate job.
  Cleanup:
    name: "Cleanup"
    needs: [Validate]
    uses: ./.github/workflows/clean_equinix_runner.yml
    secrets: inherit
    with:
      runner_name: ${{ needs.Validate.outputs.runner-name }}