-
Notifications
You must be signed in to change notification settings - Fork 11
229 lines (202 loc) · 8.91 KB
/
equinix_k8s_flow.yml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
# Manually triggered workflow: provisions a throw-away Kubernetes cluster on
# Equinix Metal and exercises it with Prometheus, Kepler and kube-burner.
name: 'Equinix k8s Action'

on:
  workflow_dispatch:
    inputs:
      # Number of hours from now after which the cluster should be terminated.
      termination_time:
        description: 'Cluster termination time in hours after now'
        required: true
        default: '1'
      # Machine plan used for the control-plane host.
      control_plan:
        description: 'control plane machine type'
        required: false
        default: 'c3.small.x86'
      # Machine plan used for the worker hosts.
      node_plan:
        description: 'node machine type'
        required: false
        default: 'c3.small.x86'
      # Metro code where the machines are requested.
      metro:
        description: 'metro location'
        required: false
        default: 'da'
jobs:
  # Runs the k8s_kubeadm.yml playbook from kepler-metal-ci and publishes the
  # resulting kubeconfig as a short-lived artifact for the test job.
  Create-k8s-cluster:
    name: "Create Cluster"
    runs-on: ubuntu-latest
    steps:
      - name: Validate termination time
        env:
          # Passed through the environment so the shell treats the value as
          # data rather than as part of the script text.
          TERMINATION_TIME: ${{ github.event.inputs.termination_time }}
        run: |
          # Check the format first: a numeric comparison on a non-numeric
          # value errors out, and `if` would treat that error as "false",
          # letting an invalid value through.
          if ! [[ "$TERMINATION_TIME" =~ ^[0-9]{1,4}$ ]]; then
            echo "Error: Termination time must be a whole number of hours."
            exit 1
          fi
          # 10# forces base 10 so values such as 08 are not read as octal.
          if (( 10#$TERMINATION_TIME > 48 )); then
            echo "Error: Termination time cannot be more than 48 hours."
            exit 1
          fi
      - name: Install ansible, git and jq
        run: |
          sudo apt-get update
          sudo apt-get install ansible jq git -y
      - name: Clone repo
        run: |
          git clone https://github.com/sustainable-computing-io/kepler-metal-ci
      - name: Save ssh key from env to file
        run: |
          mkdir -p ~/.ssh/
          echo "${{ secrets.EQUINIX_SSH_KEY }}" > ~/.ssh/kepler_ci
          chmod 600 ~/.ssh/kepler_ci
          echo "StrictHostKeyChecking no" >> ~/.ssh/config
      - name: Create metal config
        run: |
          cat <<EOF > /tmp/metal.yaml
          token: "${{ secrets.METAL_AUTH_TOKEN }}"
          project-id: ${{ secrets.EQUINIX_PROJECT_ID }}
          EOF
      - name: Play k8s_kubeadm.yml
        env:
          metro: "da"
          plan: "c3.small.x86"
          os: "rhel_9"
          # Workflow inputs, exposed as environment variables so they are
          # never interpolated directly into the script below.
          INPUT_TERMINATION_TIME: ${{ github.event.inputs.termination_time }}
          INPUT_CONTROL_PLAN: ${{ github.event.inputs.control_plan }}
          INPUT_NODE_PLAN: ${{ github.event.inputs.node_plan }}
          INPUT_METRO: ${{ github.event.inputs.metro }}
        run: |
          # Register the mask_ip_callback plugin shipped with the repo.
          mkdir -p ~/.ansible
          echo '[defaults]' > ~/.ansible/ansible.cfg
          echo 'callback_plugins = ~/.github/callback_plugins' >> ~/.ansible/ansible.cfg
          echo 'callback_whitelist = mask_ip_callback' >> ~/.ansible/ansible.cfg
          mkdir -p ~/.github/callback_plugins
          cd kepler-metal-ci
          cp ansible/mask_ip_callback.py ~/.github/callback_plugins/mask_ip_callback.py
          # save input var into extra-vars.yaml
          echo "termination_time: ${INPUT_TERMINATION_TIME}" > /tmp/extra-vars.yaml
          echo "control_plan: ${INPUT_CONTROL_PLAN}" >> /tmp/extra-vars.yaml
          echo "node_plan: ${INPUT_NODE_PLAN}" >> /tmp/extra-vars.yaml
          echo "metro: ${INPUT_METRO}" >> /tmp/extra-vars.yaml
          ansible-playbook ansible/k8s_kubeadm.yml \
            --extra-vars "@/tmp/extra-vars.yaml" >> k8s_kubeadm.log
          # /tmp/kubeconfig is read as JSON; keep only its .stdout field.
          sudo cat /tmp/kubeconfig |jq -r '.stdout' > /tmp/k8sconfig
          sudo mv /tmp/k8sconfig /tmp/kubeconfig
      - name: Upload kubeconfig
        # v4: the v1/v2 artifact actions are deprecated and no longer run.
        uses: actions/upload-artifact@v4
        with:
          name: kubeconfig-${{ github.run_id }}
          path: /tmp/kubeconfig
          retention-days: 1

  # Downloads the kubeconfig artifact, installs Prometheus and Kepler on the
  # cluster, then drives load with kube-burner and reports Kepler CPU usage.
  Test-K8s:
    name: "Test K8s"
    # Spelled exactly like the job id declared above.
    needs: Create-k8s-cluster
    runs-on: ubuntu-latest
    steps:
      - name: Install kubectl
        run: |
          curl -LO "https://dl.k8s.io/release/v1.22.0/bin/linux/amd64/kubectl"
          chmod +x kubectl
          sudo mv kubectl /usr/local/bin/
      - name: mkdir ~/.kube
        run: |
          mkdir -p ~/.kube
      - name: Download kubeconfig
        # Must be the same major version as the upload step's action.
        uses: actions/download-artifact@v4
        with:
          name: kubeconfig-${{ github.run_id }}
          path: ~/.kube/
      - name: Test k8s
        run: |
          ls -la ~/.kube/*
          mv ~/.kube/kubeconfig ~/.kube/config
          kubectl get nodes
          kubectl get pods --all-namespaces
      - name: Label workers
        run: |
          kubectl label node $(kubectl get nodes --selector=node-role.kubernetes.io/control-plane!= --no-headers -o custom-columns=":metadata.name") node-role.kubernetes.io/worker=
      - name: Install Prometheus
        run: |
          git clone --depth 1 https://github.com/prometheus-operator/kube-prometheus; cd kube-prometheus;
          kubectl apply --server-side --validate=false -f manifests/setup
          kubectl apply --validate=false -f manifests/
          until kubectl -n monitoring get statefulset prometheus-k8s; do kubectl get all -n monitoring; echo "StatefulSet not created yet, waiting..."; sleep 5; done
          kubectl wait deployments -n monitoring prometheus-adapter --for=condition=available --timeout 3m
          kubectl rollout status --watch --timeout=600s statefulset -n monitoring prometheus-k8s
      - name: Port forward prom
        run: |
          # Left running in the background; later steps query localhost:9090.
          kubectl port-forward -n monitoring svc/prometheus-k8s 9090:9090 &
      - name: Install Kepler helm chart
        run: |
          # Install helm binary first
          curl -fsSL -o get_helm.sh https://raw.githubusercontent.com/helm/helm/main/scripts/get-helm-3
          chmod 700 get_helm.sh
          ./get_helm.sh
          export PATH=$PATH:/usr/local/bin
          helm repo add kepler https://sustainable-computing-io.github.io/kepler-helm-chart
          helm repo update
          helm install kepler kepler/kepler --namespace kepler --create-namespace --set serviceMonitor.enabled=true --set image.tag=latest
      - name: Allow prometheus to scrape kepler metrics in kepler namespace
        run: |
          curl -O https://raw.githubusercontent.com/sustainable-computing-io/kepler/main/manifests/k8s/config/rbac/prometheus_role.yaml
          sed -i 's/namespace: system/namespace: kepler/' prometheus_role.yaml
          kubectl apply -f prometheus_role.yaml
          # Bind the prometheus-k8s service account (monitoring namespace)
          # to the prometheus-k8s Role in the kepler namespace.
          kubectl apply -f - <<EOF
          apiVersion: rbac.authorization.k8s.io/v1
          kind: RoleBinding
          metadata:
            labels:
              app.kubernetes.io/component: prometheus
              app.kubernetes.io/instance: k8s
              app.kubernetes.io/name: prometheus
              sustainable-computing.io/app: kepler
            name: prometheus-k8s
            namespace: kepler
          roleRef:
            apiGroup: rbac.authorization.k8s.io
            kind: Role
            name: prometheus-k8s
          subjects:
          - kind: ServiceAccount
            name: prometheus-k8s
            namespace: monitoring
          EOF
      - name: Wait for Kepler metrics kepler_node_package_joules_total is not empty through Prometheus query
        run: |
          # The heredoc delimiter is quoted so that $VARS and $(...) are
          # written into the script verbatim instead of being expanded by
          # this step's shell while the file is being generated.
          cat <<'EOF' > wait_for_prometheus.sh
          #!/bin/bash
          # Prometheus server URL
          PROMETHEUS_URL="http://localhost:9090/api/v1/query"
          # Prometheus query
          QUERY="kepler_node_package_joules_total"
          # Function to query Prometheus and check for values
          query_prometheus() {
            RESPONSE=$(curl -sG --data-urlencode "query=$QUERY" "$PROMETHEUS_URL")
            VALUES=$(echo "$RESPONSE" | jq '.data.result | length')
            echo "$VALUES"
          }
          # Retry 10 times with 5 seconds interval
          FOUND=false
          for i in {1..10}; do
            VALUES=$(query_prometheus)
            # Treat an empty answer (no response or unparsable JSON) as zero
            # results so the numeric test below never sees an empty string.
            if [ "${VALUES:-0}" -gt 0 ]; then
              echo "Values found in the query result."
              FOUND=true
              break
            else
              echo "No values found yet, retrying in 5 seconds..."
              sleep 5
            fi
          done
          # Stay non-fatal, but make a timeout visible in the run summary.
          if [ "$FOUND" != true ]; then
            echo "::warning::No values for $QUERY after 10 attempts"
          fi
          echo "Exiting"
          EOF
          chmod +x wait_for_prometheus.sh
          ./wait_for_prometheus.sh
      - name: Pull kube-burner
        run: |
          # The clone provides the example workloads; the release tarball
          # provides the kube-burner binary, extracted into the clone.
          git clone https://github.com/kube-burner/kube-burner
          curl -sS -L "https://github.com/kube-burner/kube-burner/releases/download/v1.9.5/kube-burner-V1.9.5-linux-x86_64.tar.gz" | tar -xzC kube-burner/ kube-burner
      - name: Run kube-burner node-density
        run: |
          cp kube-burner/examples/metrics-profiles/metrics.yaml kube-burner/examples/workloads/kubelet-density/metrics.yaml
          cd kube-burner/examples/workloads/kubelet-density
          sed -i 's/qps: 2/qps: 20/g' kubelet-density.yml
          sed -i 's/burst: 2/burst: 20/g' kubelet-density.yml
          sed -i 's/jobIterations: 25/jobIterations: 80/g' kubelet-density.yml
          # Append a local indexer; the list item and its sibling key must be
          # nested under "indexers:" for the result to be valid YAML.
          echo "indexers:" >> kubelet-density.yml
          echo "  - type: local" >> kubelet-density.yml
          echo "    metricsDirectory: collected-metrics" >> kubelet-density.yml
          export START_TIME=$(date -u +"%Y-%m-%dT%H:%M:%SZ")
          ../../../kube-burner init -c kubelet-density.yml -u http://localhost:9090 --metrics-profile metrics.yaml
          export END_TIME=$(date -u +"%Y-%m-%dT%H:%M:%SZ")
          # Retrieve metrics related to Kepler cpu and memory utilization over time
          curl -G 'http://localhost:9090/api/v1/query_range' \
            --data-urlencode 'query=sum (rate (container_cpu_usage_seconds_total{namespace="kepler"}[1m])) / sum(machine_cpu_cores) * 100' \
            --data-urlencode "start=${START_TIME}" --data-urlencode "end=${END_TIME}" --data-urlencode "step=10" \
            | jq -r '.data.result[0].values[] | @tsv' | awk '{printf "Time: %s, Utilization: %s%%\n", strftime("%Y-%m-%d %H:%M:%S", $1), $2}'