Skip to content

Commit

Permalink
Enrich pod health check
Browse files Browse the repository at this point in the history
  • Loading branch information
Rei1010 committed Jan 10, 2025
1 parent 72621e8 commit 5edd989
Show file tree
Hide file tree
Showing 3 changed files with 36 additions and 2 deletions.
6 changes: 6 additions & 0 deletions hack/deploy-helm.sh
Original file line number Diff line number Diff line change
Expand Up @@ -63,3 +63,9 @@ util::exec_cmd helm --debug upgrade --install --create-namespace --cleanup-on-fa
--set devicePlugin.passDeviceSpecsEnabled=false \
--version "${HELM_VER}" --wait --timeout 20m --kubeconfig "${KUBE_CONF}"

# Print the pod list of the target namespace so it shows up in the deploy log.
kubectl --kubeconfig "${KUBE_CONF}" get po -n "${TARGET_NS}"

# Fail the deployment when any pod is unhealthy. The helper is defined as
# `check_pods_status` (no `util::` prefix), so it must be called by that name;
# `util:check_pods_status` does not exist and always failed with
# "command not found".
if ! check_pods_status "${KUBE_CONF}" "${TARGET_NS}" ; then
  # `return` is only legal in a function or a sourced script; fall back to
  # `exit` when this file is executed directly.
  return 1 2>/dev/null || exit 1
fi
29 changes: 29 additions & 0 deletions hack/util.sh
Original file line number Diff line number Diff line change
Expand Up @@ -86,3 +86,32 @@ function util::wait_ip_reachable(){
fi
done
}

# Check that every pod in a namespace is healthy and dump diagnostics
# (describe + logs) for each pod that is not.
#
# Arguments:
#   $1 - path to the kubeconfig file (optional; when empty, kubectl's own
#        default lookup is used)
#   $2 - namespace to inspect (optional; defaults to "hami-system")
# Returns:
#   0 when the STATUS column of every pod is Running, Completed or Succeeded
#     (an empty namespace also passes);
#   1 when at least one pod is in another state, or the pod list cannot be
#     fetched.
function check_pods_status() {
  local kubeconfig=${1:-""}
  local namespace=${2:-"hami-system"}

  # Only pass --kubeconfig when a path was actually supplied.
  local -a kube_args=(-n "$namespace")
  if [[ -n "$kubeconfig" ]]; then
    kube_args+=(--kubeconfig "$kubeconfig")
  fi

  # A failing `kubectl get` must not be mistaken for "no unhealthy pods".
  local pod_list
  if ! pod_list=$(kubectl get po "${kube_args[@]}" --no-headers); then
    echo "FAIL: unable to list pods in namespace $namespace" >&2
    return 1
  fi

  # Match on the STATUS column only (not the whole line) and keep just the
  # pod name, so the loop below never sees READY/RESTARTS/AGE values.
  # kubectl displays finished pods as "Completed"; "Succeeded" is kept for
  # backward compatibility with the previous filter.
  local unhealthy_pods
  unhealthy_pods=$(awk 'NF > 0 && $3 != "Running" && $3 != "Completed" && $3 != "Succeeded" { print $1 }' <<<"$pod_list")

  if [[ -z "$unhealthy_pods" ]]; then
    echo "PASS: All Pods are in Running state."
    return 0
  fi

  echo "Found unhealthy pods in namespace $namespace:"
  echo "$unhealthy_pods"

  # Pod names contain neither whitespace nor glob characters, so plain word
  # splitting over the newline-separated list is safe here.
  local pod
  for pod in $unhealthy_pods; do
    echo "Describing pod: $pod"
    # Diagnostics are best-effort: a failure here must not mask the result.
    kubectl describe po "$pod" "${kube_args[@]}" || true

    echo "Fetching logs for pod: $pod"
    # --all-containers avoids the "a container name must be specified" error
    # on multi-container pods.
    kubectl logs "$pod" --all-containers=true "${kube_args[@]}" || true
    echo "---------------------------------------------------"
  done

  return 1
}

# Namespaced alias matching the `util::` prefix used by the other helpers in
# this file; forwards all arguments and the exit status unchanged.
function util::check_pods_status() {
  check_pods_status "$@"
}
3 changes: 1 addition & 2 deletions test/e2e/pod/test_pod.go
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ var _ = ginkgo.Describe("Pod E2E Tests", ginkgo.Ordered, func() {
gomega.Expect(err).NotTo(gomega.HaveOccurred())
})

ginkgo.It("create single pod with configuration", func() {
ginkgo.It("create single pod with CUDA configuration", func() {
newPod = utils.Pod.DeepCopy()
newPod.Name = newPod.Name + utils.GetRandom()

Expand Down Expand Up @@ -92,7 +92,6 @@ var _ = ginkgo.Describe("Pod E2E Tests", ginkgo.Ordered, func() {
newPod.Name = newPod.Name + utils.GetRandom()
newPod.Spec.Containers = append(newPod.Spec.Containers, newPod.Spec.Containers[0])
newPod.Spec.Containers = append(newPod.Spec.Containers, newPod.Spec.Containers[0])
//newPod.Spec.Containers[0].Name = newPod.Spec.Containers[0].Name + utils.GetRandom()
newPod.Spec.Containers[1].Name = newPod.Spec.Containers[0].Name + utils.GetRandom()
newPod.Spec.Containers[2].Name = newPod.Spec.Containers[0].Name + utils.GetRandom()

Expand Down

0 comments on commit 5edd989

Please sign in to comment.