From 8b18d4a65b6aecc9a49e7ef9dfe57b7cae68e4e0 Mon Sep 17 00:00:00 2001 From: JamesMurkin Date: Fri, 6 Sep 2024 18:16:20 +0100 Subject: [PATCH] Delete executor-cluster-monitoring chart Signed-off-by: JamesMurkin --- .../executor-cluster-monitoring/Chart.yaml | 5 -- .../executor-cluster-monitoring/README.md | 15 ------ .../templates/_helpers.tpl | 25 --------- .../templates/prometheusrule.yaml | 52 ------------------- .../executor-cluster-monitoring/values.yaml | 2 - 5 files changed, 99 deletions(-) delete mode 100644 deployment/executor-cluster-monitoring/Chart.yaml delete mode 100644 deployment/executor-cluster-monitoring/README.md delete mode 100644 deployment/executor-cluster-monitoring/templates/_helpers.tpl delete mode 100644 deployment/executor-cluster-monitoring/templates/prometheusrule.yaml delete mode 100644 deployment/executor-cluster-monitoring/values.yaml diff --git a/deployment/executor-cluster-monitoring/Chart.yaml b/deployment/executor-cluster-monitoring/Chart.yaml deleted file mode 100644 index 9895dd45e5d..00000000000 --- a/deployment/executor-cluster-monitoring/Chart.yaml +++ /dev/null @@ -1,5 +0,0 @@ -apiVersion: v1 -description: A helm chart for monitoring metrics of a cluster managed by a armada-executor component -name: armada-executor-cluster-monitoring -version: 0.0.0-latest -appVerison: 0.0.0-latest diff --git a/deployment/executor-cluster-monitoring/README.md b/deployment/executor-cluster-monitoring/README.md deleted file mode 100644 index 29bdb0731db..00000000000 --- a/deployment/executor-cluster-monitoring/README.md +++ /dev/null @@ -1,15 +0,0 @@ -# armada-executor-cluster-monitoring - -![Version: 0.0.0-latest](https://img.shields.io/badge/Version-0.0.0--latest-informational?style=flat-square) - -A helm chart for monitoring metrics of a cluster managed by a armada-executor component - -## Values - -| Key | Type | Default | Description | -|-----|------|---------|-------------| -| additionalLabels | object | `{}` | | -| interval | string | `"10s"` | | - ----------------------------------------------- -Autogenerated from chart metadata using [helm-docs v1.11.0](https://github.com/norwoodj/helm-docs/releases/v1.11.0) diff --git a/deployment/executor-cluster-monitoring/templates/_helpers.tpl b/deployment/executor-cluster-monitoring/templates/_helpers.tpl deleted file mode 100644 index 529a23d0872..00000000000 --- a/deployment/executor-cluster-monitoring/templates/_helpers.tpl +++ /dev/null @@ -1,25 +0,0 @@ -{{/* -Expand the name of the chart. -*/}} -{{- define "executor-cluster-monitoring.name" -}} -{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" -}} -{{- end -}} - -{{/* -Create chart name and version as used by the chart label. -*/}} -{{- define "executor-cluster-monitoring.chart" -}} -{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" -}} -{{- end -}} - -{{/* -Common labels -*/}} -{{- define "executor-cluster-monitoring.labels.all" -}} -app: {{ include "executor-cluster-monitoring.name" . }} -chart: {{ include "executor-cluster-monitoring.chart" . }} -release: {{ .Release.Name }} -{{- if .Values.additionalLabels }} -{{ toYaml .Values.additionalLabels }} -{{- end }} -{{- end -}} diff --git a/deployment/executor-cluster-monitoring/templates/prometheusrule.yaml b/deployment/executor-cluster-monitoring/templates/prometheusrule.yaml deleted file mode 100644 index 37d1c73c4d1..00000000000 --- a/deployment/executor-cluster-monitoring/templates/prometheusrule.yaml +++ /dev/null @@ -1,52 +0,0 @@ -apiVersion: monitoring.coreos.com/v1 -kind: PrometheusRule -metadata: - name: {{ include "executor-cluster-monitoring.name" . }} - namespace: {{ .Release.Namespace }} - labels: - {{- include "executor-cluster-monitoring.labels.all" . | nindent 4 }} -spec: - groups: - - name: armada-pod-details - interval: {{ .Values.interval }} - rules: - - record: armada:monitoring:armada_pod_by_queue:sum - expr: | - sum(kube_pod_labels{label_armada_queue_id!=""}) by (pod, label_armada_queue_id) - * on (pod) group_left(phase) sum(kube_pod_status_phase==1) by (pod, namespace, phase) - - record: armada:monitoring:armada_pod_cpu_request:sum - expr: | - sum( - sum(kube_pod_container_resource_requests_cpu_cores) by (pod) - * on (pod) group_left(label_armada_queue_id, phase) armada:monitoring:armada_pod_by_queue:sum - ) by (label_armada_queue_id, phase) - - record: armada:monitoring:armada_pod_memory_request:sum - expr: | - sum( - sum(kube_pod_container_resource_requests_memory_bytes) by (pod) - * on (pod) group_left(label_armada_queue_id, phase) armada:monitoring:armada_pod_by_queue:sum - ) by (label_armada_queue_id, phase) - - record: armada:monitoring:armada_pods_per_phase:sum - expr: sum(armada:monitoring:armada_pod_by_queue:sum) by (label_armada_queue_id, phase) - - - record: namespace:pods_per_phase:sum - expr: sum(kube_pod_status_phase) by (namespace, phase) - - record: namespace:running_pods:sum - expr: sum(kube_pod_status_phase{phase!="Succeeded", phase!="Failed"}==1) by (pod, namespace, phase) - - record: container:worker_node_cpu_core_request:sum - expr: sum(kube_pod_container_resource_requests_cpu_cores) by (pod, namespace, node) * on (pod, namespace) group_left() namespace:running_pods:sum * on (node) group_left() (node:schedulable_worker_nodes:sum) - - record: container:worker_node_memory_bytes_request:sum - expr: sum(kube_pod_container_resource_requests_memory_bytes) by (pod, namespace, node) * on (pod, namespace) group_left() namespace:running_pods:sum * on (node) group_left() (node:schedulable_worker_nodes:sum) - - record: container:worker_node_cpu_usage_seconds_total:sum - expr: sum(rate(container_cpu_usage_seconds_total{container_name!=""}[1m]) * on (node) group_left(job) node:schedulable_worker_nodes:sum) - - record: container:worker_node_memory_usage_bytes:sum - expr: sum(container_memory_usage_bytes{container_name!=""} * on (pod, namespace) group_left() namespace:running_pods:sum * on (node) group_left(job) node:schedulable_worker_nodes:sum) - - name: armada-nodes-details - interval: {{ .Values.interval }} - rules: - - record: node:schedulable_worker_nodes:sum - expr: sum(kube_node_info) by (node) unless (sum(kube_node_spec_unschedulable > 0) by (node) or sum(kube_node_spec_taint{effect="NoSchedule"}) by (node)) - - record: node:worker_node_allocatable_cpu_cores:sum - expr: sum(kube_node_status_allocatable_cpu_cores * on (node) node:schedulable_worker_nodes:sum) - - record: node:worker_node_allocatable_memory_bytes:sum - expr: sum(kube_node_status_allocatable_memory_bytes * on (node) node:schedulable_worker_nodes:sum) diff --git a/deployment/executor-cluster-monitoring/values.yaml b/deployment/executor-cluster-monitoring/values.yaml deleted file mode 100644 index 78e43b20083..00000000000 --- a/deployment/executor-cluster-monitoring/values.yaml +++ /dev/null @@ -1,2 +0,0 @@ -additionalLabels: {} -interval: 10s