diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 96d771f6..586a299b 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -7,8 +7,6 @@ repos: rev: v4.4.0 hooks: - id: trailing-whitespace - - id: check-json - files: resources/index.json - id: end-of-file-fixer - repo: https://github.com/pre-commit/mirrors-prettier rev: v3.1.0 diff --git a/README.md b/README.md index bd323955..1c520d80 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,7 @@ ## What is it? -This repository contains resources & other information required by the [Observability Operator](https://github.com/redhat-developer/observability-operator) +This repository contains resources & other information required by the [Observability Operator](https://github.com/rhobs/observability-operator) to configure the Observability stack for Managed Red Hat Advanced Cluster Security. Resources are maintained in the `/resources` folder. Sub folders contain the following: @@ -13,8 +13,8 @@ Resources are maintained in the `/resources` folder. Sub folders contain the fol ## Install -The observability stack is installed by the [data plane terraforming Helm chart](https://github.com/stackrox/acs-fleet-manager/tree/main/dp-terraform/helm/rhacs-terraform). -Please follow the instructions in the fleet manager repository to install the Helm chart. +The observability stack is installed by ArgoCD from the [gitops repo](https://github.com/stackrox/acscs-manifests). +Please follow the instructions in the fleet manager repository to install ArgoCD. ## Branches diff --git a/resources/grafana/generated/dashboards/rhacs-autoscaler.yaml b/resources/grafana/generated/dashboards/rhacs-autoscaler.yaml index 1d158030..9addfd34 100644 --- a/resources/grafana/generated/dashboards/rhacs-autoscaler.yaml +++ b/resources/grafana/generated/dashboards/rhacs-autoscaler.yaml @@ -1,4 +1,4 @@ -apiVersion: integreatly.org/v1alpha1 +apiVersion: grafana.integreatly.org/v1beta1 kind: GrafanaDashboard metadata: labels: @@ -6,7 +6,9 @@ metadata: monitoring-key: middleware name: rhacs-autoscaler spec: - name: rhacs-autoscaler.json + instanceSelector: + matchLabels: + dashboards: "rhacs" json: | { "__inputs": [], diff --git a/resources/grafana/generated/dashboards/rhacs-central-release.yaml b/resources/grafana/generated/dashboards/rhacs-central-release.yaml index 4a51965b..9d44b28e 100644 --- a/resources/grafana/generated/dashboards/rhacs-central-release.yaml +++ b/resources/grafana/generated/dashboards/rhacs-central-release.yaml @@ -1,4 +1,4 @@ -apiVersion: integreatly.org/v1alpha1 +apiVersion: grafana.integreatly.org/v1beta1 kind: GrafanaDashboard metadata: labels: @@ -6,7 +6,9 @@ metadata: monitoring-key: middleware name: rhacs-central-release spec: - name: rhacs-central-release.json + instanceSelector: + matchLabels: + dashboards: "rhacs" json: | { "annotations": { diff --git a/resources/grafana/generated/dashboards/rhacs-central-slo.yaml b/resources/grafana/generated/dashboards/rhacs-central-slo.yaml index 6c1068f1..4603603b 100644 --- a/resources/grafana/generated/dashboards/rhacs-central-slo.yaml +++ b/resources/grafana/generated/dashboards/rhacs-central-slo.yaml @@ -1,4 +1,4 @@ -apiVersion: integreatly.org/v1alpha1 +apiVersion: grafana.integreatly.org/v1beta1 kind: GrafanaDashboard metadata: labels: @@ -6,7 +6,9 @@ metadata: monitoring-key: middleware name: rhacs-central-slo spec: - name: rhacs-central-slo.json + instanceSelector: + matchLabels: + dashboards: "rhacs" json: | { "annotations": { diff --git a/resources/grafana/generated/dashboards/rhacs-central.yaml b/resources/grafana/generated/dashboards/rhacs-central.yaml index e7a9e261..8b558fa3 100644 --- a/resources/grafana/generated/dashboards/rhacs-central.yaml +++ b/resources/grafana/generated/dashboards/rhacs-central.yaml @@ -1,4 +1,4 @@ -apiVersion: integreatly.org/v1alpha1 +apiVersion: grafana.integreatly.org/v1beta1 kind: GrafanaDashboard metadata: labels: @@ -6,7 +6,9 @@ metadata: monitoring-key: middleware name: rhacs-central spec: - name: rhacs-central.json + instanceSelector: + matchLabels: + dashboards: "rhacs" json: | { "annotations": { diff --git a/resources/grafana/generated/dashboards/rhacs-cluster-overview.yaml b/resources/grafana/generated/dashboards/rhacs-cluster-overview.yaml index bed9d321..d8b9bb7b 100644 --- a/resources/grafana/generated/dashboards/rhacs-cluster-overview.yaml +++ b/resources/grafana/generated/dashboards/rhacs-cluster-overview.yaml @@ -1,4 +1,4 @@ -apiVersion: integreatly.org/v1alpha1 +apiVersion: grafana.integreatly.org/v1beta1 kind: GrafanaDashboard metadata: labels: @@ -6,7 +6,9 @@ metadata: monitoring-key: middleware name: rhacs-cluster-overview spec: - name: rhacs-cluster-overview.json + instanceSelector: + matchLabels: + dashboards: "rhacs" json: | { "annotations": { @@ -35,8 +37,8 @@ spec: "uid": "PBFA97CFB590B2093" }, "enable": true, - "iconColor": "purple", "expr": "count (count by (git_version) (label_replace(count_over_time(kubernetes_build_info{job!~\"kube-dns|coredns\"}[${__interval}]), \"git_version\", \"$1\", \"git_version\", \"(v[0-9]*.[0-9]*).*\"))) > 1", + "iconColor": "purple", "name": "Kubernetes Upgrade", "textFormat": "Kubernetes Upgrade" }, @@ -46,8 +48,8 @@ spec: "uid": "PBFA97CFB590B2093" }, "enable": true, - "iconColor": "red", "expr": "count (count by (gitVersion) (count_over_time (openshift_apiserver_build_info[${__interval}]))) > 1", + "iconColor": "red", "name": "OpenShift Upgrade", "textFormat": "OpenShift Upgrade" } @@ -56,7 +58,7 @@ spec: "editable": true, "fiscalYearStartMonth": 0, "graphTooltip": 0, - "id": 23, + "id": 26, "links": [], "liveNow": false, "panels": [ @@ -525,8 +527,7 @@ spec: "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -623,8 +624,7 @@ spec: "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -673,19 +673,6 @@ spec: "title": "Secured Clusters", "type": "timeseries" }, - { - "collapsed": false, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 25 - }, - "id": 16, - "panels": [], - "title": "Resources", - "type": "row" - }, { "datasource": { "type": "prometheus", @@ -733,8 +720,7 @@ spec: "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -750,7 +736,7 @@ spec: "h": 8, "w": 12, "x": 0, - "y": 26 + "y": 25 }, "id": 4, "options": { @@ -783,6 +769,19 @@ spec: "title": "Network Received", "type": "timeseries" }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 33 + }, + "id": 16, + "panels": [], + "title": "Resources", + "type": "row" + }, { "datasource": { "type": "prometheus", @@ -830,8 +829,7 @@ spec: "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -839,17 +837,17 @@ spec: } ] }, - "unit": "Bps" + "unit": "percentunit" }, "overrides": [] }, "gridPos": { "h": 8, "w": 12, - "x": 12, - "y": 26 + "x": 0, + "y": 34 }, - "id": 5, + "id": 6, "options": { "legend": { "calcs": [], @@ -859,7 +857,7 @@ spec: }, "tooltip": { "mode": "single", - "sort": "none" + "sort": "desc" } }, "pluginVersion": "9.1.0", @@ -870,14 +868,15 @@ spec: "uid": "PBFA97CFB590B2093" }, "editorMode": "code", - "expr": "sum(rate(container_network_transmit_bytes_total{namespace=~\"rhacs-$instance_id\", job=~\"kubelet\"}[5m])) by (namespace)", + "expr": "sum(container_memory_working_set_bytes{namespace=~\"rhacs-$instance_id\", container=\"central\", job=~\"kubelet\"}) by (namespace) / sum(container_spec_memory_limit_bytes{namespace=~\"rhacs-$instance_id\", container=\"central\", job=~\"kubelet\"}) by (namespace)", + "format": "time_series", "interval": "", "legendFormat": "{{namespace}}", "range": true, "refId": "A" } ], - "title": "Network Transmitted", + "title": "Central Memory Usage", "type": "timeseries" }, { @@ -927,8 +926,7 @@ spec: "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -936,17 +934,17 @@ spec: } ] }, - "unit": "percentunit" + "unit": "Bps" }, "overrides": [] }, "gridPos": { "h": 8, "w": 12, - "x": 0, + "x": 12, "y": 34 }, - "id": 6, + "id": 5, "options": { "legend": { "calcs": [], @@ -956,7 +954,7 @@ spec: }, "tooltip": { "mode": "single", - "sort": "desc" + "sort": "none" } }, "pluginVersion": "9.1.0", @@ -967,15 +965,14 @@ spec: "uid": "PBFA97CFB590B2093" }, "editorMode": "code", - "expr": "sum(container_memory_working_set_bytes{namespace=~\"rhacs-$instance_id\", container=\"central\", job=~\"kubelet\"}) by (namespace) / sum(container_spec_memory_limit_bytes{namespace=~\"rhacs-$instance_id\", container=\"central\", job=~\"kubelet\"}) by (namespace)", - "format": "time_series", + "expr": "sum(rate(container_network_transmit_bytes_total{namespace=~\"rhacs-$instance_id\", job=~\"kubelet\"}[5m])) by (namespace)", "interval": "", "legendFormat": "{{namespace}}", "range": true, "refId": "A" } ], - "title": "Central Memory Usage", + "title": "Network Transmitted", "type": "timeseries" }, { @@ -1039,8 +1036,7 @@ spec: "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -1118,7 +1114,7 @@ spec: "content": "### Description\n\nThis graph shows the occurences per minute of **SELinux AVC denials** on the cluster.\nThese violations are logged on the cluster, propagated to CloudWatch, aggregated by a log metric, retrieved by the cloudwatch-exporter and finally scraped by Prometheus.\n\n**Expected: 0 violations.**\n\nA violation means that the cluster node's SELinux policy prevented a process' actions.\nAs an example, a violation could indicate that a process on the cluster tried to access a file which is SELinux-protected.\n\n### Drill-Down\n\nLog into the cluster's AWS account and use a [Log Insights query](https://us-east-1.console.aws.amazon.com/cloudwatch/home?region=us-east-1#logsV2:logs-insights$3FqueryDetail$3D~(end~0~start~-3600~timeType~'RELATIVE~unit~'seconds~editorString~'fields*20*40timestamp*2c*20*40message*2c*20*40logStream*2c*20*40log*0a*7c*20filter*20*40logStream*20like*20*2flinux-audit*2f*0a*7c*20filter*20*40message*20like*20*2fAVC*2f*0a*7c*20sort*20*40timestamp*20desc*0a*7c*20limit*201000~source~(~))) similar to this one:\n```\nfields @timestamp, @message, @logStream, @log\n| filter @logStream like /linux-audit/\n| filter @message like /AVC/\n| sort @timestamp desc\n| limit 1000\n```\n\n**Note:**\n* all CloudWatch related resources are located in the `us-east-1` region.\n* the log group containing the violation logs are called `acs-.audit`.\n", "mode": "markdown" }, - "pluginVersion": "10.2.0", + "pluginVersion": "11.1.0", "type": "text" }, { @@ -1169,8 +1165,7 @@ spec: "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -1248,7 +1243,7 @@ spec: "content": "### Description\n\nThis graph shows the occurences per minute of Network Policy ACL denials on the cluster.\nThese violations are logged on the cluster, propagated to CloudWatch, aggregated by a log metric, retrieved by the cloudwatch-exporter and finally scraped by Prometheus.\n\n**Expected: 0 violations.**\n\nA violation means that network traffic was prevented due to a Kubernetes Network Policy.\nAs an example, a violation could indicate that communication between RHACS tenant namespaces\nwas attempted, which is strictly forbidden.\n\n### Drill-Down\n\nLog into the cluster's AWS account and use a [Log Insights query](https://us-east-1.console.aws.amazon.com/cloudwatch/home?region=us-east-1#logsV2:logs-insights$3FqueryDetail$3D~(end~0~start~-3600~timeType~'RELATIVE~unit~'seconds~editorString~'fields*20*40timestamp*2c*20*40message*2c*20*40logStream*2c*20*40log*0a*7c*20filter*20*40message*20like*20*2facl_log*28.*2a*29.*2a*5csverdict*3ddrop*2f*0a*7c*20filter*20*40logStream*20like*20*2f.*2aovn-audit*5c.log*2f*0a*7c*20sort*20*40timestamp*20desc*0a*7c*20limit*201000~source~(~))) similar to this one:\n```\nfields @timestamp, @message, @logStream, @log\n| filter @message like /acl_log(.*).*\\sverdict=drop/\n| filter @logStream like /.*ovn-audit\\.log/\n| sort @timestamp desc\n| limit 1000\n```\n\n**Note:**\n* all CloudWatch related resources are located in the `us-east-1` region.\n* the log group containing the violation logs are called `acs-.audit`.\n\n", "mode": "markdown" }, - "pluginVersion": "10.2.0", + "pluginVersion": "11.1.0", "type": "text" }, { @@ -1289,8 +1284,7 @@ spec: "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -1327,8 +1321,7 @@ spec: "mode": "percentage", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "orange", @@ -1481,7 +1474,7 @@ spec: } ] }, - "pluginVersion": "10.2.0", + "pluginVersion": "11.1.0", "targets": [ { "datasource": { @@ -1666,8 +1659,7 @@ spec: "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -1732,8 +1724,7 @@ spec: "mode": "percentage", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "#EAB839", @@ -1807,8 +1798,7 @@ spec: "mode": "percentage", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "yellow", @@ -1882,8 +1872,7 @@ spec: "mode": "percentage", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "#EAB839", @@ -1952,7 +1941,7 @@ spec: } ] }, - "pluginVersion": "10.2.0", + "pluginVersion": "11.1.0", "targets": [ { "datasource": { @@ -2130,11 +2119,249 @@ spec: } ], "type": "table" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 85 + }, + "id": 149, + "panels": [], + "title": "Certificate Expiry", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "description": "Expiration dates for central and scanner TLS certificates.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "center", + "cellOptions": { + "type": "auto" + }, + "filterable": true, + "inspect": false + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 13, + "w": 24, + "x": 0, + "y": 86 + }, + "id": 148, + "options": { + "cellHeight": "sm", + "footer": { + "countRows": false, + "enablePagination": true, + "fields": "", + "reducer": ["sum"], + "show": false + }, + "showHeader": true, + "sortBy": [ + { + "desc": false, + "displayName": "Expiration" + } + ] + }, + "pluginVersion": "11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "disableTextWrap": false, + "editorMode": "code", + "exemplar": true, + "expr": "acs_fleetshard_certificate_expiration_timestamp * on (exported_namespace) group_left(rhacs_org_name) label_replace(rox_central_info, \"exported_namespace\", \"$1\", \"namespace\", \"(.*)\")", + "format": "table", + "fullMetaSearch": false, + "includeNullMetadata": true, + "instant": false, + "interval": "", + "legendFormat": "__auto", + "range": true, + "refId": "A", + "useBackend": false + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "", + "hide": false, + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "B" + } + ], + "title": "Certificates Expiry", + "transformations": [ + { + "id": "organize", + "options": { + "excludeByName": { + "Time": true, + "Value": false, + "__name__": true, + "container": true, + "instance": true, + "job": true, + "namespace": true, + "pod": true, + "rhacs_cluster_name": true, + "rhacs_environment": true, + "rhacs_org_name": false + }, + "includeByName": {}, + "indexByName": { + "Time": 4, + "Value": 12, + "__name__": 5, + "container": 6, + "data_key": 3, + "exported_namespace": 0, + "instance": 7, + "job": 8, + "namespace": 1, + "pod": 9, + "rhacs_cluster_name": 10, + "rhacs_environment": 11, + "secret": 2 + }, + "renameByName": { + "Time": "Expiration", + "Value": "", + "container": "", + "data_key": "Key", + "exported_namespace": "Namespace", + "secret": "Secret" + } + } + }, + { + "id": "calculateField", + "options": { + "alias": "Expiration", + "binary": { + "left": "Value", + "operator": "*", + "right": "1000" + }, + "mode": "binary", + "reduce": { + "reducer": "sum" + } + } + }, + { + "id": "convertFieldType", + "options": { + "conversions": [ + { + "destinationType": "time", + "targetField": "Expiration" + } + ], + "fields": {} + } + }, + { + "id": "organize", + "options": { + "excludeByName": { + "Value": true + }, + "includeByName": {}, + "indexByName": {}, + "renameByName": {} + } + }, + { + "id": "groupBy", + "options": { + "fields": { + "Expiration": { + "aggregations": ["last"], + "operation": "aggregate" + }, + "Key": { + "aggregations": [], + "operation": "groupby" + }, + "Namespace": { + "aggregations": ["last"], + "operation": "groupby" + }, + "Secret": { + "aggregations": [], + "operation": "groupby" + }, + "rhacs_org_name": { + "aggregations": [], + "operation": "groupby" + } + } + } + }, + { + "id": "organize", + "options": { + "excludeByName": {}, + "includeByName": {}, + "indexByName": { + "Expiration (last)": 1, + "Key": 4, + "Namespace": 0, + "Secret": 3, + "rhacs_org_name": 2 + }, + "renameByName": { + "Expiration (last)": "Expiration", + "Namespace (last)": "Namespace", + "rhacs_org_name": "Organization" + } + } + } + ], + "type": "table" } ], - "refresh": "", "revision": 1, - "schemaVersion": 38, + "schemaVersion": 39, "tags": ["rhacs"], "templating": { "list": [ diff --git a/resources/grafana/generated/dashboards/rhacs-cluster-resource-adjustment.yaml b/resources/grafana/generated/dashboards/rhacs-cluster-resource-adjustment.yaml index ac02e5fd..fc183d2e 100644 --- a/resources/grafana/generated/dashboards/rhacs-cluster-resource-adjustment.yaml +++ b/resources/grafana/generated/dashboards/rhacs-cluster-resource-adjustment.yaml @@ -1,4 +1,4 @@ -apiVersion: integreatly.org/v1alpha1 +apiVersion: grafana.integreatly.org/v1beta1 kind: GrafanaDashboard metadata: labels: @@ -6,7 +6,9 @@ metadata: monitoring-key: middleware name: rhacs-cluster-resource-adjustment spec: - name: rhacs-cluster-resource-adjustment.json + instanceSelector: + matchLabels: + dashboards: "rhacs" json: | { "annotations": { diff --git a/resources/grafana/generated/dashboards/rhacs-emailsender.yaml b/resources/grafana/generated/dashboards/rhacs-emailsender.yaml index 9b4e07de..d6aa4980 100644 --- a/resources/grafana/generated/dashboards/rhacs-emailsender.yaml +++ b/resources/grafana/generated/dashboards/rhacs-emailsender.yaml @@ -1,4 +1,4 @@ -apiVersion: integreatly.org/v1alpha1 +apiVersion: grafana.integreatly.org/v1beta1 kind: GrafanaDashboard metadata: labels: @@ -6,7 +6,9 @@ metadata: monitoring-key: middleware name: rhacs-emailsender spec: - name: rhacs-emailsender.json + instanceSelector: + matchLabels: + dashboards: "rhacs" json: | { "__inputs": [], diff --git a/resources/grafana/mixins/kubernetes/apiserver.yaml b/resources/grafana/mixins/kubernetes/apiserver.yaml index ae79eb2c..0f3be9c0 100644 --- a/resources/grafana/mixins/kubernetes/apiserver.yaml +++ b/resources/grafana/mixins/kubernetes/apiserver.yaml @@ -1,4 +1,4 @@ -apiVersion: integreatly.org/v1alpha1 +apiVersion: grafana.integreatly.org/v1beta1 kind: GrafanaDashboard metadata: labels: @@ -6,7 +6,9 @@ metadata: monitoring-key: middleware name: kubernetes-mixin-apiserver spec: - name: kubernetes-mixin-apiserver.json + instanceSelector: + matchLabels: + dashboards: "rhacs" json: | { "editable": false, diff --git a/resources/grafana/mixins/kubernetes/cluster-total.yaml b/resources/grafana/mixins/kubernetes/cluster-total.yaml index be58534c..24f82a8e 100644 --- a/resources/grafana/mixins/kubernetes/cluster-total.yaml +++ b/resources/grafana/mixins/kubernetes/cluster-total.yaml @@ -1,4 +1,4 @@ -apiVersion: integreatly.org/v1alpha1 +apiVersion: grafana.integreatly.org/v1beta1 kind: GrafanaDashboard metadata: labels: @@ -6,7 +6,9 @@ metadata: monitoring-key: middleware name: kubernetes-mixin-cluster-total spec: - name: kubernetes-mixin-cluster-total.json + instanceSelector: + matchLabels: + dashboards: "rhacs" json: | { "editable": false, diff --git a/resources/grafana/mixins/kubernetes/controller-manager.yaml b/resources/grafana/mixins/kubernetes/controller-manager.yaml index 2441124b..5deb787e 100644 --- a/resources/grafana/mixins/kubernetes/controller-manager.yaml +++ b/resources/grafana/mixins/kubernetes/controller-manager.yaml @@ -1,4 +1,4 @@ -apiVersion: integreatly.org/v1alpha1 +apiVersion: grafana.integreatly.org/v1beta1 kind: GrafanaDashboard metadata: labels: @@ -6,7 +6,9 @@ metadata: monitoring-key: middleware name: kubernetes-mixin-controller-manager spec: - name: kubernetes-mixin-controller-manager.json + instanceSelector: + matchLabels: + dashboards: "rhacs" json: | { "editable": false, diff --git a/resources/grafana/mixins/kubernetes/k8s-resources-cluster.yaml b/resources/grafana/mixins/kubernetes/k8s-resources-cluster.yaml index fcd89c2f..e5633b23 100644 --- a/resources/grafana/mixins/kubernetes/k8s-resources-cluster.yaml +++ b/resources/grafana/mixins/kubernetes/k8s-resources-cluster.yaml @@ -1,4 +1,4 @@ -apiVersion: integreatly.org/v1alpha1 +apiVersion: grafana.integreatly.org/v1beta1 kind: GrafanaDashboard metadata: labels: @@ -6,7 +6,9 @@ metadata: monitoring-key: middleware name: kubernetes-mixin-k8s-resources-cluster spec: - name: kubernetes-mixin-k8s-resources-cluster.json + instanceSelector: + matchLabels: + dashboards: "rhacs" json: | { "editable": false, diff --git a/resources/grafana/mixins/kubernetes/k8s-resources-namespace.yaml b/resources/grafana/mixins/kubernetes/k8s-resources-namespace.yaml index 9d58bb6d..abb2746a 100644 --- a/resources/grafana/mixins/kubernetes/k8s-resources-namespace.yaml +++ b/resources/grafana/mixins/kubernetes/k8s-resources-namespace.yaml @@ -1,4 +1,4 @@ -apiVersion: integreatly.org/v1alpha1 +apiVersion: grafana.integreatly.org/v1beta1 kind: GrafanaDashboard metadata: labels: @@ -6,7 +6,9 @@ metadata: monitoring-key: middleware name: kubernetes-mixin-k8s-resources-namespace spec: - name: kubernetes-mixin-k8s-resources-namespace.json + instanceSelector: + matchLabels: + dashboards: "rhacs" json: | { "editable": false, diff --git a/resources/grafana/mixins/kubernetes/k8s-resources-node.yaml b/resources/grafana/mixins/kubernetes/k8s-resources-node.yaml index 1e46c934..4b73e769 100644 --- a/resources/grafana/mixins/kubernetes/k8s-resources-node.yaml +++ b/resources/grafana/mixins/kubernetes/k8s-resources-node.yaml @@ -1,4 +1,4 @@ -apiVersion: integreatly.org/v1alpha1 +apiVersion: grafana.integreatly.org/v1beta1 kind: GrafanaDashboard metadata: labels: @@ -6,7 +6,9 @@ metadata: monitoring-key: middleware name: kubernetes-mixin-k8s-resources-node spec: - name: kubernetes-mixin-k8s-resources-node.json + instanceSelector: + matchLabels: + dashboards: "rhacs" json: | { "editable": false, diff --git a/resources/grafana/mixins/kubernetes/k8s-resources-pod.yaml b/resources/grafana/mixins/kubernetes/k8s-resources-pod.yaml index 7fc79b6f..fd26253e 100644 --- a/resources/grafana/mixins/kubernetes/k8s-resources-pod.yaml +++ b/resources/grafana/mixins/kubernetes/k8s-resources-pod.yaml @@ -1,4 +1,4 @@ -apiVersion: integreatly.org/v1alpha1 +apiVersion: grafana.integreatly.org/v1beta1 kind: GrafanaDashboard metadata: labels: @@ -6,7 +6,9 @@ metadata: monitoring-key: middleware name: kubernetes-mixin-k8s-resources-pod spec: - name: kubernetes-mixin-k8s-resources-pod.json + instanceSelector: + matchLabels: + dashboards: "rhacs" json: | { "editable": false, diff --git a/resources/grafana/mixins/kubernetes/k8s-resources-workload.yaml b/resources/grafana/mixins/kubernetes/k8s-resources-workload.yaml index 549e9aa9..9fdfb430 100644 --- a/resources/grafana/mixins/kubernetes/k8s-resources-workload.yaml +++ b/resources/grafana/mixins/kubernetes/k8s-resources-workload.yaml @@ -1,4 +1,4 @@ -apiVersion: integreatly.org/v1alpha1 +apiVersion: grafana.integreatly.org/v1beta1 kind: GrafanaDashboard metadata: labels: @@ -6,7 +6,9 @@ metadata: monitoring-key: middleware name: kubernetes-mixin-k8s-resources-workload spec: - name: kubernetes-mixin-k8s-resources-workload.json + instanceSelector: + matchLabels: + dashboards: "rhacs" json: | { "editable": false, diff --git a/resources/grafana/mixins/kubernetes/k8s-resources-workloads-namespace.yaml b/resources/grafana/mixins/kubernetes/k8s-resources-workloads-namespace.yaml index c9ec8b50..488c82ee 100644 --- a/resources/grafana/mixins/kubernetes/k8s-resources-workloads-namespace.yaml +++ b/resources/grafana/mixins/kubernetes/k8s-resources-workloads-namespace.yaml @@ -1,4 +1,4 @@ -apiVersion: integreatly.org/v1alpha1 +apiVersion: grafana.integreatly.org/v1beta1 kind: GrafanaDashboard metadata: labels: @@ -6,7 +6,9 @@ metadata: monitoring-key: middleware name: kubernetes-mixin-k8s-resources-workloads-namespace spec: - name: kubernetes-mixin-k8s-resources-workloads-namespace.json + instanceSelector: + matchLabels: + dashboards: "rhacs" json: | { "editable": false, diff --git a/resources/grafana/mixins/kubernetes/kubelet.yaml b/resources/grafana/mixins/kubernetes/kubelet.yaml index 0332cb2b..ad81f526 100644 --- a/resources/grafana/mixins/kubernetes/kubelet.yaml +++ b/resources/grafana/mixins/kubernetes/kubelet.yaml @@ -1,4 +1,4 @@ -apiVersion: integreatly.org/v1alpha1 +apiVersion: grafana.integreatly.org/v1beta1 kind: GrafanaDashboard metadata: labels: @@ -6,7 +6,9 @@ metadata: monitoring-key: middleware name: kubernetes-mixin-kubelet spec: - name: kubernetes-mixin-kubelet.json + instanceSelector: + matchLabels: + dashboards: "rhacs" json: | { "editable": false, diff --git a/resources/grafana/mixins/kubernetes/namespace-by-pod.yaml b/resources/grafana/mixins/kubernetes/namespace-by-pod.yaml index 7957b1d7..b1b0d0e0 100644 --- a/resources/grafana/mixins/kubernetes/namespace-by-pod.yaml +++ b/resources/grafana/mixins/kubernetes/namespace-by-pod.yaml @@ -1,4 +1,4 @@ -apiVersion: integreatly.org/v1alpha1 +apiVersion: grafana.integreatly.org/v1beta1 kind: GrafanaDashboard metadata: labels: @@ -6,7 +6,9 @@ metadata: monitoring-key: middleware name: kubernetes-mixin-namespace-by-pod spec: - name: kubernetes-mixin-namespace-by-pod.json + instanceSelector: + matchLabels: + dashboards: "rhacs" json: | { "editable": false, diff --git a/resources/grafana/mixins/kubernetes/namespace-by-workload.yaml b/resources/grafana/mixins/kubernetes/namespace-by-workload.yaml index 0d673fd5..f307d383 100644 --- a/resources/grafana/mixins/kubernetes/namespace-by-workload.yaml +++ b/resources/grafana/mixins/kubernetes/namespace-by-workload.yaml @@ -1,4 +1,4 @@ -apiVersion: integreatly.org/v1alpha1 +apiVersion: grafana.integreatly.org/v1beta1 kind: GrafanaDashboard metadata: labels: @@ -6,7 +6,9 @@ metadata: monitoring-key: middleware name: kubernetes-mixin-namespace-by-workload spec: - name: kubernetes-mixin-namespace-by-workload.json + instanceSelector: + matchLabels: + dashboards: "rhacs" json: | { "editable": false, diff --git a/resources/grafana/mixins/kubernetes/persistentvolumesusage.yaml b/resources/grafana/mixins/kubernetes/persistentvolumesusage.yaml index 620cc098..5b5089d3 100644 --- a/resources/grafana/mixins/kubernetes/persistentvolumesusage.yaml +++ b/resources/grafana/mixins/kubernetes/persistentvolumesusage.yaml @@ -1,4 +1,4 @@ -apiVersion: integreatly.org/v1alpha1 +apiVersion: grafana.integreatly.org/v1beta1 kind: GrafanaDashboard metadata: labels: @@ -6,7 +6,9 @@ metadata: monitoring-key: middleware name: kubernetes-mixin-persistentvolumesusage spec: - name: kubernetes-mixin-persistentvolumesusage.json + instanceSelector: + matchLabels: + dashboards: "rhacs" json: | { "editable": false, diff --git a/resources/grafana/mixins/kubernetes/pod-total.yaml b/resources/grafana/mixins/kubernetes/pod-total.yaml index ae5fdc9a..0c2299d7 100644 --- a/resources/grafana/mixins/kubernetes/pod-total.yaml +++ b/resources/grafana/mixins/kubernetes/pod-total.yaml @@ -1,4 +1,4 @@ -apiVersion: integreatly.org/v1alpha1 +apiVersion: grafana.integreatly.org/v1beta1 kind: GrafanaDashboard metadata: labels: @@ -6,7 +6,9 @@ metadata: monitoring-key: middleware name: kubernetes-mixin-pod-total spec: - name: kubernetes-mixin-pod-total.json + instanceSelector: + matchLabels: + dashboards: "rhacs" json: | { "editable": false, diff --git a/resources/grafana/mixins/kubernetes/proxy.yaml b/resources/grafana/mixins/kubernetes/proxy.yaml index 41314330..f998859e 100644 --- a/resources/grafana/mixins/kubernetes/proxy.yaml +++ b/resources/grafana/mixins/kubernetes/proxy.yaml @@ -1,4 +1,4 @@ -apiVersion: integreatly.org/v1alpha1 +apiVersion: grafana.integreatly.org/v1beta1 kind: GrafanaDashboard metadata: labels: @@ -6,7 +6,9 @@ metadata: monitoring-key: middleware name: kubernetes-mixin-proxy spec: - name: kubernetes-mixin-proxy.json + instanceSelector: + matchLabels: + dashboards: "rhacs" json: | { "editable": false, diff --git a/resources/grafana/mixins/kubernetes/scheduler.yaml b/resources/grafana/mixins/kubernetes/scheduler.yaml index 2e6325f9..e750de40 100644 --- a/resources/grafana/mixins/kubernetes/scheduler.yaml +++ b/resources/grafana/mixins/kubernetes/scheduler.yaml @@ -1,4 +1,4 @@ -apiVersion: integreatly.org/v1alpha1 +apiVersion: grafana.integreatly.org/v1beta1 kind: GrafanaDashboard metadata: labels: @@ -6,7 +6,9 @@ metadata: monitoring-key: middleware name: kubernetes-mixin-scheduler spec: - name: kubernetes-mixin-scheduler.json + instanceSelector: + matchLabels: + dashboards: "rhacs" json: | { "editable": false, diff --git a/resources/grafana/mixins/kubernetes/workload-total.yaml b/resources/grafana/mixins/kubernetes/workload-total.yaml index ff7e320a..257053df 100644 --- a/resources/grafana/mixins/kubernetes/workload-total.yaml +++ b/resources/grafana/mixins/kubernetes/workload-total.yaml @@ -1,4 +1,4 @@ -apiVersion: integreatly.org/v1alpha1 +apiVersion: grafana.integreatly.org/v1beta1 kind: GrafanaDashboard metadata: labels: @@ -6,7 +6,9 @@ metadata: monitoring-key: middleware name: kubernetes-mixin-workload-total spec: - name: kubernetes-mixin-workload-total.json + instanceSelector: + matchLabels: + dashboards: "rhacs" json: | { "editable": false, diff --git a/resources/grafana/sources/rhacs-cluster-overview.json b/resources/grafana/sources/rhacs-cluster-overview.json index ca0e6c24..974a693f 100644 --- a/resources/grafana/sources/rhacs-cluster-overview.json +++ b/resources/grafana/sources/rhacs-cluster-overview.json @@ -25,8 +25,8 @@ "uid": "PBFA97CFB590B2093" }, "enable": true, - "iconColor": "purple", "expr": "count (count by (git_version) (label_replace(count_over_time(kubernetes_build_info{job!~\"kube-dns|coredns\"}[${__interval}]), \"git_version\", \"$1\", \"git_version\", \"(v[0-9]*.[0-9]*).*\"))) > 1", + "iconColor": "purple", "name": "Kubernetes Upgrade", "textFormat": "Kubernetes Upgrade" }, @@ -36,8 +36,8 @@ "uid": "PBFA97CFB590B2093" }, "enable": true, - "iconColor": "red", "expr": "count (count by (gitVersion) (count_over_time (openshift_apiserver_build_info[${__interval}]))) > 1", + "iconColor": "red", "name": "OpenShift Upgrade", "textFormat": "OpenShift Upgrade" } @@ -46,7 +46,7 @@ "editable": true, "fiscalYearStartMonth": 0, "graphTooltip": 0, - "id": 23, + "id": 26, "links": [], "liveNow": false, "panels": [ @@ -515,8 +515,7 @@ "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -613,8 +612,7 @@ "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -663,19 +661,6 @@ "title": "Secured Clusters", "type": "timeseries" }, - { - "collapsed": false, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 25 - }, - "id": 16, - "panels": [], - "title": "Resources", - "type": "row" - }, { "datasource": { "type": "prometheus", @@ -723,8 +708,7 @@ "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -740,7 +724,7 @@ "h": 8, "w": 12, "x": 0, - "y": 26 + "y": 25 }, "id": 4, "options": { @@ -773,6 +757,19 @@ "title": "Network Received", "type": "timeseries" }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 33 + }, + "id": 16, + "panels": [], + "title": "Resources", + "type": "row" + }, { "datasource": { "type": "prometheus", @@ -820,8 +817,7 @@ "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -829,17 +825,17 @@ } ] }, - "unit": "Bps" + "unit": "percentunit" }, "overrides": [] }, "gridPos": { "h": 8, "w": 12, - "x": 12, - "y": 26 + "x": 0, + "y": 34 }, - "id": 5, + "id": 6, "options": { "legend": { "calcs": [], @@ -849,7 +845,7 @@ }, "tooltip": { "mode": "single", - "sort": "none" + "sort": "desc" } }, "pluginVersion": "9.1.0", @@ -860,14 +856,15 @@ "uid": "PBFA97CFB590B2093" }, "editorMode": "code", - "expr": "sum(rate(container_network_transmit_bytes_total{namespace=~\"rhacs-$instance_id\", job=~\"kubelet\"}[5m])) by (namespace)", + "expr": "sum(container_memory_working_set_bytes{namespace=~\"rhacs-$instance_id\", container=\"central\", job=~\"kubelet\"}) by (namespace) / sum(container_spec_memory_limit_bytes{namespace=~\"rhacs-$instance_id\", container=\"central\", job=~\"kubelet\"}) by (namespace)", + "format": "time_series", "interval": "", "legendFormat": "{{namespace}}", "range": true, "refId": "A" } ], - "title": "Network Transmitted", + "title": "Central Memory Usage", "type": "timeseries" }, { @@ -917,8 +914,7 @@ "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -926,17 +922,17 @@ } ] }, - "unit": "percentunit" + "unit": "Bps" }, "overrides": [] }, "gridPos": { "h": 8, "w": 12, - "x": 0, + "x": 12, "y": 34 }, - "id": 6, + "id": 5, "options": { "legend": { "calcs": [], @@ -946,7 +942,7 @@ }, "tooltip": { "mode": "single", - "sort": "desc" + "sort": "none" } }, "pluginVersion": "9.1.0", @@ -957,15 +953,14 @@ "uid": "PBFA97CFB590B2093" }, "editorMode": "code", - "expr": "sum(container_memory_working_set_bytes{namespace=~\"rhacs-$instance_id\", container=\"central\", job=~\"kubelet\"}) by (namespace) / sum(container_spec_memory_limit_bytes{namespace=~\"rhacs-$instance_id\", container=\"central\", job=~\"kubelet\"}) by (namespace)", - "format": "time_series", + "expr": "sum(rate(container_network_transmit_bytes_total{namespace=~\"rhacs-$instance_id\", job=~\"kubelet\"}[5m])) by (namespace)", "interval": "", "legendFormat": "{{namespace}}", "range": true, "refId": "A" } ], - "title": "Central Memory Usage", + "title": "Network Transmitted", "type": "timeseries" }, { @@ -1029,8 +1024,7 @@ "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -1108,7 +1102,7 @@ "content": "### Description\n\nThis graph shows the occurences per minute of **SELinux AVC denials** on the cluster.\nThese violations are logged on the cluster, propagated to CloudWatch, aggregated by a log metric, retrieved by the cloudwatch-exporter and finally scraped by Prometheus.\n\n**Expected: 0 violations.**\n\nA violation means that the cluster node's SELinux policy prevented a process' actions.\nAs an example, a violation could indicate that a process on the cluster tried to access a file which is SELinux-protected.\n\n### Drill-Down\n\nLog into the cluster's AWS account and use a [Log Insights query](https://us-east-1.console.aws.amazon.com/cloudwatch/home?region=us-east-1#logsV2:logs-insights$3FqueryDetail$3D~(end~0~start~-3600~timeType~'RELATIVE~unit~'seconds~editorString~'fields*20*40timestamp*2c*20*40message*2c*20*40logStream*2c*20*40log*0a*7c*20filter*20*40logStream*20like*20*2flinux-audit*2f*0a*7c*20filter*20*40message*20like*20*2fAVC*2f*0a*7c*20sort*20*40timestamp*20desc*0a*7c*20limit*201000~source~(~))) similar to this one:\n```\nfields @timestamp, @message, @logStream, @log\n| filter @logStream like /linux-audit/\n| filter @message like /AVC/\n| sort @timestamp desc\n| limit 1000\n```\n\n**Note:**\n* all CloudWatch related resources are located in the `us-east-1` region.\n* the log group containing the violation logs are called `acs-.audit`.\n", "mode": "markdown" }, - "pluginVersion": "10.2.0", + "pluginVersion": "11.1.0", "type": "text" }, { @@ -1159,8 +1153,7 @@ "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -1238,7 +1231,7 @@ "content": "### Description\n\nThis graph shows the occurences per minute of Network Policy ACL denials on the cluster.\nThese violations are logged on the cluster, propagated to CloudWatch, aggregated by a log metric, retrieved by the cloudwatch-exporter and finally scraped by Prometheus.\n\n**Expected: 0 violations.**\n\nA violation means that network traffic was prevented due to a Kubernetes Network Policy.\nAs an example, a violation could indicate that communication between RHACS tenant namespaces\nwas attempted, which is strictly forbidden.\n\n### Drill-Down\n\nLog into the cluster's AWS account and use a [Log Insights query](https://us-east-1.console.aws.amazon.com/cloudwatch/home?region=us-east-1#logsV2:logs-insights$3FqueryDetail$3D~(end~0~start~-3600~timeType~'RELATIVE~unit~'seconds~editorString~'fields*20*40timestamp*2c*20*40message*2c*20*40logStream*2c*20*40log*0a*7c*20filter*20*40message*20like*20*2facl_log*28.*2a*29.*2a*5csverdict*3ddrop*2f*0a*7c*20filter*20*40logStream*20like*20*2f.*2aovn-audit*5c.log*2f*0a*7c*20sort*20*40timestamp*20desc*0a*7c*20limit*201000~source~(~))) similar to this one:\n```\nfields @timestamp, @message, @logStream, @log\n| filter @message like /acl_log(.*).*\\sverdict=drop/\n| filter @logStream like /.*ovn-audit\\.log/\n| sort @timestamp desc\n| limit 1000\n```\n\n**Note:**\n* all CloudWatch related resources are located in the `us-east-1` region.\n* the log group containing the violation logs are called `acs-.audit`.\n\n", "mode": "markdown" }, - "pluginVersion": "10.2.0", + "pluginVersion": "11.1.0", "type": "text" }, { @@ -1279,8 +1272,7 @@ "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -1317,8 +1309,7 @@ "mode": "percentage", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "orange", @@ -1471,7 +1462,7 @@ } ] }, - "pluginVersion": "10.2.0", + "pluginVersion": "11.1.0", "targets": [ { "datasource": { @@ -1656,8 +1647,7 @@ "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -1722,8 +1712,7 @@ "mode": "percentage", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "#EAB839", @@ -1797,8 +1786,7 @@ "mode": "percentage", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "yellow", @@ -1872,8 +1860,7 @@ "mode": "percentage", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "#EAB839", @@ -1942,7 +1929,7 @@ } ] }, - "pluginVersion": "10.2.0", + "pluginVersion": "11.1.0", "targets": [ { "datasource": { @@ -2120,11 +2107,249 @@ } ], "type": "table" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 85 + }, + "id": 149, + "panels": [], + "title": "Certificate Expiry", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "description": "Expiration dates for central and scanner TLS certificates.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "center", + "cellOptions": { + "type": "auto" + }, + "filterable": true, + "inspect": false + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 13, + "w": 24, + "x": 0, + "y": 86 + }, + "id": 148, + "options": { + "cellHeight": "sm", + "footer": { + "countRows": false, + "enablePagination": true, + "fields": "", + "reducer": ["sum"], + "show": false + }, + "showHeader": true, + "sortBy": [ + { + "desc": false, + "displayName": "Expiration" + } + ] + }, + "pluginVersion": "11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "disableTextWrap": false, + "editorMode": "code", + "exemplar": true, + "expr": "acs_fleetshard_certificate_expiration_timestamp * on (exported_namespace) group_left(rhacs_org_name) label_replace(rox_central_info, \"exported_namespace\", \"$1\", \"namespace\", \"(.*)\")", + "format": "table", + "fullMetaSearch": false, + "includeNullMetadata": true, + "instant": false, + "interval": "", + "legendFormat": "__auto", + "range": true, + "refId": "A", + "useBackend": false + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "", + "hide": false, + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "B" + } + ], + "title": "Certificates Expiry", + "transformations": [ + { + "id": "organize", + "options": { + "excludeByName": { + "Time": true, + "Value": false, + "__name__": true, + "container": true, + "instance": true, + "job": true, + "namespace": true, + "pod": true, + "rhacs_cluster_name": true, + "rhacs_environment": true, + "rhacs_org_name": false + }, + "includeByName": {}, + "indexByName": { + "Time": 4, + "Value": 12, + "__name__": 5, + "container": 6, + "data_key": 3, + "exported_namespace": 0, + "instance": 7, + "job": 8, + "namespace": 1, + "pod": 9, + "rhacs_cluster_name": 10, + "rhacs_environment": 11, + "secret": 2 + }, + "renameByName": { + "Time": "Expiration", + "Value": "", + "container": "", + "data_key": "Key", + "exported_namespace": "Namespace", + "secret": "Secret" + } + } + }, + { + "id": "calculateField", + "options": { + "alias": "Expiration", + "binary": { + "left": "Value", + "operator": "*", + "right": "1000" + }, + "mode": "binary", + "reduce": { + "reducer": "sum" + } + } + }, + { + "id": "convertFieldType", + "options": { + "conversions": [ + { + "destinationType": "time", + "targetField": "Expiration" + } + ], + "fields": {} + } + }, + { + "id": "organize", + "options": { + "excludeByName": { + "Value": true + }, + "includeByName": {}, + "indexByName": {}, + "renameByName": {} + } + }, + { + "id": "groupBy", + "options": { + "fields": { + "Expiration": { + "aggregations": ["last"], + "operation": "aggregate" + }, + "Key": { + "aggregations": [], + "operation": "groupby" + }, + "Namespace": { + "aggregations": ["last"], + "operation": "groupby" + }, + "Secret": { + "aggregations": [], + "operation": "groupby" + }, + "rhacs_org_name": { + "aggregations": [], + "operation": "groupby" + } + } + } + }, + { + "id": "organize", + "options": { + "excludeByName": {}, + "includeByName": {}, + "indexByName": { + "Expiration (last)": 1, + "Key": 4, + "Namespace": 0, + "Secret": 3, + "rhacs_org_name": 2 + }, + "renameByName": { + "Expiration (last)": "Expiration", + "Namespace (last)": "Namespace", + "rhacs_org_name": "Organization" + } + } + } + ], + "type": "table" } ], - "refresh": "", "revision": 1, - "schemaVersion": 38, + "schemaVersion": 39, "tags": ["rhacs"], "templating": { "list": [ diff --git a/resources/grafana/templates/dashboards/rhacs-autoscaler.yaml b/resources/grafana/templates/dashboards/rhacs-autoscaler.yaml index dbaae059..6204e038 100644 --- a/resources/grafana/templates/dashboards/rhacs-autoscaler.yaml +++ b/resources/grafana/templates/dashboards/rhacs-autoscaler.yaml @@ -1,4 +1,4 @@ -apiVersion: integreatly.org/v1alpha1 +apiVersion: grafana.integreatly.org/v1beta1 kind: GrafanaDashboard metadata: labels: @@ -6,5 +6,7 @@ metadata: monitoring-key: middleware name: rhacs-autoscaler spec: - name: rhacs-autoscaler.json + instanceSelector: + matchLabels: + dashboards: "rhacs" json: | diff --git a/resources/grafana/templates/dashboards/rhacs-central-release.yaml b/resources/grafana/templates/dashboards/rhacs-central-release.yaml index ce336993..46151aa8 100644 --- a/resources/grafana/templates/dashboards/rhacs-central-release.yaml +++ b/resources/grafana/templates/dashboards/rhacs-central-release.yaml @@ -1,4 +1,4 @@ -apiVersion: integreatly.org/v1alpha1 +apiVersion: grafana.integreatly.org/v1beta1 kind: GrafanaDashboard metadata: labels: @@ -6,5 +6,7 @@ metadata: monitoring-key: middleware name: rhacs-central-release spec: - name: rhacs-central-release.json + instanceSelector: + matchLabels: + dashboards: "rhacs" json: | diff --git a/resources/grafana/templates/dashboards/rhacs-central-slo.yaml b/resources/grafana/templates/dashboards/rhacs-central-slo.yaml index e58dc133..4285bc62 100644 --- a/resources/grafana/templates/dashboards/rhacs-central-slo.yaml +++ b/resources/grafana/templates/dashboards/rhacs-central-slo.yaml @@ -1,4 +1,4 @@ -apiVersion: integreatly.org/v1alpha1 +apiVersion: grafana.integreatly.org/v1beta1 kind: GrafanaDashboard metadata: labels: @@ -6,5 +6,7 @@ metadata: monitoring-key: middleware name: rhacs-central-slo spec: - name: rhacs-central-slo.json + instanceSelector: + matchLabels: + dashboards: "rhacs" json: | diff --git a/resources/grafana/templates/dashboards/rhacs-central.yaml b/resources/grafana/templates/dashboards/rhacs-central.yaml index e88ab045..56d6f31b 100644 --- a/resources/grafana/templates/dashboards/rhacs-central.yaml +++ b/resources/grafana/templates/dashboards/rhacs-central.yaml @@ -1,4 +1,4 @@ -apiVersion: integreatly.org/v1alpha1 +apiVersion: grafana.integreatly.org/v1beta1 kind: GrafanaDashboard metadata: labels: @@ -6,5 +6,7 @@ metadata: monitoring-key: middleware name: rhacs-central spec: - name: rhacs-central.json + instanceSelector: + matchLabels: + dashboards: "rhacs" json: | diff --git a/resources/grafana/templates/dashboards/rhacs-cluster-overview.yaml b/resources/grafana/templates/dashboards/rhacs-cluster-overview.yaml index a5097892..29421399 100644 --- a/resources/grafana/templates/dashboards/rhacs-cluster-overview.yaml +++ b/resources/grafana/templates/dashboards/rhacs-cluster-overview.yaml @@ -1,4 +1,4 @@ -apiVersion: integreatly.org/v1alpha1 +apiVersion: grafana.integreatly.org/v1beta1 kind: GrafanaDashboard metadata: labels: @@ -6,5 +6,7 @@ metadata: monitoring-key: middleware name: rhacs-cluster-overview spec: - name: rhacs-cluster-overview.json + instanceSelector: + matchLabels: + dashboards: "rhacs" json: | diff --git a/resources/grafana/templates/dashboards/rhacs-cluster-resource-adjustment.yaml b/resources/grafana/templates/dashboards/rhacs-cluster-resource-adjustment.yaml index e7eaf22b..e19fbd57 100644 --- a/resources/grafana/templates/dashboards/rhacs-cluster-resource-adjustment.yaml +++ b/resources/grafana/templates/dashboards/rhacs-cluster-resource-adjustment.yaml @@ -1,4 +1,4 @@ -apiVersion: integreatly.org/v1alpha1 +apiVersion: grafana.integreatly.org/v1beta1 kind: GrafanaDashboard metadata: labels: @@ -6,5 +6,7 @@ metadata: monitoring-key: middleware name: rhacs-cluster-resource-adjustment spec: - name: rhacs-cluster-resource-adjustment.json + instanceSelector: + matchLabels: + dashboards: "rhacs" json: | diff --git a/resources/grafana/templates/dashboards/rhacs-emailsender.yaml b/resources/grafana/templates/dashboards/rhacs-emailsender.yaml index 4bda1633..df9d918b 100644 --- a/resources/grafana/templates/dashboards/rhacs-emailsender.yaml +++ b/resources/grafana/templates/dashboards/rhacs-emailsender.yaml @@ -1,4 +1,4 @@ -apiVersion: integreatly.org/v1alpha1 +apiVersion: grafana.integreatly.org/v1beta1 kind: GrafanaDashboard metadata: labels: @@ -6,5 +6,7 @@ metadata: monitoring-key: middleware name: rhacs-emailsender spec: - name: rhacs-emailsender.json + instanceSelector: + matchLabels: + dashboards: "rhacs" json: | diff --git a/resources/index.json b/resources/index.json deleted file mode 100644 index e36dc3ae..00000000 --- a/resources/index.json +++ /dev/null @@ -1,70 +0,0 @@ -{ - "id": "managed-rhacs", - "config": { - "prometheus": { - "pod_monitors": [ - "prometheus/pod_monitors/prometheus-self-metrics.yaml", - "prometheus/pod_monitors/rhacs-cloudwatch-exporter.yaml", - "prometheus/pod_monitors/rhacs-fleetshard-sync-metrics.yaml", - "prometheus/pod_monitors/rhacs-tenant-metrics.yaml", - "prometheus/pod_monitors/rhacs-emailsender-metrics.yaml" - ], - "rules": [ - "prometheus/billing-rules.yaml", - "prometheus/generated/kubernetes-mixin-alerts.yaml", - "prometheus/prometheus-rules.yaml", - "prometheus/rhacs-recording-rules.yaml" - ], - "federation": "prometheus/federation-config.yaml", - "observatorium": "rhacs-observatorium", - "remoteWrite": "prometheus/remote-write.yaml", - "overridePrometheusPvcSize": "250Gi" - }, - "alertmanager": { - "deadmansSnitchSecretName": "rhacs-dead-mans-switch", - "pagerDutySecretName": "rhacs-pagerduty" - }, - "grafana": { - "dashboards": [ - "grafana/mixins/kubernetes/apiserver.yaml", - "grafana/mixins/kubernetes/cluster-total.yaml", - "grafana/mixins/kubernetes/controller-manager.yaml", - "grafana/mixins/kubernetes/k8s-resources-cluster.yaml", - "grafana/mixins/kubernetes/k8s-resources-namespace.yaml", - "grafana/mixins/kubernetes/k8s-resources-node.yaml", - "grafana/mixins/kubernetes/k8s-resources-pod.yaml", - "grafana/mixins/kubernetes/k8s-resources-workload.yaml", - "grafana/mixins/kubernetes/k8s-resources-workloads-namespace.yaml", - "grafana/mixins/kubernetes/kubelet.yaml", - "grafana/mixins/kubernetes/namespace-by-pod.yaml", - "grafana/mixins/kubernetes/namespace-by-workload.yaml", - "grafana/mixins/kubernetes/persistentvolumesusage.yaml", - "grafana/mixins/kubernetes/pod-total.yaml", - "grafana/mixins/kubernetes/proxy.yaml", - "grafana/mixins/kubernetes/scheduler.yaml", - "grafana/mixins/kubernetes/workload-total.yaml", - "grafana/generated/dashboards/rhacs-autoscaler.yaml", - "grafana/generated/dashboards/rhacs-central.yaml", - "grafana/generated/dashboards/rhacs-central-release.yaml", - "grafana/generated/dashboards/rhacs-central-slo.yaml", - "grafana/generated/dashboards/rhacs-cluster-overview.yaml", - "grafana/generated/dashboards/rhacs-cluster-resource-adjustment.yaml", - "grafana/generated/dashboards/rhacs-emailsender.yaml" - ], - "grafanaVersion": "11.1.0" - }, - "promtail": { - "enabled": false, - "observatorium": "rhacs-observatorium", - "namespaceLabelSelector": { - "observability-operator/scrape-logging": "true" - } - }, - "observatoria": [ - { - "id": "rhacs-observatorium", - "secretName": "rhacs-observatorium" - } - ] - } -} diff --git a/resources/kustomization.yaml b/resources/kustomization.yaml new file mode 100644 index 00000000..b52cd5d4 --- /dev/null +++ b/resources/kustomization.yaml @@ -0,0 +1,40 @@ +secretGenerator: + - name: additional-scrape-configs + options: + disableNameSuffixHash: true + files: + - prometheus/federation-config.yaml +resources: + - prometheus/pod_monitors/prometheus-self-metrics.yaml + - prometheus/pod_monitors/rhacs-cloudwatch-exporter.yaml + - prometheus/pod_monitors/rhacs-fleetshard-sync-metrics.yaml + - prometheus/pod_monitors/rhacs-tenant-metrics.yaml + - prometheus/pod_monitors/rhacs-emailsender-metrics.yaml + - prometheus/billing-rules.yaml + - prometheus/generated/kubernetes-mixin-alerts.yaml + - prometheus/prometheus-rules.yaml + - prometheus/rhacs-recording-rules.yaml + - grafana/mixins/kubernetes/apiserver.yaml + - grafana/mixins/kubernetes/cluster-total.yaml + - grafana/mixins/kubernetes/controller-manager.yaml + - grafana/mixins/kubernetes/k8s-resources-cluster.yaml + - grafana/mixins/kubernetes/k8s-resources-namespace.yaml + - grafana/mixins/kubernetes/k8s-resources-node.yaml + - grafana/mixins/kubernetes/k8s-resources-pod.yaml + - grafana/mixins/kubernetes/k8s-resources-workload.yaml + - grafana/mixins/kubernetes/k8s-resources-workloads-namespace.yaml + - grafana/mixins/kubernetes/kubelet.yaml + - grafana/mixins/kubernetes/namespace-by-pod.yaml + - grafana/mixins/kubernetes/namespace-by-workload.yaml + - grafana/mixins/kubernetes/persistentvolumesusage.yaml + - grafana/mixins/kubernetes/pod-total.yaml + - grafana/mixins/kubernetes/proxy.yaml + - grafana/mixins/kubernetes/scheduler.yaml + - grafana/mixins/kubernetes/workload-total.yaml + - grafana/generated/dashboards/rhacs-autoscaler.yaml + - grafana/generated/dashboards/rhacs-central.yaml + - grafana/generated/dashboards/rhacs-central-release.yaml + - grafana/generated/dashboards/rhacs-central-slo.yaml + - grafana/generated/dashboards/rhacs-cluster-overview.yaml + - grafana/generated/dashboards/rhacs-cluster-resource-adjustment.yaml + - grafana/generated/dashboards/rhacs-emailsender.yaml diff --git a/resources/mixins/kubernetes/templates/alerts.yaml b/resources/mixins/kubernetes/templates/alerts.yaml index cf43bac9..949e7269 100644 --- a/resources/mixins/kubernetes/templates/alerts.yaml +++ b/resources/mixins/kubernetes/templates/alerts.yaml @@ -1,7 +1,8 @@ -apiVersion: monitoring.coreos.com/v1 +apiVersion: monitoring.rhobs/v1 kind: PrometheusRule metadata: labels: app: rhacs + app.kubernetes.io/managed-by: observability-operator name: kubernetes-mixin-alerts spec: diff --git a/resources/mixins/kubernetes/templates/dashboards/apiserver.yaml b/resources/mixins/kubernetes/templates/dashboards/apiserver.yaml index 37619208..6a2f8443 100644 --- a/resources/mixins/kubernetes/templates/dashboards/apiserver.yaml +++ b/resources/mixins/kubernetes/templates/dashboards/apiserver.yaml @@ -1,4 +1,4 @@ -apiVersion: integreatly.org/v1alpha1 +apiVersion: grafana.integreatly.org/v1beta1 kind: GrafanaDashboard metadata: labels: @@ -6,5 +6,7 @@ metadata: monitoring-key: middleware name: kubernetes-mixin-apiserver spec: - name: kubernetes-mixin-apiserver.json + instanceSelector: + matchLabels: + dashboards: "rhacs" json: | diff --git a/resources/mixins/kubernetes/templates/dashboards/cluster-total.yaml b/resources/mixins/kubernetes/templates/dashboards/cluster-total.yaml index b06ba278..93ad9c58 100644 --- a/resources/mixins/kubernetes/templates/dashboards/cluster-total.yaml +++ b/resources/mixins/kubernetes/templates/dashboards/cluster-total.yaml @@ -1,4 +1,4 @@ -apiVersion: integreatly.org/v1alpha1 +apiVersion: grafana.integreatly.org/v1beta1 kind: GrafanaDashboard metadata: labels: @@ -6,5 +6,7 @@ metadata: monitoring-key: middleware name: kubernetes-mixin-cluster-total spec: - name: kubernetes-mixin-cluster-total.json + instanceSelector: + matchLabels: + dashboards: "rhacs" json: | diff --git a/resources/mixins/kubernetes/templates/dashboards/controller-manager.yaml b/resources/mixins/kubernetes/templates/dashboards/controller-manager.yaml index 534cdc09..d4857111 100644 --- a/resources/mixins/kubernetes/templates/dashboards/controller-manager.yaml +++ b/resources/mixins/kubernetes/templates/dashboards/controller-manager.yaml @@ -1,4 +1,4 @@ -apiVersion: integreatly.org/v1alpha1 +apiVersion: grafana.integreatly.org/v1beta1 kind: GrafanaDashboard metadata: labels: @@ -6,5 +6,7 @@ metadata: monitoring-key: middleware name: kubernetes-mixin-controller-manager spec: - name: kubernetes-mixin-controller-manager.json + instanceSelector: + matchLabels: + dashboards: "rhacs" json: | diff --git a/resources/mixins/kubernetes/templates/dashboards/k8s-resources-cluster.yaml b/resources/mixins/kubernetes/templates/dashboards/k8s-resources-cluster.yaml index 9a9dd6c7..061283e1 100644 --- a/resources/mixins/kubernetes/templates/dashboards/k8s-resources-cluster.yaml +++ b/resources/mixins/kubernetes/templates/dashboards/k8s-resources-cluster.yaml @@ -1,4 +1,4 @@ -apiVersion: integreatly.org/v1alpha1 +apiVersion: grafana.integreatly.org/v1beta1 kind: GrafanaDashboard metadata: labels: @@ -6,5 +6,7 @@ metadata: monitoring-key: middleware name: kubernetes-mixin-k8s-resources-cluster spec: - name: kubernetes-mixin-k8s-resources-cluster.json + instanceSelector: + matchLabels: + dashboards: "rhacs" json: | diff --git a/resources/mixins/kubernetes/templates/dashboards/k8s-resources-namespace.yaml b/resources/mixins/kubernetes/templates/dashboards/k8s-resources-namespace.yaml index 65f7b497..6ce031f7 100644 --- a/resources/mixins/kubernetes/templates/dashboards/k8s-resources-namespace.yaml +++ b/resources/mixins/kubernetes/templates/dashboards/k8s-resources-namespace.yaml @@ -1,4 +1,4 @@ -apiVersion: integreatly.org/v1alpha1 +apiVersion: grafana.integreatly.org/v1beta1 kind: GrafanaDashboard metadata: labels: @@ -6,5 +6,7 @@ metadata: monitoring-key: middleware name: kubernetes-mixin-k8s-resources-namespace spec: - name: kubernetes-mixin-k8s-resources-namespace.json + instanceSelector: + matchLabels: + dashboards: "rhacs" json: | diff --git a/resources/mixins/kubernetes/templates/dashboards/k8s-resources-node.yaml b/resources/mixins/kubernetes/templates/dashboards/k8s-resources-node.yaml index dc47ca3b..e73880d0 100644 --- a/resources/mixins/kubernetes/templates/dashboards/k8s-resources-node.yaml +++ b/resources/mixins/kubernetes/templates/dashboards/k8s-resources-node.yaml @@ -1,4 +1,4 @@ -apiVersion: integreatly.org/v1alpha1 +apiVersion: grafana.integreatly.org/v1beta1 kind: GrafanaDashboard metadata: labels: @@ -6,5 +6,7 @@ metadata: monitoring-key: middleware name: kubernetes-mixin-k8s-resources-node spec: - name: kubernetes-mixin-k8s-resources-node.json + instanceSelector: + matchLabels: + dashboards: "rhacs" json: | diff --git a/resources/mixins/kubernetes/templates/dashboards/k8s-resources-pod.yaml b/resources/mixins/kubernetes/templates/dashboards/k8s-resources-pod.yaml index 93e34d8c..27707495 100644 --- a/resources/mixins/kubernetes/templates/dashboards/k8s-resources-pod.yaml +++ b/resources/mixins/kubernetes/templates/dashboards/k8s-resources-pod.yaml @@ -1,4 +1,4 @@ -apiVersion: integreatly.org/v1alpha1 +apiVersion: grafana.integreatly.org/v1beta1 kind: GrafanaDashboard metadata: labels: @@ -6,5 +6,7 @@ metadata: monitoring-key: middleware name: kubernetes-mixin-k8s-resources-pod spec: - name: kubernetes-mixin-k8s-resources-pod.json + instanceSelector: + matchLabels: + dashboards: "rhacs" json: | diff --git a/resources/mixins/kubernetes/templates/dashboards/k8s-resources-workload.yaml b/resources/mixins/kubernetes/templates/dashboards/k8s-resources-workload.yaml index b3e5879f..7d551004 100644 --- a/resources/mixins/kubernetes/templates/dashboards/k8s-resources-workload.yaml +++ b/resources/mixins/kubernetes/templates/dashboards/k8s-resources-workload.yaml @@ -1,4 +1,4 @@ -apiVersion: integreatly.org/v1alpha1 +apiVersion: grafana.integreatly.org/v1beta1 kind: GrafanaDashboard metadata: labels: @@ -6,5 +6,7 @@ metadata: monitoring-key: middleware name: kubernetes-mixin-k8s-resources-workload spec: - name: kubernetes-mixin-k8s-resources-workload.json + instanceSelector: + matchLabels: + dashboards: "rhacs" json: | diff --git a/resources/mixins/kubernetes/templates/dashboards/k8s-resources-workloads-namespace.yaml b/resources/mixins/kubernetes/templates/dashboards/k8s-resources-workloads-namespace.yaml index 7aefe754..56a0cb3e 100644 --- a/resources/mixins/kubernetes/templates/dashboards/k8s-resources-workloads-namespace.yaml +++ b/resources/mixins/kubernetes/templates/dashboards/k8s-resources-workloads-namespace.yaml @@ -1,4 +1,4 @@ -apiVersion: integreatly.org/v1alpha1 +apiVersion: grafana.integreatly.org/v1beta1 kind: GrafanaDashboard metadata: labels: @@ -6,5 +6,7 @@ metadata: monitoring-key: middleware name: kubernetes-mixin-k8s-resources-workloads-namespace spec: - name: kubernetes-mixin-k8s-resources-workloads-namespace.json + instanceSelector: + matchLabels: + dashboards: "rhacs" json: | diff --git a/resources/mixins/kubernetes/templates/dashboards/kubelet.yaml b/resources/mixins/kubernetes/templates/dashboards/kubelet.yaml index 2e39f089..b34797c8 100644 --- a/resources/mixins/kubernetes/templates/dashboards/kubelet.yaml +++ b/resources/mixins/kubernetes/templates/dashboards/kubelet.yaml @@ -1,4 +1,4 @@ -apiVersion: integreatly.org/v1alpha1 +apiVersion: grafana.integreatly.org/v1beta1 kind: GrafanaDashboard metadata: labels: @@ -6,5 +6,7 @@ metadata: monitoring-key: middleware name: kubernetes-mixin-kubelet spec: - name: kubernetes-mixin-kubelet.json + instanceSelector: + matchLabels: + dashboards: "rhacs" json: | diff --git a/resources/mixins/kubernetes/templates/dashboards/namespace-by-pod.yaml b/resources/mixins/kubernetes/templates/dashboards/namespace-by-pod.yaml index 70d7ff12..d6e14ab3 100644 --- a/resources/mixins/kubernetes/templates/dashboards/namespace-by-pod.yaml +++ b/resources/mixins/kubernetes/templates/dashboards/namespace-by-pod.yaml @@ -1,4 +1,4 @@ -apiVersion: integreatly.org/v1alpha1 +apiVersion: grafana.integreatly.org/v1beta1 kind: GrafanaDashboard metadata: labels: @@ -6,5 +6,7 @@ metadata: monitoring-key: middleware name: kubernetes-mixin-namespace-by-pod spec: - name: kubernetes-mixin-namespace-by-pod.json + instanceSelector: + matchLabels: + dashboards: "rhacs" json: | diff --git a/resources/mixins/kubernetes/templates/dashboards/namespace-by-workload.yaml b/resources/mixins/kubernetes/templates/dashboards/namespace-by-workload.yaml index e6dd8596..ed35ba8c 100644 --- a/resources/mixins/kubernetes/templates/dashboards/namespace-by-workload.yaml +++ b/resources/mixins/kubernetes/templates/dashboards/namespace-by-workload.yaml @@ -1,4 +1,4 @@ -apiVersion: integreatly.org/v1alpha1 +apiVersion: grafana.integreatly.org/v1beta1 kind: GrafanaDashboard metadata: labels: @@ -6,5 +6,7 @@ metadata: monitoring-key: middleware name: kubernetes-mixin-namespace-by-workload spec: - name: kubernetes-mixin-namespace-by-workload.json + instanceSelector: + matchLabels: + dashboards: "rhacs" json: | diff --git a/resources/mixins/kubernetes/templates/dashboards/persistentvolumesusage.yaml b/resources/mixins/kubernetes/templates/dashboards/persistentvolumesusage.yaml index bb66f67a..3d62776f 100644 --- a/resources/mixins/kubernetes/templates/dashboards/persistentvolumesusage.yaml +++ b/resources/mixins/kubernetes/templates/dashboards/persistentvolumesusage.yaml @@ -1,4 +1,4 @@ -apiVersion: integreatly.org/v1alpha1 +apiVersion: grafana.integreatly.org/v1beta1 kind: GrafanaDashboard metadata: labels: @@ -6,5 +6,7 @@ metadata: monitoring-key: middleware name: kubernetes-mixin-persistentvolumesusage spec: - name: kubernetes-mixin-persistentvolumesusage.json + instanceSelector: + matchLabels: + dashboards: "rhacs" json: | diff --git a/resources/mixins/kubernetes/templates/dashboards/pod-total.yaml b/resources/mixins/kubernetes/templates/dashboards/pod-total.yaml index ba2b6abf..448cd3da 100644 --- a/resources/mixins/kubernetes/templates/dashboards/pod-total.yaml +++ b/resources/mixins/kubernetes/templates/dashboards/pod-total.yaml @@ -1,4 +1,4 @@ -apiVersion: integreatly.org/v1alpha1 +apiVersion: grafana.integreatly.org/v1beta1 kind: GrafanaDashboard metadata: labels: @@ -6,5 +6,7 @@ metadata: monitoring-key: middleware name: kubernetes-mixin-pod-total spec: - name: kubernetes-mixin-pod-total.json + instanceSelector: + matchLabels: + dashboards: "rhacs" json: | diff --git a/resources/mixins/kubernetes/templates/dashboards/proxy.yaml b/resources/mixins/kubernetes/templates/dashboards/proxy.yaml index 84c0b4dd..2f9b773f 100644 --- a/resources/mixins/kubernetes/templates/dashboards/proxy.yaml +++ b/resources/mixins/kubernetes/templates/dashboards/proxy.yaml @@ -1,4 +1,4 @@ -apiVersion: integreatly.org/v1alpha1 +apiVersion: grafana.integreatly.org/v1beta1 kind: GrafanaDashboard metadata: labels: @@ -6,5 +6,7 @@ metadata: monitoring-key: middleware name: kubernetes-mixin-proxy spec: - name: kubernetes-mixin-proxy.json + instanceSelector: + matchLabels: + dashboards: "rhacs" json: | diff --git a/resources/mixins/kubernetes/templates/dashboards/scheduler.yaml b/resources/mixins/kubernetes/templates/dashboards/scheduler.yaml index 4e6cfdf5..025dbc71 100644 --- a/resources/mixins/kubernetes/templates/dashboards/scheduler.yaml +++ b/resources/mixins/kubernetes/templates/dashboards/scheduler.yaml @@ -1,4 +1,4 @@ -apiVersion: integreatly.org/v1alpha1 +apiVersion: grafana.integreatly.org/v1beta1 kind: GrafanaDashboard metadata: labels: @@ -6,5 +6,7 @@ metadata: monitoring-key: middleware name: kubernetes-mixin-scheduler spec: - name: kubernetes-mixin-scheduler.json + instanceSelector: + matchLabels: + dashboards: "rhacs" json: | diff --git a/resources/mixins/kubernetes/templates/dashboards/workload-total.yaml b/resources/mixins/kubernetes/templates/dashboards/workload-total.yaml index 3e882970..363a773b 100644 --- a/resources/mixins/kubernetes/templates/dashboards/workload-total.yaml +++ b/resources/mixins/kubernetes/templates/dashboards/workload-total.yaml @@ -1,4 +1,4 @@ -apiVersion: integreatly.org/v1alpha1 +apiVersion: grafana.integreatly.org/v1beta1 kind: GrafanaDashboard metadata: labels: @@ -6,5 +6,7 @@ metadata: monitoring-key: middleware name: kubernetes-mixin-workload-total spec: - name: kubernetes-mixin-workload-total.json + instanceSelector: + matchLabels: + dashboards: "rhacs" json: | diff --git a/resources/prometheus/billing-rules.yaml b/resources/prometheus/billing-rules.yaml index f1cbbe6a..8c406c95 100644 --- a/resources/prometheus/billing-rules.yaml +++ b/resources/prometheus/billing-rules.yaml @@ -1,8 +1,9 @@ -apiVersion: monitoring.coreos.com/v1 +apiVersion: monitoring.rhobs/v1 kind: PrometheusRule metadata: labels: app: rhacs + app.kubernetes.io/managed-by: observability-operator name: rhacs-billing-rules spec: groups: diff --git a/resources/prometheus/federation-config-base.yaml b/resources/prometheus/federation-config-base.yaml index 60b16a5b..a4378d6a 100644 --- a/resources/prometheus/federation-config-base.yaml +++ b/resources/prometheus/federation-config-base.yaml @@ -1,6 +1,28 @@ -# Use this list to add any required federated metrics that are not used by Rules, Alerts, or Grafana dashboards. -# Listed metrics will be merged with metrics used in Rules, Alerts, and Grafana dashboards. -match[]: - # This is an example. - - up{job!~"central|scanner"} - - openshift_apiserver_build_info +- job_name: openshift-monitoring-federation + honor_labels: true + kubernetes_sd_configs: + - role: service + namespaces: + names: + - openshift-monitoring + scrape_interval: 120s + scrape_timeout: 60s + metrics_path: /federate + relabel_configs: + - action: keep + source_labels: [ '__meta_kubernetes_service_name' ] + regex: prometheus-k8s + - action: keep + source_labels: [ '__meta_kubernetes_service_port_name' ] + regex: web + params: + # Use this list to add any required federated metrics that are not used by Rules, Alerts, or Grafana dashboards. + # Listed metrics will be merged with metrics used in Rules, Alerts, and Grafana dashboards. + match[]: + # This is an example. + - up{job!~"central|scanner"} + - openshift_apiserver_build_info + scheme: https + bearer_token_file: "/var/run/secrets/kubernetes.io/serviceaccount/token" + tls_config: + insecure_skip_verify: true diff --git a/resources/prometheus/federation-config.yaml b/resources/prometheus/federation-config.yaml index 815f59a4..413a8be3 100644 --- a/resources/prometheus/federation-config.yaml +++ b/resources/prometheus/federation-config.yaml @@ -1,179 +1,201 @@ -match[]: - - :node_memory_MemAvailable_bytes:sum{job!~"central|scanner"} - - aggregator_unavailable_apiservice_total{job!~"central|scanner"} - - aggregator_unavailable_apiservice{job!~"central|scanner"} - - apiserver_request:availability30d{job!~"central|scanner"} - - apiserver_request:burnrate1d{job!~"central|scanner"} - - apiserver_request:burnrate1h{job!~"central|scanner"} - - apiserver_request:burnrate2h{job!~"central|scanner"} - - apiserver_request:burnrate30m{job!~"central|scanner"} - - apiserver_request:burnrate3d{job!~"central|scanner"} - - apiserver_request:burnrate5m{job!~"central|scanner"} - - apiserver_request:burnrate6h{job!~"central|scanner"} - - apiserver_request_terminations_total{job!~"central|scanner"} - - apiserver_request_total{job!~"central|scanner"} - - cluster:namespace:pod_cpu:active:kube_pod_container_resource_limits{job!~"central|scanner"} - - cluster:namespace:pod_cpu:active:kube_pod_container_resource_requests{job!~"central|scanner"} - - cluster:namespace:pod_memory:active:kube_pod_container_resource_limits{job!~"central|scanner"} - - cluster:namespace:pod_memory:active:kube_pod_container_resource_requests{job!~"central|scanner"} - - cluster:node_cpu:ratio_rate5m{job!~"central|scanner"} - - cluster_autoscaler_cluster_safe_to_autoscale{job!~"central|scanner"} - - cluster_autoscaler_evicted_pods_total{job!~"central|scanner"} - - cluster_autoscaler_last_activity{job!~"central|scanner"} - - cluster_autoscaler_nodes_count{job!~"central|scanner"} - - cluster_autoscaler_scaled_down_nodes_total{job!~"central|scanner"} - - cluster_autoscaler_scaled_up_nodes_total{job!~"central|scanner"} - - cluster_autoscaler_skipped_scale_events_count{job!~"central|scanner"} - - cluster_autoscaler_unneeded_nodes_count{job!~"central|scanner"} - - cluster_autoscaler_unschedulable_pods_count{job!~"central|scanner"} - - cluster_quantile:apiserver_request_sli_duration_seconds:histogram_quantile{job!~"central|scanner"} - - code_resource:apiserver_request_total:rate5m{job!~"central|scanner"} - - container_cpu_cfs_periods_total{job!~"central|scanner"} - - container_cpu_cfs_throttled_periods_total{job!~"central|scanner"} - - container_cpu_usage_seconds_total{job!~"central|scanner"} - - container_fs_reads_bytes_total{job!~"central|scanner"} - - container_fs_reads_total{job!~"central|scanner"} - - container_fs_writes_bytes_total{job!~"central|scanner"} - - container_fs_writes_total{job!~"central|scanner"} - - container_memory_cache{job!~"central|scanner"} - - container_memory_max_usage_bytes{job!~"central|scanner"} - - container_memory_rss{job!~"central|scanner"} - - container_memory_swap{job!~"central|scanner"} - - container_memory_working_set_bytes{job!~"central|scanner"} - - container_network_receive_bytes_total{job!~"central|scanner"} - - container_network_receive_packets_dropped_total{job!~"central|scanner"} - - container_network_receive_packets_total{job!~"central|scanner"} - - container_network_transmit_bytes_total{job!~"central|scanner"} - - container_network_transmit_packets_dropped_total{job!~"central|scanner"} - - container_network_transmit_packets_total{job!~"central|scanner"} - - container_spec_memory_limit_bytes{job!~"central|scanner"} - - go_gc_duration_seconds{job!~"central|scanner"} - - go_goroutines{job!~"central|scanner"} - - go_memstats_alloc_bytes{job!~"central|scanner"} - - go_memstats_heap_inuse_bytes{job!~"central|scanner"} - - go_memstats_stack_inuse_bytes{job!~"central|scanner"} - - grpc_server_handled_total{job!~"central|scanner"} - - grpc_server_handling_seconds_bucket{job!~"central|scanner"} - - grpc_server_started_total{job!~"central|scanner"} - - haproxy_backend_http_responses_total{job!~"central|scanner"} - - http_incoming_request_duration_histogram_seconds_bucket{job!~"central|scanner"} - - http_incoming_requests_total{job!~"central|scanner"} - - kube_daemonset_status_current_number_scheduled{job!~"central|scanner"} - - kube_daemonset_status_desired_number_scheduled{job!~"central|scanner"} - - kube_daemonset_status_number_available{job!~"central|scanner"} - - kube_daemonset_status_number_misscheduled{job!~"central|scanner"} - - kube_daemonset_status_updated_number_scheduled{job!~"central|scanner"} - - kube_deployment_metadata_generation{job!~"central|scanner"} - - kube_deployment_spec_replicas{job!~"central|scanner"} - - kube_deployment_status_condition{job!~"central|scanner"} - - kube_deployment_status_observed_generation{job!~"central|scanner"} - - kube_deployment_status_replicas_available{job!~"central|scanner"} - - kube_deployment_status_replicas_ready{job!~"central|scanner"} - - kube_deployment_status_replicas_updated{job!~"central|scanner"} - - kube_horizontalpodautoscaler_spec_max_replicas{job!~"central|scanner"} - - kube_horizontalpodautoscaler_spec_min_replicas{job!~"central|scanner"} - - kube_horizontalpodautoscaler_status_current_replicas{job!~"central|scanner"} - - kube_horizontalpodautoscaler_status_desired_replicas{job!~"central|scanner"} - - kube_job_failed{job!~"central|scanner"} - - kube_job_status_active{job!~"central|scanner"} - - kube_job_status_start_time{job!~"central|scanner"} - - kube_namespace_status_phase{job!~"central|scanner"} - - kube_node_info{job!~"central|scanner"} - - kube_node_labels{job!~"central|scanner"} - - kube_node_role{job!~"central|scanner"} - - kube_node_spec_taint{job!~"central|scanner"} - - kube_node_status_allocatable{job!~"central|scanner"} - - kube_node_status_capacity{job!~"central|scanner"} - - kube_node_status_condition{job!~"central|scanner"} - - kube_persistentvolume_status_phase{job!~"central|scanner"} - - kube_persistentvolumeclaim_access_mode{job!~"central|scanner"} - - kube_persistentvolumeclaim_labels{job!~"central|scanner"} - - kube_pod_container_resource_limits{job!~"central|scanner"} - - kube_pod_container_resource_requests{job!~"central|scanner"} - - kube_pod_container_status_ready{job!~"central|scanner"} - - kube_pod_container_status_restarts_total{job!~"central|scanner"} - - kube_pod_container_status_waiting_reason{job!~"central|scanner"} - - kube_pod_info{job!~"central|scanner"} - - kube_pod_labels{job!~"central|scanner"} - - kube_pod_owner{job!~"central|scanner"} - - kube_pod_status_phase{job!~"central|scanner"} - - kube_resourcequota{job!~"central|scanner"} - - kube_statefulset_metadata_generation{job!~"central|scanner"} - - kube_statefulset_replicas{job!~"central|scanner"} - - kube_statefulset_status_current_revision{job!~"central|scanner"} - - kube_statefulset_status_observed_generation{job!~"central|scanner"} - - kube_statefulset_status_replicas_ready{job!~"central|scanner"} - - kube_statefulset_status_replicas_updated{job!~"central|scanner"} - - kube_statefulset_status_replicas{job!~"central|scanner"} - - kube_statefulset_status_update_revision{job!~"central|scanner"} - - kubelet_certificate_manager_client_expiration_renew_errors{job!~"central|scanner"} - - kubelet_cgroup_manager_duration_seconds_bucket{job!~"central|scanner"} - - kubelet_cgroup_manager_duration_seconds_count{job!~"central|scanner"} - - kubelet_node_config_error{job!~"central|scanner"} - - kubelet_node_name{job!~"central|scanner"} - - kubelet_pleg_relist_duration_seconds_bucket{job!~"central|scanner"} - - kubelet_pleg_relist_duration_seconds_count{job!~"central|scanner"} - - kubelet_pleg_relist_interval_seconds_bucket{job!~"central|scanner"} - - kubelet_pod_start_duration_seconds_bucket{job!~"central|scanner"} - - kubelet_pod_start_duration_seconds_count{job!~"central|scanner"} - - kubelet_pod_worker_duration_seconds_bucket{job!~"central|scanner"} - - kubelet_pod_worker_duration_seconds_count{job!~"central|scanner"} - - kubelet_running_containers{job!~"central|scanner"} - - kubelet_running_pods{job!~"central|scanner"} - - kubelet_runtime_operations_duration_seconds_bucket{job!~"central|scanner"} - - kubelet_runtime_operations_errors_total{job!~"central|scanner"} - - kubelet_runtime_operations_total{job!~"central|scanner"} - - kubelet_server_expiration_renew_errors{job!~"central|scanner"} - - kubelet_volume_stats_available_bytes{job!~"central|scanner"} - - kubelet_volume_stats_capacity_bytes{job!~"central|scanner"} - - kubelet_volume_stats_inodes_free{job!~"central|scanner"} - - kubelet_volume_stats_inodes_used{job!~"central|scanner"} - - kubelet_volume_stats_inodes{job!~"central|scanner"} - - kubelet_volume_stats_used_bytes{job!~"central|scanner"} - - kubeproxy_network_programming_duration_seconds_bucket{job!~"central|scanner"} - - kubeproxy_network_programming_duration_seconds_count{job!~"central|scanner"} - - kubeproxy_sync_proxy_rules_duration_seconds_bucket{job!~"central|scanner"} - - kubeproxy_sync_proxy_rules_duration_seconds_count{job!~"central|scanner"} - - kubernetes_build_info{job!~"central|scanner"} - - namespace_cpu:kube_pod_container_resource_limits:sum{job!~"central|scanner"} - - namespace_cpu:kube_pod_container_resource_requests:sum{job!~"central|scanner"} - - namespace_memory:kube_pod_container_resource_limits:sum{job!~"central|scanner"} - - namespace_memory:kube_pod_container_resource_requests:sum{job!~"central|scanner"} - - namespace_workload_pod:kube_pod_owner:relabel{job!~"central|scanner"} - - network_policy_denials_sample_count{job!~"central|scanner"} - - node_memory_MemTotal_bytes{job!~"central|scanner"} - - node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{job!~"central|scanner"} - - node_namespace_pod_container:container_memory_cache{job!~"central|scanner"} - - node_namespace_pod_container:container_memory_rss{job!~"central|scanner"} - - node_namespace_pod_container:container_memory_swap{job!~"central|scanner"} - - node_namespace_pod_container:container_memory_working_set_bytes{job!~"central|scanner"} - - node_netstat_TcpExt_TCPSynRetrans{job!~"central|scanner"} - - node_netstat_Tcp_OutSegs{job!~"central|scanner"} - - node_netstat_Tcp_RetransSegs{job!~"central|scanner"} - - node_quantile:kubelet_pleg_relist_duration_seconds:histogram_quantile{job!~"central|scanner"} - - obs_operator:prometheus_remote_storage_succeeded_samples:ratio_rate1h{job!~"central|scanner"} - - openshift_apiserver_build_info - - process_cpu_seconds_total{job!~"central|scanner"} - - process_resident_memory_bytes{job!~"central|scanner"} - - prometheus_remote_storage_samples_failed_total{job!~"central|scanner"} - - prometheus_remote_storage_samples_retried_total{job!~"central|scanner"} - - prometheus_remote_storage_samples_total{job!~"central|scanner"} - - rest_client_request_duration_seconds_bucket{job!~"central|scanner"} - - rest_client_requests_total{job!~"central|scanner"} - - scheduler_binding_duration_seconds_bucket{job!~"central|scanner"} - - scheduler_binding_duration_seconds_count{job!~"central|scanner"} - - scheduler_e2e_scheduling_duration_seconds_bucket{job!~"central|scanner"} - - scheduler_e2e_scheduling_duration_seconds_count{job!~"central|scanner"} - - scheduler_scheduling_algorithm_duration_seconds_bucket{job!~"central|scanner"} - - scheduler_scheduling_algorithm_duration_seconds_count{job!~"central|scanner"} - - scheduler_volume_scheduling_duration_seconds_bucket{job!~"central|scanner"} - - scheduler_volume_scheduling_duration_seconds_count{job!~"central|scanner"} - - selinux_denials_sample_count{job!~"central|scanner"} - - storage_operation_duration_seconds_bucket{job!~"central|scanner"} - - storage_operation_duration_seconds_count{job!~"central|scanner"} - - storage_operation_errors_total{job!~"central|scanner"} - - up{job!~"central|scanner"} - - volume_manager_total_volumes{job!~"central|scanner"} - - workqueue_adds_total{job!~"central|scanner"} - - workqueue_depth{job!~"central|scanner"} - - workqueue_queue_duration_seconds_bucket{job!~"central|scanner"} +- job_name: openshift-monitoring-federation + honor_labels: true + kubernetes_sd_configs: + - role: service + namespaces: + names: + - openshift-monitoring + scrape_interval: 120s + scrape_timeout: 60s + metrics_path: /federate + relabel_configs: + - action: keep + source_labels: ['__meta_kubernetes_service_name'] + regex: prometheus-k8s + - action: keep + source_labels: ['__meta_kubernetes_service_port_name'] + regex: web + params: + match[]: + - :node_memory_MemAvailable_bytes:sum{job!~"central|scanner"} + - aggregator_unavailable_apiservice_total{job!~"central|scanner"} + - aggregator_unavailable_apiservice{job!~"central|scanner"} + - apiserver_request:availability30d{job!~"central|scanner"} + - apiserver_request:burnrate1d{job!~"central|scanner"} + - apiserver_request:burnrate1h{job!~"central|scanner"} + - apiserver_request:burnrate2h{job!~"central|scanner"} + - apiserver_request:burnrate30m{job!~"central|scanner"} + - apiserver_request:burnrate3d{job!~"central|scanner"} + - apiserver_request:burnrate5m{job!~"central|scanner"} + - apiserver_request:burnrate6h{job!~"central|scanner"} + - apiserver_request_terminations_total{job!~"central|scanner"} + - apiserver_request_total{job!~"central|scanner"} + - cluster:namespace:pod_cpu:active:kube_pod_container_resource_limits{job!~"central|scanner"} + - cluster:namespace:pod_cpu:active:kube_pod_container_resource_requests{job!~"central|scanner"} + - cluster:namespace:pod_memory:active:kube_pod_container_resource_limits{job!~"central|scanner"} + - cluster:namespace:pod_memory:active:kube_pod_container_resource_requests{job!~"central|scanner"} + - cluster:node_cpu:ratio_rate5m{job!~"central|scanner"} + - cluster_autoscaler_cluster_safe_to_autoscale{job!~"central|scanner"} + - cluster_autoscaler_evicted_pods_total{job!~"central|scanner"} + - cluster_autoscaler_last_activity{job!~"central|scanner"} + - cluster_autoscaler_nodes_count{job!~"central|scanner"} + - cluster_autoscaler_scaled_down_nodes_total{job!~"central|scanner"} + - cluster_autoscaler_scaled_up_nodes_total{job!~"central|scanner"} + - cluster_autoscaler_skipped_scale_events_count{job!~"central|scanner"} + - cluster_autoscaler_unneeded_nodes_count{job!~"central|scanner"} + - cluster_autoscaler_unschedulable_pods_count{job!~"central|scanner"} + - cluster_quantile:apiserver_request_sli_duration_seconds:histogram_quantile{job!~"central|scanner"} + - code_resource:apiserver_request_total:rate5m{job!~"central|scanner"} + - container_cpu_cfs_periods_total{job!~"central|scanner"} + - container_cpu_cfs_throttled_periods_total{job!~"central|scanner"} + - container_cpu_usage_seconds_total{job!~"central|scanner"} + - container_fs_reads_bytes_total{job!~"central|scanner"} + - container_fs_reads_total{job!~"central|scanner"} + - container_fs_writes_bytes_total{job!~"central|scanner"} + - container_fs_writes_total{job!~"central|scanner"} + - container_memory_cache{job!~"central|scanner"} + - container_memory_max_usage_bytes{job!~"central|scanner"} + - container_memory_rss{job!~"central|scanner"} + - container_memory_swap{job!~"central|scanner"} + - container_memory_working_set_bytes{job!~"central|scanner"} + - container_network_receive_bytes_total{job!~"central|scanner"} + - container_network_receive_packets_dropped_total{job!~"central|scanner"} + - container_network_receive_packets_total{job!~"central|scanner"} + - container_network_transmit_bytes_total{job!~"central|scanner"} + - container_network_transmit_packets_dropped_total{job!~"central|scanner"} + - container_network_transmit_packets_total{job!~"central|scanner"} + - container_spec_memory_limit_bytes{job!~"central|scanner"} + - go_gc_duration_seconds{job!~"central|scanner"} + - go_goroutines{job!~"central|scanner"} + - go_memstats_alloc_bytes{job!~"central|scanner"} + - go_memstats_heap_inuse_bytes{job!~"central|scanner"} + - go_memstats_stack_inuse_bytes{job!~"central|scanner"} + - grpc_server_handled_total{job!~"central|scanner"} + - grpc_server_handling_seconds_bucket{job!~"central|scanner"} + - grpc_server_started_total{job!~"central|scanner"} + - haproxy_backend_http_responses_total{job!~"central|scanner"} + - http_incoming_request_duration_histogram_seconds_bucket{job!~"central|scanner"} + - http_incoming_requests_total{job!~"central|scanner"} + - kube_daemonset_status_current_number_scheduled{job!~"central|scanner"} + - kube_daemonset_status_desired_number_scheduled{job!~"central|scanner"} + - kube_daemonset_status_number_available{job!~"central|scanner"} + - kube_daemonset_status_number_misscheduled{job!~"central|scanner"} + - kube_daemonset_status_updated_number_scheduled{job!~"central|scanner"} + - kube_deployment_metadata_generation{job!~"central|scanner"} + - kube_deployment_spec_replicas{job!~"central|scanner"} + - kube_deployment_status_condition{job!~"central|scanner"} + - kube_deployment_status_observed_generation{job!~"central|scanner"} + - kube_deployment_status_replicas_available{job!~"central|scanner"} + - kube_deployment_status_replicas_ready{job!~"central|scanner"} + - kube_deployment_status_replicas_updated{job!~"central|scanner"} + - kube_horizontalpodautoscaler_spec_max_replicas{job!~"central|scanner"} + - kube_horizontalpodautoscaler_spec_min_replicas{job!~"central|scanner"} + - kube_horizontalpodautoscaler_status_current_replicas{job!~"central|scanner"} + - kube_horizontalpodautoscaler_status_desired_replicas{job!~"central|scanner"} + - kube_job_failed{job!~"central|scanner"} + - kube_job_status_active{job!~"central|scanner"} + - kube_job_status_start_time{job!~"central|scanner"} + - kube_namespace_status_phase{job!~"central|scanner"} + - kube_node_info{job!~"central|scanner"} + - kube_node_labels{job!~"central|scanner"} + - kube_node_role{job!~"central|scanner"} + - kube_node_spec_taint{job!~"central|scanner"} + - kube_node_status_allocatable{job!~"central|scanner"} + - kube_node_status_capacity{job!~"central|scanner"} + - kube_node_status_condition{job!~"central|scanner"} + - kube_persistentvolume_status_phase{job!~"central|scanner"} + - kube_persistentvolumeclaim_access_mode{job!~"central|scanner"} + - kube_persistentvolumeclaim_labels{job!~"central|scanner"} + - kube_pod_container_resource_limits{job!~"central|scanner"} + - kube_pod_container_resource_requests{job!~"central|scanner"} + - kube_pod_container_status_ready{job!~"central|scanner"} + - kube_pod_container_status_restarts_total{job!~"central|scanner"} + - kube_pod_container_status_waiting_reason{job!~"central|scanner"} + - kube_pod_info{job!~"central|scanner"} + - kube_pod_labels{job!~"central|scanner"} + - kube_pod_owner{job!~"central|scanner"} + - kube_pod_status_phase{job!~"central|scanner"} + - kube_resourcequota{job!~"central|scanner"} + - kube_statefulset_metadata_generation{job!~"central|scanner"} + - kube_statefulset_replicas{job!~"central|scanner"} + - kube_statefulset_status_current_revision{job!~"central|scanner"} + - kube_statefulset_status_observed_generation{job!~"central|scanner"} + - kube_statefulset_status_replicas_ready{job!~"central|scanner"} + - kube_statefulset_status_replicas_updated{job!~"central|scanner"} + - kube_statefulset_status_replicas{job!~"central|scanner"} + - kube_statefulset_status_update_revision{job!~"central|scanner"} + - kubelet_certificate_manager_client_expiration_renew_errors{job!~"central|scanner"} + - kubelet_cgroup_manager_duration_seconds_bucket{job!~"central|scanner"} + - kubelet_cgroup_manager_duration_seconds_count{job!~"central|scanner"} + - kubelet_node_config_error{job!~"central|scanner"} + - kubelet_node_name{job!~"central|scanner"} + - kubelet_pleg_relist_duration_seconds_bucket{job!~"central|scanner"} + - kubelet_pleg_relist_duration_seconds_count{job!~"central|scanner"} + - kubelet_pleg_relist_interval_seconds_bucket{job!~"central|scanner"} + - kubelet_pod_start_duration_seconds_bucket{job!~"central|scanner"} + - kubelet_pod_start_duration_seconds_count{job!~"central|scanner"} + - kubelet_pod_worker_duration_seconds_bucket{job!~"central|scanner"} + - kubelet_pod_worker_duration_seconds_count{job!~"central|scanner"} + - kubelet_running_containers{job!~"central|scanner"} + - kubelet_running_pods{job!~"central|scanner"} + - kubelet_runtime_operations_duration_seconds_bucket{job!~"central|scanner"} + - kubelet_runtime_operations_errors_total{job!~"central|scanner"} + - kubelet_runtime_operations_total{job!~"central|scanner"} + - kubelet_server_expiration_renew_errors{job!~"central|scanner"} + - kubelet_volume_stats_available_bytes{job!~"central|scanner"} + - kubelet_volume_stats_capacity_bytes{job!~"central|scanner"} + - kubelet_volume_stats_inodes_free{job!~"central|scanner"} + - kubelet_volume_stats_inodes_used{job!~"central|scanner"} + - kubelet_volume_stats_inodes{job!~"central|scanner"} + - kubelet_volume_stats_used_bytes{job!~"central|scanner"} + - kubeproxy_network_programming_duration_seconds_bucket{job!~"central|scanner"} + - kubeproxy_network_programming_duration_seconds_count{job!~"central|scanner"} + - kubeproxy_sync_proxy_rules_duration_seconds_bucket{job!~"central|scanner"} + - kubeproxy_sync_proxy_rules_duration_seconds_count{job!~"central|scanner"} + - kubernetes_build_info{job!~"central|scanner"} + - namespace_cpu:kube_pod_container_resource_limits:sum{job!~"central|scanner"} + - namespace_cpu:kube_pod_container_resource_requests:sum{job!~"central|scanner"} + - namespace_memory:kube_pod_container_resource_limits:sum{job!~"central|scanner"} + - namespace_memory:kube_pod_container_resource_requests:sum{job!~"central|scanner"} + - namespace_workload_pod:kube_pod_owner:relabel{job!~"central|scanner"} + - network_policy_denials_sample_count{job!~"central|scanner"} + - node_memory_MemTotal_bytes{job!~"central|scanner"} + - node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{job!~"central|scanner"} + - node_namespace_pod_container:container_memory_cache{job!~"central|scanner"} + - node_namespace_pod_container:container_memory_rss{job!~"central|scanner"} + - node_namespace_pod_container:container_memory_swap{job!~"central|scanner"} + - node_namespace_pod_container:container_memory_working_set_bytes{job!~"central|scanner"} + - node_netstat_TcpExt_TCPSynRetrans{job!~"central|scanner"} + - node_netstat_Tcp_OutSegs{job!~"central|scanner"} + - node_netstat_Tcp_RetransSegs{job!~"central|scanner"} + - node_quantile:kubelet_pleg_relist_duration_seconds:histogram_quantile{job!~"central|scanner"} + - obs_operator:prometheus_remote_storage_succeeded_samples:ratio_rate1h{job!~"central|scanner"} + - openshift_apiserver_build_info + - process_cpu_seconds_total{job!~"central|scanner"} + - process_resident_memory_bytes{job!~"central|scanner"} + - prometheus_remote_storage_samples_failed_total{job!~"central|scanner"} + - prometheus_remote_storage_samples_retried_total{job!~"central|scanner"} + - prometheus_remote_storage_samples_total{job!~"central|scanner"} + - rest_client_request_duration_seconds_bucket{job!~"central|scanner"} + - rest_client_requests_total{job!~"central|scanner"} + - scheduler_binding_duration_seconds_bucket{job!~"central|scanner"} + - scheduler_binding_duration_seconds_count{job!~"central|scanner"} + - scheduler_e2e_scheduling_duration_seconds_bucket{job!~"central|scanner"} + - scheduler_e2e_scheduling_duration_seconds_count{job!~"central|scanner"} + - scheduler_scheduling_algorithm_duration_seconds_bucket{job!~"central|scanner"} + - scheduler_scheduling_algorithm_duration_seconds_count{job!~"central|scanner"} + - scheduler_volume_scheduling_duration_seconds_bucket{job!~"central|scanner"} + - scheduler_volume_scheduling_duration_seconds_count{job!~"central|scanner"} + - selinux_denials_sample_count{job!~"central|scanner"} + - storage_operation_duration_seconds_bucket{job!~"central|scanner"} + - storage_operation_duration_seconds_count{job!~"central|scanner"} + - storage_operation_errors_total{job!~"central|scanner"} + - up{job!~"central|scanner"} + - volume_manager_total_volumes{job!~"central|scanner"} + - workqueue_adds_total{job!~"central|scanner"} + - workqueue_depth{job!~"central|scanner"} + - workqueue_queue_duration_seconds_bucket{job!~"central|scanner"} + scheme: https + bearer_token_file: "/var/run/secrets/kubernetes.io/serviceaccount/token" + tls_config: + insecure_skip_verify: true diff --git a/resources/prometheus/generated/kubernetes-mixin-alerts.yaml b/resources/prometheus/generated/kubernetes-mixin-alerts.yaml index 23330a0b..8d1720be 100644 --- a/resources/prometheus/generated/kubernetes-mixin-alerts.yaml +++ b/resources/prometheus/generated/kubernetes-mixin-alerts.yaml @@ -1,8 +1,9 @@ -apiVersion: monitoring.coreos.com/v1 +apiVersion: monitoring.rhobs/v1 kind: PrometheusRule metadata: labels: app: rhacs + app.kubernetes.io/managed-by: observability-operator name: kubernetes-mixin-alerts spec: "groups": diff --git a/resources/prometheus/pod_monitors/prometheus-self-metrics.yaml b/resources/prometheus/pod_monitors/prometheus-self-metrics.yaml index 845550ba..f9a1d24a 100644 --- a/resources/prometheus/pod_monitors/prometheus-self-metrics.yaml +++ b/resources/prometheus/pod_monitors/prometheus-self-metrics.yaml @@ -1,9 +1,10 @@ -apiVersion: monitoring.coreos.com/v1 +apiVersion: monitoring.rhobs/v1 kind: PodMonitor metadata: name: prometheus-self-metrics labels: app: rhacs + app.kubernetes.io/managed-by: observability-operator spec: namespaceSelector: any: true @@ -12,4 +13,4 @@ spec: port: web selector: matchLabels: - prometheus: obs-prometheus + prometheus: rhacs-prometheus diff --git a/resources/prometheus/pod_monitors/rhacs-cloudwatch-exporter.yaml b/resources/prometheus/pod_monitors/rhacs-cloudwatch-exporter.yaml index 870a6d1e..6ee9a171 100644 --- a/resources/prometheus/pod_monitors/rhacs-cloudwatch-exporter.yaml +++ b/resources/prometheus/pod_monitors/rhacs-cloudwatch-exporter.yaml @@ -1,9 +1,10 @@ -apiVersion: monitoring.coreos.com/v1 +apiVersion: monitoring.rhobs/v1 kind: PodMonitor metadata: name: rhacs-cloudwatch-metrics labels: app: rhacs + app.kubernetes.io/managed-by: observability-operator spec: selector: matchLabels: diff --git a/resources/prometheus/pod_monitors/rhacs-emailsender-metrics.yaml b/resources/prometheus/pod_monitors/rhacs-emailsender-metrics.yaml index 71c99f7a..d1fefa90 100644 --- a/resources/prometheus/pod_monitors/rhacs-emailsender-metrics.yaml +++ b/resources/prometheus/pod_monitors/rhacs-emailsender-metrics.yaml @@ -1,9 +1,10 @@ -apiVersion: monitoring.coreos.com/v1 +apiVersion: monitoring.rhobs/v1 kind: PodMonitor metadata: name: rhacs-emailsender-metrics labels: app: rhacs + app.kubernetes.io/managed-by: observability-operator spec: selector: matchLabels: diff --git a/resources/prometheus/pod_monitors/rhacs-fleetshard-sync-metrics.yaml b/resources/prometheus/pod_monitors/rhacs-fleetshard-sync-metrics.yaml index 97d03af0..5b6d5526 100644 --- a/resources/prometheus/pod_monitors/rhacs-fleetshard-sync-metrics.yaml +++ b/resources/prometheus/pod_monitors/rhacs-fleetshard-sync-metrics.yaml @@ -1,9 +1,10 @@ -apiVersion: monitoring.coreos.com/v1 +apiVersion: monitoring.rhobs/v1 kind: PodMonitor metadata: name: rhacs-fleetshard-metrics labels: app: rhacs + app.kubernetes.io/managed-by: observability-operator spec: selector: matchLabels: diff --git a/resources/prometheus/pod_monitors/rhacs-tenant-metrics.yaml b/resources/prometheus/pod_monitors/rhacs-tenant-metrics.yaml index a84287a0..1712355c 100644 --- a/resources/prometheus/pod_monitors/rhacs-tenant-metrics.yaml +++ b/resources/prometheus/pod_monitors/rhacs-tenant-metrics.yaml @@ -1,4 +1,4 @@ -apiVersion: monitoring.coreos.com/v1 +apiVersion: monitoring.rhobs/v1 kind: PodMonitor metadata: name: rhacs-tenant-metrics diff --git a/resources/prometheus/prometheus-rules.yaml b/resources/prometheus/prometheus-rules.yaml index 98eef14c..f2fb342a 100644 --- a/resources/prometheus/prometheus-rules.yaml +++ b/resources/prometheus/prometheus-rules.yaml @@ -1,8 +1,9 @@ -apiVersion: monitoring.coreos.com/v1 +apiVersion: monitoring.rhobs/v1 kind: PrometheusRule metadata: labels: app: rhacs + app.kubernetes.io/managed-by: observability-operator name: rhacs-data-plane-prometheus-rules spec: groups: @@ -403,6 +404,7 @@ spec: expr: vector(1) labels: name: DeadMansSwitchAlert + observability: managed-rhacs - name: federation rules: diff --git a/resources/prometheus/rhacs-recording-rules.yaml b/resources/prometheus/rhacs-recording-rules.yaml index e1f26601..6dd5fa22 100644 --- a/resources/prometheus/rhacs-recording-rules.yaml +++ b/resources/prometheus/rhacs-recording-rules.yaml @@ -1,8 +1,9 @@ -apiVersion: monitoring.coreos.com/v1 +apiVersion: monitoring.rhobs/v1 kind: PrometheusRule metadata: labels: app: rhacs + app.kubernetes.io/managed-by: observability-operator name: rhacs-recording-rules spec: groups: diff --git a/scripts/generate-federate-match.sh b/scripts/generate-federate-match.sh index cc5a29cc..83f20437 100755 --- a/scripts/generate-federate-match.sh +++ b/scripts/generate-federate-match.sh @@ -75,18 +75,26 @@ function main() { # Get metrics used in recording rules and alerts local rules_files - rules_files=$(jq '.config.prometheus.rules[]' "${repo_dir}/resources/index.json" --raw-output) + rules_files=$(yq '.resources[] | select(. | match("^prometheus\/")) | select(load("resources/"+ .) | .kind == "PrometheusRule")' "${repo_dir}/resources/kustomization.yaml") while IFS= read -r rules_file; do get_rules_metrics "${repo_dir}/resources/${rules_file}" "${metrics_list_file}" done <<< "${rules_files}" # Filter metrics (exclude metrics that are collected by observability Prometheus or created by recording rules) sort "${metrics_list_file}" | uniq | grep -v -E "^acs|^rox|^aws|^central:|acscs_worker_nodes" | awk '{ print $1 "{job!~\"central|scanner\"}" }' > "${metrics_list_file}.filter" - - # Create federation-config.yaml - local yq_expression='. *+ load("'"${repo_dir}/resources/prometheus/federation-config-base.yaml"'")."match[]" | unique | sort | { "match[]": . }' - sed -e 's/^/- /' "${metrics_list_file}.filter" | yq "${yq_expression}" > "${repo_dir}/resources/prometheus/federation-config.yaml" - + local yq_expression + # shellcheck disable=SC2016 + # $f is not a shell variable, but a yq variable, so it should not be surrounded by double quotes + yq_expression=$(printf '%s' \ + '(load_str("'"${metrics_list_file}.filter"'") |' \ + 'sub("\n$","") |' \ + 'split("\n")) as $f |' \ + '.[0].params."match[]" += $f |' \ + '.[0].params."match[]" |= unique |' \ + '.[0].params."match[]" |= sort |' \ + '... comments=""' \ + ) + yq "${yq_expression}" "${repo_dir}/resources/prometheus/federation-config-base.yaml" > "${repo_dir}/resources/prometheus/federation-config.yaml" # Clean up the temp directory with all transient files rm -rf "${working_tmp_dir}" log "Deleted temp dir: '${working_tmp_dir}'"