diff --git a/resources/grafana/generated/dashboards/rhacs-cluster-overview.yaml b/resources/grafana/generated/dashboards/rhacs-cluster-overview.yaml index bed9d32..1b70d8f 100644 --- a/resources/grafana/generated/dashboards/rhacs-cluster-overview.yaml +++ b/resources/grafana/generated/dashboards/rhacs-cluster-overview.yaml @@ -35,8 +35,8 @@ spec: "uid": "PBFA97CFB590B2093" }, "enable": true, - "iconColor": "purple", "expr": "count (count by (git_version) (label_replace(count_over_time(kubernetes_build_info{job!~\"kube-dns|coredns\"}[${__interval}]), \"git_version\", \"$1\", \"git_version\", \"(v[0-9]*.[0-9]*).*\"))) > 1", + "iconColor": "purple", "name": "Kubernetes Upgrade", "textFormat": "Kubernetes Upgrade" }, @@ -46,8 +46,8 @@ spec: "uid": "PBFA97CFB590B2093" }, "enable": true, - "iconColor": "red", "expr": "count (count by (gitVersion) (count_over_time (openshift_apiserver_build_info[${__interval}]))) > 1", + "iconColor": "red", "name": "OpenShift Upgrade", "textFormat": "OpenShift Upgrade" } @@ -56,7 +56,7 @@ spec: "editable": true, "fiscalYearStartMonth": 0, "graphTooltip": 0, - "id": 23, + "id": 11, "links": [], "liveNow": false, "panels": [ @@ -1118,7 +1118,7 @@ spec: "content": "### Description\n\nThis graph shows the occurences per minute of **SELinux AVC denials** on the cluster.\nThese violations are logged on the cluster, propagated to CloudWatch, aggregated by a log metric, retrieved by the cloudwatch-exporter and finally scraped by Prometheus.\n\n**Expected: 0 violations.**\n\nA violation means that the cluster node's SELinux policy prevented a process' actions.\nAs an example, a violation could indicate that a process on the cluster tried to access a file which is SELinux-protected.\n\n### Drill-Down\n\nLog into the cluster's AWS account and use a [Log Insights query](https://us-east-1.console.aws.amazon.com/cloudwatch/home?region=us-east-1#logsV2:logs-insights$3FqueryDetail$3D~(end~0~start~-3600~timeType~'RELATIVE~unit~'seconds~editorString~'fields*20*40timestamp*2c*20*40message*2c*20*40logStream*2c*20*40log*0a*7c*20filter*20*40logStream*20like*20*2flinux-audit*2f*0a*7c*20filter*20*40message*20like*20*2fAVC*2f*0a*7c*20sort*20*40timestamp*20desc*0a*7c*20limit*201000~source~(~))) similar to this one:\n```\nfields @timestamp, @message, @logStream, @log\n| filter @logStream like /linux-audit/\n| filter @message like /AVC/\n| sort @timestamp desc\n| limit 1000\n```\n\n**Note:**\n* all CloudWatch related resources are located in the `us-east-1` region.\n* the log group containing the violation logs are called `acs-.audit`.\n", "mode": "markdown" }, - "pluginVersion": "10.2.0", + "pluginVersion": "11.1.0", "type": "text" }, { @@ -1248,7 +1248,7 @@ spec: "content": "### Description\n\nThis graph shows the occurences per minute of Network Policy ACL denials on the cluster.\nThese violations are logged on the cluster, propagated to CloudWatch, aggregated by a log metric, retrieved by the cloudwatch-exporter and finally scraped by Prometheus.\n\n**Expected: 0 violations.**\n\nA violation means that network traffic was prevented due to a Kubernetes Network Policy.\nAs an example, a violation could indicate that communication between RHACS tenant namespaces\nwas attempted, which is strictly forbidden.\n\n### Drill-Down\n\nLog into the cluster's AWS account and use a [Log Insights query](https://us-east-1.console.aws.amazon.com/cloudwatch/home?region=us-east-1#logsV2:logs-insights$3FqueryDetail$3D~(end~0~start~-3600~timeType~'RELATIVE~unit~'seconds~editorString~'fields*20*40timestamp*2c*20*40message*2c*20*40logStream*2c*20*40log*0a*7c*20filter*20*40message*20like*20*2facl_log*28.*2a*29.*2a*5csverdict*3ddrop*2f*0a*7c*20filter*20*40logStream*20like*20*2f.*2aovn-audit*5c.log*2f*0a*7c*20sort*20*40timestamp*20desc*0a*7c*20limit*201000~source~(~))) similar to this one:\n```\nfields @timestamp, @message, @logStream, @log\n| filter @message like /acl_log(.*).*\\sverdict=drop/\n| filter @logStream like /.*ovn-audit\\.log/\n| sort @timestamp desc\n| limit 1000\n```\n\n**Note:**\n* all CloudWatch related resources are located in the `us-east-1` region.\n* the log group containing the violation logs are called `acs-.audit`.\n\n", "mode": "markdown" }, - "pluginVersion": "10.2.0", + "pluginVersion": "11.1.0", "type": "text" }, { @@ -1481,7 +1481,7 @@ spec: } ] }, - "pluginVersion": "10.2.0", + "pluginVersion": "11.1.0", "targets": [ { "datasource": { @@ -1952,7 +1952,7 @@ spec: } ] }, - "pluginVersion": "10.2.0", + "pluginVersion": "11.1.0", "targets": [ { "datasource": { @@ -2130,11 +2130,137 @@ spec: } ], "type": "table" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "description": "Monitoring + Alerting for certificate expiration, tracking and managing of digital certificates expiration dates. Extracts timestamps from certificates.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "auto", + "cellOptions": { + "type": "auto" + }, + "inspect": false + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 85 + }, + "id": 147, + "options": { + "cellHeight": "sm", + "footer": { + "countRows": false, + "fields": "", + "reducer": ["sum"], + "show": false + }, + "showHeader": true + }, + "pluginVersion": "11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "disableTextWrap": false, + "editorMode": "builder", + "expr": "acs_fleetshard_certificate_expiration_timestamp", + "format": "table", + "fullMetaSearch": false, + "includeNullMetadata": true, + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "A", + "useBackend": false + } + ], + "title": "Certificate Expiry", + "transformations": [ + { + "id": "organize", + "options": { + "excludeByName": { + "Value": true, + "__name__": true, + "container": true, + "instance": true, + "job": true, + "namespace": true, + "pod": true, + "rhacs_cluster_name": true, + "rhacs_environment": true + }, + "includeByName": {}, + "indexByName": { + "Time": 3, + "Value": 12, + "__name__": 4, + "container": 5, + "data_key": 2, + "exported_namespace": 0, + "instance": 6, + "job": 7, + "namespace": 8, + "pod": 9, + "rhacs_cluster_name": 10, + "rhacs_environment": 11, + "secret": 1 + }, + "renameByName": { + "Time": "Expiration", + "data_key": "Key", + "exported_namespace": "Namespace", + "secret": "Secret" + } + } + }, + { + "id": "sortBy", + "options": { + "fields": {}, + "sort": [ + { + "field": "Expiration" + } + ] + } + } + ], + "type": "table" } ], "refresh": "", "revision": 1, - "schemaVersion": 38, + "schemaVersion": 39, "tags": ["rhacs"], "templating": { "list": [ @@ -2260,6 +2386,6 @@ spec: "timezone": "", "title": "RHACS Dataplane - Cluster Metrics", "uid": "4032f3c17643119901e107a0a1786d5b9e4c9565", - "version": 2, + "version": 1, "weekStart": "" } diff --git a/resources/grafana/sources/rhacs-cluster-overview.json b/resources/grafana/sources/rhacs-cluster-overview.json index ca0e6c2..ad3a197 100644 --- a/resources/grafana/sources/rhacs-cluster-overview.json +++ b/resources/grafana/sources/rhacs-cluster-overview.json @@ -25,8 +25,8 @@ "uid": "PBFA97CFB590B2093" }, "enable": true, - "iconColor": "purple", "expr": "count (count by (git_version) (label_replace(count_over_time(kubernetes_build_info{job!~\"kube-dns|coredns\"}[${__interval}]), \"git_version\", \"$1\", \"git_version\", \"(v[0-9]*.[0-9]*).*\"))) > 1", + "iconColor": "purple", "name": "Kubernetes Upgrade", "textFormat": "Kubernetes Upgrade" }, @@ -36,8 +36,8 @@ "uid": "PBFA97CFB590B2093" }, "enable": true, - "iconColor": "red", "expr": "count (count by (gitVersion) (count_over_time (openshift_apiserver_build_info[${__interval}]))) > 1", + "iconColor": "red", "name": "OpenShift Upgrade", "textFormat": "OpenShift Upgrade" } @@ -46,7 +46,7 @@ "editable": true, "fiscalYearStartMonth": 0, "graphTooltip": 0, - "id": 23, + "id": 11, "links": [], "liveNow": false, "panels": [ @@ -1108,7 +1108,7 @@ "content": "### Description\n\nThis graph shows the occurences per minute of **SELinux AVC denials** on the cluster.\nThese violations are logged on the cluster, propagated to CloudWatch, aggregated by a log metric, retrieved by the cloudwatch-exporter and finally scraped by Prometheus.\n\n**Expected: 0 violations.**\n\nA violation means that the cluster node's SELinux policy prevented a process' actions.\nAs an example, a violation could indicate that a process on the cluster tried to access a file which is SELinux-protected.\n\n### Drill-Down\n\nLog into the cluster's AWS account and use a [Log Insights query](https://us-east-1.console.aws.amazon.com/cloudwatch/home?region=us-east-1#logsV2:logs-insights$3FqueryDetail$3D~(end~0~start~-3600~timeType~'RELATIVE~unit~'seconds~editorString~'fields*20*40timestamp*2c*20*40message*2c*20*40logStream*2c*20*40log*0a*7c*20filter*20*40logStream*20like*20*2flinux-audit*2f*0a*7c*20filter*20*40message*20like*20*2fAVC*2f*0a*7c*20sort*20*40timestamp*20desc*0a*7c*20limit*201000~source~(~))) similar to this one:\n```\nfields @timestamp, @message, @logStream, @log\n| filter @logStream like /linux-audit/\n| filter @message like /AVC/\n| sort @timestamp desc\n| limit 1000\n```\n\n**Note:**\n* all CloudWatch related resources are located in the `us-east-1` region.\n* the log group containing the violation logs are called `acs-.audit`.\n", "mode": "markdown" }, - "pluginVersion": "10.2.0", + "pluginVersion": "11.1.0", "type": "text" }, { @@ -1238,7 +1238,7 @@ "content": "### Description\n\nThis graph shows the occurences per minute of Network Policy ACL denials on the cluster.\nThese violations are logged on the cluster, propagated to CloudWatch, aggregated by a log metric, retrieved by the cloudwatch-exporter and finally scraped by Prometheus.\n\n**Expected: 0 violations.**\n\nA violation means that network traffic was prevented due to a Kubernetes Network Policy.\nAs an example, a violation could indicate that communication between RHACS tenant namespaces\nwas attempted, which is strictly forbidden.\n\n### Drill-Down\n\nLog into the cluster's AWS account and use a [Log Insights query](https://us-east-1.console.aws.amazon.com/cloudwatch/home?region=us-east-1#logsV2:logs-insights$3FqueryDetail$3D~(end~0~start~-3600~timeType~'RELATIVE~unit~'seconds~editorString~'fields*20*40timestamp*2c*20*40message*2c*20*40logStream*2c*20*40log*0a*7c*20filter*20*40message*20like*20*2facl_log*28.*2a*29.*2a*5csverdict*3ddrop*2f*0a*7c*20filter*20*40logStream*20like*20*2f.*2aovn-audit*5c.log*2f*0a*7c*20sort*20*40timestamp*20desc*0a*7c*20limit*201000~source~(~))) similar to this one:\n```\nfields @timestamp, @message, @logStream, @log\n| filter @message like /acl_log(.*).*\\sverdict=drop/\n| filter @logStream like /.*ovn-audit\\.log/\n| sort @timestamp desc\n| limit 1000\n```\n\n**Note:**\n* all CloudWatch related resources are located in the `us-east-1` region.\n* the log group containing the violation logs are called `acs-.audit`.\n\n", "mode": "markdown" }, - "pluginVersion": "10.2.0", + "pluginVersion": "11.1.0", "type": "text" }, { @@ -1471,7 +1471,7 @@ } ] }, - "pluginVersion": "10.2.0", + "pluginVersion": "11.1.0", "targets": [ { "datasource": { @@ -1942,7 +1942,7 @@ } ] }, - "pluginVersion": "10.2.0", + "pluginVersion": "11.1.0", "targets": [ { "datasource": { @@ -2120,11 +2120,137 @@ } ], "type": "table" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "description": "Monitoring + Alerting for certificate expiration, tracking and managing of digital certificates expiration dates. Extracts timestamps from certificates.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "auto", + "cellOptions": { + "type": "auto" + }, + "inspect": false + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 85 + }, + "id": 147, + "options": { + "cellHeight": "sm", + "footer": { + "countRows": false, + "fields": "", + "reducer": ["sum"], + "show": false + }, + "showHeader": true + }, + "pluginVersion": "11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "disableTextWrap": false, + "editorMode": "builder", + "expr": "acs_fleetshard_certificate_expiration_timestamp", + "format": "table", + "fullMetaSearch": false, + "includeNullMetadata": true, + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "A", + "useBackend": false + } + ], + "title": "Certificate Expiry", + "transformations": [ + { + "id": "organize", + "options": { + "excludeByName": { + "Value": true, + "__name__": true, + "container": true, + "instance": true, + "job": true, + "namespace": true, + "pod": true, + "rhacs_cluster_name": true, + "rhacs_environment": true + }, + "includeByName": {}, + "indexByName": { + "Time": 3, + "Value": 12, + "__name__": 4, + "container": 5, + "data_key": 2, + "exported_namespace": 0, + "instance": 6, + "job": 7, + "namespace": 8, + "pod": 9, + "rhacs_cluster_name": 10, + "rhacs_environment": 11, + "secret": 1 + }, + "renameByName": { + "Time": "Expiration", + "data_key": "Key", + "exported_namespace": "Namespace", + "secret": "Secret" + } + } + }, + { + "id": "sortBy", + "options": { + "fields": {}, + "sort": [ + { + "field": "Expiration" + } + ] + } + } + ], + "type": "table" } ], "refresh": "", "revision": 1, - "schemaVersion": 38, + "schemaVersion": 39, "tags": ["rhacs"], "templating": { "list": [ @@ -2250,6 +2376,6 @@ "timezone": "", "title": "RHACS Dataplane - Cluster Metrics", "uid": "4032f3c17643119901e107a0a1786d5b9e4c9565", - "version": 2, + "version": 1, "weekStart": "" }