Skip to content

Commit

Permalink
Update metrics configuration for Kafka, MM2 and Connect (#168)
Browse files Browse the repository at this point in the history
Signed-off-by: Jakub Stejskal <[email protected]>
  • Loading branch information
Frawless authored Jun 17, 2024
1 parent 50ca244 commit 8c125b8
Show file tree
Hide file tree
Showing 13 changed files with 509 additions and 335 deletions.
201 changes: 121 additions & 80 deletions amq-streams/kafka/00-kafka-route.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -255,121 +255,162 @@ metadata:
name: heimdall-kafka-jmx-exporter-configuration
data:
heimdall-kafka-jmx-exporter-configuration.yaml: |
# See https://github.com/prometheus/jmx_exporter for more info about JMX Prometheus Exporter metrics
lowercaseOutputName: true
rules:
- pattern: "kafka.server<type=(.+), name=(.+), clientId=(.+), topic=(.+), partition=(.*)><>Value"
name: "kafka_server_$1_$2"
type: "GAUGE"
# Special cases and very specific rules
- pattern: kafka.server<type=(.+), name=(.+), clientId=(.+), topic=(.+), partition=(.*)><>Value
name: kafka_server_$1_$2
type: GAUGE
labels:
clientId: "$3"
topic: "$4"
partition: "$5"
- pattern: "kafka.server<type=(.+), name=(.+), clientId=(.+), brokerHost=(.+), brokerPort=(.+)><>Value"
name: "kafka_server_$1_$2"
type: "GAUGE"
- pattern: kafka.server<type=(.+), name=(.+), clientId=(.+), brokerHost=(.+), brokerPort=(.+)><>Value
name: kafka_server_$1_$2
type: GAUGE
labels:
clientId: "$3"
broker: "$4:$5"
- pattern: "kafka.server<type=(.+), cipher=(.+), protocol=(.+), listener=(.+), networkProcessor=(.+)><>connections"
name: "kafka_server_$1_connections_tls_info"
type: "GAUGE"
- pattern: kafka.server<type=(.+), cipher=(.+), protocol=(.+), listener=(.+), networkProcessor=(.+)><>connections
name: kafka_server_$1_connections_tls_info
type: GAUGE
labels:
listener: "$2"
networkProcessor: "$3"
protocol: "$4"
cipher: "$5"
- pattern: "kafka.server<type=(.+), clientSoftwareName=(.+), clientSoftwareVersion=(.+),\
\ listener=(.+), networkProcessor=(.+)><>connections"
name: "kafka_server_$1_connections_software"
type: "GAUGE"
cipher: "$2"
protocol: "$3"
listener: "$4"
networkProcessor: "$5"
- pattern: kafka.server<type=(.+), clientSoftwareName=(.+), clientSoftwareVersion=(.+), listener=(.+), networkProcessor=(.+)><>connections
name: kafka_server_$1_connections_software
type: GAUGE
labels:
clientSoftwareName: "$2"
clientSoftwareVersion: "$3"
listener: "$4"
networkProcessor: "$5"
- pattern: "kafka.server<type=(.+), listener=(.+), networkProcessor=(.+)><>(.+-total):"
name: kafka_server_$1_$4
type: COUNTER
labels:
listener: "$2"
networkProcessor: "$3"
- pattern: "kafka.server<type=(.+), listener=(.+), networkProcessor=(.+)><>(.+):"
name: "kafka_server_$1_$4"
type: "GAUGE"
name: kafka_server_$1_$4
type: GAUGE
labels:
listener: "$2"
networkProcessor: "$3"
- pattern: "kafka.server<type=(.+), listener=(.+), networkProcessor=(.+)><>(.+)"
name: "kafka_server_$1_$4"
type: "GAUGE"
- pattern: kafka.server<type=(.+), listener=(.+), networkProcessor=(.+)><>(.+-total)
name: kafka_server_$1_$4
type: COUNTER
labels:
listener: "$2"
networkProcessor: "$3"
- pattern: "kafka.(\\w+)<type=(.+), name=(.+)Percent\\w*><>MeanRate"
name: "kafka_$1_$2_$3_percent"
type: "GAUGE"
- pattern: "kafka.(\\w+)<type=(.+), name=(.+)Percent\\w*><>Value"
name: "kafka_$1_$2_$3_percent"
type: "GAUGE"
- pattern: "kafka.(\\w+)<type=(.+), name=(.+)Percent\\w*, (.+)=(.+)><>Value"
name: "kafka_$1_$2_$3_percent"
type: "GAUGE"
- pattern: kafka.server<type=(.+), listener=(.+), networkProcessor=(.+)><>(.+)
name: kafka_server_$1_$4
type: GAUGE
labels:
$4: "$5"
- pattern: "kafka.(\\w+)<type=(.+), name=(.+)PerSec\\w*, (.+)=(.+), (.+)=(.+)><>Count"
name: "kafka_$1_$2_$3_total"
type: "COUNTER"
listener: "$2"
networkProcessor: "$3"
# Some percent metrics use MeanRate attribute
# Example: kafka.server<type=(KafkaRequestHandlerPool), name=(RequestHandlerAvgIdlePercent)><>MeanRate
- pattern: kafka.(\w+)<type=(.+), name=(.+)Percent\w*><>MeanRate
name: kafka_$1_$2_$3_percent
type: GAUGE
# Generic gauges for percents
- pattern: kafka.(\w+)<type=(.+), name=(.+)Percent\w*><>Value
name: kafka_$1_$2_$3_percent
type: GAUGE
- pattern: kafka.(\w+)<type=(.+), name=(.+)Percent\w*, (.+)=(.+)><>Value
name: kafka_$1_$2_$3_percent
type: GAUGE
labels:
$4: "$5"
$6: "$7"
- pattern: "kafka.(\\w+)<type=(.+), name=(.+)PerSec\\w*, (.+)=(.+)><>Count"
name: "kafka_$1_$2_$3_total"
type: "COUNTER"
"$4": "$5"
# Generic per-second counters with 0-2 key/value pairs
- pattern: kafka.(\w+)<type=(.+), name=(.+)PerSec\w*, (.+)=(.+), (.+)=(.+)><>Count
name: kafka_$1_$2_$3_total
type: COUNTER
labels:
$4: "$5"
- pattern: "kafka.(\\w+)<type=(.+), name=(.+)PerSec\\w*><>Count"
name: "kafka_$1_$2_$3_total"
type: "COUNTER"
- pattern: "kafka.(\\w+)<type=(.+), name=(.+), (.+)=(.+), (.+)=(.+)><>Value"
name: "kafka_$1_$2_$3"
type: "GAUGE"
"$4": "$5"
"$6": "$7"
- pattern: kafka.(\w+)<type=(.+), name=(.+)PerSec\w*, (.+)=(.+)><>Count
name: kafka_$1_$2_$3_total
type: COUNTER
labels:
$4: "$5"
$6: "$7"
- pattern: "kafka.(\\w+)<type=(.+), name=(.+), (.+)=(.+)><>Value"
name: "kafka_$1_$2_$3"
type: "GAUGE"
"$4": "$5"
- pattern: kafka.(\w+)<type=(.+), name=(.+)PerSec\w*><>Count
name: kafka_$1_$2_$3_total
type: COUNTER
# Generic gauges with 0-2 key/value pairs
- pattern: kafka.(\w+)<type=(.+), name=(.+), (.+)=(.+), (.+)=(.+)><>Value
name: kafka_$1_$2_$3
type: GAUGE
labels:
$4: "$5"
- pattern: "kafka.(\\w+)<type=(.+), name=(.+)><>Value"
name: "kafka_$1_$2_$3"
type: "GAUGE"
- pattern: "kafka.(\\w+)<type=(.+), name=(.+), (.+)=(.+), (.+)=(.+)><>Count"
name: "kafka_$1_$2_$3_count"
type: "COUNTER"
"$4": "$5"
"$6": "$7"
- pattern: kafka.(\w+)<type=(.+), name=(.+), (.+)=(.+)><>Value
name: kafka_$1_$2_$3
type: GAUGE
labels:
$4: "$5"
$6: "$7"
- pattern: "kafka.(\\w+)<type=(.+), name=(.+), (.+)=(.*), (.+)=(.+)><>(\\d+)thPercentile"
name: "kafka_$1_$2_$3"
type: "GAUGE"
"$4": "$5"
- pattern: kafka.(\w+)<type=(.+), name=(.+)><>Value
name: kafka_$1_$2_$3
type: GAUGE
# Emulate Prometheus 'Summary' metrics for the exported 'Histogram's.
# Note that these are missing the '_sum' metric!
- pattern: kafka.(\w+)<type=(.+), name=(.+), (.+)=(.+), (.+)=(.+)><>Count
name: kafka_$1_$2_$3_count
type: COUNTER
labels:
$4: "$5"
$6: "$7"
"$4": "$5"
"$6": "$7"
- pattern: kafka.(\w+)<type=(.+), name=(.+), (.+)=(.*), (.+)=(.+)><>(\d+)thPercentile
name: kafka_$1_$2_$3
type: GAUGE
labels:
"$4": "$5"
"$6": "$7"
quantile: "0.$8"
- pattern: "kafka.(\\w+)<type=(.+), name=(.+), (.+)=(.+)><>Count"
name: "kafka_$1_$2_$3_count"
type: "COUNTER"
- pattern: kafka.(\w+)<type=(.+), name=(.+), (.+)=(.+)><>Count
name: kafka_$1_$2_$3_count
type: COUNTER
labels:
$4: "$5"
- pattern: "kafka.(\\w+)<type=(.+), name=(.+), (.+)=(.*)><>(\\d+)thPercentile"
name: "kafka_$1_$2_$3"
type: "GAUGE"
"$4": "$5"
- pattern: kafka.(\w+)<type=(.+), name=(.+), (.+)=(.*)><>(\d+)thPercentile
name: kafka_$1_$2_$3
type: GAUGE
labels:
$4: "$5"
"$4": "$5"
quantile: "0.$6"
- pattern: "kafka.(\\w+)<type=(.+), name=(.+)><>Count"
name: "kafka_$1_$2_$3_count"
type: "COUNTER"
- pattern: "kafka.(\\w+)<type=(.+), name=(.+)><>(\\d+)thPercentile"
name: "kafka_$1_$2_$3"
type: "GAUGE"
- pattern: kafka.(\w+)<type=(.+), name=(.+)><>Count
name: kafka_$1_$2_$3_count
type: COUNTER
- pattern: kafka.(\w+)<type=(.+), name=(.+)><>(\d+)thPercentile
name: kafka_$1_$2_$3
type: GAUGE
labels:
quantile: "0.$4"
# KRaft mode: uncomment the following lines to export KRaft related metrics
# KRaft overall related metrics
# Distinguish between always-increasing COUNTER metrics (total and max) and variable GAUGE metrics (all others)
#- pattern: "kafka.server<type=raft-metrics><>(.+-total|.+-max):"
# name: kafka_server_raftmetrics_$1
# type: COUNTER
#- pattern: "kafka.server<type=raft-metrics><>(.+):"
# name: kafka_server_raftmetrics_$1
# type: GAUGE
# KRaft "low level" channels related metrics
# Distinguish between always-increasing COUNTER metrics (total and max) and variable GAUGE metrics (all others)
#- pattern: "kafka.server<type=raft-channel-metrics><>(.+-total|.+-max):"
# name: kafka_server_raftchannelmetrics_$1
# type: COUNTER
#- pattern: "kafka.server<type=raft-channel-metrics><>(.+):"
# name: kafka_server_raftchannelmetrics_$1
# type: GAUGE
# Broker metrics related to fetching metadata topic records in KRaft mode
#- pattern: "kafka.server<type=broker-metadata-metrics><>(.+):"
# name: kafka_server_brokermetadatametrics_$1
# type: GAUGE
---
apiVersion: v1
kind: ConfigMap
Expand Down
71 changes: 49 additions & 22 deletions debezium/connector/030-Metrics.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -30,10 +30,18 @@ data:
clientId: "$2"
$3: "$4"
help: "Kafka $1 JMX metric info version and commit-id"
type: GAUGE
type: UNTYPED
#kafka.consumer:type=consumer-fetch-manager-metrics,client-id="{clientid}",topic="{topic}",partition="{partition}"
- pattern: kafka.consumer<type=consumer-fetch-manager-metrics, client-id=(.+), topic=(.+), partition=(.+)><>(.+-total|compression-rate|.+-avg|.+-replica|.+-lag|.+-lead)
- pattern: kafka.consumer<type=consumer-fetch-manager-metrics, client-id=(.+), topic=(.+), partition=(.+)><>(.+-total)
name: kafka_consumer_fetch_manager_$4
labels:
clientId: "$1"
topic: "$2"
partition: "$3"
help: "Kafka Consumer JMX metric type consumer-fetch-manager-metrics"
type: COUNTER
- pattern: kafka.consumer<type=consumer-fetch-manager-metrics, client-id=(.+), topic=(.+), partition=(.+)><>(compression-rate|.+-avg|.+-replica|.+-lag|.+-lead)
name: kafka_consumer_fetch_manager_$4
labels:
clientId: "$1"
Expand All @@ -43,7 +51,14 @@ data:
type: GAUGE
#kafka.producer:type=producer-topic-metrics,client-id="{clientid}",topic="{topic}"
- pattern: kafka.producer<type=producer-topic-metrics, client-id=(.+), topic=(.+)><>(.+-total|compression-rate|.+-avg|.+rate)
- pattern: kafka.producer<type=producer-topic-metrics, client-id=(.+), topic=(.+)><>(.+-total)
name: kafka_producer_topic_$3
labels:
clientId: "$1"
topic: "$2"
help: "Kafka Producer JMX metric type producer-topic-metrics"
type: COUNTER
- pattern: kafka.producer<type=producer-topic-metrics, client-id=(.+), topic=(.+)><>(compression-rate|.+-avg|.+rate)
name: kafka_producer_topic_$3
labels:
clientId: "$1"
Expand All @@ -53,7 +68,14 @@ data:
#kafka.connect:type=connect-node-metrics,client-id="{clientid}",node-id="{nodeid}"
#kafka.consumer:type=consumer-node-metrics,client-id=consumer-1,node-id="{nodeid}"
- pattern: kafka.(.+)<type=(.+)-metrics, client-id=(.+), node-id=(.+)><>(.+-total|.+-avg|.+-rate)
- pattern: kafka.(.+)<type=(.+)-metrics, client-id=(.+), node-id=(.+)><>(.+-total)
name: kafka_$2_$5
labels:
clientId: "$3"
nodeId: "$4"
help: "Kafka $1 JMX metric type $2"
type: COUNTER
- pattern: kafka.(.+)<type=(.+)-metrics, client-id=(.+), node-id=(.+)><>(.+-avg|.+-rate)
name: kafka_$2_$5
labels:
clientId: "$3"
Expand All @@ -65,7 +87,13 @@ data:
#kafka.consumer:type=consumer-fetch-manager-metrics,client-id="{clientid}"
#kafka.consumer:type=consumer-coordinator-metrics,client-id="{clientid}"
#kafka.consumer:type=consumer-metrics,client-id="{clientid}"
- pattern: kafka.(.+)<type=(.+)-metrics, client-id=(.*)><>(.+-total|.+-avg|.+-bytes|.+-count|.+-ratio|.+-age|.+-flight|.+-threads|.+-connectors|.+-tasks|.+-ago)
- pattern: kafka.(.+)<type=(.+)-metrics, client-id=(.*)><>(.+-total)
name: kafka_$2_$4
labels:
clientId: "$3"
help: "Kafka $1 JMX metric type $2"
type: COUNTER
- pattern: kafka.(.+)<type=(.+)-metrics, client-id=(.*)><>(.+-avg|.+-bytes|.+-count|.+-ratio|.+-age|.+-flight|.+-threads|.+-connectors|.+-tasks|.+-ago)
name: kafka_$2_$4
labels:
clientId: "$3"
Expand Down Expand Up @@ -97,7 +125,14 @@ data:
#kafka.connect:type=source-task-metrics,connector="{connector}",task="{task}"
#kafka.connect:type=sink-task-metrics,connector="{connector}",task="{task}"
#kafka.connect:type=connector-task-metrics,connector="{connector}",task="{task}"
- pattern: kafka.connect<type=(.+)-metrics, connector=(.+), task=(.+)><>(.+-total|.+-count|.+-ms|.+-ratio|.+-seq-no|.+-rate|.+-max|.+-avg|.+-failures|.+-requests|.+-timestamp|.+-logged|.+-errors|.+-retries|.+-skipped)
- pattern: kafka.connect<type=(.+)-metrics, connector=(.+), task=(.+)><>(.+-total)
name: kafka_connect_$1_$4
labels:
connector: "$2"
task: "$3"
help: "Kafka Connect JMX metric type $1"
type: COUNTER
- pattern: kafka.connect<type=(.+)-metrics, connector=(.+), task=(.+)><>(.+-count|.+-ms|.+-ratio|.+-seq-no|.+-rate|.+-max|.+-avg|.+-failures|.+-requests|.+-timestamp|.+-logged|.+-errors|.+-retries|.+-skipped)
name: kafka_connect_$1_$4
labels:
connector: "$2"
Expand All @@ -114,6 +149,10 @@ data:
type: GAUGE
#kafka.connect:type=connect-worker-metrics
- pattern: kafka.connect<type=connect-worker-metrics><>([a-z-]+-total)
name: kafka_connect_worker_$1
help: "Kafka Connect JMX metric worker"
type: COUNTER
- pattern: kafka.connect<type=connect-worker-metrics><>([a-z-]+)
name: kafka_connect_worker_$1
help: "Kafka Connect JMX metric worker"
Expand All @@ -129,6 +168,10 @@ data:
type: UNTYPED
#kafka.connect:type=connect-worker-rebalance-metrics
- pattern: kafka.connect<type=connect-worker-rebalance-metrics><>([a-z-]+-total)
name: kafka_connect_worker_rebalance_$1
help: "Kafka Connect JMX metric rebalance information"
type: COUNTER
- pattern: kafka.connect<type=connect-worker-rebalance-metrics><>([a-z-]+)
name: kafka_connect_worker_rebalance_$1
help: "Kafka Connect JMX metric rebalance information"
Expand All @@ -139,19 +182,3 @@ data:
name: kafka_connect_coordinator_$1
help: "Kafka Connect JMX metric assignment information"
type: GAUGE
# Debezium specific metrics
- pattern: "debezium.([^:]+)<type=connector-metrics, context=([^,]+), server=([^,]+), key=([^>]+)><>RowsScanned"
name: "debezium_metrics_RowsScanned"
labels:
plugin: "$1"
name: "$3"
context: "$2"
table: "$4"
- pattern: "debezium.([^:]+)<type=connector-metrics, context=([^,]+), server=([^>]+)>([^:]+)"
name: "debezium_metrics_$4"
labels:
plugin: "$1"
name: "$3"
context: "$2"
Original file line number Diff line number Diff line change
Expand Up @@ -1584,7 +1584,7 @@ spec:
"targets": [
{
"datasource": "${DS_THANOS}",
"expr": "sum(jvm_memory_bytes_used{namespace=\"$kubernetes_namespace\",kubernetes_pod_name=~\"$strimzi_cluster_name-cruise-control-.*\",strimzi_io_name=\"$strimzi_cluster_name-cruise-control\"}) by (kubernetes_pod_name)",
"expr": "sum(jvm_memory_used_bytes{namespace=\"$kubernetes_namespace\",kubernetes_pod_name=~\"$strimzi_cluster_name-cruise-control-.*\",strimzi_io_name=\"$strimzi_cluster_name-cruise-control\"}) by (kubernetes_pod_name)",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{kubernetes_pod_name}}",
Expand Down
2 changes: 1 addition & 1 deletion metrics/grafana/dashboards/00-debezium-dashboard.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -1327,7 +1327,7 @@ spec:
"steppedLine": false,
"targets": [
{
"expr": "jvm_memory_bytes_used{area='heap', clusterName=~\"$OPENSHIFT_CLUSTER\"}",
"expr": "jvm_memory_used_bytes{area='heap', clusterName=~\"$OPENSHIFT_CLUSTER\"}",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "Used",
Expand Down
2 changes: 1 addition & 1 deletion metrics/grafana/dashboards/00-kafka-connect-dashboard.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -1496,7 +1496,7 @@ spec:
"targets": [
{
"datasource": "${DS_THANOS}",
"expr": "sum (jvm_memory_bytes_used{strimzi_io_kind=~\"KafkaConnect.*\",strimzi_io_cluster=\"$strimzi_connect_cluster_name\"}) by (kubernetes_pod_name)",
"expr": "sum (jvm_memory_used_bytes{strimzi_io_kind=~\"KafkaConnect.*\",strimzi_io_cluster=\"$strimzi_connect_cluster_name\"}) by (kubernetes_pod_name)",
"format": "time_series",
"hide": false,
"intervalFactor": 2,
Expand Down
Loading

0 comments on commit 8c125b8

Please sign in to comment.