From 25e475871f2bb90cd32dbcf506719accefe79d72 Mon Sep 17 00:00:00 2001 From: Ashwin Tumma Date: Thu, 30 Jan 2025 17:54:03 -0800 Subject: [PATCH 1/4] Support custom histogram buckets for Prometheus Timer Metrics --- .../extensions-contrib/prometheus.md | 4 ++- .../prometheus/DimensionsAndCollector.java | 9 ++++++- .../druid/emitter/prometheus/Metrics.java | 14 +++++++--- .../src/main/resources/defaultMetrics.json | 2 +- .../druid/emitter/prometheus/MetricsTest.java | 26 +++++++++++++++++-- .../resources/defaultInvalidMetricsTest.json | 3 +++ .../test/resources/defaultMetricsTest.json | 4 +-- website/.spelling | 1 + 8 files changed, 53 insertions(+), 10 deletions(-) create mode 100644 extensions-contrib/prometheus-emitter/src/test/resources/defaultInvalidMetricsTest.json diff --git a/docs/development/extensions-contrib/prometheus.md b/docs/development/extensions-contrib/prometheus.md index ddbd9da46437..63fb7016e405 100644 --- a/docs/development/extensions-contrib/prometheus.md +++ b/docs/development/extensions-contrib/prometheus.md @@ -40,7 +40,7 @@ All the configuration parameters for the Prometheus emitter are under `druid.emi | `druid.emitter.prometheus.strategy` | The strategy to expose prometheus metrics.
Should be one of `exporter` and `pushgateway`. Default strategy `exporter` would expose metrics for scraping purpose. Peon tasks (short-lived jobs) should use `pushgateway` strategy. | yes | exporter | | `druid.emitter.prometheus.port` | The port on which to expose the prometheus HTTPServer. Required if using `exporter` strategy. | no | none | | `druid.emitter.prometheus.namespace` | Optional metric namespace. Must match the regex `[a-zA-Z_:][a-zA-Z0-9_:]*` | no | druid | -| `druid.emitter.prometheus.dimensionMapPath` | JSON file defining the Prometheus metric type, desired dimensions, help text, and conversionFactor for every Druid metric. | no | Default mapping provided. See below. | +| `druid.emitter.prometheus.dimensionMapPath` | JSON file defining the Prometheus metric type, desired dimensions, conversionFactor, histogram buckets and help text for every Druid metric. | no | Default mapping provided. See below. | | `druid.emitter.prometheus.addHostAsLabel` | Flag to include the hostname as a prometheus label. | no | false | | `druid.emitter.prometheus.addServiceAsLabel` | Flag to include the druid service name (e.g. `druid/broker`, `druid/coordinator`, etc.) as a prometheus label. | no | false | | `druid.emitter.prometheus.pushGatewayAddress` | Pushgateway address. Required if using `pushgateway` strategy. | no | none | @@ -90,6 +90,7 @@ Prometheus metric path is organized using the following schema: "dimensions" : , "type" : , "conversionFactor": , + "histogramBuckets": , "help" : } ``` @@ -100,6 +101,7 @@ For example: "dimensions" : ["dataSource", "type"], "type" : "timer", "conversionFactor": 1000.0, + "histogramBuckets": [0.1, 0.25, 0.5, 0.75, 1.0, 2.5, 5.0, 7.5, 10.0, 30.0, 60.0, 120.0, 300.0], "help": "Seconds taken to complete a query." } ``` diff --git a/extensions-contrib/prometheus-emitter/src/main/java/org/apache/druid/emitter/prometheus/DimensionsAndCollector.java b/extensions-contrib/prometheus-emitter/src/main/java/org/apache/druid/emitter/prometheus/DimensionsAndCollector.java index ede4977aeee6..9fbf22f643ad 100644 --- a/extensions-contrib/prometheus-emitter/src/main/java/org/apache/druid/emitter/prometheus/DimensionsAndCollector.java +++ b/extensions-contrib/prometheus-emitter/src/main/java/org/apache/druid/emitter/prometheus/DimensionsAndCollector.java @@ -26,12 +26,14 @@ public class DimensionsAndCollector private final String[] dimensions; private final SimpleCollector collector; private final double conversionFactor; + private final double[] histogramBuckets; - DimensionsAndCollector(String[] dimensions, SimpleCollector collector, double conversionFactor) + DimensionsAndCollector(String[] dimensions, SimpleCollector collector, double conversionFactor, double[] histogramBuckets) { this.dimensions = dimensions; this.collector = collector; this.conversionFactor = conversionFactor; + this.histogramBuckets = histogramBuckets; } public String[] getDimensions() @@ -48,4 +50,9 @@ public double getConversionFactor() { return conversionFactor; } + + public double[] getHistogramBuckets() + { + return histogramBuckets; + } } diff --git a/extensions-contrib/prometheus-emitter/src/main/java/org/apache/druid/emitter/prometheus/Metrics.java b/extensions-contrib/prometheus-emitter/src/main/java/org/apache/druid/emitter/prometheus/Metrics.java index 4b9ad716cdf8..00df3c46cdbe 100644 --- a/extensions-contrib/prometheus-emitter/src/main/java/org/apache/druid/emitter/prometheus/Metrics.java +++ b/extensions-contrib/prometheus-emitter/src/main/java/org/apache/druid/emitter/prometheus/Metrics.java @@ -38,6 +38,7 @@ import java.io.InputStream; import java.util.Collections; import java.util.HashMap; +import java.util.List; import java.util.Map; import java.util.SortedSet; import java.util.regex.Pattern; @@ -108,7 +109,7 @@ public Metrics(String namespace, String path, boolean isAddHostAsLabel, boolean .namespace(namespace) .name(formattedName) .labelNames(dimensions) - .buckets(.1, .25, .5, .75, 1, 2.5, 5, 7.5, 10, 30, 60, 120, 300) + .buckets(metric.histogramBuckets) .help(metric.help) .register(); } else { @@ -116,7 +117,7 @@ public Metrics(String namespace, String path, boolean isAddHostAsLabel, boolean } if (collector != null) { - parsedRegisteredMetrics.put(name, new DimensionsAndCollector(dimensions, collector, metric.conversionFactor)); + parsedRegisteredMetrics.put(name, new DimensionsAndCollector(dimensions, collector, metric.conversionFactor, metric.histogramBuckets)); } } this.registeredMetrics = Collections.unmodifiableMap(parsedRegisteredMetrics); @@ -153,19 +154,26 @@ public static class Metric public final Type type; public final String help; public final double conversionFactor; + public final double[] histogramBuckets; @JsonCreator public Metric( @JsonProperty("dimensions") SortedSet dimensions, @JsonProperty("type") Type type, @JsonProperty("help") String help, - @JsonProperty("conversionFactor") double conversionFactor + @JsonProperty("conversionFactor") double conversionFactor, + @JsonProperty("histogramBuckets") List histogramBuckets ) { this.dimensions = dimensions; this.type = type; this.help = help; this.conversionFactor = conversionFactor; + if (histogramBuckets != null && !histogramBuckets.isEmpty()) { + this.histogramBuckets = histogramBuckets.stream().mapToDouble(Double::doubleValue).toArray(); + } else { + this.histogramBuckets = new double[] {0.1, 0.25, 0.5, 0.75, 1.0, 2.5, 5.0, 7.5, 10.0, 30.0, 60.0, 120.0, 300.0}; + } } public enum Type diff --git a/extensions-contrib/prometheus-emitter/src/main/resources/defaultMetrics.json b/extensions-contrib/prometheus-emitter/src/main/resources/defaultMetrics.json index 9cb01646f209..2224b2bf3cf1 100644 --- a/extensions-contrib/prometheus-emitter/src/main/resources/defaultMetrics.json +++ b/extensions-contrib/prometheus-emitter/src/main/resources/defaultMetrics.json @@ -1,5 +1,5 @@ { - "query/time" : { "dimensions" : ["dataSource", "type"], "type" : "timer", "conversionFactor": 1000.0, "help": "Seconds taken to complete a query."}, + "query/time" : { "dimensions" : ["dataSource", "type"], "type" : "timer", "conversionFactor": 1000.0, "histogramBuckets": [0.1, 0.25, 0.5, 0.75, 1.0, 2.5, 5.0, 7.5, 10.0, 30.0, 60.0, 120.0, 300.0], "help": "Seconds taken to complete a query."}, "query/bytes" : { "dimensions" : ["dataSource", "type"], "type" : "count", "help": "Number of bytes returned in query response."}, "query/node/time" : { "dimensions" : ["server"], "type" : "timer", "conversionFactor": 1000.0, "help": "Seconds taken to query individual historical/realtime processes."}, "query/node/bytes" : { "dimensions" : ["server"], "type" : "count", "help": "Number of bytes returned from querying individual historical/realtime processes."}, diff --git a/extensions-contrib/prometheus-emitter/src/test/java/org/apache/druid/emitter/prometheus/MetricsTest.java b/extensions-contrib/prometheus-emitter/src/test/java/org/apache/druid/emitter/prometheus/MetricsTest.java index 4567c7cd0696..ef6a1d517752 100644 --- a/extensions-contrib/prometheus-emitter/src/test/java/org/apache/druid/emitter/prometheus/MetricsTest.java +++ b/extensions-contrib/prometheus-emitter/src/test/java/org/apache/druid/emitter/prometheus/MetricsTest.java @@ -41,6 +41,8 @@ public void testMetricsConfiguration() Assert.assertEquals("host_name", dimensions[2]); Assert.assertEquals("type", dimensions[3]); Assert.assertEquals(1000.0, dimensionsAndCollector.getConversionFactor(), 0.0); + double[] defaultHistogramBuckets = {0.1, 0.25, 0.5, 0.75, 1.0, 2.5, 5.0, 7.5, 10.0, 30.0, 60.0, 120.0, 300.0}; + Assert.assertArrayEquals(defaultHistogramBuckets, dimensionsAndCollector.getHistogramBuckets(), 0.0); Assert.assertTrue(dimensionsAndCollector.getCollector() instanceof Histogram); DimensionsAndCollector d = metrics.getByName("segment/loadQueue/count", "historical"); @@ -67,6 +69,8 @@ public void testMetricsConfigurationWithExtraLabels() Assert.assertEquals("host_name", dimensions[3]); Assert.assertEquals("type", dimensions[4]); Assert.assertEquals(1000.0, dimensionsAndCollector.getConversionFactor(), 0.0); + double[] defaultHistogramBuckets = {0.1, 0.25, 0.5, 0.75, 1.0, 2.5, 5.0, 7.5, 10.0, 30.0, 60.0, 120.0, 300.0}; + Assert.assertArrayEquals(defaultHistogramBuckets, dimensionsAndCollector.getHistogramBuckets(), 0.0); Assert.assertTrue(dimensionsAndCollector.getCollector() instanceof Histogram); DimensionsAndCollector d = metrics.getByName("segment/loadQueue/count", "historical"); @@ -106,8 +110,26 @@ public void testMetricsConfigurationWithNonExistentMetric() @Test public void testMetricsConfigurationWithUnSupportedType() { - Assert.assertThrows(ISE.class, () -> { - new Metrics("test_5", "src/test/resources/defaultMetricsTest.json", true, true, null); + ISE iseException = Assert.assertThrows(ISE.class, () -> { + new Metrics("test_5", "src/test/resources/defaultInvalidMetricsTest.json", true, true, null); }); + Assert.assertEquals("Failed to parse metric configuration", iseException.getMessage()); } + + @Test + public void testMetricsConfigurationWithTimerHistogramBuckets() + { + Metrics metrics = new Metrics("test_6", "src/test/resources/defaultMetricsTest.json", true, true, null); + DimensionsAndCollector dimensionsAndCollector = metrics.getByName("query/time", "historical"); + Assert.assertNotNull(dimensionsAndCollector); + String[] dimensions = dimensionsAndCollector.getDimensions(); + Assert.assertEquals("dataSource", dimensions[0]); + Assert.assertEquals("druid_service", dimensions[1]); + Assert.assertEquals("host_name", dimensions[2]); + Assert.assertEquals("type", dimensions[3]); + Assert.assertEquals(1000.0, dimensionsAndCollector.getConversionFactor(), 0.0); + double[] expectedHistogramBuckets = {10.0, 30.0, 60.0, 120.0, 200.0, 300.0}; + Assert.assertArrayEquals(expectedHistogramBuckets, dimensionsAndCollector.getHistogramBuckets(), 0.0); + } + } diff --git a/extensions-contrib/prometheus-emitter/src/test/resources/defaultInvalidMetricsTest.json b/extensions-contrib/prometheus-emitter/src/test/resources/defaultInvalidMetricsTest.json new file mode 100644 index 000000000000..994469f9fe19 --- /dev/null +++ b/extensions-contrib/prometheus-emitter/src/test/resources/defaultInvalidMetricsTest.json @@ -0,0 +1,3 @@ +{ + "query/nonExistent" : { "dimensions" : ["dataSource"], "type" : "nonExistent", "help": "Non supported type."} +} \ No newline at end of file diff --git a/extensions-contrib/prometheus-emitter/src/test/resources/defaultMetricsTest.json b/extensions-contrib/prometheus-emitter/src/test/resources/defaultMetricsTest.json index a63e2b454702..aa1e659de142 100644 --- a/extensions-contrib/prometheus-emitter/src/test/resources/defaultMetricsTest.json +++ b/extensions-contrib/prometheus-emitter/src/test/resources/defaultMetricsTest.json @@ -1,3 +1,3 @@ { - "query/nonExistent" : { "dimensions" : ["dataSource"], "type" : "nonExistent", "help": "Non supported type."} -} + "query/time" : { "dimensions" : ["dataSource", "type"], "type" : "timer", "conversionFactor": 1000.0, "histogramBuckets": [10.0, 30.0, 60.0, 120.0, 200.0, 300.0], "help": "Seconds taken to complete a query."} +} \ No newline at end of file diff --git a/website/.spelling b/website/.spelling index 700f59db05a7..afe9bc88914b 100644 --- a/website/.spelling +++ b/website/.spelling @@ -360,6 +360,7 @@ hashcode hashtable high-QPS historicals +histogramBuckets hostname hostnames http From d10f06b543ce591719c053ed4da26d5d064b4315 Mon Sep 17 00:00:00 2001 From: Ashwin Tumma Date: Thu, 30 Jan 2025 20:57:42 -0800 Subject: [PATCH 2/4] Trigger build From a5c70fe2a86feab5cf6753414c101a706b445c78 Mon Sep 17 00:00:00 2001 From: Ashwin Tumma Date: Fri, 31 Jan 2025 23:41:19 -0800 Subject: [PATCH 3/4] Update README file and remove spelling --- docs/development/extensions-contrib/prometheus.md | 4 ++-- website/.spelling | 1 - 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/docs/development/extensions-contrib/prometheus.md b/docs/development/extensions-contrib/prometheus.md index 63fb7016e405..2114eb2d23b1 100644 --- a/docs/development/extensions-contrib/prometheus.md +++ b/docs/development/extensions-contrib/prometheus.md @@ -80,7 +80,7 @@ All metric names and labels are reformatted to match Prometheus standards. Each metric to be collected by Prometheus must specify a type, one of `[timer, counter, guage]`. Prometheus Emitter expects this mapping to be provided as a JSON file. Additionally, this mapping specifies which dimensions should be included for each metric. Prometheus expects histogram timers to use Seconds as the base unit. Timers which do not use seconds as a base unit can use the `conversionFactor` to set -the base time unit. If the user does not specify their own JSON file, a default mapping is used. All +the base time unit. Histogram timers also support custom bucket configurations through the `histogramBuckets` parameter. If no custom buckets are provided, the following default buckets are used: `[0.1, 0.25, 0.5, 0.75, 1.0, 2.5, 5.0, 7.5, 10.0, 30.0, 60.0, 120.0, 300.0]`. If the user does not specify their own JSON file, a default mapping is used. All metrics are expected to be mapped. Metrics which are not mapped will not be tracked. Prometheus metric path is organized using the following schema: @@ -90,7 +90,7 @@ Prometheus metric path is organized using the following schema: "dimensions" : , "type" : , "conversionFactor": , - "histogramBuckets": , + "histogramBuckets": , "help" : } ``` diff --git a/website/.spelling b/website/.spelling index afe9bc88914b..700f59db05a7 100644 --- a/website/.spelling +++ b/website/.spelling @@ -360,7 +360,6 @@ hashcode hashtable high-QPS historicals -histogramBuckets hostname hostnames http From 09547936685b64cd6456b1036845a77e33f47edb Mon Sep 17 00:00:00 2001 From: Ashwin Tumma Date: Mon, 3 Feb 2025 15:16:57 -0800 Subject: [PATCH 4/4] Remove example from defaultMetrics JSON --- .../prometheus-emitter/src/main/resources/defaultMetrics.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/extensions-contrib/prometheus-emitter/src/main/resources/defaultMetrics.json b/extensions-contrib/prometheus-emitter/src/main/resources/defaultMetrics.json index 2224b2bf3cf1..9cb01646f209 100644 --- a/extensions-contrib/prometheus-emitter/src/main/resources/defaultMetrics.json +++ b/extensions-contrib/prometheus-emitter/src/main/resources/defaultMetrics.json @@ -1,5 +1,5 @@ { - "query/time" : { "dimensions" : ["dataSource", "type"], "type" : "timer", "conversionFactor": 1000.0, "histogramBuckets": [0.1, 0.25, 0.5, 0.75, 1.0, 2.5, 5.0, 7.5, 10.0, 30.0, 60.0, 120.0, 300.0], "help": "Seconds taken to complete a query."}, + "query/time" : { "dimensions" : ["dataSource", "type"], "type" : "timer", "conversionFactor": 1000.0, "help": "Seconds taken to complete a query."}, "query/bytes" : { "dimensions" : ["dataSource", "type"], "type" : "count", "help": "Number of bytes returned in query response."}, "query/node/time" : { "dimensions" : ["server"], "type" : "timer", "conversionFactor": 1000.0, "help": "Seconds taken to query individual historical/realtime processes."}, "query/node/bytes" : { "dimensions" : ["server"], "type" : "count", "help": "Number of bytes returned from querying individual historical/realtime processes."},