From 4a93aeb1de35670fb493ba12aa56d22b4b889a66 Mon Sep 17 00:00:00 2001 From: Pedro Tanaka Date: Mon, 16 Oct 2023 19:57:38 -0300 Subject: [PATCH 1/3] Emit error when querying native histograms with extended functions We saw panics on our setup because users tried to use the extended functions with native histograms, which led to rates calculated with only one sample in some windows. In this PR I am introducing a safeguard against this type of situation and emiting a warn when users try to query native histograms with extended functions. Signed-off-by: Pedro Tanaka Fixing test case Signed-off-by: Pedro Tanaka removing dead code Signed-off-by: Pedro Tanaka Return error instead of warn Signed-off-by: Pedro Tanaka fixing linting Signed-off-by: Pedro Tanaka --- engine/engine_test.go | 40 +++++++++++++++++++++++++++++++ execution/scan/functions.go | 5 ++++ execution/scan/matrix_selector.go | 17 +++++++++---- 3 files changed, 58 insertions(+), 4 deletions(-) diff --git a/engine/engine_test.go b/engine/engine_test.go index ee8665ba..769544ad 100644 --- a/engine/engine_test.go +++ b/engine/engine_test.go @@ -16,6 +16,8 @@ import ( "testing" "time" + "github.com/stretchr/testify/require" + "github.com/efficientgo/core/errors" "github.com/efficientgo/core/testutil" "go.uber.org/goleak" @@ -2138,6 +2140,44 @@ func TestDisabledXFunction(t *testing.T) { } } +func TestXFunctionsWithNativeHistograms(t *testing.T) { + defaultQueryTime := time.Unix(50, 0) + + expr := "sum(xincrease(native_histogram_series[50s]))" + + // Negative offset and at modifier are enabled by default + // since Prometheus v2.33.0, so we also enable them. + opts := promql.EngineOpts{ + Timeout: 1 * time.Hour, + MaxSamples: 1e10, + EnableNegativeOffset: true, + EnableAtModifier: true, + } + + lStorage := teststorage.New(t) + defer lStorage.Close() + + app := lStorage.Appender(context.TODO()) + testutil.Ok(t, generateFloatHistogramSeries(app, 3000, false)) + testutil.Ok(t, app.Commit()) + + optimizers := logicalplan.AllOptimizers + + ctx := context.Background() + newEngine := engine.New(engine.Opts{ + EngineOpts: opts, + DisableFallback: true, + LogicalOptimizers: optimizers, + EnableXFunctions: true, + }) + query, err := newEngine.NewInstantQuery(ctx, lStorage, nil, expr, defaultQueryTime) + testutil.Ok(t, err) + defer query.Close() + + engineResult := query.Exec(ctx) + require.Error(t, engineResult.Err) +} + func TestXFunctionsWhenDisabled(t *testing.T) { var ( query = "xincrease(http_requests[50s])" diff --git a/execution/scan/functions.go b/execution/scan/functions.go index 7c1d1f21..f90daedd 100644 --- a/execution/scan/functions.go +++ b/execution/scan/functions.go @@ -381,6 +381,11 @@ func extendedRate(samples []Sample, isCounter, isRate bool, stepTime int64, sele // points[0] to be a histogram. It returns nil if any other Point in points is // not a histogram. func histogramRate(points []Sample, isCounter bool) *histogram.FloatHistogram { + // safeguard against empty ranges coming from extended function evaluation. + if len(points) < 2 { + return nil + } + prev := points[0].H // We already know that this is a histogram. last := points[len(points)-1].H if last == nil { diff --git a/execution/scan/matrix_selector.go b/execution/scan/matrix_selector.go index 3c12506b..dd12532f 100644 --- a/execution/scan/matrix_selector.go +++ b/execution/scan/matrix_selector.go @@ -9,13 +9,13 @@ import ( "sync" "time" + "github.com/efficientgo/core/errors" "github.com/prometheus/prometheus/model/labels" "github.com/prometheus/prometheus/model/value" + "github.com/prometheus/prometheus/promql/parser" "github.com/prometheus/prometheus/storage" "github.com/prometheus/prometheus/tsdb/chunkenc" - "github.com/prometheus/prometheus/promql/parser" - "github.com/thanos-io/promql-engine/execution/function" "github.com/thanos-io/promql-engine/execution/model" engstore "github.com/thanos-io/promql-engine/execution/storage" @@ -60,6 +60,8 @@ type matrixSelector struct { model.OperatorTelemetry } +var ErrNativeHistogramsNotSupported = errors.New("native histograms are not supported in extended range functions") + // NewMatrixSelector creates operator which selects vector of series over time. func NewMatrixSelector( pool *model.VectorPool, @@ -170,7 +172,7 @@ func (o *matrixSelector) Next(ctx context.Context) ([]model.StepVector, error) { if !o.isExtFunction { rangeSamples, err = selectPoints(series.samples, mint, maxt, o.scanners[i].previousSamples) } else { - rangeSamples, err = selectExtPoints(series.samples, mint, maxt, o.scanners[i].previousSamples, o.extLookbackDelta, &o.scanners[i].metricAppearedTs) + rangeSamples, err = selectExtPoints(ctx, series.samples, mint, maxt, o.scanners[i].previousSamples, o.extLookbackDelta, &o.scanners[i].metricAppearedTs) } if err != nil { @@ -351,8 +353,9 @@ loop: // into the [mint, maxt] range are retained; only points with later timestamps // are populated from the iterator. // TODO(fpetkovski): Add max samples limit. -func selectExtPoints(it *storage.BufferedSeriesIterator, mint, maxt int64, out []Sample, extLookbackDelta int64, metricAppearedTs **int64) ([]Sample, error) { +func selectExtPoints(ctx context.Context, it *storage.BufferedSeriesIterator, mint, maxt int64, out []Sample, extLookbackDelta int64, metricAppearedTs **int64) ([]Sample, error) { extMint := mint - extLookbackDelta + selectsNativeHistograms := false if len(out) > 0 && out[len(out)-1].T >= mint { // There is an overlap between previous and current ranges, retain common @@ -396,6 +399,7 @@ loop: case chunkenc.ValNone: break loop case chunkenc.ValHistogram, chunkenc.ValFloatHistogram: + selectsNativeHistograms = true t, fh := buf.AtFloatHistogram() if value.IsStaleNaN(fh.Sum) { continue loop @@ -431,6 +435,7 @@ loop: // The sought sample might also be in the range. switch soughtValueType { case chunkenc.ValHistogram, chunkenc.ValFloatHistogram: + selectsNativeHistograms = true t, fh := it.AtFloatHistogram() if t == maxt && !value.IsStaleNaN(fh.Sum) { if *metricAppearedTs == nil { @@ -448,5 +453,9 @@ loop: } } + if selectsNativeHistograms { + return nil, ErrNativeHistogramsNotSupported + } + return out, nil } From eba4b06334ea295cc0487dc0ab2c39025ad89110 Mon Sep 17 00:00:00 2001 From: Pedro Tanaka Date: Wed, 25 Oct 2023 10:35:19 -0300 Subject: [PATCH 2/3] Update execution/scan/functions.go Co-authored-by: Filip Petkovski Signed-off-by: Pedro Tanaka --- execution/scan/functions.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/execution/scan/functions.go b/execution/scan/functions.go index f90daedd..86f5c381 100644 --- a/execution/scan/functions.go +++ b/execution/scan/functions.go @@ -381,7 +381,7 @@ func extendedRate(samples []Sample, isCounter, isRate bool, stepTime int64, sele // points[0] to be a histogram. It returns nil if any other Point in points is // not a histogram. func histogramRate(points []Sample, isCounter bool) *histogram.FloatHistogram { - // safeguard against empty ranges coming from extended function evaluation. + // Calculating a rate on a single sample is not defined. if len(points) < 2 { return nil } From 2e22f8fec4b46b3e67fd28b462e3f95bd67af277 Mon Sep 17 00:00:00 2001 From: Pedro Tanaka Date: Wed, 25 Oct 2023 16:08:16 +0200 Subject: [PATCH 3/3] Removing query context Signed-off-by: Pedro Tanaka --- execution/scan/matrix_selector.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/execution/scan/matrix_selector.go b/execution/scan/matrix_selector.go index dd12532f..f1d4a1bb 100644 --- a/execution/scan/matrix_selector.go +++ b/execution/scan/matrix_selector.go @@ -172,7 +172,7 @@ func (o *matrixSelector) Next(ctx context.Context) ([]model.StepVector, error) { if !o.isExtFunction { rangeSamples, err = selectPoints(series.samples, mint, maxt, o.scanners[i].previousSamples) } else { - rangeSamples, err = selectExtPoints(ctx, series.samples, mint, maxt, o.scanners[i].previousSamples, o.extLookbackDelta, &o.scanners[i].metricAppearedTs) + rangeSamples, err = selectExtPoints(series.samples, mint, maxt, o.scanners[i].previousSamples, o.extLookbackDelta, &o.scanners[i].metricAppearedTs) } if err != nil { @@ -353,7 +353,7 @@ loop: // into the [mint, maxt] range are retained; only points with later timestamps // are populated from the iterator. // TODO(fpetkovski): Add max samples limit. -func selectExtPoints(ctx context.Context, it *storage.BufferedSeriesIterator, mint, maxt int64, out []Sample, extLookbackDelta int64, metricAppearedTs **int64) ([]Sample, error) { +func selectExtPoints(it *storage.BufferedSeriesIterator, mint, maxt int64, out []Sample, extLookbackDelta int64, metricAppearedTs **int64) ([]Sample, error) { extMint := mint - extLookbackDelta selectsNativeHistograms := false