From 887026bd5914d65fe2fc78776c374bc29a400b77 Mon Sep 17 00:00:00 2001 From: Lisen <38773813+yl-lisen@users.noreply.github.com> Date: Fri, 20 Dec 2024 15:48:03 +0800 Subject: [PATCH] Feature/issue 6875 largest triangle three buckets (#6877) * Add aggregation function for Largest-Triangle-Three-Buckets (#53145) * Added a simple lttb aggregate function * Added support for multiple datatypes * Added support for Date and Date32, updated LTTBData struct * Updated code to handle bucket size 0 and 1 * Added sort for LTTBData * Added tests and documentation * Added some code style fixes * Added function to new func ref file * Removed function from new func ref file * Apply suggestions from code review * Updated unit tests * updated LTTB data code * Minor style fixes * Updated code with std sort * updated tests * Renamed lttb to largestTriangleThreeBuckets * Added alias lttb --------- Co-authored-by: Alexey Milovidov * ClickHouse/ClickHouse#56350 (partial) * fix: split lttb bucket strategy, first bucket and last bucket should only contain single point (#57003) * fix: split lttb bucket policy, first bucket and last bucket should only contain single point * add comments and modify the corresponding query test * style: update code format * style: remove useless comments * feat: add lttb bucket size test * fix: typo, duplicate sql * Merge pull request #62646 from Algunenano/i_like_triangles Fix crash in largestTriangleThreeBuckets * ClickHouse/ClickHouse#60469 (partial) * ClickHouse/ClickHouse#68135 (partial) * Merge pull request #73172 from ucasfl/fix-ubsan Fix UBSAN in largestTriangleThreeBuckets * fixes --- .../reference/largestTriangleThreeBuckets.md | 67 ++++ ...ateFunctionLargestTriangleThreeBuckets.cpp | 373 ++++++++++++++++++ .../registerAggregateFunctions.cpp | 2 + src/Interpreters/ExpressionAnalyzer.cpp | 14 + ...eThreeBuckets_aggregate_function.reference | 18 + ...riangleThreeBuckets_aggregate_function.sql | 65 +++ .../03096_largest_triangle_3b_crash.reference | 1 + 
.../03096_largest_triangle_3b_crash.sql | 1 + ...eThreeBuckets_aggregate_function.reference | 18 + ...riangleThreeBuckets_aggregate_function.sql | 65 +++ .../03096_largest_triangle_3b_crash.reference | 1 + .../03096_largest_triangle_3b_crash.sql | 1 + 12 files changed, 626 insertions(+) create mode 100644 docs/en/sql-reference/aggregate-functions/reference/largestTriangleThreeBuckets.md create mode 100644 src/AggregateFunctions/AggregateFunctionLargestTriangleThreeBuckets.cpp create mode 100644 tests/queries/0_stateless/02842_largestTriangleThreeBuckets_aggregate_function.reference create mode 100644 tests/queries/0_stateless/02842_largestTriangleThreeBuckets_aggregate_function.sql create mode 100644 tests/queries/0_stateless/03096_largest_triangle_3b_crash.reference create mode 100644 tests/queries/0_stateless/03096_largest_triangle_3b_crash.sql create mode 100644 tests/queries_ported/0_stateless/02842_largestTriangleThreeBuckets_aggregate_function.reference create mode 100644 tests/queries_ported/0_stateless/02842_largestTriangleThreeBuckets_aggregate_function.sql create mode 100644 tests/queries_ported/0_stateless/03096_largest_triangle_3b_crash.reference create mode 100644 tests/queries_ported/0_stateless/03096_largest_triangle_3b_crash.sql diff --git a/docs/en/sql-reference/aggregate-functions/reference/largestTriangleThreeBuckets.md b/docs/en/sql-reference/aggregate-functions/reference/largestTriangleThreeBuckets.md new file mode 100644 index 00000000000..ca033bb48c8 --- /dev/null +++ b/docs/en/sql-reference/aggregate-functions/reference/largestTriangleThreeBuckets.md @@ -0,0 +1,67 @@ +--- +slug: /en/sql-reference/aggregate-functions/reference/largestTriangleThreeBuckets +sidebar_position: 312 +sidebar_label: largestTriangleThreeBuckets +--- + +# largestTriangleThreeBuckets + +Applies the [Largest-Triangle-Three-Buckets](https://skemman.is/bitstream/1946/15343/3/SS_MSthesis.pdf) algorithm to the input data. 
+The algorithm is used for downsampling time series data for visualization. It is designed to operate on series sorted by x coordinate. +It works by dividing the sorted series into buckets and then finding the largest triangle in each bucket. The number of buckets is equal to the number of points in the resulting series. +The function sorts the data by `x` and then applies the downsampling algorithm to the sorted data. + +**Syntax** + +``` sql +largestTriangleThreeBuckets(n)(x, y) +``` + +Alias: `lttb`. + +**Arguments** + +- `x` — x coordinate. [Integer](../../../sql-reference/data-types/int-uint.md), [Float](../../../sql-reference/data-types/float.md), [Decimal](../../../sql-reference/data-types/decimal.md), [Date](../../../sql-reference/data-types/date.md), [Date32](../../../sql-reference/data-types/date32.md), [DateTime](../../../sql-reference/data-types/datetime.md), [DateTime64](../../../sql-reference/data-types/datetime64.md). +- `y` — y coordinate. [Integer](../../../sql-reference/data-types/int-uint.md), [Float](../../../sql-reference/data-types/float.md), [Decimal](../../../sql-reference/data-types/decimal.md), [Date](../../../sql-reference/data-types/date.md), [Date32](../../../sql-reference/data-types/date32.md), [DateTime](../../../sql-reference/data-types/datetime.md), [DateTime64](../../../sql-reference/data-types/datetime64.md). + +**Parameters** + +- `n` — number of points in the resulting series. [UInt64](../../../sql-reference/data-types/int-uint.md). 
+ +**Returned values** + +[Array](../../../sql-reference/data-types/array.md) of [Tuple](../../../sql-reference/data-types/tuple.md) with two elements: the `x` and `y` coordinates of each selected point. + +**Example** + +Input table: + +``` text +┌─────x───────┬───────y──────┐ +│ 1.000000000 │ 10.000000000 │ +│ 2.000000000 │ 20.000000000 │ +│ 3.000000000 │ 15.000000000 │ +│ 8.000000000 │ 60.000000000 │ +│ 9.000000000 │ 55.000000000 │ +│ 10.00000000 │ 70.000000000 │ +│ 4.000000000 │ 30.000000000 │ +│ 5.000000000 │ 40.000000000 │ +│ 6.000000000 │ 35.000000000 │ +│ 7.000000000 │ 50.000000000 │ +└─────────────┴──────────────┘ +``` + +Query: + +``` sql +SELECT largestTriangleThreeBuckets(4)(x, y) FROM largestTriangleThreeBuckets_test; +``` + +Result: + +``` text +┌────────largestTriangleThreeBuckets(4)(x, y)───────────┐ +│ [(1,10),(3,15),(9,55),(10,70)] │ +└───────────────────────────────────────────────────────┘ +``` + diff --git a/src/AggregateFunctions/AggregateFunctionLargestTriangleThreeBuckets.cpp b/src/AggregateFunctions/AggregateFunctionLargestTriangleThreeBuckets.cpp new file mode 100644 index 00000000000..561ed6dcc4f --- /dev/null +++ b/src/AggregateFunctions/AggregateFunctionLargestTriangleThreeBuckets.cpp @@ -0,0 +1,373 @@ +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + + +namespace ErrorCodes +{ + extern const int NOT_IMPLEMENTED; + extern const int ILLEGAL_TYPE_OF_ARGUMENT; + extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; +} + +namespace DB +{ +struct Settings; + +namespace +{ + +struct LargestTriangleThreeBucketsData : public StatisticalSample +{ + void add(const Float64 xval, const Float64 yval, Arena * arena) + { + /// We need to ensure either both or neither coordinates are saved (StatisticalSample ignores NaNs) + if (isNaN(xval) || isNaN(yval)) + return; + this->addX(xval, arena); + this->addY(yval, arena); + } + + void sort(Arena * arena) + { +
chassert(this->x.size() == this->y.size()); + // sort the this->x and this->y in ascending order of this->x using index + std::vector index(this->x.size()); + + std::iota(index.begin(), index.end(), 0); + ::sort(index.begin(), index.end(), [&](size_t i1, size_t i2) { return this->x[i1] < this->x[i2]; }); + + SampleX temp_x{}; + SampleY temp_y{}; + + for (size_t i = 0; i < this->x.size(); ++i) + { + temp_x.push_back(this->x[index[i]], arena); + temp_y.push_back(this->y[index[i]], arena); + } + + for (size_t i = 0; i < this->x.size(); ++i) + { + this->x[i] = temp_x[i]; + this->y[i] = temp_y[i]; + } + } + + PODArray> getResult(size_t total_buckets, Arena * arena) + { + // Sort the data + this->sort(arena); + + PODArray> result; + + // Handle special cases for small data list + if (this->x.size() <= total_buckets) + { + for (size_t i = 0; i < this->x.size(); ++i) + { + result.emplace_back(std::make_pair(this->x[i], this->y[i])); + } + return result; + } + + // Handle special cases for 0 or 1 or 2 buckets + if (total_buckets == 0) + return result; + if (total_buckets == 1) + { + result.emplace_back(std::make_pair(this->x.front(), this->y.front())); + return result; + } + if (total_buckets == 2) + { + result.emplace_back(std::make_pair(this->x.front(), this->y.front())); + result.emplace_back(std::make_pair(this->x.back(), this->y.back())); + return result; + } + + // Find the size of each bucket + Float64 single_bucket_size = static_cast(this->x.size() - 2) / static_cast(total_buckets - 2); + + // Include the first data point + result.emplace_back(std::make_pair(this->x[0], this->y[0])); + + // the start index of current bucket + size_t start_index = 1; + // the end index of current bucket, also is the start index of next bucket + size_t center_index = start_index + static_cast(floor(single_bucket_size)); + + for (size_t i = 0; i < total_buckets - 2; ++i) // Skip the first and last bucket + { + // the end index of next bucket + size_t end_index = 1 + 
static_cast(floor(single_bucket_size * (i + 2))); + // current bucket is the last bucket + end_index = std::min(end_index, this->x.size()); + + // Compute the average point in the next bucket + Float64 avg_x = 0; + Float64 avg_y = 0; + for (size_t j = center_index; j < end_index; ++j) + { + avg_x += this->x[j]; + avg_y += this->y[j]; + } + avg_x /= static_cast(end_index - center_index); + avg_y /= static_cast(end_index - center_index); + + // Find the point in the current bucket that forms the largest triangle + size_t max_index = start_index; + Float64 max_area = 0.0; + for (size_t j = start_index; j < center_index; ++j) + { + Float64 area = std::abs( + 0.5 + * (result.back().first * this->y[j] + this->x[j] * avg_y + avg_x * result.back().second - result.back().first * avg_y + - this->x[j] * result.back().second - avg_x * this->y[j])); + if (area > max_area) + { + max_area = area; + max_index = j; + } + } + + // Include the selected point + result.emplace_back(std::make_pair(this->x[max_index], this->y[max_index])); + + start_index = center_index; + center_index = end_index; + } + + // Include the last data point + result.emplace_back(std::make_pair(this->x.back(), this->y.back())); + + return result; + } +}; + +static constexpr auto AggregateFunctionLargestTriangleThreeBucketsName = "largest_triangle_three_buckets"; + +class AggregateFunctionLargestTriangleThreeBuckets final : public IAggregateFunctionDataHelper +{ +private: + UInt64 total_buckets{0}; + TypeIndex x_type; + TypeIndex y_type; + +public: + explicit AggregateFunctionLargestTriangleThreeBuckets(const DataTypes & arguments, const Array & params) + : IAggregateFunctionDataHelper({arguments}, {}) + { + if (params.size() != 1) + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Aggregate function {} require one parameter", getName()); + + if (params[0].getType() != Field::Types::UInt64) + throw Exception( + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Aggregate function {} require first parameter 
to be a UInt64", getName()); + + total_buckets = params[0].safeGet(); + + this->x_type = WhichDataType(arguments[0]).idx; + this->y_type = WhichDataType(arguments[1]).idx; + } + + String getName() const override { return AggregateFunctionLargestTriangleThreeBucketsName; } + + bool allocatesMemoryInArena() const override { return true; } + + DataTypePtr getReturnType() const override + { + UInt32 x_scale = 0; + UInt32 y_scale = 0; + + if (const auto * datetime64_type = typeid_cast(argument_types[0].get())) + { + x_scale = datetime64_type->getScale(); + } + + if (const auto * datetime64_type = typeid_cast(argument_types[1].get())) + { + y_scale = datetime64_type->getScale(); + } + + DataTypes types = {getDataTypeFromTypeIndex(x_type, x_scale), getDataTypeFromTypeIndex(y_type, y_scale)}; + + auto tuple = std::make_shared(std::move(types)); + + return std::make_shared(tuple); + } + + static DataTypePtr getDataTypeFromTypeIndex(TypeIndex type_index, UInt32 scale) + { + DataTypePtr data_type; + switch (type_index) + { + case TypeIndex::Date: + data_type = std::make_shared(); + break; + case TypeIndex::Date32: + data_type = std::make_shared(); + break; + case TypeIndex::DateTime: + data_type = std::make_shared(); + break; + case TypeIndex::DateTime64: + data_type = std::make_shared(scale); + break; + default: + data_type = std::make_shared>(); + } + return data_type; + } + + void add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena * arena) const override + { + Float64 x = getFloat64DataFromColumn(columns[0], row_num, this->x_type); + Float64 y = getFloat64DataFromColumn(columns[1], row_num, this->y_type); + this->data(place).add(x, y, arena); + } + + Float64 getFloat64DataFromColumn(const IColumn * column, size_t row_num, TypeIndex type_index) const + { + switch (type_index) + { + case TypeIndex::Date: + return static_cast(*column).getData()[row_num]; + case TypeIndex::Date32: + return static_cast(*column).getData()[row_num]; + case 
TypeIndex::DateTime: + return static_cast(*column).getData()[row_num]; + case TypeIndex::DateTime64: + return static_cast(*column).getData()[row_num]; + default: + return column->getFloat64(row_num); + } + } + + void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena * arena) const override + { + auto & a = this->data(place); + const auto & b = this->data(rhs); + + a.merge(b, arena); + } + + void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf, std::optional /* version */) const override + { + this->data(place).write(buf); + } + + void deserialize(AggregateDataPtr __restrict place, ReadBuffer & buf, std::optional /* version */, Arena * arena) const override + { + this->data(place).read(buf, arena); + } + + void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena * arena) const override + { + auto res = this->data(place).getResult(total_buckets, arena); + + auto & col = assert_cast(to); + auto & col_offsets = assert_cast(col.getOffsetsColumn()); + + auto column_x_adder_func = getColumnAdderFunc(x_type); + auto column_y_adder_func = getColumnAdderFunc(y_type); + + for (const auto & elem : res) + { + auto & column_tuple = assert_cast(col.getData()); + column_x_adder_func(column_tuple.getColumn(0), elem.first); + column_y_adder_func(column_tuple.getColumn(1), elem.second); + } + + col_offsets.getData().push_back(col.getData().size()); + } + + std::function getColumnAdderFunc(TypeIndex type_index) const + { + switch (type_index) + { + case TypeIndex::Date: + return [](IColumn & column, Float64 value) + { + auto & col = assert_cast(column); + col.getData().push_back(static_cast(value)); + }; + case TypeIndex::Date32: + return [](IColumn & column, Float64 value) + { + auto & col = assert_cast(column); + col.getData().push_back(static_cast(value)); + }; + case TypeIndex::DateTime: + return [](IColumn & column, Float64 value) + { + auto & col = assert_cast(column); + col.getData().push_back(static_cast(value)); 
+ }; + case TypeIndex::DateTime64: + return [](IColumn & column, Float64 value) + { + auto & col = assert_cast(column); + col.getData().push_back(static_cast(value)); + }; + default: + return [](IColumn & column, Float64 value) + { + auto & col = assert_cast(column); + col.getData().push_back(value); + }; + } + } +}; + + +AggregateFunctionPtr +createAggregateFunctionLargestTriangleThreeBuckets(const std::string & name, const DataTypes & argument_types, const Array & parameters, const Settings *) +{ + assertBinary(name, argument_types); + + if (!(isNumber(argument_types[0]) || isDateOrDate32(argument_types[0]) || isDateTime(argument_types[0]) + || isDateTime64(argument_types[0]))) + throw Exception( + ErrorCodes::NOT_IMPLEMENTED, + "Aggregate function {} only supports Date, Date32, DateTime, DateTime64 and Number as the first argument", + name); + + if (!(isNumber(argument_types[1]) || isDateOrDate32(argument_types[1]) || isDateTime(argument_types[1]) + || isDateTime64(argument_types[1]))) + throw Exception( + ErrorCodes::NOT_IMPLEMENTED, + "Aggregate function {} only supports Date, Date32, DateTime, DateTime64 and Number as the second argument", + name); + + return std::make_shared(argument_types, parameters); +} + +} + + +void registerAggregateFunctionLargestTriangleThreeBuckets(AggregateFunctionFactory & factory) +{ + factory.registerFunction(AggregateFunctionLargestTriangleThreeBucketsName, createAggregateFunctionLargestTriangleThreeBuckets); + factory.registerAlias("lttb", AggregateFunctionLargestTriangleThreeBucketsName); +} + + +} diff --git a/src/AggregateFunctions/registerAggregateFunctions.cpp b/src/AggregateFunctions/registerAggregateFunctions.cpp index ef0f708c245..56c2c158d6d 100644 --- a/src/AggregateFunctions/registerAggregateFunctions.cpp +++ b/src/AggregateFunctions/registerAggregateFunctions.cpp @@ -74,6 +74,7 @@ void registerAggregateFunctionExponentialMovingAverage(AggregateFunctionFactory void 
registerAggregateFunctionSparkbar(AggregateFunctionFactory &); void registerAggregateFunctionIntervalLengthSum(AggregateFunctionFactory &); void registerAggregateFunctionAnalysisOfVariance(AggregateFunctionFactory &); +void registerAggregateFunctionLargestTriangleThreeBuckets(AggregateFunctionFactory & factory); class AggregateFunctionCombinatorFactory; void registerAggregateFunctionCombinatorIf(AggregateFunctionCombinatorFactory &); @@ -188,6 +189,7 @@ void registerAggregateFunctions() registerAggregateFunctionExponentialMovingAverage(factory); registerAggregateFunctionSparkbar(factory); registerAggregateFunctionAnalysisOfVariance(factory); + registerAggregateFunctionLargestTriangleThreeBuckets(factory); registerWindowFunctions(factory); diff --git a/src/Interpreters/ExpressionAnalyzer.cpp b/src/Interpreters/ExpressionAnalyzer.cpp index e125975bca1..cd651e9bf42 100644 --- a/src/Interpreters/ExpressionAnalyzer.cpp +++ b/src/Interpreters/ExpressionAnalyzer.cpp @@ -263,6 +263,20 @@ void tryTranslateToParametricAggregateFunction( argument_names = {argument_names[0]}; types = {types[0]}; } + else if (lower_name == "largest_triangle_three_buckets" || lower_name == "lttb") + { + /// Translate `largest_triangle_three_buckets(x, y, n)` to `largest_triangle_three_buckets(n)(x, y)` + if (arguments.size() != 3) + throw Exception( + ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Aggregate function {} requires 3 arguments", node->name); + + ASTPtr expression_list = std::make_shared(); + expression_list->children.emplace_back(arguments.back()); + parameters = getAggregateFunctionParametersArray(expression_list, "", context); + + argument_names.pop_back(); + types.pop_back(); + } }; /// proton: starts. 
Add 'is_changelog_input' param to allow aggregate function being aware whether the input stream is a changelog diff --git a/tests/queries/0_stateless/02842_largestTriangleThreeBuckets_aggregate_function.reference b/tests/queries/0_stateless/02842_largestTriangleThreeBuckets_aggregate_function.reference new file mode 100644 index 00000000000..1e304d84612 --- /dev/null +++ b/tests/queries/0_stateless/02842_largestTriangleThreeBuckets_aggregate_function.reference @@ -0,0 +1,18 @@ +[] +[(1,10)] +[(1,10),(10,70)] +[(1,10),(3,15),(9,55),(10,70)] +[(0.02,0.16),(0.08,0.88),(0.09,0),(0.15,0.09),(0.21,0.46),(0.23,0.23),(0.29,0.16),(0.37,0.86),(0.39,0.86),(0.47,0.06),(0.48,0.57),(0.52,0.11),(0.6,0.64),(0.63,0.25),(0.7,0.25),(0.73,0.95),(0.83,0.67),(0.87,0.15),(0.91,0.62),(0.98,0.09)] +[('2023-01-14 00:00:00.000',35.96528042030847),('2023-03-15 00:00:00.000',98.77709508458238),('2023-06-05 00:00:00.000',8.107958052612418),('2023-12-18 00:00:00.000',12.832032764204616),('2023-12-31 00:00:00.000',98.52375935588333)] +[('2023-01-14 00:00:00.000',35.96528042030847),('2023-03-15 00:00:00.000',98.77709508458238),('2023-06-05 00:00:00.000',8.107958052612418),('2023-12-18 00:00:00.000',12.832032764204616),('2023-12-31 00:00:00.000',98.52375935588333)] +(9908,908) 9908 10 +(9918,918) 9918 10 +(9928,928) 9928 10 +(9938,938) 9938 10 +(9948,948) 9948 10 +(9958,958) 9958 10 +(9968,968) 9968 10 +(9978,978) 9978 10 +(9988,988) 9988 10 +(9999,999) 9999 11 +[(0,'1900-01-01 00:00:00.000')] diff --git a/tests/queries/0_stateless/02842_largestTriangleThreeBuckets_aggregate_function.sql b/tests/queries/0_stateless/02842_largestTriangleThreeBuckets_aggregate_function.sql new file mode 100644 index 00000000000..2a390d8e52c --- /dev/null +++ b/tests/queries/0_stateless/02842_largestTriangleThreeBuckets_aggregate_function.sql @@ -0,0 +1,65 @@ +drop table if exists largestTriangleThreeBucketsTestFloat64Float64; + +CREATE TABLE largestTriangleThreeBucketsTestFloat64Float64 +( + x Float64, + y Float64 +) 
ENGINE = MergeTree order by (y,x); + +INSERT INTO largestTriangleThreeBucketsTestFloat64Float64 +VALUES (1.0, 10.0),(2.0, 20.0),(3.0, 15.0),(8.0, 60.0),(9.0, 55.0),(10.0, 70.0),(4.0, 30.0),(5.0, 40.0),(6.0, 35.0),(7.0, 50.0); + +select largestTriangleThreeBuckets(0)(x, y) FROM largestTriangleThreeBucketsTestFloat64Float64; + +select largestTriangleThreeBuckets(1)(x, y) FROM largestTriangleThreeBucketsTestFloat64Float64; + +select largestTriangleThreeBuckets(2)(x, y) FROM largestTriangleThreeBucketsTestFloat64Float64; + +SELECT largestTriangleThreeBuckets(4)(x, y) AS downsampled_data +FROM largestTriangleThreeBucketsTestFloat64Float64; + +drop table largestTriangleThreeBucketsTestFloat64Float64; + +drop table if exists largestTriangleThreeBucketsTestDecimal64Decimal64; + +CREATE TABLE largestTriangleThreeBucketsTestDecimal64Decimal64 +( + x Decimal64(2), + y Decimal64(2) +) ENGINE = MergeTree order by (y,x); + +INSERT INTO largestTriangleThreeBucketsTestDecimal64Decimal64(x, y) VALUES (0.63, 0.25), (0.02, 0.16), (0.29, 0.16), (0.2, 0.24), (0.41, 0.63), (0.06, 0.73), (0.36, 0.99), (0.57, 0.18), (0.98, 0.09), (0.73, 0.95), (0.45, 0.86), (0.37, 0.86), (0.6, 0.64), (0.11, 0.31), (0.7, 0.25), (0.85, 0.15), (0.68, 0.39), (0.9, 0.3), (0.25, 0.34), (0.09, 0.0), (0.91, 0.62), (0.47, 0.06), (0.08, 0.88), (0.48, 0.57), (0.55, 0.75), (0.19, 0.27), (0.87, 0.15), (0.15, 0.09), (0.77, 0.28), (0.5, 0.2), (0.39, 0.86), (0.52, 0.11), (0.38, 0.75), (0.71, 0.44), (0.21, 0.46), (0.88, 0.15), (0.83, 0.67), (0.23, 0.23); + +select largestTriangleThreeBuckets(20)(x, y) from largestTriangleThreeBucketsTestDecimal64Decimal64; + +drop table largestTriangleThreeBucketsTestDecimal64Decimal64; + +drop table if exists largestTriangleThreeBucketsTestDateTime64Float64; + +create table largestTriangleThreeBucketsTestDateTime64Float64 (x DateTime64(3), y Float64) engine = MergeTree order by (y,x); + +INSERT INTO largestTriangleThreeBucketsTestDateTime64Float64 (x, y) VALUES ('2023-09-06 00:00:00', 
14.217481939467213), ('2023-09-11 00:00:00', 30.096113766096455), ('2023-01-31 00:00:00', 91.42364224984735), ('2023-12-14 00:00:00', 42.08543753438961), ('2023-10-31 00:00:00', 29.93227107709394), ('2023-12-31 00:00:00', 98.52375935588333), ('2023-07-07 00:00:00', 79.9367415060134), ('2023-08-02 00:00:00', 55.417182033825696), ('2023-03-15 00:00:00', 98.77709508458238), ('2023-09-05 00:00:00', 2.832505232031368), ('2023-06-05 00:00:00', 8.107958052612418), ('2023-02-08 00:00:00', 62.95788480328096), ('2023-02-17 00:00:00', 76.80522155552535), ('2023-11-13 00:00:00', 24.927527306242993), ('2023-02-03 00:00:00', 7.966981342350332), ('2023-05-31 00:00:00', 44.61922229800436), ('2023-09-21 00:00:00', 65.86974701469791), ('2023-01-14 00:00:00', 35.96528042030847), ('2023-02-19 00:00:00', 16.065599678978305), ('2023-05-24 00:00:00', 17.23630978966909), ('2023-11-15 00:00:00', 15.544172190379879), ('2023-12-03 00:00:00', 13.738382187690856), ('2023-10-09 00:00:00', 16.7137129521176), ('2023-11-19 00:00:00', 12.12866001303361), ('2023-06-10 00:00:00', 95.15764263905534), ('2023-07-06 00:00:00', 18.87765798627088), ('2023-03-13 00:00:00', 44.82941460384813), ('2023-01-29 00:00:00', 36.0214717111606), ('2023-12-19 00:00:00', 90.30173319497655), ('2023-07-15 00:00:00', 12.67101467231364), ('2023-07-06 00:00:00', 88.13662733228512), ('2023-05-10 00:00:00', 34.18711141027026), ('2023-11-12 00:00:00', 75.58716684321973), ('2023-10-28 00:00:00', 35.79179186729331), ('2023-11-14 00:00:00', 0.9318182359137728), ('2023-09-29 00:00:00', 80.05338096818797), ('2023-09-13 00:00:00', 16.130217942056866), ('2023-07-28 00:00:00', 11.186638594914744), ('2023-02-12 00:00:00', 69.43690757793445), ('2023-12-18 00:00:00', 12.832032764204616), ('2023-05-21 00:00:00', 74.25002458036471), ('2023-04-03 00:00:00', 51.5662427420719), ('2023-11-27 00:00:00', 96.44359131281784), ('2023-03-29 00:00:00', 33.018594418113324), ('2023-02-07 00:00:00', 84.58945099939815), ('2023-11-16 00:00:00', 
40.61531555527268), ('2023-04-21 00:00:00', 60.0545791577218), ('2023-01-31 00:00:00', 87.23185155362057), ('2023-05-19 00:00:00', 77.4095289464808), ('2023-08-26 00:00:00', 18.700816570182067); + +select largestTriangleThreeBuckets(5)(x, y) from largestTriangleThreeBucketsTestDateTime64Float64; + +select lttb(5)(x, y) from largestTriangleThreeBucketsTestDateTime64Float64; + +drop table largestTriangleThreeBucketsTestDateTime64Float64; + +CREATE TABLE largestTriangleTreeBucketsBucketSizeTest +( + x UInt32, + y UInt32 +) ENGINE = MergeTree ORDER BY x; + +INSERT INTO largestTriangleTreeBucketsBucketSizeTest (x, y) SELECT (number + 1) AS x, (x % 1000) AS y FROM numbers(9999); + +SELECT + arrayJoin(lttb(1000)(x, y)) AS point, + tupleElement(point, 1) AS point_x, + point_x - neighbor(point_x, -1) AS point_x_diff_with_previous_row +FROM largestTriangleTreeBucketsBucketSizeTest LIMIT 990, 10; + +SELECT largestTriangleThreeBuckets(1)(0, '1900-01-01 00:00:00'::DateTime64); + +DROP TABLE largestTriangleTreeBucketsBucketSizeTest; diff --git a/tests/queries/0_stateless/03096_largest_triangle_3b_crash.reference b/tests/queries/0_stateless/03096_largest_triangle_3b_crash.reference new file mode 100644 index 00000000000..fe51488c706 --- /dev/null +++ b/tests/queries/0_stateless/03096_largest_triangle_3b_crash.reference @@ -0,0 +1 @@ +[] diff --git a/tests/queries/0_stateless/03096_largest_triangle_3b_crash.sql b/tests/queries/0_stateless/03096_largest_triangle_3b_crash.sql new file mode 100644 index 00000000000..b1a07294052 --- /dev/null +++ b/tests/queries/0_stateless/03096_largest_triangle_3b_crash.sql @@ -0,0 +1 @@ +SELECT largestTriangleThreeBuckets(1)(1, nan); \ No newline at end of file diff --git a/tests/queries_ported/0_stateless/02842_largestTriangleThreeBuckets_aggregate_function.reference b/tests/queries_ported/0_stateless/02842_largestTriangleThreeBuckets_aggregate_function.reference new file mode 100644 index 00000000000..1e304d84612 --- /dev/null +++ 
b/tests/queries_ported/0_stateless/02842_largestTriangleThreeBuckets_aggregate_function.reference @@ -0,0 +1,18 @@ +[] +[(1,10)] +[(1,10),(10,70)] +[(1,10),(3,15),(9,55),(10,70)] +[(0.02,0.16),(0.08,0.88),(0.09,0),(0.15,0.09),(0.21,0.46),(0.23,0.23),(0.29,0.16),(0.37,0.86),(0.39,0.86),(0.47,0.06),(0.48,0.57),(0.52,0.11),(0.6,0.64),(0.63,0.25),(0.7,0.25),(0.73,0.95),(0.83,0.67),(0.87,0.15),(0.91,0.62),(0.98,0.09)] +[('2023-01-14 00:00:00.000',35.96528042030847),('2023-03-15 00:00:00.000',98.77709508458238),('2023-06-05 00:00:00.000',8.107958052612418),('2023-12-18 00:00:00.000',12.832032764204616),('2023-12-31 00:00:00.000',98.52375935588333)] +[('2023-01-14 00:00:00.000',35.96528042030847),('2023-03-15 00:00:00.000',98.77709508458238),('2023-06-05 00:00:00.000',8.107958052612418),('2023-12-18 00:00:00.000',12.832032764204616),('2023-12-31 00:00:00.000',98.52375935588333)] +(9908,908) 9908 10 +(9918,918) 9918 10 +(9928,928) 9928 10 +(9938,938) 9938 10 +(9948,948) 9948 10 +(9958,958) 9958 10 +(9968,968) 9968 10 +(9978,978) 9978 10 +(9988,988) 9988 10 +(9999,999) 9999 11 +[(0,'1900-01-01 00:00:00.000')] diff --git a/tests/queries_ported/0_stateless/02842_largestTriangleThreeBuckets_aggregate_function.sql b/tests/queries_ported/0_stateless/02842_largestTriangleThreeBuckets_aggregate_function.sql new file mode 100644 index 00000000000..916251c33bf --- /dev/null +++ b/tests/queries_ported/0_stateless/02842_largestTriangleThreeBuckets_aggregate_function.sql @@ -0,0 +1,65 @@ +drop stream if exists largestTriangleThreeBucketsTestFloat64Float64; + +CREATE STREAM largestTriangleThreeBucketsTestFloat64Float64 +( + x float64, + y float64 +) ENGINE = MergeTree order by (y,x); + +INSERT INTO largestTriangleThreeBucketsTestFloat64Float64 +VALUES (1.0, 10.0),(2.0, 20.0),(3.0, 15.0),(8.0, 60.0),(9.0, 55.0),(10.0, 70.0),(4.0, 30.0),(5.0, 40.0),(6.0, 35.0),(7.0, 50.0); + +select largest_triangle_three_buckets(x, y, 0) FROM largestTriangleThreeBucketsTestFloat64Float64; + +select 
largest_triangle_three_buckets(x, y, 1) FROM largestTriangleThreeBucketsTestFloat64Float64; + +select largest_triangle_three_buckets(x, y, 2) FROM largestTriangleThreeBucketsTestFloat64Float64; + +SELECT largest_triangle_three_buckets(x, y, 4) AS downsampled_data +FROM largestTriangleThreeBucketsTestFloat64Float64; + +drop stream largestTriangleThreeBucketsTestFloat64Float64; + +drop stream if exists largestTriangleThreeBucketsTestDecimal64Decimal64; + +CREATE STREAM largestTriangleThreeBucketsTestDecimal64Decimal64 +( + x Decimal64(2), + y Decimal64(2) +) ENGINE = MergeTree order by (y,x); + +INSERT INTO largestTriangleThreeBucketsTestDecimal64Decimal64(x, y) VALUES (0.63, 0.25), (0.02, 0.16), (0.29, 0.16), (0.2, 0.24), (0.41, 0.63), (0.06, 0.73), (0.36, 0.99), (0.57, 0.18), (0.98, 0.09), (0.73, 0.95), (0.45, 0.86), (0.37, 0.86), (0.6, 0.64), (0.11, 0.31), (0.7, 0.25), (0.85, 0.15), (0.68, 0.39), (0.9, 0.3), (0.25, 0.34), (0.09, 0.0), (0.91, 0.62), (0.47, 0.06), (0.08, 0.88), (0.48, 0.57), (0.55, 0.75), (0.19, 0.27), (0.87, 0.15), (0.15, 0.09), (0.77, 0.28), (0.5, 0.2), (0.39, 0.86), (0.52, 0.11), (0.38, 0.75), (0.71, 0.44), (0.21, 0.46), (0.88, 0.15), (0.83, 0.67), (0.23, 0.23); + +select largest_triangle_three_buckets(x, y, 20) from largestTriangleThreeBucketsTestDecimal64Decimal64; + +drop stream largestTriangleThreeBucketsTestDecimal64Decimal64; + +drop stream if exists largestTriangleThreeBucketsTestDateTime64Float64; + +create stream largestTriangleThreeBucketsTestDateTime64Float64 (x DateTime64(3), y float64) engine = MergeTree order by (y,x); + +INSERT INTO largestTriangleThreeBucketsTestDateTime64Float64 (x, y) VALUES ('2023-09-06 00:00:00', 14.217481939467213), ('2023-09-11 00:00:00', 30.096113766096455), ('2023-01-31 00:00:00', 91.42364224984735), ('2023-12-14 00:00:00', 42.08543753438961), ('2023-10-31 00:00:00', 29.93227107709394), ('2023-12-31 00:00:00', 98.52375935588333), ('2023-07-07 00:00:00', 79.9367415060134), ('2023-08-02 00:00:00', 
55.417182033825696), ('2023-03-15 00:00:00', 98.77709508458238), ('2023-09-05 00:00:00', 2.832505232031368), ('2023-06-05 00:00:00', 8.107958052612418), ('2023-02-08 00:00:00', 62.95788480328096), ('2023-02-17 00:00:00', 76.80522155552535), ('2023-11-13 00:00:00', 24.927527306242993), ('2023-02-03 00:00:00', 7.966981342350332), ('2023-05-31 00:00:00', 44.61922229800436), ('2023-09-21 00:00:00', 65.86974701469791), ('2023-01-14 00:00:00', 35.96528042030847), ('2023-02-19 00:00:00', 16.065599678978305), ('2023-05-24 00:00:00', 17.23630978966909), ('2023-11-15 00:00:00', 15.544172190379879), ('2023-12-03 00:00:00', 13.738382187690856), ('2023-10-09 00:00:00', 16.7137129521176), ('2023-11-19 00:00:00', 12.12866001303361), ('2023-06-10 00:00:00', 95.15764263905534), ('2023-07-06 00:00:00', 18.87765798627088), ('2023-03-13 00:00:00', 44.82941460384813), ('2023-01-29 00:00:00', 36.0214717111606), ('2023-12-19 00:00:00', 90.30173319497655), ('2023-07-15 00:00:00', 12.67101467231364), ('2023-07-06 00:00:00', 88.13662733228512), ('2023-05-10 00:00:00', 34.18711141027026), ('2023-11-12 00:00:00', 75.58716684321973), ('2023-10-28 00:00:00', 35.79179186729331), ('2023-11-14 00:00:00', 0.9318182359137728), ('2023-09-29 00:00:00', 80.05338096818797), ('2023-09-13 00:00:00', 16.130217942056866), ('2023-07-28 00:00:00', 11.186638594914744), ('2023-02-12 00:00:00', 69.43690757793445), ('2023-12-18 00:00:00', 12.832032764204616), ('2023-05-21 00:00:00', 74.25002458036471), ('2023-04-03 00:00:00', 51.5662427420719), ('2023-11-27 00:00:00', 96.44359131281784), ('2023-03-29 00:00:00', 33.018594418113324), ('2023-02-07 00:00:00', 84.58945099939815), ('2023-11-16 00:00:00', 40.61531555527268), ('2023-04-21 00:00:00', 60.0545791577218), ('2023-01-31 00:00:00', 87.23185155362057), ('2023-05-19 00:00:00', 77.4095289464808), ('2023-08-26 00:00:00', 18.700816570182067); + +select largest_triangle_three_buckets(x, y, 5) from largestTriangleThreeBucketsTestDateTime64Float64; + +select lttb(x, y, 
5) from largestTriangleThreeBucketsTestDateTime64Float64; + +drop stream largestTriangleThreeBucketsTestDateTime64Float64; + +CREATE STREAM largestTriangleTreeBucketsBucketSizeTest +( + x uint32, + y uint32 +) ENGINE = MergeTree ORDER BY x; + +INSERT INTO largestTriangleTreeBucketsBucketSizeTest (x, y) SELECT (number + 1) AS x, (x % 1000) AS y FROM numbers(9999); + +SELECT + array_join(lttb(x, y, 1000)) AS point, + tuple_element(point, 1) AS point_x, + point_x - neighbor(point_x, -1) AS point_x_diff_with_previous_row +FROM largestTriangleTreeBucketsBucketSizeTest LIMIT 990, 10; + +SELECT largest_triangle_three_buckets(0, '1900-01-01 00:00:00'::DateTime64, 1); + +DROP STREAM largestTriangleTreeBucketsBucketSizeTest; diff --git a/tests/queries_ported/0_stateless/03096_largest_triangle_3b_crash.reference b/tests/queries_ported/0_stateless/03096_largest_triangle_3b_crash.reference new file mode 100644 index 00000000000..fe51488c706 --- /dev/null +++ b/tests/queries_ported/0_stateless/03096_largest_triangle_3b_crash.reference @@ -0,0 +1 @@ +[] diff --git a/tests/queries_ported/0_stateless/03096_largest_triangle_3b_crash.sql b/tests/queries_ported/0_stateless/03096_largest_triangle_3b_crash.sql new file mode 100644 index 00000000000..c7a9d1f5ec4 --- /dev/null +++ b/tests/queries_ported/0_stateless/03096_largest_triangle_3b_crash.sql @@ -0,0 +1 @@ +SELECT largest_triangle_three_buckets(1, nan, 1); \ No newline at end of file