From b1df1b7fb5e3a31b0e93a1d1f3b4b7e21d10ce82 Mon Sep 17 00:00:00 2001 From: jayzhan211 Date: Wed, 22 May 2024 11:03:16 +0800 Subject: [PATCH] move median test Signed-off-by: jayzhan211 --- .../physical-expr/src/aggregate/median.rs | 384 ------------------ .../sqllogictest/test_files/aggregate.slt | 156 +++++++ 2 files changed, 156 insertions(+), 384 deletions(-) diff --git a/datafusion/physical-expr/src/aggregate/median.rs b/datafusion/physical-expr/src/aggregate/median.rs index f4f56fa46ed5..ee0fce3fabe7 100644 --- a/datafusion/physical-expr/src/aggregate/median.rs +++ b/datafusion/physical-expr/src/aggregate/median.rs @@ -295,387 +295,3 @@ fn calculate_median( Some(*median) } } - -#[cfg(test)] -mod tests { - use super::*; - use crate::expressions::col; - use crate::expressions::tests::aggregate; - use crate::generic_test_distinct_op; - use arrow::{array::*, datatypes::*}; - - #[test] - fn median_decimal() -> Result<()> { - // test median - let array: ArrayRef = Arc::new( - (1..7) - .map(Some) - .collect::<Decimal128Array>() - .with_precision_and_scale(10, 4)?, - ); - - generic_test_distinct_op!( - array, - DataType::Decimal128(10, 4), - Median, - false, - ScalarValue::Decimal128(Some(3), 10, 4) - ) - } - - #[test] - fn median_decimal_with_nulls() -> Result<()> { - let array: ArrayRef = Arc::new( - (1..6) - .map(|i| if i == 2 { None } else { Some(i) }) - .collect::<Decimal128Array>() - .with_precision_and_scale(10, 4)?, - ); - generic_test_distinct_op!( - array, - DataType::Decimal128(10, 4), - Median, - false, - ScalarValue::Decimal128(Some(3), 10, 4) - ) - } - - #[test] - fn median_decimal_all_nulls() -> Result<()> { - // test median - let array: ArrayRef = Arc::new( - std::iter::repeat::<Option<i128>>(None) - .take(6) - .collect::<Decimal128Array>() - .with_precision_and_scale(10, 4)?, - ); - generic_test_distinct_op!( - array, - DataType::Decimal128(10, 4), - Median, - false, - ScalarValue::Decimal128(None, 10, 4) - ) - } - - #[test] - fn median_i32_odd() -> Result<()> { - let a: ArrayRef = 
Arc::new(Int32Array::from(vec![1, 2, 3, 4, 5])); - generic_test_distinct_op!( - a, - DataType::Int32, - Median, - false, - ScalarValue::from(3_i32) - ) - } - - #[test] - fn median_i32_even() -> Result<()> { - let a: ArrayRef = Arc::new(Int32Array::from(vec![1, 2, 3, 4, 5, 6])); - generic_test_distinct_op!( - a, - DataType::Int32, - Median, - false, - ScalarValue::from(3_i32) - ) - } - - #[test] - fn median_i32_with_nulls() -> Result<()> { - let a: ArrayRef = Arc::new(Int32Array::from(vec![ - Some(1), - None, - Some(3), - Some(4), - Some(5), - ])); - generic_test_distinct_op!( - a, - DataType::Int32, - Median, - false, - ScalarValue::from(3i32) - ) - } - - #[test] - fn median_i32_all_nulls() -> Result<()> { - let a: ArrayRef = Arc::new(Int32Array::from(vec![None, None])); - generic_test_distinct_op!( - a, - DataType::Int32, - Median, - false, - ScalarValue::Int32(None) - ) - } - - #[test] - fn median_u32_odd() -> Result<()> { - let a: ArrayRef = - Arc::new(UInt32Array::from(vec![1_u32, 2_u32, 3_u32, 4_u32, 5_u32])); - generic_test_distinct_op!( - a, - DataType::UInt32, - Median, - false, - ScalarValue::from(3u32) - ) - } - - #[test] - fn median_u32_even() -> Result<()> { - let a: ArrayRef = Arc::new(UInt32Array::from(vec![ - 1_u32, 2_u32, 3_u32, 4_u32, 5_u32, 6_u32, - ])); - generic_test_distinct_op!( - a, - DataType::UInt32, - Median, - false, - ScalarValue::from(3u32) - ) - } - - #[test] - fn median_f32_odd() -> Result<()> { - let a: ArrayRef = - Arc::new(Float32Array::from(vec![1_f32, 2_f32, 3_f32, 4_f32, 5_f32])); - generic_test_distinct_op!( - a, - DataType::Float32, - Median, - false, - ScalarValue::from(3_f32) - ) - } - - #[test] - fn median_f32_even() -> Result<()> { - let a: ArrayRef = Arc::new(Float32Array::from(vec![ - 1_f32, 2_f32, 3_f32, 4_f32, 5_f32, 6_f32, - ])); - generic_test_distinct_op!( - a, - DataType::Float32, - Median, - false, - ScalarValue::from(3.5_f32) - ) - } - - #[test] - fn median_f64_odd() -> Result<()> { - let a: ArrayRef = - 
Arc::new(Float64Array::from(vec![1_f64, 2_f64, 3_f64, 4_f64, 5_f64])); - generic_test_distinct_op!( - a, - DataType::Float64, - Median, - false, - ScalarValue::from(3_f64) - ) - } - - #[test] - fn median_f64_even() -> Result<()> { - let a: ArrayRef = Arc::new(Float64Array::from(vec![ - 1_f64, 2_f64, 3_f64, 4_f64, 5_f64, 6_f64, - ])); - generic_test_distinct_op!( - a, - DataType::Float64, - Median, - false, - ScalarValue::from(3.5_f64) - ) - } - - #[test] - fn distinct_median_decimal() -> Result<()> { - let array: ArrayRef = Arc::new( - vec![1, 1, 1, 1, 2, 3, 1, 1, 3] - .into_iter() - .map(Some) - .collect::<Decimal128Array>() - .with_precision_and_scale(10, 4)?, - ); - - generic_test_distinct_op!( - array, - DataType::Decimal128(10, 4), - Median, - true, - ScalarValue::Decimal128(Some(2), 10, 4) - ) - } - - #[test] - fn distinct_median_decimal_with_nulls() -> Result<()> { - let array: ArrayRef = Arc::new( - vec![Some(3), Some(1), None, Some(3), Some(2), Some(3), Some(3)] - .into_iter() - .collect::<Decimal128Array>() - .with_precision_and_scale(10, 4)?, - ); - generic_test_distinct_op!( - array, - DataType::Decimal128(10, 4), - Median, - true, - ScalarValue::Decimal128(Some(2), 10, 4) - ) - } - - #[test] - fn distinct_median_decimal_all_nulls() -> Result<()> { - let array: ArrayRef = Arc::new( - std::iter::repeat::<Option<i128>>(None) - .take(6) - .collect::<Decimal128Array>() - .with_precision_and_scale(10, 4)?, - ); - generic_test_distinct_op!( - array, - DataType::Decimal128(10, 4), - Median, - true, - ScalarValue::Decimal128(None, 10, 4) - ) - } - - #[test] - fn distinct_median_i32_odd() -> Result<()> { - let a: ArrayRef = Arc::new(Int32Array::from(vec![2, 1, 1, 2, 1, 3])); - generic_test_distinct_op!( - a, - DataType::Int32, - Median, - true, - ScalarValue::from(2_i32) - ) - } - - #[test] - fn distinct_median_i32_even() -> Result<()> { - let a: ArrayRef = Arc::new(Int32Array::from(vec![1, 1, 3, 1, 1])); - generic_test_distinct_op!( - a, - DataType::Int32, - Median, - true, - ScalarValue::from(2_i32) - ) - } - - #[test] - 
fn distinct_median_i32_with_nulls() -> Result<()> { - let a: ArrayRef = Arc::new(Int32Array::from(vec![ - Some(1), - None, - Some(1), - Some(1), - Some(3), - ])); - generic_test_distinct_op!( - a, - DataType::Int32, - Median, - true, - ScalarValue::from(2i32) - ) - } - - #[test] - fn distinct_median_i32_all_nulls() -> Result<()> { - let a: ArrayRef = Arc::new(Int32Array::from(vec![None, None])); - generic_test_distinct_op!( - a, - DataType::Int32, - Median, - true, - ScalarValue::Int32(None) - ) - } - - #[test] - fn distinct_median_u32_odd() -> Result<()> { - let a: ArrayRef = - Arc::new(UInt32Array::from(vec![1_u32, 1_u32, 2_u32, 1_u32, 3_u32])); - generic_test_distinct_op!( - a, - DataType::UInt32, - Median, - true, - ScalarValue::from(2u32) - ) - } - - #[test] - fn distinct_median_u32_even() -> Result<()> { - let a: ArrayRef = Arc::new(UInt32Array::from(vec![ - 1_u32, 1_u32, 1_u32, 1_u32, 3_u32, 3_u32, - ])); - generic_test_distinct_op!( - a, - DataType::UInt32, - Median, - true, - ScalarValue::from(2u32) - ) - } - - #[test] - fn distinct_median_f32_odd() -> Result<()> { - let a: ArrayRef = - Arc::new(Float32Array::from(vec![3_f32, 2_f32, 1_f32, 1_f32, 1_f32])); - generic_test_distinct_op!( - a, - DataType::Float32, - Median, - true, - ScalarValue::from(2_f32) - ) - } - - #[test] - fn distinct_median_f32_even() -> Result<()> { - let a: ArrayRef = - Arc::new(Float32Array::from(vec![1_f32, 1_f32, 1_f32, 1_f32, 2_f32])); - generic_test_distinct_op!( - a, - DataType::Float32, - Median, - true, - ScalarValue::from(1.5_f32) - ) - } - - #[test] - fn distinct_median_f64_odd() -> Result<()> { - let a: ArrayRef = - Arc::new(Float64Array::from(vec![1_f64, 1_f64, 1_f64, 2_f64, 3_f64])); - generic_test_distinct_op!( - a, - DataType::Float64, - Median, - true, - ScalarValue::from(2_f64) - ) - } - - #[test] - fn distinct_median_f64_even() -> Result<()> { - let a: ArrayRef = - Arc::new(Float64Array::from(vec![1_f64, 1_f64, 1_f64, 1_f64, 2_f64])); - generic_test_distinct_op!( - 
a, - DataType::Float64, - Median, - true, - ScalarValue::from(1.5_f64) - ) - } -} diff --git a/datafusion/sqllogictest/test_files/aggregate.slt b/datafusion/sqllogictest/test_files/aggregate.slt index 983f8a085ba9..ed1c0773a9b8 100644 --- a/datafusion/sqllogictest/test_files/aggregate.slt +++ b/datafusion/sqllogictest/test_files/aggregate.slt @@ -619,6 +619,162 @@ SELECT approx_median(col_f64_nan) FROM median_table ---- NaN +# median decimal +statement ok +create table t(c decimal(10, 4)) as values (0.0001), (0.0002), (0.0003), (0.0004), (0.0005), (0.0006); + +query RT +select median(c), arrow_typeof(median(c)) from t; +---- +0.0003 Decimal128(10, 4) + +statement ok +drop table t; + +# median decimal with nulls +statement ok +create table t(c decimal(10, 4)) as values (0.0001), (null), (0.0003), (0.0004), (0.0005); + +query RT +select median(c), arrow_typeof(median(c)) from t; +---- +0.0003 Decimal128(10, 4) + +statement ok +drop table t; + +# median decimal with all nulls +statement ok +create table t(c decimal(10, 4)) as values (null), (null), (null); + +query RT +select median(c), arrow_typeof(median(c)) from t; +---- +NULL Decimal128(10, 4) + +statement ok +drop table t; + +# median odd +statement ok +create table t(c int) as values (1), (2), (3), (4), (5); + +query I +select median(c) from t; +---- +3 + +statement ok +drop table t; + +# median even +statement ok +create table t(c int) as values (1), (2), (3), (4), (5), (6); + +query I +select median(c) from t; +---- +3 + +statement ok +drop table t; + +# median with nulls +statement ok +create table t(c int) as values (1), (null), (3), (4), (5); + +query I +select median(c) from t; +---- +3 + +statement ok +drop table t; + +# median with all nulls +statement ok +create table t(c int) as values (null), (null), (null); + +query I +select median(c) from t; +---- +NULL + +statement ok +drop table t; + +# median u32 +statement ok +create table t(c int unsigned) as values (1), (2), (3), (4), (5); + +query I +select 
median(c) from t; +---- +3 + +statement ok +drop table t; + +# median f32 +statement ok +create table t(c float) as values (1.1), (2.2), (3.3), (4.4), (5.5); + +query R +select median(c) from t; +---- +3.3 + +statement ok +drop table t; + +# median distinct decimal +statement ok +create table t(c decimal(10, 4)) as values (0.0001), (0.0001), (0.0001), (0.0001), (0.0002), (0.0002), (0.0003), (0.0003); + +query R +select median(distinct c) from t; +---- +0.0002 + +statement ok +drop table t; + +# median distinct decimal with nulls +statement ok +create table t(c decimal(10, 4)) as values (0.0001), (0.0001), (0.0001), (null), (null), (0.0002), (0.0003), (0.0003); + +query R +select median(distinct c) from t; +---- +0.0002 + +statement ok +drop table t; + +# distinct median f32 +statement ok +create table t(c float) as values (1.1), (1.1), (1.1), (1.1), (2.2), (2.2), (3.3), (3.3); + +query R +select median(distinct c) from t; +---- +2.2 + +statement ok +drop table t; + +# distinct median i32 +statement ok +create table t(c int) as values (1), (1), (1), (1), (2), (2), (3), (3); + +query I +select median(distinct c) from t; +---- +2 + +statement ok +drop table t; + # median_multi # test case for https://github.com/apache/datafusion/issues/3105 # has an intermediate grouping