diff --git a/datafusion/expr/src/built_in_function.rs b/datafusion/expr/src/built_in_function.rs index 91e3acd0f7bb..e658cde4dd18 100644 --- a/datafusion/expr/src/built_in_function.rs +++ b/datafusion/expr/src/built_in_function.rs @@ -130,8 +130,6 @@ pub enum BuiltinScalarFunction { ArrayPopFront, /// array_pop_back ArrayPopBack, - /// array_dims - ArrayDims, /// array_distinct ArrayDistinct, /// array_element @@ -140,8 +138,6 @@ pub enum BuiltinScalarFunction { ArrayEmpty, /// array_length ArrayLength, - /// array_ndims - ArrayNdims, /// array_position ArrayPosition, /// array_positions @@ -172,8 +168,6 @@ pub enum BuiltinScalarFunction { ArrayUnion, /// array_except ArrayExcept, - /// cardinality - Cardinality, /// array_resize ArrayResize, /// construct an array from columns @@ -385,12 +379,10 @@ impl BuiltinScalarFunction { BuiltinScalarFunction::ArrayHasAll => Volatility::Immutable, BuiltinScalarFunction::ArrayHasAny => Volatility::Immutable, BuiltinScalarFunction::ArrayHas => Volatility::Immutable, - BuiltinScalarFunction::ArrayDims => Volatility::Immutable, BuiltinScalarFunction::ArrayDistinct => Volatility::Immutable, BuiltinScalarFunction::ArrayElement => Volatility::Immutable, BuiltinScalarFunction::ArrayExcept => Volatility::Immutable, BuiltinScalarFunction::ArrayLength => Volatility::Immutable, - BuiltinScalarFunction::ArrayNdims => Volatility::Immutable, BuiltinScalarFunction::ArrayPopFront => Volatility::Immutable, BuiltinScalarFunction::ArrayPopBack => Volatility::Immutable, BuiltinScalarFunction::ArrayPosition => Volatility::Immutable, @@ -409,7 +401,6 @@ impl BuiltinScalarFunction { BuiltinScalarFunction::ArrayIntersect => Volatility::Immutable, BuiltinScalarFunction::ArrayUnion => Volatility::Immutable, BuiltinScalarFunction::ArrayResize => Volatility::Immutable, - BuiltinScalarFunction::Cardinality => Volatility::Immutable, BuiltinScalarFunction::MakeArray => Volatility::Immutable, BuiltinScalarFunction::Ascii => Volatility::Immutable, BuiltinScalarFunction::BitLength => Volatility::Immutable, @@ -561,9 +552,6 @@ impl BuiltinScalarFunction { | BuiltinScalarFunction::ArrayHasAny | BuiltinScalarFunction::ArrayHas | BuiltinScalarFunction::ArrayEmpty => Ok(Boolean), - BuiltinScalarFunction::ArrayDims => { - Ok(List(Arc::new(Field::new("item", UInt64, true)))) - } BuiltinScalarFunction::ArrayDistinct => Ok(input_expr_types[0].clone()), BuiltinScalarFunction::ArrayElement => match &input_expr_types[0] { List(field) @@ -574,7 +562,6 @@ impl BuiltinScalarFunction { ), }, BuiltinScalarFunction::ArrayLength => Ok(UInt64), - BuiltinScalarFunction::ArrayNdims => Ok(UInt64), BuiltinScalarFunction::ArrayPopFront => Ok(input_expr_types[0].clone()), BuiltinScalarFunction::ArrayPopBack => Ok(input_expr_types[0].clone()), BuiltinScalarFunction::ArrayPosition => Ok(UInt64), @@ -622,7 +609,6 @@ impl BuiltinScalarFunction { (dt, _) => Ok(dt), } } - BuiltinScalarFunction::Cardinality => Ok(UInt64), BuiltinScalarFunction::MakeArray => match input_expr_types.len() { 0 => Ok(List(Arc::new(Field::new("item", Null, true)))), _ => { @@ -884,7 +870,6 @@ impl BuiltinScalarFunction { BuiltinScalarFunction::ArrayConcat => { Signature::variadic_any(self.volatility()) } - BuiltinScalarFunction::ArrayDims => Signature::array(self.volatility()), BuiltinScalarFunction::ArrayEmpty => Signature::array(self.volatility()), BuiltinScalarFunction::ArrayElement => { Signature::array_and_index(self.volatility()) @@ -900,7 +885,6 @@ impl BuiltinScalarFunction { BuiltinScalarFunction::ArrayLength => { Signature::variadic_any(self.volatility()) } - BuiltinScalarFunction::ArrayNdims => Signature::array(self.volatility()), BuiltinScalarFunction::ArrayDistinct => Signature::array(self.volatility()), BuiltinScalarFunction::ArrayPosition => { Signature::array_and_element_and_optional_index(self.volatility()) @@ -931,7 +915,6 @@ impl BuiltinScalarFunction { BuiltinScalarFunction::ArrayIntersect => Signature::any(2, self.volatility()), BuiltinScalarFunction::ArrayUnion => Signature::any(2, self.volatility()), - BuiltinScalarFunction::Cardinality => Signature::array(self.volatility()), BuiltinScalarFunction::ArrayResize => { Signature::variadic_any(self.volatility()) } @@ -1481,7 +1464,6 @@ impl BuiltinScalarFunction { BuiltinScalarFunction::ArrayConcat => { &["array_concat", "array_cat", "list_concat", "list_cat"] } - BuiltinScalarFunction::ArrayDims => &["array_dims", "list_dims"], BuiltinScalarFunction::ArrayDistinct => &["array_distinct", "list_distinct"], BuiltinScalarFunction::ArrayEmpty => &["empty"], BuiltinScalarFunction::ArrayElement => &[ @@ -1498,7 +1480,6 @@ impl BuiltinScalarFunction { &["array_has", "list_has", "array_contains", "list_contains"] } BuiltinScalarFunction::ArrayLength => &["array_length", "list_length"], - BuiltinScalarFunction::ArrayNdims => &["array_ndims", "list_ndims"], BuiltinScalarFunction::ArrayPopFront => { &["array_pop_front", "list_pop_front"] } @@ -1534,7 +1515,6 @@ impl BuiltinScalarFunction { BuiltinScalarFunction::ArrayReverse => &["array_reverse", "list_reverse"], BuiltinScalarFunction::ArraySlice => &["array_slice", "list_slice"], BuiltinScalarFunction::ArrayUnion => &["array_union", "list_union"], - BuiltinScalarFunction::Cardinality => &["cardinality"], BuiltinScalarFunction::ArrayResize => &["array_resize", "list_resize"], BuiltinScalarFunction::MakeArray => &["make_array", "make_list"], BuiltinScalarFunction::ArrayIntersect => { diff --git a/datafusion/expr/src/expr_fn.rs b/datafusion/expr/src/expr_fn.rs index 157b8b0989df..07844c27fe50 100644 --- a/datafusion/expr/src/expr_fn.rs +++ b/datafusion/expr/src/expr_fn.rs @@ -628,12 +628,6 @@ scalar_expr!( array, "flattens an array of arrays into a single array." ); -scalar_expr!( - ArrayDims, - array_dims, - array, - "returns an array of the array's dimensions." -); scalar_expr!( ArrayElement, array_element, @@ -652,12 +646,6 @@ scalar_expr!( array dimension, "returns the length of the array dimension." ); -scalar_expr!( - ArrayNdims, - array_ndims, - array, - "returns the number of dimensions of the array." -); scalar_expr!( ArrayDistinct, array_distinct, @@ -738,13 +726,6 @@ scalar_expr!( ); scalar_expr!(ArrayUnion, array_union, array1 array2, "returns an array of the elements in the union of array1 and array2 without duplicates."); -scalar_expr!( - Cardinality, - cardinality, - array, - "returns the total number of elements in the array." -); - scalar_expr!( ArrayResize, array_resize, @@ -1389,9 +1370,7 @@ mod test { test_scalar_expr!(ArraySort, array_sort, array, desc, null_first); test_scalar_expr!(ArrayPopFront, array_pop_front, array); test_scalar_expr!(ArrayPopBack, array_pop_back, array); - test_unary_scalar_expr!(ArrayDims, array_dims); test_scalar_expr!(ArrayLength, array_length, array, dimension); - test_unary_scalar_expr!(ArrayNdims, array_ndims); test_scalar_expr!(ArrayPosition, array_position, array, element, index); test_scalar_expr!(ArrayPositions, array_positions, array, element); test_scalar_expr!(ArrayPrepend, array_prepend, array, element); @@ -1402,7 +1381,6 @@ mod test { test_scalar_expr!(ArrayReplace, array_replace, array, from, to); test_scalar_expr!(ArrayReplaceN, array_replace_n, array, from, to, max); test_scalar_expr!(ArrayReplaceAll, array_replace_all, array, from, to); - test_unary_scalar_expr!(Cardinality, cardinality); test_nary_scalar_expr!(MakeArray, array, input); test_unary_scalar_expr!(ArrowTypeof, arrow_typeof); diff --git a/datafusion/functions-array/src/kernels.rs b/datafusion/functions-array/src/kernels.rs index b9a68b466605..8631e42a0f80 100644 --- a/datafusion/functions-array/src/kernels.rs +++ b/datafusion/functions-array/src/kernels.rs @@ -17,16 +17,19 @@ //! implementation kernels for array functions +use arrow::array::ListArray; use arrow::array::{ Array, ArrayRef, BooleanArray, Float32Array, Float64Array, GenericListArray, Int16Array, Int32Array, Int64Array, Int8Array, LargeStringArray, OffsetSizeTrait, StringArray, UInt16Array, UInt32Array, UInt64Array, UInt8Array, }; -use arrow::datatypes::DataType; +use arrow::buffer::OffsetBuffer; +use arrow::datatypes::Field; +use arrow::datatypes::{DataType, UInt64Type}; use datafusion_common::cast::{ as_int64_array, as_large_list_array, as_list_array, as_string_array, }; -use datafusion_common::{exec_err, DataFusionError}; +use datafusion_common::{exec_err, DataFusionError, Result}; use std::any::type_name; use std::sync::Arc; macro_rules! downcast_arg { @@ -102,7 +105,7 @@ macro_rules! call_array_function { } /// Array_to_string SQL function -pub(super) fn array_to_string(args: &[ArrayRef]) -> datafusion_common::Result { +pub(super) fn array_to_string(args: &[ArrayRef]) -> Result { if args.len() < 2 || args.len() > 3 { return exec_err!("array_to_string expects two or three arguments"); } @@ -254,9 +257,6 @@ pub(super) fn array_to_string(args: &[ArrayRef]) -> datafusion_common::Result [0, 1, 2] /// gen_range(1, 4) => [1, 2, 3] /// gen_range(1, 7, 2) => [1, 3, 5] -pub fn gen_range( - args: &[ArrayRef], - include_upper: i64, -) -> datafusion_common::Result { +pub fn gen_range(args: &[ArrayRef], include_upper: i64) -> Result { let (start_array, stop_array, step_array) = match args.len() { 1 => (None, as_int64_array(&args[0])?, None), 2 => ( @@ -319,3 +316,125 @@ pub fn gen_range( )?); Ok(arr) } + +/// Returns the length of each array dimension +fn compute_array_dims(arr: Option) -> Result>>> { + let mut value = match arr { + Some(arr) => arr, + None => return Ok(None), + }; + if value.is_empty() { + return Ok(None); + } + let mut res = vec![Some(value.len() as u64)]; + + loop { + match value.data_type() { + DataType::List(..) => { + value = downcast_arg!(value, ListArray).value(0); + res.push(Some(value.len() as u64)); + } + _ => return Ok(Some(res)), + } + } +} + +fn generic_list_cardinality( + array: &GenericListArray, +) -> Result { + let result = array + .iter() + .map(|arr| match compute_array_dims(arr)? { + Some(vector) => Ok(Some(vector.iter().map(|x| x.unwrap()).product::())), + None => Ok(None), + }) + .collect::>()?; + Ok(Arc::new(result) as ArrayRef) +} + +/// Cardinality SQL function +pub fn cardinality(args: &[ArrayRef]) -> Result { + if args.len() != 1 { + return exec_err!("cardinality expects one argument"); + } + + match &args[0].data_type() { + DataType::List(_) => { + let list_array = as_list_array(&args[0])?; + generic_list_cardinality::(list_array) + } + DataType::LargeList(_) => { + let list_array = as_large_list_array(&args[0])?; + generic_list_cardinality::(list_array) + } + other => { + exec_err!("cardinality does not support type '{:?}'", other) + } + } +} + +/// Array_dims SQL function +pub fn array_dims(args: &[ArrayRef]) -> Result { + if args.len() != 1 { + return exec_err!("array_dims needs one argument"); + } + + let data = match args[0].data_type() { + DataType::List(_) => { + let array = as_list_array(&args[0])?; + array + .iter() + .map(compute_array_dims) + .collect::>>()? + } + DataType::LargeList(_) => { + let array = as_large_list_array(&args[0])?; + array + .iter() + .map(compute_array_dims) + .collect::>>()? + } + array_type => { + return exec_err!("array_dims does not support type '{array_type:?}'"); + } + }; + + let result = ListArray::from_iter_primitive::(data); + + Ok(Arc::new(result) as ArrayRef) +} + +/// Array_ndims SQL function +pub fn array_ndims(args: &[ArrayRef]) -> Result { + if args.len() != 1 { + return exec_err!("array_ndims needs one argument"); + } + + fn general_list_ndims( + array: &GenericListArray, + ) -> Result { + let mut data = Vec::new(); + let ndims = datafusion_common::utils::list_ndims(array.data_type()); + + for arr in array.iter() { + if arr.is_some() { + data.push(Some(ndims)) + } else { + data.push(None) + } + } + + Ok(Arc::new(UInt64Array::from(data)) as ArrayRef) + } + match args[0].data_type() { + DataType::List(_) => { + let array = as_list_array(&args[0])?; + general_list_ndims::(array) + } + DataType::LargeList(_) => { + let array = as_large_list_array(&args[0])?; + general_list_ndims::(array) + } + array_type => exec_err!("array_ndims does not support type {array_type:?}"), + } +} diff --git a/datafusion/functions-array/src/lib.rs b/datafusion/functions-array/src/lib.rs index e3515ccf9f72..e4cdf69aa93a 100644 --- a/datafusion/functions-array/src/lib.rs +++ b/datafusion/functions-array/src/lib.rs @@ -39,7 +39,10 @@ use std::sync::Arc; /// Fluent-style API for creating `Expr`s pub mod expr_fn { + pub use super::udf::array_dims; + pub use super::udf::array_ndims; pub use super::udf::array_to_string; + pub use super::udf::cardinality; pub use super::udf::gen_series; pub use super::udf::range; } @@ -50,6 +53,9 @@ pub fn register_all(registry: &mut dyn FunctionRegistry) -> Result<()> { udf::array_to_string_udf(), udf::range_udf(), udf::gen_series_udf(), + udf::array_dims_udf(), + udf::cardinality_udf(), + udf::array_ndims_udf(), ]; functions.into_iter().try_for_each(|udf| { let existing_udf = registry.register_udf(udf)?; diff --git a/datafusion/functions-array/src/udf.rs b/datafusion/functions-array/src/udf.rs index 17769419c0b2..a35b45464795 100644 --- a/datafusion/functions-array/src/udf.rs +++ b/datafusion/functions-array/src/udf.rs @@ -202,3 +202,168 @@ impl ScalarUDFImpl for GenSeries { &self.aliases } } + +make_udf_function!( + ArrayDims, + array_dims, + array, + "returns an array of the array's dimensions.", + array_dims_udf +); + +#[derive(Debug)] +pub(super) struct ArrayDims { + signature: Signature, + aliases: Vec, +} + +impl ArrayDims { + pub fn new() -> Self { + Self { + signature: Signature::array(Volatility::Immutable), + aliases: vec!["array_dims".to_string(), "list_dims".to_string()], + } + } +} + +impl ScalarUDFImpl for ArrayDims { + fn as_any(&self) -> &dyn Any { + self + } + fn name(&self) -> &str { + "array_dims" + } + + fn signature(&self) -> &Signature { + &self.signature + } + + fn return_type(&self, arg_types: &[DataType]) -> datafusion_common::Result { + use DataType::*; + Ok(match arg_types[0] { + List(_) | LargeList(_) | FixedSizeList(_, _) => { + List(Arc::new(Field::new("item", UInt64, true))) + } + _ => { + return plan_err!("The array_dims function can only accept List/LargeList/FixedSizeList."); + } + }) + } + + fn invoke(&self, args: &[ColumnarValue]) -> datafusion_common::Result { + let args = ColumnarValue::values_to_arrays(args)?; + crate::kernels::array_dims(&args).map(ColumnarValue::Array) + } + + fn aliases(&self) -> &[String] { + &self.aliases + } +} + +make_udf_function!( + Cardinality, + cardinality, + array, + "returns the total number of elements in the array.", + cardinality_udf +); + +impl Cardinality { + pub fn new() -> Self { + Self { + signature: Signature::array(Volatility::Immutable), + aliases: vec![String::from("cardinality")], + } + } +} + +#[derive(Debug)] +pub(super) struct Cardinality { + signature: Signature, + aliases: Vec, +} +impl ScalarUDFImpl for Cardinality { + fn as_any(&self) -> &dyn Any { + self + } + fn name(&self) -> &str { + "cardinality" + } + + fn signature(&self) -> &Signature { + &self.signature + } + + fn return_type(&self, arg_types: &[DataType]) -> datafusion_common::Result { + use DataType::*; + Ok(match arg_types[0] { + List(_) | LargeList(_) | FixedSizeList(_, _) => UInt64, + _ => { + return plan_err!("The cardinality function can only accept List/LargeList/FixedSizeList."); + } + }) + } + + fn invoke(&self, args: &[ColumnarValue]) -> datafusion_common::Result { + let args = ColumnarValue::values_to_arrays(args)?; + crate::kernels::cardinality(&args).map(ColumnarValue::Array) + } + + fn aliases(&self) -> &[String] { + &self.aliases + } +} + +make_udf_function!( + ArrayNdims, + array_ndims, + array, + "returns the number of dimensions of the array.", + array_ndims_udf +); + +#[derive(Debug)] +pub(super) struct ArrayNdims { + signature: Signature, + aliases: Vec, +} +impl ArrayNdims { + pub fn new() -> Self { + Self { + signature: Signature::array(Volatility::Immutable), + aliases: vec![String::from("array_ndims"), String::from("list_ndims")], + } + } +} + +impl ScalarUDFImpl for ArrayNdims { + fn as_any(&self) -> &dyn Any { + self + } + fn name(&self) -> &str { + "array_ndims" + } + + fn signature(&self) -> &Signature { + &self.signature + } + + fn return_type(&self, arg_types: &[DataType]) -> datafusion_common::Result { + use DataType::*; + Ok(match arg_types[0] { + List(_) | LargeList(_) | FixedSizeList(_, _) => UInt64, + _ => { + return plan_err!("The array_ndims function can only accept List/LargeList/FixedSizeList."); + } + }) + } + + fn invoke(&self, args: &[ColumnarValue]) -> datafusion_common::Result { + let args = ColumnarValue::values_to_arrays(args)?; + crate::kernels::array_ndims(&args).map(ColumnarValue::Array) + } + + fn aliases(&self) -> &[String] { + &self.aliases + } +} diff --git a/datafusion/physical-expr/src/array_expressions.rs b/datafusion/physical-expr/src/array_expressions.rs index 01b2ae13c8d4..c10f5df54027 100644 --- a/datafusion/physical-expr/src/array_expressions.rs +++ b/datafusion/physical-expr/src/array_expressions.rs @@ -193,28 +193,6 @@ fn compute_array_length( } } -/// Returns the length of each array dimension -fn compute_array_dims(arr: Option) -> Result>>> { - let mut value = match arr { - Some(arr) => arr, - None => return Ok(None), - }; - if value.is_empty() { - return Ok(None); - } - let mut res = vec![Some(value.len() as u64)]; - - loop { - match value.data_type() { - DataType::List(..) => { - value = downcast_arg!(value, ListArray).value(0); - res.push(Some(value.len() as u64)); - } - _ => return Ok(Some(res)), - } - } -} - fn check_datatypes(name: &str, args: &[&ArrayRef]) -> Result<()> { let data_type = args[0].data_type(); if !args.iter().all(|arg| { @@ -1938,40 +1916,6 @@ pub fn array_intersect(args: &[ArrayRef]) -> Result { general_set_op(array1, array2, SetOp::Intersect) } -/// Cardinality SQL function -pub fn cardinality(args: &[ArrayRef]) -> Result { - if args.len() != 1 { - return exec_err!("cardinality expects one argument"); - } - - match &args[0].data_type() { - DataType::List(_) => { - let list_array = as_list_array(&args[0])?; - generic_list_cardinality::(list_array) - } - DataType::LargeList(_) => { - let list_array = as_large_list_array(&args[0])?; - generic_list_cardinality::(list_array) - } - other => { - exec_err!("cardinality does not support type '{:?}'", other) - } - } -} - -fn generic_list_cardinality( - array: &GenericListArray, -) -> Result { - let result = array - .iter() - .map(|arr| match compute_array_dims(arr)? { - Some(vector) => Ok(Some(vector.iter().map(|x| x.unwrap()).product::())), - None => Ok(None), - }) - .collect::>()?; - Ok(Arc::new(result) as ArrayRef) -} - // Create new offsets that are euqiavlent to `flatten` the array. fn get_offsets_for_flatten( offsets: OffsetBuffer, @@ -2074,72 +2018,6 @@ pub fn array_length(args: &[ArrayRef]) -> Result { } } -/// Array_dims SQL function -pub fn array_dims(args: &[ArrayRef]) -> Result { - if args.len() != 1 { - return exec_err!("array_dims needs one argument"); - } - - let data = match args[0].data_type() { - DataType::List(_) => { - let array = as_list_array(&args[0])?; - array - .iter() - .map(compute_array_dims) - .collect::>>()? - } - DataType::LargeList(_) => { - let array = as_large_list_array(&args[0])?; - array - .iter() - .map(compute_array_dims) - .collect::>>()? - } - array_type => { - return exec_err!("array_dims does not support type '{array_type:?}'"); - } - }; - - let result = ListArray::from_iter_primitive::(data); - - Ok(Arc::new(result) as ArrayRef) -} - -/// Array_ndims SQL function -pub fn array_ndims(args: &[ArrayRef]) -> Result { - if args.len() != 1 { - return exec_err!("array_ndims needs one argument"); - } - - fn general_list_ndims( - array: &GenericListArray, - ) -> Result { - let mut data = Vec::new(); - let ndims = datafusion_common::utils::list_ndims(array.data_type()); - - for arr in array.iter() { - if arr.is_some() { - data.push(Some(ndims)) - } else { - data.push(None) - } - } - - Ok(Arc::new(UInt64Array::from(data)) as ArrayRef) - } - match args[0].data_type() { - DataType::List(_) => { - let array = as_list_array(&args[0])?; - general_list_ndims::(array) - } - DataType::LargeList(_) => { - let array = as_large_list_array(&args[0])?; - general_list_ndims::(array) - } - array_type => exec_err!("array_ndims does not support type {array_type:?}"), - } -} - /// Represents the type of comparison for array_has. #[derive(Debug, PartialEq)] enum ComparisonType { diff --git a/datafusion/physical-expr/src/functions.rs b/datafusion/physical-expr/src/functions.rs index 56ad92082d9f..d9d1d704db2e 100644 --- a/datafusion/physical-expr/src/functions.rs +++ b/datafusion/physical-expr/src/functions.rs @@ -339,9 +339,6 @@ pub fn create_physical_fun( BuiltinScalarFunction::ArrayHas => Arc::new(|args| { make_scalar_function_inner(array_expressions::array_has)(args) }), - BuiltinScalarFunction::ArrayDims => Arc::new(|args| { - make_scalar_function_inner(array_expressions::array_dims)(args) - }), BuiltinScalarFunction::ArrayDistinct => Arc::new(|args| { make_scalar_function_inner(array_expressions::array_distinct)(args) }), @@ -357,9 +354,6 @@ pub fn create_physical_fun( BuiltinScalarFunction::Flatten => { Arc::new(|args| make_scalar_function_inner(array_expressions::flatten)(args)) } - BuiltinScalarFunction::ArrayNdims => Arc::new(|args| { - make_scalar_function_inner(array_expressions::array_ndims)(args) - }), BuiltinScalarFunction::ArrayPopFront => Arc::new(|args| { make_scalar_function_inner(array_expressions::array_pop_front)(args) }), @@ -405,9 +399,6 @@ pub fn create_physical_fun( BuiltinScalarFunction::ArrayIntersect => Arc::new(|args| { make_scalar_function_inner(array_expressions::array_intersect)(args) }), - BuiltinScalarFunction::Cardinality => Arc::new(|args| { - make_scalar_function_inner(array_expressions::cardinality)(args) - }), BuiltinScalarFunction::ArrayResize => Arc::new(|args| { make_scalar_function_inner(array_expressions::array_resize)(args) }), diff --git a/datafusion/proto/proto/datafusion.proto b/datafusion/proto/proto/datafusion.proto index 528f977a94d3..ef2db5a75d57 100644 --- a/datafusion/proto/proto/datafusion.proto +++ b/datafusion/proto/proto/datafusion.proto @@ -636,17 +636,17 @@ enum ScalarFunction { Gcd = 85; ArrayAppend = 86; ArrayConcat = 87; - ArrayDims = 88; + // 88 was ArrayDims ArrayRepeat = 89; ArrayLength = 90; - ArrayNdims = 91; + // 91 was ArrayNdims ArrayPosition = 92; ArrayPositions = 93; ArrayPrepend = 94; ArrayRemove = 95; ArrayReplace = 96; // 97 was ArrayToString - Cardinality = 98; + // 98 was Cardinality ArrayElement = 99; ArraySlice = 100; Cot = 103; diff --git a/datafusion/proto/src/generated/pbjson.rs b/datafusion/proto/src/generated/pbjson.rs index d6ee204d5cf3..e9e1f270864a 100644 --- a/datafusion/proto/src/generated/pbjson.rs +++ b/datafusion/proto/src/generated/pbjson.rs @@ -22401,16 +22401,13 @@ impl serde::Serialize for ScalarFunction { Self::Gcd => "Gcd", Self::ArrayAppend => "ArrayAppend", Self::ArrayConcat => "ArrayConcat", - Self::ArrayDims => "ArrayDims", Self::ArrayRepeat => "ArrayRepeat", Self::ArrayLength => "ArrayLength", - Self::ArrayNdims => "ArrayNdims", Self::ArrayPosition => "ArrayPosition", Self::ArrayPositions => "ArrayPositions", Self::ArrayPrepend => "ArrayPrepend", Self::ArrayRemove => "ArrayRemove", Self::ArrayReplace => "ArrayReplace", - Self::Cardinality => "Cardinality", Self::ArrayElement => "ArrayElement", Self::ArraySlice => "ArraySlice", Self::Cot => "Cot", @@ -22535,16 +22532,13 @@ impl<'de> serde::Deserialize<'de> for ScalarFunction { "Gcd", "ArrayAppend", "ArrayConcat", - "ArrayDims", "ArrayRepeat", "ArrayLength", - "ArrayNdims", "ArrayPosition", "ArrayPositions", "ArrayPrepend", "ArrayRemove", "ArrayReplace", - "Cardinality", "ArrayElement", "ArraySlice", "Cot", @@ -22698,16 +22692,13 @@ impl<'de> serde::Deserialize<'de> for ScalarFunction { "Gcd" => Ok(ScalarFunction::Gcd), "ArrayAppend" => Ok(ScalarFunction::ArrayAppend), "ArrayConcat" => Ok(ScalarFunction::ArrayConcat), - "ArrayDims" => Ok(ScalarFunction::ArrayDims), "ArrayRepeat" => Ok(ScalarFunction::ArrayRepeat), "ArrayLength" => Ok(ScalarFunction::ArrayLength), - "ArrayNdims" => Ok(ScalarFunction::ArrayNdims), "ArrayPosition" => Ok(ScalarFunction::ArrayPosition), "ArrayPositions" => Ok(ScalarFunction::ArrayPositions), "ArrayPrepend" => Ok(ScalarFunction::ArrayPrepend), "ArrayRemove" => Ok(ScalarFunction::ArrayRemove), "ArrayReplace" => Ok(ScalarFunction::ArrayReplace), - "Cardinality" => Ok(ScalarFunction::Cardinality), "ArrayElement" => Ok(ScalarFunction::ArrayElement), "ArraySlice" => Ok(ScalarFunction::ArraySlice), "Cot" => Ok(ScalarFunction::Cot), diff --git a/datafusion/proto/src/generated/prost.rs b/datafusion/proto/src/generated/prost.rs index 432dd4a8a6a0..eca0156dd223 100644 --- a/datafusion/proto/src/generated/prost.rs +++ b/datafusion/proto/src/generated/prost.rs @@ -2725,17 +2725,17 @@ pub enum ScalarFunction { Gcd = 85, ArrayAppend = 86, ArrayConcat = 87, - ArrayDims = 88, + /// 88 was ArrayDims ArrayRepeat = 89, ArrayLength = 90, - ArrayNdims = 91, + /// 91 was ArrayNdims ArrayPosition = 92, ArrayPositions = 93, ArrayPrepend = 94, ArrayRemove = 95, ArrayReplace = 96, /// 97 was ArrayToString - Cardinality = 98, + /// 98 was Cardinality ArrayElement = 99, ArraySlice = 100, Cot = 103, @@ -2861,16 +2861,13 @@ impl ScalarFunction { ScalarFunction::Gcd => "Gcd", ScalarFunction::ArrayAppend => "ArrayAppend", ScalarFunction::ArrayConcat => "ArrayConcat", - ScalarFunction::ArrayDims => "ArrayDims", ScalarFunction::ArrayRepeat => "ArrayRepeat", ScalarFunction::ArrayLength => "ArrayLength", - ScalarFunction::ArrayNdims => "ArrayNdims", ScalarFunction::ArrayPosition => "ArrayPosition", ScalarFunction::ArrayPositions => "ArrayPositions", ScalarFunction::ArrayPrepend => "ArrayPrepend", ScalarFunction::ArrayRemove => "ArrayRemove", ScalarFunction::ArrayReplace => "ArrayReplace", - ScalarFunction::Cardinality => "Cardinality", ScalarFunction::ArrayElement => "ArrayElement", ScalarFunction::ArraySlice => "ArraySlice", ScalarFunction::Cot => "Cot", @@ -2989,16 +2986,13 @@ impl ScalarFunction { "Gcd" => Some(Self::Gcd), "ArrayAppend" => Some(Self::ArrayAppend), "ArrayConcat" => Some(Self::ArrayConcat), - "ArrayDims" => Some(Self::ArrayDims), "ArrayRepeat" => Some(Self::ArrayRepeat), "ArrayLength" => Some(Self::ArrayLength), - "ArrayNdims" => Some(Self::ArrayNdims), "ArrayPosition" => Some(Self::ArrayPosition), "ArrayPositions" => Some(Self::ArrayPositions), "ArrayPrepend" => Some(Self::ArrayPrepend), "ArrayRemove" => Some(Self::ArrayRemove), "ArrayReplace" => Some(Self::ArrayReplace), - "Cardinality" => Some(Self::Cardinality), "ArrayElement" => Some(Self::ArrayElement), "ArraySlice" => Some(Self::ArraySlice), "Cot" => Some(Self::Cot), diff --git a/datafusion/proto/src/logical_plan/from_proto.rs b/datafusion/proto/src/logical_plan/from_proto.rs index b1fd128d0833..29e51e03730e 100644 --- a/datafusion/proto/src/logical_plan/from_proto.rs +++ b/datafusion/proto/src/logical_plan/from_proto.rs @@ -47,15 +47,15 @@ use datafusion_common::{ use datafusion_expr::expr::Unnest; use datafusion_expr::window_frame::{check_window_frame, regularize_window_order_by}; use datafusion_expr::{ - acosh, array, array_append, array_concat, array_dims, array_distinct, array_element, - array_empty, array_except, array_has, array_has_all, array_has_any, array_intersect, - array_length, array_ndims, array_pop_back, array_pop_front, array_position, - array_positions, array_prepend, array_remove, array_remove_all, array_remove_n, - array_repeat, array_replace, array_replace_all, array_replace_n, array_resize, - array_slice, array_sort, array_union, arrow_typeof, ascii, asinh, atan, atan2, atanh, - bit_length, btrim, cardinality, cbrt, ceil, character_length, chr, coalesce, - concat_expr, concat_ws_expr, cos, cosh, cot, current_date, current_time, date_bin, - date_part, date_trunc, degrees, digest, ends_with, exp, + acosh, array, array_append, array_concat, array_distinct, array_element, array_empty, + array_except, array_has, array_has_all, array_has_any, array_intersect, array_length, + array_pop_back, array_pop_front, array_position, array_positions, array_prepend, + array_remove, array_remove_all, array_remove_n, array_repeat, array_replace, + array_replace_all, array_replace_n, array_resize, array_slice, array_sort, + array_union, arrow_typeof, ascii, asinh, atan, atan2, atanh, bit_length, btrim, cbrt, + ceil, character_length, chr, coalesce, concat_expr, concat_ws_expr, cos, cosh, cot, + current_date, current_time, date_bin, date_part, date_trunc, degrees, digest, + ends_with, exp, expr::{self, InList, Sort, WindowFunction}, factorial, find_in_set, flatten, floor, from_unixtime, gcd, initcap, instr, iszero, lcm, left, levenshtein, ln, log, log10, log2, @@ -486,12 +486,10 @@ impl From<&protobuf::ScalarFunction> for BuiltinScalarFunction { ScalarFunction::ArrayHasAll => Self::ArrayHasAll, ScalarFunction::ArrayHasAny => Self::ArrayHasAny, ScalarFunction::ArrayHas => Self::ArrayHas, - ScalarFunction::ArrayDims => Self::ArrayDims, ScalarFunction::ArrayDistinct => Self::ArrayDistinct, ScalarFunction::ArrayElement => Self::ArrayElement, ScalarFunction::Flatten => Self::Flatten, ScalarFunction::ArrayLength => Self::ArrayLength, - ScalarFunction::ArrayNdims => Self::ArrayNdims, ScalarFunction::ArrayPopFront => Self::ArrayPopFront, ScalarFunction::ArrayPopBack => Self::ArrayPopBack, ScalarFunction::ArrayPosition => Self::ArrayPosition, @@ -509,7 +507,6 @@ impl From<&protobuf::ScalarFunction> for BuiltinScalarFunction { ScalarFunction::ArrayIntersect => Self::ArrayIntersect, ScalarFunction::ArrayUnion => Self::ArrayUnion, ScalarFunction::ArrayResize => Self::ArrayResize, - ScalarFunction::Cardinality => Self::Cardinality, ScalarFunction::Array => Self::MakeArray, ScalarFunction::DatePart => Self::DatePart, ScalarFunction::DateTrunc => Self::DateTrunc, @@ -1526,16 +1523,10 @@ pub fn parse_expr( parse_expr(&args[2], registry, codec)?, parse_expr(&args[3], registry, codec)?, )), - ScalarFunction::Cardinality => { - Ok(cardinality(parse_expr(&args[0], registry, codec)?)) - } ScalarFunction::ArrayLength => Ok(array_length( parse_expr(&args[0], registry, codec)?, parse_expr(&args[1], registry, codec)?, )), - ScalarFunction::ArrayDims => { - Ok(array_dims(parse_expr(&args[0], registry, codec)?)) - } ScalarFunction::ArrayDistinct => { Ok(array_distinct(parse_expr(&args[0], registry, codec)?)) } @@ -1546,9 +1537,6 @@ pub fn parse_expr( ScalarFunction::ArrayEmpty => { Ok(array_empty(parse_expr(&args[0], registry, codec)?)) } - ScalarFunction::ArrayNdims => { - Ok(array_ndims(parse_expr(&args[0], registry, codec)?)) - } ScalarFunction::ArrayUnion => Ok(array_union( parse_expr(&args[0], registry, codec)?, parse_expr(&args[1], registry, codec)?, diff --git a/datafusion/proto/src/logical_plan/to_proto.rs b/datafusion/proto/src/logical_plan/to_proto.rs index d238884374fb..424e1414af11 100644 --- a/datafusion/proto/src/logical_plan/to_proto.rs +++ b/datafusion/proto/src/logical_plan/to_proto.rs @@ -1463,12 +1463,10 @@ impl TryFrom<&BuiltinScalarFunction> for protobuf::ScalarFunction { BuiltinScalarFunction::ArrayHasAll => Self::ArrayHasAll, BuiltinScalarFunction::ArrayHasAny => Self::ArrayHasAny, BuiltinScalarFunction::ArrayHas => Self::ArrayHas, - BuiltinScalarFunction::ArrayDims => Self::ArrayDims, BuiltinScalarFunction::ArrayDistinct => Self::ArrayDistinct, BuiltinScalarFunction::ArrayElement => Self::ArrayElement, BuiltinScalarFunction::Flatten => Self::Flatten, BuiltinScalarFunction::ArrayLength => Self::ArrayLength, - BuiltinScalarFunction::ArrayNdims => Self::ArrayNdims, BuiltinScalarFunction::ArrayPopFront => Self::ArrayPopFront, BuiltinScalarFunction::ArrayPopBack => Self::ArrayPopBack, BuiltinScalarFunction::ArrayPosition => Self::ArrayPosition, @@ -1486,7 +1484,6 @@ impl TryFrom<&BuiltinScalarFunction> for protobuf::ScalarFunction { BuiltinScalarFunction::ArraySlice => Self::ArraySlice, BuiltinScalarFunction::ArrayIntersect => Self::ArrayIntersect, BuiltinScalarFunction::ArrayUnion => Self::ArrayUnion, - BuiltinScalarFunction::Cardinality => Self::Cardinality, BuiltinScalarFunction::MakeArray => Self::Array, BuiltinScalarFunction::DatePart => Self::DatePart, BuiltinScalarFunction::DateTrunc => Self::DateTrunc, diff --git a/datafusion/proto/tests/cases/roundtrip_logical_plan.rs b/datafusion/proto/tests/cases/roundtrip_logical_plan.rs index 0ec44190ef7a..702ae99babd8 100644 --- a/datafusion/proto/tests/cases/roundtrip_logical_plan.rs +++ b/datafusion/proto/tests/cases/roundtrip_logical_plan.rs @@ -583,6 +583,11 @@ async fn roundtrip_expr_api() -> Result<()> { encode(col("a").cast_to(&DataType::Utf8, &schema)?, lit("hex")), decode(lit("1234"), lit("hex")), array_to_string(array(vec![lit(1), lit(2), lit(3)]), lit(",")), + array_dims(array(vec![lit(1), lit(2), lit(3)])), + array_ndims(array(vec![lit(1), lit(2), lit(3)])), + cardinality(array(vec![lit(1), lit(2), lit(3)])), + range(lit(1), lit(10), lit(2)), + gen_series(lit(1), lit(10), lit(2)), ]; // ensure expressions created with the expr api can be round tripped