From 8b4d148a2537848ce9d9850224e212cceca77ac5 Mon Sep 17 00:00:00 2001 From: my-vegetable-has-exploded Date: Mon, 18 Dec 2023 21:16:45 +0800 Subject: [PATCH 1/5] Implement struct cast. --- arrow-cast/src/cast.rs | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/arrow-cast/src/cast.rs b/arrow-cast/src/cast.rs index a75354cf9b35..9e774b51dbce 100644 --- a/arrow-cast/src/cast.rs +++ b/arrow-cast/src/cast.rs @@ -160,6 +160,12 @@ pub fn can_cast_types(from_type: &DataType, to_type: &DataType) -> bool { (Decimal128(_, _) | Decimal256(_, _), Utf8 | LargeUtf8) => true, // Utf8 to decimal (Utf8 | LargeUtf8, Decimal128(_, _) | Decimal256(_, _)) => true, + (Struct(from_fields), Struct(to_fields)) => { + from_fields.len() == to_fields.len() && + from_fields.iter().zip(to_fields.iter()).all(|(f1, f2)| { + can_cast_types(f1.data_type(), f2.data_type()) + }) + } (Struct(_), _) => false, (_, Struct(_)) => false, (_, Boolean) => { @@ -1138,11 +1144,22 @@ pub fn cast_with_options( ))), } } + (Struct(_), Struct(fields_r)) => { + let array = array.as_any().downcast_ref::<StructArray>().unwrap(); + let fields = array + .columns() + .iter() + .zip(fields_r.iter()) + .map(|(l, field)| cast_with_options(l, field.data_type(), cast_options)) + .collect::<Result<Vec<ArrayRef>, ArrowError>>()?; + let array = StructArray::new(fields_r.clone(), fields, array.nulls().cloned()); + Ok(Arc::new(array) as ArrayRef) + } (Struct(_), _) => Err(ArrowError::CastError( - "Cannot cast from struct to other types".to_string(), + "Cannot cast from struct to other types except struct".to_string(), )), (_, Struct(_)) => Err(ArrowError::CastError( - "Cannot cast to struct from other types".to_string(), + "Cannot cast to struct from other types except struct".to_string(), )), (_, Boolean) => match from_type { UInt8 => cast_numeric_to_bool::<UInt8Type>(array), From b1894ad3c838f13f64f6e5c3f69cd8ee22dd2edd Mon Sep 17 00:00:00 2001 From: my-vegetable-has-exploded Date: Mon, 18 Dec 2023 22:57:00 +0800 Subject: [PATCH 2/5] 
add some tests --- arrow-cast/src/cast.rs | 67 ++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 64 insertions(+), 3 deletions(-) diff --git a/arrow-cast/src/cast.rs b/arrow-cast/src/cast.rs index 9e774b51dbce..e32e60739649 100644 --- a/arrow-cast/src/cast.rs +++ b/arrow-cast/src/cast.rs @@ -1144,15 +1144,15 @@ pub fn cast_with_options( ))), } } - (Struct(_), Struct(fields_r)) => { + (Struct(_), Struct(to_fields)) => { let array = array.as_any().downcast_ref::<StructArray>().unwrap(); let fields = array .columns() .iter() - .zip(fields_r.iter()) + .zip(to_fields.iter()) .map(|(l, field)| cast_with_options(l, field.data_type(), cast_options)) .collect::<Result<Vec<ArrayRef>, ArrowError>>()?; - let array = StructArray::new(fields_r.clone(), fields, array.nulls().cloned()); + let array = StructArray::new(to_fields.clone(), fields, array.nulls().cloned()); Ok(Arc::new(array) as ArrayRef) } (Struct(_), _) => Err(ArrowError::CastError( @@ -9464,4 +9464,65 @@ mod tests { ); } } + #[test] + fn test_cast_struct_to_struct() { + let struct_type = DataType::Struct( + vec![ + Field::new("a", DataType::Boolean, false), + Field::new("b", DataType::Int32, false), + ] + .into(), + ); + let to_type = DataType::Struct( + vec![ + Field::new("a", DataType::Utf8, false), + Field::new("b", DataType::Utf8, false), + ] + .into(), + ); + let boolean = Arc::new(BooleanArray::from(vec![false, false, true, true])); + let int = Arc::new(Int32Array::from(vec![42, 28, 19, 31])); + let struct_array = StructArray::from(vec![ + ( + Arc::new(Field::new("b", DataType::Boolean, false)), + boolean.clone() as ArrayRef, + ), + ( + Arc::new(Field::new("c", DataType::Int32, false)), + int.clone() as ArrayRef, + ), + ]); + let casted_array = cast(&struct_array, &to_type).unwrap(); + let casted_array = casted_array.as_struct(); + assert_eq!(casted_array.data_type(), &to_type); + let casted_boolean_array = casted_array + .column(0) + .as_string::<i32>() + .into_iter() + .flatten() + .collect::<Vec<_>>(); + let casted_int_array = casted_array + 
.column(1) + .as_string::<i32>() + .into_iter() + .flatten() + .collect::<Vec<_>>(); + assert_eq!(casted_boolean_array, vec!["false", "false", "true", "true"]); + assert_eq!(casted_int_array, vec!["42", "28", "19", "31"]); + + // test for can't cast + let to_type = DataType::Struct( + vec![ + Field::new("a", DataType::Date32, false), + Field::new("b", DataType::Utf8, false), + ] + .into(), + ); + assert_eq!(can_cast_types(&struct_type, &to_type), false); + let result = cast(&struct_array, &to_type); + assert_eq!( + "Cast error: Casting from Boolean to Date32 not supported", + result.unwrap_err().to_string() + ); + } } From d5795c2e4a7cc796c89750014bc605392dac412c Mon Sep 17 00:00:00 2001 From: my-vegetable-has-exploded Date: Mon, 18 Dec 2023 23:20:54 +0800 Subject: [PATCH 3/5] fix clippy --- arrow-cast/src/cast.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arrow-cast/src/cast.rs b/arrow-cast/src/cast.rs index e32e60739649..5cc4194bfb53 100644 --- a/arrow-cast/src/cast.rs +++ b/arrow-cast/src/cast.rs @@ -9518,7 +9518,7 @@ mod tests { ] .into(), ); - assert_eq!(can_cast_types(&struct_type, &to_type), false); + assert!(!can_cast_types(&struct_type, &to_type)); let result = cast(&struct_array, &to_type); assert_eq!( "Cast error: Casting from Boolean to Date32 not supported", From 5a473e81eb77398854fd0759ca5601d7ecb3bc53 Mon Sep 17 00:00:00 2001 From: my-vegetable-has-exploded Date: Mon, 18 Dec 2023 23:23:58 +0800 Subject: [PATCH 4/5] fix fmt --- arrow-cast/src/cast.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arrow-cast/src/cast.rs b/arrow-cast/src/cast.rs index 5cc4194bfb53..ceea3a39df47 100644 --- a/arrow-cast/src/cast.rs +++ b/arrow-cast/src/cast.rs @@ -9518,7 +9518,7 @@ mod tests { ] .into(), ); - assert!(!can_cast_types(&struct_type, &to_type)); + assert!(!can_cast_types(&struct_type, &to_type)); let result = cast(&struct_array, &to_type); assert_eq!( "Cast error: Casting from Boolean to Date32 not supported", From 
07a62e5a5acbf3dcb3b86406e902d7c5beab46fe Mon Sep 17 00:00:00 2001 From: Raphael Taylor-Davies <1781103+tustvold@users.noreply.github.com> Date: Tue, 19 Dec 2023 12:24:38 +0000 Subject: [PATCH 5/5] Update arrow-cast/src/cast.rs --- arrow-cast/src/cast.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arrow-cast/src/cast.rs b/arrow-cast/src/cast.rs index ceea3a39df47..0775392b7d64 100644 --- a/arrow-cast/src/cast.rs +++ b/arrow-cast/src/cast.rs @@ -1145,7 +1145,7 @@ pub fn cast_with_options( } } (Struct(_), Struct(to_fields)) => { - let array = array.as_any().downcast_ref::<StructArray>().unwrap(); + let array = array.as_struct(); let fields = array .columns() .iter()