From e48e93f7452e21b5ed758112e6e48b267733c7c1 Mon Sep 17 00:00:00 2001 From: Raphael Taylor-Davies Date: Mon, 27 Mar 2023 18:31:37 +0100 Subject: [PATCH] Add Fields abstraction (#3955) --- arrow-array/src/array/map_array.rs | 5 +- arrow-array/src/array/mod.rs | 8 +- arrow-array/src/array/struct_array.rs | 6 +- arrow-array/src/builder/struct_builder.rs | 8 +- arrow-data/src/data/mod.rs | 3 +- arrow-schema/src/datatype.rs | 42 ++++--- arrow-schema/src/field.rs | 49 ++++---- arrow-schema/src/fields.rs | 139 ++++++++++++++++++++++ arrow-schema/src/lib.rs | 2 + arrow-schema/src/schema.rs | 125 ++++++++++++------- 10 files changed, 292 insertions(+), 95 deletions(-) create mode 100644 arrow-schema/src/fields.rs diff --git a/arrow-array/src/array/map_array.rs b/arrow-array/src/array/map_array.rs index c9651f0b2019..e3f566e99ca3 100644 --- a/arrow-array/src/array/map_array.rs +++ b/arrow-array/src/array/map_array.rs @@ -256,6 +256,7 @@ mod tests { use crate::cast::AsArray; use crate::types::UInt32Type; use crate::{Int32Array, UInt32Array}; + use arrow_schema::Fields; use std::sync::Arc; use super::*; @@ -496,10 +497,10 @@ mod tests { fn test_from_array_data_validation() { // A DictionaryArray has similar buffer layout to a MapArray // but the meaning of the values differs - let struct_t = DataType::Struct(vec![ + let struct_t = DataType::Struct(Fields::from(vec![ Field::new("keys", DataType::Int32, true), Field::new("values", DataType::UInt32, true), - ]); + ])); let dict_t = DataType::Dictionary(Box::new(DataType::Int32), Box::new(struct_t)); let _ = MapArray::from(ArrayData::new_empty(&dict_t)); } diff --git a/arrow-array/src/array/mod.rs b/arrow-array/src/array/mod.rs index 9afefc07f8d4..4416481ed5d4 100644 --- a/arrow-array/src/array/mod.rs +++ b/arrow-array/src/array/mod.rs @@ -731,7 +731,7 @@ mod tests { use crate::cast::{as_union_array, downcast_array}; use crate::downcast_run_array; use arrow_buffer::{Buffer, MutableBuffer}; - use arrow_schema::{Field, UnionMode}; + use arrow_schema::{Field, Fields, UnionMode}; #[test] fn test_empty_primitive() { @@ -785,7 +785,7 @@ mod tests { // It is possible to create a null struct containing a non-nullable child // see https://github.com/apache/arrow-rs/pull/3244 for details let struct_type = - DataType::Struct(vec![Field::new("data", DataType::Int64, false)]); + DataType::Struct(vec![Field::new("data", DataType::Int64, false)].into()); let array = new_null_array(&struct_type, 9); let a = array.as_any().downcast_ref::().unwrap(); @@ -828,10 +828,10 @@ mod tests { let data_type = DataType::Map( Box::new(Field::new( "entry", - DataType::Struct(vec![ + DataType::Struct(Fields::from(vec![ Field::new("key", DataType::Utf8, false), Field::new("value", DataType::Int32, true), - ]), + ])), false, )), false, diff --git a/arrow-array/src/array/struct_array.rs b/arrow-array/src/array/struct_array.rs index 4fe59c0c240f..cc42b6276b0f 100644 --- a/arrow-array/src/array/struct_array.rs +++ b/arrow-array/src/array/struct_array.rs @@ -151,11 +151,11 @@ impl TryFrom> for StructArray { len = Some(child_datum_len) } child_data.push(child_datum.clone()); - fields.push(Field::new( + fields.push(Arc::new(Field::new( field_name, array.data_type().clone(), child_datum.nulls().is_some(), - )); + ))); if let Some(child_nulls) = child_datum.nulls() { null = Some(if let Some(null_buffer) = &null { @@ -176,7 +176,7 @@ impl TryFrom> for StructArray { } let len = len.unwrap(); - let builder = ArrayData::builder(DataType::Struct(fields)) + let builder = ArrayData::builder(DataType::Struct(fields.into())) .len(len) .null_bit_buffer(null) .child_data(child_data); diff --git a/arrow-array/src/builder/struct_builder.rs b/arrow-array/src/builder/struct_builder.rs index 51b4c7cfcdc6..5cd560036e0b 100644 --- a/arrow-array/src/builder/struct_builder.rs +++ b/arrow-array/src/builder/struct_builder.rs @@ -20,7 +20,7 @@ use crate::builder::*; use crate::{Array, ArrayRef, StructArray}; use arrow_buffer::Buffer; use arrow_data::ArrayData; -use arrow_schema::{DataType, Field, IntervalUnit, TimeUnit}; +use arrow_schema::{DataType, Field, Fields, IntervalUnit, TimeUnit}; use std::any::Any; use std::sync::Arc; @@ -29,7 +29,7 @@ use std::sync::Arc; /// Note that callers should make sure that methods of all the child field builders are /// properly called to maintain the consistency of the data structure. pub struct StructBuilder { - fields: Vec, + fields: Fields, field_builders: Vec>, null_buffer_builder: NullBufferBuilder, } @@ -180,7 +180,7 @@ pub fn make_builder(datatype: &DataType, capacity: usize) -> Box, field_builders: Vec>) -> Self { + pub fn new(fields: Fields, field_builders: Vec>) -> Self { Self { fields, field_builders, @@ -189,7 +189,7 @@ impl StructBuilder { } /// Creates a new `StructBuilder` from vector of [`Field`] with `capacity` - pub fn from_fields(fields: Vec, capacity: usize) -> Self { + pub fn from_fields(fields: Fields, capacity: usize) -> Self { let mut builders = Vec::with_capacity(fields.len()); for field in &fields { builders.push(make_builder(field.data_type(), capacity)); diff --git a/arrow-data/src/data/mod.rs b/arrow-data/src/data/mod.rs index 7241a5d80ee0..c47c836637a4 100644 --- a/arrow-data/src/data/mod.rs +++ b/arrow-data/src/data/mod.rs @@ -1874,7 +1874,8 @@ mod tests { ) .unwrap(); - let data_type = DataType::Struct(vec![Field::new("x", DataType::Int32, true)]); + let field = Arc::new(Field::new("x", DataType::Int32, true)); + let data_type = DataType::Struct(vec![field].into()); let arr_data = ArrayData::builder(data_type) .len(5) diff --git a/arrow-schema/src/datatype.rs b/arrow-schema/src/datatype.rs index 56eb6e8cef16..b143a17b37fd 100644 --- a/arrow-schema/src/datatype.rs +++ b/arrow-schema/src/datatype.rs @@ -18,6 +18,7 @@ use std::fmt; use crate::field::Field; +use crate::Fields; /// The set of datatypes that are supported by this implementation of Apache Arrow. /// @@ -182,7 +183,7 @@ pub enum DataType { /// A single LargeList array can store up to [`i64::MAX`] elements in total LargeList(Box), /// A nested datatype that contains a number of sub-fields. - Struct(Vec), + Struct(Fields), /// A nested datatype that can represent slots of differing types. Components: /// /// 1. [`Field`] for each possible child type the Union can hold @@ -482,7 +483,8 @@ impl DataType { | DataType::FixedSizeList(field, _) | DataType::LargeList(field) | DataType::Map(field, _) => field.size(), - DataType::Struct(fields) | DataType::Union(fields, _, _) => { + DataType::Struct(fields) => fields.size(), + DataType::Union(fields, _, _) => { fields .iter() .map(|field| field.size() - std::mem::size_of_val(field)) @@ -534,18 +536,18 @@ mod tests { let last_name = Field::new("last_name", DataType::Utf8, false) .with_metadata(HashMap::default()); - let person = DataType::Struct(vec![ + let person = DataType::Struct(Fields::from(vec![ first_name, last_name, Field::new( "address", - DataType::Struct(vec![ + DataType::Struct(Fields::from(vec![ Field::new("street", DataType::Utf8, false), Field::new("zip", DataType::UInt16, false), - ]), + ])), false, ), - ]); + ])); let serialized = serde_json::to_string(&person).unwrap(); @@ -592,24 +594,26 @@ mod tests { assert!(!list_e.equals_datatype(&list_g)); assert!(!list_f.equals_datatype(&list_g)); - let list_h = DataType::Struct(vec![Field::new("f1", list_e, true)]); - let list_i = DataType::Struct(vec![Field::new("f1", list_f.clone(), true)]); - let list_j = DataType::Struct(vec![Field::new("f1", list_f.clone(), false)]); - let list_k = DataType::Struct(vec![ + let list_h = DataType::Struct(Fields::from(vec![Field::new("f1", list_e, true)])); + let list_i = + DataType::Struct(Fields::from(vec![Field::new("f1", list_f.clone(), true)])); + let list_j = + DataType::Struct(Fields::from(vec![Field::new("f1", list_f.clone(), false)])); + let list_k = DataType::Struct(Fields::from(vec![ Field::new("f1", list_f.clone(), false), Field::new("f2", list_g.clone(), false), Field::new("f3", DataType::Utf8, true), - ]); - let list_l = DataType::Struct(vec![ + ])); + let list_l = DataType::Struct(Fields::from(vec![ Field::new("ff1", list_f.clone(), false), Field::new("ff2", list_g.clone(), false), Field::new("ff3", DataType::LargeUtf8, true), - ]); - let list_m = DataType::Struct(vec![ + ])); + let list_m = DataType::Struct(Fields::from(vec![ Field::new("ff1", list_f, false), Field::new("ff2", list_g, false), Field::new("ff3", DataType::Utf8, true), - ]); + ])); assert!(list_h.equals_datatype(&list_i)); assert!(!list_h.equals_datatype(&list_j)); assert!(!list_k.equals_datatype(&list_l)); @@ -618,18 +622,18 @@ mod tests { #[test] fn create_struct_type() { - let _person = DataType::Struct(vec![ + let _person = DataType::Struct(Fields::from(vec![ Field::new("first_name", DataType::Utf8, false), Field::new("last_name", DataType::Utf8, false), Field::new( "address", - DataType::Struct(vec![ + DataType::Struct(Fields::from(vec![ Field::new("street", DataType::Utf8, false), Field::new("zip", DataType::UInt16, false), - ]), + ])), false, ), - ]); + ])); } #[test] diff --git a/arrow-schema/src/field.rs b/arrow-schema/src/field.rs index b687b629aa75..8ef9fd2b81e5 100644 --- a/arrow-schema/src/field.rs +++ b/arrow-schema/src/field.rs @@ -21,6 +21,10 @@ use std::collections::HashMap; use std::hash::{Hash, Hasher}; use crate::datatype::DataType; +use crate::schema::SchemaBuilder; + +/// A reference counted [`Field`] +pub type FieldRef = std::sync::Arc; /// Describes a single column in a [`Schema`](super::Schema). /// @@ -230,7 +234,8 @@ impl Field { fn _fields(dt: &DataType) -> Vec<&Field> { match dt { - DataType::Struct(fields) | DataType::Union(fields, _, _) => { + DataType::Struct(fields) => fields.iter().flat_map(|f| f.fields()).collect(), + DataType::Union(fields, _, _) => { fields.iter().flat_map(|f| f.fields()).collect() } DataType::List(field) @@ -326,15 +331,9 @@ impl Field { match &mut self.data_type { DataType::Struct(nested_fields) => match &from.data_type { DataType::Struct(from_nested_fields) => { - for from_field in from_nested_fields { - match nested_fields - .iter_mut() - .find(|self_field| self_field.name == from_field.name) - { - Some(self_field) => self_field.try_merge(from_field)?, - None => nested_fields.push(from_field.clone()), - } - } + let mut builder = SchemaBuilder::new(); + nested_fields.iter().chain(from_nested_fields).try_for_each(|f| builder.try_merge(f))?; + *nested_fields = builder.finish().fields; } _ => { return Err(ArrowError::SchemaError( @@ -479,6 +478,7 @@ impl std::fmt::Display for Field { #[cfg(test)] mod test { use super::*; + use crate::Fields; use std::collections::hash_map::DefaultHasher; use std::hash::{Hash, Hasher}; @@ -525,29 +525,29 @@ mod test { let field = Field::new( "struct]>", - DataType::Struct(vec![ + DataType::Struct(Fields::from(vec![ dict1.clone(), Field::new( "list[struct]>]", DataType::List(Box::new(Field::new( "struct]>", - DataType::Struct(vec![ + DataType::Struct(Fields::from(vec![ dict1.clone(), Field::new( "list[struct]", DataType::List(Box::new(Field::new( "struct", - DataType::Struct(vec![dict2.clone()]), + DataType::Struct(vec![dict2.clone()].into()), false, ))), false, ), - ]), + ])), false, ))), false, ), - ]), + ])), false, ); @@ -632,14 +632,18 @@ mod test { fn test_contains_transitivity() { let child_field = Field::new("child1", DataType::Float16, false); - let mut field1 = Field::new("field1", DataType::Struct(vec![child_field]), false); + let mut field1 = Field::new( + "field1", + DataType::Struct(Fields::from(vec![child_field])), + false, + ); field1.set_metadata(HashMap::from([(String::from("k1"), String::from("v1"))])); - let mut field2 = Field::new("field1", DataType::Struct(vec![]), true); + let mut field2 = Field::new("field1", DataType::Struct(Fields::default()), true); field2.set_metadata(HashMap::from([(String::from("k2"), String::from("v2"))])); field2.try_merge(&field1).unwrap(); - let mut field3 = Field::new("field1", DataType::Struct(vec![]), false); + let mut field3 = Field::new("field1", DataType::Struct(Fields::default()), false); field3.set_metadata(HashMap::from([(String::from("k3"), String::from("v3"))])); field3.try_merge(&field2).unwrap(); @@ -665,11 +669,14 @@ mod test { let child_field1 = Field::new("child1", DataType::Float16, false); let child_field2 = Field::new("child2", DataType::Float16, false); - let field1 = - Field::new("field1", DataType::Struct(vec![child_field1.clone()]), true); + let field1 = Field::new( + "field1", + DataType::Struct(vec![child_field1.clone()].into()), + true, + ); let field2 = Field::new( "field1", - DataType::Struct(vec![child_field1, child_field2]), + DataType::Struct(vec![child_field1, child_field2].into()), true, ); diff --git a/arrow-schema/src/fields.rs b/arrow-schema/src/fields.rs new file mode 100644 index 000000000000..4f6f34a8a975 --- /dev/null +++ b/arrow-schema/src/fields.rs @@ -0,0 +1,139 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use crate::{Field, FieldRef}; +use std::ops::Deref; +use std::sync::Arc; + +/// A cheaply cloneable, owned slice of [`FieldRef`] +/// +/// Similar to `Arc>` or `Arc<[FieldPtr]>` +/// +/// Can be constructed in a number of ways +/// +/// ``` +/// # use std::sync::Arc; +/// # use arrow_schema::{DataType, Field, Fields}; +/// // Can be constructed from Vec +/// Fields::from(vec![Field::new("a", DataType::Boolean, false)]); +/// // Can be constructed from Vec +/// Fields::from(vec![Arc::new(Field::new("a", DataType::Boolean, false))]); +/// // Can be constructed from an iterator of Field +/// std::iter::once(Field::new("a", DataType::Boolean, false)).collect::(); +/// // Can be constructed from an iterator of FieldRef +/// std::iter::once(Arc::new(Field::new("a", DataType::Boolean, false))).collect::(); +/// ``` +/// +#[derive(Clone, Eq, PartialEq, Ord, PartialOrd, Hash)] +pub struct Fields(Arc<[FieldRef]>); + +impl std::fmt::Debug for Fields { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + self.0.as_ref().fmt(f) + } +} + +impl Fields { + /// Returns a new empty [`Fields`] + pub fn new() -> Self { + Self(Arc::new([])) + } + + /// Return size of this instance in bytes. + pub fn size(&self) -> usize { + self.iter().map(|field| field.size()).sum() + } +} + +impl Default for Fields { + fn default() -> Self { + Self::new() + } +} + +impl FromIterator for Fields { + fn from_iter>(iter: T) -> Self { + iter.into_iter().map(Arc::new).collect() + } +} + +impl FromIterator for Fields { + fn from_iter>(iter: T) -> Self { + Self(iter.into_iter().collect()) + } +} + +impl From> for Fields { + fn from(value: Vec) -> Self { + value.into_iter().collect() + } +} + +impl From> for Fields { + fn from(value: Vec) -> Self { + Self(value.into()) + } +} + +impl From<&[FieldRef]> for Fields { + fn from(value: &[FieldRef]) -> Self { + Self(value.into()) + } +} + +impl Deref for Fields { + type Target = [FieldRef]; + + fn deref(&self) -> &Self::Target { + self.0.as_ref() + } +} + +impl<'a> IntoIterator for &'a Fields { + type Item = &'a FieldRef; + type IntoIter = std::slice::Iter<'a, FieldRef>; + + fn into_iter(self) -> Self::IntoIter { + self.0.iter() + } +} + +// Manually implement to avoid needing serde rc feature +#[cfg(feature = "serde")] +impl serde::Serialize for Fields { + fn serialize(&self, serializer: S) -> Result + where + S: serde::Serializer, + { + use serde::ser::SerializeSeq; + let mut seq = serializer.serialize_seq(Some(self.len()))?; + for e in self.iter() { + seq.serialize_element(e.as_ref())?; + } + seq.end() + } +} + +#[cfg(feature = "serde")] +impl<'de> serde::Deserialize<'de> for Fields { + fn deserialize(deserializer: D) -> Result + where + D: serde::Deserializer<'de>, + { + Ok(Vec::::deserialize(deserializer)?.into()) + } +} diff --git a/arrow-schema/src/lib.rs b/arrow-schema/src/lib.rs index e977203e9c71..0e9edc7b4b26 100644 --- a/arrow-schema/src/lib.rs +++ b/arrow-schema/src/lib.rs @@ -23,6 +23,8 @@ mod error; pub use error::*; mod field; pub use field::*; +mod fields; +pub use fields::*; mod schema; pub use schema::*; use std::ops; diff --git a/arrow-schema/src/schema.rs b/arrow-schema/src/schema.rs index 10a72ba0cdf6..d550196fe08d 100644 --- a/arrow-schema/src/schema.rs +++ b/arrow-schema/src/schema.rs @@ -18,12 +18,53 @@ use std::collections::HashMap; use std::fmt; use std::hash::Hash; +use std::sync::Arc; use crate::error::ArrowError; use crate::field::Field; +use crate::{FieldRef, Fields}; + +/// A builder to facilitate building a [`Schema`] from iteratively from [`FieldRef`] +#[derive(Debug, Default)] +pub(crate) struct SchemaBuilder { + fields: Vec, +} + +impl SchemaBuilder { + /// Creates a new empty [`SchemaBuilder`] + pub fn new() -> Self { + Self::default() + } + + /// Appends a [`FieldRef`] to this [`SchemaBuilder`] checking for collision + /// + /// If an existing field exists with the same name, calls [`Field::try_merge`] + pub fn try_merge(&mut self, field: &FieldRef) -> Result<(), ArrowError> { + // This could potentially be sped up with a HashMap or similar + let existing = self.fields.iter_mut().find(|f| f.name() == field.name()); + match existing { + Some(e) if Arc::ptr_eq(e, field) => {} // Nothing to do + Some(e) => match Arc::get_mut(e) { + Some(e) => e.try_merge(field.as_ref())?, + None => { + let mut t = e.as_ref().clone(); + t.try_merge(field)?; + *e = Arc::new(t) + } + }, + None => self.fields.push(field.clone()), + } + Ok(()) + } + + /// Consume this [`SchemaBuilder`] yielding the final [`Schema`] + pub fn finish(self) -> Schema { + Schema::new(self.fields) + } +} /// A reference-counted reference to a [`Schema`]. -pub type SchemaRef = std::sync::Arc; +pub type SchemaRef = Arc; /// Describes the meta-data of an ordered sequence of relative types. /// @@ -32,7 +73,7 @@ pub type SchemaRef = std::sync::Arc; #[derive(Debug, Clone, PartialEq, Eq)] #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] pub struct Schema { - pub fields: Vec, + pub fields: Fields, /// A map of key-value pairs containing additional meta data. pub metadata: HashMap, } @@ -41,7 +82,7 @@ impl Schema { /// Creates an empty `Schema` pub fn empty() -> Self { Self { - fields: vec![], + fields: Default::default(), metadata: HashMap::new(), } } @@ -57,7 +98,7 @@ impl Schema { /// /// let schema = Schema::new(vec![field_a, field_b]); /// ``` - pub fn new(fields: Vec) -> Self { + pub fn new(fields: impl Into) -> Self { Self::new_with_metadata(fields, HashMap::new()) } @@ -79,11 +120,14 @@ impl Schema { /// let schema = Schema::new_with_metadata(vec![field_a, field_b], metadata); /// ``` #[inline] - pub const fn new_with_metadata( - fields: Vec, + pub fn new_with_metadata( + fields: impl Into, metadata: HashMap, ) -> Self { - Self { fields, metadata } + Self { + fields: fields.into(), + metadata, + } } /// Sets the metadata of this `Schema` to be `metadata` and returns self @@ -141,39 +185,34 @@ impl Schema { pub fn try_merge( schemas: impl IntoIterator, ) -> Result { - schemas - .into_iter() - .try_fold(Self::empty(), |mut merged, schema| { - let Schema { metadata, fields } = schema; - for (key, value) in metadata.into_iter() { - // merge metadata - if let Some(old_val) = merged.metadata.get(&key) { - if old_val != &value { - return Err(ArrowError::SchemaError(format!( - "Fail to merge schema due to conflicting metadata. \ + let mut out_meta = HashMap::new(); + let mut out_fields = SchemaBuilder::new(); + for schema in schemas { + let Schema { metadata, fields } = schema; + + // merge metadata + for (key, value) in metadata.into_iter() { + if let Some(old_val) = out_meta.get(&key) { + if old_val != &value { + return Err(ArrowError::SchemaError(format!( + "Fail to merge schema due to conflicting metadata. \ Key '{key}' has different values '{old_val}' and '{value}'" - ))); - } - } - merged.metadata.insert(key, value); - } - // merge fields - for field in fields.into_iter() { - let merged_field = - merged.fields.iter_mut().find(|f| f.name() == field.name()); - match merged_field { - Some(merged_field) => merged_field.try_merge(&field)?, - // found a new field, add to field list - None => merged.fields.push(field), + ))); } } - Ok(merged) - }) + out_meta.insert(key, value); + } + + // merge fields + fields.iter().try_for_each(|x| out_fields.try_merge(x))? + } + + Ok(out_fields.finish().with_metadata(out_meta)) } /// Returns an immutable reference of the vector of `Field` instances. #[inline] - pub const fn fields(&self) -> &Vec { + pub const fn fields(&self) -> &Fields { &self.fields } @@ -227,6 +266,7 @@ impl Schema { pub fn column_with_name(&self, name: &str) -> Option<(usize, &Field)> { self.fields .iter() + .map(|x| x.as_ref()) .enumerate() .find(|&(_, c)| c.name() == name) } @@ -281,10 +321,11 @@ impl Hash for Schema { #[cfg(test)] mod tests { - use super::*; use crate::datatype::DataType; use crate::{TimeUnit, UnionMode}; + use super::*; + #[test] #[cfg(feature = "serde")] fn test_ser_de_metadata() { @@ -525,10 +566,10 @@ mod tests { Field::new("last_name", DataType::Utf8, false), Field::new( "address", - DataType::Struct(vec![ + DataType::Struct(Fields::from(vec![ Field::new("street", DataType::Utf8, false), Field::new("zip", DataType::UInt16, false), - ]), + ])), false, ), Field::new_dict( @@ -634,7 +675,9 @@ mod tests { Field::new("last_name", DataType::Utf8, false), Field::new( "address", - DataType::Struct(vec![Field::new("zip", DataType::UInt16, false)]), + DataType::Struct( + vec![Field::new("zip", DataType::UInt16, false)].into(), + ), false, ), ]), @@ -644,12 +687,12 @@ mod tests { Field::new("last_name", DataType::Utf8, true), Field::new( "address", - DataType::Struct(vec![ + DataType::Struct(Fields::from(vec![ // add new nested field Field::new("street", DataType::Utf8, false), // nullable merge on nested field Field::new("zip", DataType::UInt16, true), - ]), + ])), false, ), // new field @@ -671,10 +714,10 @@ mod tests { Field::new("last_name", DataType::Utf8, true), Field::new( "address", - DataType::Struct(vec![ + DataType::Struct(Fields::from(vec![ Field::new("zip", DataType::UInt16, true), Field::new("street", DataType::Utf8, false), - ]), + ])), false, ), Field::new("number", DataType::Utf8, true),