Skip to content

Commit

Permalink
Add test for reading struct array statistics
Browse files Browse the repository at this point in the history
  • Loading branch information
alamb committed Nov 27, 2023
1 parent a128a20 commit b4009c2
Showing 1 changed file with 61 additions and 2 deletions.
63 changes: 61 additions & 2 deletions datafusion/core/src/datasource/physical_plan/parquet/statistics.rs
Original file line number Diff line number Diff line change
Expand Up @@ -239,8 +239,9 @@ impl<'a> RowGroupStatisticsConverter<'a> {
mod test {
use super::*;
use arrow_array::{
BinaryArray, BooleanArray, Decimal128Array, Float32Array, Float64Array,
Int32Array, Int64Array, RecordBatch, StringArray, TimestampNanosecondArray,
new_null_array, Array, BinaryArray, BooleanArray, Decimal128Array, Float32Array,
Float64Array, Int32Array, Int64Array, RecordBatch, StringArray, StructArray,
TimestampNanosecondArray,
};
use arrow_schema::SchemaRef;
use bytes::Bytes;
Expand Down Expand Up @@ -480,6 +481,45 @@ mod test {
.run()
}

/// Round-trips a struct column (fields "b": Boolean, "i": Int32) and checks
/// the min/max statistics arrays produced for it.
///
/// The nine input rows are annotated as three row groups of three rows each,
/// so one min and one max value is expected per row group.
#[test]
fn roundtrip_struct() {
    let mut test = Test {
        input: make_struct_array(vec![
            // row group 1
            (Some(true), Some(1)),
            (None, None),
            (Some(true), Some(3)),
            // row group 2
            (Some(true), Some(0)),
            (Some(false), Some(5)),
            (None, None),
            // row group 3
            (None, None),
            (None, None),
            (None, None),
        ]),
        // TODO not really sure what the min/max values are
        // NOTE(review): these literal values are placeholders that are
        // overwritten below; only their lengths are used today.
        expected_min: make_struct_array(vec![
            (Some(true), Some(1)),
            (Some(true), Some(0)),
            (None, None),
        ]),

        expected_max: make_struct_array(vec![
            (Some(true), Some(3)),
            (Some(true), Some(0)),
            (None, None),
        ]),
    };
    // Due to https://github.com/apache/arrow-datafusion/issues/8334,
    // statistics for struct arrays are not supported
    //
    // Until that is resolved the expectation is an all-null array of the
    // struct type. Each replacement is sized from its own fixture so the min
    // and max expectations stay independent of one another.
    test.expected_min =
        new_null_array(test.input.data_type(), test.expected_min.len());
    test.expected_max =
        new_null_array(test.input.data_type(), test.expected_max.len());
    test.run()
}

#[test]
#[should_panic(
expected = "Inconsistent types in ScalarValue::iter_to_array. Expected Utf8, got Binary(NULL)"
Expand Down Expand Up @@ -803,4 +843,23 @@ mod test {
})
.run();
}

// returns a struct array with columns "b" and "i" with the specified values
fn make_struct_array(input: Vec<(Option<bool>, Option<i32>)>) -> ArrayRef {
let boolean: BooleanArray = input.iter().map(|(b, _i)| b).collect();
let int: Int32Array = input.iter().map(|(_b, i)| i).collect();

let nullable = true;
let struct_array = StructArray::from(vec![
(
Arc::new(Field::new("b", DataType::Boolean, nullable)),
Arc::new(boolean) as ArrayRef,
),
(
Arc::new(Field::new("i", DataType::Int32, nullable)),
Arc::new(int) as ArrayRef,
),
]);
Arc::new(struct_array)
}
}

0 comments on commit b4009c2

Please sign in to comment.