Skip to content

Commit

Permalink
fix: Add outer validity to AnyValueBufferTrusted for structs
Browse files Browse the repository at this point in the history
  • Loading branch information
orlp committed Jan 14, 2025
1 parent 1cd72ff commit b96effd
Show file tree
Hide file tree
Showing 2 changed files with 21 additions and 12 deletions.
21 changes: 15 additions & 6 deletions crates/polars-core/src/frame/row/av_buffer.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
use std::hint::unreachable_unchecked;

use arrow::bitmap::BitmapBuilder;
#[cfg(feature = "dtype-struct")]
use polars_utils::pl_str::PlSmallStr;

Expand Down Expand Up @@ -340,7 +341,7 @@ pub enum AnyValueBufferTrusted<'a> {
String(StringChunkedBuilder),
#[cfg(feature = "dtype-struct")]
// not the trusted variant!
Struct(Vec<(AnyValueBuffer<'a>, PlSmallStr)>),
Struct(BitmapBuilder, Vec<(AnyValueBuffer<'a>, PlSmallStr)>),
Null(NullChunkedBuilder),
All(DataType, Vec<AnyValue<'a>>),
}
Expand Down Expand Up @@ -371,7 +372,8 @@ impl<'a> AnyValueBufferTrusted<'a> {
Float64(builder) => builder.append_null(),
String(builder) => builder.append_null(),
#[cfg(feature = "dtype-struct")]
Struct(builders) => {
Struct(outer_validity, builders) => {
outer_validity.push(false);
for (b, _) in builders.iter_mut() {
b.add(AnyValue::Null);
}
Expand Down Expand Up @@ -486,7 +488,7 @@ impl<'a> AnyValueBufferTrusted<'a> {
builder.append_value(v.as_str())
},
#[cfg(feature = "dtype-struct")]
Struct(builders) => {
Struct(outer_validity, builders) => {
let AnyValue::StructOwned(payload) = val else {
unreachable_unchecked()
};
Expand All @@ -501,6 +503,7 @@ impl<'a> AnyValueBufferTrusted<'a> {
builder.add(av.clone());
}
}
outer_validity.push(true);
},
All(_, vals) => vals.push(val.clone().into_static()),
_ => self.add_physical(val),
Expand All @@ -525,7 +528,7 @@ impl<'a> AnyValueBufferTrusted<'a> {
builder.append_value(v)
},
#[cfg(feature = "dtype-struct")]
Struct(builders) => {
Struct(outer_validity, builders) => {
let AnyValue::Struct(idx, arr, fields) = val else {
unreachable_unchecked()
};
Expand All @@ -542,6 +545,7 @@ impl<'a> AnyValueBufferTrusted<'a> {
builder.add(av);
}
}
outer_validity.push(true);
},
All(_, vals) => vals.push(val.clone().into_static()),
_ => self.add_physical(val),
Expand Down Expand Up @@ -619,7 +623,7 @@ impl<'a> AnyValueBufferTrusted<'a> {
new.finish().into_series()
},
#[cfg(feature = "dtype-struct")]
Struct(b) => {
Struct(outer_validity, b) => {
// @Q? Maybe we need to add a length parameter here for ZFS's. I am not very happy
// with just setting the length to zero for that case.
if b.is_empty() {
Expand All @@ -646,8 +650,12 @@ impl<'a> AnyValueBufferTrusted<'a> {

let length = if min_len == 0 { 0 } else { max_len };

let old_outer_validity = core::mem::take(outer_validity);
outer_validity.reserve(capacity);

StructChunked::from_series(PlSmallStr::EMPTY, length, v.iter())
.unwrap()
.with_outer_validity(Some(old_outer_validity.freeze()))
.into_series()
},
Null(b) => {
Expand Down Expand Up @@ -716,6 +724,7 @@ impl From<(&DataType, usize)> for AnyValueBufferTrusted<'_> {
},
#[cfg(feature = "dtype-struct")]
Struct(fields) => {
let outer_validity = BitmapBuilder::with_capacity(len);
let buffers = fields
.iter()
.map(|field| {
Expand All @@ -724,7 +733,7 @@ impl From<(&DataType, usize)> for AnyValueBufferTrusted<'_> {
(buffer, field.name.clone())
})
.collect::<Vec<_>>();
AnyValueBufferTrusted::Struct(buffers)
AnyValueBufferTrusted::Struct(outer_validity, buffers)
},
// List can be recursive so use AnyValues for that
dt => AnyValueBufferTrusted::All(dt.clone(), Vec::with_capacity(len)),
Expand Down
12 changes: 6 additions & 6 deletions py-polars/tests/unit/operations/test_pivot.py
Original file line number Diff line number Diff line change
Expand Up @@ -473,22 +473,22 @@ def test_pivot_struct() -> None:
"id": ["a", "b", "c"],
"1": [
{"num1": 1, "num2": 4},
{"num1": None, "num2": None},
None,
{"num1": 6, "num2": 6},
],
"2": [
{"num1": 3, "num2": 5},
{"num1": None, "num2": None},
{"num1": None, "num2": None},
None,
None,
],
"3": [
{"num1": None, "num2": None},
None,
{"num1": 5, "num2": 3},
{"num1": 3, "num2": 6},
],
"4": [
{"num1": None, "num2": None},
{"num1": None, "num2": None},
None,
None,
{"num1": 4, "num2": 4},
],
}
Expand Down

0 comments on commit b96effd

Please sign in to comment.