Skip to content

Commit

Permalink
fix hive columns: skip the Parquet-expression fast path when the single live column is a Hive partition column, extend validity bitmaps correctly in MutableBinaryViewArray::extend_from_array, and gate date/datetime/duration/time scalar casts behind their dtype features
Browse files Browse the repository at this point in the history
  • Loading branch information
coastalwhite committed Jan 15, 2025
1 parent 28c5755 commit 14e839f
Show file tree
Hide file tree
Showing 5 changed files with 35 additions and 9 deletions.
13 changes: 13 additions & 0 deletions crates/polars-arrow/src/array/binview/mutable.rs
Original file line number Diff line number Diff line change
Expand Up @@ -589,6 +589,19 @@ impl<T: ViewType + ?Sized> MutableBinaryViewArray<T> {
}

pub fn extend_from_array(&mut self, other: &BinaryViewArrayGeneric<T>) {
let slf_len = self.len();
match (&mut self.validity, other.validity()) {
(None, None) => {},
(Some(v), None) => v.extend_constant(other.len(), true),
(v @ None, Some(other)) => {
let mut bm = MutableBitmap::with_capacity(slf_len + other.len());
bm.extend_constant(slf_len, true);
bm.extend_from_bitmap(other);
*v = Some(bm);
}
(Some(slf), Some(other)) => slf.extend_from_bitmap(other),
}

if other.total_buffer_len() == 0 {
self.views.extend(other.views().iter().copied());
} else {
Expand Down
1 change: 1 addition & 0 deletions crates/polars-io/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,7 @@ timezones = [
"polars-json?/timezones",
]
dtype-time = ["polars-core/dtype-time", "polars-core/temporal", "polars-time/dtype-time"]
dtype-duration = ["polars-core/dtype-duration", "polars-time/dtype-duration"]
dtype-struct = ["polars-core/dtype-struct"]
dtype-decimal = ["polars-core/dtype-decimal", "polars-json?/dtype-decimal"]
fmt = ["polars-core/fmt"]
Expand Down
11 changes: 8 additions & 3 deletions crates/polars-io/src/parquet/read/read_impl.rs
Original file line number Diff line number Diff line change
Expand Up @@ -298,12 +298,16 @@ fn rg_to_dfs_prefiltered(
}

let do_parquet_expr = std::env::var("POLARS_NO_PARQUET_EXPR").as_deref() != Ok("1")
&& live_columns.len() == 1
&& live_columns.len() == 1 // Only do it with one column for now
&& hive_partition_columns.is_none_or(|hc| {
!hc.iter()
.any(|c| c.name().as_str() == live_columns[0].as_str())
}) // No hive columns
&& !schema
.get(live_columns[0].as_str())
.unwrap()
.dtype()
.is_nested();
.is_nested(); // No nested columns
let column_exprs = do_parquet_expr.then(|| {
live_columns
.iter()
Expand Down Expand Up @@ -430,10 +434,11 @@ fn rg_to_dfs_prefiltered(
PlSmallStr::EMPTY,
[BooleanArray::new(ArrowDataType::Boolean, f.clone(), None)],
))?;
unsafe { df.column_extend_unchecked(live_columns) };
unsafe { df.column_extend_unchecked(live_columns) }
} else {
df = DataFrame::new(live_columns).unwrap();
}

filter_mask = f.clone();
} else {
df = unsafe { DataFrame::new_no_checks(md.num_rows(), live_columns.clone()) };
Expand Down
18 changes: 12 additions & 6 deletions crates/polars-io/src/predicates.rs
Original file line number Diff line number Diff line change
Expand Up @@ -143,17 +143,23 @@ fn cast_to_parquet_scalar(scalar: Scalar) -> Option<ParquetScalar> {

A::Int8(v) => P::Int8(v),
A::Int16(v) => P::Int16(v),
A::Int32(v) | A::Date(v) => P::Int32(v),
A::Int64(v)
| A::Datetime(v, _, _)
| A::DatetimeOwned(v, _, _)
| A::Duration(v, _)
| A::Time(v) => P::Int64(v),
A::Int32(v) => P::Int32(v),
A::Int64(v) => P::Int64(v),

#[cfg(feature = "dtype-time")]
A::Date(v) => P::Int32(v),
#[cfg(feature = "dtype-datetime")]
A::Datetime(v, _, _) | A::DatetimeOwned(v, _, _) => P::Int64(v),
#[cfg(feature = "dtype-duration")]
A::Duration(v, _) => P::Int64(v),
#[cfg(feature = "dtype-time")]
A::Time(v) => P::Int64(v),

A::Float32(v) => P::Float32(v),
A::Float64(v) => P::Float64(v),

// @TODO: Cast to string
#[cfg(feature = "dtype-categorical")]
A::Categorical(_, _, _)
| A::CategoricalOwned(_, _, _)
| A::Enum(_, _, _)
Expand Down
1 change: 1 addition & 0 deletions crates/polars/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -298,6 +298,7 @@ dtype-datetime = [
]
dtype-duration = [
"polars-core/dtype-duration",
"polars-io/dtype-duration",
"polars-lazy?/dtype-duration",
"polars-time?/dtype-duration",
"polars-ops/dtype-duration",
Expand Down

0 comments on commit 14e839f

Please sign in to comment.