Skip to content

Commit

Permalink
refactor(rust): make Parquet Statistics into enum instead of `trait` (#16485)
Browse files Browse the repository at this point in the history
  • Loading branch information
coastalwhite authored May 25, 2024
1 parent f2bbc39 commit d68d499
Show file tree
Hide file tree
Showing 30 changed files with 513 additions and 585 deletions.
7 changes: 4 additions & 3 deletions crates/polars-parquet/src/arrow/read/statistics/binary.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,10 @@ use arrow::array::{MutableArray, MutableBinaryArray};
use arrow::offset::Offset;
use polars_error::PolarsResult;

use crate::parquet::statistics::{BinaryStatistics, Statistics as ParquetStatistics};
use crate::parquet::statistics::BinaryStatistics;

pub(super) fn push<O: Offset>(
from: Option<&dyn ParquetStatistics>,
from: Option<&BinaryStatistics>,
min: &mut dyn MutableArray,
max: &mut dyn MutableArray,
) -> PolarsResult<()> {
Expand All @@ -17,8 +17,9 @@ pub(super) fn push<O: Offset>(
.as_mut_any()
.downcast_mut::<MutableBinaryArray<O>>()
.unwrap();
let from = from.map(|s| s.as_any().downcast_ref::<BinaryStatistics>().unwrap());

min.push(from.and_then(|s| s.min_value.as_ref()));
max.push(from.and_then(|s| s.max_value.as_ref()));

Ok(())
}
7 changes: 4 additions & 3 deletions crates/polars-parquet/src/arrow/read/statistics/binview.rs
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
use arrow::array::{MutableArray, MutableBinaryViewArray, ViewType};
use polars_error::PolarsResult;

use crate::parquet::statistics::{BinaryStatistics, Statistics as ParquetStatistics};
use crate::parquet::statistics::BinaryStatistics;

pub(super) fn push<T: ViewType + ?Sized>(
from: Option<&dyn ParquetStatistics>,
from: Option<&BinaryStatistics>,
min: &mut dyn MutableArray,
max: &mut dyn MutableArray,
) -> PolarsResult<()> {
Expand All @@ -16,7 +16,7 @@ pub(super) fn push<T: ViewType + ?Sized>(
.as_mut_any()
.downcast_mut::<MutableBinaryViewArray<T>>()
.unwrap();
let from = from.map(|s| s.as_any().downcast_ref::<BinaryStatistics>().unwrap());

min.push(from.and_then(|s| {
let opt_b = s.min_value.as_deref();
unsafe { opt_b.map(|b| T::from_bytes_unchecked(b)) }
Expand All @@ -25,5 +25,6 @@ pub(super) fn push<T: ViewType + ?Sized>(
let opt_b = s.max_value.as_deref();
unsafe { opt_b.map(|b| T::from_bytes_unchecked(b)) }
}));

Ok(())
}
7 changes: 4 additions & 3 deletions crates/polars-parquet/src/arrow/read/statistics/boolean.rs
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
use arrow::array::{MutableArray, MutableBooleanArray};
use polars_error::PolarsResult;

use crate::parquet::statistics::{BooleanStatistics, Statistics as ParquetStatistics};
use crate::parquet::statistics::BooleanStatistics;

pub(super) fn push(
from: Option<&dyn ParquetStatistics>,
from: Option<&BooleanStatistics>,
min: &mut dyn MutableArray,
max: &mut dyn MutableArray,
) -> PolarsResult<()> {
Expand All @@ -16,8 +16,9 @@ pub(super) fn push(
.as_mut_any()
.downcast_mut::<MutableBooleanArray>()
.unwrap();
let from = from.map(|s| s.as_any().downcast_ref::<BooleanStatistics>().unwrap());

min.push(from.and_then(|s| s.min_value));
max.push(from.and_then(|s| s.max_value));

Ok(())
}
22 changes: 9 additions & 13 deletions crates/polars-parquet/src/arrow/read/statistics/fixlen.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,10 @@ use polars_error::PolarsResult;

use super::super::{convert_days_ms, convert_i128};
use crate::arrow::read::convert_i256;
use crate::parquet::statistics::{FixedLenStatistics, Statistics as ParquetStatistics};
use crate::parquet::statistics::FixedLenStatistics;

pub(super) fn push_i128(
from: Option<&dyn ParquetStatistics>,
from: Option<&FixedLenStatistics>,
n: usize,
min: &mut dyn MutableArray,
max: &mut dyn MutableArray,
Expand All @@ -21,7 +21,6 @@ pub(super) fn push_i128(
.as_mut_any()
.downcast_mut::<MutablePrimitiveArray<i128>>()
.unwrap();
let from = from.map(|s| s.as_any().downcast_ref::<FixedLenStatistics>().unwrap());

min.push(from.and_then(|s| s.min_value.as_deref().map(|x| convert_i128(x, n))));
max.push(from.and_then(|s| s.max_value.as_deref().map(|x| convert_i128(x, n))));
Expand All @@ -30,7 +29,7 @@ pub(super) fn push_i128(
}

pub(super) fn push_i256_with_i128(
from: Option<&dyn ParquetStatistics>,
from: Option<&FixedLenStatistics>,
n: usize,
min: &mut dyn MutableArray,
max: &mut dyn MutableArray,
Expand All @@ -43,7 +42,6 @@ pub(super) fn push_i256_with_i128(
.as_mut_any()
.downcast_mut::<MutablePrimitiveArray<i256>>()
.unwrap();
let from = from.map(|s| s.as_any().downcast_ref::<FixedLenStatistics>().unwrap());

min.push(from.and_then(|s| {
s.min_value
Expand All @@ -60,7 +58,7 @@ pub(super) fn push_i256_with_i128(
}

pub(super) fn push_i256(
from: Option<&dyn ParquetStatistics>,
from: Option<&FixedLenStatistics>,
min: &mut dyn MutableArray,
max: &mut dyn MutableArray,
) -> PolarsResult<()> {
Expand All @@ -72,7 +70,6 @@ pub(super) fn push_i256(
.as_mut_any()
.downcast_mut::<MutablePrimitiveArray<i256>>()
.unwrap();
let from = from.map(|s| s.as_any().downcast_ref::<FixedLenStatistics>().unwrap());

min.push(from.and_then(|s| s.min_value.as_deref().map(convert_i256)));
max.push(from.and_then(|s| s.max_value.as_deref().map(convert_i256)));
Expand All @@ -81,7 +78,7 @@ pub(super) fn push_i256(
}

pub(super) fn push(
from: Option<&dyn ParquetStatistics>,
from: Option<&FixedLenStatistics>,
min: &mut dyn MutableArray,
max: &mut dyn MutableArray,
) -> PolarsResult<()> {
Expand All @@ -93,9 +90,10 @@ pub(super) fn push(
.as_mut_any()
.downcast_mut::<MutableFixedSizeBinaryArray>()
.unwrap();
let from = from.map(|s| s.as_any().downcast_ref::<FixedLenStatistics>().unwrap());

min.push(from.and_then(|s| s.min_value.as_ref()));
max.push(from.and_then(|s| s.max_value.as_ref()));

Ok(())
}

Expand All @@ -104,7 +102,7 @@ fn convert_year_month(value: &[u8]) -> i32 {
}

pub(super) fn push_year_month(
from: Option<&dyn ParquetStatistics>,
from: Option<&FixedLenStatistics>,
min: &mut dyn MutableArray,
max: &mut dyn MutableArray,
) -> PolarsResult<()> {
Expand All @@ -116,7 +114,6 @@ pub(super) fn push_year_month(
.as_mut_any()
.downcast_mut::<MutablePrimitiveArray<i32>>()
.unwrap();
let from = from.map(|s| s.as_any().downcast_ref::<FixedLenStatistics>().unwrap());

min.push(from.and_then(|s| s.min_value.as_deref().map(convert_year_month)));
max.push(from.and_then(|s| s.max_value.as_deref().map(convert_year_month)));
Expand All @@ -125,7 +122,7 @@ pub(super) fn push_year_month(
}

pub(super) fn push_days_ms(
from: Option<&dyn ParquetStatistics>,
from: Option<&FixedLenStatistics>,
min: &mut dyn MutableArray,
max: &mut dyn MutableArray,
) -> PolarsResult<()> {
Expand All @@ -137,7 +134,6 @@ pub(super) fn push_days_ms(
.as_mut_any()
.downcast_mut::<MutablePrimitiveArray<days_ms>>()
.unwrap();
let from = from.map(|s| s.as_any().downcast_ref::<FixedLenStatistics>().unwrap());

min.push(from.and_then(|s| s.min_value.as_deref().map(convert_days_ms)));
max.push(from.and_then(|s| s.max_value.as_deref().map(convert_days_ms)));
Expand Down
Loading

0 comments on commit d68d499

Please sign in to comment.