Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Deprecate LexOrderingRef and LexRequirementRef #13233

Merged
Show file tree
Hide file tree
Changes from 13 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
127 changes: 68 additions & 59 deletions benchmarks/src/sort.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ use crate::util::{AccessLogOpt, BenchmarkRun, CommonOpt};

use arrow::util::pretty;
use datafusion::common::Result;
use datafusion::physical_expr::{LexOrdering, LexOrderingRef, PhysicalSortExpr};
use datafusion::physical_expr::{LexOrdering, PhysicalSortExpr};
use datafusion::physical_plan::collect;
use datafusion::physical_plan::sorts::sort::SortExec;
use datafusion::prelude::{SessionConfig, SessionContext};
Expand Down Expand Up @@ -70,79 +70,88 @@ impl RunOpt {
let sort_cases = vec![
(
"sort utf8",
vec![PhysicalSortExpr {
expr: col("request_method", &schema)?,
options: Default::default(),
}],
LexOrdering {
jatin510 marked this conversation as resolved.
Show resolved Hide resolved
inner: vec![PhysicalSortExpr {
expr: col("request_method", &schema)?,
options: Default::default(),
}],
},
),
(
"sort int",
vec![PhysicalSortExpr {
expr: col("request_bytes", &schema)?,
options: Default::default(),
}],
LexOrdering {
inner: vec![PhysicalSortExpr {
expr: col("response_bytes", &schema)?,
options: Default::default(),
}],
},
),
(
"sort decimal",
vec![
// sort decimal
PhysicalSortExpr {
LexOrdering {
inner: vec![PhysicalSortExpr {
expr: col("decimal_price", &schema)?,
options: Default::default(),
},
],
}],
},
),
(
"sort integer tuple",
vec![
PhysicalSortExpr {
expr: col("request_bytes", &schema)?,
options: Default::default(),
},
PhysicalSortExpr {
expr: col("response_bytes", &schema)?,
options: Default::default(),
},
],
LexOrdering {
inner: vec![
PhysicalSortExpr {
expr: col("request_bytes", &schema)?,
options: Default::default(),
},
PhysicalSortExpr {
expr: col("response_bytes", &schema)?,
options: Default::default(),
},
],
},
),
(
"sort utf8 tuple",
vec![
// sort utf8 tuple
PhysicalSortExpr {
expr: col("service", &schema)?,
options: Default::default(),
},
PhysicalSortExpr {
expr: col("host", &schema)?,
options: Default::default(),
},
PhysicalSortExpr {
expr: col("pod", &schema)?,
options: Default::default(),
},
PhysicalSortExpr {
expr: col("image", &schema)?,
options: Default::default(),
},
],
LexOrdering {
inner: vec![
// sort utf8 tuple
PhysicalSortExpr {
expr: col("service", &schema)?,
options: Default::default(),
},
PhysicalSortExpr {
expr: col("host", &schema)?,
options: Default::default(),
},
PhysicalSortExpr {
expr: col("pod", &schema)?,
options: Default::default(),
},
PhysicalSortExpr {
expr: col("image", &schema)?,
options: Default::default(),
},
],
},
),
(
"sort mixed tuple",
vec![
PhysicalSortExpr {
expr: col("service", &schema)?,
options: Default::default(),
},
PhysicalSortExpr {
expr: col("request_bytes", &schema)?,
options: Default::default(),
},
PhysicalSortExpr {
expr: col("decimal_price", &schema)?,
options: Default::default(),
},
],
LexOrdering {
inner: vec![
PhysicalSortExpr {
expr: col("service", &schema)?,
options: Default::default(),
},
PhysicalSortExpr {
expr: col("request_bytes", &schema)?,
options: Default::default(),
},
PhysicalSortExpr {
expr: col("decimal_price", &schema)?,
options: Default::default(),
},
],
},
),
];
for (title, expr) in sort_cases {
Expand Down Expand Up @@ -170,13 +179,13 @@ impl RunOpt {

async fn exec_sort(
ctx: &SessionContext,
expr: LexOrderingRef<'_>,
expr: &LexOrdering,
test_file: &TestParquetFile,
debug: bool,
) -> Result<(usize, std::time::Duration)> {
let start = Instant::now();
let scan = test_file.create_scan(ctx, None).await?;
let exec = Arc::new(SortExec::new(LexOrdering::new(expr.to_owned()), scan));
let exec = Arc::new(SortExec::new(expr.clone(), scan));
let task_ctx = ctx.task_ctx();
let result = collect(exec, task_ctx).await?;
let elapsed = start.elapsed();
Expand Down
33 changes: 17 additions & 16 deletions datafusion-cli/Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

27 changes: 14 additions & 13 deletions datafusion/core/src/datasource/physical_plan/file_scan_config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,6 @@ use arrow_schema::{DataType, Field, Schema, SchemaRef};
use datafusion_common::stats::Precision;
use datafusion_common::{exec_err, ColumnStatistics, DataFusionError, Statistics};
use datafusion_physical_expr::LexOrdering;
use datafusion_physical_expr_common::sort_expr::LexOrderingRef;

use log::warn;

Expand Down Expand Up @@ -308,7 +307,7 @@ impl FileScanConfig {
pub fn split_groups_by_statistics(
table_schema: &SchemaRef,
file_groups: &[Vec<PartitionedFile>],
sort_order: LexOrderingRef,
sort_order: &LexOrdering,
) -> Result<Vec<Vec<PartitionedFile>>> {
let flattened_files = file_groups.iter().flatten().collect::<Vec<_>>();
// First Fit:
Expand Down Expand Up @@ -1113,17 +1112,19 @@ mod tests {
))))
.collect::<Vec<_>>(),
));
let sort_order = case
.sort
.into_iter()
.map(|expr| {
crate::physical_planner::create_physical_sort_expr(
&expr,
&DFSchema::try_from(table_schema.as_ref().clone())?,
&ExecutionProps::default(),
)
})
.collect::<Result<Vec<_>>>()?;
let sort_order = LexOrdering {
inner: case
.sort
.into_iter()
.map(|expr| {
crate::physical_planner::create_physical_sort_expr(
&expr,
&DFSchema::try_from(table_schema.as_ref().clone())?,
&ExecutionProps::default(),
)
})
.collect::<Result<Vec<_>>>()?,
};

let partitioned_files =
case.files.into_iter().map(From::from).collect::<Vec<_>>();
Expand Down
32 changes: 17 additions & 15 deletions datafusion/core/src/datasource/physical_plan/statistics.rs
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ use arrow_array::RecordBatch;
use arrow_schema::SchemaRef;
use datafusion_common::{DataFusionError, Result};
use datafusion_physical_expr::{expressions::Column, PhysicalSortExpr};
use datafusion_physical_expr_common::sort_expr::{LexOrdering, LexOrderingRef};
use datafusion_physical_expr_common::sort_expr::LexOrdering;

/// A normalized representation of file min/max statistics that allows for efficient sorting & comparison.
/// The min/max values are ordered by [`Self::sort_order`].
Expand All @@ -50,7 +50,7 @@ pub(crate) struct MinMaxStatistics {
impl MinMaxStatistics {
/// Sort order used to sort the statistics
#[allow(unused)]
pub fn sort_order(&self) -> LexOrderingRef {
pub fn sort_order(&self) -> &LexOrdering {
&self.sort_order
}

Expand All @@ -66,8 +66,8 @@ impl MinMaxStatistics {
}

pub fn new_from_files<'a>(
projected_sort_order: LexOrderingRef, // Sort order with respect to projected schema
projected_schema: &SchemaRef, // Projected schema
projected_sort_order: &LexOrdering, // Sort order with respect to projected schema
projected_schema: &SchemaRef, // Projected schema
projection: Option<&[usize]>, // Indices of projection in full table schema (None = all columns)
files: impl IntoIterator<Item = &'a PartitionedFile>,
) -> Result<Self> {
Expand Down Expand Up @@ -119,15 +119,17 @@ impl MinMaxStatistics {
projected_schema
.project(&(sort_columns.iter().map(|c| c.index()).collect::<Vec<_>>()))?,
);
let min_max_sort_order = sort_columns
.iter()
.zip(projected_sort_order.iter())
.enumerate()
.map(|(i, (col, sort))| PhysicalSortExpr {
expr: Arc::new(Column::new(col.name(), i)),
options: sort.options,
})
.collect::<Vec<_>>();
let min_max_sort_order = LexOrdering {
inner: sort_columns
.iter()
.zip(projected_sort_order.iter())
.enumerate()
.map(|(i, (col, sort))| PhysicalSortExpr {
expr: Arc::new(Column::new(col.name(), i)),
options: sort.options,
})
.collect::<Vec<_>>(),
};

let (min_values, max_values): (Vec<_>, Vec<_>) = sort_columns
.iter()
Expand Down Expand Up @@ -167,7 +169,7 @@ impl MinMaxStatistics {
}

pub fn new(
sort_order: LexOrderingRef,
sort_order: &LexOrdering,
schema: &SchemaRef,
min_values: RecordBatch,
max_values: RecordBatch,
Expand Down Expand Up @@ -278,7 +280,7 @@ impl MinMaxStatistics {
}

fn sort_columns_from_physical_sort_exprs(
sort_order: LexOrderingRef,
sort_order: &LexOrdering,
) -> Option<Vec<&Column>> {
sort_order
.iter()
Expand Down
Loading