diff --git a/Cargo.toml b/Cargo.toml index b08309013408..ac49d8fab524 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -70,6 +70,7 @@ datafusion-physical-plan = { path = "datafusion/physical-plan" } datafusion-execution = { path = "datafusion/execution" } datafusion-proto = { path = "datafusion/proto" } datafusion-sqllogictest = { path = "datafusion/sqllogictest" } +datafusion-statistics = { path = "datafusion/statistics" } datafusion-substrait = { path = "datafusion/substrait" } dashmap = "5.4.0" doc-comment = "0.3" diff --git a/datafusion/core/Cargo.toml b/datafusion/core/Cargo.toml index b44914ec719f..41a5d5b6fb2a 100644 --- a/datafusion/core/Cargo.toml +++ b/datafusion/core/Cargo.toml @@ -68,6 +68,7 @@ datafusion-expr = { workspace = true } datafusion-optimizer = { path = "../optimizer", version = "33.0.0", default-features = false } datafusion-physical-expr = { path = "../physical-expr", version = "33.0.0", default-features = false } datafusion-physical-plan = { workspace = true } +datafusion-statistics = { workspace = true } datafusion-sql = { workspace = true } flate2 = { version = "1.0.24", optional = true } futures = { workspace = true } diff --git a/datafusion/execution/Cargo.toml b/datafusion/execution/Cargo.toml index e9bb87e9f8ac..9b0268bd594c 100644 --- a/datafusion/execution/Cargo.toml +++ b/datafusion/execution/Cargo.toml @@ -38,6 +38,7 @@ chrono = { version = "0.4", default-features = false } dashmap = { workspace = true } datafusion-common = { workspace = true } datafusion-expr = { workspace = true } +datafusion-statistics = { workspace = true } futures = { workspace = true } hashbrown = { version = "0.14", features = ["raw"] } log = { workspace = true } diff --git a/datafusion/execution/src/cache/cache_manager.rs b/datafusion/execution/src/cache/cache_manager.rs index 97529263688b..376670c9c24f 100644 --- a/datafusion/execution/src/cache/cache_manager.rs +++ b/datafusion/execution/src/cache/cache_manager.rs @@ -16,7 +16,8 @@ // under the License. 
use crate::cache::CacheAccessor; -use datafusion_common::{Result, Statistics}; +use datafusion_common::Result; +use datafusion_statistics::Statistics; use object_store::path::Path; use object_store::ObjectMeta; use std::fmt::{Debug, Formatter}; diff --git a/datafusion/execution/src/cache/cache_unit.rs b/datafusion/execution/src/cache/cache_unit.rs index 4a21dc02bd13..d435810a93c4 100644 --- a/datafusion/execution/src/cache/cache_unit.rs +++ b/datafusion/execution/src/cache/cache_unit.rs @@ -19,7 +19,7 @@ use std::sync::Arc; use crate::cache::CacheAccessor; -use datafusion_common::Statistics; +use datafusion_statistics::Statistics; use dashmap::DashMap; use object_store::path::Path; diff --git a/datafusion/physical-expr/Cargo.toml b/datafusion/physical-expr/Cargo.toml index 4496e7215204..3d6f655c4a62 100644 --- a/datafusion/physical-expr/Cargo.toml +++ b/datafusion/physical-expr/Cargo.toml @@ -52,6 +52,7 @@ blake3 = { version = "1.0", optional = true } chrono = { workspace = true } datafusion-common = { workspace = true } datafusion-expr = { workspace = true } +datafusion-statistics = { workspace = true } half = { version = "2.1", default-features = false } hashbrown = { version = "0.14", features = ["raw"] } hex = { version = "0.4", optional = true } diff --git a/datafusion/physical-expr/src/analysis.rs b/datafusion/physical-expr/src/analysis.rs index 93c24014fd3e..1fc33e92ca04 100644 --- a/datafusion/physical-expr/src/analysis.rs +++ b/datafusion/physical-expr/src/analysis.rs @@ -27,10 +27,8 @@ use crate::utils::collect_columns; use crate::PhysicalExpr; use arrow::datatypes::Schema; -use datafusion_common::stats::Precision; -use datafusion_common::{ - internal_err, ColumnStatistics, DataFusionError, Result, ScalarValue, -}; +use datafusion_common::{internal_err, DataFusionError, Result, ScalarValue}; +use datafusion_statistics::{ColumnStatistics, Precision}; /// The shared context used during the analysis of an expression. 
Includes /// the boundaries for all known columns. diff --git a/datafusion/physical-plan/Cargo.toml b/datafusion/physical-plan/Cargo.toml index 82c8f49a764f..ca5479b32736 100644 --- a/datafusion/physical-plan/Cargo.toml +++ b/datafusion/physical-plan/Cargo.toml @@ -44,6 +44,7 @@ datafusion-common = { workspace = true } datafusion-execution = { workspace = true } datafusion-expr = { workspace = true } datafusion-physical-expr = { workspace = true } +datafusion-statistics = { workspace = true } futures = { workspace = true } half = { version = "2.1", default-features = false } hashbrown = { version = "0.14", features = ["raw"] } diff --git a/datafusion/physical-plan/src/aggregates/mod.rs b/datafusion/physical-plan/src/aggregates/mod.rs index 9cbf12aeeb88..7622eb8157cf 100644 --- a/datafusion/physical-plan/src/aggregates/mod.rs +++ b/datafusion/physical-plan/src/aggregates/mod.rs @@ -38,7 +38,6 @@ use crate::{ use arrow::array::ArrayRef; use arrow::datatypes::{Field, Schema, SchemaRef}; use arrow::record_batch::RecordBatch; -use datafusion_common::stats::Precision; use datafusion_common::{not_impl_err, plan_err, DataFusionError, Result}; use datafusion_execution::TaskContext; use datafusion_expr::Accumulator; @@ -49,6 +48,7 @@ use datafusion_physical_expr::{ physical_exprs_contains, reverse_order_bys, AggregateExpr, EquivalenceProperties, LexOrdering, LexRequirement, PhysicalExpr, PhysicalSortExpr, PhysicalSortRequirement, }; +use datafusion_statistics::Precision; use itertools::{izip, Itertools}; diff --git a/datafusion/physical-plan/src/common.rs b/datafusion/physical-plan/src/common.rs index 649f3a31aa7e..d056924eecf4 100644 --- a/datafusion/physical-plan/src/common.rs +++ b/datafusion/physical-plan/src/common.rs @@ -30,11 +30,11 @@ use crate::{ColumnStatistics, ExecutionPlan, Statistics}; use arrow::datatypes::Schema; use arrow::ipc::writer::{FileWriter, IpcWriteOptions}; use arrow::record_batch::RecordBatch; -use datafusion_common::stats::Precision; use 
datafusion_common::{plan_err, DataFusionError, Result}; use datafusion_execution::memory_pool::MemoryReservation; use datafusion_physical_expr::expressions::{BinaryExpr, Column}; use datafusion_physical_expr::{PhysicalExpr, PhysicalSortExpr}; +use datafusion_statistics::Precision; use futures::{Future, StreamExt, TryStreamExt}; use parking_lot::Mutex; diff --git a/datafusion/physical-plan/src/filter.rs b/datafusion/physical-plan/src/filter.rs index ce66d614721c..781266710ef1 100644 --- a/datafusion/physical-plan/src/filter.rs +++ b/datafusion/physical-plan/src/filter.rs @@ -37,7 +37,6 @@ use arrow::compute::filter_record_batch; use arrow::datatypes::{DataType, SchemaRef}; use arrow::record_batch::RecordBatch; use datafusion_common::cast::as_boolean_array; -use datafusion_common::stats::Precision; use datafusion_common::{plan_err, DataFusionError, Result}; use datafusion_execution::TaskContext; use datafusion_expr::Operator; @@ -48,6 +47,7 @@ use datafusion_physical_expr::{ analyze, split_conjunction, AnalysisContext, EquivalenceProperties, ExprBoundaries, PhysicalExpr, }; +use datafusion_statistics::Precision; use futures::stream::{Stream, StreamExt}; use log::trace; diff --git a/datafusion/physical-plan/src/joins/cross_join.rs b/datafusion/physical-plan/src/joins/cross_join.rs index 102f0c42e90c..f5ca28d1cb83 100644 --- a/datafusion/physical-plan/src/joins/cross_join.rs +++ b/datafusion/physical-plan/src/joins/cross_join.rs @@ -34,12 +34,12 @@ use crate::{ use arrow::datatypes::{Fields, Schema, SchemaRef}; use arrow::record_batch::RecordBatch; use arrow_array::RecordBatchOptions; -use datafusion_common::stats::Precision; use datafusion_common::{plan_err, DataFusionError, JoinType, Result, ScalarValue}; use datafusion_execution::memory_pool::{MemoryConsumer, MemoryReservation}; use datafusion_execution::TaskContext; use datafusion_physical_expr::equivalence::join_equivalence_properties; use datafusion_physical_expr::EquivalenceProperties; +use 
datafusion_statistics::Precision; use async_trait::async_trait; use futures::{ready, Stream, StreamExt, TryStreamExt}; diff --git a/datafusion/physical-plan/src/joins/nested_loop_join.rs b/datafusion/physical-plan/src/joins/nested_loop_join.rs index 6951642ff801..ad7deb797284 100644 --- a/datafusion/physical-plan/src/joins/nested_loop_join.rs +++ b/datafusion/physical-plan/src/joins/nested_loop_join.rs @@ -44,12 +44,13 @@ use arrow::array::{ use arrow::datatypes::{Schema, SchemaRef}; use arrow::record_batch::RecordBatch; use arrow::util::bit_util; -use datafusion_common::{exec_err, DataFusionError, JoinSide, Result, Statistics}; +use datafusion_common::{exec_err, DataFusionError, JoinSide, Result}; use datafusion_execution::memory_pool::{MemoryConsumer, MemoryReservation}; use datafusion_execution::TaskContext; use datafusion_expr::JoinType; use datafusion_physical_expr::equivalence::join_equivalence_properties; use datafusion_physical_expr::{EquivalenceProperties, PhysicalSortExpr}; +use datafusion_statistics::Statistics; use futures::{ready, Stream, StreamExt, TryStreamExt}; diff --git a/datafusion/statistics/src/lib.rs b/datafusion/statistics/src/lib.rs index a268ea052236..1fcf89ab9880 100644 --- a/datafusion/statistics/src/lib.rs +++ b/datafusion/statistics/src/lib.rs @@ -16,6 +16,5 @@ // under the License. /// Statistics for DataFusion - mod statistics; -pub use statistics::{Statistics, Precision, ColumnStatistics}; \ No newline at end of file +pub use statistics::{ColumnStatistics, Precision, Statistics};