diff --git a/datafusion/physical-plan/src/aggregates/group_values/mod.rs b/datafusion/physical-plan/src/aggregates/group_values/mod.rs index 12ed25a0ea34..a816203b6812 100644 --- a/datafusion/physical-plan/src/aggregates/group_values/mod.rs +++ b/datafusion/physical-plan/src/aggregates/group_values/mod.rs @@ -20,24 +20,27 @@ use arrow::record_batch::RecordBatch; use arrow_array::{downcast_primitive, ArrayRef}; use arrow_schema::{DataType, SchemaRef}; -use bytes_view::GroupValuesBytesView; use datafusion_common::Result; -pub(crate) mod primitive; use datafusion_expr::EmitTo; -use primitive::GroupValuesPrimitive; -mod multi_column; +pub(crate) mod multi_group_by; + mod row; -use multi_column::GroupValuesColumn; +mod single_group_by; +use datafusion_physical_expr::binary_map::OutputType; +use multi_group_by::GroupValuesColumn; use row::GroupValuesRows; -mod bytes; -mod bytes_view; -use bytes::GroupValuesByes; -use datafusion_physical_expr::binary_map::OutputType; +pub(crate) use single_group_by::primitive::HashValue; -use crate::aggregates::order::GroupOrdering; +use crate::aggregates::{ + group_values::single_group_by::{ + bytes::GroupValuesByes, bytes_view::GroupValuesBytesView, + primitive::GroupValuesPrimitive, + }, + order::GroupOrdering, +}; mod null_builder; @@ -77,7 +80,7 @@ mod null_builder; /// Each distinct group in a hash aggregation is identified by a unique group id /// (usize) which is assigned by instances of this trait. Group ids are /// continuous without gaps, starting from 0. -pub trait GroupValues: Send { +pub(crate) trait GroupValues: Send { /// Calculates the group id for each input row of `cols`, assigning new /// group ids as necessary. /// @@ -106,7 +109,21 @@ pub trait GroupValues: Send { } /// Return a specialized implementation of [`GroupValues`] for the given schema. -pub fn new_group_values( +/// +/// [`GroupValues`] implementations choosing logic: +/// +/// - If group by single column, and type of this column has +/// the specific [`GroupValues`] implementation, such implementation +/// will be chosen. +/// +/// - If group by multiple columns, and all column types have the specific +/// [`GroupColumn`] implementations, [`GroupValuesColumn`] will be chosen. +/// +/// - Otherwise, the general implementation [`GroupValuesRows`] will be chosen. +/// +/// [`GroupColumn`]: crate::aggregates::group_values::multi_group_by::GroupColumn +/// +pub(crate) fn new_group_values( schema: SchemaRef, group_ordering: &GroupOrdering, ) -> Result> { @@ -147,7 +164,7 @@ pub fn new_group_values( } } - if multi_column::supported_schema(schema.as_ref()) { + if multi_group_by::supported_schema(schema.as_ref()) { if matches!(group_ordering, GroupOrdering::None) { Ok(Box::new(GroupValuesColumn::::try_new(schema)?)) } else { diff --git a/datafusion/physical-plan/src/aggregates/group_values/multi_column/bytes.rs b/datafusion/physical-plan/src/aggregates/group_values/multi_group_by/bytes.rs similarity index 99% rename from datafusion/physical-plan/src/aggregates/group_values/multi_column/bytes.rs rename to datafusion/physical-plan/src/aggregates/group_values/multi_group_by/bytes.rs index 820d28fc58e7..35a79cbd91ed 100644 --- a/datafusion/physical-plan/src/aggregates/group_values/multi_column/bytes.rs +++ b/datafusion/physical-plan/src/aggregates/group_values/multi_group_by/bytes.rs @@ -15,7 +15,7 @@ // specific language governing permissions and limitations // under the License. -use crate::aggregates::group_values::multi_column::{nulls_equal_to, GroupColumn}; +use crate::aggregates::group_values::multi_group_by::{nulls_equal_to, GroupColumn}; use crate::aggregates::group_values::null_builder::MaybeNullBufferBuilder; use arrow::array::{AsArray, BufferBuilder, GenericBinaryArray, GenericStringArray}; use arrow::buffer::{OffsetBuffer, ScalarBuffer}; @@ -403,7 +403,7 @@ where mod tests { use std::sync::Arc; - use crate::aggregates::group_values::multi_column::bytes::ByteGroupValueBuilder; + use crate::aggregates::group_values::multi_group_by::bytes::ByteGroupValueBuilder; use arrow_array::{ArrayRef, StringArray}; use arrow_buffer::{BooleanBufferBuilder, NullBuffer}; use datafusion_physical_expr::binary_map::OutputType; diff --git a/datafusion/physical-plan/src/aggregates/group_values/multi_column/bytes_view.rs b/datafusion/physical-plan/src/aggregates/group_values/multi_group_by/bytes_view.rs similarity index 99% rename from datafusion/physical-plan/src/aggregates/group_values/multi_column/bytes_view.rs rename to datafusion/physical-plan/src/aggregates/group_values/multi_group_by/bytes_view.rs index 032b4d9e2a91..811790f4e588 100644 --- a/datafusion/physical-plan/src/aggregates/group_values/multi_column/bytes_view.rs +++ b/datafusion/physical-plan/src/aggregates/group_values/multi_group_by/bytes_view.rs @@ -15,7 +15,7 @@ // specific language governing permissions and limitations // under the License. -use crate::aggregates::group_values::multi_column::{nulls_equal_to, GroupColumn}; +use crate::aggregates::group_values::multi_group_by::{nulls_equal_to, GroupColumn}; use crate::aggregates::group_values::null_builder::MaybeNullBufferBuilder; use arrow::array::{make_view, AsArray, ByteView}; use arrow::buffer::ScalarBuffer; @@ -544,7 +544,7 @@ impl GroupColumn for ByteViewGroupValueBuilder { mod tests { use std::sync::Arc; - use crate::aggregates::group_values::multi_column::bytes_view::ByteViewGroupValueBuilder; + use crate::aggregates::group_values::multi_group_by::bytes_view::ByteViewGroupValueBuilder; use arrow::array::AsArray; use arrow::datatypes::StringViewType; use arrow_array::{ArrayRef, StringViewArray}; diff --git a/datafusion/physical-plan/src/aggregates/group_values/multi_column/mod.rs b/datafusion/physical-plan/src/aggregates/group_values/multi_group_by/mod.rs similarity index 99% rename from datafusion/physical-plan/src/aggregates/group_values/multi_column/mod.rs rename to datafusion/physical-plan/src/aggregates/group_values/multi_group_by/mod.rs index 191292c549f4..83b0f9d77369 100644 --- a/datafusion/physical-plan/src/aggregates/group_values/multi_column/mod.rs +++ b/datafusion/physical-plan/src/aggregates/group_values/multi_group_by/mod.rs @@ -15,13 +15,15 @@ // specific language governing permissions and limitations // under the License. +//! `GroupValues` implementations for multi group by cases + mod bytes; mod bytes_view; mod primitive; use std::mem::{self, size_of}; -use crate::aggregates::group_values::multi_column::{ +use crate::aggregates::group_values::multi_group_by::{ bytes::ByteGroupValueBuilder, bytes_view::ByteViewGroupValueBuilder, primitive::PrimitiveGroupValueBuilder, }; @@ -1138,7 +1140,9 @@ mod tests { use datafusion_common::utils::proxy::RawTableAllocExt; use datafusion_expr::EmitTo; - use crate::aggregates::group_values::{multi_column::GroupValuesColumn, GroupValues}; + use crate::aggregates::group_values::{ + multi_group_by::GroupValuesColumn, GroupValues, + }; use super::GroupIndexView; diff --git a/datafusion/physical-plan/src/aggregates/group_values/multi_column/primitive.rs b/datafusion/physical-plan/src/aggregates/group_values/multi_group_by/primitive.rs similarity index 98% rename from datafusion/physical-plan/src/aggregates/group_values/multi_column/primitive.rs rename to datafusion/physical-plan/src/aggregates/group_values/multi_group_by/primitive.rs index dff85ff7eb1a..4da482247458 100644 --- a/datafusion/physical-plan/src/aggregates/group_values/multi_column/primitive.rs +++ b/datafusion/physical-plan/src/aggregates/group_values/multi_group_by/primitive.rs @@ -15,7 +15,7 @@ // specific language governing permissions and limitations // under the License. -use crate::aggregates::group_values::multi_column::{nulls_equal_to, GroupColumn}; +use crate::aggregates::group_values::multi_group_by::{nulls_equal_to, GroupColumn}; use crate::aggregates::group_values::null_builder::MaybeNullBufferBuilder; use arrow::buffer::ScalarBuffer; use arrow_array::cast::AsArray; @@ -208,7 +208,7 @@ impl GroupColumn mod tests { use std::sync::Arc; - use crate::aggregates::group_values::multi_column::primitive::PrimitiveGroupValueBuilder; + use crate::aggregates::group_values::multi_group_by::primitive::PrimitiveGroupValueBuilder; use arrow::datatypes::Int64Type; use arrow_array::{ArrayRef, Int64Array}; use arrow_buffer::{BooleanBufferBuilder, NullBuffer}; diff --git a/datafusion/physical-plan/src/aggregates/group_values/bytes.rs b/datafusion/physical-plan/src/aggregates/group_values/single_group_by/bytes.rs similarity index 100% rename from datafusion/physical-plan/src/aggregates/group_values/bytes.rs rename to datafusion/physical-plan/src/aggregates/group_values/single_group_by/bytes.rs diff --git a/datafusion/physical-plan/src/aggregates/group_values/bytes_view.rs b/datafusion/physical-plan/src/aggregates/group_values/single_group_by/bytes_view.rs similarity index 100% rename from datafusion/physical-plan/src/aggregates/group_values/bytes_view.rs rename to datafusion/physical-plan/src/aggregates/group_values/single_group_by/bytes_view.rs diff --git a/datafusion/physical-plan/src/aggregates/group_values/single_group_by/mod.rs b/datafusion/physical-plan/src/aggregates/group_values/single_group_by/mod.rs new file mode 100644 index 000000000000..417618ba66af --- /dev/null +++ b/datafusion/physical-plan/src/aggregates/group_values/single_group_by/mod.rs @@ -0,0 +1,22 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! `GroupValues` implementations for single group by cases + +pub(crate) mod bytes; +pub(crate) mod bytes_view; +pub(crate) mod primitive; diff --git a/datafusion/physical-plan/src/aggregates/group_values/primitive.rs b/datafusion/physical-plan/src/aggregates/group_values/single_group_by/primitive.rs similarity index 100% rename from datafusion/physical-plan/src/aggregates/group_values/primitive.rs rename to datafusion/physical-plan/src/aggregates/group_values/single_group_by/primitive.rs diff --git a/datafusion/physical-plan/src/aggregates/mod.rs b/datafusion/physical-plan/src/aggregates/mod.rs index 2220007fdd72..260c3a1c48de 100644 --- a/datafusion/physical-plan/src/aggregates/mod.rs +++ b/datafusion/physical-plan/src/aggregates/mod.rs @@ -52,7 +52,7 @@ use crate::execution_plan::CardinalityEffect; use datafusion_physical_expr::aggregate::AggregateFunctionExpr; use itertools::Itertools; -pub mod group_values; +pub(crate) mod group_values; mod no_grouping; pub mod order; mod row_hash; diff --git a/datafusion/physical-plan/src/aggregates/topk/hash_table.rs b/datafusion/physical-plan/src/aggregates/topk/hash_table.rs index 34df643b6cf0..23a07ebec305 100644 --- a/datafusion/physical-plan/src/aggregates/topk/hash_table.rs +++ b/datafusion/physical-plan/src/aggregates/topk/hash_table.rs @@ -17,7 +17,7 @@ //! A wrapper around `hashbrown::RawTable` that allows entries to be tracked by index -use crate::aggregates::group_values::primitive::HashValue; +use crate::aggregates::group_values::HashValue; use crate::aggregates::topk::heap::Comparable; use ahash::RandomState; use arrow::datatypes::i256;