Skip to content

Commit

Permalink
Merge branch 'fr/native-and-logical-types' into logical-signature
Browse files Browse the repository at this point in the history
  • Loading branch information
jayzhan211 committed Oct 25, 2024
2 parents 5ca1a62 + 7ed7891 commit 0e66e21
Show file tree
Hide file tree
Showing 5 changed files with 496 additions and 0 deletions.
1 change: 1 addition & 0 deletions datafusion/common/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ pub mod scalar;
pub mod stats;
pub mod test_util;
pub mod tree_node;
pub mod types;
pub mod utils;

/// Reexport arrow crate
Expand Down
114 changes: 114 additions & 0 deletions datafusion/common/src/types/field.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,114 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

use arrow_schema::{Field, Fields, UnionFields};
use std::hash::{Hash, Hasher};
use std::{ops::Deref, sync::Arc};

use super::{LogicalTypeRef, NativeType};

/// A record of a logical type, its name and its nullability.
///
/// `Eq`, `PartialOrd` and `Ord` are derived, so ordering compares the fields
/// in declaration order (`name`, then `logical_type`, then `nullable`).
/// `PartialEq` and `Hash` are written by hand elsewhere in this file and use
/// the same three fields.
#[derive(Debug, Clone, Eq, PartialOrd, Ord)]
pub struct LogicalField {
/// Name of the field.
pub name: String,
/// Logical type of the field, held through a shared [`LogicalTypeRef`].
pub logical_type: LogicalTypeRef,
/// Whether the field is nullable.
pub nullable: bool,
}

impl PartialEq for LogicalField {
    /// Two fields are equal when name, logical type and nullability all match.
    fn eq(&self, other: &Self) -> bool {
        // Checked in the same order as the fields are declared; bail out on
        // the first mismatch.
        if self.name != other.name {
            return false;
        }
        if !self.logical_type.eq(&other.logical_type) {
            return false;
        }
        self.nullable == other.nullable
    }
}

impl Hash for LogicalField {
    /// Feeds name, logical type and nullability into `state`, in that order.
    fn hash<H: Hasher>(&self, state: &mut H) {
        // Exhaustive destructuring: adding a field to the struct forces this
        // impl to be revisited.
        let Self {
            name,
            logical_type,
            nullable,
        } = self;
        name.hash(state);
        logical_type.hash(state);
        nullable.hash(state);
    }
}

impl From<&Field> for LogicalField {
    /// Builds a logical field from an arrow [`Field`], copying its name and
    /// nullability and converting its data type into a [`NativeType`].
    fn from(value: &Field) -> Self {
        let name = value.name().clone();
        let native = NativeType::from(value.data_type().clone());
        let nullable = value.is_nullable();

        Self {
            name,
            logical_type: Arc::new(native),
            nullable,
        }
    }
}

/// A reference counted [`LogicalField`], i.e. a `LogicalField` behind an
/// [`Arc`].
pub type LogicalFieldRef = Arc<LogicalField>;

/// A cheaply cloneable, owned collection of [`LogicalFieldRef`].
///
/// The fields are stored in an `Arc<[LogicalFieldRef]>`, so cloning the
/// collection clones the `Arc` rather than the slice contents.
#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
pub struct LogicalFields(Arc<[LogicalFieldRef]>);

impl Deref for LogicalFields {
    type Target = [LogicalFieldRef];

    /// Exposes the wrapped fields as a plain slice.
    fn deref(&self) -> &Self::Target {
        // Deref coercion turns `&Arc<[_]>` into `&[_]`.
        &self.0
    }
}

impl From<&Fields> for LogicalFields {
    /// Converts every arrow field into a [`LogicalField`], preserving order.
    fn from(value: &Fields) -> Self {
        let converted = value.iter().map(|arrow_field| {
            let logical = LogicalField::from(arrow_field.as_ref());
            Arc::new(logical)
        });
        converted.collect()
    }
}

impl FromIterator<LogicalFieldRef> for LogicalFields {
    /// Collects the yielded field references into a new [`LogicalFields`].
    fn from_iter<T: IntoIterator<Item = LogicalFieldRef>>(iter: T) -> Self {
        let fields: Arc<[LogicalFieldRef]> = iter.into_iter().collect();
        Self(fields)
    }
}

/// A cheaply cloneable, owned collection of [`LogicalFieldRef`] and their
/// corresponding type ids.
///
/// Each entry is a `(type id, field)` pair; the pairs are stored in an
/// `Arc<[(i8, LogicalFieldRef)]>`, so cloning the collection clones the `Arc`
/// rather than the slice contents.
#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
pub struct LogicalUnionFields(Arc<[(i8, LogicalFieldRef)]>);

impl Deref for LogicalUnionFields {
    type Target = [(i8, LogicalFieldRef)];

    /// Exposes the wrapped `(type id, field)` pairs as a plain slice.
    fn deref(&self) -> &Self::Target {
        // Deref coercion turns `&Arc<[_]>` into `&[_]`.
        &self.0
    }
}

impl From<&UnionFields> for LogicalUnionFields {
    /// Converts every arrow union field into a [`LogicalField`], keeping the
    /// type id each field was paired with.
    fn from(value: &UnionFields) -> Self {
        let entries = value.iter().map(|(type_id, arrow_field)| {
            let logical = LogicalField::from(arrow_field.as_ref());
            (type_id, Arc::new(logical))
        });
        entries.collect()
    }
}

impl FromIterator<(i8, LogicalFieldRef)> for LogicalUnionFields {
    /// Collects the yielded `(type id, field)` pairs into a new
    /// [`LogicalUnionFields`].
    fn from_iter<T: IntoIterator<Item = (i8, LogicalFieldRef)>>(iter: T) -> Self {
        let entries: Arc<[(i8, LogicalFieldRef)]> = iter.into_iter().collect();
        Self(entries)
    }
}
118 changes: 118 additions & 0 deletions datafusion/common/src/types/logical.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,118 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

use core::fmt;
use std::{cmp::Ordering, hash::Hash, sync::Arc};

use super::NativeType;

/// Signature that uniquely identifies a type among other types.
///
/// A signature only borrows its contents for the lifetime `'a`; it is
/// obtained from [`LogicalType::signature`].
#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
pub enum TypeSignature<'a> {
/// Represents a built-in native type.
Native(&'a NativeType),
/// Represents an arrow-compatible extension type.
/// (<https://arrow.apache.org/docs/format/Columnar.html#extension-types>)
///
/// The `name` should contain the same value as 'ARROW:extension:name'.
Extension {
/// Name of the extension type.
name: &'a str,
/// Parameters of the extension type; each one is either a nested type
/// signature or a number (see [`TypeParameter`]).
parameters: &'a [TypeParameter<'a>],
},
}

/// A single parameter of a [`TypeSignature::Extension`] signature.
#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
pub enum TypeParameter<'a> {
/// A parameter that is itself a type, described by its signature.
Type(TypeSignature<'a>),
/// A numeric parameter.
Number(i128),
}

/// A reference counted [`LogicalType`], i.e. a `dyn LogicalType` trait object
/// behind an [`Arc`].
pub type LogicalTypeRef = Arc<dyn LogicalType>;

/// Representation of a logical type with its signature and its native backing
/// type.
///
/// The logical type is meant to be used during the DataFusion logical planning
/// phase in order to reason about logical types without worrying about their
/// underlying physical implementation.
///
/// Implementors must be `Send + Sync`. The `Debug`, `PartialEq`, `Ord` and
/// `Hash` implementations for `dyn LogicalType` in this file are computed
/// solely from [`LogicalType::native`] and [`LogicalType::signature`].
///
/// ### Extension types
///
/// [`LogicalType`] is a trait in order to allow the possibility of declaring
/// extension types:
///
/// ```
/// use datafusion_common::types::{LogicalType, NativeType, TypeSignature};
///
/// struct JSON {}
///
/// impl LogicalType for JSON {
/// fn native(&self) -> &NativeType {
/// &NativeType::Utf8
/// }
///
/// fn signature(&self) -> TypeSignature<'_> {
/// TypeSignature::Extension {
/// name: "JSON",
/// parameters: &[],
/// }
/// }
/// }
/// ```
pub trait LogicalType: Sync + Send {
/// Returns the native type backing this logical type.
fn native(&self) -> &NativeType;
/// Returns the signature identifying this logical type; the returned value
/// borrows from `self`.
fn signature(&self) -> TypeSignature<'_>;
}

impl fmt::Debug for dyn LogicalType {
    /// Formats as a `LogicalType(..)` tuple holding the signature followed by
    /// the native type.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let mut tuple = f.debug_tuple("LogicalType");
        tuple.field(&self.signature());
        tuple.field(&self.native());
        tuple.finish()
    }
}

impl PartialEq for dyn LogicalType {
    /// Two logical types are equal when both their native types and their
    /// signatures are equal.
    fn eq(&self, other: &Self) -> bool {
        if self.native() != other.native() {
            return false;
        }
        self.signature() == other.signature()
    }
}

// Marker impl: declares the `PartialEq` comparison of `native()` and
// `signature()` to be a full equivalence relation.
impl Eq for dyn LogicalType {}

impl PartialOrd for dyn LogicalType {
    /// Always returns `Some`: the ordering is delegated to the total order
    /// defined by the `Ord` implementation.
    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
        let ordering = Ord::cmp(self, other);
        Some(ordering)
    }
}

impl Ord for dyn LogicalType {
    /// Orders logical types by signature first; the native types are only
    /// compared to break ties between equal signatures.
    fn cmp(&self, other: &Self) -> Ordering {
        // `then_with` defers the native-type comparison until the signatures
        // turn out to be equal. `then` would evaluate it eagerly on every
        // call, even when the result is discarded.
        self.signature()
            .cmp(&other.signature())
            .then_with(|| self.native().cmp(other.native()))
    }
}

impl Hash for dyn LogicalType {
    /// Feeds the signature and then the native type into `state`.
    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
        let signature = self.signature();
        signature.hash(state);

        let native = self.native();
        native.hash(state);
    }
}
24 changes: 24 additions & 0 deletions datafusion/common/src/types/mod.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

mod field;
mod logical;
mod native;

pub use field::*;
pub use logical::*;
pub use native::*;
Loading

0 comments on commit 0e66e21

Please sign in to comment.