-
Notifications
You must be signed in to change notification settings - Fork 1.3k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Support compute return types from argument values (not just their DataTypes) #8985
Changes from 16 commits
4e06013
17a2c91
02f2284
0c9acdd
56b71ae
3dbc0c7
491a4a1
468b38f
5772d9f
59b3958
f195fba
21d495f
b2e8457
4efb395
a9546ee
040d319
93b72ee
e0add48
653577f
7993af8
a3b9648
2121770
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -28,35 +28,37 @@ use crate::{utils, LogicalPlan, Projection, Subquery}; | |
use arrow::compute::can_cast_types; | ||
use arrow::datatypes::{DataType, Field}; | ||
use datafusion_common::{ | ||
internal_err, plan_datafusion_err, plan_err, Column, DFField, DFSchema, | ||
DataFusionError, ExprSchema, Result, | ||
internal_err, plan_datafusion_err, plan_err, Column, DFField, DataFusionError, | ||
ExprSchema, Result, | ||
}; | ||
use std::collections::HashMap; | ||
use std::sync::Arc; | ||
|
||
/// trait to allow expr to typable with respect to a schema | ||
pub trait ExprSchemable { | ||
/// given a schema, return the type of the expr | ||
fn get_type<S: ExprSchema>(&self, schema: &S) -> Result<DataType>; | ||
fn get_type(&self, schema: &dyn ExprSchema) -> Result<DataType>; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I had to change the traits to use I expect this to have 0 performance impact, but I will run the planning benchmarks to be sure if this acceptable There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I ran
And the results looked good ( within the noise threshold / reported 1% slower which I don't attribute to this change) |
||
|
||
/// given a schema, return the nullability of the expr | ||
fn nullable<S: ExprSchema>(&self, input_schema: &S) -> Result<bool>; | ||
fn nullable(&self, input_schema: &dyn ExprSchema) -> Result<bool>; | ||
|
||
/// given a schema, return the expr's optional metadata | ||
fn metadata<S: ExprSchema>(&self, schema: &S) -> Result<HashMap<String, String>>; | ||
fn metadata(&self, schema: &dyn ExprSchema) -> Result<HashMap<String, String>>; | ||
|
||
/// convert to a field with respect to a schema | ||
fn to_field(&self, input_schema: &DFSchema) -> Result<DFField>; | ||
fn to_field(&self, input_schema: &dyn ExprSchema) -> Result<DFField>; | ||
|
||
/// cast to a type with respect to a schema | ||
fn cast_to<S: ExprSchema>(self, cast_to_type: &DataType, schema: &S) -> Result<Expr>; | ||
fn cast_to(self, cast_to_type: &DataType, schema: &dyn ExprSchema) -> Result<Expr>; | ||
} | ||
|
||
impl ExprSchemable for Expr { | ||
/// Returns the [arrow::datatypes::DataType] of the expression | ||
/// based on [ExprSchema] | ||
/// | ||
/// Note: [DFSchema] implements [ExprSchema]. | ||
/// Note: [`DFSchema`] implements [ExprSchema]. | ||
/// | ||
/// [`DFSchema`]: datafusion_common::DFSchema | ||
/// | ||
/// # Examples | ||
/// | ||
|
@@ -90,7 +92,7 @@ impl ExprSchemable for Expr { | |
/// expression refers to a column that does not exist in the | ||
/// schema, or when the expression is incorrectly typed | ||
/// (e.g. `[utf8] + [bool]`). | ||
fn get_type<S: ExprSchema>(&self, schema: &S) -> Result<DataType> { | ||
fn get_type(&self, schema: &dyn ExprSchema) -> Result<DataType> { | ||
match self { | ||
Expr::Alias(Alias { expr, name, .. }) => match &**expr { | ||
Expr::Placeholder(Placeholder { data_type, .. }) => match &data_type { | ||
|
@@ -136,7 +138,7 @@ impl ExprSchemable for Expr { | |
fun.return_type(&arg_data_types) | ||
} | ||
ScalarFunctionDefinition::UDF(fun) => { | ||
Ok(fun.return_type(&arg_data_types)?) | ||
Ok(fun.return_type_from_exprs(args, schema)?) | ||
} | ||
ScalarFunctionDefinition::Name(_) => { | ||
internal_err!("Function `Expr` with name should be resolved.") | ||
|
@@ -213,14 +215,16 @@ impl ExprSchemable for Expr { | |
|
||
/// Returns the nullability of the expression based on [ExprSchema]. | ||
/// | ||
/// Note: [DFSchema] implements [ExprSchema]. | ||
/// Note: [`DFSchema`] implements [ExprSchema]. | ||
/// | ||
/// [`DFSchema`]: datafusion_common::DFSchema | ||
/// | ||
/// # Errors | ||
/// | ||
/// This function errors when it is not possible to compute its | ||
/// nullability. This happens when the expression refers to a | ||
/// column that does not exist in the schema. | ||
fn nullable<S: ExprSchema>(&self, input_schema: &S) -> Result<bool> { | ||
fn nullable(&self, input_schema: &dyn ExprSchema) -> Result<bool> { | ||
match self { | ||
Expr::Alias(Alias { expr, .. }) | ||
| Expr::Not(expr) | ||
|
@@ -327,7 +331,7 @@ impl ExprSchemable for Expr { | |
} | ||
} | ||
|
||
fn metadata<S: ExprSchema>(&self, schema: &S) -> Result<HashMap<String, String>> { | ||
fn metadata(&self, schema: &dyn ExprSchema) -> Result<HashMap<String, String>> { | ||
match self { | ||
Expr::Column(c) => Ok(schema.metadata(c)?.clone()), | ||
Expr::Alias(Alias { expr, .. }) => expr.metadata(schema), | ||
|
@@ -339,7 +343,7 @@ impl ExprSchemable for Expr { | |
/// | ||
/// So for example, a projected expression `col(c1) + col(c2)` is | ||
/// placed in an output field **named** col("c1 + c2") | ||
fn to_field(&self, input_schema: &DFSchema) -> Result<DFField> { | ||
fn to_field(&self, input_schema: &dyn ExprSchema) -> Result<DFField> { | ||
match self { | ||
Expr::Column(c) => Ok(DFField::new( | ||
c.relation.clone(), | ||
|
@@ -370,7 +374,7 @@ impl ExprSchemable for Expr { | |
/// | ||
/// This function errors when it is impossible to cast the | ||
/// expression to the target [arrow::datatypes::DataType]. | ||
fn cast_to<S: ExprSchema>(self, cast_to_type: &DataType, schema: &S) -> Result<Expr> { | ||
fn cast_to(self, cast_to_type: &DataType, schema: &dyn ExprSchema) -> Result<Expr> { | ||
let this_type = self.get_type(schema)?; | ||
if this_type == *cast_to_type { | ||
return Ok(self); | ||
|
@@ -394,10 +398,10 @@ impl ExprSchemable for Expr { | |
} | ||
|
||
/// return the schema [`Field`] for the type referenced by `get_indexed_field` | ||
fn field_for_index<S: ExprSchema>( | ||
fn field_for_index( | ||
expr: &Expr, | ||
field: &GetFieldAccess, | ||
schema: &S, | ||
schema: &dyn ExprSchema, | ||
) -> Result<Field> { | ||
let expr_dt = expr.get_type(schema)?; | ||
match field { | ||
|
@@ -457,7 +461,7 @@ mod tests { | |
use super::*; | ||
use crate::{col, lit}; | ||
use arrow::datatypes::{DataType, Fields}; | ||
use datafusion_common::{Column, ScalarValue, TableReference}; | ||
use datafusion_common::{Column, DFSchema, ScalarValue, TableReference}; | ||
|
||
macro_rules! test_is_expr_nullable { | ||
($EXPR_TYPE:ident) => {{ | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Here is an example of the feature working