Skip to content

Commit

Permalink
add support for NOT and column expressions
Browse files Browse the repository at this point in the history
  • Loading branch information
scovich committed Jan 18, 2025
1 parent 8494126 commit d177ae9
Show file tree
Hide file tree
Showing 4 changed files with 60 additions and 29 deletions.
47 changes: 39 additions & 8 deletions kernel/src/predicates/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -252,7 +252,7 @@ pub(crate) trait PredicateEvaluator {
}
}

/// Evaluates a predicate with SQL WHERE semantics.
/// Evaluates a (possibly inverted) predicate with SQL WHERE semantics.
///
/// By default, [`eval_expr`] behaves badly for comparisons involving NULL columns (e.g. `a <
/// 10` when `a` is NULL), because the comparison correctly evaluates to NULL, but NULL
Expand Down Expand Up @@ -351,26 +351,57 @@ pub(crate) trait PredicateEvaluator {
/// AND(..., AND(NULL, TRUE, NULL), ...)
/// AND(..., NULL, ...)
/// ```
fn eval_sql_where(&self, filter: &Expr) -> Option<Self::Output> {
use Expr::{Binary, Variadic};
///
/// WARNING: Not an idempotent transform. If data skipping eval produces a sql predicate,
/// evaluating the result with sql semantics has undefined behavior.
fn eval_expr_sql_where(&self, filter: &Expr, inverted: bool) -> Option<Self::Output> {
use Expr::*;
match filter {
Variadic(v) => {
// Recursively invoke `eval_sql_where` instead of the usual `eval_expr` for AND/OR.
let exprs = v.exprs.iter().map(|expr| self.eval_sql_where(expr));
self.finish_eval_variadic(v.op, exprs, false)
// Recursively invoke `eval_expr_sql_where` instead of the usual `eval_expr` for AND/OR.
let exprs = v
.exprs
.iter()
.map(|expr| self.eval_expr_sql_where(expr, inverted));
self.finish_eval_variadic(v.op, exprs, inverted)
}
Binary(BinaryExpression { op, left, right }) if op.is_null_intolerant_comparison() => {
// Perform a nullsafe comparison instead of the usual `eval_binary`
let exprs = [
self.eval_unary(UnaryOperator::IsNull, left, true),
self.eval_unary(UnaryOperator::IsNull, right, true),
self.eval_binary(*op, left, right, false),
self.eval_binary(*op, left, right, inverted),
];
self.finish_eval_variadic(VariadicOperator::And, exprs, false)
}
Unary(UnaryExpression {
op: UnaryOperator::Not,
expr,
}) => self.eval_expr_sql_where(expr, !inverted),
Column(col) => {
// Perform a nullsafe comparison instead of the usual `eval_column`
let exprs = [
self.eval_unary(UnaryOperator::IsNull, filter, true),
self.eval_column(col, inverted),
];
self.finish_eval_variadic(VariadicOperator::And, exprs, false)
}
_ => self.eval_expr(filter, false),
// NOTE: It's unsafe to process other expressions like DISTINCT, IS NULL, and literals
// with null-safe semantics, so just process them normally.
_ => self.eval_expr(filter, inverted),
}
}

/// A convenient wrapper for [`eval_expr`].
#[cfg(test)]
fn eval(&self, expr: &Expr) -> Option<Self::Output> {
self.eval_expr(expr, false)
}

/// A convenient wrapper for [`eval_expr_sql_where`].
fn eval_sql_where(&self, expr: &Expr) -> Option<Self::Output> {
self.eval_expr_sql_where(expr, false)
}
}

/// A collection of provided methods from the [`PredicateEvaluator`] trait, factored out to allow
Expand Down
12 changes: 2 additions & 10 deletions kernel/src/predicates/parquet_stats_skipping/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -172,11 +172,7 @@ fn test_eval_binary_comparisons() {
let do_test = |min: Scalar, max: Scalar, expected: &[Option<bool>]| {
let filter = MinMaxTestFilter::new(Some(min.clone()), Some(max.clone()));
for (expr, expect) in expressions.iter().zip(expected.iter()) {
expect_eq!(
filter.eval_expr(expr, false),
*expect,
"{expr:#?} with [{min}..{max}]"
);
expect_eq!(filter.eval(expr), *expect, "{expr:#?} with [{min}..{max}]");
}
};

Expand Down Expand Up @@ -240,11 +236,7 @@ fn test_eval_is_null() {
let do_test = |nullcount: i64, expected: &[Option<bool>]| {
let filter = NullCountTestFilter::new(Some(nullcount), 2);
for (expr, expect) in expressions.iter().zip(expected) {
expect_eq!(
filter.eval_expr(expr, false),
*expect,
"{expr:#?} ({nullcount} nulls)"
);
expect_eq!(filter.eval(expr), *expect, "{expr:#?} ({nullcount} nulls)");
}
};

Expand Down
28 changes: 18 additions & 10 deletions kernel/src/predicates/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -588,62 +588,70 @@ fn test_sql_where() {
let null_filter = DefaultPredicateEvaluator::from(NullColumnResolver);
let empty_filter = DefaultPredicateEvaluator::from(EmptyColumnResolver);

// Basic sanity checks
// Basic sanity check
expect_eq!(null_filter.eval_sql_where(&VAL), None, "WHERE {VAL}");
expect_eq!(null_filter.eval_sql_where(col), None, "WHERE {col}");
expect_eq!(empty_filter.eval_sql_where(&VAL), None, "WHERE {VAL}");

expect_eq!(null_filter.eval_sql_where(col), Some(false), "WHERE {col}");
expect_eq!(empty_filter.eval_sql_where(col), None, "WHERE {col}");

// SQL eval does not modify behavior of IS NULL
let expr = &Expr::is_null(col.clone());
expect_eq!(null_filter.eval_sql_where(expr), Some(true), "{expr}");

// NOT a gets skipped when NULL but not when missing
let expr = &!col.clone();
expect_eq!(null_filter.eval_sql_where(expr), Some(false), "{expr}");
expect_eq!(empty_filter.eval_sql_where(expr), None, "{expr}");

// Injected NULL checks only short circuit if inputs are NULL
let expr = &Expr::lt(FALSE, TRUE);
expect_eq!(null_filter.eval_sql_where(expr), Some(true), "{expr}");
expect_eq!(empty_filter.eval_sql_where(expr), Some(true), "{expr}");

// Constrast normal vs SQL WHERE semantics - comparison
let expr = &Expr::lt(col.clone(), VAL);
expect_eq!(null_filter.eval_expr(expr, false), None, "{expr}");
expect_eq!(null_filter.eval(expr), None, "{expr}");
expect_eq!(null_filter.eval_sql_where(expr), Some(false), "{expr}");
// NULL check produces NULL due to missing column
expect_eq!(empty_filter.eval_sql_where(expr), None, "{expr}");

let expr = &Expr::lt(VAL, col.clone());
expect_eq!(null_filter.eval_expr(expr, false), None, "{expr}");
expect_eq!(null_filter.eval(expr), None, "{expr}");
expect_eq!(null_filter.eval_sql_where(expr), Some(false), "{expr}");
expect_eq!(empty_filter.eval_sql_where(expr), None, "{expr}");

let expr = &Expr::distinct(VAL, col.clone());
expect_eq!(null_filter.eval_expr(expr, false), Some(true), "{expr}");
expect_eq!(null_filter.eval(expr), Some(true), "{expr}");
expect_eq!(null_filter.eval_sql_where(expr), Some(true), "{expr}");
expect_eq!(empty_filter.eval_sql_where(expr), None, "{expr}");

let expr = &Expr::distinct(NULL, col.clone());
expect_eq!(null_filter.eval_expr(expr, false), Some(false), "{expr}");
expect_eq!(null_filter.eval(expr), Some(false), "{expr}");
expect_eq!(null_filter.eval_sql_where(expr), Some(false), "{expr}");
expect_eq!(empty_filter.eval_sql_where(expr), None, "{expr}");

// Constrast normal vs SQL WHERE semantics - comparison inside AND
let expr = &Expr::and(NULL, Expr::lt(col.clone(), VAL));
expect_eq!(null_filter.eval_expr(expr, false), None, "{expr}");
expect_eq!(null_filter.eval(expr), None, "{expr}");
expect_eq!(null_filter.eval_sql_where(expr), Some(false), "{expr}");
expect_eq!(empty_filter.eval_sql_where(expr), None, "{expr}");

// NULL/FALSE (from the NULL check) is stronger than TRUE
let expr = &Expr::and(TRUE, Expr::lt(col.clone(), VAL));
expect_eq!(null_filter.eval_expr(expr, false), None, "{expr}");
expect_eq!(null_filter.eval(expr), None, "{expr}");
expect_eq!(null_filter.eval_sql_where(expr), Some(false), "{expr}");
expect_eq!(empty_filter.eval_sql_where(expr), None, "{expr}");

// Contrast normal vs. SQL WHERE semantics - comparison inside AND inside AND
let expr = &Expr::and(TRUE, Expr::and(NULL, Expr::lt(col.clone(), VAL)));
expect_eq!(null_filter.eval_expr(expr, false), None, "{expr}");
expect_eq!(null_filter.eval(expr), None, "{expr}");
expect_eq!(null_filter.eval_sql_where(expr), Some(false), "{expr}");
expect_eq!(empty_filter.eval_sql_where(expr), None, "{expr}");

// Ditto for comparison inside OR inside AND
let expr = &Expr::or(FALSE, Expr::and(NULL, Expr::lt(col.clone(), VAL)));
expect_eq!(null_filter.eval_expr(expr, false), None, "{expr}");
expect_eq!(null_filter.eval(expr), None, "{expr}");
expect_eq!(null_filter.eval_sql_where(expr), Some(false), "{expr}");
expect_eq!(empty_filter.eval_sql_where(expr), None, "{expr}");
}
2 changes: 1 addition & 1 deletion kernel/src/scan/data_skipping.rs
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ mod tests;
/// expression is dropped.
#[cfg(test)]
fn as_data_skipping_predicate(expr: &Expr) -> Option<Expr> {
DataSkippingPredicateCreator.eval_expr(expr, false)
DataSkippingPredicateCreator.eval(expr)
}

/// Like `as_data_skipping_predicate`, but invokes [`PredicateEvaluator::eval_sql_where`] instead
Expand Down

0 comments on commit d177ae9

Please sign in to comment.