Skip to content

Commit

Permalink
add support for NULL literals
Browse files Browse the repository at this point in the history
  • Loading branch information
scovich committed Jan 22, 2025
1 parent d177ae9 commit a905db1
Show file tree
Hide file tree
Showing 5 changed files with 39 additions and 14 deletions.
2 changes: 1 addition & 1 deletion kernel/src/expressions/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -259,7 +259,7 @@ impl Expression {
Self::Literal(value.into())
}

pub fn null_literal(data_type: DataType) -> Self {
pub const fn null_literal(data_type: DataType) -> Self {
Self::Literal(Scalar::Null(data_type))
}

Expand Down
26 changes: 24 additions & 2 deletions kernel/src/predicates/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -386,8 +386,30 @@ pub(crate) trait PredicateEvaluator {
];
self.finish_eval_variadic(VariadicOperator::And, exprs, false)
}
// NOTE: It's unsafe to process other expressions like DISTINCT, IS NULL, and literals
// with null-safe semantics, so just process them normally.
Literal(val) if val.is_null() => {
// AND(NULL IS NOT NULL, NULL) = AND(FALSE, NULL) = FALSE
self.eval_scalar(&Scalar::from(false), false)
}
// Process all remaining expressions normally, because they are not proven safe. Indeed,
// expressions like DISTINCT and IS [NOT] NULL are known-unsafe under SQL semantics:
//
// ```
// x IS NULL # when x really is NULL
// = AND(x IS NOT NULL, x IS NULL)
// = AND(FALSE, TRUE)
// = FALSE
//
// DISTINCT(x, 10) # when x is NULL
// = AND(x IS NOT NULL, 10 IS NOT NULL, DISTINCT(x, 10))
// = AND(FALSE, TRUE, TRUE)
// = FALSE
//
// DISTINCT(x, NULL) # when x is not NULL
// = AND(x IS NOT NULL, NULL IS NOT NULL, DISTINCT(x, NULL))
// = AND(TRUE, FALSE, TRUE)
// = FALSE
// ```
//
_ => self.eval_expr(filter, inverted),
}
}
Expand Down
12 changes: 6 additions & 6 deletions kernel/src/predicates/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -632,25 +632,25 @@ fn test_sql_where() {
expect_eq!(empty_filter.eval_sql_where(expr), None, "{expr}");

// Constrast normal vs SQL WHERE semantics - comparison inside AND
let expr = &Expr::and(NULL, Expr::lt(col.clone(), VAL));
let expr = &Expr::and(TRUE, Expr::lt(col.clone(), VAL));
expect_eq!(null_filter.eval(expr), None, "{expr}");
expect_eq!(null_filter.eval_sql_where(expr), Some(false), "{expr}");
expect_eq!(empty_filter.eval_sql_where(expr), None, "{expr}");

// NULL/FALSE (from the NULL check) is stronger than TRUE
let expr = &Expr::and(TRUE, Expr::lt(col.clone(), VAL));
// NULL allows static skipping under SQL semantics
let expr = &Expr::and(NULL, Expr::lt(col.clone(), VAL));
expect_eq!(null_filter.eval(expr), None, "{expr}");
expect_eq!(null_filter.eval_sql_where(expr), Some(false), "{expr}");
expect_eq!(empty_filter.eval_sql_where(expr), None, "{expr}");
expect_eq!(empty_filter.eval_sql_where(expr), Some(false), "{expr}");

// Contrast normal vs. SQL WHERE semantics - comparison inside AND inside AND
let expr = &Expr::and(TRUE, Expr::and(NULL, Expr::lt(col.clone(), VAL)));
let expr = &Expr::and(TRUE, Expr::and(TRUE, Expr::lt(col.clone(), VAL)));
expect_eq!(null_filter.eval(expr), None, "{expr}");
expect_eq!(null_filter.eval_sql_where(expr), Some(false), "{expr}");
expect_eq!(empty_filter.eval_sql_where(expr), None, "{expr}");

// Ditto for comparison inside OR inside AND
let expr = &Expr::or(FALSE, Expr::and(NULL, Expr::lt(col.clone(), VAL)));
let expr = &Expr::or(FALSE, Expr::and(TRUE, Expr::lt(col.clone(), VAL)));
expect_eq!(null_filter.eval(expr), None, "{expr}");
expect_eq!(null_filter.eval_sql_where(expr), Some(false), "{expr}");
expect_eq!(empty_filter.eval_sql_where(expr), None, "{expr}");
Expand Down
9 changes: 5 additions & 4 deletions kernel/src/scan/data_skipping/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -319,16 +319,17 @@ fn test_sql_where() {
do_test(ALL_NULL, expr, MISSING, None, None);

// Comparison inside AND works
let expr = &Expr::and(NULL, Expr::lt(col.clone(), VAL));
let expr = &Expr::and(TRUE, Expr::lt(VAL, col.clone()));
do_test(ALL_NULL, expr, PRESENT, None, Some(false));
do_test(ALL_NULL, expr, MISSING, None, None);

let expr = &Expr::and(TRUE, Expr::lt(VAL, col.clone()));
// NULL inside AND allows static skipping under SQL semantics
let expr = &Expr::and(NULL, Expr::lt(col.clone(), VAL));
do_test(ALL_NULL, expr, PRESENT, None, Some(false));
do_test(ALL_NULL, expr, MISSING, None, None);
do_test(ALL_NULL, expr, MISSING, None, Some(false));

// Comparison inside AND inside AND works
let expr = &Expr::and(TRUE, Expr::and(NULL, Expr::lt(col.clone(), VAL)));
let expr = &Expr::and(TRUE, Expr::and(TRUE, Expr::lt(col.clone(), VAL)));
do_test(ALL_NULL, expr, PRESENT, None, Some(false));
do_test(ALL_NULL, expr, MISSING, None, None);

Expand Down
4 changes: 3 additions & 1 deletion kernel/src/scan/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -776,17 +776,19 @@ mod tests {

#[test]
fn test_static_skipping() {
const NULL: Expression = Expression::null_literal(DataType::BOOLEAN);
let test_cases = [
(false, column_expr!("a")),
(true, Expression::literal(false)),
(false, Expression::literal(true)),
(false, Expression::null_literal(DataType::LONG)),
(true, NULL),
(true, Expression::and(column_expr!("a"), false)),
(false, Expression::or(column_expr!("a"), true)),
(false, Expression::or(column_expr!("a"), false)),
(false, Expression::lt(column_expr!("a"), 10)),
(false, Expression::lt(Expression::literal(10), 100)),
(true, Expression::gt(Expression::literal(10), 100)),
(true, Expression::and(NULL, column_expr!("a"))),
];
for (should_skip, predicate) in test_cases {
assert_eq!(
Expand Down

0 comments on commit a905db1

Please sign in to comment.