diff --git a/kernel/src/expressions/mod.rs b/kernel/src/expressions/mod.rs index 47d35afa1..9f4972408 100644 --- a/kernel/src/expressions/mod.rs +++ b/kernel/src/expressions/mod.rs @@ -259,7 +259,7 @@ impl Expression { Self::Literal(value.into()) } - pub fn null_literal(data_type: DataType) -> Self { + pub const fn null_literal(data_type: DataType) -> Self { Self::Literal(Scalar::Null(data_type)) } diff --git a/kernel/src/predicates/mod.rs b/kernel/src/predicates/mod.rs index 6a9ebaaf6..e93f78238 100644 --- a/kernel/src/predicates/mod.rs +++ b/kernel/src/predicates/mod.rs @@ -386,8 +386,30 @@ pub(crate) trait PredicateEvaluator { ]; self.finish_eval_variadic(VariadicOperator::And, exprs, false) } - // NOTE: It's unsafe to process other expressions like DISTINCT, IS NULL, and literals - // with null-safe semantics, so just process them normally. + Literal(val) if val.is_null() => { + // AND(NULL IS NOT NULL, NULL) = AND(FALSE, NULL) = FALSE + self.eval_scalar(&Scalar::from(false), false) + } + // Process all remaining expressions normally, because they are not proven safe. Indeed, + // expressions like DISTINCT and IS [NOT] NULL are known-unsafe under SQL semantics: + // + // ``` + // x IS NULL # when x really is NULL + // = AND(x IS NOT NULL, x IS NULL) + // = AND(FALSE, TRUE) + // = FALSE + // + // DISTINCT(x, 10) # when x is NULL + // = AND(x IS NOT NULL, 10 IS NOT NULL, DISTINCT(x, 10)) + // = AND(FALSE, TRUE, TRUE) + // = FALSE + // + // DISTINCT(x, NULL) # when x is not NULL + // = AND(x IS NOT NULL, NULL IS NOT NULL, DISTINCT(x, NULL)) + // = AND(TRUE, FALSE, TRUE) + // = FALSE + // ``` + // _ => self.eval_expr(filter, inverted), } } diff --git a/kernel/src/predicates/tests.rs b/kernel/src/predicates/tests.rs index 1911b8474..b57d0759a 100644 --- a/kernel/src/predicates/tests.rs +++ b/kernel/src/predicates/tests.rs @@ -632,25 +632,25 @@ fn test_sql_where() { expect_eq!(empty_filter.eval_sql_where(expr), None, "{expr}"); // Constrast normal vs SQL WHERE semantics - comparison inside AND - let expr = &Expr::and(NULL, Expr::lt(col.clone(), VAL)); + let expr = &Expr::and(TRUE, Expr::lt(col.clone(), VAL)); expect_eq!(null_filter.eval(expr), None, "{expr}"); expect_eq!(null_filter.eval_sql_where(expr), Some(false), "{expr}"); expect_eq!(empty_filter.eval_sql_where(expr), None, "{expr}"); - // NULL/FALSE (from the NULL check) is stronger than TRUE - let expr = &Expr::and(TRUE, Expr::lt(col.clone(), VAL)); + // NULL allows static skipping under SQL semantics + let expr = &Expr::and(NULL, Expr::lt(col.clone(), VAL)); expect_eq!(null_filter.eval(expr), None, "{expr}"); expect_eq!(null_filter.eval_sql_where(expr), Some(false), "{expr}"); - expect_eq!(empty_filter.eval_sql_where(expr), None, "{expr}"); + expect_eq!(empty_filter.eval_sql_where(expr), Some(false), "{expr}"); // Contrast normal vs. SQL WHERE semantics - comparison inside AND inside AND - let expr = &Expr::and(TRUE, Expr::and(NULL, Expr::lt(col.clone(), VAL))); + let expr = &Expr::and(TRUE, Expr::and(TRUE, Expr::lt(col.clone(), VAL))); expect_eq!(null_filter.eval(expr), None, "{expr}"); expect_eq!(null_filter.eval_sql_where(expr), Some(false), "{expr}"); expect_eq!(empty_filter.eval_sql_where(expr), None, "{expr}"); // Ditto for comparison inside OR inside AND - let expr = &Expr::or(FALSE, Expr::and(NULL, Expr::lt(col.clone(), VAL))); + let expr = &Expr::or(FALSE, Expr::and(TRUE, Expr::lt(col.clone(), VAL))); expect_eq!(null_filter.eval(expr), None, "{expr}"); expect_eq!(null_filter.eval_sql_where(expr), Some(false), "{expr}"); expect_eq!(empty_filter.eval_sql_where(expr), None, "{expr}"); diff --git a/kernel/src/scan/data_skipping/tests.rs b/kernel/src/scan/data_skipping/tests.rs index 4f1d74f63..2ca7a0c01 100644 --- a/kernel/src/scan/data_skipping/tests.rs +++ b/kernel/src/scan/data_skipping/tests.rs @@ -319,16 +319,17 @@ fn test_sql_where() { do_test(ALL_NULL, expr, MISSING, None, None); // Comparison inside AND works - let expr = &Expr::and(NULL, Expr::lt(col.clone(), VAL)); + let expr = &Expr::and(TRUE, Expr::lt(VAL, col.clone())); do_test(ALL_NULL, expr, PRESENT, None, Some(false)); do_test(ALL_NULL, expr, MISSING, None, None); - let expr = &Expr::and(TRUE, Expr::lt(VAL, col.clone())); + // NULL inside AND allows static skipping under SQL semantics + let expr = &Expr::and(NULL, Expr::lt(col.clone(), VAL)); do_test(ALL_NULL, expr, PRESENT, None, Some(false)); - do_test(ALL_NULL, expr, MISSING, None, None); + do_test(ALL_NULL, expr, MISSING, None, Some(false)); // Comparison inside AND inside AND works - let expr = &Expr::and(TRUE, Expr::and(NULL, Expr::lt(col.clone(), VAL))); + let expr = &Expr::and(TRUE, Expr::and(TRUE, Expr::lt(col.clone(), VAL))); do_test(ALL_NULL, expr, PRESENT, None, Some(false)); do_test(ALL_NULL, expr, MISSING, None, None); diff --git a/kernel/src/scan/mod.rs b/kernel/src/scan/mod.rs index 49af0222c..53c6d4cae 100644 --- a/kernel/src/scan/mod.rs +++ b/kernel/src/scan/mod.rs @@ -776,17 +776,19 @@ mod tests { #[test] fn test_static_skipping() { + const NULL: Expression = Expression::null_literal(DataType::BOOLEAN); let test_cases = [ (false, column_expr!("a")), (true, Expression::literal(false)), (false, Expression::literal(true)), - (false, Expression::null_literal(DataType::LONG)), + (true, NULL), (true, Expression::and(column_expr!("a"), false)), (false, Expression::or(column_expr!("a"), true)), (false, Expression::or(column_expr!("a"), false)), (false, Expression::lt(column_expr!("a"), 10)), (false, Expression::lt(Expression::literal(10), 100)), (true, Expression::gt(Expression::literal(10), 100)), + (true, Expression::and(NULL, column_expr!("a"))), ]; for (should_skip, predicate) in test_cases { assert_eq!(