Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fully support LIKE/ILIKE with Utf8View #14379

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 1 addition & 4 deletions datafusion/sql/src/expr/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ use datafusion_common::{
internal_datafusion_err, internal_err, not_impl_err, plan_err, DFSchema, Result,
ScalarValue,
};

use datafusion_expr::expr::ScalarFunction;
use datafusion_expr::expr::{InList, WildcardOptions};
use datafusion_expr::{
Expand Down Expand Up @@ -819,10 +820,6 @@ impl<S: ContextProvider> SqlToRel<'_, S> {
return not_impl_err!("ANY in LIKE expression");
}
let pattern = self.sql_expr_to_logical_expr(pattern, schema, planner_context)?;
let pattern_type = pattern.get_type(schema)?;
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this is the code change -- don't do type checking in the planner

if pattern_type != DataType::Utf8 && pattern_type != DataType::Null {
return plan_err!("Invalid pattern in LIKE expression");
}
let escape_char = if let Some(char) = escape_char {
if char.len() != 1 {
return plan_err!("Invalid escape character in LIKE expression");
Expand Down
5 changes: 5 additions & 0 deletions datafusion/sqllogictest/test_files/scalar.slt
Original file line number Diff line number Diff line change
Expand Up @@ -1689,6 +1689,11 @@ true true false false true true
statement ok
drop table t1

# can't use like with non-strings
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this shows what happens when you try to run like on an integer 😆

query error There isn't a common type to coerce Utf8 and Int64 in LIKE expression
select column1 like 1 from (values('a'), ('b'), (NULL)) as t;


# like nlike with null lt
query BB rowsort
SELECT column1 like NULL as col_null, NULL like column1 as null_col from (values('a'), ('b'), (NULL)) as t
Expand Down
135 changes: 0 additions & 135 deletions datafusion/sqllogictest/test_files/string/string.slt
Original file line number Diff line number Diff line change
Expand Up @@ -41,141 +41,6 @@ select arrow_cast(col1, 'Utf8') as c1 from test_substr_base;
#
include ./string_query.slt.part

# TODO support all String types in sql_like_to_expr and move this test to `string_query.slt.part`
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

By moving these tests to string_query.slt.part, they now run for strings, large strings, string views, and dictionary arrays

FYI @goldmedal

# dynamic LIKE as filter
query TTT rowsort
SELECT ascii_1, 'is LIKE', ascii_2 FROM test_basic_operator WHERE ascii_1 LIKE ascii_2
UNION ALL
SELECT ascii_1, 'is NOT LIKE', ascii_2 FROM test_basic_operator WHERE ascii_1 NOT LIKE ascii_2
UNION ALL
SELECT unicode_1, 'is LIKE', ascii_2 FROM test_basic_operator WHERE unicode_1 LIKE ascii_2
UNION ALL
SELECT unicode_1, 'is NOT LIKE', ascii_2 FROM test_basic_operator WHERE unicode_1 NOT LIKE ascii_2
UNION ALL
SELECT unicode_2, 'is LIKE', ascii_2 FROM test_basic_operator WHERE unicode_2 LIKE ascii_2
UNION ALL
SELECT unicode_2, 'is NOT LIKE', ascii_2 FROM test_basic_operator WHERE unicode_2 NOT LIKE ascii_2
----
% is LIKE \%
(empty) is LIKE %
(empty) is LIKE %
(empty) is LIKE %
(empty) is LIKE %%
(empty) is LIKE %%
(empty) is LIKE %%
(empty) is NOT LIKE \%
(empty) is NOT LIKE \%
(empty) is NOT LIKE \_
(empty) is NOT LIKE \_
Andrew is NOT LIKE X
Pan Tadeusz ma frunąć stąd w kąt is NOT LIKE p%t
Raphael is NOT LIKE R
Xiangpeng is LIKE Xiangpeng
_ is LIKE \_
chrząszcz na łące w 東京都 is NOT LIKE un_____core
datafusionДатаФусион is NOT LIKE R
datafusion数据融合 is NOT LIKE Xiangpeng
datafusion数据融合 is NOT LIKE Xiangpeng
datafusion📊🔥 is NOT LIKE X
pan Tadeusz ma iść w kąt is LIKE p%t
percent is LIKE p%t
un iść core is LIKE un_____core
under_score is LIKE un_____core
аФус is NOT LIKE R
🔥 is NOT LIKE R
🔥 is NOT LIKE X

# TODO support all String types in sql_like_to_expr and move this test to `string_query.slt.part`
# dynamic LIKE as projection
query TTTTBBBB rowsort
SELECT
ascii_1, ascii_2, unicode_1, unicode_2,
(ascii_1 LIKE ascii_2) AS ascii_1_like_ascii_2,
(ascii_2 LIKE ascii_1) AS ascii_2_like_ascii_1,
(unicode_1 LIKE ascii_2) AS unicode_1_like_ascii_2,
(unicode_2 LIKE ascii_2) AS unicode_2_like_ascii_2
FROM test_basic_operator
----
% \% (empty) (empty) true true false false
(empty) % (empty) (empty) true false true true
(empty) %% (empty) (empty) true false true true
Andrew X datafusion📊🔥 🔥 false false false false
NULL % NULL NULL NULL NULL NULL NULL
NULL R NULL 🔥 NULL NULL NULL false
Raphael R datafusionДатаФусион аФус false false false false
Xiangpeng Xiangpeng datafusion数据融合 datafusion数据融合 true true false false
_ \_ (empty) (empty) true false false false
percent p%t pan Tadeusz ma iść w kąt Pan Tadeusz ma frunąć stąd w kąt true false true false
under_score un_____core un iść core chrząszcz na łące w 東京都 true false true false

# TODO support all String types in sql_like_to_expr and move this test to `string_query.slt.part`
# dynamic ILIKE as filter
query TTT rowsort
SELECT ascii_1, 'is ILIKE', ascii_2 FROM test_basic_operator WHERE ascii_1 ILIKE ascii_2
UNION ALL
SELECT ascii_1, 'is NOT ILIKE', ascii_2 FROM test_basic_operator WHERE ascii_1 NOT ILIKE ascii_2
UNION ALL
SELECT unicode_1, 'is ILIKE', ascii_2 FROM test_basic_operator WHERE unicode_1 ILIKE ascii_2
UNION ALL
SELECT unicode_1, 'is NOT ILIKE', ascii_2 FROM test_basic_operator WHERE unicode_1 NOT ILIKE ascii_2
UNION ALL
SELECT unicode_2, 'is ILIKE', ascii_2 FROM test_basic_operator WHERE unicode_2 ILIKE ascii_2
UNION ALL
SELECT unicode_2, 'is NOT ILIKE', ascii_2 FROM test_basic_operator WHERE unicode_2 NOT ILIKE ascii_2
----
% is ILIKE \%
(empty) is ILIKE %
(empty) is ILIKE %
(empty) is ILIKE %
(empty) is ILIKE %%
(empty) is ILIKE %%
(empty) is ILIKE %%
(empty) is NOT ILIKE \%
(empty) is NOT ILIKE \%
(empty) is NOT ILIKE \_
(empty) is NOT ILIKE \_
Andrew is NOT ILIKE X
Pan Tadeusz ma frunąć stąd w kąt is ILIKE p%t
Raphael is NOT ILIKE R
Xiangpeng is ILIKE Xiangpeng
_ is ILIKE \_
chrząszcz na łące w 東京都 is NOT ILIKE un_____core
datafusionДатаФусион is NOT ILIKE R
datafusion数据融合 is NOT ILIKE Xiangpeng
datafusion数据融合 is NOT ILIKE Xiangpeng
datafusion📊🔥 is NOT ILIKE X
pan Tadeusz ma iść w kąt is ILIKE p%t
percent is ILIKE p%t
un iść core is ILIKE un_____core
under_score is ILIKE un_____core
аФус is NOT ILIKE R
🔥 is NOT ILIKE R
🔥 is NOT ILIKE X

# TODO support all String types in sql_like_to_expr and move this test to `string_query.slt.part`
# dynamic ILIKE as projection
query TTTTBBBB rowsort
SELECT
ascii_1, ascii_2, unicode_1, unicode_2,
(ascii_1 ILIKE ascii_2) AS ascii_1_ilike_ascii_2,
(ascii_2 ILIKE ascii_1) AS ascii_2_ilike_ascii_1,
(unicode_1 ILIKE ascii_2) AS unicode_1_ilike_ascii_2,
(unicode_2 ILIKE ascii_2) AS unicode_2_ilike_ascii_2
FROM test_basic_operator
----
% \% (empty) (empty) true true false false
(empty) % (empty) (empty) true false true true
(empty) %% (empty) (empty) true false true true
Andrew X datafusion📊🔥 🔥 false false false false
NULL % NULL NULL NULL NULL NULL NULL
NULL R NULL 🔥 NULL NULL NULL false
Raphael R datafusionДатаФусион аФус false false false false
Xiangpeng Xiangpeng datafusion数据融合 datafusion数据融合 true true false false
_ \_ (empty) (empty) true false false false
percent p%t pan Tadeusz ma iść w kąt Pan Tadeusz ma frunąć stąd w kąt true false true true
under_score un_____core un iść core chrząszcz na łące w 東京都 true false true false



#
# Clean up
Expand Down
142 changes: 142 additions & 0 deletions datafusion/sqllogictest/test_files/string/string_query.slt.part
Original file line number Diff line number Diff line change
Expand Up @@ -188,6 +188,148 @@ _ (empty) false true false true
NULL NULL NULL NULL NULL NULL
NULL NULL NULL NULL NULL NULL

# --------------------------------------
# dynamic LIKE as filter
# --------------------------------------

query TTT rowsort
SELECT ascii_1, 'is LIKE', ascii_2 FROM test_basic_operator WHERE ascii_1 LIKE ascii_2
UNION ALL
SELECT ascii_1, 'is NOT LIKE', ascii_2 FROM test_basic_operator WHERE ascii_1 NOT LIKE ascii_2
UNION ALL
SELECT unicode_1, 'is LIKE', ascii_2 FROM test_basic_operator WHERE unicode_1 LIKE ascii_2
UNION ALL
SELECT unicode_1, 'is NOT LIKE', ascii_2 FROM test_basic_operator WHERE unicode_1 NOT LIKE ascii_2
UNION ALL
SELECT unicode_2, 'is LIKE', ascii_2 FROM test_basic_operator WHERE unicode_2 LIKE ascii_2
UNION ALL
SELECT unicode_2, 'is NOT LIKE', ascii_2 FROM test_basic_operator WHERE unicode_2 NOT LIKE ascii_2
----
% is LIKE \%
(empty) is LIKE %
(empty) is LIKE %
(empty) is LIKE %
(empty) is LIKE %%
(empty) is LIKE %%
(empty) is LIKE %%
(empty) is NOT LIKE \%
(empty) is NOT LIKE \%
(empty) is NOT LIKE \_
(empty) is NOT LIKE \_
Andrew is NOT LIKE X
Pan Tadeusz ma frunąć stąd w kąt is NOT LIKE p%t
Raphael is NOT LIKE R
Xiangpeng is LIKE Xiangpeng
_ is LIKE \_
chrząszcz na łące w 東京都 is NOT LIKE un_____core
datafusionДатаФусион is NOT LIKE R
datafusion数据融合 is NOT LIKE Xiangpeng
datafusion数据融合 is NOT LIKE Xiangpeng
datafusion📊🔥 is NOT LIKE X
pan Tadeusz ma iść w kąt is LIKE p%t
percent is LIKE p%t
un iść core is LIKE un_____core
under_score is LIKE un_____core
аФус is NOT LIKE R
🔥 is NOT LIKE R
🔥 is NOT LIKE X

# --------------------------------------
# dynamic LIKE as projection
# --------------------------------------

query TTTTBBBB rowsort
SELECT
ascii_1, ascii_2, unicode_1, unicode_2,
(ascii_1 LIKE ascii_2) AS ascii_1_like_ascii_2,
(ascii_2 LIKE ascii_1) AS ascii_2_like_ascii_1,
(unicode_1 LIKE ascii_2) AS unicode_1_like_ascii_2,
(unicode_2 LIKE ascii_2) AS unicode_2_like_ascii_2
FROM test_basic_operator
----
% \% (empty) (empty) true true false false
(empty) % (empty) (empty) true false true true
(empty) %% (empty) (empty) true false true true
Andrew X datafusion📊🔥 🔥 false false false false
NULL % NULL NULL NULL NULL NULL NULL
NULL R NULL 🔥 NULL NULL NULL false
Raphael R datafusionДатаФусион аФус false false false false
Xiangpeng Xiangpeng datafusion数据融合 datafusion数据融合 true true false false
_ \_ (empty) (empty) true false false false
percent p%t pan Tadeusz ma iść w kąt Pan Tadeusz ma frunąć stąd w kąt true false true false
under_score un_____core un iść core chrząszcz na łące w 東京都 true false true false

# --------------------------------------
# dynamic ILIKE as filter
# --------------------------------------

query TTT rowsort
SELECT ascii_1, 'is ILIKE', ascii_2 FROM test_basic_operator WHERE ascii_1 ILIKE ascii_2
UNION ALL
SELECT ascii_1, 'is NOT ILIKE', ascii_2 FROM test_basic_operator WHERE ascii_1 NOT ILIKE ascii_2
UNION ALL
SELECT unicode_1, 'is ILIKE', ascii_2 FROM test_basic_operator WHERE unicode_1 ILIKE ascii_2
UNION ALL
SELECT unicode_1, 'is NOT ILIKE', ascii_2 FROM test_basic_operator WHERE unicode_1 NOT ILIKE ascii_2
UNION ALL
SELECT unicode_2, 'is ILIKE', ascii_2 FROM test_basic_operator WHERE unicode_2 ILIKE ascii_2
UNION ALL
SELECT unicode_2, 'is NOT ILIKE', ascii_2 FROM test_basic_operator WHERE unicode_2 NOT ILIKE ascii_2
----
% is ILIKE \%
(empty) is ILIKE %
(empty) is ILIKE %
(empty) is ILIKE %
(empty) is ILIKE %%
(empty) is ILIKE %%
(empty) is ILIKE %%
(empty) is NOT ILIKE \%
(empty) is NOT ILIKE \%
(empty) is NOT ILIKE \_
(empty) is NOT ILIKE \_
Andrew is NOT ILIKE X
Pan Tadeusz ma frunąć stąd w kąt is ILIKE p%t
Raphael is NOT ILIKE R
Xiangpeng is ILIKE Xiangpeng
_ is ILIKE \_
chrząszcz na łące w 東京都 is NOT ILIKE un_____core
datafusionДатаФусион is NOT ILIKE R
datafusion数据融合 is NOT ILIKE Xiangpeng
datafusion数据融合 is NOT ILIKE Xiangpeng
datafusion📊🔥 is NOT ILIKE X
pan Tadeusz ma iść w kąt is ILIKE p%t
percent is ILIKE p%t
un iść core is ILIKE un_____core
under_score is ILIKE un_____core
аФус is NOT ILIKE R
🔥 is NOT ILIKE R
🔥 is NOT ILIKE X

# --------------------------------------
# dynamic ILIKE as projection
# --------------------------------------
query TTTTBBBB rowsort
SELECT
ascii_1, ascii_2, unicode_1, unicode_2,
(ascii_1 ILIKE ascii_2) AS ascii_1_ilike_ascii_2,
(ascii_2 ILIKE ascii_1) AS ascii_2_ilike_ascii_1,
(unicode_1 ILIKE ascii_2) AS unicode_1_ilike_ascii_2,
(unicode_2 ILIKE ascii_2) AS unicode_2_ilike_ascii_2
FROM test_basic_operator
----
% \% (empty) (empty) true true false false
(empty) % (empty) (empty) true false true true
(empty) %% (empty) (empty) true false true true
Andrew X datafusion📊🔥 🔥 false false false false
NULL % NULL NULL NULL NULL NULL NULL
NULL R NULL 🔥 NULL NULL NULL false
Raphael R datafusionДатаФусион аФус false false false false
Xiangpeng Xiangpeng datafusion数据融合 datafusion数据融合 true true false false
_ \_ (empty) (empty) true false false false
percent p%t pan Tadeusz ma iść w kąt Pan Tadeusz ma frunąć stąd w kąt true false true true
under_score un_____core un iść core chrząszcz na łące w 東京都 true false true false


# --------------------------------------
# substr function
# --------------------------------------
Expand Down