Skip to content

Commit

Permalink
refactor(translator): Optimize LIKE queries to not always use =~.
Browse files Browse the repository at this point in the history
When possible, use `CONTAINS`, `STARTS WITH` or `ENDS WITH`.

Signed-off-by: Michael Simons <[email protected]>
  • Loading branch information
michael-simons committed Jan 20, 2025
1 parent 03bc391 commit b1e3435
Show file tree
Hide file tree
Showing 3 changed files with 61 additions and 9 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
import java.util.concurrent.atomic.AtomicInteger;
import java.util.function.BiFunction;
import java.util.function.Function;
import java.util.function.LongSupplier;
import java.util.function.Predicate;
import java.util.function.Supplier;
import java.util.logging.Level;
Expand Down Expand Up @@ -104,6 +105,7 @@ final class SqlToCypher implements Translator {
// Case-insensitive pattern for the text "v$id", optionally with a "<prefix>_" part between
// "v$" and "id" (e.g. "v$foo_id"); the optional part is exposed as the named group "prefix".
static final Pattern ELEMENT_ID_PATTERN = Pattern.compile("(?i)v\\$(?:(?<prefix>.+?)_)?id");
// Function name "elementId" — presumably the Cypher function element-id columns are mapped to; verify against usages.
static final String ELEMENT_ID_FUNCTION_NAME = "elementId";
// Alias "v$id"; its un-prefixed, lower-case form is matched by ELEMENT_ID_PATTERN.
static final String ELEMENT_ID_ALIAS = "v$id";
// Matches a single SQL LIKE wildcard character: '%' or '_'.
static final Pattern PERCENT_OR_UNDERSCORE = Pattern.compile("[%_]");

static {
Logger.getLogger("org.jooq.Constants").setLevel(Level.WARNING);
Expand Down Expand Up @@ -1186,14 +1188,7 @@ else if (c instanceof QOM.RowIsNotNull e) {
.orElseThrow();
}
else if (c instanceof QOM.Like like) {
Expression rhs;
if (like.$arg2() instanceof Param<?> p && p.$inline() && p.getValue() instanceof String s) {
rhs = Cypher.literalOf(s.replace("%", ".*").replace("_", "."));
}
else {
rhs = expression(like.$arg2());
}
return expression(like.$arg1()).matches(rhs);
return like(like);
}
else if (c instanceof QOM.FieldCondition fc && fc.$field() instanceof Param<Boolean> param) {
return (Boolean.TRUE.equals(param.getValue()) ? Cypher.literalTrue() : Cypher.literalFalse())
Expand All @@ -1212,6 +1207,41 @@ else if (c instanceof QOM.InList<?> il) {
}
}

/**
 * Translates a SQL {@code LIKE} condition into a Cypher condition.
 * <p>
 * When the right-hand side is an inlined string parameter, the cheapest matching
 * operator is chosen:
 * <ul>
 * <li>{@code '%x%'} (exactly two wildcards, both {@code %}) becomes {@code CONTAINS 'x'}</li>
 * <li>{@code '%x'} (exactly one wildcard) becomes {@code ENDS WITH 'x'}</li>
 * <li>{@code 'x%'} (exactly one wildcard) becomes {@code STARTS WITH 'x'}</li>
 * <li>everything else becomes a regular expression match ({@code =~}), with regex
 * metacharacters of the literal parts escaped so that they only match themselves</li>
 * </ul>
 * Any other right-hand side is translated as a plain expression and used as the operand
 * of a regular expression match as is.
 * <p>
 * NOTE(review): an {@code ESCAPE} character attached to the {@code LIKE}, if any, is not
 * consulted here; confirm whether that needs support.
 * @param like the {@code LIKE} condition to translate
 * @return the equivalent Cypher condition
 */
private Condition like(QOM.Like like) {
    Expression lhs = expression(like.$arg1());
    Expression rhs;
    if (like.$arg2() instanceof Param<?> p && p.$inline() && p.getValue() instanceof String s) {
        var sw = s.startsWith("%");
        var ew = s.endsWith("%");
        var length = s.length();
        // Counting the wildcards is only needed when the pattern starts or ends with '%',
        // hence it is done lazily and at most once.
        var cnt = new LongSupplier() {
            private long value = -1L;

            @Override
            public long getAsLong() {
                if (this.value < 0) {
                    this.value = PERCENT_OR_UNDERSCORE.matcher(s).results().count();
                }
                return this.value;
            }
        };
        if (sw && ew && length > 2 && cnt.getAsLong() == 2) {
            return lhs.contains(Cypher.literalOf(s.substring(1, length - 1)));
        }
        else if (sw && length > 1 && cnt.getAsLong() == 1) {
            return lhs.endsWith(Cypher.literalOf(s.substring(1)));
        }
        else if (ew && length > 1 && cnt.getAsLong() == 1) {
            return lhs.startsWith(Cypher.literalOf(s.substring(0, length - 1)));
        }
        rhs = Cypher.literalOf(likePatternToRegex(s));
    }
    else {
        rhs = expression(like.$arg2());
    }
    return lhs.matches(rhs);
}

/**
 * Converts a SQL {@code LIKE} pattern into a regular expression: every run of {@code %}
 * becomes {@code .*}, every {@code _} becomes {@code .}, and all regex metacharacters in
 * the remaining literal text are escaped with a backslash. Patterns without
 * metacharacters yield the same result as a plain wildcard replacement.
 * @param pattern the SQL {@code LIKE} pattern
 * @return the regular expression matching the same strings
 */
private static String likePatternToRegex(String pattern) {
    var regex = new StringBuilder(pattern.length() + 8);
    for (int i = 0; i < pattern.length(); i++) {
        char c = pattern.charAt(i);
        if (c == '%') {
            // Collapse consecutive '%' into a single ".*"
            if (i == 0 || pattern.charAt(i - 1) != '%') {
                regex.append(".*");
            }
        }
        else if (c == '_') {
            regex.append('.');
        }
        else {
            // Without escaping, LIKE 'a.b' would turn into a regex that also matches 'aXb'
            if ("\\.[]{}()*+-?^$|".indexOf(c) >= 0) {
                regex.append('\\');
            }
            regex.append(c);
        }
    }
    return regex.toString();
}

private Condition rowCondition(Row r1, Row r2,
BiFunction<? super Expression, ? super Expression, ? extends Condition> comp,
BiFunction<? super Expression, ? super Expression, ? extends Condition> last) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -407,6 +407,28 @@ void escapingShouldWork(Boolean prettyPrint, Boolean alwaysEscapeNames, String e
assertThat(cypher).isEqualTo(expected.replace("$", cfg.isPrettyPrint() ? System.lineSeparator() : " "));
}

/**
 * Checks the translation of SQL {@code LIKE} predicates: simple patterns must end up as
 * {@code CONTAINS}, {@code ENDS WITH} or {@code STARTS WITH}, everything else as a
 * regular expression match.
 * @param sql the SQL statement to translate
 * @param expected the expected Cypher statement
 */
@ParameterizedTest
@CsvSource(
        textBlock = """
                SELECT * FROM blub b WHERE name like '%Test%', MATCH (b:blub) WHERE b.name CONTAINS 'Test' RETURN *
                SELECT * FROM blub b WHERE name like '%Test', MATCH (b:blub) WHERE b.name ENDS WITH 'Test' RETURN *
                SELECT * FROM blub b WHERE name like 'Test%', MATCH (b:blub) WHERE b.name STARTS WITH 'Test' RETURN *
                SELECT * FROM blub b WHERE name like 'This is _ %Test%', MATCH (b:blub) WHERE b.name =~ 'This is . .*Test.*' RETURN *
                SELECT * FROM blub b WHERE name like '%', MATCH (b:blub) WHERE b.name =~ '.*' RETURN *
                SELECT * FROM blub b WHERE name like '%%', MATCH (b:blub) WHERE b.name =~ '.*' RETURN *
                SELECT * FROM blub b WHERE name like '%%%', MATCH (b:blub) WHERE b.name =~ '.*' RETURN *
                SELECT * FROM blub b WHERE name like '_', MATCH (b:blub) WHERE b.name =~ '.' RETURN *
                SELECT * FROM blub b WHERE name like '__', MATCH (b:blub) WHERE b.name =~ '..' RETURN *
                SELECT * FROM blub b WHERE name like '___', MATCH (b:blub) WHERE b.name =~ '...' RETURN *
                SELECT * FROM blub b WHERE name like '%_%', MATCH (b:blub) WHERE b.name =~ '.*..*' RETURN *
                SELECT * FROM blub b WHERE name like '%ein%schöner%Name%', MATCH (b:blub) WHERE b.name =~ '.*ein.*schöner.*Name.*' RETURN *
                """)
void likeShouldBeHandledNicely(String sql, String expected) {
    var cypher = SqlToCypher.defaultTranslator().translate(sql);
    assertThat(cypher).isEqualTo(expected);
}

private static class TestDataExtractor extends Treeprocessor {

private final List<TestData> testData = new ArrayList<>();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -186,6 +186,6 @@ will be translated into a regular expression, replacing the `%` with `.*`:

[source,cypher,id=p5_0_expected]
----
MATCH (m:`movies`) WHERE m.title =~ '.*Matrix.*' OR m.title =~ 'M.trix'
MATCH (m:`movies`) WHERE m.title CONTAINS 'Matrix' OR m.title =~ 'M.trix'
RETURN *
----

0 comments on commit b1e3435

Please sign in to comment.