diff --git a/backends-clickhouse/src/main/scala/org/apache/gluten/utils/CHJoinValidateUtil.scala b/backends-clickhouse/src/main/scala/org/apache/gluten/utils/CHJoinValidateUtil.scala index aabdae0951a7c..b86482720c7c8 100644 --- a/backends-clickhouse/src/main/scala/org/apache/gluten/utils/CHJoinValidateUtil.scala +++ b/backends-clickhouse/src/main/scala/org/apache/gluten/utils/CHJoinValidateUtil.scala @@ -54,8 +54,12 @@ object CHJoinValidateUtil extends Logging { condition.isDefined && hasTwoTableColumn(leftOutputSet, rightOutputSet, condition.get) val shouldFallback = joinStrategy match { case SortMergeJoinStrategy(joinType) => - joinType.sql.contains("SEMI") || joinType.sql.contains("ANTI") || joinType.toString - .contains("ExistenceJoin") || hasMixedFilterCondition + if (!joinType.isInstanceOf[ExistenceJoin] && joinType.sql.contains("INNER")) { + false + } else { + joinType.sql.contains("SEMI") || joinType.sql.contains("ANTI") || joinType.toString + .contains("ExistenceJoin") || hasMixedFilterCondition + } case UnknownJoinStrategy(joinType) => throw new IllegalArgumentException(s"Unknown join type $joinStrategy") case _ => false diff --git a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseColumnarMemorySortShuffleSuite.scala b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseColumnarMemorySortShuffleSuite.scala index b9d580c7249cd..4c49cc2d9f46c 100644 --- a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseColumnarMemorySortShuffleSuite.scala +++ b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseColumnarMemorySortShuffleSuite.scala @@ -119,7 +119,7 @@ class GlutenClickHouseColumnarMemorySortShuffleSuite } test("TPCH Q21") { - runTPCHQuery(21, noFallBack = false) { df => } + runTPCHQuery(21) { df => } } test("TPCH Q22") { diff --git a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseColumnarShuffleAQESuite.scala b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseColumnarShuffleAQESuite.scala index 10e5c7534d352..e5da78de3fd60 100644 --- a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseColumnarShuffleAQESuite.scala +++ b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseColumnarShuffleAQESuite.scala @@ -163,7 +163,7 @@ class GlutenClickHouseColumnarShuffleAQESuite } test("TPCH Q21") { - runTPCHQuery(21, noFallBack = false) { df => } + runTPCHQuery(21) { df => } } test("TPCH Q22") { diff --git a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseDSV2ColumnarShuffleSuite.scala b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseDSV2ColumnarShuffleSuite.scala index dd997832d3e3d..17bd9912b032c 100644 --- a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseDSV2ColumnarShuffleSuite.scala +++ b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseDSV2ColumnarShuffleSuite.scala @@ -168,7 +168,7 @@ class GlutenClickHouseDSV2ColumnarShuffleSuite extends GlutenClickHouseTPCHAbstr } test("TPCH Q21") { - runTPCHQuery(21, noFallBack = false) { df => } + runTPCHQuery(21) { df => } } test("TPCH Q22") { diff --git a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseDSV2Suite.scala b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseDSV2Suite.scala index 08393ccfe7748..a58b6b1c1292d 100644 --- a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseDSV2Suite.scala +++ b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseDSV2Suite.scala @@ -126,7 +126,7 @@ class GlutenClickHouseDSV2Suite extends GlutenClickHouseTPCHAbstractSuite { } test("TPCH Q21") { - runTPCHQuery(21, noFallBack = false) { df => } + runTPCHQuery(21) { df => } } test("TPCH Q22") { diff --git a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseDecimalSuite.scala b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseDecimalSuite.scala index cf1bdd296c013..bd831e64bf38c 100644 --- a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseDecimalSuite.scala +++ b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseDecimalSuite.scala @@ -343,7 +343,7 @@ class GlutenClickHouseDecimalSuite decimalTPCHTables.foreach { dt => { - val fallBack = (sql_num == 16 || sql_num == 21) + val fallBack = (sql_num == 16) val compareResult = !dt._2.contains(sql_num) val native = if (fallBack) "fallback" else "native" val compare = if (compareResult) "compare" else "noCompare" diff --git a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseTPCHNullableColumnarShuffleSuite.scala b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseTPCHNullableColumnarShuffleSuite.scala index c5f67f45d5771..5f9aa0dbda60f 100644 --- a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseTPCHNullableColumnarShuffleSuite.scala +++ b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseTPCHNullableColumnarShuffleSuite.scala @@ -171,7 +171,7 @@ class GlutenClickHouseTPCHNullableColumnarShuffleSuite extends GlutenClickHouseT } test("TPCH Q21") { - runTPCHQuery(21, noFallBack = false) { df => } + runTPCHQuery(21) { df => } } test("TPCH Q22") { diff --git a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseTPCHNullableSuite.scala b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseTPCHNullableSuite.scala index 7f62c69931577..e0e4d33804504 100644 --- a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseTPCHNullableSuite.scala +++ b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseTPCHNullableSuite.scala @@ -174,7 +174,7 @@ class GlutenClickHouseTPCHNullableSuite extends GlutenClickHouseTPCHAbstractSuit } test("TPCH Q21") { - runTPCHQuery(21, noFallBack = false) { df => } + runTPCHQuery(21) { df => } } test("TPCH Q22") { diff --git a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseTPCHSuite.scala b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseTPCHSuite.scala index 1c09449c817fb..f25a1313255ff 100644 --- a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseTPCHSuite.scala +++ b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseTPCHSuite.scala @@ -175,7 +175,7 @@ class GlutenClickHouseTPCHSuite extends GlutenClickHouseTPCHAbstractSuite { } test("TPCH Q21") { - runTPCHQuery(21, noFallBack = false) { df => } + runTPCHQuery(21) { df => } } test("TPCH Q22") { diff --git a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/tpcds/GlutenClickHouseTPCDSParquetColumnarShuffleAQESuite.scala b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/tpcds/GlutenClickHouseTPCDSParquetColumnarShuffleAQESuite.scala index 1fd8983f5876a..3e1507bf17aab 100644 --- a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/tpcds/GlutenClickHouseTPCDSParquetColumnarShuffleAQESuite.scala +++ b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/tpcds/GlutenClickHouseTPCDSParquetColumnarShuffleAQESuite.scala @@ -239,6 +239,6 @@ class GlutenClickHouseTPCDSParquetColumnarShuffleAQESuite | LIMIT 100 ; |""".stripMargin // There are some BroadcastHashJoin with NOT condition - compareResultsAgainstVanillaSpark(sql, true, { df => }, false) + compareResultsAgainstVanillaSpark(sql, true, { df => }) } } diff --git a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/tpcds/GlutenClickHouseTPCDSParquetSortMergeJoinSuite.scala b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/tpcds/GlutenClickHouseTPCDSParquetSortMergeJoinSuite.scala index 509c830545c60..41cb8771eaaad 100644 --- a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/tpcds/GlutenClickHouseTPCDSParquetSortMergeJoinSuite.scala +++ b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/tpcds/GlutenClickHouseTPCDSParquetSortMergeJoinSuite.scala @@ -37,6 +37,7 @@ class GlutenClickHouseTPCDSParquetSortMergeJoinSuite extends GlutenClickHouseTPC "q51", "q69", "q70", + "q72", //OOM "q78", "q87", "q95", @@ -49,8 +50,11 @@ class GlutenClickHouseTPCDSParquetSortMergeJoinSuite extends GlutenClickHouseTPC .set("spark.shuffle.manager", "sort") .set("spark.io.compression.codec", "snappy") .set("spark.sql.shuffle.partitions", "5") - .set("spark.sql.autoBroadcastJoinThreshold", "10MB") + .set("spark.sql.autoBroadcastJoinThreshold", "-1") .set("spark.memory.offHeap.size", "8g") + .set( + "spark.gluten.sql.columnar.backend.ch.runtime_config.extra_memory_hard_limit", + "2147483648") .set("spark.gluten.sql.columnar.forceShuffledHashJoin", "false") } diff --git a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/tpch/GlutenClickHouseTPCHColumnarShuffleParquetAQESuite.scala b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/tpch/GlutenClickHouseTPCHColumnarShuffleParquetAQESuite.scala index c2e2f9f5565f2..39dc7baf96950 100644 --- a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/tpch/GlutenClickHouseTPCHColumnarShuffleParquetAQESuite.scala +++ b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/tpch/GlutenClickHouseTPCHColumnarShuffleParquetAQESuite.scala @@ -275,7 +275,7 @@ class GlutenClickHouseTPCHColumnarShuffleParquetAQESuite } test("TPCH Q21") { - runTPCHQuery(21, noFallBack = false) { + runTPCHQuery(21) { df => val plans = collect(df.queryExecution.executedPlan) { case scanExec: BasicScanExecTransformer => scanExec diff --git a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/tpch/GlutenClickHouseTPCHParquetAQEConcurrentSuite.scala b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/tpch/GlutenClickHouseTPCHParquetAQEConcurrentSuite.scala index 8c706f6836399..e1dd33912bd75 100644 --- a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/tpch/GlutenClickHouseTPCHParquetAQEConcurrentSuite.scala +++ b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/tpch/GlutenClickHouseTPCHParquetAQEConcurrentSuite.scala @@ -45,7 +45,6 @@ class GlutenClickHouseTPCHParquetAQEConcurrentSuite .set("spark.shuffle.manager", "sort") .set("spark.io.compression.codec", "snappy") .set("spark.sql.shuffle.partitions", "5") - .set("spark.sql.autoBroadcastJoinThreshold", "10MB") .set("spark.sql.adaptive.enabled", "true") .set("spark.sql.autoBroadcastJoinThreshold", "-1") } @@ -82,5 +81,4 @@ class GlutenClickHouseTPCHParquetAQEConcurrentSuite queries.map(queryId => runTPCHQuery(queryId) { df => }) } - } diff --git a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/tpch/GlutenClickHouseTPCHParquetAQESuite.scala b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/tpch/GlutenClickHouseTPCHParquetAQESuite.scala index 1d8389b481439..2aadac05d348c 100644 --- a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/tpch/GlutenClickHouseTPCHParquetAQESuite.scala +++ b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/tpch/GlutenClickHouseTPCHParquetAQESuite.scala @@ -209,7 +209,7 @@ class GlutenClickHouseTPCHParquetAQESuite } test("TPCH Q21") { - runTPCHQuery(21, noFallBack = false) { df => } + runTPCHQuery(21) { df => } } test("TPCH Q22") { diff --git a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/tpch/GlutenClickHouseTPCHSaltNullParquetSuite.scala b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/tpch/GlutenClickHouseTPCHSaltNullParquetSuite.scala index 0efc1414ce338..e21df203dac03 100644 --- a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/tpch/GlutenClickHouseTPCHSaltNullParquetSuite.scala +++ b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/tpch/GlutenClickHouseTPCHSaltNullParquetSuite.scala @@ -335,7 +335,7 @@ class GlutenClickHouseTPCHSaltNullParquetSuite extends GlutenClickHouseTPCHAbstr } test("TPCH Q21") { - runTPCHQuery(21, noFallBack = false) { df => } + runTPCHQuery(21) { df => } } test("GLUTEN-2115: Fix wrong number of records shuffle written") { diff --git a/cpp-ch/local-engine/Common/QueryContext.cpp b/cpp-ch/local-engine/Common/QueryContext.cpp index ff9c151159a6d..0abff2fc143db 100644 --- a/cpp-ch/local-engine/Common/QueryContext.cpp +++ b/cpp-ch/local-engine/Common/QueryContext.cpp @@ -172,4 +172,4 @@ double currentThreadGroupMemoryUsageRatio() } return static_cast(CurrentThread::getGroup()->memory_tracker.get()) / CurrentThread::getGroup()->memory_tracker.getSoftLimit(); } -} \ No newline at end of file +} diff --git a/cpp-ch/local-engine/Parser/JoinRelParser.cpp b/cpp-ch/local-engine/Parser/JoinRelParser.cpp index 0446a397c0087..99270348ce05a 100644 --- a/cpp-ch/local-engine/Parser/JoinRelParser.cpp +++ b/cpp-ch/local-engine/Parser/JoinRelParser.cpp @@ -681,14 +681,14 @@ bool JoinRelParser::couldRewriteToMultiJoinOnClauses( auto optional_keys = parse_join_keys(current_expr); if (!optional_keys) { - LOG_ERROR(getLogger("JoinRelParser"), "Not equal comparison for keys from both tables"); + LOG_INFO(getLogger("JoinRelParser"), "Not equal comparison for keys from both tables"); return false; } join_on_clause.addKey(optional_keys->first, optional_keys->second, false); } else { - LOG_ERROR(getLogger("JoinRelParser"), "And or equals function is expected"); + LOG_INFO(getLogger("JoinRelParser"), "And or equals function is expected"); return false; } } @@ -701,7 +701,8 @@ bool JoinRelParser::couldRewriteToMultiJoinOnClauses( expression_stack.pop_back(); if (!check_function("or", current_expr)) { - LOG_ERROR(getLogger("JoinRelParser"), "Not an or expression"); + LOG_INFO(getLogger("JoinRelParser"), "Not an or expression"); + return false; } auto get_current_join_on_clause = [&]() @@ -719,7 +720,7 @@ bool JoinRelParser::couldRewriteToMultiJoinOnClauses( auto optional_keys = parse_join_keys(arg.value()); if (!optional_keys) { - LOG_ERROR(getLogger("JoinRelParser"), "Not equal comparison for keys from both tables"); + LOG_INFO(getLogger("JoinRelParser"), "Not equal comparison for keys from both tables"); return false; } get_current_join_on_clause()->addKey(optional_keys->first, optional_keys->second, false); @@ -728,7 +729,7 @@ bool JoinRelParser::couldRewriteToMultiJoinOnClauses( { if (!parse_and_expression(arg.value(), *get_current_join_on_clause())) { - LOG_ERROR(getLogger("JoinRelParser"), "Parse and expression failed"); + LOG_INFO(getLogger("JoinRelParser"), "Parse and expression failed"); return false; } } @@ -738,7 +739,7 @@ bool JoinRelParser::couldRewriteToMultiJoinOnClauses( } else { - LOG_ERROR(getLogger("JoinRelParser"), "Unknow function"); + LOG_INFO(getLogger("JoinRelParser"), "Unknow function"); return false; } }