From 635264070ddd579710b00c4b7a3726200b267363 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=9D=8E=E6=89=AC?= <654010905@qq.com> Date: Tue, 27 Aug 2024 15:34:46 +0800 Subject: [PATCH] [CH] Enable more uts in GlutenOrcV1SchemaPruningSuite (#6895) * enable vectorized uts * commit again * finish dev * enable more uts about GlutenOrcV1SchemaPruningSuite * fix failed uts * fix failed uts --- .../clickhouse/ClickHouseTestSettings.scala | 169 +----------------- .../clickhouse/ClickHouseTestSettings.scala | 169 +----------------- .../clickhouse/ClickHouseTestSettings.scala | 169 +----------------- .../clickhouse/ClickHouseTestSettings.scala | 169 +----------------- 4 files changed, 4 insertions(+), 672 deletions(-) diff --git a/gluten-ut/spark32/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala b/gluten-ut/spark32/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala index 9b2e2ab95bc9..c8507b30376a 100644 --- a/gluten-ut/spark32/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala +++ b/gluten-ut/spark32/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala @@ -1509,148 +1509,10 @@ class ClickHouseTestSettings extends BackendTestSettings { .exclude("SPARK-34862: Support ORC vectorized reader for nested column") enableSuite[GlutenOrcV1SchemaPruningSuite] .exclude( - "Spark vectorized reader - without partition data column - select only top-level fields") - .exclude("Spark vectorized reader - with partition data column - select only top-level fields") - .exclude("Non-vectorized reader - without partition data column - select only top-level fields") - .exclude("Non-vectorized reader - with partition data column - select only top-level fields") - .exclude("Spark vectorized reader - without partition data column - select a single complex field with disabled nested schema pruning") - .exclude("Spark vectorized reader - with partition data column - select a single complex field with disabled nested schema pruning") - .exclude("Non-vectorized reader - without partition data column - select a single complex field with disabled nested schema pruning") - .exclude("Non-vectorized reader - with partition data column - select a single complex field with disabled nested schema pruning") - .exclude( - "Spark vectorized reader - without partition data column - select only input_file_name()") - .exclude("Spark vectorized reader - with partition data column - select only input_file_name()") - .exclude( - "Non-vectorized reader - without partition data column - select only input_file_name()") - .exclude("Non-vectorized reader - with partition data column - select only input_file_name()") - .exclude("Spark vectorized reader - without partition data column - select only expressions without references") - .exclude("Spark vectorized reader - with partition data column - select only expressions without references") - .exclude("Non-vectorized reader - without partition data column - select only expressions without references") - .exclude("Non-vectorized reader - with partition data column - select only expressions without references") - .exclude( - "Spark vectorized reader - without partition data column - select a single complex field") - .exclude("Spark vectorized reader - with partition data column - select a single complex field") - .exclude( - "Non-vectorized reader - without partition data column - select a single complex field") - .exclude("Non-vectorized reader - with partition data column - select a single complex field") - .exclude("Spark vectorized reader - without partition data column - select a single complex field and its parent struct") - .exclude("Spark vectorized reader - with partition data column - select a single complex field and its parent struct") - .exclude("Non-vectorized reader - without partition data column - select a single complex field and its parent struct") - .exclude("Non-vectorized reader - with partition data column - select a single complex field and its parent struct") - .exclude("Spark vectorized reader - without partition data column - select a single complex field array and its parent struct array") - .exclude("Spark vectorized reader - with partition data column - select a single complex field array and its parent struct array") - .exclude("Non-vectorized reader - without partition data column - select a single complex field array and its parent struct array") - .exclude("Non-vectorized reader - with partition data column - select a single complex field array and its parent struct array") - .exclude("Spark vectorized reader - without partition data column - select a single complex field from a map entry and its parent map entry") + "Spark vectorized reader - without partition data column - select a single complex field from a map entry and its parent map entry") .exclude("Spark vectorized reader - with partition data column - select a single complex field from a map entry and its parent map entry") .exclude("Non-vectorized reader - without partition data column - select a single complex field from a map entry and its parent map entry") .exclude("Non-vectorized reader - with partition data column - select a single complex field from a map entry and its parent map entry") - .exclude("Spark vectorized reader - without partition data column - select a single complex field and the partition column") - .exclude("Spark vectorized reader - with partition data column - select a single complex field and the partition column") - .exclude("Non-vectorized reader - without partition data column - select a single complex field and the partition column") - .exclude("Non-vectorized reader - with partition data column - select a single complex field and the partition column") - .exclude("Spark vectorized reader - without partition data column - partial schema intersection - select missing subfield") - .exclude("Spark vectorized reader - with partition data column - partial schema intersection - select missing subfield") - .exclude("Non-vectorized reader - without partition data column - partial schema intersection - select missing subfield") - .exclude("Non-vectorized reader - with partition data column - partial schema intersection - select missing subfield") - .exclude( - "Spark vectorized reader - without partition data column - no unnecessary schema pruning") - .exclude("Spark vectorized reader - with partition data column - no unnecessary schema pruning") - .exclude( - "Non-vectorized reader - without partition data column - no unnecessary schema pruning") - .exclude("Non-vectorized reader - with partition data column - no unnecessary schema pruning") - .exclude("Spark vectorized reader - without partition data column - empty schema intersection") - .exclude("Spark vectorized reader - with partition data column - empty schema intersection") - .exclude("Non-vectorized reader - without partition data column - empty schema intersection") - .exclude("Non-vectorized reader - with partition data column - empty schema intersection") - .exclude("Spark vectorized reader - without partition data column - select a single complex field and in where clause") - .exclude("Spark vectorized reader - with partition data column - select a single complex field and in where clause") - .exclude("Non-vectorized reader - without partition data column - select a single complex field and in where clause") - .exclude("Non-vectorized reader - with partition data column - select a single complex field and in where clause") - .exclude("Spark vectorized reader - without partition data column - select nullable complex field and having is not null predicate") - .exclude("Spark vectorized reader - with partition data column - select nullable complex field and having is not null predicate") - .exclude("Non-vectorized reader - without partition data column - select nullable complex field and having is not null predicate") - .exclude("Non-vectorized reader - with partition data column - select nullable complex field and having is not null predicate") - .exclude("Spark vectorized reader - without partition data column - select a single complex field and is null expression in project") - .exclude("Spark vectorized reader - with partition data column - select a single complex field and is null expression in project") - .exclude("Non-vectorized reader - without partition data column - select a single complex field and is null expression in project") - .exclude("Non-vectorized reader - with partition data column - select a single complex field and is null expression in project") - .exclude("Spark vectorized reader - without partition data column - select a single complex field from a map entry and in clause") - .exclude("Spark vectorized reader - with partition data column - select a single complex field from a map entry and in clause") - .exclude("Non-vectorized reader - without partition data column - select a single complex field from a map entry and in clause") - .exclude("Non-vectorized reader - with partition data column - select a single complex field from a map entry and in clause") - .exclude("Spark vectorized reader - without partition data column - select one complex field and having is null predicate on another complex field") - .exclude("Spark vectorized reader - with partition data column - select one complex field and having is null predicate on another complex field") - .exclude("Non-vectorized reader - without partition data column - select one complex field and having is null predicate on another complex field") - .exclude("Non-vectorized reader - with partition data column - select one complex field and having is null predicate on another complex field") - .exclude("Spark vectorized reader - without partition data column - select one deep nested complex field and having is null predicate on another deep nested complex field") - .exclude("Spark vectorized reader - with partition data column - select one deep nested complex field and having is null predicate on another deep nested complex field") - .exclude("Non-vectorized reader - without partition data column - select one deep nested complex field and having is null predicate on another deep nested complex field") - .exclude("Non-vectorized reader - with partition data column - select one deep nested complex field and having is null predicate on another deep nested complex field") - .exclude("Spark vectorized reader - without partition data column - select nested field from a complex map key using map_keys") - .exclude("Spark vectorized reader - with partition data column - select nested field from a complex map key using map_keys") - .exclude("Non-vectorized reader - without partition data column - select nested field from a complex map key using map_keys") - .exclude("Non-vectorized reader - with partition data column - select nested field from a complex map key using map_keys") - .exclude("Spark vectorized reader - without partition data column - select nested field from a complex map value using map_values") - .exclude("Spark vectorized reader - with partition data column - select nested field from a complex map value using map_values") - .exclude("Non-vectorized reader - without partition data column - select nested field from a complex map value using map_values") - .exclude("Non-vectorized reader - with partition data column - select nested field from a complex map value using map_values") - .exclude("Spark vectorized reader - without partition data column - select explode of nested field of array of struct") - .exclude("Spark vectorized reader - with partition data column - select explode of nested field of array of struct") - .exclude("Non-vectorized reader - without partition data column - select explode of nested field of array of struct") - .exclude("Non-vectorized reader - with partition data column - select explode of nested field of array of struct") - .exclude("Spark vectorized reader - without partition data column - SPARK-34638: nested column prune on generator output") - .exclude("Spark vectorized reader - with partition data column - SPARK-34638: nested column prune on generator output") - .exclude("Non-vectorized reader - without partition data column - SPARK-34638: nested column prune on generator output") - .exclude("Non-vectorized reader - with partition data column - SPARK-34638: nested column prune on generator output") - .exclude("Spark vectorized reader - without partition data column - select one deep nested complex field after repartition") - .exclude("Spark vectorized reader - with partition data column - select one deep nested complex field after repartition") - .exclude("Non-vectorized reader - without partition data column - select one deep nested complex field after repartition") - .exclude("Non-vectorized reader - with partition data column - select one deep nested complex field after repartition") - .exclude("Spark vectorized reader - without partition data column - select one deep nested complex field after repartition by expression") - .exclude("Spark vectorized reader - with partition data column - select one deep nested complex field after repartition by expression") - .exclude("Non-vectorized reader - without partition data column - select one deep nested complex field after repartition by expression") - .exclude("Non-vectorized reader - with partition data column - select one deep nested complex field after repartition by expression") - .exclude("Spark vectorized reader - without partition data column - select one deep nested complex field after join") - .exclude("Spark vectorized reader - with partition data column - select one deep nested complex field after join") - .exclude("Non-vectorized reader - without partition data column - select one deep nested complex field after join") - .exclude("Non-vectorized reader - with partition data column - select one deep nested complex field after join") - .exclude("Spark vectorized reader - without partition data column - select one deep nested complex field after outer join") - .exclude("Spark vectorized reader - with partition data column - select one deep nested complex field after outer join") - .exclude("Non-vectorized reader - without partition data column - select one deep nested complex field after outer join") - .exclude("Non-vectorized reader - with partition data column - select one deep nested complex field after outer join") - .exclude("Spark vectorized reader - without partition data column - select nested field in aggregation function of Aggregate") - .exclude("Spark vectorized reader - with partition data column - select nested field in aggregation function of Aggregate") - .exclude("Non-vectorized reader - without partition data column - select nested field in aggregation function of Aggregate") - .exclude("Non-vectorized reader - with partition data column - select nested field in aggregation function of Aggregate") - .exclude("Spark vectorized reader - without partition data column - select nested field in window function") - .exclude("Spark vectorized reader - with partition data column - select nested field in window function") - .exclude("Non-vectorized reader - without partition data column - select nested field in window function") - .exclude( - "Non-vectorized reader - with partition data column - select nested field in window function") - .exclude("Spark vectorized reader - without partition data column - select nested field in window function and then order by") - .exclude("Spark vectorized reader - with partition data column - select nested field in window function and then order by") - .exclude("Non-vectorized reader - without partition data column - select nested field in window function and then order by") - .exclude("Non-vectorized reader - with partition data column - select nested field in window function and then order by") - .exclude( - "Spark vectorized reader - without partition data column - select nested field in Sort") - .exclude("Spark vectorized reader - with partition data column - select nested field in Sort") - .exclude("Non-vectorized reader - without partition data column - select nested field in Sort") - .exclude("Non-vectorized reader - with partition data column - select nested field in Sort") - .exclude( - "Spark vectorized reader - without partition data column - select nested field in Expand") - .exclude("Spark vectorized reader - with partition data column - select nested field in Expand") - .exclude( - "Non-vectorized reader - without partition data column - select nested field in Expand") - .exclude("Non-vectorized reader - with partition data column - select nested field in Expand") - .exclude("Spark vectorized reader - without partition data column - SPARK-32163: nested pruning should work even with cosmetic variations") - .exclude("Spark vectorized reader - with partition data column - SPARK-32163: nested pruning should work even with cosmetic variations") - .exclude("Non-vectorized reader - without partition data column - SPARK-32163: nested pruning should work even with cosmetic variations") - .exclude("Non-vectorized reader - with partition data column - SPARK-32163: nested pruning should work even with cosmetic variations") - .exclude("Spark vectorized reader - without partition data column - SPARK-38918: nested schema pruning with correlated subqueries") - .exclude("Spark vectorized reader - with partition data column - SPARK-38918: nested schema pruning with correlated subqueries") - .exclude("Non-vectorized reader - without partition data column - SPARK-38918: nested schema pruning with correlated subqueries") - .exclude("Non-vectorized reader - with partition data column - SPARK-38918: nested schema pruning with correlated subqueries") .exclude("Case-insensitive parser - mixed-case schema - select with exact column names") .exclude("Case-insensitive parser - mixed-case schema - select with lowercase column names") .exclude( @@ -1658,36 +1520,7 @@ class ClickHouseTestSettings extends BackendTestSettings { .exclude( "Case-insensitive parser - mixed-case schema - filter with different-case column names") .exclude("Case-insensitive parser - mixed-case schema - subquery filter with different-case column names") - .exclude("Spark vectorized reader - without partition data column - SPARK-34963: extract case-insensitive struct field from array") - .exclude("Spark vectorized reader - with partition data column - SPARK-34963: extract case-insensitive struct field from array") - .exclude("Non-vectorized reader - without partition data column - SPARK-34963: extract case-insensitive struct field from array") - .exclude("Non-vectorized reader - with partition data column - SPARK-34963: extract case-insensitive struct field from array") - .exclude("Spark vectorized reader - without partition data column - SPARK-34963: extract case-insensitive struct field from struct") - .exclude("Spark vectorized reader - with partition data column - SPARK-34963: extract case-insensitive struct field from struct") - .exclude("Non-vectorized reader - without partition data column - SPARK-34963: extract case-insensitive struct field from struct") - .exclude("Non-vectorized reader - with partition data column - SPARK-34963: extract case-insensitive struct field from struct") .exclude("SPARK-36352: Spark should check result plan's output schema name") - .exclude("Spark vectorized reader - without partition data column - SPARK-38977: schema pruning with correlated EXISTS subquery") - .exclude("Spark vectorized reader - with partition data column - SPARK-38977: schema pruning with correlated EXISTS subquery") - .exclude("Non-vectorized reader - without partition data column - SPARK-38977: schema pruning with correlated EXISTS subquery") - .exclude("Non-vectorized reader - with partition data column - SPARK-38977: schema pruning with correlated EXISTS subquery") - .exclude("Spark vectorized reader - without partition data column - SPARK-38977: schema pruning with correlated NOT EXISTS subquery") - .exclude("Spark vectorized reader - with partition data column - SPARK-38977: schema pruning with correlated NOT EXISTS subquery") - .exclude("Non-vectorized reader - without partition data column - SPARK-38977: schema pruning with correlated NOT EXISTS subquery") - .exclude("Non-vectorized reader - with partition data column - SPARK-38977: schema pruning with correlated NOT EXISTS subquery") - .exclude("Spark vectorized reader - without partition data column - SPARK-38977: schema pruning with correlated IN subquery") - .exclude("Spark vectorized reader - with partition data column - SPARK-38977: schema pruning with correlated IN subquery") - .exclude("Non-vectorized reader - without partition data column - SPARK-38977: schema pruning with correlated IN subquery") - .exclude("Non-vectorized reader - with partition data column - SPARK-38977: schema pruning with correlated IN subquery") - .exclude("Spark vectorized reader - without partition data column - SPARK-38977: schema pruning with correlated NOT IN subquery") - .exclude("Spark vectorized reader - with partition data column - SPARK-38977: schema pruning with correlated NOT IN subquery") - .exclude("Non-vectorized reader - without partition data column - SPARK-38977: schema pruning with correlated NOT IN subquery") - .exclude("Non-vectorized reader - with partition data column - SPARK-38977: schema pruning with correlated NOT IN subquery") - .exclude("Spark vectorized reader - without partition data column - SPARK-34638: nested column prune on generator output - case-sensitivity") - .exclude("Spark vectorized reader - with partition data column - SPARK-34638: nested column prune on generator output - case-sensitivity") - .exclude("Non-vectorized reader - without partition data column - SPARK-34638: nested column prune on generator output - case-sensitivity") - .exclude("Non-vectorized reader - with partition data column - SPARK-34638: nested column prune on generator output - case-sensitivity") - .exclude("SPARK-37450: Prunes unnecessary fields from Explode for count aggregation") enableSuite[GlutenOrcV2QuerySuite] .exclude("Enabling/disabling ignoreCorruptFiles") .exclude("SPARK-27160 Predicate pushdown correctness on DecimalType for ORC") diff --git a/gluten-ut/spark33/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala b/gluten-ut/spark33/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala index 660d693cce3f..a914f28700dc 100644 --- a/gluten-ut/spark33/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala +++ b/gluten-ut/spark33/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala @@ -1318,148 +1318,10 @@ class ClickHouseTestSettings extends BackendTestSettings { .exclude("SPARK-34862: Support ORC vectorized reader for nested column") enableSuite[GlutenOrcV1SchemaPruningSuite] .exclude( - "Spark vectorized reader - without partition data column - select only top-level fields") - .exclude("Spark vectorized reader - with partition data column - select only top-level fields") - .exclude("Non-vectorized reader - without partition data column - select only top-level fields") - .exclude("Non-vectorized reader - with partition data column - select only top-level fields") - .exclude("Spark vectorized reader - without partition data column - select a single complex field with disabled nested schema pruning") - .exclude("Spark vectorized reader - with partition data column - select a single complex field with disabled nested schema pruning") - .exclude("Non-vectorized reader - without partition data column - select a single complex field with disabled nested schema pruning") - .exclude("Non-vectorized reader - with partition data column - select a single complex field with disabled nested schema pruning") - .exclude( - "Spark vectorized reader - without partition data column - select only input_file_name()") - .exclude("Spark vectorized reader - with partition data column - select only input_file_name()") - .exclude( - "Non-vectorized reader - without partition data column - select only input_file_name()") - .exclude("Non-vectorized reader - with partition data column - select only input_file_name()") - .exclude("Spark vectorized reader - without partition data column - select only expressions without references") - .exclude("Spark vectorized reader - with partition data column - select only expressions without references") - .exclude("Non-vectorized reader - without partition data column - select only expressions without references") - .exclude("Non-vectorized reader - with partition data column - select only expressions without references") - .exclude( - "Spark vectorized reader - without partition data column - select a single complex field") - .exclude("Spark vectorized reader - with partition data column - select a single complex field") - .exclude( - "Non-vectorized reader - without partition data column - select a single complex field") - .exclude("Non-vectorized reader - with partition data column - select a single complex field") - .exclude("Spark vectorized reader - without partition data column - select a single complex field and its parent struct") - .exclude("Spark vectorized reader - with partition data column - select a single complex field and its parent struct") - .exclude("Non-vectorized reader - without partition data column - select a single complex field and its parent struct") - .exclude("Non-vectorized reader - with partition data column - select a single complex field and its parent struct") - .exclude("Spark vectorized reader - without partition data column - select a single complex field array and its parent struct array") - .exclude("Spark vectorized reader - with partition data column - select a single complex field array and its parent struct array") - .exclude("Non-vectorized reader - without partition data column - select a single complex field array and its parent struct array") - .exclude("Non-vectorized reader - with partition data column - select a single complex field array and its parent struct array") - .exclude("Spark vectorized reader - without partition data column - select a single complex field from a map entry and its parent map entry") + "Spark vectorized reader - without partition data column - select a single complex field from a map entry and its parent map entry") .exclude("Spark vectorized reader - with partition data column - select a single complex field from a map entry and its parent map entry") .exclude("Non-vectorized reader - without partition data column - select a single complex field from a map entry and its parent map entry") .exclude("Non-vectorized reader - with partition data column - select a single complex field from a map entry and its parent map entry") - .exclude("Spark vectorized reader - without partition data column - select a single complex field and the partition column") - .exclude("Spark vectorized reader - with partition data column - select a single complex field and the partition column") - .exclude("Non-vectorized reader - without partition data column - select a single complex field and the partition column") - .exclude("Non-vectorized reader - with partition data column - select a single complex field and the partition column") - .exclude("Spark vectorized reader - without partition data column - partial schema intersection - select missing subfield") - .exclude("Spark vectorized reader - with partition data column - partial schema intersection - select missing subfield") - .exclude("Non-vectorized reader - without partition data column - partial schema intersection - select missing subfield") - .exclude("Non-vectorized reader - with partition data column - partial schema intersection - select missing subfield") - .exclude( - "Spark vectorized reader - without partition data column - no unnecessary schema pruning") - .exclude("Spark vectorized reader - with partition data column - no unnecessary schema pruning") - .exclude( - "Non-vectorized reader - without partition data column - no unnecessary schema pruning") - .exclude("Non-vectorized reader - with partition data column - no unnecessary schema pruning") - .exclude("Spark vectorized reader - without partition data column - empty schema intersection") - .exclude("Spark vectorized reader - with partition data column - empty schema intersection") - .exclude("Non-vectorized reader - without partition data column - empty schema intersection") - .exclude("Non-vectorized reader - with partition data column - empty schema intersection") - .exclude("Spark vectorized reader - without partition data column - select a single complex field and in where clause") - .exclude("Spark vectorized reader - with partition data column - select a single complex field and in where clause") - .exclude("Non-vectorized reader - without partition data column - select a single complex field and in where clause") - .exclude("Non-vectorized reader - with partition data column - select a single complex field and in where clause") - .exclude("Spark vectorized reader - without partition data column - select nullable complex field and having is not null predicate") - .exclude("Spark vectorized reader - with partition data column - select nullable complex field and having is not null predicate") - .exclude("Non-vectorized reader - without partition data column - select nullable complex field and having is not null predicate") - .exclude("Non-vectorized reader - with partition data column - select nullable complex field and having is not null predicate") - .exclude("Spark vectorized reader - without partition data column - select a single complex field and is null expression in project") - .exclude("Spark vectorized reader - with partition data column - select a single complex field and is null expression in project") - .exclude("Non-vectorized reader - without partition data column - select a single complex field and is null expression in project") - .exclude("Non-vectorized reader - with partition data column - select a single complex field and is null expression in project") - .exclude("Spark vectorized reader - without partition data column - select a single complex field from a map entry and in clause") - .exclude("Spark vectorized reader - with partition data column - select a single complex field from a map entry and in clause") - .exclude("Non-vectorized reader - without partition data column - select a single complex field from a map entry and in clause") - .exclude("Non-vectorized reader - with partition data column - select a single complex field from a map entry and in clause") - .exclude("Spark vectorized reader - without partition data column - select one complex field and having is null predicate on another complex field") - .exclude("Spark vectorized reader - with partition data column - select one complex field and having is null predicate on another complex field") - .exclude("Non-vectorized reader - without partition data column - select one complex field and having is null predicate on another complex field") - .exclude("Non-vectorized reader - with partition data column - select one complex field and having is null predicate on another complex field") - .exclude("Spark vectorized reader - without partition data column - select one deep nested complex field and having is null predicate on another deep nested complex field") - .exclude("Spark vectorized reader - with partition data column - select one deep nested complex field and having is null predicate on another deep nested complex field") - .exclude("Non-vectorized reader - without partition data column - select one deep nested complex field and having is null predicate on another deep nested complex field") - .exclude("Non-vectorized reader - with partition data column - select one deep nested complex field and having is null predicate on another deep nested complex field") - .exclude("Spark vectorized reader - without partition data column - select nested field from a complex map key using map_keys") - .exclude("Spark vectorized reader - with partition data column - select nested field from a complex map key using map_keys") - .exclude("Non-vectorized reader - without partition data column - select nested field from a complex map key using map_keys") - .exclude("Non-vectorized reader - with partition data column - select nested field from a complex map key using map_keys") - .exclude("Spark vectorized reader - without partition data column - select nested field from a complex map value using map_values") - .exclude("Spark vectorized reader - with partition data column - select nested field from a complex map value using map_values") - .exclude("Non-vectorized reader - without partition data column - select nested field from a complex map value using map_values") - .exclude("Non-vectorized reader - with partition data column - select nested field from a complex map value using map_values") - .exclude("Spark vectorized reader - without partition data column - select explode of nested field of array of struct") - .exclude("Spark vectorized reader - with partition data column - select explode of nested field of array of struct") - .exclude("Non-vectorized reader - without partition data column - select explode of nested field of array of struct") - .exclude("Non-vectorized reader - with partition data column - select explode of nested field of array of struct") - .exclude("Spark vectorized reader - without partition data column - SPARK-34638: nested column prune on generator output") - .exclude("Spark vectorized reader - with partition data column - SPARK-34638: nested column prune on generator output") - .exclude("Non-vectorized reader - without partition data column - SPARK-34638: nested column prune on generator output") - .exclude("Non-vectorized reader - with partition data column - SPARK-34638: nested column prune on generator output") - .exclude("Spark vectorized reader - without partition data column - select one deep nested complex field after repartition") - .exclude("Spark vectorized reader - with partition data column - select one deep nested complex field after repartition") - .exclude("Non-vectorized reader - without partition data column - select one deep nested complex field after repartition") - .exclude("Non-vectorized reader - with partition data column - select one deep nested complex field after repartition") - .exclude("Spark vectorized reader - without partition data column - select one deep nested complex field after repartition by expression") - .exclude("Spark vectorized reader - with partition data column - select one deep nested complex field after repartition by expression") - .exclude("Non-vectorized reader - without partition data column - select one deep nested complex field after repartition by expression") - .exclude("Non-vectorized reader - with partition data column - select one deep nested complex field after repartition by expression") - .exclude("Spark vectorized reader - without partition data column - select one deep nested complex field after join") - .exclude("Spark vectorized reader - with partition data column - select one deep nested complex field after join") - .exclude("Non-vectorized reader - without partition data column - select one deep nested complex field after join") - .exclude("Non-vectorized reader - with partition data column - select one deep nested complex field after join") - .exclude("Spark vectorized reader - without partition data column - select one deep nested complex field after outer join") - .exclude("Spark vectorized reader - with partition data column - select one deep nested complex field after outer join") - .exclude("Non-vectorized reader - without partition data column - select one deep nested complex field after outer join") - .exclude("Non-vectorized reader - with partition data column - select one deep nested complex field after outer join") - .exclude("Spark vectorized reader - without partition data column - select nested field in aggregation function of Aggregate") - .exclude("Spark vectorized reader - with partition data column - select nested field in aggregation function of Aggregate") - .exclude("Non-vectorized reader - without partition data column - select nested field in aggregation function of Aggregate") - .exclude("Non-vectorized reader - with partition data column - select nested field in aggregation function of Aggregate") - .exclude("Spark vectorized reader - without partition data column - select nested field in window function") - .exclude("Spark vectorized reader - with partition data column - select nested field in window function") - .exclude("Non-vectorized reader - without partition data column - select nested field in window function") - .exclude( - "Non-vectorized reader - with partition data column - select nested field in window function") - .exclude("Spark vectorized reader - without partition data column - select nested field in window function and then order by") - .exclude("Spark vectorized reader - with partition data column - select nested field in window function and then order by") - .exclude("Non-vectorized reader - without partition data column - select nested field in window function and then order by") - .exclude("Non-vectorized reader - with partition data column - select nested field in window function and then order by") - .exclude( - "Spark vectorized reader - without partition data column - select nested field in Sort") - .exclude("Spark vectorized reader - with partition data column - select nested field in Sort") - .exclude("Non-vectorized reader - without partition data column - select nested field in Sort") - .exclude("Non-vectorized reader - with partition data column - select nested field in Sort") - .exclude( - "Spark vectorized reader - without partition data column - select nested field in Expand") - .exclude("Spark vectorized reader - with partition data column - select nested field in Expand") - .exclude( - "Non-vectorized reader - without partition data column - select nested field in Expand") - .exclude("Non-vectorized reader - with partition data column - select nested field in Expand") - .exclude("Spark vectorized reader - without partition data column - SPARK-32163: nested pruning should work even with cosmetic variations") - .exclude("Spark vectorized reader - with partition data column - SPARK-32163: nested pruning should work even with cosmetic variations") - .exclude("Non-vectorized reader - without partition data column - SPARK-32163: nested pruning should work even with cosmetic variations") - .exclude("Non-vectorized reader - with partition data column - SPARK-32163: nested pruning should work even with cosmetic variations") - .exclude("Spark vectorized reader - without partition data column - SPARK-38918: nested schema pruning with correlated subqueries") - .exclude("Spark vectorized reader - with partition data column - SPARK-38918: nested schema pruning with correlated subqueries") - .exclude("Non-vectorized reader - without partition data column - SPARK-38918: nested schema pruning with correlated subqueries") - .exclude("Non-vectorized reader - with partition data column - SPARK-38918: nested schema pruning with correlated subqueries") .exclude("Case-insensitive parser - mixed-case schema - select with exact column names") .exclude("Case-insensitive parser - mixed-case schema - select with lowercase column names") .exclude( @@ -1467,36 +1329,7 @@ class ClickHouseTestSettings extends BackendTestSettings { .exclude( "Case-insensitive parser - mixed-case schema - filter with different-case column names") .exclude("Case-insensitive parser - mixed-case schema - subquery filter with different-case column names") - .exclude("Spark vectorized reader - without partition data column - SPARK-34963: extract case-insensitive struct field from array") - .exclude("Spark vectorized reader - with partition data column - SPARK-34963: extract case-insensitive struct field from array") - .exclude("Non-vectorized reader - without partition data column - SPARK-34963: extract case-insensitive struct field from array") - .exclude("Non-vectorized reader - with partition data column - SPARK-34963: extract case-insensitive struct field from array") - .exclude("Spark vectorized reader - without partition data column - SPARK-34963: extract case-insensitive struct field from struct") - .exclude("Spark vectorized reader - with partition data column - SPARK-34963: extract case-insensitive struct field from struct") - .exclude("Non-vectorized reader - without partition data column - SPARK-34963: extract case-insensitive struct field from struct") - .exclude("Non-vectorized reader - with partition data column - SPARK-34963: extract case-insensitive struct field from struct") .exclude("SPARK-36352: Spark should check result plan's output schema name") - .exclude("Spark vectorized reader - without partition data column - SPARK-38977: schema pruning with correlated EXISTS subquery") - .exclude("Spark vectorized reader - with partition data column - SPARK-38977: schema pruning with correlated EXISTS subquery") - .exclude("Non-vectorized reader - without partition data column - SPARK-38977: schema pruning with correlated EXISTS subquery") - .exclude("Non-vectorized reader - with partition data column - SPARK-38977: schema pruning with correlated EXISTS subquery") - .exclude("Spark vectorized reader - without partition data column - SPARK-38977: schema pruning with correlated NOT EXISTS subquery") - .exclude("Spark vectorized reader - with partition data column - SPARK-38977: schema pruning with correlated NOT EXISTS subquery") - .exclude("Non-vectorized reader - without partition data column - SPARK-38977: schema pruning with correlated NOT EXISTS subquery") - .exclude("Non-vectorized reader - with partition data column - SPARK-38977: schema pruning with correlated NOT EXISTS subquery") - .exclude("Spark vectorized reader - without partition data column - SPARK-38977: schema pruning with correlated IN subquery") - .exclude("Spark vectorized reader - with partition data column - SPARK-38977: schema pruning with correlated IN subquery") - .exclude("Non-vectorized reader - without partition data column - SPARK-38977: schema pruning with correlated IN subquery") - .exclude("Non-vectorized reader - with partition data column - SPARK-38977: schema pruning with correlated IN subquery") - .exclude("Spark vectorized reader - without partition data column - SPARK-38977: schema pruning with correlated NOT IN subquery") - .exclude("Spark vectorized reader - with partition data column - SPARK-38977: schema pruning with correlated NOT IN subquery") - .exclude("Non-vectorized reader - without partition data column - SPARK-38977: schema pruning with correlated NOT IN subquery") - .exclude("Non-vectorized reader - with partition data column - SPARK-38977: schema pruning with correlated NOT IN subquery") - .exclude("Spark vectorized reader - without partition data column - SPARK-34638: nested column prune on generator output - case-sensitivity") - .exclude("Spark vectorized reader - with partition data column - SPARK-34638: nested column prune on generator output - case-sensitivity") - .exclude("Non-vectorized reader - without partition data column - SPARK-34638: nested column prune on generator output - case-sensitivity") - .exclude("Non-vectorized reader - with partition data column - SPARK-34638: nested column prune on generator output - case-sensitivity") - .exclude("SPARK-37450: Prunes unnecessary fields from Explode for count aggregation") enableSuite[GlutenOrcV2QuerySuite] .exclude("Enabling/disabling ignoreCorruptFiles") .exclude("SPARK-27160 Predicate pushdown correctness on DecimalType for ORC") diff --git a/gluten-ut/spark34/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala b/gluten-ut/spark34/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala index 77c12621efeb..3b686f78cff8 100644 --- a/gluten-ut/spark34/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala +++ b/gluten-ut/spark34/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala @@ -1157,148 +1157,10 @@ class ClickHouseTestSettings extends BackendTestSettings { .exclude("SPARK-34862: Support ORC vectorized reader for nested column") enableSuite[GlutenOrcV1SchemaPruningSuite] .exclude( - "Spark vectorized reader - without partition data column - select only top-level fields") - .exclude("Spark vectorized reader - with partition data column - select only top-level fields") - .exclude("Non-vectorized reader - without partition data column - select only top-level fields") - .exclude("Non-vectorized reader - with partition data column - select only top-level fields") - .exclude("Spark vectorized reader - without partition data column - select a single complex field with disabled nested schema pruning") - .exclude("Spark vectorized reader - with partition data column - select a single complex field with disabled nested schema pruning") - .exclude("Non-vectorized reader - without partition data column - select a single complex field with disabled nested schema pruning") - .exclude("Non-vectorized reader - with partition data column - select a single complex field with disabled nested schema pruning") - .exclude( - "Spark vectorized reader - without partition data column - select only input_file_name()") - .exclude("Spark vectorized reader - with partition data column - select only input_file_name()") - .exclude( - "Non-vectorized reader - without partition data column - select only input_file_name()") - .exclude("Non-vectorized reader - with partition data column - select only input_file_name()") - .exclude("Spark vectorized reader - without partition data column - select only expressions without references") - .exclude("Spark vectorized reader - with partition data column - select only expressions without references") - .exclude("Non-vectorized reader - without partition data column - select only expressions without references") - .exclude("Non-vectorized reader - with partition data column - select only expressions without references") - .exclude( - "Spark vectorized reader - without partition data column - select a single complex field") - .exclude("Spark vectorized reader - with partition data column - select a single complex field") - .exclude( - "Non-vectorized reader - without partition data column - select a single complex field") - .exclude("Non-vectorized reader - with partition data column - select a single complex field") - .exclude("Spark vectorized reader - without partition data column - select a single complex field and its parent struct") - .exclude("Spark vectorized reader - with partition data column - select a single complex field and its parent struct") - .exclude("Non-vectorized reader - without partition data column - select a single complex field and its parent struct") - .exclude("Non-vectorized reader - with partition data column - select a single complex field and its parent struct") - .exclude("Spark vectorized reader - without partition data column - select a single complex field array and its parent struct array") - .exclude("Spark vectorized reader - with partition data column - select a single complex field array and its parent struct array") - .exclude("Non-vectorized reader - without partition data column - select a single complex field array and its parent struct array") - .exclude("Non-vectorized reader - with partition data column - select a single complex field array and its parent struct array") - .exclude("Spark vectorized reader - without partition data column - select a single complex field from a map entry and its parent map entry") + "Spark vectorized reader - without partition data column - select a single complex field from a map entry and its parent map entry") .exclude("Spark vectorized reader - with partition data column - select a single complex field from a map entry and its parent map entry") .exclude("Non-vectorized reader - without partition data column - select a single complex field from a map entry and its parent map entry") .exclude("Non-vectorized reader - with partition data column - select a single complex field from a map entry and its parent map entry") - .exclude("Spark vectorized reader - without partition data column - select a single complex field and the partition column") - .exclude("Spark vectorized reader - with partition data column - select a single complex field and the partition column") - .exclude("Non-vectorized reader - without partition data column - select a single complex field and the partition column") - .exclude("Non-vectorized reader - with partition data column - select a single complex field and the partition column") - .exclude("Spark vectorized reader - without partition data column - partial schema intersection - select missing subfield") - .exclude("Spark vectorized reader - with partition data column - partial schema intersection - select missing subfield") - .exclude("Non-vectorized reader - without partition data column - partial schema intersection - select missing subfield") - .exclude("Non-vectorized reader - with partition data column - partial schema intersection - select missing subfield") - .exclude( - "Spark vectorized reader - without partition data column - no unnecessary schema pruning") - .exclude("Spark vectorized reader - with partition data column - no unnecessary schema pruning") - .exclude( - "Non-vectorized reader - without partition data column - no unnecessary schema pruning") - .exclude("Non-vectorized reader - with partition data column - no unnecessary schema pruning") - .exclude("Spark vectorized reader - without partition data column - empty schema intersection") - .exclude("Spark vectorized reader - with partition data column - empty schema intersection") - .exclude("Non-vectorized reader - without partition data column - empty schema intersection") - .exclude("Non-vectorized reader - with partition data column - empty schema intersection") - .exclude("Spark vectorized reader - without partition data column - select a single complex field and in where clause") - .exclude("Spark vectorized reader - with partition data column - select a single complex field and in where clause") - .exclude("Non-vectorized reader - without partition data column - select a single complex field and in where clause") - .exclude("Non-vectorized reader - with partition data column - select a single complex field and in where clause") - .exclude("Spark vectorized reader - without partition data column - select nullable complex field and having is not null predicate") - .exclude("Spark vectorized reader - with partition data column - select nullable complex field and having is not null predicate") - .exclude("Non-vectorized reader - without partition data column - select nullable complex field and having is not null predicate") - .exclude("Non-vectorized reader - with partition data column - select nullable complex field and having is not null predicate") - .exclude("Spark vectorized reader - without partition data column - select a single complex field and is null expression in project") - .exclude("Spark vectorized reader - with partition data column - select a single complex field and is null expression in project") - .exclude("Non-vectorized reader - without partition data column - select a single complex field and is null expression in project") - .exclude("Non-vectorized reader - with partition data column - select a single complex field and is null expression in project") - .exclude("Spark vectorized reader - without partition data column - select a single complex field from a map entry and in clause") - .exclude("Spark vectorized reader - with partition data column - select a single complex field from a map entry and in clause") - .exclude("Non-vectorized reader - without partition data column - select a single complex field from a map entry and in clause") - .exclude("Non-vectorized reader - with partition data column - select a single complex field from a map entry and in clause") - .exclude("Spark vectorized reader - without partition data column - select one complex field and having is null predicate on another complex field") - .exclude("Spark vectorized reader - with partition data column - select one complex field and having is null predicate on another complex field") - .exclude("Non-vectorized reader - without partition data column - select one complex field and having is null predicate on another complex field") - .exclude("Non-vectorized reader - with partition data column - select one complex field and having is null predicate on another complex field") - .exclude("Spark vectorized reader - without partition data column - select one deep nested complex field and having is null predicate on another deep nested complex field") - .exclude("Spark vectorized reader - with partition data column - select one deep nested complex field and having is null predicate on another deep nested complex field") - .exclude("Non-vectorized reader - without partition data column - select one deep nested complex field and having is null predicate on another deep nested complex field") - .exclude("Non-vectorized reader - with partition data column - select one deep nested complex field and having is null predicate on another deep nested complex field") - .exclude("Spark vectorized reader - without partition data column - select nested field from a complex map key using map_keys") - .exclude("Spark vectorized reader - with partition data column - select nested field from a complex map key using map_keys") - .exclude("Non-vectorized reader - without partition data column - select nested field from a complex map key using map_keys") - .exclude("Non-vectorized reader - with partition data column - select nested field from a complex map key using map_keys") - .exclude("Spark vectorized reader - without partition data column - select nested field from a complex map value using map_values") - .exclude("Spark vectorized reader - with partition data column - select nested field from a complex map value using map_values") - .exclude("Non-vectorized reader - without partition data column - select nested field from a complex map value using map_values") - .exclude("Non-vectorized reader - with partition data column - select nested field from a complex map value using map_values") - .exclude("Spark vectorized reader - without partition data column - select explode of nested field of array of struct") - .exclude("Spark vectorized reader - with partition data column - select explode of nested field of array of struct") - .exclude("Non-vectorized reader - without partition data column - select explode of nested field of array of struct") - .exclude("Non-vectorized reader - with partition data column - select explode of nested field of array of struct") - .exclude("Spark vectorized reader - without partition data column - SPARK-34638: nested column prune on generator output") - .exclude("Spark vectorized reader - with partition data column - SPARK-34638: nested column prune on generator output") - .exclude("Non-vectorized reader - without partition data column - SPARK-34638: nested column prune on generator output") - .exclude("Non-vectorized reader - with partition data column - SPARK-34638: nested column prune on generator output") - .exclude("Spark vectorized reader - without partition data column - select one deep nested complex field after repartition") - .exclude("Spark vectorized reader - with partition data column - select one deep nested complex field after repartition") - .exclude("Non-vectorized reader - without partition data column - select one deep nested complex field after repartition") - .exclude("Non-vectorized reader - with partition data column - select one deep nested complex field after repartition") - .exclude("Spark vectorized reader - without partition data column - select one deep nested complex field after repartition by expression") - .exclude("Spark vectorized reader - with partition data column - select one deep nested complex field after repartition by expression") - .exclude("Non-vectorized reader - without partition data column - select one deep nested complex field after repartition by expression") - .exclude("Non-vectorized reader - with partition data column - select one deep nested complex field after repartition by expression") - .exclude("Spark vectorized reader - without partition data column - select one deep nested complex field after join") - .exclude("Spark vectorized reader - with partition data column - select one deep nested complex field after join") - .exclude("Non-vectorized reader - without partition data column - select one deep nested complex field after join") - .exclude("Non-vectorized reader - with partition data column - select one deep nested complex field after join") - .exclude("Spark vectorized reader - without partition data column - select one deep nested complex field after outer join") - .exclude("Spark vectorized reader - with partition data column - select one deep nested complex field after outer join") - .exclude("Non-vectorized reader - without partition data column - select one deep nested complex field after outer join") - .exclude("Non-vectorized reader - with partition data column - select one deep nested complex field after outer join") - .exclude("Spark vectorized reader - without partition data column - select nested field in aggregation function of Aggregate") - .exclude("Spark vectorized reader - with partition data column - select nested field in aggregation function of Aggregate") - .exclude("Non-vectorized reader - without partition data column - select nested field in aggregation function of Aggregate") - .exclude("Non-vectorized reader - with partition data column - select nested field in aggregation function of Aggregate") - .exclude("Spark vectorized reader - without partition data column - select nested field in window function") - .exclude("Spark vectorized reader - with partition data column - select nested field in window function") - .exclude("Non-vectorized reader - without partition data column - select nested field in window function") - .exclude( - "Non-vectorized reader - with partition data column - select nested field in window function") - .exclude("Spark vectorized reader - without partition data column - select nested field in window function and then order by") - .exclude("Spark vectorized reader - with partition data column - select nested field in window function and then order by") - .exclude("Non-vectorized reader - without partition data column - select nested field in window function and then order by") - .exclude("Non-vectorized reader - with partition data column - select nested field in window function and then order by") - .exclude( - "Spark vectorized reader - without partition data column - select nested field in Sort") - .exclude("Spark vectorized reader - with partition data column - select nested field in Sort") - .exclude("Non-vectorized reader - without partition data column - select nested field in Sort") - .exclude("Non-vectorized reader - with partition data column - select nested field in Sort") - .exclude( - "Spark vectorized reader - without partition data column - select nested field in Expand") - .exclude("Spark vectorized reader - with partition data column - select nested field in Expand") - .exclude( - "Non-vectorized reader - without partition data column - select nested field in Expand") - .exclude("Non-vectorized reader - with partition data column - select nested field in Expand") - .exclude("Spark vectorized reader - without partition data column - SPARK-32163: nested pruning should work even with cosmetic variations") - .exclude("Spark vectorized reader - with partition data column - SPARK-32163: nested pruning should work even with cosmetic variations") - .exclude("Non-vectorized reader - without partition data column - SPARK-32163: nested pruning should work even with cosmetic variations") - .exclude("Non-vectorized reader - with partition data column - SPARK-32163: nested pruning should work even with cosmetic variations") - .exclude("Spark vectorized reader - without partition data column - SPARK-38918: nested schema pruning with correlated subqueries") - .exclude("Spark vectorized reader - with partition data column - SPARK-38918: nested schema pruning with correlated subqueries") - .exclude("Non-vectorized reader - without partition data column - SPARK-38918: nested schema pruning with correlated subqueries") - .exclude("Non-vectorized reader - with partition data column - SPARK-38918: nested schema pruning with correlated subqueries") .exclude("Case-insensitive parser - mixed-case schema - select with exact column names") .exclude("Case-insensitive parser - mixed-case schema - select with lowercase column names") .exclude( @@ -1306,36 +1168,7 @@ class ClickHouseTestSettings extends BackendTestSettings { .exclude( "Case-insensitive parser - mixed-case schema - filter with different-case column names") .exclude("Case-insensitive parser - mixed-case schema - subquery filter with different-case column names") - .exclude("Spark vectorized reader - without partition data column - SPARK-34963: extract case-insensitive struct field from array") - .exclude("Spark vectorized reader - with partition data column - SPARK-34963: extract case-insensitive struct field from array") - .exclude("Non-vectorized reader - without partition data column - SPARK-34963: extract case-insensitive struct field from array") - .exclude("Non-vectorized reader - with partition data column - SPARK-34963: extract case-insensitive struct field from array") - .exclude("Spark vectorized reader - without partition data column - SPARK-34963: extract case-insensitive struct field from struct") - .exclude("Spark vectorized reader - with partition data column - SPARK-34963: extract case-insensitive struct field from struct") - .exclude("Non-vectorized reader - without partition data column - SPARK-34963: extract case-insensitive struct field from struct") - .exclude("Non-vectorized reader - with partition data column - SPARK-34963: extract case-insensitive struct field from struct") .exclude("SPARK-36352: Spark should check result plan's output schema name") - .exclude("Spark vectorized reader - without partition data column - SPARK-38977: schema pruning with correlated EXISTS subquery") - .exclude("Spark vectorized reader - with partition data column - SPARK-38977: schema pruning with correlated EXISTS subquery") - .exclude("Non-vectorized reader - without partition data column - SPARK-38977: schema pruning with correlated EXISTS subquery") - .exclude("Non-vectorized reader - with partition data column - SPARK-38977: schema pruning with correlated EXISTS subquery") - .exclude("Spark vectorized reader - without partition data column - SPARK-38977: schema pruning with correlated NOT EXISTS subquery") - .exclude("Spark vectorized reader - with partition data column - SPARK-38977: schema pruning with correlated NOT EXISTS subquery") - .exclude("Non-vectorized reader - without partition data column - SPARK-38977: schema pruning with correlated NOT EXISTS subquery") - .exclude("Non-vectorized reader - with partition data column - SPARK-38977: schema pruning with correlated NOT EXISTS subquery") - .exclude("Spark vectorized reader - without partition data column - SPARK-38977: schema pruning with correlated IN subquery") - .exclude("Spark vectorized reader - with partition data column - SPARK-38977: schema pruning with correlated IN subquery") - .exclude("Non-vectorized reader - without partition data column - SPARK-38977: schema pruning with correlated IN subquery") - .exclude("Non-vectorized reader - with partition data column - SPARK-38977: schema pruning with correlated IN subquery") - .exclude("Spark vectorized reader - without partition data column - SPARK-38977: schema pruning with correlated NOT IN subquery") - .exclude("Spark vectorized reader - with partition data column - SPARK-38977: schema pruning with correlated NOT IN subquery") - .exclude("Non-vectorized reader - without partition data column - SPARK-38977: schema pruning with correlated NOT IN subquery") - .exclude("Non-vectorized reader - with partition data column - SPARK-38977: schema pruning with correlated NOT IN subquery") - .exclude("Spark vectorized reader - without partition data column - SPARK-34638: nested column prune on generator output - case-sensitivity") - .exclude("Spark vectorized reader - with partition data column - SPARK-34638: nested column prune on generator output - case-sensitivity") - .exclude("Non-vectorized reader - without partition data column - SPARK-34638: nested column prune on generator output - case-sensitivity") - .exclude("Non-vectorized reader - with partition data column - SPARK-34638: nested column prune on generator output - case-sensitivity") - .exclude("SPARK-37450: Prunes unnecessary fields from Explode for count aggregation") enableSuite[GlutenOrcV2QuerySuite] .exclude("Enabling/disabling ignoreCorruptFiles") .exclude("SPARK-27160 Predicate pushdown correctness on DecimalType for ORC") diff --git a/gluten-ut/spark35/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala b/gluten-ut/spark35/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala index bf971aba7282..ef3a12008f48 100644 --- a/gluten-ut/spark35/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala +++ b/gluten-ut/spark35/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala @@ -1157,148 +1157,10 @@ class ClickHouseTestSettings extends BackendTestSettings { .exclude("SPARK-34862: Support ORC vectorized reader for nested column") enableSuite[GlutenOrcV1SchemaPruningSuite] .exclude( - "Spark vectorized reader - without partition data column - select only top-level fields") - .exclude("Spark vectorized reader - with partition data column - select only top-level fields") - .exclude("Non-vectorized reader - without partition data column - select only top-level fields") - .exclude("Non-vectorized reader - with partition data column - select only top-level fields") - .exclude("Spark vectorized reader - without partition data column - select a single complex field with disabled nested schema pruning") - .exclude("Spark vectorized reader - with partition data column - select a single complex field with disabled nested schema pruning") - .exclude("Non-vectorized reader - without partition data column - select a single complex field with disabled nested schema pruning") - .exclude("Non-vectorized reader - with partition data column - select a single complex field with disabled nested schema pruning") - .exclude( - "Spark vectorized reader - without partition data column - select only input_file_name()") - .exclude("Spark vectorized reader - with partition data column - select only input_file_name()") - .exclude( - "Non-vectorized reader - without partition data column - select only input_file_name()") - .exclude("Non-vectorized reader - with partition data column - select only input_file_name()") - .exclude("Spark vectorized reader - without partition data column - select only expressions without references") - .exclude("Spark vectorized reader - with partition data column - select only expressions without references") - .exclude("Non-vectorized reader - without partition data column - select only expressions without references") - .exclude("Non-vectorized reader - with partition data column - select only expressions without references") - .exclude( - "Spark vectorized reader - without partition data column - select a single complex field") - .exclude("Spark vectorized reader - with partition data column - select a single complex field") - .exclude( - "Non-vectorized reader - without partition data column - select a single complex field") - .exclude("Non-vectorized reader - with partition data column - select a single complex field") - .exclude("Spark vectorized reader - without partition data column - select a single complex field and its parent struct") - .exclude("Spark vectorized reader - with partition data column - select a single complex field and its parent struct") - .exclude("Non-vectorized reader - without partition data column - select a single complex field and its parent struct") - .exclude("Non-vectorized reader - with partition data column - select a single complex field and its parent struct") - .exclude("Spark vectorized reader - without partition data column - select a single complex field array and its parent struct array") - .exclude("Spark vectorized reader - with partition data column - select a single complex field array and its parent struct array") - .exclude("Non-vectorized reader - without partition data column - select a single complex field array and its parent struct array") - .exclude("Non-vectorized reader - with partition data column - select a single complex field array and its parent struct array") - .exclude("Spark vectorized reader - without partition data column - select a single complex field from a map entry and its parent map entry") + "Spark vectorized reader - without partition data column - select a single complex field from a map entry and its parent map entry") .exclude("Spark vectorized reader - with partition data column - select a single complex field from a map entry and its parent map entry") .exclude("Non-vectorized reader - without partition data column - select a single complex field from a map entry and its parent map entry") .exclude("Non-vectorized reader - with partition data column - select a single complex field from a map entry and its parent map entry") - .exclude("Spark vectorized reader - without partition data column - select a single complex field and the partition column") - .exclude("Spark vectorized reader - with partition data column - select a single complex field and the partition column") - .exclude("Non-vectorized reader - without partition data column - select a single complex field and the partition column") - .exclude("Non-vectorized reader - with partition data column - select a single complex field and the partition column") - .exclude("Spark vectorized reader - without partition data column - partial schema intersection - select missing subfield") - .exclude("Spark vectorized reader - with partition data column - partial schema intersection - select missing subfield") - .exclude("Non-vectorized reader - without partition data column - partial schema intersection - select missing subfield") - .exclude("Non-vectorized reader - with partition data column - partial schema intersection - select missing subfield") - .exclude( - "Spark vectorized reader - without partition data column - no unnecessary schema pruning") - .exclude("Spark vectorized reader - with partition data column - no unnecessary schema pruning") - .exclude( - "Non-vectorized reader - without partition data column - no unnecessary schema pruning") - .exclude("Non-vectorized reader - with partition data column - no unnecessary schema pruning") - .exclude("Spark vectorized reader - without partition data column - empty schema intersection") - .exclude("Spark vectorized reader - with partition data column - empty schema intersection") - .exclude("Non-vectorized reader - without partition data column - empty schema intersection") - .exclude("Non-vectorized reader - with partition data column - empty schema intersection") - .exclude("Spark vectorized reader - without partition data column - select a single complex field and in where clause") - .exclude("Spark vectorized reader - with partition data column - select a single complex field and in where clause") - .exclude("Non-vectorized reader - without partition data column - select a single complex field and in where clause") - .exclude("Non-vectorized reader - with partition data column - select a single complex field and in where clause") - .exclude("Spark vectorized reader - without partition data column - select nullable complex field and having is not null predicate") - .exclude("Spark vectorized reader - with partition data column - select nullable complex field and having is not null predicate") - .exclude("Non-vectorized reader - without partition data column - select nullable complex field and having is not null predicate") - .exclude("Non-vectorized reader - with partition data column - select nullable complex field and having is not null predicate") - .exclude("Spark vectorized reader - without partition data column - select a single complex field and is null expression in project") - .exclude("Spark vectorized reader - with partition data column - select a single complex field and is null expression in project") - .exclude("Non-vectorized reader - without partition data column - select a single complex field and is null expression in project") - .exclude("Non-vectorized reader - with partition data column - select a single complex field and is null expression in project") - .exclude("Spark vectorized reader - without partition data column - select a single complex field from a map entry and in clause") - .exclude("Spark vectorized reader - with partition data column - select a single complex field from a map entry and in clause") - .exclude("Non-vectorized reader - without partition data column - select a single complex field from a map entry and in clause") - .exclude("Non-vectorized reader - with partition data column - select a single complex field from a map entry and in clause") - .exclude("Spark vectorized reader - without partition data column - select one complex field and having is null predicate on another complex field") - .exclude("Spark vectorized reader - with partition data column - select one complex field and having is null predicate on another complex field") - .exclude("Non-vectorized reader - without partition data column - select one complex field and having is null predicate on another complex field") - .exclude("Non-vectorized reader - with partition data column - select one complex field and having is null predicate on another complex field") - .exclude("Spark vectorized reader - without partition data column - select one deep nested complex field and having is null predicate on another deep nested complex field") - .exclude("Spark vectorized reader - with partition data column - select one deep nested complex field and having is null predicate on another deep nested complex field") - .exclude("Non-vectorized reader - without partition data column - select one deep nested complex field and having is null predicate on another deep nested complex field") - .exclude("Non-vectorized reader - with partition data column - select one deep nested complex field and having is null predicate on another deep nested complex field") - .exclude("Spark vectorized reader - without partition data column - select nested field from a complex map key using map_keys") - .exclude("Spark vectorized reader - with partition data column - select nested field from a complex map key using map_keys") - .exclude("Non-vectorized reader - without partition data column - select nested field from a complex map key using map_keys") - .exclude("Non-vectorized reader - with partition data column - select nested field from a complex map key using map_keys") - .exclude("Spark vectorized reader - without partition data column - select nested field from a complex map value using map_values") - .exclude("Spark vectorized reader - with partition data column - select nested field from a complex map value using map_values") - .exclude("Non-vectorized reader - without partition data column - select nested field from a complex map value using map_values") - .exclude("Non-vectorized reader - with partition data column - select nested field from a complex map value using map_values") - .exclude("Spark vectorized reader - without partition data column - select explode of nested field of array of struct") - .exclude("Spark vectorized reader - with partition data column - select explode of nested field of array of struct") - .exclude("Non-vectorized reader - without partition data column - select explode of nested field of array of struct") - .exclude("Non-vectorized reader - with partition data column - select explode of nested field of array of struct") - .exclude("Spark vectorized reader - without partition data column - SPARK-34638: nested column prune on generator output") - .exclude("Spark vectorized reader - with partition data column - SPARK-34638: nested column prune on generator output") - .exclude("Non-vectorized reader - without partition data column - SPARK-34638: nested column prune on generator output") - .exclude("Non-vectorized reader - with partition data column - SPARK-34638: nested column prune on generator output") - .exclude("Spark vectorized reader - without partition data column - select one deep nested complex field after repartition") - .exclude("Spark vectorized reader - with partition data column - select one deep nested complex field after repartition") - .exclude("Non-vectorized reader - without partition data column - select one deep nested complex field after repartition") - .exclude("Non-vectorized reader - with partition data column - select one deep nested complex field after repartition") - .exclude("Spark vectorized reader - without partition data column - select one deep nested complex field after repartition by expression") - .exclude("Spark vectorized reader - with partition data column - select one deep nested complex field after repartition by expression") - .exclude("Non-vectorized reader - without partition data column - select one deep nested complex field after repartition by expression") - .exclude("Non-vectorized reader - with partition data column - select one deep nested complex field after repartition by expression") - .exclude("Spark vectorized reader - without partition data column - select one deep nested complex field after join") - .exclude("Spark vectorized reader - with partition data column - select one deep nested complex field after join") - .exclude("Non-vectorized reader - without partition data column - select one deep nested complex field after join") - .exclude("Non-vectorized reader - with partition data column - select one deep nested complex field after join") - .exclude("Spark vectorized reader - without partition data column - select one deep nested complex field after outer join") - .exclude("Spark vectorized reader - with partition data column - select one deep nested complex field after outer join") - .exclude("Non-vectorized reader - without partition data column - select one deep nested complex field after outer join") - .exclude("Non-vectorized reader - with partition data column - select one deep nested complex field after outer join") - .exclude("Spark vectorized reader - without partition data column - select nested field in aggregation function of Aggregate") - .exclude("Spark vectorized reader - with partition data column - select nested field in aggregation function of Aggregate") - .exclude("Non-vectorized reader - without partition data column - select nested field in aggregation function of Aggregate") - .exclude("Non-vectorized reader - with partition data column - select nested field in aggregation function of Aggregate") - .exclude("Spark vectorized reader - without partition data column - select nested field in window function") - .exclude("Spark vectorized reader - with partition data column - select nested field in window function") - .exclude("Non-vectorized reader - without partition data column - select nested field in window function") - .exclude( - "Non-vectorized reader - with partition data column - select nested field in window function") - .exclude("Spark vectorized reader - without partition data column - select nested field in window function and then order by") - .exclude("Spark vectorized reader - with partition data column - select nested field in window function and then order by") - .exclude("Non-vectorized reader - without partition data column - select nested field in window function and then order by") - .exclude("Non-vectorized reader - with partition data column - select nested field in window function and then order by") - .exclude( - "Spark vectorized reader - without partition data column - select nested field in Sort") - .exclude("Spark vectorized reader - with partition data column - select nested field in Sort") - .exclude("Non-vectorized reader - without partition data column - select nested field in Sort") - .exclude("Non-vectorized reader - with partition data column - select nested field in Sort") - .exclude( - "Spark vectorized reader - without partition data column - select nested field in Expand") - .exclude("Spark vectorized reader - with partition data column - select nested field in Expand") - .exclude( - "Non-vectorized reader - without partition data column - select nested field in Expand") - .exclude("Non-vectorized reader - with partition data column - select nested field in Expand") - .exclude("Spark vectorized reader - without partition data column - SPARK-32163: nested pruning should work even with cosmetic variations") - .exclude("Spark vectorized reader - with partition data column - SPARK-32163: nested pruning should work even with cosmetic variations") - .exclude("Non-vectorized reader - without partition data column - SPARK-32163: nested pruning should work even with cosmetic variations") - .exclude("Non-vectorized reader - with partition data column - SPARK-32163: nested pruning should work even with cosmetic variations") - .exclude("Spark vectorized reader - without partition data column - SPARK-38918: nested schema pruning with correlated subqueries") - .exclude("Spark vectorized reader - with partition data column - SPARK-38918: nested schema pruning with correlated subqueries") - .exclude("Non-vectorized reader - without partition data column - SPARK-38918: nested schema pruning with correlated subqueries") - .exclude("Non-vectorized reader - with partition data column - SPARK-38918: nested schema pruning with correlated subqueries") .exclude("Case-insensitive parser - mixed-case schema - select with exact column names") .exclude("Case-insensitive parser - mixed-case schema - select with lowercase column names") .exclude( @@ -1306,36 +1168,7 @@ class ClickHouseTestSettings extends BackendTestSettings { .exclude( "Case-insensitive parser - mixed-case schema - filter with different-case column names") .exclude("Case-insensitive parser - mixed-case schema - subquery filter with different-case column names") - .exclude("Spark vectorized reader - without partition data column - SPARK-34963: extract case-insensitive struct field from array") - .exclude("Spark vectorized reader - with partition data column - SPARK-34963: extract case-insensitive struct field from array") - .exclude("Non-vectorized reader - without partition data column - SPARK-34963: extract case-insensitive struct field from array") - .exclude("Non-vectorized reader - with partition data column - SPARK-34963: extract case-insensitive struct field from array") - .exclude("Spark vectorized reader - without partition data column - SPARK-34963: extract case-insensitive struct field from struct") - .exclude("Spark vectorized reader - with partition data column - SPARK-34963: extract case-insensitive struct field from struct") - .exclude("Non-vectorized reader - without partition data column - SPARK-34963: extract case-insensitive struct field from struct") - .exclude("Non-vectorized reader - with partition data column - SPARK-34963: extract case-insensitive struct field from struct") .exclude("SPARK-36352: Spark should check result plan's output schema name") - .exclude("Spark vectorized reader - without partition data column - SPARK-38977: schema pruning with correlated EXISTS subquery") - .exclude("Spark vectorized reader - with partition data column - SPARK-38977: schema pruning with correlated EXISTS subquery") - .exclude("Non-vectorized reader - without partition data column - SPARK-38977: schema pruning with correlated EXISTS subquery") - .exclude("Non-vectorized reader - with partition data column - SPARK-38977: schema pruning with correlated EXISTS subquery") - .exclude("Spark vectorized reader - without partition data column - SPARK-38977: schema pruning with correlated NOT EXISTS subquery") - .exclude("Spark vectorized reader - with partition data column - SPARK-38977: schema pruning with correlated NOT EXISTS subquery") - .exclude("Non-vectorized reader - without partition data column - SPARK-38977: schema pruning with correlated NOT EXISTS subquery") - .exclude("Non-vectorized reader - with partition data column - SPARK-38977: schema pruning with correlated NOT EXISTS subquery") - .exclude("Spark vectorized reader - without partition data column - SPARK-38977: schema pruning with correlated IN subquery") - .exclude("Spark vectorized reader - with partition data column - SPARK-38977: schema pruning with correlated IN subquery") - .exclude("Non-vectorized reader - without partition data column - SPARK-38977: schema pruning with correlated IN subquery") - .exclude("Non-vectorized reader - with partition data column - SPARK-38977: schema pruning with correlated IN subquery") - .exclude("Spark vectorized reader - without partition data column - SPARK-38977: schema pruning with correlated NOT IN subquery") - .exclude("Spark vectorized reader - with partition data column - SPARK-38977: schema pruning with correlated NOT IN subquery") - .exclude("Non-vectorized reader - without partition data column - SPARK-38977: schema pruning with correlated NOT IN subquery") - .exclude("Non-vectorized reader - with partition data column - SPARK-38977: schema pruning with correlated NOT IN subquery") - .exclude("Spark vectorized reader - without partition data column - SPARK-34638: nested column prune on generator output - case-sensitivity") - .exclude("Spark vectorized reader - with partition data column - SPARK-34638: nested column prune on generator output - case-sensitivity") - .exclude("Non-vectorized reader - without partition data column - SPARK-34638: nested column prune on generator output - case-sensitivity") - .exclude("Non-vectorized reader - with partition data column - SPARK-34638: nested column prune on generator output - case-sensitivity") - .exclude("SPARK-37450: Prunes unnecessary fields from Explode for count aggregation") enableSuite[GlutenOrcV2QuerySuite] .exclude("Enabling/disabling ignoreCorruptFiles") .exclude("SPARK-27160 Predicate pushdown correctness on DecimalType for ORC")