Skip to content

Commit

Permalink
Test scan with filter
Browse files Browse the repository at this point in the history
  • Loading branch information
rui-mo committed Nov 14, 2024
1 parent 59d5248 commit f9140e4
Show file tree
Hide file tree
Showing 2 changed files with 39 additions and 3 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -18,11 +18,13 @@ package org.apache.gluten.execution

import org.apache.gluten.GlutenConfig
import org.apache.gluten.backendsapi.velox.VeloxBackendSettings
import org.apache.gluten.benchmarks.RandomParquetDataGenerator
import org.apache.gluten.utils.VeloxFileSystemValidationJniWrapper

import org.apache.spark.SparkConf
import org.apache.spark.sql.catalyst.expressions.GreaterThan
import org.apache.spark.sql.execution.ScalarSubquery
import org.apache.spark.sql.types._

class VeloxScanSuite extends VeloxWholeStageTransformerSuite {
protected val rootPath: String = getClass.getResource("/").getPath
Expand Down Expand Up @@ -114,4 +116,38 @@ class VeloxScanSuite extends VeloxWholeStageTransformerSuite {
!VeloxFileSystemValidationJniWrapper.allSupportedByRegisteredFileSystems(
Array("file:/test_path/", "unsupported://test_path")))
}

test("scan with filter on decimal/timestamp/binary field") {
  withTempView("t") {
    withTempDir {
      dir =>
        val path = dir.getAbsolutePath
        // Schema deliberately covers types whose filter pushdown into the
        // native scan needs validation: short and long decimals, binary,
        // and timestamp.
        val schema = StructType(
          Array(
            StructField("short_decimal_field", DecimalType(5, 2), nullable = true),
            StructField("long_decimal_field", DecimalType(32, 8), nullable = true),
            StructField("binary_field", BinaryType, nullable = true),
            StructField("timestamp_field", TimestampType, nullable = true)
          ))
        // Seed 0 keeps the generated rows deterministic across runs.
        RandomParquetDataGenerator(0).generateRandomData(spark, schema, 10, Some(path))
        spark.catalog.createTable("t", path, "parquet")

        // Every predicate below must still yield a Gluten-offloaded scan
        // (FileSourceScanExecTransformer) rather than a Spark fallback.
        val predicates = Seq(
          "long_decimal_field = 3.14",
          "short_decimal_field = 3.14",
          "binary_field = '3.14'",
          "timestamp_field = current_timestamp()"
        )
        predicates.foreach {
          predicate =>
            runQueryAndCompare(s"select * from t where $predicate")(
              checkGlutenOperatorMatch[FileSourceScanExecTransformer])
        }
    }
  }
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ import org.apache.spark.sql.types._

import com.github.javafaker.Faker

import java.sql.Date
import java.sql.{Date, Timestamp}
import java.util.Random

case class RandomParquetDataGenerator(initialSeed: Long = 0L) extends Logging {
Expand Down Expand Up @@ -67,7 +67,7 @@ case class RandomParquetDataGenerator(initialSeed: Long = 0L) extends Logging {
case DoubleType =>
faker.number().randomDouble(2, Double.MinValue.toLong, Double.MaxValue.toLong)
case DateType => new Date(faker.date().birthday().getTime)
// case TimestampType => new Timestamp(faker.date().birthday().getTime)
case TimestampType => new Timestamp(faker.date().birthday().getTime)
case t: DecimalType =>
BigDecimal(
faker.number().randomDouble(t.scale, 0, Math.pow(10, t.precision - t.scale).toLong))
Expand Down Expand Up @@ -124,7 +124,7 @@ case class RandomParquetDataGenerator(initialSeed: Long = 0L) extends Logging {
() => StructField(fieldName, FloatType, nullable = true),
() => StructField(fieldName, DoubleType, nullable = true),
() => StructField(fieldName, DateType, nullable = true),
// () => StructField(fieldName, TimestampType, nullable = true),
() => StructField(fieldName, TimestampType, nullable = true),
() => StructField(fieldName, DecimalType(10, 2), nullable = true),
() => StructField(fieldName, DecimalType(30, 10), nullable = true)
)
Expand Down

0 comments on commit f9140e4

Please sign in to comment.