From c0ec591925d3b6002274edec8576d4f14a6dabd1 Mon Sep 17 00:00:00 2001 From: BInwei Yang Date: Mon, 19 Sep 2022 23:36:57 -0700 Subject: [PATCH] [OPPRO-368] Add parquet support in readme. Remove dwrf (#381) --- backends-velox/workload/tpch/run_tpch/tpch_parquet.scala | 2 +- docs/Velox.md | 8 +++----- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/backends-velox/workload/tpch/run_tpch/tpch_parquet.scala b/backends-velox/workload/tpch/run_tpch/tpch_parquet.scala index 067e1a2afcda..82903b5c5cb7 100644 --- a/backends-velox/workload/tpch/run_tpch/tpch_parquet.scala +++ b/backends-velox/workload/tpch/run_tpch/tpch_parquet.scala @@ -5,7 +5,7 @@ import java.util.Arrays import sys.process._ //Configurations: -var parquet_file_path = "/PATH/TO/TPCH_DWRF_PATH" +var parquet_file_path = "/PATH/TO/TPCH_PARQUET_PATH" var gluten_root = "/PATH/TO/GLUTEN" def time[R](block: => R): R = { diff --git a/docs/Velox.md b/docs/Velox.md index 50d0c844db87..ca2b1bc30e8f 100644 --- a/docs/Velox.md +++ b/docs/Velox.md @@ -39,23 +39,21 @@ In Gluten, all 22 queries can be fully offloaded into Velox for computing. ### Data preparation -Parquet format still have performance issue in Velox. We use dwrf format instead. Refer to [Test TPCH on Velox backend](../backends-velox/workload/tpch/README.md) for How to convert parquet to dwrf format during data generation. - Considering current Velox does not fully support Decimal and Date data type, the [datagen script](../backends-velox/workload/tpch/gen_data/parquet_dataset/tpch_datagen_parquet.scala) transforms "Decimal-to-Double" and "Date-to-String". As a result, we need to modify the TPCH queries a bit. You can find the [modified TPC-H queries](../backends-velox/workload/tpch/tpch.queries.updated/). ### Submit the Spark SQL job -Submit test script from spark-shell. You can find the scala code to [Run TPC-H](../backends-velox/workload/tpch/run_tpch/tpch_dwrf.scala) as an example. 
Please remember to modify the location of TPC-H files as well as TPC-H queries in backends-velox/workload/tpch/run_tpch/tpch_dwrf.scala before you run the testing. +Submit test script from spark-shell. You can find the scala code to [Run TPC-H](../backends-velox/workload/tpch/run_tpch/tpch_parquet.scala) as an example. Please remember to modify the location of TPC-H files as well as TPC-H queries in backends-velox/workload/tpch/run_tpch/tpch_parquet.scala before you run the testing. ``` -var dwrf_file_path = "/PATH/TO/TPCH_DWRF_PATH" +var parquet_file_path = "/PATH/TO/TPCH_PARQUET_PATH" var gluten_root = "/PATH/TO/GLUTEN" ``` Below script shows an example about how to run the testing, you should modify the parameters such as executor cores, memory, offHeap size based on your environment. ```shell script -cat tpch_dwrf.scala | spark-shell --name tpch_powertest_velox --master yarn --deploy-mode client --conf spark.plugins=io.glutenproject.GlutenPlugin --conf --conf spark.gluten.sql.columnar.backend.lib=velox --conf spark.driver.extraClassPath=${gluten_jvm_jar} --conf spark.executor.extraClassPath=${gluten_jvm_jar} --conf spark.memory.offHeap.size=20g --conf spark.sql.sources.useV1SourceList=avro --num-executors 6 --executor-cores 6 --driver-memory 20g --executor-memory 25g --conf spark.executor.memoryOverhead=5g --conf spark.driver.maxResultSize=32g +cat tpch_parquet.scala | spark-shell --name tpch_powertest_velox --master yarn --deploy-mode client --conf spark.plugins=io.glutenproject.GlutenPlugin --conf spark.gluten.sql.columnar.backend.lib=velox --conf spark.driver.extraClassPath=${gluten_jvm_jar} --conf spark.executor.extraClassPath=${gluten_jvm_jar} --conf spark.memory.offHeap.size=20g --conf spark.sql.sources.useV1SourceList=avro --num-executors 6 --executor-cores 6 --driver-memory 20g --executor-memory 25g --conf spark.executor.memoryOverhead=5g --conf spark.driver.maxResultSize=32g ``` ### Result