-
Notifications
You must be signed in to change notification settings - Fork 304
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
HPCC-33155 Revisit Parquet Test Suite #19405
base: master
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
This file was deleted.
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,10 +1,16 @@ | ||
<Dataset name='HivePartitionedSampleData'> | ||
<Row><id>1</id><name>Alice </name><age>30</age><city>New York </city></Row> | ||
<Row><id>2</id><name>Bob </name><age>25</age><city>Los Angeles </city></Row> | ||
<Row><id>3</id><name>Charlie </name><age>40</age><city>Chicago </city></Row> | ||
<Row><district>1</district><firstname>Alice</firstname><lastname>A</lastname><age>30</age><city>New York</city></Row> | ||
<Row><district>1</district><firstname>Alice</firstname><lastname>B</lastname><age>35</age><city>New York</city></Row> | ||
<Row><district>1</district><firstname>Chalice</firstname><lastname>C</lastname><age>40</age><city>Boston</city></Row> | ||
<Row><district>2</district><firstname>Bob</firstname><lastname>A</lastname><age>25</age><city>Los Angeles</city></Row> | ||
<Row><district>2</district><firstname>Jim</firstname><lastname>A</lastname><age>25</age><city>Los Angeles</city></Row> | ||
<Row><district>3</district><firstname>Charlie</firstname><lastname>C</lastname><age>40</age><city>Chicago</city></Row> | ||
</Dataset> | ||
<Dataset name='DirPartitionedSampleData'> | ||
<Row><id>1</id><name>Alice </name><age>30</age><city>New York </city></Row> | ||
<Row><id>2</id><name>Bob </name><age>25</age><city>Los Angeles </city></Row> | ||
<Row><id>3</id><name>Charlie </name><age>40</age><city>Chicago </city></Row> | ||
<Row><district>1</district><firstname>Alice</firstname><lastname>A</lastname><age>30</age><city>New York</city></Row> | ||
<Row><district>1</district><firstname>Alice</firstname><lastname>B</lastname><age>35</age><city>New York</city></Row> | ||
<Row><district>1</district><firstname>Chalice</firstname><lastname>C</lastname><age>40</age><city>Boston</city></Row> | ||
<Row><district>2</district><firstname>Bob</firstname><lastname>A</lastname><age>25</age><city>Los Angeles</city></Row> | ||
<Row><district>2</district><firstname>Jim</firstname><lastname>A</lastname><age>25</age><city>Los Angeles</city></Row> | ||
<Row><district>3</district><firstname>Charlie</firstname><lastname>C</lastname><age>40</age><city>Chicago</city></Row> | ||
</Dataset> |
This file was deleted.
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -12,9 +12,14 @@ | |
############################################################################## */ | ||
|
||
//class=parquet | ||
//fail | ||
//nothor | ||
//noroxie | ||
|
||
// This test writes an empty dataset to a Parquet file and then tries to read it back. | ||
// When writing Parquet files, the plugin waits for the first row before opening a file. | ||
// The dataset is empty, so no row ever arrives; the file is never created and the read fails. | ||
|
||
IMPORT Std; | ||
IMPORT Parquet; | ||
|
||
|
@@ -33,4 +38,3 @@ ParquetIO.Write(EMPTY_PARQUET, filePath, TRUE); | |
read_data := ParquetIO.Read(RECORDDEF, filePath); | ||
|
||
OUTPUT(read_data); | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. With this deleted it will not read the file. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Fixed. |
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -13,8 +13,6 @@ | |
|
||
//class=parquet | ||
//fail | ||
//nothor | ||
//noroxie | ||
|
||
IMPORT Std; | ||
IMPORT Parquet; | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -10,26 +10,29 @@ | |
See the License for the specific language governing permissions and | ||
limitations under the License. | ||
############################################################################## */ | ||
|
||
//class=parquet | ||
//nothor | ||
//noroxie | ||
|
||
IMPORT Std; | ||
IMPORT Parquet; | ||
|
||
// Define the record layout (STRING fields are variable-length) | ||
datasetRecordLayout := RECORD | ||
UNSIGNED4 id; | ||
STRING25 name; | ||
UNSIGNED4 district; | ||
STRING firstname; | ||
STRING lastname; | ||
UNSIGNED4 age; | ||
STRING50 city; | ||
STRING city; | ||
END; | ||
|
||
// Create a small dataset - ensure all records have valid data | ||
smallData := DATASET([ | ||
{1, 'Alice', 30, 'New York'}, | ||
{2, 'Bob', 25, 'Los Angeles'}, | ||
{3, 'Charlie', 40, 'Chicago'} | ||
{1, 'Alice', 'A', 30, 'New York'}, | ||
{1, 'Alice', 'B', 35, 'New York'}, | ||
{1, 'Chalice', 'C', 40, 'Boston'}, | ||
{2, 'Bob', 'A', 25, 'Los Angeles'}, | ||
{2, 'Jim', 'A', 25, 'Los Angeles'}, | ||
{3, 'Charlie', 'C', 40, 'Chicago'} | ||
], datasetRecordLayout); | ||
|
||
// Set options | ||
|
@@ -40,7 +43,51 @@ rowSize := 1024; // Increased buffer size | |
basePath := Std.File.GetDefaultDropZone() + '/regress/parquet/'; | ||
|
||
// Define partition keys as a semicolon-separated string with all keys | ||
partitionKeys := 'id'; | ||
partitionKeys := 'district;firstname;city'; | ||
|
||
/** | ||
* This partitioning creates a structure like this: | ||
* | ||
* Hive Partitioning: | ||
* ├── district=1 | ||
* │ ├── firstname=Alice | ||
* │ │ └── city=New%20York | ||
* │ │ └── part_0_of_table_0_from_worker_0.parquet | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. These part... names are files? If so, they are all the same for every value combination. Is that correct? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Yes, that is correct. |
||
* │ └── firstname=Chalice | ||
* │ └── city=Boston | ||
* │ └── part_0_of_table_0_from_worker_0.parquet | ||
* ├── district=2 | ||
* │ ├── firstname=Bob | ||
* │ │ └── city=Los%20Angeles | ||
* │ │ └── part_0_of_table_0_from_worker_0.parquet | ||
* │ └── firstname=Jim | ||
* │ └── city=Los%20Angeles | ||
* │ └── part_0_of_table_0_from_worker_0.parquet | ||
* └── district=3 | ||
* └── firstname=Charlie | ||
* └── city=Chicago | ||
* └── part_0_of_table_0_from_worker_0.parquet | ||
* | ||
* Directory Partitioning: | ||
* ├── 1 | ||
* │ ├── Alice | ||
* │ │ └── New York | ||
* │ │ └── part_0_of_table_0_from_worker_0.parquet | ||
* │ └── Chalice | ||
* │ └── Boston | ||
* │ └── part_0_of_table_0_from_worker_0.parquet | ||
* ├── 2 | ||
* │ ├── Bob | ||
* │ │ └── Los Angeles | ||
* │ │ └── part_0_of_table_0_from_worker_0.parquet | ||
* │ └── Jim | ||
* │ └── Los Angeles | ||
* │ └── part_0_of_table_0_from_worker_0.parquet | ||
* └── 3 | ||
* └── Charlie | ||
* └── Chicago | ||
* └── part_0_of_table_0_from_worker_0.parquet | ||
*/ | ||
|
||
// Write out the dataset with Hive partitioning on all keys | ||
ParquetIO.HivePartition.Write( | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -14,7 +14,6 @@ | |
//class=parquet | ||
//nothor | ||
//noroxie | ||
//fail | ||
|
||
IMPORT Std; | ||
IMPORT Parquet; | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Without the output this workunit will not do anything.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Fixed.