Skip to content

Commit

Permalink
Merge pull request #42 from reproio/fix-it
Browse files Browse the repository at this point in the history
Fix #34 #41 Fix map values -> Parquet conversions
  • Loading branch information
syucream authored Jun 2, 2020
2 parents 6756698 + 0ccee22 commit 58e959a
Show file tree
Hide file tree
Showing 65 changed files with 784 additions and 1,625 deletions.
68 changes: 42 additions & 26 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -23,32 +23,48 @@ test:

.PHONY: it
it: build
./columnify -schemaType avro -schemaFile examples/schema/primitives.avsc -recordType avro examples/record/primitives.avro > /dev/null
./columnify -schemaType avro -schemaFile examples/schema/primitives.avsc -recordType csv examples/record/primitives.csv > /dev/null
./columnify -schemaType avro -schemaFile examples/schema/primitives.avsc -recordType jsonl examples/record/primitives.jsonl > /dev/null
./columnify -schemaType avro -schemaFile examples/schema/primitives.avsc -recordType ltsv examples/record/primitives.ltsv > /dev/null
./columnify -schemaType avro -schemaFile examples/schema/primitives.avsc -recordType msgpack examples/record/primitives.msgpack > /dev/null
./columnify -schemaType avro -schemaFile examples/schema/primitives.avsc -recordType tsv examples/record/primitives.tsv > /dev/null
./columnify -schemaType avro -schemaFile examples/schema/nested.avsc -recordType avro examples/record/nested.avro > /dev/null
./columnify -schemaType avro -schemaFile examples/schema/nested.avsc -recordType jsonl examples/record/nested.jsonl > /dev/null
./columnify -schemaType avro -schemaFile examples/schema/nested.avsc -recordType msgpack examples/record/nested.msgpack > /dev/null
./columnify -schemaType avro -schemaFile examples/schema/array.avsc -recordType avro examples/record/array.avro > /dev/null
./columnify -schemaType avro -schemaFile examples/schema/array.avsc -recordType jsonl examples/record/array.jsonl > /dev/null
./columnify -schemaType avro -schemaFile examples/schema/array.avsc -recordType msgpack examples/record/array.msgpack > /dev/null
./columnify -schemaType avro -schemaFile examples/schema/complicated.avsc -recordType avro examples/record/complicated.avro > /dev/null
./columnify -schemaType avro -schemaFile examples/schema/complicated.avsc -recordType jsonl examples/record/complicated.json > /dev/null
./columnify -schemaType bigquery -schemaFile examples/schema/primitives.bq.json -recordType avro examples/record/primitives.avro > /dev/null
./columnify -schemaType bigquery -schemaFile examples/schema/primitives.bq.json -recordType csv examples/record/primitives.csv > /dev/null
./columnify -schemaType bigquery -schemaFile examples/schema/primitives.bq.json -recordType jsonl examples/record/primitives.jsonl > /dev/null
./columnify -schemaType bigquery -schemaFile examples/schema/primitives.bq.json -recordType ltsv examples/record/primitives.ltsv > /dev/null
./columnify -schemaType bigquery -schemaFile examples/schema/primitives.bq.json -recordType msgpack examples/record/primitives.msgpack > /dev/null
./columnify -schemaType bigquery -schemaFile examples/schema/primitives.bq.json -recordType tsv examples/record/primitives.tsv > /dev/null
./columnify -schemaType bigquery -schemaFile examples/schema/nested.bq.json -recordType avro examples/record/nested.avro > /dev/null
./columnify -schemaType bigquery -schemaFile examples/schema/nested.bq.json -recordType jsonl examples/record/nested.jsonl > /dev/null
./columnify -schemaType bigquery -schemaFile examples/schema/nested.bq.json -recordType msgpack examples/record/nested.msgpack > /dev/null
./columnify -schemaType bigquery -schemaFile examples/schema/array.bq.json -recordType avro examples/record/array.avro > /dev/null
./columnify -schemaType bigquery -schemaFile examples/schema/array.bq.json -recordType jsonl examples/record/array.jsonl > /dev/null
./columnify -schemaType bigquery -schemaFile examples/schema/array.bq.json -recordType msgpack examples/record/array.msgpack > /dev/null
./columnify -schemaType avro -schemaFile columnifier/testdata/schema/primitives.avsc -recordType avro columnifier/testdata/record/primitives.avro > /dev/null
./columnify -schemaType avro -schemaFile columnifier/testdata/schema/primitives.avsc -recordType csv columnifier/testdata/record/primitives.csv > /dev/null
./columnify -schemaType avro -schemaFile columnifier/testdata/schema/primitives.avsc -recordType jsonl columnifier/testdata/record/primitives.jsonl > /dev/null
./columnify -schemaType avro -schemaFile columnifier/testdata/schema/primitives.avsc -recordType ltsv columnifier/testdata/record/primitives.ltsv > /dev/null
./columnify -schemaType avro -schemaFile columnifier/testdata/schema/primitives.avsc -recordType msgpack columnifier/testdata/record/primitives.msgpack > /dev/null
./columnify -schemaType avro -schemaFile columnifier/testdata/schema/primitives.avsc -recordType tsv columnifier/testdata/record/primitives.tsv > /dev/null
./columnify -schemaType avro -schemaFile columnifier/testdata/schema/nullables.avsc -recordType avro columnifier/testdata/record/nullables.avro > /dev/null
./columnify -schemaType avro -schemaFile columnifier/testdata/schema/nullables.avsc -recordType jsonl columnifier/testdata/record/nullables.jsonl > /dev/null
./columnify -schemaType avro -schemaFile columnifier/testdata/schema/nullables.avsc -recordType msgpack columnifier/testdata/record/nullables.msgpack > /dev/null
./columnify -schemaType avro -schemaFile columnifier/testdata/schema/logicals.avsc -recordType avro columnifier/testdata/record/logicals.avro > /dev/null
./columnify -schemaType avro -schemaFile columnifier/testdata/schema/logicals.avsc -recordType csv columnifier/testdata/record/logicals.csv > /dev/null
./columnify -schemaType avro -schemaFile columnifier/testdata/schema/logicals.avsc -recordType jsonl columnifier/testdata/record/logicals.jsonl > /dev/null
./columnify -schemaType avro -schemaFile columnifier/testdata/schema/logicals.avsc -recordType ltsv columnifier/testdata/record/logicals.ltsv > /dev/null
./columnify -schemaType avro -schemaFile columnifier/testdata/schema/logicals.avsc -recordType msgpack columnifier/testdata/record/logicals.msgpack > /dev/null
./columnify -schemaType avro -schemaFile columnifier/testdata/schema/logicals.avsc -recordType tsv columnifier/testdata/record/logicals.tsv > /dev/null
./columnify -schemaType avro -schemaFile columnifier/testdata/schema/nested.avsc -recordType avro columnifier/testdata/record/nested.avro > /dev/null
./columnify -schemaType avro -schemaFile columnifier/testdata/schema/nested.avsc -recordType jsonl columnifier/testdata/record/nested.jsonl > /dev/null
./columnify -schemaType avro -schemaFile columnifier/testdata/schema/nested.avsc -recordType msgpack columnifier/testdata/record/nested.msgpack > /dev/null
./columnify -schemaType avro -schemaFile columnifier/testdata/schema/array.avsc -recordType avro columnifier/testdata/record/array.avro > /dev/null
./columnify -schemaType avro -schemaFile columnifier/testdata/schema/array.avsc -recordType jsonl columnifier/testdata/record/array.jsonl > /dev/null
./columnify -schemaType avro -schemaFile columnifier/testdata/schema/array.avsc -recordType msgpack columnifier/testdata/record/array.msgpack > /dev/null
./columnify -schemaType avro -schemaFile columnifier/testdata/schema/logicals.avsc -recordType jsonl columnifier/testdata/record/logicals.jsonl > /dev/null
./columnify -schemaType avro -schemaFile columnifier/testdata/schema/logicals.avsc -recordType avro columnifier/testdata/record/logicals.avro > /dev/null
./columnify -schemaType avro -schemaFile columnifier/testdata/schema/logicals.avsc -recordType msgpack columnifier/testdata/record/logicals.msgpack > /dev/null
./columnify -schemaType avro -schemaFile columnifier/testdata/schema/nullable_complex.avsc -recordType avro columnifier/testdata/record/nullable_complex.avro > /dev/null
./columnify -schemaType avro -schemaFile columnifier/testdata/schema/nullable_complex.avsc -recordType jsonl columnifier/testdata/record/nullable_complex.jsonl > /dev/null
./columnify -schemaType avro -schemaFile columnifier/testdata/schema/nullable_complex.avsc -recordType msgpack columnifier/testdata/record/nullable_complex.msgpack > /dev/null
./columnify -schemaType bigquery -schemaFile columnifier/testdata/schema/primitives.bq.json -recordType avro columnifier/testdata/record/primitives.avro > /dev/null
./columnify -schemaType bigquery -schemaFile columnifier/testdata/schema/primitives.bq.json -recordType csv columnifier/testdata/record/primitives.csv > /dev/null
./columnify -schemaType bigquery -schemaFile columnifier/testdata/schema/primitives.bq.json -recordType jsonl columnifier/testdata/record/primitives.jsonl > /dev/null
./columnify -schemaType bigquery -schemaFile columnifier/testdata/schema/primitives.bq.json -recordType ltsv columnifier/testdata/record/primitives.ltsv > /dev/null
./columnify -schemaType bigquery -schemaFile columnifier/testdata/schema/primitives.bq.json -recordType msgpack columnifier/testdata/record/primitives.msgpack > /dev/null
./columnify -schemaType bigquery -schemaFile columnifier/testdata/schema/primitives.bq.json -recordType tsv columnifier/testdata/record/primitives.tsv > /dev/null
./columnify -schemaType bigquery -schemaFile columnifier/testdata/schema/nullables.bq.json -recordType avro columnifier/testdata/record/nullables.avro > /dev/null
./columnify -schemaType bigquery -schemaFile columnifier/testdata/schema/nullables.bq.json -recordType jsonl columnifier/testdata/record/nullables.jsonl > /dev/null
./columnify -schemaType bigquery -schemaFile columnifier/testdata/schema/nullables.bq.json -recordType msgpack columnifier/testdata/record/nullables.msgpack > /dev/null
./columnify -schemaType bigquery -schemaFile columnifier/testdata/schema/nested.bq.json -recordType avro columnifier/testdata/record/nested.avro > /dev/null
./columnify -schemaType bigquery -schemaFile columnifier/testdata/schema/nested.bq.json -recordType jsonl columnifier/testdata/record/nested.jsonl > /dev/null
./columnify -schemaType bigquery -schemaFile columnifier/testdata/schema/nested.bq.json -recordType msgpack columnifier/testdata/record/nested.msgpack > /dev/null
./columnify -schemaType bigquery -schemaFile columnifier/testdata/schema/array.bq.json -recordType avro columnifier/testdata/record/array.avro > /dev/null
./columnify -schemaType bigquery -schemaFile columnifier/testdata/schema/array.bq.json -recordType jsonl columnifier/testdata/record/array.jsonl > /dev/null
./columnify -schemaType bigquery -schemaFile columnifier/testdata/schema/array.bq.json -recordType msgpack columnifier/testdata/record/array.msgpack > /dev/null

# Set GITHUB_TOKEN and create release git tag
.PHONY: release
Expand Down
10 changes: 9 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ $ cat examples/record/primitives.jsonl
{"boolean": false, "int": 1, "long": 1, "float": 1.1, "double": 1.1, "bytes": "foo", "string": "foo"}
{"boolean": true, "int": 2, "long": 2, "float": 2.2, "double": 2.2, "bytes": "bar", "string": "bar"}

$ ./columnify -schemaType avro -schemaFile examples/schema/primitives.avsc -recordType jsonl examples/record/primitives.jsonl > out.parquet
$ ./columnify -schemaType avro -schemaFile examples/primitives.avsc -recordType jsonl examples/primitives.jsonl > out.parquet

$ parquet-tools schema out.parquet
message Primitives {
Expand Down Expand Up @@ -86,6 +86,14 @@ $ parquet-tools cat -json out.parquet
- An example is `examples/fluent-plugin-s3`
- It works as a Compressor for fluent-plugin-s3, writing a Parquet file to a temporary location from the chunk data.

## Limitations

Currently, it has some limitations depending on the schema/record types.

- Some logical types like Decimal are unsupported.
- If using `-recordType = avro`, it doesn't support a nested record that has only 1 sub field.
- If using `-recordType = avro`, it implicitly converts bytes fields to base64-encoded values.

## Development

`Columnifier` reads the input file(s), converts their format based on the given parameters, and finally writes the output files.
Expand Down
Loading

0 comments on commit 58e959a

Please sign in to comment.