From 26a10dfbb8e55586bd8ab612c94d4afbf4567d55 Mon Sep 17 00:00:00 2001 From: Frederic Branczyk Date: Wed, 24 Jul 2024 17:17:00 +0200 Subject: [PATCH] cmd/parquet-tool: Fix percentage calculation The total byte size recorded in the row group represents the uncompressed size, so to calculate each column's percentage of the row group correctly we also need to use the uncompressed size of the column (TotalUncompressedSize) rather than its compressed size. --- cmd/parquet-tool/cmd/dump.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmd/parquet-tool/cmd/dump.go b/cmd/parquet-tool/cmd/dump.go index 032c1fbef..d48597359 100644 --- a/cmd/parquet-tool/cmd/dump.go +++ b/cmd/parquet-tool/cmd/dump.go @@ -48,7 +48,7 @@ func dump(file string) error { humanize.Bytes(uint64(ds.MetaData.TotalCompressedSize)), humanize.Bytes(uint64(ds.MetaData.TotalUncompressedSize)), fmt.Sprintf("%.2f", float64(ds.MetaData.TotalUncompressedSize-ds.MetaData.TotalCompressedSize)/float64(ds.MetaData.TotalCompressedSize)*100), - fmt.Sprintf("%.2f", float64(ds.MetaData.TotalCompressedSize)/float64(rg.TotalByteSize)*100), + fmt.Sprintf("%.2f", float64(ds.MetaData.TotalUncompressedSize)/float64(rg.TotalByteSize)*100), }) } table.Render()