From aec8e968da99a2db191265ba9f56ae0a25f545ce Mon Sep 17 00:00:00 2001 From: Robert Fratto Date: Tue, 14 Jan 2025 12:56:04 -0500 Subject: [PATCH] chore(dataobj): track non-NULL values in pages/columns (#15758) --- pkg/dataobj/internal/dataset/column.go | 1 + .../internal/dataset/column_builder.go | 1 + pkg/dataobj/internal/dataset/column_test.go | 2 + pkg/dataobj/internal/dataset/page.go | 1 + pkg/dataobj/internal/dataset/page_builder.go | 6 +- pkg/dataobj/internal/dataset/page_test.go | 2 + pkg/dataobj/internal/encoding/dataset_logs.go | 2 + .../internal/encoding/dataset_streams.go | 2 + pkg/dataobj/internal/encoding/encoder_logs.go | 2 + .../internal/encoding/encoder_streams.go | 2 + .../metadata/datasetmd/datasetmd.pb.go | 175 +++++++++++++----- .../metadata/datasetmd/datasetmd.proto | 6 + 12 files changed, 155 insertions(+), 47 deletions(-) diff --git a/pkg/dataobj/internal/dataset/column.go b/pkg/dataobj/internal/dataset/column.go index 0243ad3cd49b3..717bd02c20d27 100644 --- a/pkg/dataobj/internal/dataset/column.go +++ b/pkg/dataobj/internal/dataset/column.go @@ -16,6 +16,7 @@ type ( Compression datasetmd.CompressionType // Compression used for the column. RowsCount int // Total number of rows in the column. + ValuesCount int // Total number of non-NULL values in the column. CompressedSize int // Total size of all pages in the column after compression. UncompressedSize int // Total size of all pages in the column before compression. diff --git a/pkg/dataobj/internal/dataset/column_builder.go b/pkg/dataobj/internal/dataset/column_builder.go index 83476f6f04c92..369de361b72a0 100644 --- a/pkg/dataobj/internal/dataset/column_builder.go +++ b/pkg/dataobj/internal/dataset/column_builder.go @@ -138,6 +138,7 @@ func (cb *ColumnBuilder) Flush() (*MemColumn, error) { for _, page := range cb.pages { info.RowsCount += page.Info.RowCount + info.ValuesCount += page.Info.ValuesCount info.CompressedSize += page.Info.CompressedSize info.UncompressedSize += page.Info.UncompressedSize } diff --git a/pkg/dataobj/internal/dataset/column_test.go b/pkg/dataobj/internal/dataset/column_test.go index 545c9a26e357c..d065402921665 100644 --- a/pkg/dataobj/internal/dataset/column_test.go +++ b/pkg/dataobj/internal/dataset/column_test.go @@ -39,6 +39,8 @@ func TestColumnBuilder_ReadWrite(t *testing.T) { col, err := b.Flush() require.NoError(t, err) require.Equal(t, datasetmd.VALUE_TYPE_STRING, col.Info.Type) + require.Equal(t, len(in), col.Info.RowsCount) + require.Equal(t, len(in)-2, col.Info.ValuesCount) // -2 for the empty strings require.Greater(t, len(col.Pages), 1) t.Log("Uncompressed size: ", col.Info.UncompressedSize) diff --git a/pkg/dataobj/internal/dataset/page.go b/pkg/dataobj/internal/dataset/page.go index 6ae52e35d8814..9e8846d88eeed 100644 --- a/pkg/dataobj/internal/dataset/page.go +++ b/pkg/dataobj/internal/dataset/page.go @@ -34,6 +34,7 @@ type ( CompressedSize int // CompressedSize is the size of a page after compression. CRC32 uint32 // CRC32 checksum of the page after encoding and compression. RowCount int // RowCount is the number of rows in the page, including NULLs. + ValuesCount int // ValuesCount is the number of non-NULL values in the page. Encoding datasetmd.EncodingType // Encoding used for values in the page. Stats *datasetmd.Statistics // Optional statistics for the page. diff --git a/pkg/dataobj/internal/dataset/page_builder.go b/pkg/dataobj/internal/dataset/page_builder.go index 34a63a2181fb5..62b2b2efeb27a 100644 --- a/pkg/dataobj/internal/dataset/page_builder.go +++ b/pkg/dataobj/internal/dataset/page_builder.go @@ -44,7 +44,8 @@ type pageBuilder struct { presenceEnc *bitmapEncoder valuesEnc valueEncoder - rows int // Number of rows appended to the builder. + rows int // Number of rows appended to the builder. + values int // Number of non-NULL values appended to the builder. } // newPageBuilder creates a new pageBuilder that stores a sequence of [Value]s. @@ -104,6 +105,7 @@ func (b *pageBuilder) Append(value Value) bool { } b.rows++ + b.values++ return true } @@ -209,6 +211,7 @@ func (b *pageBuilder) Flush() (*MemPage, error) { CompressedSize: finalData.Len(), CRC32: checksum, RowCount: b.rows, + ValuesCount: b.values, Encoding: b.opts.Encoding, @@ -237,4 +240,5 @@ func (b *pageBuilder) Reset() { b.presenceBuffer.Reset() b.valuesEnc.Reset(b.valuesWriter) b.rows = 0 + b.values = 0 } diff --git a/pkg/dataobj/internal/dataset/page_test.go b/pkg/dataobj/internal/dataset/page_test.go index 1cbd025576a4d..b041285433ec8 100644 --- a/pkg/dataobj/internal/dataset/page_test.go +++ b/pkg/dataobj/internal/dataset/page_test.go @@ -39,6 +39,8 @@ func Test_pageBuilder_WriteRead(t *testing.T) { page, err := b.Flush() require.NoError(t, err) + require.Equal(t, len(in), page.Info.RowCount) + require.Equal(t, len(in)-2, page.Info.ValuesCount) // -2 for the empty strings t.Log("Uncompressed size: ", page.Info.UncompressedSize) t.Log("Compressed size: ", page.Info.CompressedSize) diff --git a/pkg/dataobj/internal/encoding/dataset_logs.go b/pkg/dataobj/internal/encoding/dataset_logs.go index 37060c443e861..35c6f469360a6 100644 --- a/pkg/dataobj/internal/encoding/dataset_logs.go +++ b/pkg/dataobj/internal/encoding/dataset_logs.go @@ -89,6 +89,7 @@ func (col *logsDatasetColumn) ColumnInfo() *dataset.ColumnInfo { Compression: col.desc.Info.Compression, RowsCount: int(col.desc.Info.RowsCount), + ValuesCount: int(col.desc.Info.ValuesCount), CompressedSize: int(col.desc.Info.CompressedSize), UncompressedSize: int(col.desc.Info.UncompressedSize), @@ -133,6 +134,7 @@ func (p *logsDatasetPage) PageInfo() *dataset.PageInfo { CompressedSize: int(p.desc.Info.CompressedSize), CRC32: p.desc.Info.Crc32, RowCount: int(p.desc.Info.RowsCount), + ValuesCount: int(p.desc.Info.ValuesCount), Encoding: p.desc.Info.Encoding, Stats: p.desc.Info.Statistics, diff --git a/pkg/dataobj/internal/encoding/dataset_streams.go b/pkg/dataobj/internal/encoding/dataset_streams.go index 5843cc4c2cd47..9442a50686d19 100644 --- a/pkg/dataobj/internal/encoding/dataset_streams.go +++ b/pkg/dataobj/internal/encoding/dataset_streams.go @@ -89,6 +89,7 @@ func (col *streamsDatasetColumn) ColumnInfo() *dataset.ColumnInfo { Compression: col.desc.Info.Compression, RowsCount: int(col.desc.Info.RowsCount), + ValuesCount: int(col.desc.Info.ValuesCount), CompressedSize: int(col.desc.Info.CompressedSize), UncompressedSize: int(col.desc.Info.UncompressedSize), @@ -133,6 +134,7 @@ func (p *streamsDatasetPage) PageInfo() *dataset.PageInfo { CompressedSize: int(p.desc.Info.CompressedSize), CRC32: p.desc.Info.Crc32, RowCount: int(p.desc.Info.RowsCount), + ValuesCount: int(p.desc.Info.ValuesCount), Encoding: p.desc.Info.Encoding, Stats: p.desc.Info.Statistics, diff --git a/pkg/dataobj/internal/encoding/encoder_logs.go b/pkg/dataobj/internal/encoding/encoder_logs.go index f5186d543eddc..c8baef1e9d386 100644 --- a/pkg/dataobj/internal/encoding/encoder_logs.go +++ b/pkg/dataobj/internal/encoding/encoder_logs.go @@ -55,6 +55,7 @@ func (enc *LogsEncoder) OpenColumn(columnType logsmd.ColumnType, info *dataset.C Name: info.Name, ValueType: info.Type, RowsCount: uint32(info.RowsCount), + ValuesCount: uint32(info.ValuesCount), Compression: info.Compression, UncompressedSize: uint32(info.UncompressedSize), CompressedSize: uint32(info.CompressedSize), @@ -199,6 +200,7 @@ func (enc *LogsColumnEncoder) AppendPage(page *dataset.MemPage) error { CompressedSize: uint32(page.Info.CompressedSize), Crc32: page.Info.CRC32, RowsCount: uint32(page.Info.RowCount), + ValuesCount: uint32(page.Info.ValuesCount), Encoding: page.Info.Encoding, DataOffset: uint32(enc.startOffset + enc.data.Len()), diff --git a/pkg/dataobj/internal/encoding/encoder_streams.go b/pkg/dataobj/internal/encoding/encoder_streams.go index b02e5a90bd714..77ec6fd02805c 100644 --- a/pkg/dataobj/internal/encoding/encoder_streams.go +++ b/pkg/dataobj/internal/encoding/encoder_streams.go @@ -55,6 +55,7 @@ func (enc *StreamsEncoder) OpenColumn(columnType streamsmd.ColumnType, info *dat Name: info.Name, ValueType: info.Type, RowsCount: uint32(info.RowsCount), + ValuesCount: uint32(info.ValuesCount), Compression: info.Compression, UncompressedSize: uint32(info.UncompressedSize), CompressedSize: uint32(info.CompressedSize), @@ -199,6 +200,7 @@ func (enc *StreamsColumnEncoder) AppendPage(page *dataset.MemPage) error { CompressedSize: uint32(page.Info.CompressedSize), Crc32: page.Info.CRC32, RowsCount: uint32(page.Info.RowCount), + ValuesCount: uint32(page.Info.ValuesCount), Encoding: page.Info.Encoding, DataOffset: uint32(enc.startOffset + enc.data.Len()), diff --git a/pkg/dataobj/internal/metadata/datasetmd/datasetmd.pb.go b/pkg/dataobj/internal/metadata/datasetmd/datasetmd.pb.go index fcbbefbebef32..cc74da03cec25 100644 --- a/pkg/dataobj/internal/metadata/datasetmd/datasetmd.pb.go +++ b/pkg/dataobj/internal/metadata/datasetmd/datasetmd.pb.go @@ -147,6 +147,8 @@ type ColumnInfo struct { MetadataSize uint32 `protobuf:"varint,8,opt,name=metadata_size,json=metadataSize,proto3" json:"metadata_size,omitempty"` // Statistics for the column. Statistics *Statistics `protobuf:"bytes,9,opt,name=statistics,proto3" json:"statistics,omitempty"` + // Total number of non-NULL values in the entire column. + ValuesCount uint32 `protobuf:"varint,10,opt,name=values_count,json=valuesCount,proto3" json:"values_count,omitempty"` } func (m *ColumnInfo) Reset() { *m = ColumnInfo{} } @@ -244,6 +246,13 @@ func (m *ColumnInfo) GetStatistics() *Statistics { return nil } +func (m *ColumnInfo) GetValuesCount() uint32 { + if m != nil { + return m.ValuesCount + } + return 0 +} + // Statistics about a column or a page. All statistics are optional and are // conditionally set depending on the column type. type Statistics struct { @@ -318,6 +327,8 @@ type PageInfo struct { DataSize uint32 `protobuf:"varint,7,opt,name=data_size,json=dataSize,proto3" json:"data_size,omitempty"` // Optional statistics for the page. Statistics *Statistics `protobuf:"bytes,8,opt,name=statistics,proto3" json:"statistics,omitempty"` + // Total number of non-NULL values in the page. + ValuesCount uint32 `protobuf:"varint,9,opt,name=values_count,json=valuesCount,proto3" json:"values_count,omitempty"` } func (m *PageInfo) Reset() { *m = PageInfo{} } @@ -408,6 +419,13 @@ func (m *PageInfo) GetStatistics() *Statistics { return nil } +func (m *PageInfo) GetValuesCount() uint32 { + if m != nil { + return m.ValuesCount + } + return 0 +} + func init() { proto.RegisterEnum("dataobj.metadata.dataset.v1.ValueType", ValueType_name, ValueType_value) proto.RegisterEnum("dataobj.metadata.dataset.v1.CompressionType", CompressionType_name, CompressionType_value) @@ -422,50 +440,51 @@ func init() { } var fileDescriptor_7ab9d5b21b743868 = []byte{ - // 679 bytes of a gzipped FileDescriptorProto - 0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0xff, 0x9c, 0x54, 0x4f, 0x4f, 0xdb, 0x4e, - 0x10, 0xcd, 0x26, 0xc0, 0x2f, 0x19, 0xfe, 0x99, 0xfd, 0x41, 0x31, 0x4d, 0x71, 0x23, 0x2a, 0x95, - 0x94, 0x56, 0xb6, 0x1a, 0xaa, 0xf6, 0x1c, 0x12, 0x83, 0x2c, 0x81, 0x63, 0xd9, 0x06, 0x09, 0x2e, - 0x96, 0x71, 0x36, 0xa9, 0x4b, 0x6c, 0x47, 0xb1, 0x93, 0x02, 0xa7, 0x9e, 0x7a, 0xee, 0xc7, 0xe8, - 0xf7, 0xe8, 0xa5, 0x47, 0x8e, 0x1c, 0x8b, 0x73, 0xe9, 0x91, 0x8f, 0x50, 0x65, 0xf3, 0xcf, 0x24, - 0x69, 0x84, 0x7a, 0x1b, 0xbf, 0xf7, 0x66, 0x66, 0x3d, 0x6f, 0x76, 0xe1, 0x43, 0xfd, 0xa2, 0x2a, - 0x94, 0xcd, 0xc0, 0xf4, 0xce, 0x3f, 0x09, 0xb6, 0x1b, 0x90, 0x86, 0x6b, 0xd6, 0x04, 0x87, 0x04, - 0x66, 0x07, 0xa4, 0x8c, 0x4f, 0x02, 0xa7, 0x3c, 0x8c, 0xf8, 0x7a, 0xc3, 0x0b, 0x3c, 0x9c, 0xee, - 0x25, 0xf1, 0x7d, 0x2d, 0xdf, 0x53, 0xf0, 0xad, 0xb7, 0x5b, 0x3f, 0x12, 0x00, 0x05, 0xaf, 0xd6, - 0x74, 0x5c, 0xc9, 0xad, 0x78, 0x18, 0xc3, 0x8c, 0x6b, 0x3a, 0x84, 0x45, 0x19, 0x94, 0x4d, 0xa9, - 0x34, 0xc6, 0x22, 0x40, 0xcb, 0xac, 0x35, 0x89, 0x11, 0x5c, 0xd5, 0x09, 0x1b, 0xcf, 0xa0, 0xec, - 0x52, 0xee, 0x25, 0x3f, 0xa5, 0x28, 0x7f, 0xd2, 0x91, 0xeb, 0x57, 0x75, 0xa2, 0xa6, 0x5a, 0xfd, - 0x10, 0x6f, 0x02, 0x34, 0xbc, 0xcf, 0xbe, 0x61, 0x79, 0x4d, 0x37, 0x60, 0x13, 0x19, 0x94, 0x5d, - 0x54, 0x53, 0x1d, 0xa4, 0xd0, 0x01, 0xb0, 0x0c, 0xf3, 0x96, 0xe7, 0xd4, 0x1b, 0xc4, 0xf7, 0x6d, - 0xcf, 0x65, 0x67, 0x68, 0x9b, 0x37, 0x53, 0xdb, 0x14, 0x86, 0x7a, 0xda, 0x2c, 0x5a, 0x00, 0xbf, - 0x86, 0x95, 0xa6, 0xdb, 0x07, 0x48, 0xd9, 0xf0, 0xed, 0x6b, 0xc2, 0xce, 0xd2, 0xae, 0x4c, 0x94, - 0xd0, 0xec, 0x6b, 0x82, 0xb7, 0x61, 0x79, 0x54, 0x3a, 0x47, 0xa5, 0x4b, 0xe3, 0xc2, 0xfe, 0x49, - 0x0c, 0xaf, 0x52, 0xf1, 0x49, 0xc0, 0xfe, 0xd7, 0x15, 0xf6, 0xe1, 0x12, 0x45, 0xf1, 0x0b, 0x58, - 0x1c, 0x08, 0x69, 0xbd, 0x24, 0x95, 0x2d, 0xf4, 0x41, 0x5a, 0xed, 0x00, 0xc0, 0x0f, 0xcc, 0xc0, - 0xf6, 0x03, 0xdb, 0xf2, 0xd9, 0x54, 0x06, 0x65, 0xe7, 0x73, 0xdb, 0x53, 0x7f, 0x59, 0x1b, 0xc8, - 0xd5, 0x48, 0xea, 0xd6, 0x3e, 0xc0, 0x90, 0xc1, 0x69, 0x48, 0x39, 0xb6, 0x6b, 0xd0, 0xd1, 0x53, - 0x27, 0x17, 0xd4, 0xa4, 0x63, 0xbb, 0xd4, 0x15, 0x4a, 0x9a, 0x97, 0x3d, 0x32, 0xde, 0x23, 0xcd, - 0x4b, 0x4a, 0x6e, 0xb5, 0xe3, 0x90, 0x54, 0xcc, 0x2a, 0xa1, 0xbb, 0x30, 0x71, 0x82, 0xe8, 0xf1, - 0x13, 0x8c, 0x4f, 0x9c, 0xe0, 0x2a, 0xcc, 0x5a, 0x0d, 0x6b, 0x37, 0xd7, 0xdb, 0x80, 0xee, 0xc7, - 0xc8, 0x72, 0xcc, 0x8c, 0x2e, 0x87, 0x08, 0x49, 0xe2, 0x5a, 0x5e, 0xd9, 0x76, 0xab, 0xd4, 0xc3, - 0xa5, 0xdc, 0xab, 0xa9, 0x63, 0x12, 0x7b, 0x62, 0xba, 0x16, 0x83, 0x54, 0xfc, 0x1c, 0xe6, 0xa3, - 0xce, 0x75, 0x2d, 0x86, 0x88, 0x6b, 0x69, 0x48, 0x0d, 0x1d, 0xeb, 0x1a, 0x9b, 0xfc, 0x8b, 0x5b, - 0xc9, 0x7f, 0x76, 0x6b, 0xe7, 0x02, 0x52, 0x83, 0x1b, 0x82, 0x9f, 0xc2, 0x93, 0x93, 0xfc, 0xe1, - 0xb1, 0x68, 0xe8, 0xa7, 0x8a, 0x68, 0x1c, 0xcb, 0x9a, 0x22, 0x16, 0xa4, 0x7d, 0x49, 0x2c, 0x32, - 0x31, 0xbc, 0x0a, 0x4c, 0x84, 0x93, 0x64, 0xfd, 0xfd, 0x3b, 0x06, 0xe1, 0x35, 0x58, 0x89, 0x66, - 0x74, 0xe1, 0xf8, 0x08, 0xac, 0xe9, 0xaa, 0x24, 0x1f, 0x30, 0x89, 0x9d, 0xaf, 0x08, 0x96, 0x47, - 0x2e, 0x0a, 0xce, 0xc0, 0xb3, 0x42, 0xe9, 0x48, 0x51, 0x45, 0x4d, 0x93, 0x4a, 0xf2, 0xa4, 0xce, - 0x1b, 0xb0, 0x36, 0xa6, 0x90, 0x4b, 0xb2, 0xc8, 0x20, 0x9c, 0x86, 0xf5, 0x31, 0x4a, 0x93, 0xf3, - 0x8a, 0x72, 0xca, 0xc4, 0x27, 0xe6, 0x9d, 0x69, 0x7a, 0x91, 0x49, 0xec, 0x5c, 0xc1, 0x42, 0xd4, - 0x16, 0xbc, 0x09, 0x1b, 0xa2, 0x5c, 0x28, 0x15, 0x25, 0xf9, 0x60, 0xd2, 0x09, 0xd6, 0xe1, 0xff, - 0x87, 0xb4, 0x72, 0x98, 0x97, 0x64, 0x06, 0x8d, 0x13, 0x45, 0xf1, 0x50, 0xcf, 0x33, 0x71, 0xcc, - 0xc2, 0xea, 0x43, 0x62, 0x4f, 0xd2, 0x8f, 0xf2, 0x0a, 0x93, 0xd8, 0xbb, 0xbc, 0xb9, 0xe3, 0x62, - 0xb7, 0x77, 0x5c, 0xec, 0xfe, 0x8e, 0x43, 0x5f, 0x42, 0x0e, 0x7d, 0x0f, 0x39, 0xf4, 0x33, 0xe4, - 0xd0, 0x4d, 0xc8, 0xa1, 0x5f, 0x21, 0x87, 0x7e, 0x87, 0x5c, 0xec, 0x3e, 0xe4, 0xd0, 0xb7, 0x36, - 0x17, 0xbb, 0x69, 0x73, 0xb1, 0xdb, 0x36, 0x17, 0x3b, 0xdb, 0xab, 0xda, 0xc1, 0xc7, 0xe6, 0x39, - 0x6f, 0x79, 0x8e, 0x50, 0x6d, 0x98, 0x15, 0xd3, 0x35, 0x85, 0x9a, 0x77, 0x61, 0x0b, 0xad, 0x5d, - 0xe1, 0x91, 0x6f, 0xf1, 0xf9, 0x1c, 0x7d, 0x82, 0x77, 0xff, 0x04, 0x00, 0x00, 0xff, 0xff, 0x5f, - 0xec, 0x7c, 0x9f, 0xbd, 0x05, 0x00, 0x00, + // 696 bytes of a gzipped FileDescriptorProto + 0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0xff, 0xac, 0x54, 0x41, 0x53, 0xda, 0x5c, + 0x14, 0xe5, 0x81, 0xfa, 0x91, 0x0b, 0x6a, 0x7c, 0x9f, 0xd6, 0x58, 0x6a, 0x4a, 0xed, 0x4c, 0xa5, + 0xb6, 0x03, 0x53, 0xec, 0xb4, 0x6b, 0x84, 0xe8, 0x64, 0x46, 0x43, 0x86, 0x44, 0x67, 0x74, 0x93, + 0x89, 0xe1, 0x41, 0x53, 0x49, 0xc2, 0x90, 0x40, 0xd5, 0x55, 0x57, 0xae, 0xfb, 0x33, 0xfa, 0x53, + 0xba, 0x74, 0xe9, 0xb2, 0xc6, 0x99, 0x4e, 0x97, 0xfe, 0x84, 0x0e, 0x0f, 0x02, 0x11, 0x28, 0xe3, + 0xa2, 0xbb, 0xc7, 0x39, 0xe7, 0xde, 0x1b, 0xce, 0xb9, 0xef, 0xc1, 0xc7, 0xe6, 0x59, 0x3d, 0x57, + 0xd5, 0x3d, 0xdd, 0x39, 0xfd, 0x9c, 0x33, 0x6d, 0x8f, 0xb4, 0x6c, 0xbd, 0x91, 0xb3, 0x88, 0xa7, + 0x77, 0x41, 0xca, 0xb8, 0xc4, 0xb3, 0xaa, 0xc3, 0x53, 0xb6, 0xd9, 0x72, 0x3c, 0x07, 0xa7, 0xfa, + 0x45, 0xd9, 0x40, 0x9b, 0xed, 0x2b, 0xb2, 0x9d, 0x77, 0x1b, 0xbf, 0x62, 0x00, 0x45, 0xa7, 0xd1, + 0xb6, 0x6c, 0xd1, 0xae, 0x39, 0x18, 0xc3, 0x8c, 0xad, 0x5b, 0x84, 0x43, 0x69, 0x94, 0x61, 0x2a, + 0xf4, 0x8c, 0x05, 0x80, 0x8e, 0xde, 0x68, 0x13, 0xcd, 0xbb, 0x68, 0x12, 0x2e, 0x9a, 0x46, 0x99, + 0x85, 0xfc, 0xab, 0xec, 0x94, 0xa6, 0xd9, 0xa3, 0xae, 0x5c, 0xbd, 0x68, 0x92, 0x0a, 0xd3, 0x09, + 0x8e, 0x78, 0x1d, 0xa0, 0xe5, 0x7c, 0x71, 0x35, 0xc3, 0x69, 0xdb, 0x1e, 0x17, 0x4b, 0xa3, 0xcc, + 0x7c, 0x85, 0xe9, 0x22, 0xc5, 0x2e, 0x80, 0x25, 0x48, 0x18, 0x8e, 0xd5, 0x6c, 0x11, 0xd7, 0x35, + 0x1d, 0x9b, 0x9b, 0xa1, 0x63, 0xde, 0x4e, 0x1d, 0x53, 0x1c, 0xea, 0xe9, 0xb0, 0x70, 0x03, 0xfc, + 0x06, 0x96, 0xda, 0x76, 0x00, 0x90, 0xaa, 0xe6, 0x9a, 0x97, 0x84, 0x9b, 0xa5, 0x53, 0xd9, 0x30, + 0xa1, 0x98, 0x97, 0x04, 0x6f, 0xc2, 0xe2, 0xa8, 0x74, 0x8e, 0x4a, 0x17, 0xc6, 0x85, 0xc1, 0x97, + 0x68, 0x4e, 0xad, 0xe6, 0x12, 0x8f, 0xfb, 0xaf, 0x27, 0x0c, 0xe0, 0x32, 0x45, 0xf1, 0x4b, 0x98, + 0x1f, 0x08, 0x69, 0xbf, 0x38, 0x95, 0x25, 0x03, 0x90, 0x76, 0xdb, 0x03, 0x70, 0x3d, 0xdd, 0x33, + 0x5d, 0xcf, 0x34, 0x5c, 0x8e, 0x49, 0xa3, 0x4c, 0x22, 0xbf, 0x39, 0xf5, 0x2f, 0x2b, 0x03, 0x79, + 0x25, 0x54, 0x8a, 0x5f, 0x40, 0x92, 0x1a, 0x1d, 0xb8, 0x0b, 0x74, 0x58, 0xa2, 0x87, 0x51, 0x7f, + 0x37, 0x76, 0x01, 0x86, 0xc5, 0x38, 0x05, 0x8c, 0x65, 0xda, 0x1a, 0x15, 0xd0, 0xb0, 0x93, 0x95, + 0xb8, 0x65, 0xda, 0x34, 0x38, 0x4a, 0xea, 0xe7, 0x7d, 0x32, 0xda, 0x27, 0xf5, 0x73, 0x4a, 0x6e, + 0x5c, 0xc5, 0x20, 0x2e, 0xeb, 0x75, 0x42, 0xd7, 0x65, 0xa2, 0xc9, 0xe8, 0xf1, 0x26, 0x47, 0x27, + 0x9a, 0xbc, 0x0c, 0xb3, 0x46, 0xcb, 0xd8, 0xce, 0xf7, 0x97, 0xa4, 0xf7, 0x63, 0x64, 0x7f, 0x66, + 0x46, 0xf7, 0x47, 0x80, 0x38, 0xb1, 0x0d, 0xa7, 0x6a, 0xda, 0x75, 0x1a, 0xf3, 0x42, 0xfe, 0xf5, + 0x54, 0x27, 0x85, 0xbe, 0x98, 0x6e, 0xce, 0xa0, 0x14, 0x3f, 0x87, 0x44, 0x38, 0xdc, 0xde, 0x16, + 0x40, 0x28, 0xd8, 0x14, 0x30, 0xc3, 0x50, 0x7b, 0xd9, 0xc7, 0xff, 0x12, 0x68, 0xfc, 0xdf, 0x05, + 0xca, 0x8c, 0x05, 0xba, 0x75, 0x06, 0xcc, 0xe0, 0x9e, 0xe1, 0xa7, 0xf0, 0xe4, 0xa8, 0xb0, 0x7f, + 0x28, 0x68, 0xea, 0xb1, 0x2c, 0x68, 0x87, 0x92, 0x22, 0x0b, 0x45, 0x71, 0x57, 0x14, 0x4a, 0x6c, + 0x04, 0x2f, 0x03, 0x1b, 0xe2, 0x44, 0x49, 0xfd, 0xf0, 0x9e, 0x45, 0x78, 0x05, 0x96, 0xc2, 0x15, + 0x3d, 0x38, 0x3a, 0x02, 0x2b, 0x6a, 0x45, 0x94, 0xf6, 0xd8, 0xd8, 0xd6, 0x15, 0x82, 0xc5, 0x91, + 0xeb, 0x86, 0xd3, 0xf0, 0xac, 0x58, 0x3e, 0x90, 0x2b, 0x82, 0xa2, 0x88, 0x65, 0x69, 0xd2, 0xe4, + 0x35, 0x58, 0x19, 0x53, 0x48, 0x65, 0x49, 0x60, 0x11, 0x4e, 0xc1, 0xea, 0x18, 0xa5, 0x48, 0x05, + 0x59, 0x3e, 0x66, 0xa3, 0x13, 0xeb, 0x4e, 0x14, 0xb5, 0xc4, 0xc6, 0xb6, 0x2e, 0x20, 0x19, 0x4e, + 0x0e, 0xaf, 0xc3, 0x9a, 0x20, 0x15, 0xcb, 0x25, 0x51, 0xda, 0x9b, 0xf4, 0x05, 0xab, 0xf0, 0xff, + 0x43, 0x5a, 0xde, 0x2f, 0x88, 0x12, 0x8b, 0xc6, 0x89, 0x92, 0xb0, 0xaf, 0x16, 0xd8, 0x28, 0xe6, + 0x60, 0xf9, 0x21, 0xb1, 0x23, 0xaa, 0x07, 0x05, 0x99, 0x8d, 0xed, 0x9c, 0x5f, 0xdf, 0xf2, 0x91, + 0x9b, 0x5b, 0x3e, 0x72, 0x7f, 0xcb, 0xa3, 0xaf, 0x3e, 0x8f, 0xbe, 0xfb, 0x3c, 0xfa, 0xe1, 0xf3, + 0xe8, 0xda, 0xe7, 0xd1, 0x4f, 0x9f, 0x47, 0xbf, 0x7d, 0x3e, 0x72, 0xef, 0xf3, 0xe8, 0xdb, 0x1d, + 0x1f, 0xb9, 0xbe, 0xe3, 0x23, 0x37, 0x77, 0x7c, 0xe4, 0x64, 0xa7, 0x6e, 0x7a, 0x9f, 0xda, 0xa7, + 0x59, 0xc3, 0xb1, 0x72, 0xf5, 0x96, 0x5e, 0xd3, 0x6d, 0x3d, 0xd7, 0x70, 0xce, 0xcc, 0x5c, 0x67, + 0x3b, 0xf7, 0xc8, 0x17, 0xfd, 0x74, 0x8e, 0x3e, 0xe4, 0xdb, 0x7f, 0x02, 0x00, 0x00, 0xff, 0xff, + 0x17, 0x09, 0x3d, 0x18, 0x03, 0x06, 0x00, 0x00, } func (x ValueType) String() string { @@ -535,6 +554,9 @@ func (this *ColumnInfo) Equal(that interface{}) bool { if !this.Statistics.Equal(that1.Statistics) { return false } + if this.ValuesCount != that1.ValuesCount { + return false + } return true } func (this *Statistics) Equal(that interface{}) bool { @@ -607,13 +629,16 @@ func (this *PageInfo) Equal(that interface{}) bool { if !this.Statistics.Equal(that1.Statistics) { return false } + if this.ValuesCount != that1.ValuesCount { + return false + } return true } func (this *ColumnInfo) GoString() string { if this == nil { return "nil" } - s := make([]string, 0, 13) + s := make([]string, 0, 14) s = append(s, "&datasetmd.ColumnInfo{") s = append(s, "Name: "+fmt.Sprintf("%#v", this.Name)+",\n") s = append(s, "ValueType: "+fmt.Sprintf("%#v", this.ValueType)+",\n") @@ -626,6 +651,7 @@ func (this *ColumnInfo) GoString() string { if this.Statistics != nil { s = append(s, "Statistics: "+fmt.Sprintf("%#v", this.Statistics)+",\n") } + s = append(s, "ValuesCount: "+fmt.Sprintf("%#v", this.ValuesCount)+",\n") s = append(s, "}") return strings.Join(s, "") } @@ -644,7 +670,7 @@ func (this *PageInfo) GoString() string { if this == nil { return "nil" } - s := make([]string, 0, 12) + s := make([]string, 0, 13) s = append(s, "&datasetmd.PageInfo{") s = append(s, "UncompressedSize: "+fmt.Sprintf("%#v", this.UncompressedSize)+",\n") s = append(s, "CompressedSize: "+fmt.Sprintf("%#v", this.CompressedSize)+",\n") @@ -656,6 +682,7 @@ func (this *PageInfo) GoString() string { if this.Statistics != nil { s = append(s, "Statistics: "+fmt.Sprintf("%#v", this.Statistics)+",\n") } + s = append(s, "ValuesCount: "+fmt.Sprintf("%#v", this.ValuesCount)+",\n") s = append(s, "}") return strings.Join(s, "") } @@ -687,6 +714,11 @@ func (m *ColumnInfo) MarshalToSizedBuffer(dAtA []byte) (int, error) { _ = i var l int _ = l + if m.ValuesCount != 0 { + i = encodeVarintDatasetmd(dAtA, i, uint64(m.ValuesCount)) + i-- + dAtA[i] = 0x50 + } if m.Statistics != nil { { size, err := m.Statistics.MarshalToSizedBuffer(dAtA[:i]) @@ -801,6 +833,11 @@ func (m *PageInfo) MarshalToSizedBuffer(dAtA []byte) (int, error) { _ = i var l int _ = l + if m.ValuesCount != 0 { + i = encodeVarintDatasetmd(dAtA, i, uint64(m.ValuesCount)) + i-- + dAtA[i] = 0x48 + } if m.Statistics != nil { { size, err := m.Statistics.MarshalToSizedBuffer(dAtA[:i]) @@ -897,6 +934,9 @@ func (m *ColumnInfo) Size() (n int) { l = m.Statistics.Size() n += 1 + l + sovDatasetmd(uint64(l)) } + if m.ValuesCount != 0 { + n += 1 + sovDatasetmd(uint64(m.ValuesCount)) + } return n } @@ -948,6 +988,9 @@ func (m *PageInfo) Size() (n int) { l = m.Statistics.Size() n += 1 + l + sovDatasetmd(uint64(l)) } + if m.ValuesCount != 0 { + n += 1 + sovDatasetmd(uint64(m.ValuesCount)) + } return n } @@ -971,6 +1014,7 @@ func (this *ColumnInfo) String() string { `MetadataOffset:` + fmt.Sprintf("%v", this.MetadataOffset) + `,`, `MetadataSize:` + fmt.Sprintf("%v", this.MetadataSize) + `,`, `Statistics:` + strings.Replace(this.Statistics.String(), "Statistics", "Statistics", 1) + `,`, + `ValuesCount:` + fmt.Sprintf("%v", this.ValuesCount) + `,`, `}`, }, "") return s @@ -999,6 +1043,7 @@ func (this *PageInfo) String() string { `DataOffset:` + fmt.Sprintf("%v", this.DataOffset) + `,`, `DataSize:` + fmt.Sprintf("%v", this.DataSize) + `,`, `Statistics:` + strings.Replace(this.Statistics.String(), "Statistics", "Statistics", 1) + `,`, + `ValuesCount:` + fmt.Sprintf("%v", this.ValuesCount) + `,`, `}`, }, "") return s @@ -1241,6 +1286,25 @@ func (m *ColumnInfo) Unmarshal(dAtA []byte) error { return err } iNdEx = postIndex + case 10: + if wireType != 0 { + return fmt.Errorf("proto: wrong wireType = %d for field ValuesCount", wireType) + } + m.ValuesCount = 0 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowDatasetmd + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + m.ValuesCount |= uint32(b&0x7F) << shift + if b < 0x80 { + break + } + } default: iNdEx = preIndex skippy, err := skipDatasetmd(dAtA[iNdEx:]) @@ -1584,6 +1648,25 @@ func (m *PageInfo) Unmarshal(dAtA []byte) error { return err } iNdEx = postIndex + case 9: + if wireType != 0 { + return fmt.Errorf("proto: wrong wireType = %d for field ValuesCount", wireType) + } + m.ValuesCount = 0 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowDatasetmd + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + m.ValuesCount |= uint32(b&0x7F) << shift + if b < 0x80 { + break + } + } default: iNdEx = preIndex skippy, err := skipDatasetmd(dAtA[iNdEx:]) diff --git a/pkg/dataobj/internal/metadata/datasetmd/datasetmd.proto b/pkg/dataobj/internal/metadata/datasetmd/datasetmd.proto index f6b827838eb03..b1b3055f42b67 100644 --- a/pkg/dataobj/internal/metadata/datasetmd/datasetmd.proto +++ b/pkg/dataobj/internal/metadata/datasetmd/datasetmd.proto @@ -34,6 +34,9 @@ message ColumnInfo { // Statistics for the column. Statistics statistics = 9; + + // Total number of non-NULL values in the entire column. + uint32 values_count = 10; } // ValueType represents the valid types that values within a column can have. @@ -103,6 +106,9 @@ message PageInfo { // Optional statistics for the page. Statistics statistics = 8; + + // Total number of non-NULL values in the page. + uint32 values_count = 9; } // EncodingType represents the valid types that a sequence of values which a