diff --git a/lib/util.ts b/lib/util.ts index 198c3585..e45be959 100644 --- a/lib/util.ts +++ b/lib/util.ts @@ -4,17 +4,11 @@ import fs, { WriteStream } from 'fs' import * as parquet_thrift from '../gen-nodejs/parquet_types' import { NewFileMetaData } from './types/types' -/** We need to use a patched version of TFramedTransport where - * readString returns the original buffer instead of a string if the - * buffer can not be safely encoded as utf8 (see http://bit.ly/2GXeZEF) - */ - - -type Enums = typeof parquet_thrift.Encoding | typeof parquet_thrift.FieldRepetitionType | typeof parquet_thrift.Type | typeof parquet_thrift.CompressionCodec | typeof parquet_thrift.PageType | typeof parquet_thrift.ConvertedType; - -type ThriftObject = NewFileMetaData | parquet_thrift.PageHeader | parquet_thrift.BloomFilterHeader | parquet_thrift.OffsetIndex | parquet_thrift.ColumnIndex | NewFileMetaData; - -// May not be needed anymore, Issue at https://github.com/LibertyDSNP/parquetjs/issues/41 +/** + * We need to patch Thrift's TFramedTransport class because the TS type definitions + * do not define a `readPos` field, even though the class implementation has + * one. + */ class fixedTFramedTransport extends thrift.TFramedTransport { inBuf: Buffer readPos: number @@ -23,19 +17,11 @@ class fixedTFramedTransport extends thrift.TFramedTransport { this.inBuf = inBuf this.readPos = 0 } - - readString(len = 0): string { - this.ensureAvailable(len); - var buffer = this.inBuf.slice(this.readPos, this.readPos + len); - var str = this.inBuf.toString('utf8', this.readPos, this.readPos + len); - this.readPos += len; - //@ts-ignore - return (Buffer.from(str).equals(buffer)) ? 
str : buffer; - } } - -/** Patch PageLocation to be three element array that has getters/setters +type Enums = typeof parquet_thrift.Encoding | typeof parquet_thrift.FieldRepetitionType | typeof parquet_thrift.Type | typeof parquet_thrift.CompressionCodec | typeof parquet_thrift.PageType | typeof parquet_thrift.ConvertedType; + +type ThriftObject = NewFileMetaData | parquet_thrift.PageHeader | parquet_thrift.BloomFilterHeader | parquet_thrift.OffsetIndex | parquet_thrift.ColumnIndex | NewFileMetaData;/** Patch PageLocation to be three element array that has getters/setters * for each of the properties (offset, compressed_page_size, first_row_index) * This saves space considerably as we do not need to store the full variable * names for every PageLocation