Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Nanos in timestamp INT96 can be negative #485

Open
wants to merge 2 commits into
base: 2024_03_22
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion third_party/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ if(VELOX_ENABLE_ARROW)
set(VELOX_ARROW_BUILD_SHA256_CHECKSUM
01dd3f70e85d9b5b933ec92c0db8a4ef504a5105f78d2d8622e84279fb45c25d)
set(VELOX_ARROW_SOURCE_URL
"https://archive.apache.org/dist/arrow/arrow-${VELOX_ARROW_BUILD_VERSION}/apache-arrow-${VELOX_ARROW_BUILD_VERSION}.tar.gz"
"https://downloads.apache.org/arrow/arrow-${VELOX_ARROW_BUILD_VERSION}/apache-arrow-${VELOX_ARROW_BUILD_VERSION}.tar.gz"
)

resolve_dependency_url(ARROW)
Expand Down
6 changes: 3 additions & 3 deletions velox/dwio/parquet/reader/PageReader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -388,15 +388,15 @@ void PageReader::prepareDictionary(const PageHeader& pageHeader) {
for (auto i = dictionary_.numValues - 1; i >= 0; --i) {
// Convert the timestamp into seconds and nanos since the Unix epoch,
// 00:00:00.000000 on 1 January 1970.
uint64_t nanos;
int64_t nanos;
memcpy(
&nanos,
parquetValues + i * sizeof(Int96Timestamp),
sizeof(uint64_t));
sizeof(int64_t));
int32_t days;
memcpy(
&days,
parquetValues + i * sizeof(Int96Timestamp) + sizeof(uint64_t),
parquetValues + i * sizeof(Int96Timestamp) + sizeof(int64_t),
sizeof(int32_t));

values[i] = Timestamp::fromDaysAndNanos(days, nanos);
Expand Down
9 changes: 6 additions & 3 deletions velox/type/Timestamp.h
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,7 @@ struct Timestamp {

constexpr Timestamp() : seconds_(0), nanos_(0) {}

Timestamp(int64_t seconds, uint64_t nanos)
Timestamp(int64_t seconds, int64_t nanos)
: seconds_(seconds), nanos_(nanos) {
VELOX_USER_DCHECK_GE(
seconds, kMinSeconds, "Timestamp seconds out of range");
Expand All @@ -110,16 +110,19 @@ struct Timestamp {
VELOX_USER_DCHECK_LE(nanos, kMaxNanos, "Timestamp nanos out of range");
}

static Timestamp fromDaysAndNanos(int32_t days, uint64_t nanos) {
static Timestamp fromDaysAndNanos(int32_t days, int64_t nanos) {
static constexpr int64_t kJulianToUnixEpochDays = 2440588LL;
static constexpr int64_t kSecondsPerDay = 86400LL;
static constexpr int64_t kNanosPerSecond =
Timestamp::kNanosecondsInMillisecond * Timestamp::kMillisecondsInSecond;

int64_t seconds = (days - kJulianToUnixEpochDays) * kSecondsPerDay;
if (nanos > Timestamp::kMaxNanos) {
if (nanos > static_cast<int64_t>(Timestamp::kMaxNanos)) {
seconds += nanos / kNanosPerSecond;
nanos -= (nanos / kNanosPerSecond) * kNanosPerSecond;
} else if (nanos < 0) {
seconds += (nanos / kNanosPerSecond - 1);
nanos -= (nanos / kNanosPerSecond - 1) * kNanosPerSecond;
}

return Timestamp(seconds, nanos);
Expand Down
Loading