Skip to content

Commit

Permalink
snapshot: delegate reading to arrow readers
Browse files Browse the repository at this point in the history
Previously, all snapshot part bytes were copied into a slice that was then
passed to the arrow readers. This was an unnecessary indirection that resulted
in extra allocations on startup. Note that parquet bytes are still fully
copied: the parquet reader reads lazily, which is not possible here because the
underlying file is closed after the snapshot has been read.
Benchmark results for recovery from snapshot only (B/op and allocs/op decrease slightly; note that sec/op is higher in this run):
```
goos: darwin
goarch: arm64
pkg: github.com/polarsignals/frostdb
          │ benchmain  │             benchnew              │
          │   sec/op   │   sec/op    vs base               │
Replay-12   1.681 ± 5%   2.545 ± 4%  +51.41% (p=0.002 n=6)

          │  benchmain   │              benchnew              │
          │     B/op     │     B/op      vs base              │
Replay-12   2.327Gi ± 0%   2.278Gi ± 0%  -2.11% (p=0.002 n=6)

          │  benchmain  │             benchnew              │
          │  allocs/op  │  allocs/op   vs base              │
Replay-12   20.81M ± 0%   20.79M ± 0%  -0.10% (p=0.002 n=6)
```
  • Loading branch information
asubiotto committed Nov 23, 2023
1 parent dcf20c4 commit d720639
Showing 1 changed file with 12 additions and 10 deletions.
22 changes: 12 additions & 10 deletions snapshot.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,15 +13,14 @@ import (
"strconv"
"time"

"github.com/apache/arrow/go/v14/arrow/ipc"
"github.com/apache/arrow/go/v14/arrow/util"
"github.com/go-kit/log/level"
"github.com/oklog/ulid"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promauto"
"google.golang.org/protobuf/proto"

"github.com/apache/arrow/go/v14/arrow/ipc"
"github.com/apache/arrow/go/v14/arrow/util"
"github.com/go-kit/log/level"

"github.com/polarsignals/frostdb/dynparquet"
snapshotpb "github.com/polarsignals/frostdb/gen/proto/go/frostdb/snapshot/v1alpha1"
tablepb "github.com/polarsignals/frostdb/gen/proto/go/frostdb/table/v1alpha1"
Expand Down Expand Up @@ -621,20 +620,23 @@ func loadSnapshot(ctx context.Context, db *DB, r io.ReaderAt, size int64) ([]byt
}
startOffset := partMeta.StartOffset
endOffset := partMeta.EndOffset
partBytes := make([]byte, endOffset-startOffset)
if _, err := r.ReadAt(partBytes, startOffset); err != nil {
return err
}
partOptions := parts.WithCompactionLevel(int(partMeta.CompactionLevel))
partReader := io.NewSectionReader(r, startOffset, endOffset-startOffset)
switch partMeta.Encoding {
case snapshotpb.Part_ENCODING_PARQUET:
serBuf, err := dynparquet.ReaderFromBytes(partBytes)
// Copy the full part here since parquet reads lazily
// and the file is closed after the snapshot is read.
var b bytes.Buffer
if _, err := io.Copy(&b, partReader); err != nil {
return err
}
serBuf, err := dynparquet.ReaderFromBytes(b.Bytes())
if err != nil {
return err
}
resultParts = append(resultParts, parts.NewPart(partMeta.Tx, serBuf, partOptions))
case snapshotpb.Part_ENCODING_ARROW:
arrowReader, err := ipc.NewReader(bytes.NewReader(partBytes))
arrowReader, err := ipc.NewReader(partReader)
if err != nil {
return err
}
Expand Down

0 comments on commit d720639

Please sign in to comment.