Skip to content

Commit

Permalink
Make headerLBS and finalLBS index-type-polymorphic
Browse files Browse the repository at this point in the history
  • Loading branch information
jeltsch committed Dec 9, 2024
1 parent 82df7e4 commit 0d0eab5
Show file tree
Hide file tree
Showing 6 changed files with 97 additions and 33 deletions.
9 changes: 7 additions & 2 deletions src/Database/LSMTree/Internal/ChecksumHandle.hs
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
{-# LANGUAGE MagicHash #-}

module Database.LSMTree.Internal.ChecksumHandle
(
-- * Checksum handles
Expand Down Expand Up @@ -44,6 +46,7 @@ import qualified Database.LSMTree.Internal.RawOverflowPage as RawOverflowPage
import Database.LSMTree.Internal.RawPage (RawPage)
import qualified Database.LSMTree.Internal.RawPage as RawPage
import Database.LSMTree.Internal.Serialise
import GHC.Exts (Proxy#)
import qualified System.FS.API as FS
import System.FS.API
import qualified System.FS.BlockIO.API as FS
Expand Down Expand Up @@ -204,15 +207,17 @@ writeFilter hfs filterHandle bf =
{-# SPECIALISE writeIndexHeader ::
HasFS IO h
-> ForIndex (ChecksumHandle RealWorld h)
-> Proxy# IndexCompact
-> IO () #-}
writeIndexHeader ::
(MonadSTM m, PrimMonad m)
=> HasFS m h
-> ForIndex (ChecksumHandle (PrimState m) h)
-> Proxy# IndexCompact
-> m ()
writeIndexHeader hfs indexHandle =
writeIndexHeader hfs indexHandle indexTypeProxy =
writeToHandle hfs (unForIndex indexHandle) $
Index.headerLBS
Index.headerLBS indexTypeProxy

{-# SPECIALISE writeIndexChunk ::
HasFS IO h
Expand Down
35 changes: 33 additions & 2 deletions src/Database/LSMTree/Internal/Index.hs
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
{-# LANGUAGE MagicHash #-}
{-# LANGUAGE TypeFamilies #-}

{-|
Expand All @@ -6,20 +7,35 @@
-}
module Database.LSMTree.Internal.Index
(
Index (search, sizeInPages, fromSBS),
Index (search, sizeInPages, headerLBS, finalLBS, fromSBS),
IndexAcc (ResultingIndex, appendSingle, appendMulti, unsafeEnd)
)
where

import Control.Monad.ST.Strict (ST)
import Data.ByteString.Lazy (LazyByteString)
import Data.ByteString.Short (ShortByteString)
import Data.Word (Word32)
import Database.LSMTree.Internal.Chunk (Chunk)
import Database.LSMTree.Internal.Entry (NumEntries)
import Database.LSMTree.Internal.Page (NumPages, PageSpan)
import Database.LSMTree.Internal.Serialise (SerialisedKey)
import GHC.Exts (Proxy#)

-- | The class of index types.
{-|
The class of index types.
This class contains also methods for the non-incremental parts of otherwise
incremental serialisation. To completely serialise an index interleaved with
its construction, proceed as follows:
1. Use 'headerLBS' to generate the header of the serialised index.
2. Incrementally construct the index using the methods of 'IndexAcc', and
assemble the body of the serialised index from the generated chunks.
3. Use 'finalLBS' to generate the footer of the serialised index.
-}
class Index i where

{-|
Expand All @@ -33,6 +49,21 @@ class Index i where
-- | Yields the number of pages covered by an index.
sizeInPages :: i -> NumPages

{-|
Yields the header of the serialised form of an index.
See the documentation of the 'Index' class for how to generate a
complete serialised index.
-}
headerLBS :: Proxy# i -> LazyByteString

{-|
Yields the footer of the serialised form of an index.
See the documentation of the 'Index' class for how to generate a
complete serialised index.
-}
finalLBS :: NumEntries -> i -> LazyByteString
{-|
Reads an index along with the number of entries of the respective run
from a byte string.
Expand Down
44 changes: 24 additions & 20 deletions src/Database/LSMTree/Internal/Index/Compact.hs
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
{-# LANGUAGE MagicHash #-}

-- | A compact fence-pointer index for uniformly distributed keys.
--
-- TODO: add utility functions for clash probability calculations
Expand All @@ -13,9 +15,8 @@ module Database.LSMTree.Internal.Index.Compact (
-- * Non-incremental serialisation
, toLBS
-- * Incremental serialisation
-- $incremental-serialisation
, headerLBS
, finalLBS
, Index.headerLBS
, Index.finalLBS
, word64VectorToChunk
-- * Deserialisation
, Index.fromSBS
Expand Down Expand Up @@ -49,12 +50,13 @@ import Database.LSMTree.Internal.Chunk (Chunk (Chunk))
import qualified Database.LSMTree.Internal.Chunk as Chunk (toByteString)
import Database.LSMTree.Internal.Entry (NumEntries (..))
import Database.LSMTree.Internal.Index (Index)
import qualified Database.LSMTree.Internal.Index as Index (fromSBS, search,
sizeInPages)
import qualified Database.LSMTree.Internal.Index as Index (finalLBS, fromSBS,
headerLBS, search, sizeInPages)
import Database.LSMTree.Internal.Page
import Database.LSMTree.Internal.Serialise
import Database.LSMTree.Internal.Unsliced
import Database.LSMTree.Internal.Vector
import GHC.Exts (Proxy#, proxy#)

{- $compact
Expand Down Expand Up @@ -460,38 +462,34 @@ sizeInPages = NumPages . toEnum . VU.length . icPrimary
-- | Serialises a compact index in one go.
toLBS :: NumEntries -> IndexCompact -> LBS.ByteString
toLBS numEntries index =
headerLBS
headerLBS (proxy# @IndexCompact)
<> LBS.fromStrict (Chunk.toByteString (word64VectorToChunk (icPrimary index)))
<> finalLBS numEntries index

{-------------------------------------------------------------------------------
Incremental serialisation
-------------------------------------------------------------------------------}

{- $incremental-serialisation
To incrementally serialise a compact index as it is being constructed, start
by using 'headerLBS'. Each yielded chunk can then be written using
'Chunk.toByteString'. Once construction is completed, 'finalLBS' will
serialise the remaining parts of the compact index.
Also see module "Database.LSMTree.Internal.Index.CompactAcc".
-}

-- | By writing out the type–version indicator in host endianness, we also
-- indicate endianness. During deserialisation, we would discover an endianness
-- mismatch.
supportedTypeAndVersion :: Word32
supportedTypeAndVersion = 0x0001

-- | 64 bits, to be used before writing any other parts of the serialised file!
headerLBS :: LBS.ByteString
headerLBS =
{-|
For a specification of this operation, see the documentation of [its
polymorphic version]('Index.headerLBS').
-}
headerLBS :: Proxy# IndexCompact -> LBS.ByteString
headerLBS _ =
-- create a single 4 byte chunk
BB.toLazyByteStringWith (BB.safeStrategy 4 BB.smallChunkSize) mempty $
BB.word32Host supportedTypeAndVersion <> BB.word32Host 0

-- | Writes everything after the primary array, which is assumed to have already
-- been written using 'Chunk.toByteString'.
{-|
For a specification of this operation, see the documentation of [its
polymorphic version]('Index.finalLBS').
-}
finalLBS :: NumEntries -> IndexCompact -> LBS.ByteString
finalLBS (NumEntries numEntries) IndexCompact {..} =
-- use a builder, since it is all relatively small
Expand Down Expand Up @@ -690,6 +688,12 @@ instance Index IndexCompact where
sizeInPages :: IndexCompact -> NumPages
sizeInPages = sizeInPages

headerLBS :: Proxy# IndexCompact -> LBS.ByteString
headerLBS = headerLBS

finalLBS :: NumEntries -> IndexCompact -> LBS.ByteString
finalLBS = finalLBS

fromSBS :: ShortByteString -> Either String (NumEntries, IndexCompact)
fromSBS = fromSBS

Expand Down
29 changes: 24 additions & 5 deletions src/Database/LSMTree/Internal/Index/Ordinary.hs
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
{-# LANGUAGE MagicHash #-}

{- HLINT ignore "Avoid restricted alias" -}

-- | A general-purpose fence pointer index.
Expand All @@ -15,6 +17,9 @@ import Prelude hiding (drop, last, length)

import Control.Exception (assert)
import Control.Monad (when)
import Data.ByteString.Builder (toLazyByteString)
import Data.ByteString.Builder.Extra (word32Host, word64Host)
import Data.ByteString.Lazy (LazyByteString)
import Data.ByteString.Short (ShortByteString (SBS))
import qualified Data.ByteString.Short as ShortByteString (length)
import Data.Primitive.ByteArray (ByteArray (ByteArray),
Expand All @@ -24,14 +29,16 @@ import Data.Vector (Vector, drop, findIndex, findIndexR, fromList,
import qualified Data.Vector.Primitive as Primitive (Vector (Vector), drop,
force, length, null, splitAt, take)
import Data.Word (Word16, Word32, Word64, Word8, byteSwap32)
import Database.LSMTree.Internal.Entry (NumEntries (NumEntries))
import Database.LSMTree.Internal.Entry (NumEntries (NumEntries),
unNumEntries)
import Database.LSMTree.Internal.Index
(Index (fromSBS, search, sizeInPages))
(Index (finalLBS, fromSBS, headerLBS, search, sizeInPages))
import Database.LSMTree.Internal.Page (NumPages (NumPages),
PageNo (PageNo), PageSpan (PageSpan))
import Database.LSMTree.Internal.Serialise
(SerialisedKey (SerialisedKey'))
import Database.LSMTree.Internal.Vector (binarySearchL, mkPrimVector)
import GHC.Exts (Proxy#)

{-|
The type–version indicator for the ordinary index and its serialisation
Expand Down Expand Up @@ -99,6 +106,18 @@ instance Index IndexOrdinary where
sizeInPages (IndexOrdinary lastKeys)
= NumPages $ fromIntegral (length lastKeys)

headerLBS :: Proxy# IndexOrdinary -> LazyByteString
headerLBS _ = toLazyByteString $
word32Host $
supportedTypeAndVersion

finalLBS :: NumEntries -> IndexOrdinary -> LazyByteString
finalLBS entryCount _ = toLazyByteString $
word64Host $
fromIntegral $
unNumEntries $
entryCount

fromSBS :: ShortByteString -> Either String (NumEntries, IndexOrdinary)
fromSBS shortByteString@(SBS unliftedByteArray)
| fullSize < 12
Expand All @@ -123,12 +142,12 @@ instance Index IndexOrdinary where
typeAndVersion :: Word32
typeAndVersion = indexByteArray byteArray 0

postVersionBytes :: Primitive.Vector Word8
postVersionBytes = Primitive.drop 4 fullBytes
postTypeAndVersionBytes :: Primitive.Vector Word8
postTypeAndVersionBytes = Primitive.drop 4 fullBytes

lastKeysBytes, entryCountBytes :: Primitive.Vector Word8
(lastKeysBytes, entryCountBytes)
= Primitive.splitAt (fullSize - 12) postVersionBytes
= Primitive.splitAt (fullSize - 12) postTypeAndVersionBytes

entryCount :: Either String NumEntries
entryCount
Expand Down
5 changes: 4 additions & 1 deletion src/Database/LSMTree/Internal/RunBuilder.hs
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
{-# LANGUAGE MagicHash #-}

-- | A mutable run ('RunBuilder') that is under construction.
--
module Database.LSMTree.Internal.RunBuilder (
Expand Down Expand Up @@ -30,6 +32,7 @@ import Database.LSMTree.Internal.RawPage (RawPage)
import Database.LSMTree.Internal.RunAcc (RunAcc, RunBloomFilterAlloc)
import qualified Database.LSMTree.Internal.RunAcc as RunAcc
import Database.LSMTree.Internal.Serialise
import GHC.Exts (proxy#)
import qualified System.FS.API as FS
import System.FS.API (HasFS)
import qualified System.FS.BlockIO.API as FS
Expand Down Expand Up @@ -90,7 +93,7 @@ new hfs hbio runBuilderFsPaths numEntries alloc = do
runBuilderHandles <- traverse (makeHandle hfs) (pathsForRunFiles runBuilderFsPaths)

let builder = RunBuilder { runBuilderHasFS = hfs, runBuilderHasBlockIO = hbio, .. }
writeIndexHeader hfs (forRunIndex runBuilderHandles)
writeIndexHeader hfs (forRunIndex runBuilderHandles) (proxy# @IndexCompact)
return builder

{-# SPECIALISE addKeyOp ::
Expand Down
8 changes: 5 additions & 3 deletions test/Test/Database/LSMTree/Internal/Index/Compact.hs
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
{-# LANGUAGE LambdaCase #-}
{-# LANGUAGE MagicHash #-}
{-# LANGUAGE RecordWildCards #-}
{-# OPTIONS_GHC -Wno-orphans #-}
{- HLINT ignore "Eta reduce" -}
Expand Down Expand Up @@ -37,6 +38,7 @@ import Database.LSMTree.Internal.Index.CompactAcc as IndexCompact
import Database.LSMTree.Internal.Page (PageNo (PageNo), PageSpan,
multiPage, singlePage)
import Database.LSMTree.Internal.Serialise
import GHC.Exts (proxy#)
import Numeric (showHex)
import Prelude hiding (max, min, pi)
import qualified Test.QuickCheck as QC
Expand Down Expand Up @@ -99,7 +101,7 @@ tests = testGroup "Test.Database.LSMTree.Internal.Index.Compact" [
, 7, 0
]

let header = LBS.unpack headerLBS
let header = LBS.unpack (headerLBS (proxy# @IndexCompact))
let primary = LBS.unpack $
LBS.fromChunks (map Chunk.toByteString chunks)
let rest = LBS.unpack (finalLBS (NumEntries 7) index)
Expand Down Expand Up @@ -231,7 +233,7 @@ prop_roundtrip_chunks (Chunks chunks index) numEntries =
counterexample ("rest:\n" <> showBS bsRest) $
Right (numEntries, index) === fromSBS sbs
where
bsVersion = headerLBS
bsVersion = headerLBS (proxy# @IndexCompact)
bsPrimary = LBS.fromChunks $
map (Chunk.toByteString . word64VectorToChunk) chunks
bsRest = finalLBS numEntries index
Expand Down Expand Up @@ -293,7 +295,7 @@ writeIndexCompact numEntries (ChunkSize csize) ps = runST $ do
cs <- mapM (`append` ica) (toAppends ps)
(c, index) <- unsafeEnd ica
return
( headerLBS
( headerLBS (proxy# @IndexCompact)
, LBS.fromChunks $
foldMap (map Chunk.toByteString) $ cs <> pure (toList c)
, finalLBS numEntries index
Expand Down

0 comments on commit 0d0eab5

Please sign in to comment.