Skip to content

Commit

Permalink
Merge pull request #45 from input-output-hk/jdral/compact-index-bench…
Browse files Browse the repository at this point in the history
…marks

Micro-benchmarks for `CompactIndex`
  • Loading branch information
dcoutts authored Jan 8, 2024
2 parents 3b97ccd + 7db02b9 commit 2a99c3e
Show file tree
Hide file tree
Showing 8 changed files with 274 additions and 68 deletions.
47 changes: 25 additions & 22 deletions bench/micro/Bench/Database/LSMTree/Internal/Run/BloomFilter.hs
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
{-# LANGUAGE NumericUnderscores #-}
{-# LANGUAGE TypeApplications #-}
{- HLINT ignore "Use camelCase" -}
{- HLINT ignore "Eta reduce" -}

Expand All @@ -9,9 +10,12 @@ import qualified Data.BloomFilter.Easy as Bloom.Easy
import Data.Foldable (Foldable (..))
import Data.Map.Strict (Map)
import qualified Data.Map.Strict as Map
import Data.Word
import Database.LSMTree.Extras
import Database.LSMTree.Generators
import Database.LSMTree.Internal.Run.BloomFilter as Bloom
import Database.LSMTree.Internal.Serialise (Serialise (serialise),
SerialisedKey)
import Database.LSMTree.Util.Orphans ()
import System.Random
import System.Random.Extras
import Test.QuickCheck (generate, shuffle)
Expand All @@ -29,15 +33,12 @@ benchmarks = bgroup "Bench.Database.LSMTree.Internal.Run.BloomFilter" [
, env (elemEnv 0.9 2_500_000 0 1_000_000) $ \ ~(b, xs) ->
bench "onlyNegatives 0.9" $ whnf (elems b) xs
]
, bgroup "construction" [
env (constructionEnv 2_500_000) $ \ m ->
bench "incrementalST 0.1" $ whnf (constructBloom mkBloomST 0.1) m
, env (constructionEnv 2_500_000) $ \ m ->
bench "incrementalST 0.9" $ whnf (constructBloom mkBloomST 0.9) m
, env (constructionEnv 2_500_000) $ \ m ->
bench "easyList 0.1" $ whnf (constructBloom mkBloomEasy 0.1) m
, env (constructionEnv 2_500_000) $ \ m ->
bench "easyList 0.9" $ whnf (constructBloom mkBloomEasy 0.9) m
, env (constructionEnv 2_500_000) $ \ m ->
bgroup "construction" [
bench "incrementalST 0.1" $ whnf (constructBloom mkBloomST 0.1) m
, bench "incrementalST 0.9" $ whnf (constructBloom mkBloomST 0.9) m
, bench "easyList 0.1" $ whnf (constructBloom mkBloomEasy 0.1) m
, bench "easyList 0.9" $ whnf (constructBloom mkBloomEasy 0.9) m
]
]

Expand All @@ -47,31 +48,33 @@ elemEnv ::
-> Int -- ^ Number of entries in the bloom filter
-> Int -- ^ Number of positive lookups
-> Int -- ^ Number of negative lookups
-> IO (Bloom Word64, [Word64])
-> IO (Bloom SerialisedKey, [SerialisedKey])
elemEnv fpr nbloom nelemsPositive nelemsNegative = do
stdgen <- newStdGen
stdgen' <- newStdGen
let (xs, ys1) = splitAt nbloom
$ uniformWithoutReplacement stdgen (nbloom + nelemsNegative)
ys2 = sampleUniformWithReplacement stdgen' nelemsPositive xs
$ uniformWithoutReplacement @UTxOKey stdgen (nbloom + nelemsNegative)
ys2 = sampleUniformWithReplacement @UTxOKey stdgen' nelemsPositive xs
zs <- generate $ shuffle (ys1 ++ ys2)
pure (Bloom.Easy.easyList fpr xs, zs)
pure (Bloom.Easy.easyList fpr (fmap serialise xs), fmap serialise zs)

-- | Used for benchmarking 'Bloom.elem'.
elems :: Bloom a -> [a] -> ()
elems b xs = foldl' (\acc x -> Bloom.elem x b `seq` acc) () xs

-- | Input environment for benchmarking 'constructBloom'.
constructionEnv :: Int -> IO (Map Word64 Word64)
constructionEnv :: Int -> IO (Map SerialisedKey SerialisedKey)
constructionEnv n = do
stdgen <- newStdGen
let ks = uniformWithoutReplacement stdgen n
vs = uniformWithReplacement stdgen n
pure $ Map.fromList (zip ks vs)
stdgen <- newStdGen
stdgen' <- newStdGen
let ks = uniformWithoutReplacement @UTxOKey stdgen n
vs = uniformWithReplacement @UTxOKey stdgen' n
pure $ Map.fromList (zipWith (\k v -> (serialise k, serialise v)) ks vs)

-- | Used for benchmarking the construction of bloom filters from write buffers.
constructBloom ::
(Double -> BloomMaker Word64)
(Double -> BloomMaker SerialisedKey)
-> Double
-> Map Word64 Word64
-> Bloom Word64
-> Map SerialisedKey SerialisedKey
-> Bloom SerialisedKey
constructBloom mkBloom fpr m = mkBloom fpr (Map.keys m)
72 changes: 72 additions & 0 deletions bench/micro/Bench/Database/LSMTree/Internal/Run/Index/Compact.hs
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
{-# LANGUAGE NumericUnderscores #-}
{-# LANGUAGE TypeApplications #-}
{- HLINT ignore "Eta reduce" -}

module Bench.Database.LSMTree.Internal.Run.Index.Compact (benchmarks) where

import Control.DeepSeq (deepseq)
import Criterion.Main
import Data.Foldable (Foldable (..))
import qualified Data.List.NonEmpty as NonEmpty
import Database.LSMTree.Generators
import Database.LSMTree.Internal.Run.Index.Compact
import Database.LSMTree.Internal.Serialise (Serialise (serialise),
SerialisedKey)
import System.Random
import System.Random.Extras

-- See 'utxoNumPages'.
benchmarks :: Benchmark
benchmarks = bgroup "Bench.Database.LSMTree.Internal.Run.Index.Compact" [
bgroup "searches" [
env (searchEnv 0 100 2_500_000 1_000_000) $ \ ~(ci, ks) ->
bench "searches with 0-bit rfprec" $ whnf (searches ci) ks
, env (searchEnv 16 100 2_500_000 1_000_000) $ \ ~(ci, ks) ->
bench "searches with 16-bit rfprec" $ whnf (searches ci) ks
]
, bgroup "construction" [
env (constructionEnv 0 2_500_000) $ \ pages ->
bench "construction with 0-bit rfprec and chunk size 100" $ whnf (constructCompactIndex 100) pages
, env (constructionEnv 16 2_500_000) $ \ pages ->
bench "construction with 16-bit rfprec and chunk size 100" $ whnf (constructCompactIndex 100) pages
]
]

-- | Input environment for benchmarking 'searches'.
searchEnv ::
RFPrecision -- ^ Range-finder bit-precision
-> ChunkSize
-> Int -- ^ Number of pages
-> Int -- ^ Number of searches
-> IO (CompactIndex, [SerialisedKey])
searchEnv fpr csize npages nsearches = do
ci <- constructCompactIndex csize <$> constructionEnv fpr npages
stdgen <- newStdGen
let ks = serialise <$> uniformWithReplacement @UTxOKey stdgen nsearches
pure (ci, ks)

-- | Used for benchmarking 'search'.
searches ::
CompactIndex
-> [SerialisedKey] -- ^ Keys to search for
-> ()
searches ci ks = foldl' (\acc k -> search k ci `deepseq` acc) () ks

-- | Input environment for benchmarking 'constructCompactIndex'.
constructionEnv ::
RFPrecision -- ^ Range-finder bit-precision
-> Int -- ^ Number of pages
-> IO (Pages SerialisedKey)
constructionEnv rfprec n = do
stdgen <- newStdGen
let ks = uniformWithoutReplacement @UTxOKey stdgen (2 * n)
pure $ serialise <$> mkPages rfprec (NonEmpty.fromList ks)

-- | Used for benchmarking the incremental construction of a 'CompactIndex'.
constructCompactIndex ::
ChunkSize
-> Pages SerialisedKey -- ^ Pages to add in succession
-> CompactIndex
constructCompactIndex (ChunkSize csize) (Pages (RFPrecision rfprec) ks) =
-- under the hood, 'fromList' uses the incremental construction interface
fromList rfprec csize ks
2 changes: 2 additions & 0 deletions bench/micro/Main.hs
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,11 @@
module Main (main) where

import qualified Bench.Database.LSMTree.Internal.Run.BloomFilter
import qualified Bench.Database.LSMTree.Internal.Run.Index.Compact
import Criterion.Main

main :: IO ()
main = defaultMain [
Bench.Database.LSMTree.Internal.Run.BloomFilter.benchmarks
, Bench.Database.LSMTree.Internal.Run.Index.Compact.benchmarks
]
13 changes: 11 additions & 2 deletions lsm-tree.cabal
Original file line number Diff line number Diff line change
Expand Up @@ -95,14 +95,20 @@ library lsm-tree-utils
exposed-modules:
Database.LSMTree.Extras
Database.LSMTree.Generators
Database.LSMTree.Util.Orphans
System.Random.Extras

build-depends:
, array
, base
, bytestring
, containers
, deepseq
, lsm-tree:{lsm-tree, bloomfilter}
, primitive
, QuickCheck
, random
, wide-word

test-suite lsm-tree-test
import: warnings
Expand Down Expand Up @@ -146,7 +152,6 @@ test-suite lsm-tree-test
, io-sim >=1.2
, lsm-tree:{lsm-tree, lsm-tree-utils}
, mtl
, primitive
, QuickCheck
, quickcheck-dynamic
, quickcheck-instances
Expand All @@ -165,11 +170,15 @@ benchmark lsm-tree-micro-bench
type: exitcode-stdio-1.0
hs-source-dirs: bench/micro
main-is: Main.hs
other-modules: Bench.Database.LSMTree.Internal.Run.BloomFilter
other-modules:
Bench.Database.LSMTree.Internal.Run.BloomFilter
Bench.Database.LSMTree.Internal.Run.Index.Compact

build-depends:
, base
, containers
, criterion
, deepseq
, lsm-tree:{lsm-tree, bloomfilter, lsm-tree-utils}
, QuickCheck
, random
Expand Down
9 changes: 9 additions & 0 deletions src/Database/LSMTree/Internal/Serialise.hs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
{-# LANGUAGE CPP #-}
{-# LANGUAGE DerivingStrategies #-}
{-# LANGUAGE GeneralisedNewtypeDeriving #-}
{-# LANGUAGE InstanceSigs #-}
{-# LANGUAGE MagicHash #-}
{-# LANGUAGE StandaloneKindSignatures #-}
{-# LANGUAGE UnboxedTuples #-}
Expand All @@ -15,9 +16,11 @@ module Database.LSMTree.Internal.Serialise (
) where

import Data.Bits (Bits (shiftL, shiftR))
import qualified Data.ByteString.Short as SBS
import Data.ByteString.Short.Internal (ShortByteString (SBS))
import Data.Kind (Type)
import Data.Primitive.ByteArray (ByteArray (..))
import Database.LSMTree.Internal.Run.BloomFilter (Hashable (..))
import GHC.Exts
import GHC.Word

Expand Down Expand Up @@ -46,6 +49,12 @@ instance Ord SerialisedKey where
(SerialisedKey (ByteArray skey1#)) `compare` (SerialisedKey (ByteArray skey2#)) =
SBS skey1# `compare` SBS skey2#

-- TODO: optimisation
instance Hashable SerialisedKey where
hashIO32 :: SerialisedKey -> Word32 -> IO Word32
hashIO32 (SerialisedKey (ByteArray ba#)) =
hashIO32 (SBS.fromShort $ SBS ba#)

-- | @'topBits16' n k@ slices the first @n@ bits from the /top/ of the
-- serialised key @k@. Returns the string of bits as a 'Word16'.
--
Expand Down
87 changes: 77 additions & 10 deletions src/utils/Database/LSMTree/Generators.hs
Original file line number Diff line number Diff line change
@@ -1,8 +1,18 @@
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE DeriveAnyClass #-}
{-# LANGUAGE DeriveFunctor #-}
{-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE DerivingVia #-}
{-# LANGUAGE GeneralisedNewtypeDeriving #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# OPTIONS_GHC -Wno-orphans #-}

module Database.LSMTree.Generators (
-- * WithSerialised
WithSerialised (..)
-- * UTxO keys
, UTxOKey (..)
-- * Range-finder precision
RFPrecision (..)
, RFPrecision (..)
, rfprecInvariant
-- * Pages (non-partitioned)
, Pages (..)
Expand All @@ -14,23 +24,82 @@ module Database.LSMTree.Generators (
, chunkSizeInvariant
) where

import Control.DeepSeq (NFData)
import Data.Containers.ListUtils (nubOrd)
import Data.List (sort)
import Data.List.NonEmpty (NonEmpty)
import qualified Data.List.NonEmpty as NonEmpty
import Data.WideWord.Word256 (Word256 (..))
import Database.LSMTree.Internal.Run.BloomFilter (Hashable (..))
import Database.LSMTree.Internal.Run.Index.Compact
(rangeFinderPrecisionBounds, suggestRangeFinderPrecision)
import Database.LSMTree.Internal.Serialise (Serialise (..), topBits16)
import Database.LSMTree.Internal.Serialise (Serialise (..),
SerialisedKey, topBits16)
import Database.LSMTree.Util.Orphans ()
import GHC.Generics (Generic)
import System.Random (Uniform)
import Test.QuickCheck (Arbitrary (..), NonEmptyList (..), Property,
chooseInt, scale, tabulate)
import Text.Printf (printf)

{-------------------------------------------------------------------------------
WithSerialised
-------------------------------------------------------------------------------}

-- | Cach serialised keys
--
-- Also useful for failing tests that have keys as inputs, because the printed
-- 'WithSerialised' values will show both keys and their serialised form.
data WithSerialised k = TestKey k SerialisedKey
deriving Show

instance Eq k => Eq (WithSerialised k) where
TestKey k1 _ == TestKey k2 _ = k1 == k2

instance Ord k => Ord (WithSerialised k) where
TestKey k1 _ `compare` TestKey k2 _ = k1 `compare` k2

instance (Arbitrary k, Serialise k) => Arbitrary (WithSerialised k) where
arbitrary = do
x <- arbitrary
pure $ TestKey x (serialise x)
shrink (TestKey k _) = [TestKey k' (serialise k') | k' <- shrink k]

instance Serialise (WithSerialised k) where
serialise (TestKey _ skey) = skey

{-------------------------------------------------------------------------------
UTxO keys
-------------------------------------------------------------------------------}

-- | A model of a UTxO key (256-bit hash)
newtype UTxOKey = UTxOKey Word256
deriving stock (Show, Generic)
deriving newtype ( Eq, Ord, NFData
, Hashable, Serialise
)
deriving anyclass Uniform

instance Arbitrary UTxOKey where
arbitrary = UTxOKey <$>
(Word256 <$> arbitrary <*> arbitrary <*> arbitrary <*> arbitrary)
shrink (UTxOKey w256) = [
UTxOKey w256'
| let i256 = toInteger w256
, i256' <- shrink i256
, toInteger (minBound :: Word256) <= i256'
, toInteger (maxBound :: Word256) >= i256'
, let w256' = fromIntegral i256'
]

{-------------------------------------------------------------------------------
Range-finder precision
-------------------------------------------------------------------------------}

newtype RFPrecision = RFPrecision Int
deriving Show
deriving stock (Show, Generic)
deriving newtype Num
deriving anyclass NFData

instance Arbitrary RFPrecision where
arbitrary = RFPrecision <$> chooseInt (rfprecLB, rfprecUB)
Expand All @@ -55,7 +124,8 @@ data Pages k = Pages {
getRangeFinderPrecision :: RFPrecision
, getPages :: [(k, k)]
}
deriving Show
deriving stock (Show, Generic, Functor)
deriving anyclass NFData

instance (Arbitrary k, Ord k, Serialise k) => Arbitrary (Pages k) where
arbitrary = mkPages <$>
Expand All @@ -66,10 +136,6 @@ instance (Arbitrary k, Ord k, Serialise k) => Arbitrary (Pages k) where
] <> [
Pages rfprec' ks
| rfprec' <- shrink rfprec, pagesInvariant (Pages rfprec' ks)
] <> [
Pages rfprec' ks'
| ks' <- shrink ks
, rfprec' <- shrink rfprec, pagesInvariant (Pages rfprec' ks')
]

mkPages ::
Expand Down Expand Up @@ -132,7 +198,8 @@ labelPages (Pages (RFPrecision rfprec) ks) =
-------------------------------------------------------------------------------}

newtype ChunkSize = ChunkSize Int
deriving Show
deriving stock Show
deriving newtype Num

instance Arbitrary ChunkSize where
arbitrary = ChunkSize <$> chooseInt (chunkSizeLB, chunkSizeUB)
Expand Down
Loading

0 comments on commit 2a99c3e

Please sign in to comment.