-
Notifications
You must be signed in to change notification settings - Fork 7
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #441 from IntersectMBO/jeltsch/growing-vector/tests
Add tests for growing vectors
- Loading branch information
Showing
3 changed files
with
225 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,222 @@ | ||
module Test.Database.LSMTree.Internal.Vector.Growing (tests) where | ||
|
||
import Prelude hiding (head, length, tail) | ||
|
||
import Control.Category ((>>>)) | ||
import Control.Monad.ST.Strict (ST, runST) | ||
import qualified Data.List as List (length) | ||
import Data.Vector (Vector, toList) | ||
import Database.LSMTree.Internal.Vector.Growing (GrowingVector, | ||
append, freeze, new) | ||
import Test.QuickCheck (Arbitrary (arbitrary, shrink), Gen, | ||
NonNegative (NonNegative, getNonNegative), | ||
Positive (Positive), Property, Small (Small), Testable, | ||
chooseInt, coverTable, shrinkList, shrinkMap, shrinkMapBy, | ||
sized, tabulate, (===)) | ||
import Test.Tasty (TestTree, testGroup) | ||
import Test.Tasty.QuickCheck (testProperty) | ||
|
||
-- * Tests | ||
|
||
tests :: TestTree | ||
tests = testGroup "Test.Database.LSMTree.Internal.Vector.Growing" $ | ||
[ | ||
testProperty "Final vector is correct" | ||
prop_finalVectorIsCorrect | ||
] | ||
|
||
-- * Utilities | ||
|
||
-- ** Segments | ||
|
||
data Segment a = Replicate Int a deriving stock Show | ||
|
||
segmentLength :: Segment a -> Int | ||
segmentLength (Replicate count _) = max 0 count | ||
|
||
segmentToList :: Segment a -> [a] | ||
segmentToList (Replicate count val) = replicate count val | ||
|
||
appendSegment :: GrowingVector s a -> Segment a -> ST s () | ||
appendSegment vec (Replicate count val) = append vec count val | ||
|
||
-- ** Vector construction | ||
|
||
{- | ||
Constructs an ordinary vector by creating a growing vector, appending | ||
segments to it, and finally freezing it. | ||
-} | ||
finalVector :: Int -- Initial buffer size | ||
-> [Segment a] -- Segments | ||
-> Vector a -- Final vector | ||
finalVector initialBufferSize segments = runST $ do | ||
vec <- new initialBufferSize | ||
mapM_ (appendSegment vec) segments | ||
freeze vec | ||
|
||
{- | ||
Supplies the final contents of a growing vector for constructing a property. | ||
The resulting property additionally provides information about the | ||
occurrence of buffer size scaling exponents, where a buffer size scaling | ||
exponent is the binary logarithm of the ratio between the buffer size after | ||
and before appending a segment. If buffer enlargement for an append would | ||
happen in cycles where in each cycle the buffer size is doubled, then the | ||
buffer size scaling exponent would be equal to the number of such cycles. | ||
Therefore, a buffer size scaling exponent tells if the buffer is enlarged | ||
and, if yes, how much. | ||
-} | ||
withFinalVector | ||
:: Testable prop | ||
=> Int -- Initial buffer size | ||
-> [Segment a] -- Segments | ||
-> (Vector a -> prop) -- Property from final vector | ||
-> Property -- Resulting property | ||
withFinalVector initialBufferSize segments consumer | ||
= coverTable | ||
"Buffer size scaling exponents" | ||
[(show scalingExp, 10) | scalingExp <- [0 .. 3] :: [Int]] | ||
$ | ||
tabulate | ||
"Buffer size scaling exponents" | ||
(show <$> bufferSizeScalingExponents availableBufferSizes 0 segments) | ||
$ | ||
consumer (finalVector initialBufferSize segments) | ||
where | ||
|
||
availableBufferSizes :: [Int] | ||
availableBufferSizes = iterate (* 2) initialBufferSize | ||
{- | ||
We do not need to account for overflow here, because the lengths of the | ||
vectors we construct are small compared to @maxBound :: Int@. | ||
-} | ||
|
||
bufferSizeScalingExponents :: [Int] -> Int -> [Segment a] -> [Int] | ||
bufferSizeScalingExponents _ _ [] | ||
= [] | ||
bufferSizeScalingExponents availableSizes length (head : tail) | ||
= List.length insufficient : | ||
bufferSizeScalingExponents sufficient length' tail | ||
where | ||
|
||
length' :: Int | ||
!length' = length + segmentLength head | ||
|
||
insufficient, sufficient :: [Int] | ||
(insufficient, sufficient) = span (< length') availableSizes | ||
|
||
-- * Properties to test | ||
|
||
prop_finalVectorIsCorrect :: Positive (Small Int) | ||
-> Segments Integer | ||
-> Property | ||
prop_finalVectorIsCorrect (Positive (Small initialBufferSize)) | ||
(Segments segments) | ||
= withFinalVector initialBufferSize segments $ \ vec -> | ||
toList vec === concatMap segmentToList segments | ||
|
||
-- * Test case generation and shrinking | ||
|
||
generateSegment :: Arbitrary a => Int -> Gen (Segment a) | ||
generateSegment maxCount = Replicate <$> chooseInt (0, maxCount) <*> arbitrary | ||
|
||
shrinkSegment :: Arbitrary a => Segment a -> [Segment a] | ||
shrinkSegment (Replicate count val) | ||
= [Replicate count' val | count' <- shrinkNat count] ++ | ||
[Replicate count val' | val' <- shrink val] | ||
where | ||
|
||
shrinkNat :: Int -> [Int] | ||
shrinkNat = shrinkMap getNonNegative NonNegative | ||
|
||
{- | ||
A list of segments to be appended to an initially empty vector. 'Segments a' | ||
is isomorphic to '[Segment a]' but has a special way of generating test | ||
cases, which avoids skewing with respect to buffer size scaling exponents. | ||
The key issue of segment list generation is how to generate the length of an | ||
individual segment. It seems worthwhile to randomly pick a natural number | ||
smaller than or equal to some maximum length, using a uniform distribution. | ||
The question to be answered is how to choose the maximum lengths for the | ||
different segments. | ||
A naïve approach is to use a common maximum length for all segments in a | ||
particular list, possibly computed from the size parameter. However, this | ||
results in most appends not enlarging the buffer, meaning that the buffer | ||
size scaling exponent is zero in most cases. This is because, as the buffer | ||
size increases, the number of elements needed to cause the next buffer | ||
enlargement increases too. | ||
Note that it does not help much to choose the maximum segment length large | ||
compared to the initial buffer size. If this is done, then the buffer is | ||
enlarged very quickly, likely during the first append, and the distributions | ||
of buffer size scaling exponents up to this first enlargement may actually | ||
be very good. However, after the first enlargement, the buffer size is | ||
typically of the same order of magnitude as the maximum segment length, so | ||
that the buffer size scaling exponents of the following appends are likely | ||
to be zero or close to it, with further buffer enlargements making the | ||
situation only worse. | ||
A better idea is to choose the maximum length of each segment but the first | ||
proportional to the length of the vector just before appending it (for the | ||
first segment, such a choice is not appropriate, because it would lead to a | ||
maximum length of zero). With this approach, the distribution of buffer size | ||
scaling exponents stays roughly the same as soon as the buffer has been | ||
enlarged for the first time, because then the ratio between the current | ||
buffer size and the current vector length is always in the interval \([1, | ||
2)\) and thus varies only slightly. Reasonable distributions of buffer size | ||
scaling exponents can be achieved by choosing the ratio between maximum | ||
segment lengths and corresponding current-vector lengths appropriately. | ||
A downside of this approach is that it requires tracking the current vector | ||
length. This tracking can be avoided by considering only the /expected/ | ||
current vector length and choosing the maximum segment length proportional | ||
to it. The expected length of a vector is the sum of the expected lengths of | ||
the segments constituting it, and the expected length of a segment is | ||
proportional to the maximum length used for generating it. Therefore, this | ||
modified solution boils down to choosing the maximum length of each segment | ||
but the first proportional to the sum of the maximum lengths of the segments | ||
preceding it. | ||
Note that, with the approach just set out, the maximum lengths of the | ||
segments in a segment list almost form an exponential sequence, whose base | ||
is determined by the chosen ratio between maximum segment lengths and | ||
corresponding sums of maximum predecessor segment lengths. Therefore, a | ||
similarly good, but simpler, solution is to have the maximum segment lengths | ||
precisely form an exponential sequence. This is the approach that we use in | ||
our segment list generation algorithm. | ||
The concrete exponential sequences that we employ start with the size | ||
parameter and use 3 as the base of the exponentiation. Since initial buffer | ||
sizes are generated using a uniform distribution with the size parameter as | ||
the maximum, the choice of the size parameter as the first segment’s maximum | ||
length leads to non-trivial distributions of buffer size scaling exponents | ||
for the appends up to the one that causes the first buffer enlargement. The | ||
choice of 3 as the exponentiation’s base leads to a good overall | ||
distribution of buffer size scaling exponents, as experiments have shown. | ||
Because of the exponential growth of maximum segment lengths, vectors | ||
constructed during testing get very large for longer segment lists. | ||
Therefore, we do not pick the lengths of segment lists in the usual way but | ||
instead make all segment lists have a common, small length. Concretely, we | ||
use 9 as this common length, because this leads to vectors that are not | ||
trivial but still consume reasonable amounts of memory. | ||
-} | ||
newtype Segments a = Segments [Segment a] deriving stock Show | ||
|
||
instance Arbitrary a => Arbitrary (Segments a) where | ||
|
||
arbitrary = sized $ iterate (* scalingFactor) >>> | ||
take count >>> | ||
mapM generateSegment >>> | ||
fmap Segments | ||
where | ||
|
||
scalingFactor :: Int | ||
scalingFactor = 3 | ||
|
||
count :: Int | ||
count = 9 | ||
|
||
shrink = shrinkMapBy Segments (\ (Segments segments) -> segments) $ | ||
shrinkList shrinkSegment |