Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix discard ratio and maxSuccess interacting poorly #371

Merged
merged 7 commits into from
Mar 21, 2024
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions src/Test/QuickCheck/Features.hs
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ prop_noNewFeatures feats prop =
-- > where count x xs = length (filter (== x) xs)
--
-- 'labelledExamples' generates three example test cases, one for each label:
--
--
-- >>> labelledExamples prop_delete
-- *** Found example of count x xs == 0
-- 0
Expand Down Expand Up @@ -100,7 +100,7 @@ labelledExamplesWithResult args prop =
mapM_ (putLine (terminal state)) (failingTestCase res)
putStrLn ""
loop (Set.union feats feats')
state{randomSeed = usedSeed res, computeSize = computeSize state `at0` usedSize res}
state{randomSeed = usedSeed res, replayStartSize = Just $ usedSize res}
_ -> do
out <- terminalOutput nullterm
putStr out
Expand Down
6 changes: 4 additions & 2 deletions src/Test/QuickCheck/State.hs
Original file line number Diff line number Diff line change
Expand Up @@ -25,12 +25,14 @@ data State
, maxDiscardedRatio :: Int
-- ^ maximum number of discarded tests per successful test
, coverageConfidence :: Maybe Confidence
-- ^ required coverage confidence
, computeSize :: Int -> Int -> Int
-- ^ how to compute the size of test cases from
-- #tests and #discarded tests
, numTotMaxShrinks :: !Int
-- ^ How many shrinks to try before giving up
, replayStartSize :: Maybe Int
-- ^ Size to start at when replaying
, maxTestSize :: !Int
-- ^ Maximum size of test

-- dynamic
, numSuccessTests :: !Int
Expand Down
41 changes: 26 additions & 15 deletions src/Test/QuickCheck/Test.hs
Original file line number Diff line number Diff line change
Expand Up @@ -206,9 +206,8 @@ withState a test = (if chatty a then withStdioTerminal else withNullTerminal) $
, maxSuccessTests = maxSuccess a
, coverageConfidence = Nothing
, maxDiscardedRatio = maxDiscardRatio a
, computeSize = case replay a of
Nothing -> computeSize'
Just (_,s) -> computeSize' `at0` s
, replayStartSize = snd <$> replay a
, maxTestSize = maxSize a
, numTotMaxShrinks = maxShrinks a
, numSuccessTests = 0
, numDiscardedTests = 0
Expand All @@ -223,17 +222,28 @@ withState a test = (if chatty a then withStdioTerminal else withNullTerminal) $
, numTryShrinks = 0
, numTotTryShrinks = 0
}
where computeSize' n d
-- e.g. with maxSuccess = 250, maxSize = 100, goes like this:
-- 0, 1, 2, ..., 99, 0, 1, 2, ..., 99, 0, 2, 4, ..., 98.
| n `roundTo` maxSize a + maxSize a <= maxSuccess a ||
n >= maxSuccess a ||
maxSuccess a `mod` maxSize a == 0 = (n `mod` maxSize a + d `div` 10) `min` maxSize a
| otherwise =
((n `mod` maxSize a) * maxSize a `div` (maxSuccess a `mod` maxSize a) + d `div` 10) `min` maxSize a
n `roundTo` m = (n `div` m) * m
at0 f s 0 0 = s
at0 f s n d = f n d

computeSize :: State -> Int
computeSize MkState{replayStartSize = Just s,numSuccessTests = 0,numRecentlyDiscardedTests=0} = s
computeSize MkState{maxSuccessTests = ms, maxTestSize = mts, maxDiscardedRatio = md,numSuccessTests=n,numRecentlyDiscardedTests=d}
-- e.g. with maxSuccess = 250, maxSize = 100, goes like this:
-- 0, 1, 2, ..., 99, 0, 1, 2, ..., 99, 0, 2, 4, ..., 98.
| n `roundTo` mts + mts <= ms ||
n >= ms ||
ms `mod` mts == 0 = (n `mod` mts + d `div` dDenom) `min` mts
| otherwise =
((n `mod` mts) * mts `div` (ms `mod` mts) + d `div` dDenom) `min` mts
where
-- The inverse of the rate at which we increase size as a function of discarded tests
-- if the discard ratio is high we can afford this to be slow, but if the discard ratio
-- is low we risk bowing out too early
dDenom
| md > 0 = (ms * md `div` 3) `clamp` (1, 10)
| otherwise = 1 -- Doesn't matter because there will be no discards allowed
n `roundTo` m = (n `div` m) * m

clamp :: Ord a => a -> (a, a) -> a
clamp x (l, h) = max l (min x h)

-- | Tests a property and prints the results and all test cases generated to 'stdout'.
-- This is just a convenience function that means the same as @'quickCheck' . 'verbose'@.
Expand Down Expand Up @@ -345,7 +355,7 @@ runATest st f =
Just confidence | (1 + numSuccessTests st) `mod` 100 == 0 && powerOfTwo ((1 + numSuccessTests st) `div` 100) ->
addCoverageCheck confidence st f
_ -> f
let size = computeSize st (numSuccessTests st) (numRecentlyDiscardedTests st)
let size = computeSize st
MkRose res ts <- protectRose (reduceRose (unProp (unGen (unProperty f_or_cov) rnd1 size)))
res <- callbackPostTest st res

Expand Down Expand Up @@ -378,6 +388,7 @@ runATest st f =
-- Don't add coverage info from this test
st{ numDiscardedTests = numDiscardedTests st' + 1
, numRecentlyDiscardedTests = numRecentlyDiscardedTests st' + 1
, maxSuccessTests = fromMaybe (maxSuccessTests st) (maybeNumTests res)
, maxDiscardedRatio = fromMaybe (maxDiscardedRatio st) (maybeDiscardedRatio res)
, randomSeed = rnd2
} f
Expand Down
33 changes: 24 additions & 9 deletions tests/DiscardRatio.hs
Original file line number Diff line number Diff line change
Expand Up @@ -10,13 +10,16 @@ assert s False = do
exitFailure
assert _ _ = pure ()

quickCheckYes, quickCheckNo :: Property -> IO ()
quickCheckYes p = do
res <- quickCheckResult p
quickCheckYesWith, quickCheckNoWith :: Testable p => Args -> p -> IO ()
quickCheckYesWith args p = do
res <- quickCheckWithResult args p
unless (isSuccess res) exitFailure
quickCheckNo p = do
res <- quickCheckResult p
quickCheckNoWith args p = do
res <- quickCheckWithResult args p
when (isSuccess res) exitFailure
quickCheckYes, quickCheckNo :: Testable p => p -> IO ()
quickCheckYes = quickCheckYesWith stdArgs
quickCheckNo = quickCheckNoWith stdArgs

check :: Result -> Int -> Int -> IO ()
check res n d = do
Expand All @@ -25,18 +28,30 @@ check res n d = do

main :: IO ()
main = do
putStrLn "Testing: False ==> True"
putStrLn "Expecting gave up after 200 tries: False ==> True"
res <- quickCheckResult $ withDiscardRatio 2 $ False ==> True
check res 0 200
res <- quickCheckWithResult stdArgs{maxDiscardRatio = 2} $ False ==> True
check res 0 200

putStrLn "Testing: x == x"
putStrLn "\nExpecting success after 100 tests: x == x"
res <- quickCheckResult $ withDiscardRatio 2 $ \ x -> (x :: Int) == x
check res 100 0
res <- quickCheckWithResult stdArgs{maxDiscardRatio = 2} $ \ x -> (x :: Int) == x
check res 100 0

-- The real ratio is 20, if 1 works or 40 doesn't it's
-- probably because we broke something!
let p50 = forAll (choose (1, 1000)) $ \ x -> (x :: Int) < 50 ==> True
putStrLn "Expecting failure (discard ratio 1): x < 50 ==> True"
putStrLn "\nExpecting failure (discard ratio 1): x < 50 ==> True"
quickCheckNo $ withDiscardRatio 1 p50
putStrLn "Expecting success (discard ratio 40): x < 50 ==> True"
quickCheckNoWith stdArgs{maxDiscardRatio = 1} p50
putStrLn "\nExpecting success (discard ratio 40): x < 50 ==> True"
quickCheckYes $ withDiscardRatio 40 p50
quickCheckYesWith stdArgs{maxDiscardRatio = 40} p50

-- This was brought to our attention by @robx in issue #338
let p k k' = k /= k' ==> (k :: Int) /= k'
putStrLn "\nExpecting success (maxSuccess = 1): k /= k' ==> k /= k'"
quickCheckYes $ withMaxSuccess 1 p
MaximilianAlgehed marked this conversation as resolved.
Show resolved Hide resolved
quickCheckYesWith stdArgs{maxSuccess = 1} p