diff --git a/QuickCheck.cabal b/QuickCheck.cabal index 2b7646b0..f5d681b7 100644 --- a/QuickCheck.cabal +++ b/QuickCheck.cabal @@ -102,7 +102,6 @@ library Test.QuickCheck.Test, Test.QuickCheck.Text, Test.QuickCheck.Poly, - Test.QuickCheck.State, Test.QuickCheck.Random, Test.QuickCheck.Exception, Test.QuickCheck.Features diff --git a/README b/README index e9e5b57d..d5ab78c7 100644 --- a/README +++ b/README @@ -1,3 +1,36 @@ +Roberts fork: this fork contains experimental support for running tests in parallel, and shrinking in parallel. +Only the internal evaluation of a property is changed, so the API of QC remains unchanged. There is a module +`Test.QuickCheck.Features` which I've completely commented out for now, so that will obviously not work. + +In order to try this out yourself, you must follow three steps: + +1: You need to make sure cabal knows where my fork of QC exists. You do this by either cloning this repository onto your local machine and pointing your `cabal.project` to it. +You do this e.g. by adding the line `packages: *.cabal /QuickCheck.cabal`. You can also optionally point your cabal to this remote repository. You do this by +editing your `cabal.project` to say +``` +source-repository-package + type: git + location: https://github.com/Rewbert/quickcheck.git + -- optionally also add this to point to a particular commit, otherwise you will always get the freshest master commit + -- tag: +``` + +2: You need to add some flags when you compile your code. Specifically, `-threaded -feager-blackholing -rtsopts`. + +3: Finally, all that is left is to change the call to `quickCheck` with a call to `quickCheckPar`. If you don't want parallel shrinking, you should call `quickCheckParWith (stdArgs { parallelShrinking = False}) property`. + +4: You also need to pass in the runtime option that actually creates more HECs. You need to either instead of `cabal run executable` do `cabal run executable -- +RTS -N` or `-Nx` where x is a number between 1 and the number of cores you have. You can also ddd another compilation option `-with-rtsopts=-N` or `-with-rtsopts=-Nx` + +As a sanity check of whether you are using my fork or not, if you run `quickCheckPar` with just 1 HEC available, the word `donkey` will be printed. + + + + + + + + + This is QuickCheck 2, a library for random testing of program properties. Add `QuickCheck` to your package dependencies to use it in tests or REPL. diff --git a/src/Test/QuickCheck.hs b/src/Test/QuickCheck.hs index 405ced04..164a0290 100644 --- a/src/Test/QuickCheck.hs +++ b/src/Test/QuickCheck.hs @@ -51,6 +51,56 @@ module Test.QuickCheck , verboseCheckWith , verboseCheckWithResult , verboseCheckResult + -- ** Running tests in parallel + {- | After intense labour by Lord Robert von Krook Af Göhteborgh, the internal + testing loop can be instructed to run tests in parallel. Note. Not running properties + in parallel, but the tests of a property. + + As an example, running the property above with 4 HECs + +@ +quickCheckPar $ withMaxSuccess 10000 prop_reverse ++++ OK, passed 10000 tests + tester 0: 2693 tests + tester 1: 2514 tests + tester 2: 2503 tests + tester 3: 2290 tests +@ + + To make use of this functionality, GHC needs the options @-threaded@ and @-rtsopts@. + Furthermore, the runtime options need to specify that more HECs should be used, with + the @-with-rtsopts=-N@ flag. You could optionally specify exactly how many HECs to + use, e.g @-with-rtsopts=-N4@. This is where the API fetches the number of parallel + workers to launch. It will be equal to however many you instruct the RTS to use. + I've found @-feager-blackholing@ to benefit parallel Haskell before. + + Example of an options section in a cabal file + +@ +ghc-options: + -threaded + -rtsopts + -feager-blackholing + -with-rtsopts=-N4 +@ + + The parallelism is implemented using @Control.Concurrent@ and @forkIO@. Instead of + running one sequential test loop, quickCheck will spawn n sequential test loops + with @forkIO@. The threads are all assigned an equal share of the desired number of + tests to run, but by default attempt to steal the right to run more tests from + sibling threads if they run out. Please see `rightToWorkSteal`. + + The functions below behave the same as their non-parallel counterparts, with the + exception that they ask the RTS how many schedulers are available, and populate the + @numTesters@ field with that number. E.g @quickCheckPar p@ when you compiled with + @-N4@ is equivalent to @quickCheckWith (stdArgs { numTesters = 4 }) p@. + + -} + , quickCheckPar + , SizeStrategy(..) + , quickCheckParWith + , quickCheckParResult + , quickCheckParWithResult #ifndef NO_TEMPLATE_HASKELL -- ** Testing all properties in a module @@ -270,6 +320,9 @@ module Test.QuickCheck , withMaxSuccess , within , discardAfter + , withDiscardRatio + , withMaxSize + , withMaxShrinks , once , again , mapSize @@ -307,7 +360,6 @@ module Test.QuickCheck -------------------------------------------------------------------------- -- imports - import Test.QuickCheck.Gen import Test.QuickCheck.Arbitrary import Test.QuickCheck.Modifiers @@ -318,7 +370,6 @@ import Test.QuickCheck.Exception import Test.QuickCheck.Function #endif import Test.QuickCheck.Features -import Test.QuickCheck.State #ifndef NO_TEMPLATE_HASKELL import Test.QuickCheck.All #endif diff --git a/src/Test/QuickCheck/Features.hs b/src/Test/QuickCheck/Features.hs index 8aedfa42..d9eef079 100644 --- a/src/Test/QuickCheck/Features.hs +++ b/src/Test/QuickCheck/Features.hs @@ -5,7 +5,6 @@ import Test.QuickCheck.Property hiding (Result, reason) import qualified Test.QuickCheck.Property as P import Test.QuickCheck.Test import Test.QuickCheck.Gen -import Test.QuickCheck.State import Test.QuickCheck.Text import qualified Data.Set as Set import Data.Set(Set) @@ -17,16 +16,16 @@ features :: [String] -> Set String -> Set String features labels classes = Set.fromList labels `Set.union` classes -prop_noNewFeatures :: Testable prop => Set String -> prop -> Property -prop_noNewFeatures feats prop = - mapResult f prop - where - f res = - case ok res of - Just True - | not (features (P.labels res) (Set.fromList (P.classes res)) `Set.isSubsetOf` feats) -> - res{ok = Just False, P.reason = "New feature found"} - _ -> res +-- prop_noNewFeatures :: Testable prop => Set String -> prop -> Property +-- prop_noNewFeatures feats prop = +-- mapResult f prop +-- where +-- f res = +-- case ok res of +-- Just True +-- | not (features (P.labels res) (Set.fromList (P.classes res)) `Set.isSubsetOf` feats) -> +-- res{ok = Just False, P.reason = "New feature found"} +-- _ -> res -- | Given a property, which must use 'label', 'collect', 'classify' or 'cover' -- to associate labels with test cases, find an example test case for each possible label. @@ -81,26 +80,26 @@ labelledExamplesResult prop = labelledExamplesWithResult stdArgs prop -- | A variant of 'labelledExamples' that takes test arguments and returns a result. labelledExamplesWithResult :: Testable prop => Args -> prop -> IO Result -labelledExamplesWithResult args prop = - withState args $ \state -> do - let - loop :: Set String -> State -> IO Result - loop feats state = withNullTerminal $ \nullterm -> do - res <- test state{terminal = nullterm} (property (prop_noNewFeatures feats prop)) - let feats' = features (failingLabels res) (failingClasses res) - case res of - Failure{reason = "New feature found"} -> do - putLine (terminal state) $ - "*** Found example of " ++ - concat (intersperse ", " (Set.toList (feats' Set.\\ feats))) - mapM_ (putLine (terminal state)) (failingTestCase res) - putStrLn "" - loop (Set.union feats feats') - state{randomSeed = usedSeed res, computeSize = computeSize state `at0` usedSize res} - _ -> do - out <- terminalOutput nullterm - putStr out - return res - at0 f s 0 0 = s - at0 f s n d = f n d - loop Set.empty state +labelledExamplesWithResult args prop = undefined -- TODO fix this + -- withState args (\state -> do + -- let + -- loop :: Set String -> State -> IO Result + -- loop feats state = withNullTerminal $ \nullterm -> do + -- res <- test state{terminal = nullterm} (property (prop_noNewFeatures feats prop)) + -- let feats' = features (failingLabels res) (failingClasses res) + -- case res of + -- Failure{reason = "New feature found"} -> do + -- putLine (terminal state) $ + -- "*** Found example of " ++ + -- concat (intersperse ", " (Set.toList (feats' Set.\\ feats))) + -- mapM_ (putLine (terminal state)) (failingTestCase res) + -- putStrLn "" + -- loop (Set.union feats feats') + -- state{randomSeed = usedSeed res, computeSize = computeSize state `at0` usedSize res} + -- _ -> do + -- out <- terminalOutput nullterm + -- putStr out + -- return res + -- at0 f s 0 0 = s + -- at0 f s n d = f n d + -- loop Set.empty state) Nothing 0 diff --git a/src/Test/QuickCheck/Monadic.hs b/src/Test/QuickCheck/Monadic.hs index e9c0af84..e1398b79 100644 --- a/src/Test/QuickCheck/Monadic.hs +++ b/src/Test/QuickCheck/Monadic.hs @@ -63,6 +63,7 @@ module Test.QuickCheck.Monadic ( , forAllM , monitor , stop + , graceful -- * Run functions , monadic @@ -85,6 +86,8 @@ import Control.Monad(liftM, liftM2) import Control.Monad.ST import Control.Applicative +import Control.Exception hiding (assert) +import Control.Concurrent #ifndef NO_TRANSFORMERS import Control.Monad.IO.Class @@ -273,6 +276,51 @@ monadic' (MkPropertyM m) = m (\prop -> return (return (property prop))) monadicIO :: Testable a => PropertyM IO a -> Property monadicIO = monadic ioProperty +{- | Add a graceful optional termination of an IO action. If your test needs to perform +some IO that, if interrupted by CTRL-C being pressed, needs to do some teardown, this +combinator is for you! + +Example: + +@ +prop_graceful :: Property +prop_graceful = monadicIO $ do + -- first IO action that might leave unwanted artifacts + c1 <- graceful + (do writeFile "firstfile.txt" "hi" + c <- readFile "firstfile.txt" + removeFile "firstfile.txt" + return c) + -- cleanup function only called if ctrl-c pressed + (do b <- doesFileExist "firstfile.txt" + if b then removeFile "firstfile.txt" else return ()) + + -- second IO action that might leave unwanted artifacts + c2 <- graceful + (do writeFile "secondfile.txt" "hi" + c <- readFile "secondfile.txt" + removeFile "secondfile.txt" + return c) + -- cleanup function only called if ctrl-c pressed + (do b <- doesFileExist "secondfile.txt" + if b then removeFile "secondfile.txt" else return ()) + + -- donkey property + assert (c1 == c2) +@ + +-} +graceful :: IO a -> IO () -> PropertyM IO a +-- TODO piggybacking on UserInterrupt here, but this should really be a QC internal exception +-- this was just placed here for my evaluation +-- +-- design of this combinator might have to change +graceful prop ioa = run $ prop `catch` \UserInterrupt -> do + ioa + tid <- myThreadId + throwTo tid UserInterrupt -- defer to default handler + error "this will never evaluate, but will have the correct type!" + #ifndef NO_ST_MONAD -- | Runs the property monad for 'ST'-computations. -- diff --git a/src/Test/QuickCheck/Property.hs b/src/Test/QuickCheck/Property.hs index 053e153e..f9973764 100644 --- a/src/Test/QuickCheck/Property.hs +++ b/src/Test/QuickCheck/Property.hs @@ -17,7 +17,8 @@ import Test.QuickCheck.Gen.Unsafe import Test.QuickCheck.Arbitrary import Test.QuickCheck.Text( isOneLine, putLine ) import Test.QuickCheck.Exception -import Test.QuickCheck.State( State(terminal), Confidence(..) ) +import Test.QuickCheck.Text +import Test.QuickCheck.Random #ifndef NO_TIMEOUT import System.Timeout(timeout) @@ -25,6 +26,8 @@ import System.Timeout(timeout) import Data.Maybe import Control.Applicative import Control.Monad +import Control.Concurrent +import Control.Exception import qualified Data.Map as Map import Data.Map(Map) import qualified Data.Set as Set @@ -36,6 +39,7 @@ import Control.DeepSeq import Data.Typeable (Typeable) #endif import Data.Maybe +import Data.IORef -------------------------------------------------------------------------- -- fixities @@ -265,31 +269,37 @@ data CallbackKind = Counterexample -- ^ Affected by the 'verbose' combinator -- | The result of a single test. data Result = MkResult - { ok :: Maybe Bool + { ok :: Maybe Bool -- ^ result of the test case; Nothing = discard - , expect :: Bool + , expect :: Bool -- ^ indicates what the expected result of the property is - , reason :: String + , reason :: String -- ^ a message indicating what went wrong - , theException :: Maybe AnException + , theException :: Maybe AnException -- ^ the exception thrown, if any - , abort :: Bool + , abort :: Bool -- ^ if True, the test should not be repeated - , maybeNumTests :: Maybe Int + , maybeNumTests :: Maybe Int -- ^ stop after this many tests - , maybeCheckCoverage :: Maybe Confidence + , maybeCheckCoverage :: Maybe Confidence -- ^ required coverage confidence - , labels :: [String] + , maybeDiscardedRatio :: Maybe Int + -- ^ maximum number of discarded tests per succesful test + , maybeMaxShrinks :: Maybe Int + -- ^ maximum number of shrinks + , maybeMaxTestSize :: Maybe Int + -- ^ maximum test size + , labels :: [String] -- ^ test case labels - , classes :: [String] + , classes :: [(String, Bool)] -- ^ test case classes - , tables :: [(String, String)] + , tables :: [(String, String)] -- ^ test case tables - , requiredCoverage :: [(Maybe String, String, Double)] + , requiredCoverage :: [(Maybe String, String, Double)] -- ^ required coverage - , callbacks :: [Callback] + , callbacks :: [Callback] -- ^ the callbacks for this test case - , testCase :: [String] + , testCase :: [String] -- ^ the generated test case } @@ -315,19 +325,22 @@ succeeded, failed, rejected :: Result where result = MkResult - { ok = undefined - , expect = True - , reason = "" - , theException = Nothing - , abort = True - , maybeNumTests = Nothing - , maybeCheckCoverage = Nothing - , labels = [] - , classes = [] - , tables = [] - , requiredCoverage = [] - , callbacks = [] - , testCase = [] + { ok = undefined + , expect = True + , reason = "" + , theException = Nothing + , abort = True + , maybeNumTests = Nothing + , maybeCheckCoverage = Nothing + , maybeDiscardedRatio = Nothing + , maybeMaxShrinks = Nothing + , maybeMaxTestSize = Nothing + , labels = [] + , classes = [] + , tables = [] + , requiredCoverage = [] + , callbacks = [] + , testCase = [] } -------------------------------------------------------------------------- @@ -477,6 +490,30 @@ again = mapTotalResult (\res -> res{ abort = False }) withMaxSuccess :: Testable prop => Int -> prop -> Property withMaxSuccess n = n `seq` mapTotalResult (\res -> res{ maybeNumTests = Just n }) +-- | Configures how many times a property is allowed to be discarded before failing. +-- +-- For example, +-- +-- > quickCheck (withDiscardRatio 10 p) +-- +-- will allow @p@ to fail up to 10 times per successful test. +withDiscardRatio :: Testable prop => Int -> prop -> Property +withDiscardRatio n = n `seq` mapTotalResult (\res -> res{ maybeDiscardedRatio = Just n }) + +-- | Configure the maximum number of times a property will be shrunk. +-- +-- For example, +-- +-- > quickCheck (withMaxShrinks 100 p) +-- +-- will cause @p@ to only attempt 100 shrinks on failure. +withMaxShrinks :: Testable prop => Int -> prop -> Property +withMaxShrinks n = n `seq` mapTotalResult (\res -> res{ maybeMaxShrinks = Just n }) + +-- | Configure the maximum size a property will be tested at. +withMaxSize :: Testable prop => Int -> prop -> Property +withMaxSize n = n `seq` mapTotalResult (\res -> res{ maybeMaxTestSize = Just n }) + -- | Check that all coverage requirements defined by 'cover' and 'coverTable' -- are met, using a statistically sound test, and fail if they are not met. -- @@ -591,13 +628,12 @@ classify :: Testable prop => Bool -- ^ @True@ if the test case should be labelled. -> String -- ^ Label. -> prop -> Property -classify False _ = property -classify True s = +classify b s = #ifndef NO_DEEPSEQ s `deepseq` #endif mapTotalResult $ - \res -> res { classes = s:classes res } + \res -> res { classes = (s,b):classes res } -- | Checks that at least the given proportion of /successful/ test -- cases belong to the given class. Discarded tests (i.e. ones @@ -930,6 +966,9 @@ disjoin ps = abort = False, maybeNumTests = Nothing, maybeCheckCoverage = Nothing, + maybeDiscardedRatio = Nothing, + maybeMaxShrinks = Nothing, + maybeMaxTestSize = Nothing, labels = [], classes = [], tables = [], @@ -992,3 +1031,154 @@ total x = property (rnf x) -------------------------------------------------------------------------- -- the end. + +data QCException = QCInterrupted + deriving (Show, Typeable) + +instance Exception QCException + +{- | Strategy used to compute the synthetic number of successful tests to be used when +computing the size to use for a test. If only one thread is running, both of these +strategies produce the same result. If using more than one, they start to behave +differently. + +To compute the size to use for a property, we need both the number of successful tests +so far (the more tests we run, the bigger the size we want) and the number of tests that +we discarded in a row (if we start to discard too many we want to adjust the size). + +If each thread passed in its own thread-local number of successful tests, they would all +pass in the same numbers. To adjust for this, a thread will compute a synthetic number +to use instead of the thread-local number of successful tests so far. This can be done in +two different ways. + + 1. @Offset@ specifies that each thread should add an offset to the thread-local number + of successful tests. E.g asking for 100 tests to be run on 4 cores, the 4 testers would + use the offsets @0@, @25@, @50@ and @75@. The first number used by the first thread + be 0, while the first number used by the fourth thread will be 75. + This option is good if you want to explore different sizes from the start. However, + if the test size depends on the size there is a chance that the thread with the low + offset will exhaust its share of the tests first. When it starts to steal the right + to run more tests from its siblings, it might steal one test from the fourth thread + and then pass in its own offset to the size function, running a smaller test than + would have been run if the fourth thread ran it. + + 2. @Stride@ makes each thread take a stride, rather than incrementing the + synthetic number of tests by one each time. E.g using 4 threads and @Stride@, the + first thread will use numbers @[0,4,8,...]@, thread two will use numbers + @[1,5,9,...]@, and so on. If a thread steals the right to run another test from + another thread, it will now use a number that is close to what that thread would + have used itself. + +-} +data SizeStrategy + = Stride + -- ^ Compute numbers using a stride + | Offset + -- ^ Compute numbers by adding an offset to the thread-local number of successful tests + deriving (Show, Read) + +-- | State represents QuickCheck's internal state while testing a property. +-- The state is made visible to callback functions. +data State + = MkState + -- static + { terminal :: Terminal + -- ^ the current terminal + , maxSuccessTests :: Int + -- ^ maximum number of successful tests needed + , maxDiscardedRatio :: Int + -- ^ maximum number of discarded tests per successful test + , coverageConfidence :: Maybe Confidence + -- ^ required coverage confidence + , replayStartSize :: Maybe Int + -- ^ Size to start at when replaying + , maxTestSize :: !Int + -- ^ Maximum size of test + , numTotMaxShrinks :: !Int + -- ^ How many shrinks to try before giving up + + -- dynamic + , numSuccessTests :: !Int + -- ^ the current number of tests that have succeeded + , numDiscardedTests :: !Int + -- ^ the current number of discarded tests + , numRecentlyDiscardedTests :: !Int + -- ^ the number of discarded tests since the last successful test + , stlabels :: !(Map [String] Int) + -- ^ counts for each combination of labels (label/collect) + , stclasses :: !(Map String Int) + -- ^ counts for each class of test case (classify/cover) + , sttables :: !(Map String (Map String Int)) + -- ^ tables collected using tabulate + , strequiredCoverage :: !(Map (Maybe String, String) Double) + -- ^ coverage requirements + , expected :: !Bool + -- ^ indicates the expected result of the property + , randomSeed :: !QCGen + -- ^ the current random seed + + -- shrinking + , numSuccessShrinks :: !Int + -- ^ number of successful shrinking steps so far + , numTryShrinks :: !Int + -- ^ number of failed shrinking steps since the last successful shrink + , numTotTryShrinks :: !Int + -- ^ total number of failed shrinking steps + + -- parallelism + , numConcurrent :: Int + -- ^ Number of concurrent siblings + , numSuccessOffset :: !Int + -- ^ Offset to use when calculating numSuccessTests for computeSize + , myId :: !Int + -- ^ Id of this concurrent tester + , signalGaveUp :: IO () + -- ^ Signal to the parent that the concurrent testers should give up + , signalTerminating :: IO () + -- ^ Signal to the parent that you're terminating gracefully (not because of an exceptional event) + , signalFailureFound :: State -> QCGen -> Result -> [Rose Result] -> Int -> IO () + + , shouldUpdateAfterWithStar :: Bool + -- ^ Should the budgets be adjusted after seeing withMaxSuccess/withDiscardRatio? + , stsizeStrategy :: SizeStrategy + + , testBudget :: IORef Int + -- ^ How many tests I can successfully test, my 'budget' + , stealTests :: IO (Maybe Int) + -- ^ Steal tests from concurrent testers, if any + + , discardBudget :: IORef Int + -- ^ How many discards I can perform, my 'budget' + , stealDiscards :: IO (Maybe Int) + -- ^ Steal discards from concurrent testers, if any + } + +-- | The statistical parameters used by 'checkCoverage'. +data Confidence = + Confidence { + certainty :: Integer, + -- ^ How certain 'checkCoverage' must be before the property fails. + -- If the coverage requirement is met, and the certainty parameter is @n@, + -- then you should get a false positive at most one in @n@ runs of QuickCheck. + -- The default value is @10^9@. + -- + -- Lower values will speed up 'checkCoverage' at the cost of false + -- positives. + -- + -- If you are using 'checkCoverage' as part of a test suite, you should + -- be careful not to set @certainty@ too low. If you want, say, a 1% chance + -- of a false positive during a project's lifetime, then @certainty@ should + -- be set to at least @100 * m * n@, where @m@ is the number of uses of + -- 'cover' in the test suite, and @n@ is the number of times you expect the + -- test suite to be run during the project's lifetime. The default value + -- is chosen to be big enough for most projects. + tolerance :: Double + -- ^ For statistical reasons, 'checkCoverage' will not reject coverage + -- levels that are only slightly below the required levels. + -- If the required level is @p@ then an actual level of @tolerance * p@ + -- will be accepted. The default value is @0.9@. + -- + -- Lower values will speed up 'checkCoverage' at the cost of not detecting + -- minor coverage violations. + } + deriving Show diff --git a/src/Test/QuickCheck/State.hs b/src/Test/QuickCheck/State.hs deleted file mode 100644 index ca8f179b..00000000 --- a/src/Test/QuickCheck/State.hs +++ /dev/null @@ -1,91 +0,0 @@ -{-# OPTIONS_HADDOCK hide #-} --- | QuickCheck's internal state. Internal QuickCheck module. -module Test.QuickCheck.State where - -import Test.QuickCheck.Text -import Test.QuickCheck.Random -import Data.Map(Map) - --------------------------------------------------------------------------- --- State - --- | State represents QuickCheck's internal state while testing a property. --- The state is made visible to callback functions. -data State - = MkState - -- static - { terminal :: Terminal - -- ^ the current terminal - , maxSuccessTests :: Int - -- ^ maximum number of successful tests needed - , maxDiscardedRatio :: Int - -- ^ maximum number of discarded tests per successful test - , coverageConfidence :: Maybe Confidence - -- ^ required coverage confidence - , computeSize :: Int -> Int -> Int - -- ^ how to compute the size of test cases from - -- #tests and #discarded tests - , numTotMaxShrinks :: !Int - -- ^ How many shrinks to try before giving up - - -- dynamic - , numSuccessTests :: !Int - -- ^ the current number of tests that have succeeded - , numDiscardedTests :: !Int - -- ^ the current number of discarded tests - , numRecentlyDiscardedTests :: !Int - -- ^ the number of discarded tests since the last successful test - , labels :: !(Map [String] Int) - -- ^ counts for each combination of labels (label/collect) - , classes :: !(Map String Int) - -- ^ counts for each class of test case (classify/cover) - , tables :: !(Map String (Map String Int)) - -- ^ tables collected using tabulate - , requiredCoverage :: !(Map (Maybe String, String) Double) - -- ^ coverage requirements - , expected :: !Bool - -- ^ indicates the expected result of the property - , randomSeed :: !QCGen - -- ^ the current random seed - - -- shrinking - , numSuccessShrinks :: !Int - -- ^ number of successful shrinking steps so far - , numTryShrinks :: !Int - -- ^ number of failed shrinking steps since the last successful shrink - , numTotTryShrinks :: !Int - -- ^ total number of failed shrinking steps - } - --- | The statistical parameters used by 'checkCoverage'. -data Confidence = - Confidence { - certainty :: Integer, - -- ^ How certain 'checkCoverage' must be before the property fails. - -- If the coverage requirement is met, and the certainty parameter is @n@, - -- then you should get a false positive at most one in @n@ runs of QuickCheck. - -- The default value is @10^9@. - -- - -- Lower values will speed up 'checkCoverage' at the cost of false - -- positives. - -- - -- If you are using 'checkCoverage' as part of a test suite, you should - -- be careful not to set @certainty@ too low. If you want, say, a 1% chance - -- of a false positive during a project's lifetime, then @certainty@ should - -- be set to at least @100 * m * n@, where @m@ is the number of uses of - -- 'cover' in the test suite, and @n@ is the number of times you expect the - -- test suite to be run during the project's lifetime. The default value - -- is chosen to be big enough for most projects. - tolerance :: Double - -- ^ For statistical reasons, 'checkCoverage' will not reject coverage - -- levels that are only slightly below the required levels. - -- If the required level is @p@ then an actual level of @tolerance * p@ - -- will be accepted. The default value is @0.9@. - -- - -- Lower values will speed up 'checkCoverage' at the cost of not detecting - -- minor coverage violations. - } - deriving Show - --------------------------------------------------------------------------- --- the end. diff --git a/src/Test/QuickCheck/Test.hs b/src/Test/QuickCheck/Test.hs index 2cbcb08c..9ad64224 100644 --- a/src/Test/QuickCheck/Test.hs +++ b/src/Test/QuickCheck/Test.hs @@ -1,6 +1,7 @@ {-# OPTIONS_HADDOCK hide #-} -- | The main test loop. {-# LANGUAGE CPP #-} +{-# LANGUAGE ScopedTypeVariables #-} #ifndef NO_TYPEABLE {-# LANGUAGE DeriveDataTypeable #-} #endif @@ -13,11 +14,9 @@ module Test.QuickCheck.Test where -- imports import Test.QuickCheck.Gen -import Test.QuickCheck.Property hiding ( Result( reason, theException, labels, classes, tables ), (.&.) ) +import Test.QuickCheck.Property hiding ( Result( reason, theException, labels, classes, tables ), (.&.), IOException ) import qualified Test.QuickCheck.Property as P import Test.QuickCheck.Text -import Test.QuickCheck.State hiding (labels, classes, tables, requiredCoverage) -import qualified Test.QuickCheck.State as S import Test.QuickCheck.Exception import Test.QuickCheck.Random import System.Random(split) @@ -33,6 +32,7 @@ import qualified Data.Map as Map import qualified Data.Set as Set import Data.Set(Set) import Data.Map(Map) +import Data.IORef import Data.Char ( isSpace @@ -43,9 +43,15 @@ import Data.List , sortBy , group , intersperse + , intercalate + , zip3 + , zip4 + , zip5 + , zip6 + , partition ) -import Data.Maybe(fromMaybe, isNothing, catMaybes) +import Data.Maybe(fromMaybe, isNothing, isJust, catMaybes) import Data.Ord(comparing) import Text.Printf(printf) import Control.Monad @@ -55,6 +61,11 @@ import Data.Bits import Data.Typeable (Typeable) #endif +import Control.Concurrent +import Control.Exception +import Control.Exception.Base +import Control.Monad.Fix + -------------------------------------------------------------------------- -- quickCheck @@ -63,25 +74,43 @@ import Data.Typeable (Typeable) -- | Args specifies arguments to the QuickCheck driver data Args = Args - { replay :: Maybe (QCGen,Int) + { replay :: Maybe (QCGen,Int) -- ^ Should we replay a previous test? -- Note: saving a seed from one version of QuickCheck and -- replaying it in another is not supported. -- If you want to store a test case permanently you should save -- the test case itself. - , maxSuccess :: Int + , maxSuccess :: Int -- ^ Maximum number of successful tests before succeeding. Testing stops -- at the first failure. If all tests are passing and you want to run more tests, -- increase this number. - , maxDiscardRatio :: Int + , maxDiscardRatio :: Int -- ^ Maximum number of discarded tests per successful test before giving up - , maxSize :: Int + , maxSize :: Int -- ^ Size to use for the biggest test cases - , chatty :: Bool + , chatty :: Bool -- ^ Whether to print anything - , maxShrinks :: Int + , maxShrinks :: Int -- ^ Maximum number of shrinks to before giving up. Setting this to zero -- turns shrinking off. + , numTesters :: Int + -- ^ How many concurrent testers to run (uses @forkIO@ internally). A good number to + -- use is as many as you have physical cores. Hyperthreading does not seem to add + -- much value. + , sizeStrategy :: SizeStrategy + -- ^ How to compute the number of successful tests so far to use when computing the + -- size for a test. + , rightToWorkSteal :: Bool + -- ^ Should the testers try to steal the right to run more tests from each other if + -- they run out? + , parallelShrinking :: Bool + -- ^ Shrink in parallel? Does nothing if numTesters == 1, and otherwise spawns numTesters + -- workers. + , parallelTesting :: Bool + -- ^ Test in parallel? Default is True, but if you are replaying a seed with multiple cores, + -- but you want the same counterexample evry time, setting this to False guarantee that + , boundWorkers :: Bool + -- ^ Use forkIO or forkOS? True = forkOS, False = forkIO } deriving ( Show, Read #ifndef NO_TYPEABLE @@ -116,6 +145,15 @@ data Result , tables :: !(Map String (Map String Int)) , output :: String } + | Aborted + { numTests :: Int + , numDiscarded :: Int + -- ^ Number of tests skipped + , labels :: !(Map [String] Int) + , classes :: !(Map String Int) + , tables :: !(Map String (Map String Int)) + , output :: String + } -- | A failed test run | Failure { numTests :: Int @@ -160,17 +198,88 @@ isSuccess :: Result -> Bool isSuccess Success{} = True isSuccess _ = False +isFailure :: Result -> Bool +isFailure Failure{} = True +isFailure _ = False + +isGaveUp :: Result -> Bool +isGaveUp GaveUp{} = True +isGaveUp _ = False + +isAborted :: Result -> Bool +isAborted Aborted{} = True +isAborted _ = False + +isNoExpectedFailure :: Result -> Bool +isNoExpectedFailure NoExpectedFailure{} = True +isNoExpectedFailure _ = False + -- | The default test arguments stdArgs :: Args stdArgs = Args - { replay = Nothing - , maxSuccess = 100 - , maxDiscardRatio = 10 - , maxSize = 100 - , chatty = True - , maxShrinks = maxBound + { replay = Nothing + , maxSuccess = 100 + , maxDiscardRatio = 10 + , maxSize = 100 + , chatty = True + , maxShrinks = maxBound + , numTesters = 1 + , sizeStrategy = Stride + , rightToWorkSteal = True + , parallelShrinking = False + , parallelTesting = True + , boundWorkers = False } +quickCheckPar' :: (Int -> IO a) -> IO a +quickCheckPar' test = do + numHECs <- getNumCapabilities + if numHECs == 1 + then do putStr warning + test numHECs + else test numHECs + where + warning :: String + warning = unlines [ "[WARNING] You have requested parallel testing, but there appears to only be one HEC available" + , "[WARNING] please recompile with these ghc options" + , "[WARNING] -threaded -feager-blackholing -rtsopts" + , "[WARNING] and run your program with this runtime flag" + , "[WARNING] -N[x]" + , "[WARNING] where x indicates the number of workers you want"] + +{- | Run a property in parallel. This is done by distributing the total number of tests +over all available HECs. If only one HEC is available, it reverts to the sequential +testing framework. -} +quickCheckPar :: Testable prop => prop -> IO () +quickCheckPar p = quickCheckPar' $ \numhecs -> + quickCheckInternal (stdArgs { numTesters = numhecs, parallelShrinking = True, parallelTesting = True }) p >> return () + -- do + -- numHecs <- getNumCapabilities + -- if numHecs == 1 + -- then do putStrLn $ concat [ "quickCheckPar called, but only one HEC available -- " + -- , "testing will be sequential..." + -- ] + -- quickCheck p + -- else quickCheckInternal (stdArgs { numTesters = numHECs }) p >> return () + +-- | The parallel version of `quickCheckWith` +quickCheckParWith :: Testable prop => Args -> prop -> IO () +quickCheckParWith a p = quickCheckPar' $ \numhecs -> + quickCheckInternal (a { numTesters = numhecs }) p >> return () + --quickCheckInternal a pa p >> return () + +-- -- | The parallel version of `quickCheckResult` +quickCheckParResult :: Testable prop => prop -> IO Result +quickCheckParResult p = quickCheckPar' $ \numhecs -> + quickCheckInternal (stdArgs { numTesters = numhecs }) p +-- quickCheckParResult p = quickCheckInternal stdArgs stdParArgs p + +-- -- | The parallel version of `quickCheckWithResult` +quickCheckParWithResult :: Testable prop => Args -> prop -> IO Result +quickCheckParWithResult a p = quickCheckPar' $ \numhecs -> + quickCheckInternal (a { numTesters = numhecs }) p + --quickCheckInternal a pa p + -- | Tests a property and prints the results to 'stdout'. -- -- By default up to 100 tests are performed, which may not be enough @@ -180,60 +289,20 @@ stdArgs = Args -- rather than just printing it, try the -- -- package. - quickCheck :: Testable prop => prop -> IO () quickCheck p = quickCheckWith stdArgs p -- | Tests a property, using test arguments, and prints the results to 'stdout'. quickCheckWith :: Testable prop => Args -> prop -> IO () -quickCheckWith args p = quickCheckWithResult args p >> return () +quickCheckWith args p = quickCheckInternal args p >> return () -- | Tests a property, produces a test result, and prints the results to 'stdout'. quickCheckResult :: Testable prop => prop -> IO Result -quickCheckResult p = quickCheckWithResult stdArgs p +quickCheckResult p = quickCheckInternal stdArgs p --- | Tests a property, using test arguments, produces a test result, and prints the results to 'stdout'. +-- | Tests a property, produces a test result, and prints the results to 'stdout'. quickCheckWithResult :: Testable prop => Args -> prop -> IO Result -quickCheckWithResult a p = - withState a (\s -> test s (property p)) - -withState :: Args -> (State -> IO a) -> IO a -withState a test = (if chatty a then withStdioTerminal else withNullTerminal) $ \tm -> do - rnd <- case replay a of - Nothing -> newQCGen - Just (rnd,_) -> return rnd - test MkState{ terminal = tm - , maxSuccessTests = maxSuccess a - , coverageConfidence = Nothing - , maxDiscardedRatio = maxDiscardRatio a - , computeSize = case replay a of - Nothing -> computeSize' - Just (_,s) -> computeSize' `at0` s - , numTotMaxShrinks = maxShrinks a - , numSuccessTests = 0 - , numDiscardedTests = 0 - , numRecentlyDiscardedTests = 0 - , S.labels = Map.empty - , S.classes = Map.empty - , S.tables = Map.empty - , S.requiredCoverage = Map.empty - , expected = True - , randomSeed = rnd - , numSuccessShrinks = 0 - , numTryShrinks = 0 - , numTotTryShrinks = 0 - } - where computeSize' n d - -- e.g. with maxSuccess = 250, maxSize = 100, goes like this: - -- 0, 1, 2, ..., 99, 0, 1, 2, ..., 99, 0, 2, 4, ..., 98. - | n `roundTo` maxSize a + maxSize a <= maxSuccess a || - n >= maxSuccess a || - maxSuccess a `mod` maxSize a == 0 = (n `mod` maxSize a + d `div` 10) `min` maxSize a - | otherwise = - ((n `mod` maxSize a) * maxSize a `div` (maxSuccess a `mod` maxSize a) + d `div` 10) `min` maxSize a - n `roundTo` m = (n `div` m) * m - at0 f s 0 0 = s - at0 f s n d = f n d +quickCheckWithResult args p = quickCheckInternal args p -- | Tests a property and prints the results and all test cases generated to 'stdout'. -- This is just a convenience function that means the same as @'quickCheck' . 'verbose'@. @@ -271,222 +340,691 @@ verboseCheckResult p = quickCheckResult (verbose p) verboseCheckWithResult :: Testable prop => Args -> prop -> IO Result verboseCheckWithResult a p = quickCheckWithResult a (verbose p) --------------------------------------------------------------------------- --- main test loop - -test :: State -> Property -> IO Result -test st f - | numSuccessTests st >= maxSuccessTests st && isNothing (coverageConfidence st) = - doneTesting st f - | numDiscardedTests st >= maxDiscardedRatio st * max (numSuccessTests st) (maxSuccessTests st) = - giveUp st f - | otherwise = - runATest st f +-- new testloop + +-- | A 'message' of type TesterSignal will be communicated to the main thread by the concurrent +-- testers when testing is terminated +data TesterSignal + = KillTesters ThreadId State QCGen P.Result [Rose P.Result] Int + -- | A counterexample was found, and a killsignal should be sent to all concurrent testers + | FinishedTesting + -- | All tests were successfully executed + | NoMoreDiscardBudget ThreadId + -- | There is no more allowance to discard tests, so we should give up + | Interrupted + -- | User pressed CTRL-C (TODO: probably remove this) + +-- | Tests a property, using test arguments, produces a test result, and prints the results to 'stdout'. +quickCheckInternal :: Testable prop => Args -> prop -> IO Result +quickCheckInternal a p = do + -- either reuse the supplied seed, or generate a new one + rnd <- case replay a of + Nothing -> newQCGen + Just (rnd,_) -> return rnd + + let numtesters = if parallelTesting a then numTesters a else 1 + let numShrinkers = if parallelShrinking a then numTesters a else 1 + + {- initial seeds for each tester. The original seed will be split like this: + rnd + / \ + r1 _ + / / \ + r2 _ _ + / / \ / \ + r3 _ __ _ + / + ... + The initial seeds for each tester will be [rnd,r1,r2,r3,...]. + + This may look bad, as there is a clear relationship between them. However, during testing, + the seed to use for each test case is acquired by splitting the seed like this + + rnd + / \ + _ s1 + / \ + s2 _ + + s1 will be used for the current test-case, and s2 will be fed into the next iteration of the loop + i.e. it will take the role of rnd above + Hopefully this yields good enough distribution of seeds + -} + let initialSeeds = snd $ foldr (\_ (rnd, a) -> let (r1,_) = split rnd + in (r1, a ++ [rnd])) + (rnd, []) + [0..numtesters - 1] + + -- how big to make each testers buffer + let numTestsPerTester = maxSuccess a `div` numtesters + + -- returns a list indicating how many tests each tester can run, what their offset for size computation is, and how many + -- tests they can discard + let testsoffsetsanddiscards = snd $ + foldr (\_ ((numtests, offset, numdiscards), acc) -> + ((numTestsPerTester, offset+numtests, numTestsPerTester * maxDiscardRatio a), acc ++ [(numtests, offset, numdiscards)])) + (( numTestsPerTester + (maxSuccess a `rem` numtesters) + , 0 + , numTestsPerTester * maxDiscardRatio a + ((maxSuccess a `rem` numtesters) * maxDiscardRatio a) + ), []) + [0..numtesters - 1] + + -- the MVars that holds the test budget for each tester + testbudgets <- sequence $ replicate numtesters (newIORef 0) + + -- the MVars that hold each testers discard budget + budgets <- sequence $ replicate numtesters (newIORef 0) + + -- the MVars that hold each testers state + states <- sequence $ replicate numtesters newEmptyMVar + + -- the components making up a tester + -- lol zip6 + let testerinfo = zip6 states initialSeeds [0..numtesters - 1] testbudgets budgets testsoffsetsanddiscards + + -- this function tries to steal budget from an MVar Int, if any budget remains. + -- used for stealing test budgets and discard budgets. + tryStealBudget [] = return Nothing + tryStealBudget (b:bs) = do + v <- claimMoreBudget b 1 + case v of + Nothing -> tryStealBudget bs + Just n -> return $ Just n + + -- parent thread will block on this mvar. When it is unblocked, testing should terminate + signal <- newEmptyMVar + numrunning <- newIORef numtesters + + -- initialize the states of each tester + flip mapM_ testerinfo $ \(st, seed, testerID, tbudget, dbudget, (numtests, testoffset, numdiscards)) -> do + mask_ $ (if chatty a then withStdioTerminal else withNullTerminal) $ \tm -> do + writeIORef tbudget (numtests - 1) + writeIORef dbudget (numdiscards - 1) + putMVar st $ (MkState { terminal = tm + , maxSuccessTests = maxSuccess a + , coverageConfidence = Nothing + , maxDiscardedRatio = maxDiscardRatio a + , replayStartSize = snd <$> replay a + , maxTestSize = maxSize a + , numTotMaxShrinks = maxShrinks a + , numSuccessTests = 0 + , numDiscardedTests = 0 + , numRecentlyDiscardedTests = 0 + , stlabels = Map.empty + , stclasses = Map.empty + , sttables = Map.empty + , strequiredCoverage = Map.empty + , expected = True + , randomSeed = seed + , numSuccessShrinks = 0 + , numTryShrinks = 0 + , numTotTryShrinks = 0 + + -- new + -- there is a lot of callbacks here etc + , testBudget = tbudget + , stealTests = if rightToWorkSteal a + then tryStealBudget $ filter ((/=) tbudget) testbudgets + else return Nothing + , numConcurrent = numtesters + , numSuccessOffset = testoffset + , discardBudget = dbudget + , stealDiscards = if rightToWorkSteal a + then tryStealBudget $ filter ((/=) dbudget) budgets + else return Nothing + , myId = testerID + , signalGaveUp = myThreadId >>= \id -> tryPutMVar signal (NoMoreDiscardBudget id) >> return() + , signalTerminating = do b <- atomicModifyIORef' numrunning $ \i -> (i-1, i-1 == 0) + if b + then tryPutMVar signal FinishedTesting >> return () + else return () + , signalFailureFound = \st seed res ts size -> do tid <- myThreadId + tryPutMVar signal (KillTesters tid st seed res ts size) + return () + , shouldUpdateAfterWithStar = True + , stsizeStrategy = sizeStrategy a + }) + + -- continuously print current state + printerID <- if chatty a then Just <$> forkIO (withBuffering $ printer 200 states) else return Nothing + + -- the IO actions that run the test loops + let testers = map (\vst -> testLoop vst True (property p)) states + + -- spawn testers + tids <- if numtesters > 1 + then let fork = if boundWorkers a then forkOS else forkIO + in zipWithM (\comp vst -> fork comp) testers states + else do head testers >> return [] -- if only one worker, let the main thread run the tests + + -- wait for wakeup + s <- readMVar signal `catch` (\UserInterrupt -> return Interrupted) -- catching Ctrl-C, Nick thinks this is bad, as users might have their own handlers + mt <- case s of + Interrupted -> mapM_ (\tid -> throwTo tid QCInterrupted) tids >> mapM_ killThread tids >> return Nothing + KillTesters tid st seed res ts size -> do mapM_ (\tid -> throwTo tid QCInterrupted >> killThread tid) (filter ((/=) tid) tids) + return $ Just tid + FinishedTesting -> return Nothing + NoMoreDiscardBudget tid -> do mapM_ killThread (filter ((/=) tid) tids) + return $ Just tid + + -- stop printing current progress + case printerID of + Just id -> killThread id + Nothing -> return () + + -- get report depending on what happened + reports <- case s of + KillTesters tid st seed res ts size -> do + -- mvar states of all testers that are are aborted + let abortedvsts = map snd $ filter (\(tid', _) -> tid /= tid') (zip tids states) + -- read the states from those mvars + abortedsts <- mapM readMVar abortedvsts + -- complete number of tests that were run over all testers + let numsucc = numSuccessTests st + sum (map numSuccessTests abortedsts) + failed <- withBuffering $ shrinkResult (chatty a) st numsucc seed numShrinkers res ts size -- shrink and return report from failed + aborted <- mapM abortConcurrent abortedsts -- reports from aborted testers + return (failed : aborted) + NoMoreDiscardBudget tid -> mapM (\vst -> readMVar vst >>= flip giveUp (property p)) states + FinishedTesting -> mapM (\vst -> readMVar vst >>= flip doneTesting (property p)) states + Interrupted -> mapM (\vst -> readMVar vst >>= abortConcurrent) states + + -- compute the required coverage (if any), and merge the individual tester reports + sts <- mapM readMVar states + let completeRequiredCoverage = Map.unionsWith max (map strequiredCoverage sts) + finalReport = mergeReports reports + + -- output the final outcome to the terminal, clearing the line before a new print is emitted + putPart (terminal (head sts)) "" + printFinal (terminal (head sts)) finalReport sts (coverageConfidence (head sts)) completeRequiredCoverage + + -- finally, return the report! + return $ mergeReports reports + +{- +See the function testLoop to see how we take replaying into account + + maxSuccessTests + | maxTestSize + | maxDiscardedRation + | | |numSuccessTests + | | | | numRecentlyDiscarded + v v v v v -} +computeSize :: Int -> Int -> Int -> Int -> Int -> Int +computeSize ms mts md n d + -- e.g. with maxSuccess = 250, maxSize = 100, goes like this: + -- 0, 1, 2, ..., 99, 0, 1, 2, ..., 99, 0, 2, 4, ..., 98. + | n `roundTo` mts + mts <= ms || + n >= ms || + ms `mod` mts == 0 = (n `mod` mts + d `div` dDenom) `min` mts + | otherwise = + ((n `mod` mts) * mts `div` (ms `mod` mts) + d `div` dDenom) `min` mts + where + -- The inverse of the rate at which we increase size as a function of discarded tests + -- if the discard ratio is high we can afford this to be slow, but if the discard ratio + -- is low we risk bowing out too early + dDenom + | md > 0 = (ms * md `div` 3) `clamp` (1, 10) + | otherwise = 1 -- Doesn't matter because there will be no discards allowed + n `roundTo` m = (n `div` m) * m + +clamp :: Ord a => a -> (a, a) -> a +clamp x (l, h) = max l (min x h) + +-- | Merge every individual testers report into one report containing the composite information. +mergeReports :: [Result] -> Result +mergeReports rs + | not (null (filter isFailure rs)) = + createFailed (filter (not . isFailure) rs) (head (filter isFailure rs)) + | null (filter (not . isSuccess) rs) = createGeneric rs Success + | null (filter (not . isGaveUp) rs) = createGeneric rs GaveUp + | null (filter (not . isAborted) rs) = createGeneric rs Aborted + | null (filter (not . isNoExpectedFailure) rs) = createGeneric rs NoExpectedFailure + | otherwise = error $ concat ["don't know how to merge reports: ", intercalate "\n" $ map show rs] + where + -- | create a Result value by passing in a constructor as a parameter to this function + createGeneric :: [Result] + -> ( Int + -> Int + -> Map [String] Int + -> Map String Int + -> Map String (Map String Int) + -> String + -> Result) + -> Result + createGeneric rs f = f (sum $ map numTests rs) + (sum $ map numDiscarded rs) + (Map.unionsWith (+) $ map labels rs) + (Map.unionsWith (+) $ map classes rs) + (Map.unionsWith (Map.unionWith (+)) $ map tables rs) + (intercalate "\n" $ map output rs) + + {- | create a Result that indicates a failure happened + NOTE: in this case, the labels and tables are dropped and not reported to the user. -} + createFailed :: [Result] -> Result -> Result + createFailed rs f = f { numTests = sum $ map numTests (f:rs) + , numDiscarded = sum $ map numDiscarded (f:rs) + , output = intercalate "\n" $ map output rs + } + +{- | Given a ref with more budget (either test budget or discard budget), try to claim +- at most @maxchunk@ from it. -} +claimMoreBudget :: IORef Int -> Int -> IO (Maybe Int) +claimMoreBudget budgetioref maxchunk = do + atomicModifyIORef' budgetioref $ \budget -> + if budget <= 0 + then (0, Nothing) + else let chunk = min budget maxchunk + in (max 0 (budget - chunk), Just chunk) + +{- | Update the state in an mvar with a new state. +NOTE: interrupts are masked during the actual update, so that if updatestate +begins evaluation, it will always be allowed to finish. -} +updateState :: MVar State -> State -> IO () +updateState vst st = do + modifyMVar_ vst $ \_ -> return st + +-- TODO merge runOneMore and continueAfterDiscard into one function, they are identical with the +-- exception of the actual MVar and the stealing function + +{- | Given a state, returns @True@ if another test should be executed, and @False@ if not. +If a specific tester thread has run out of testing 'budget', it will try to steal the +right to run more tests from other testers. -} +runOneMore :: State -> IO Bool +runOneMore st = do + b <- claimMoreBudget (testBudget st) 1 + case b of + Just _ -> return True + Nothing -> do n <- stealTests st + case n of + Nothing -> return False + Just _ -> return True + +{- | After a test has been discarded, calling this function will let the tester know if +it should stop trying to satisfy the test predicate, or if it should continue. If it has +run out of testing budget, it will try to steal the right to discard more from other +testers. -} +continueAfterDiscard :: State -> IO Bool +continueAfterDiscard st = do + b <- claimMoreBudget (discardBudget st) 1 + case b of + Just _ -> return True + Nothing -> do n <- stealDiscards st + case n of + Nothing -> return False + Just _ -> return True + +{- | The actual testing loop that each tester runs. TODO document more-} +testLoop :: MVar State -> Bool -> Property -> IO () +testLoop vst False f = do + st <- readMVar vst + b <- runOneMore st + if b + then testLoop vst True f + else signalTerminating st +testLoop vst True f = do + st <- readMVar vst + let (_,s2) = split (randomSeed st) + (s1,_) = split s2 + numSuccSize = testSizeInput st + res@(MkRose r ts) <- runTest st f s1 (size st) + let (classification, st') = resultOf res st + st'' = st' { randomSeed = s2 } + finst <- maybeUpdateAfterWithMaxSuccess res st'' + case classification of + -- test was successful! + OK | abort r -> updateState vst (updateStateAfterResult res finst) >> signalTerminating finst + OK -> do + updateState vst (updateStateAfterResult res finst) + testLoop vst False f + -- test was discarded, and we're out of discarded budget + -- do not keep coverage information for discarded tests + Discarded | abort r -> updateState vst finst >> signalTerminating finst + Discarded -> do + b <- continueAfterDiscard st -- should we keep going? + if b + then updateState vst finst >> testLoop vst True f + else updateState vst finst >> signalGaveUp finst + + -- test failed, and we should abort concurrent testers and start shrinking the result + Failed -> + signalFailureFound st' st' (randomSeed st) r ts (size st) + where + -- | Compute the numSuccess-parameter to feed to the @computeSize@ function + -- NOTE: if there is a size to replay, the computed size is offset by that much to make sure + -- that we explore it first. In the parallel case we will explore the sizes [replay, replay+1, ...] etc, + -- so we might actually end up with another counterexample. We are, however, guaranteed that one thread + -- is going to explore the replayed size and seed. + testSizeInput :: State -> Int + testSizeInput st = case stsizeStrategy st of + Offset -> (fromMaybe 0 (replayStartSize st)) + numSuccessOffset st + numSuccessTests st + Stride -> (fromMaybe 0 (replayStartSize st)) + numSuccessTests st * numConcurrent st + myId st + + size :: State -> Int + size st = computeSize (maxSuccessTests st) + (maxTestSize st) + (maxDiscardedRatio st) + (testSizeInput st) + (numRecentlyDiscardedTests st) + +{- | Printing loop. It will read the current test state from the list of @MVar State@, +and print a summary to the terminal. It will do this every @delay@ microseconds. +NOTE: while the actual print is happening, interruptions are masked. This is so that if +the printer is terminated mid-print, the terminal is in an allowed state. -} +printer :: Int -> [MVar State] -> IO () +printer delay vsts = do + mask_ printStuff + threadDelay delay + printer delay vsts + where + -- | Does the actual compiling of the states and printing it to the terminal + printStuff :: IO () + printStuff = do + states <- sequence $ map readMVar vsts + putTemp (terminal (head states)) ( concat ["(", summary states, ")"] ) + where + summary states = + number (sum (map numSuccessTests states)) "test" ++ + concat [ "; " ++ show (sum (map numDiscardedTests states)) ++ " discarded" + | sum (map numDiscardedTests states) > 0 + ] + +{- | This function inspects the final @Result@ of testing and prints a summary to the +terminal. The parameters to this function are + + 1. The terminal to which to print + 2. The final @Result@ value + 3. The coverage confidence, if any + 4. The required coverage, if any (the map might be empty) + +If @isFailure r = True@, where @r@ is the final @Result@, this function is a no-op. + +-} +printFinal :: Terminal -> Result -> [State] -> Maybe Confidence -> Map.Map (Maybe String, String) Double -> IO () +printFinal terminal r states coverageConfidence requiredCoverage + | isSuccess r = do + putLine terminal ("+++ OK, passed " ++ testCount (numTests r) (numDiscarded r)) + individualTester + printTheLabelsAndTables + | isFailure r = return () + | isGaveUp r = do + putLine terminal ( bold ("*** Gave up!") ++ " Passed only " ++ testCount (numTests r) (numDiscarded r) ++ " tests") + individualTester + printTheLabelsAndTables + | isAborted r = do + putLine terminal ( bold ("*** Aborted prematurely!") ++ " Passed " ++ testCount (numTests r) (numDiscarded r) ++ " before interrupted") + individualTester + printTheLabelsAndTables + | isNoExpectedFailure r = do + putLine terminal ( bold ("*** Failed!") ++ " Passed " ++ testCount (numTests r) (numDiscarded r) ++ " (expected failure") + individualTester + printTheLabelsAndTables + where + -- | print the information collected via labels, tabels, coverage etc + printTheLabelsAndTables :: IO () + printTheLabelsAndTables = do + mapM_ (putLine terminal) (paragraphs [short, long]) + + (short,long) = case labelsAndTables (labels r) (classes r) (tables r) requiredCoverage (numTests r) coverageConfidence of + ([msg], long) -> ([" (" ++ dropWhile isSpace msg ++ ")."], long) + ([], long) -> ([], long) + (short, long) -> (":":short, long) + + -- | Final count of successful tests, and discarded tests, rendered as a string + testCount :: Int -> Int -> String + testCount numTests numDiscarded = + concat [ number numTests "test" + , if numDiscarded > 0 + then concat ["; ", show numDiscarded, " discarded"] + else "" + ] + + individualTester :: IO () + individualTester = + if length states > 1 + then mapM_ (\st -> putLine terminal $ concat [" tester ", show (myId st), ": ", testCount (numSuccessTests st) (numDiscardedTests st)]) states + else return () + +{- | This function will shrink the result of a failed test case (if possible), and then +return a final report. The parameters are + + 1. The state associated with this test case + 2. The random seed used to generate the failing test case + 3. The result of running the failing test case + 4. The shrinking candidates + 5. The size fed to the test case + +-} +shrinkResult :: Bool -> State -> Int -> QCGen -> Int -> P.Result -> [Rose P.Result] -> Int -> IO Result +shrinkResult chatty st numsucc rs n res ts size = do + (numShrinks, totFailed, lastFailed, res) <- foundFailure chatty st numsucc n res ts + theOutput <- terminalOutput (terminal st) + if not (expect res) then + return Success{ labels = stlabels st, + classes = stclasses st, + tables = sttables st, + numTests = numSuccessTests st+1, + numDiscarded = numDiscardedTests st, + output = theOutput } + else do + testCase <- mapM showCounterexample (P.testCase res) + return Failure{ usedSeed = rs + , usedSize = size + , numTests = numSuccessTests st+1 + , numDiscarded = numDiscardedTests st + , numShrinks = numShrinks + , numShrinkTries = totFailed + , numShrinkFinal = lastFailed + , output = theOutput + , reason = P.reason res + , theException = P.theException res + , failingTestCase = testCase + , failingLabels = P.labels res + , failingClasses = Set.fromList (map fst $ filter snd $ P.classes res) + } +{- | Inspect the result of running a test, and return the next action to take as well as +an updated state. The parts of the state that might be updated are + + * @numSuccessTests@ + * @numRecentlyDiscardedTests@ + * @numDiscardedTests@ + +-} +resultOf :: Rose P.Result -> State -> (TestRes, State) +resultOf (MkRose res _) st + -- successful test + | ok res == Just True = + ( OK + , st { numSuccessTests = numSuccessTests st + 1 + , numRecentlyDiscardedTests = 0 + } + ) + -- discarded test + | ok res == Nothing = + ( Discarded + , st { numDiscardedTests = numDiscardedTests st + 1 + , numRecentlyDiscardedTests = numRecentlyDiscardedTests st + 1 + } + ) + -- failed test + | ok res == Just False = (Failed, st) + +-- | The result of running a test +data TestRes + = OK + -- ^ The test was OK (successful) + | Failed + -- ^ The test failed + | Discarded + -- ^ The test was discarded, as it did not meet one of the preconditions + +{- | Some test settings are attached to the property rather than the testing arguments, +and will thus only be visible after running a test. This function takes the @Rose Result@ +of running a test and the @State@ associated with that test, and updates the state with +information about such settings. Settings affected are + + * @coverageConfidence@ + * @labels@ + * @classes@ + * @tables@ + * @requiredCoverage@ -- what are the coverage requirements? + * @expected@ -- should the test fail? + +-} +updateStateAfterResult :: Rose P.Result -> State -> State +updateStateAfterResult (MkRose res ts) st = + st { coverageConfidence = maybeCheckCoverage res `mplus` coverageConfidence st + , stlabels = Map.insertWith (+) (P.labels res) 1 (stlabels st) + , stclasses = Map.unionWith (+) (stclasses st) (Map.fromList [ (s, if b then 1 else 0) | (s, b) <- P.classes res ]) + , sttables = foldr (\(tab, x) -> Map.insertWith (Map.unionWith (+)) tab (Map.singleton x 1)) + (sttables st) (P.tables res) + , strequiredCoverage = foldr (\(key, value, p) -> Map.insertWith max (key, value) p) + (strequiredCoverage st) (P.requiredCoverage res) + , expected = expect res + } + +{- | A property might specify that a specific number of tests should be run (@withMaxSuccess@ + and/or that we should use a custom discard ratio when (@withDiscardRatio@). This function +will detect that and recompute the testing/discard budget and update them accordingly. +It will also set a flag in the state that makes it so that this stuff computed only once. -} +maybeUpdateAfterWithMaxSuccess :: Rose P.Result -> State -> IO State +maybeUpdateAfterWithMaxSuccess (MkRose res ts) st = do + case (maybeNumTests res, maybeDiscardedRatio res) of + (Nothing, Nothing) -> return st + (mnt, mdr) -> + if shouldUpdateAfterWithStar st + then updateState (fromMaybe (maxSuccessTests st) mnt) (fromMaybe (maxDiscardedRatio st) mdr) st + else return st + where + updateState :: Int -> Int -> State -> IO State + updateState numTests' maxDiscarded' st = do + let numTestsPerTester = + if myId st == 0 + then numTests' `div` numConcurrent st + (numTests' `rem` numConcurrent st) + else numTests' `div` numConcurrent st + + newSuccessOffset = + if myId st == 0 + then 0 + else numTestsPerTester * myId st + (numTests' `rem` numConcurrent st) + + numDiscardsPerTester = numTestsPerTester * maxDiscarded' + + atomicModifyIORef' (testBudget st) $ \remainingbudget -> + let newbudget = numTestsPerTester - 1 in (newbudget, ()) + + atomicModifyIORef' (discardBudget st) $ \remainingbudget -> + let newbudget = numDiscardsPerTester - 1 in (newbudget, ()) + + return $ st { maxSuccessTests = numTests' + , numSuccessOffset = newSuccessOffset + , maxDiscardedRatio = maxDiscarded' + , shouldUpdateAfterWithStar = False + } + +-- | Run a test +{- | This function will generate and run a test case! The parameters are: + + 1. The current @State@ of the tester responsible for running the test + 2. The property to test + 3. The random seed to use + 4. The size to use + +-} +runTest :: State -> Property -> QCGen -> Int -> IO (Rose P.Result) +runTest st f seed size = do + let f_or_cov = case coverageConfidence st of + Just confidence | confidenceTest -> addCoverageCheck + (stlabels st) + (stclasses st) + (sttables st) + (strequiredCoverage st) + (numSuccessTests st) + confidence + f + _ -> f + MkRose res ts <- protectRose (reduceRose (unProp (unGen (unProperty f_or_cov) seed size))) + res <- callbackPostTest st res + return (MkRose res ts) + where + powerOfTwo :: (Integral a, Bits a) => a -> Bool + powerOfTwo n = n .&. (n - 1) == 0 + + confidenceTest :: Bool + confidenceTest = (1 + numSuccessTests st) `mod` 100 == 0 && powerOfTwo ((1 + numSuccessTests st) `div` 100) + +{- | If a tester terminates without falsifying a property, this function converts the +testers @State@ to a @Result@ -} doneTesting :: State -> Property -> IO Result doneTesting st _f | expected st == False = do - putPart (terminal st) - ( bold ("*** Failed!") - ++ " Passed " - ++ showTestCount st - ++ " (expected failure)" - ) finished NoExpectedFailure | otherwise = do - putPart (terminal st) - ( "+++ OK, passed " - ++ showTestCount st - ) finished Success where finished k = do - success st theOutput <- terminalOutput (terminal st) - return (k (numSuccessTests st) (numDiscardedTests st) (S.labels st) (S.classes st) (S.tables st) theOutput) + return (k (numSuccessTests st) (numDiscardedTests st) (stlabels st) (stclasses st) (sttables st) theOutput) +{- | If a tester terminates because it discarded too many test cases, this function +converts the testers @State@ to a @Result@ -} giveUp :: State -> Property -> IO Result -giveUp st _f = - do -- CALLBACK gave_up? - putPart (terminal st) - ( bold ("*** Gave up!") - ++ " Passed only " - ++ showTestCount st - ++ " tests" - ) - success st +giveUp st _f = do -- CALLBACK gave_up? theOutput <- terminalOutput (terminal st) return GaveUp{ numTests = numSuccessTests st , numDiscarded = numDiscardedTests st - , labels = S.labels st - , classes = S.classes st - , tables = S.tables st + , labels = stlabels st + , classes = stclasses st + , tables = sttables st , output = theOutput } -showTestCount :: State -> String -showTestCount st = - number (numSuccessTests st) "test" - ++ concat [ "; " ++ show (numDiscardedTests st) ++ " discarded" - | numDiscardedTests st > 0 - ] - -runATest :: State -> Property -> IO Result -runATest st f = - do -- CALLBACK before_test - putTemp (terminal st) - ( "(" - ++ showTestCount st - ++ ")" - ) - let powerOfTwo n = n .&. (n - 1) == 0 - let f_or_cov = - case coverageConfidence st of - Just confidence | (1 + numSuccessTests st) `mod` 100 == 0 && powerOfTwo ((1 + numSuccessTests st) `div` 100) -> - addCoverageCheck confidence st f - _ -> f - let size = computeSize st (numSuccessTests st) (numRecentlyDiscardedTests st) - MkRose res ts <- protectRose (reduceRose (unProp (unGen (unProperty f_or_cov) rnd1 size))) - res <- callbackPostTest st res - - let continue break st' | abort res = break st' - | otherwise = test st' - - let st' = st{ coverageConfidence = maybeCheckCoverage res `mplus` coverageConfidence st - , maxSuccessTests = fromMaybe (maxSuccessTests st) (maybeNumTests res) - , S.labels = Map.insertWith (+) (P.labels res) 1 (S.labels st) - , S.classes = Map.unionWith (+) (S.classes st) (Map.fromList (zip (P.classes res) (repeat 1))) - , S.tables = - foldr (\(tab, x) -> Map.insertWith (Map.unionWith (+)) tab (Map.singleton x 1)) - (S.tables st) (P.tables res) - , S.requiredCoverage = - foldr (\(key, value, p) -> Map.insertWith max (key, value) p) - (S.requiredCoverage st) (P.requiredCoverage res) - , expected = expect res } - - case res of - MkResult{ok = Just True} -> -- successful test - do continue doneTesting - st'{ numSuccessTests = numSuccessTests st' + 1 - , numRecentlyDiscardedTests = 0 - , randomSeed = rnd2 - } f - - MkResult{ok = Nothing, expect = expect, maybeNumTests = mnt, maybeCheckCoverage = mcc} -> -- discarded test - do continue giveUp - -- Don't add coverage info from this test - st{ numDiscardedTests = numDiscardedTests st' + 1 - , numRecentlyDiscardedTests = numRecentlyDiscardedTests st' + 1 - , randomSeed = rnd2 - } f - - MkResult{ok = Just False} -> -- failed test - do (numShrinks, totFailed, lastFailed, res) <- foundFailure st' res ts - theOutput <- terminalOutput (terminal st') - if not (expect res) then - return Success{ labels = S.labels st', - classes = S.classes st', - tables = S.tables st', - numTests = numSuccessTests st'+1, - numDiscarded = numDiscardedTests st', - output = theOutput } - else do - testCase <- mapM showCounterexample (P.testCase res) - return Failure{ usedSeed = randomSeed st' -- correct! (this will be split first) - , usedSize = size - , numTests = numSuccessTests st'+1 - , numDiscarded = numDiscardedTests st' - , numShrinks = numShrinks - , numShrinkTries = totFailed - , numShrinkFinal = lastFailed - , output = theOutput - , reason = P.reason res - , theException = P.theException res - , failingTestCase = testCase - , failingLabels = P.labels res - , failingClasses = Set.fromList (P.classes res) - } - where - (rnd1,rnd2) = split (randomSeed st) - -failureSummary :: State -> P.Result -> String -failureSummary st res = fst (failureSummaryAndReason st res) - -failureReason :: State -> P.Result -> [String] -failureReason st res = snd (failureSummaryAndReason st res) - -failureSummaryAndReason :: State -> P.Result -> (String, [String]) -failureSummaryAndReason st res = (summary, full) - where - summary = - header ++ - short 26 (oneLine theReason ++ " ") ++ - count True ++ "..." - - full = - (header ++ - (if isOneLine theReason then theReason ++ " " else "") ++ - count False ++ ":"): - if isOneLine theReason then [] else lines theReason - - theReason = P.reason res - - header = - if expect res then - bold "*** Failed! " - else "+++ OK, failed as expected. " - - count full = - "(after " ++ number (numSuccessTests st+1) "test" ++ - concat [ - " and " ++ - show (numSuccessShrinks st) ++ - concat [ "." ++ show (numTryShrinks st) | showNumTryShrinks ] ++ - " shrink" ++ - (if numSuccessShrinks st == 1 && not showNumTryShrinks then "" else "s") - | numSuccessShrinks st > 0 || showNumTryShrinks ] ++ - ")" - where - showNumTryShrinks = full && numTryShrinks st > 0 - -success :: State -> IO () -success st = do - mapM_ (putLine $ terminal st) (paragraphs [short, long]) - where - (short, long) = - case labelsAndTables st of - ([msg], long) -> - ([" (" ++ dropWhile isSpace msg ++ ")."], long) - ([], long) -> - (["."], long) - (short, long) -> - (":":short, long) - -labelsAndTables :: State -> ([String], [String]) -labelsAndTables st = (theLabels, theTables) +{- | If a tester terminates because it was aborted by the parent thread, this function +converts the testers @State@ to a @Result@ -} +abortConcurrent :: State -> IO Result +abortConcurrent st = do + theOutput <- terminalOutput (terminal st) + return Aborted{ numTests = numSuccessTests st + , numDiscarded = numDiscardedTests st + , labels = stlabels st + , classes = stclasses st + , tables = sttables st + , output = theOutput + } + +labelsAndTables :: Map.Map [String] Int + -> Map.Map String Int + -> Map.Map String (Map.Map String Int) + -> Map.Map (Maybe String, String) Double + -> Int -> Maybe Confidence + -> ([String], [String]) +labelsAndTables labels classes tables requiredCoverage numTests coverageConfidence = (theLabels, theTables) where theLabels :: [String] theLabels = paragraphs $ - [ showTable (numSuccessTests st) Nothing m - | m <- S.classes st:Map.elems numberedLabels ] + [showTable numTests Nothing m + | m <- classes : Map.elems numberedLabels + ] - numberedLabels :: Map Int (Map String Int) + numberedLabels :: Map.Map Int (Map.Map String Int) numberedLabels = Map.fromListWith (Map.unionWith (+)) $ [ (i, Map.singleton l n) - | (labels, n) <- Map.toList (S.labels st), - (i, l) <- zip [0..] labels ] + | (labels, n) <- Map.toList labels + , (i,l) <- zip [0..] labels + ] theTables :: [String] theTables = paragraphs $ [ showTable (sum (Map.elems m)) (Just table) m - | (table, m) <- Map.toList (S.tables st) ] ++ - [[ (case mtable of Nothing -> "Only "; Just table -> "Table '" ++ table ++ "' had only ") + | (table, m) <- Map.toList tables + ] ++ + [[ (case mtable of Nothing -> "Only"; Just table -> "Table '" ++ table ++ "' had only ") ++ lpercent n tot ++ " " ++ label ++ ", but expected " ++ lpercentage p tot - | (mtable, label, tot, n, p) <- allCoverage st, - insufficientlyCovered (fmap certainty (coverageConfidence st)) tot n p ]] + | (mtable, label, tot, n, p) <- allCoverage classes tables requiredCoverage numTests, + insufficientlyCovered (fmap certainty coverageConfidence) tot n p ]] -- TODO here showTable :: Int -> Maybe String -> Map String Int -> [String] showTable k mtable m = @@ -506,48 +1044,216 @@ showTable k mtable m = -------------------------------------------------------------------------- -- main shrinking loop -foundFailure :: State -> P.Result -> [Rose P.Result] -> IO (Int, Int, Int, P.Result) -foundFailure st res ts = - do localMin st{ numTryShrinks = 0 } res ts - -localMin :: State -> P.Result -> [Rose P.Result] -> IO (Int, Int, Int, P.Result) --- Don't try to shrink for too long -localMin st res ts - | numSuccessShrinks st + numTotTryShrinks st >= numTotMaxShrinks st = - localMinFound st res -localMin st res ts = do - r <- tryEvaluateIO $ - putTemp (terminal st) (failureSummary st res) - case r of - Left err -> - localMinFound st (exception "Exception while printing status message" err) { callbacks = callbacks res } - Right () -> do - r <- tryEvaluate ts - case r of - Left err -> - localMinFound st - (exception "Exception while generating shrink-list" err) { callbacks = callbacks res } - Right ts' -> localMin' st res ts' - -localMin' :: State -> P.Result -> [Rose P.Result] -> IO (Int, Int, Int, P.Result) -localMin' st res [] = localMinFound st res -localMin' st res (t:ts) = - do -- CALLBACK before_test - MkRose res' ts' <- protectRose (reduceRose t) - res' <- callbackPostTest st res' - if ok res' == Just False - then localMin st{ numSuccessShrinks = numSuccessShrinks st + 1, - numTryShrinks = 0 } res' ts' - else localMin st{ numTryShrinks = numTryShrinks st + 1, - numTotTryShrinks = numTotTryShrinks st + 1 } res ts - -localMinFound :: State -> P.Result -> IO (Int, Int, Int, P.Result) -localMinFound st res = - do sequence_ [ putLine (terminal st) msg | msg <- failureReason st res ] - callbackPostFinalFailure st res - -- NB no need to check if callbacks threw an exception because - -- we are about to return to the user anyway - return (numSuccessShrinks st, numTotTryShrinks st - numTryShrinks st, numTryShrinks st, res) +foundFailure :: Bool -> State -> Int -> Int -> P.Result -> [Rose P.Result] -> IO (Int, Int, Int, P.Result) +foundFailure chatty st numsucc n res ts = do + re@(n1,n2,n3,r) <- shrinker chatty st numsucc n res ts + sequence_ [ putLine (terminal st) msg | msg <- snd $ failureSummaryAndReason2 (n1, n2, n3) numsucc r ] + callbackPostFinalFailure st r + return re + +-- | State kept during shrinking, will live in an MVar to make all shrinkers able to modify it +-- NOTE: Having one shared resource like this can lead to contention -- don't use too many workers +data ShrinkSt = ShrinkSt + { row :: Int + -- ^ current row + , col :: Int + -- ^ current column + , book :: Map.Map ThreadId (Int, Int) + -- ^ map from @ThreadId@ to the candidate they are currently evaluating + , path :: [(Int, Int)] -- TODO make this list be built in reverse order and then reverse it at the end + -- ^ path taken when shrinking so far + , selfTerminated :: Int + -- ^ how many threads died on their own + , blockUntilAwoken :: MVar () + -- ^ when you self terminate, block until you are awoken by taking this mvar + , currentResult :: P.Result + -- ^ current best candidate + , candidates :: [Rose P.Result] + -- ^ candidates yet to evaluate + } + +shrinker :: Bool -> State -> Int -> Int -> P.Result -> [Rose P.Result] -> IO (Int, Int, Int, P.Result) +shrinker chatty st numsucc n res ts = do + + blocker <- newEmptyMVar + jobs <- newMVar $ ShrinkSt 0 0 Map.empty [(-1,-1)] 0 blocker res ts + stats <- newIORef (0,0,0) + signal <- newEmptyMVar + + -- continuously print current state + printerID <- if chatty + then Just <$> forkIO (shrinkPrinter (terminal st) stats numsucc res 200) + else return Nothing + + -- start shrinking + tids <- spawnWorkers n jobs stats signal + + -- need to block here until completely done + takeMVar signal + + -- stop printing + maybe (return ()) killThread printerID + withBuffering $ clearTemp (terminal st) + + -- make sure to kill the spawned shrinkers + mapM_ killThread tids + + -- get res + ShrinkSt _ _ _ p _ _ r _ <- readMVar jobs + (_,nt,ntot) <- readIORef stats + + return (length p, ntot-nt, nt, r) + where + -- | The shrink loop evaluated by each individual worker + worker :: MVar ShrinkSt -> IORef (Int, Int, Int) -> MVar () -> IO () + worker jobs stats signal = do + -- try to get a candidate to evaluate + j <- getJob jobs + case j of + -- no new candidate, removeFromMap will block this worker until new work exists + Nothing -> removeFromMap jobs signal >> worker jobs stats signal + Just (r,c,parent,t) -> do + mec <- evaluateCandidate t + case mec of + Nothing -> do + failedShrink stats -- shrinking failed, update counters and recurse + worker jobs stats signal + Just (res', ts') -> do + successShrink stats -- shrinking succeeded, update counters and shared pool of work, and recurse + updateWork res' ts' (r,c) parent jobs + worker jobs stats signal + + -- | get a new candidate to evaluate + getJob :: MVar ShrinkSt -> IO (Maybe (Int, Int, (Int, Int), Rose P.Result)) + getJob jobs = do + tid <- myThreadId + modifyMVar jobs $ \st -> + case candidates st of + [] -> return (st, Nothing) + (t:ts) -> return (st { col = col st + 1 + , book = Map.insert tid (row st, col st) (book st) + , candidates = ts + }, Just (row st, col st, head (path st), t)) + + -- | this worker is idle. Indicate in the shared book that it is not working on anything, and block + removeFromMap :: MVar ShrinkSt -> MVar () -> IO () + removeFromMap jobs signal = do + tid <- myThreadId + block <- modifyMVar jobs $ \st -> do + let newst = selfTerminated st + 1 + if newst == n then putMVar signal () else return () + return ( st { book = Map.delete tid (book st) + , selfTerminated = newst} + , blockUntilAwoken st + ) + takeMVar block + + evaluateCandidate :: Rose P.Result -> IO (Maybe (P.Result, [Rose P.Result])) + evaluateCandidate t = do + MkRose res' ts' <- protectRose (reduceRose t) + res' <- callbackPostTest st res' + if ok res' == Just False + then return $ Just (res', ts') + else return Nothing + + failedShrink :: IORef (Int, Int, Int) -> IO () + failedShrink stats = atomicModifyIORef' stats $ \(ns, nt, ntot) -> ((ns, nt + 1, ntot + 1), ()) + + successShrink :: IORef (Int, Int, Int) -> IO () + successShrink stats = atomicModifyIORef' stats $ \(ns, nt, ntot) -> ((ns + 1, 0, ntot), ()) + + -- | A new counterexample is found. Maybe update the shared resource + updateWork :: P.Result -- result of new counterexample + -> [Rose P.Result] -- new candidates + -> (Int, Int) -- 'coordinates' of the new counterexample + -> (Int, Int) -- 'coordinates' of the parent of the counterexample + -> MVar ShrinkSt -- shared resource + -> IO () + updateWork res' ts' cand@(r',c') parent jobs = do + tid <- myThreadId + modifyMVar_ jobs $ \st -> + if not $ parent `elem` path st -- in rare cases, 'stale' candidates could be delivered. Here we check if this candidate is to be considered + then return st + else do let (tids, wm') = toRestart tid (book st) + interruptShrinkers tids + let n = selfTerminated st + if n > 0 + then sequence_ (replicate n (putMVar (blockUntilAwoken st) ())) + else return () + return $ st { row = r' + 1 + , col = 0 + , book = wm' + , path = path st <> [cand] -- path' + , currentResult = res' + , candidates = ts' + , selfTerminated = 0} + where + toRestart :: ThreadId -> Map.Map ThreadId (Int, Int) -> ([ThreadId], Map.Map ThreadId (Int, Int)) + toRestart tid wm = (filter ((/=) tid) $ Map.keys wm, Map.empty) + + -- TODO I tried adding my own kind of internal exception here, but I could not get it to work... piggybacking on this one + -- for now, but it can not stay in the merge. Need to figure out what went wrong last time. + -- I think the QCException type I added didn't work here for some reason, but I can't quite remember what that was now. + interruptShrinkers :: [ThreadId] -> IO () + interruptShrinkers tids = mapM_ (\tid -> throwTo tid UserInterrupt) tids + + spawnWorkers :: Int -> MVar ShrinkSt -> IORef (Int, Int, Int) -> MVar () -> IO [ThreadId] + spawnWorkers num jobs stats signal = + sequence $ replicate num $ forkIO $ defHandler $ worker jobs stats signal + where + -- apparently this programming style can leak a lot of memory, but I tried to measure it during my evaluation, and + -- had no real problems. Could someone verify, or is this OK? + -- Edsko De Vries showed code at HIW 2023 that looked like this, and said it had major memory flaws. Title of his + -- lightening talk was: Severing ties: the need for non-updateable thunks + defHandler :: IO () -> IO () + defHandler ioa = do + r <- try ioa + case r of + Right a -> pure a + Left UserInterrupt -> defHandler ioa + Left ThreadKilled -> myThreadId >>= killThread + +shrinkPrinter :: Terminal -> IORef (Int, Int, Int) -> Int -> P.Result -> Int -> IO () +shrinkPrinter terminal stats n res delay = do + triple <- readIORef stats + let output = fst $ failureSummaryAndReason2 triple n res + withBuffering $ putTemp terminal output + threadDelay delay + shrinkPrinter terminal stats n res delay + +failureSummaryAndReason2 :: (Int, Int, Int) -> Int -> P.Result -> (String, [String]) +failureSummaryAndReason2 (ns, nt, _) numSuccTests res = (summary, full) + where + summary = + header ++ + short 26 (oneLine theReason ++ " ") ++ + count True ++ "..." + + full = + (header ++ + (if isOneLine theReason then theReason ++ " " else "") ++ + count False ++ ":"): + if isOneLine theReason then [] else lines theReason + + theReason = P.reason res + + header = + if expect res then + bold "*** Failed! " + else "+++ OK, failed as expected. " + + count full = + "(after " ++ number (numSuccTests + 1) "test" ++ + concat [ + " and " ++ + show ns ++ + concat [ "." ++ show nt | showNumTryShrinks ] ++ + " shrink" ++ + (if ns == 1 && not showNumTryShrinks then "" else "s") + | ns > 0 || showNumTryShrinks ] ++ + ")" + where + showNumTryShrinks = full && nt > 0 -------------------------------------------------------------------------- -- callbacks @@ -664,36 +1370,40 @@ invnormcdf p p_low = 0.02425 p_high = 1 - p_low -addCoverageCheck :: Confidence -> State -> Property -> Property -addCoverageCheck confidence st prop - | and [ sufficientlyCovered confidence tot n p - | (_, _, tot, n, p) <- allCoverage st ] = - -- Note: run prop once more so that we get labels for this test case run - once prop - | or [ insufficientlyCovered (Just (certainty confidence)) tot n p - | (_, _, tot, n, p) <- allCoverage st ] = - let (theLabels, theTables) = labelsAndTables st in - foldr counterexample (property failed{P.reason = "Insufficient coverage"}) - (paragraphs [theLabels, theTables]) +addCoverageCheck :: Map.Map [String] Int + -> Map.Map String Int + -> Map.Map String (Map.Map String Int) + -> Map.Map (Maybe String, String) Double + -> Int + -> Confidence + -> Property + -> Property +addCoverageCheck labels classes tables requiredCoverage numTests coverageConfidence prop + | and [ sufficientlyCovered coverageConfidence tot n p + | (_, _, tot, n, p) <- allCoverage classes tables requiredCoverage numTests + ] = once prop + | or [ insufficientlyCovered (Just (certainty coverageConfidence)) tot n p + | (_, _, tot, n, p) <- allCoverage classes tables requiredCoverage numTests + ] = let (theLabels, theTables) = labelsAndTables labels classes tables requiredCoverage numTests (Just coverageConfidence) in + foldr counterexample (property failed{P.reason = "Insufficient coverage"}) + (paragraphs [theLabels, theTables]) | otherwise = prop -allCoverage :: State -> [(Maybe String, String, Int, Int, Double)] -allCoverage st = +allCoverage :: Map.Map String Int -> Map.Map String (Map.Map String Int) -> Map.Map (Maybe String, String) Double -> Int -> [(Maybe String, String, Int, Int, Double)] +allCoverage classes tables requiredCoverage numTests = [ (key, value, tot, n, p) - | ((key, value), p) <- Map.toList (S.requiredCoverage st), - let tot = - case key of - Just key -> Map.findWithDefault 0 key totals - Nothing -> numSuccessTests st, - let n = Map.findWithDefault 0 value (Map.findWithDefault Map.empty key combinedCounts) ] + | ((key, value), p) <- Map.toList requiredCoverage, + let tot = case key of + Just key -> Map.findWithDefault 0 key totals + Nothing -> numTests, + let n = Map.findWithDefault 0 value (Map.findWithDefault Map.empty key combinedCounts) + ] where - combinedCounts :: Map (Maybe String) (Map String Int) - combinedCounts = - Map.insert Nothing (S.classes st) - (Map.mapKeys Just (S.tables st)) + combinedCounts :: Map.Map (Maybe String) (Map.Map String Int) + combinedCounts = Map.insert Nothing classes (Map.mapKeys Just tables) - totals :: Map String Int - totals = fmap (sum . Map.elems) (S.tables st) + totals :: Map.Map String Int + totals = fmap (sum . Map.elems) tables -------------------------------------------------------------------------- -- the end. diff --git a/src/Test/QuickCheck/Text.hs b/src/Test/QuickCheck/Text.hs index 3348e902..7cde857c 100644 --- a/src/Test/QuickCheck/Text.hs +++ b/src/Test/QuickCheck/Text.hs @@ -18,10 +18,12 @@ module Test.QuickCheck.Text , withStdioTerminal , withHandleTerminal , withNullTerminal + , withBuffering , terminalOutput , handle , Terminal , putTemp + , clearTemp , putPart , putLine ) @@ -228,5 +230,9 @@ putTemp tm@(MkTerminal _ tmp _ err) s = s ++ [ '\b' | _ <- s ] writeIORef tmp (length s) +clearTemp tm@(MkTerminal _ tmp _ err) = + do n <- readIORef tmp + err $ + replicate n ' ' ++ replicate n '\b' -------------------------------------------------------------------------- -- the end.