diff --git a/cabal.project b/cabal.project
index 8f33510928..19736cf1b8 100644
--- a/cabal.project
+++ b/cabal.project
@@ -53,3 +53,13 @@ if impl(ghc >= 9.10)
 constraints:
     -- Earlier versions do not compile with ghc-9.10
   , plutus-ledger-api ^>=1.31
+
+source-repository-package
+  type: git
+  location: https://github.com/IntersectMBO/ouroboros-network
+  tag: 5c304e5adbd27907c675bd60a2282264a65f117c
+  --sha256: Jn3Qqlsirlyu23bsFN2SO054LIe2rcsHUW1tN8Pm1Ks=
+  subdir:
+    ouroboros-network
+    ouroboros-network-api
+    ouroboros-network-protocols
diff --git a/ouroboros-consensus-diffusion/changelog.d/20240807_100458_alexander.esgen_milestone_1.md b/ouroboros-consensus-diffusion/changelog.d/20240807_100458_alexander.esgen_milestone_1.md
new file mode 100644
index 0000000000..05f1db55a7
--- /dev/null
+++ b/ouroboros-consensus-diffusion/changelog.d/20240807_100458_alexander.esgen_milestone_1.md
@@ -0,0 +1,3 @@
+### Breaking
+
+- Adapted to Genesis-related changes in `ouroboros-consensus` ([#1179](https://github.com/IntersectMBO/ouroboros-consensus/pull/1179)).
diff --git a/ouroboros-consensus-diffusion/src/ouroboros-consensus-diffusion/Ouroboros/Consensus/Node/Genesis.hs b/ouroboros-consensus-diffusion/src/ouroboros-consensus-diffusion/Ouroboros/Consensus/Node/Genesis.hs
index 27a83a7ad0..1bc9c94ad9 100644
--- a/ouroboros-consensus-diffusion/src/ouroboros-consensus-diffusion/Ouroboros/Consensus/Node/Genesis.hs
+++ b/ouroboros-consensus-diffusion/src/ouroboros-consensus-diffusion/Ouroboros/Consensus/Node/Genesis.hs
@@ -1,24 +1,32 @@
+{-# LANGUAGE DeriveGeneric #-}
 {-# LANGUAGE DeriveTraversable #-}
 {-# LANGUAGE DerivingStrategies #-}
 {-# LANGUAGE LambdaCase #-}
 {-# LANGUAGE NamedFieldPuns #-}
 {-# LANGUAGE NumericUnderscores #-}
+{-# LANGUAGE RecordWildCards #-}
 {-# LANGUAGE ScopedTypeVariables #-}

 module Ouroboros.Consensus.Node.Genesis (
     -- * 'GenesisConfig'
     GenesisConfig (..)
+  , GenesisConfigFlags (..)
   , LoEAndGDDConfig (..)
+  , defaultGenesisConfigFlags
   , disableGenesisConfig
   , enableGenesisConfigDefault
+  , mkGenesisConfig
     -- * NodeKernel helpers
   , GenesisNodeKernelArgs (..)
+  , LoEAndGDDNodeKernelArgs (..)
   , mkGenesisNodeKernelArgs
   , setGetLoEFragment
   ) where

 import Control.Monad (join)
+import Data.Maybe (fromMaybe)
 import Data.Traversable (for)
+import GHC.Generics (Generic)
 import Ouroboros.Consensus.Block
 import Ouroboros.Consensus.MiniProtocol.ChainSync.Client
                      (CSJConfig (..), CSJEnabledConfig (..),
@@ -32,51 +40,121 @@ import Ouroboros.Consensus.Util.Args
 import Ouroboros.Consensus.Util.IOLike
 import Ouroboros.Network.AnchoredFragment (AnchoredFragment)
 import qualified Ouroboros.Network.AnchoredFragment as AF
+import Ouroboros.Network.BlockFetch
+                     (GenesisBlockFetchConfiguration (..))

 -- | Whether to en-/disable the Limit on Eagerness and the Genesis Density
 -- Disconnector.
 data LoEAndGDDConfig a =
     LoEAndGDDEnabled !a
   | LoEAndGDDDisabled
-  deriving stock (Show, Functor, Foldable, Traversable)
+  deriving stock (Eq, Generic, Show, Functor, Foldable, Traversable)

 -- | Aggregating the various configs for Genesis-related subcomponents.
-data GenesisConfig = GenesisConfig {
-    gcChainSyncLoPBucketConfig :: !ChainSyncLoPBucketConfig
+data GenesisConfig = GenesisConfig
+  { gcBlockFetchConfig :: !GenesisBlockFetchConfiguration
+  , gcChainSyncLoPBucketConfig :: !ChainSyncLoPBucketConfig
   , gcCSJConfig :: !CSJConfig
-  , gcLoEAndGDDConfig :: !(LoEAndGDDConfig ())
+  , gcLoEAndGDDConfig :: !(LoEAndGDDConfig LoEAndGDDParams)
+  } deriving stock (Eq, Generic, Show)
+
+-- | Genesis configuration flags and low-level args, as parsed from config file or CLI
+data GenesisConfigFlags = GenesisConfigFlags
+  { gcfEnableCSJ :: Bool
+  , gcfEnableLoEAndGDD :: Bool
+  , gcfEnableLoP :: Bool
+  , gcfBlockFetchGracePeriod :: Maybe Integer
+  , gcfBucketCapacity :: Maybe Integer
+  , gcfBucketRate :: Maybe Integer
+  , gcfCSJJumpSize :: Maybe Integer
+  , gcfGDDRateLimit :: Maybe DiffTime
+  } deriving stock (Eq, Generic, Show)
+
+defaultGenesisConfigFlags :: GenesisConfigFlags
+defaultGenesisConfigFlags = GenesisConfigFlags
+  { gcfEnableCSJ = True
+  , gcfEnableLoEAndGDD = True
+  , gcfEnableLoP = True
+  , gcfBlockFetchGracePeriod = Nothing
+  , gcfBucketCapacity = Nothing
+  , gcfBucketRate = Nothing
+  , gcfCSJJumpSize = Nothing
+  , gcfGDDRateLimit = Nothing
   }

--- TODO justification/derivation from other parameters
 enableGenesisConfigDefault :: GenesisConfig
-enableGenesisConfigDefault = GenesisConfig {
-    gcChainSyncLoPBucketConfig = ChainSyncLoPBucketEnabled ChainSyncLoPBucketEnabledConfig {
-      csbcCapacity = 100_000 -- number of tokens
-    , csbcRate = 500 -- tokens per second leaking, 1/2ms
-    }
-  , gcCSJConfig = CSJEnabled CSJEnabledConfig {
-      csjcJumpSize = 3 * 2160 * 20 -- mainnet forecast range
-    }
-  , gcLoEAndGDDConfig = LoEAndGDDEnabled ()
-  }
+enableGenesisConfigDefault = mkGenesisConfig $ Just $ defaultGenesisConfigFlags

 -- | Disable all Genesis components, yielding Praos behavior.
 disableGenesisConfig :: GenesisConfig
-disableGenesisConfig = GenesisConfig {
-    gcChainSyncLoPBucketConfig = ChainSyncLoPBucketDisabled
+disableGenesisConfig = mkGenesisConfig Nothing
+
+mkGenesisConfig :: Maybe GenesisConfigFlags -> GenesisConfig
+mkGenesisConfig Nothing = -- disable Genesis
+  GenesisConfig
+    { gcBlockFetchConfig = GenesisBlockFetchConfiguration
+        { gbfcGracePeriod = 0 -- no grace period when Genesis is disabled
+        }
+    , gcChainSyncLoPBucketConfig = ChainSyncLoPBucketDisabled
     , gcCSJConfig = CSJDisabled
     , gcLoEAndGDDConfig = LoEAndGDDDisabled
     }
+mkGenesisConfig (Just GenesisConfigFlags{..}) =
+  GenesisConfig
+    { gcBlockFetchConfig = GenesisBlockFetchConfiguration
+        { gbfcGracePeriod
+        }
+    , gcChainSyncLoPBucketConfig = if gcfEnableLoP
+        then ChainSyncLoPBucketEnabled ChainSyncLoPBucketEnabledConfig
+          { csbcCapacity
+          , csbcRate
+          }
+        else ChainSyncLoPBucketDisabled
+    , gcCSJConfig = if gcfEnableCSJ
+        then CSJEnabled CSJEnabledConfig
+          { csjcJumpSize
+          }
+        else CSJDisabled
+    , gcLoEAndGDDConfig = if gcfEnableLoEAndGDD
+        then LoEAndGDDEnabled LoEAndGDDParams{lgpGDDRateLimit}
+        else LoEAndGDDDisabled
+    }
+  where
+    -- TODO justification/derivation from other parameters
+    defaultBlockFetchGracePeriod = 10 -- seconds
+    defaultCapacity = 100_000 -- number of tokens
+    defaultRate = 500 -- tokens per second leaking, 1/2ms
+    -- 3 * 2160 * 20 works in more recent ranges of slots, but causes syncing to
+    -- block in byron.
+    defaultCSJJumpSize = 2 * 2160
+    defaultGDDRateLimit = 1.0 -- seconds
+
+    gbfcGracePeriod = fromInteger $ fromMaybe defaultBlockFetchGracePeriod gcfBlockFetchGracePeriod
+    csbcCapacity = fromInteger $ fromMaybe defaultCapacity gcfBucketCapacity
+    csbcRate = fromInteger $ fromMaybe defaultRate gcfBucketRate
+    csjcJumpSize = fromInteger $ fromMaybe defaultCSJJumpSize gcfCSJJumpSize
+    lgpGDDRateLimit = fromMaybe defaultGDDRateLimit gcfGDDRateLimit
+
+newtype LoEAndGDDParams = LoEAndGDDParams
+  { -- | How often to evaluate GDD. 0 means as soon as possible.
+    -- Otherwise, no faster than once every T seconds, where T is the
+    -- value of the field.
+    lgpGDDRateLimit :: DiffTime
+  } deriving stock (Eq, Generic, Show)

 -- | Genesis-related arguments needed by the NodeKernel initialization logic.
 data GenesisNodeKernelArgs m blk = GenesisNodeKernelArgs {
+    gnkaLoEAndGDDArgs :: !(LoEAndGDDConfig (LoEAndGDDNodeKernelArgs m blk))
+  }
+
+data LoEAndGDDNodeKernelArgs m blk = LoEAndGDDNodeKernelArgs {
     -- | A TVar containing an action that returns the 'ChainDB.GetLoEFragment'
     -- action. We use this extra indirection to update this action after we
     -- opened the ChainDB (which happens before we initialize the NodeKernel).
     -- After that, this TVar will not be modified again.
-    gnkaGetLoEFragment :: !(LoEAndGDDConfig (StrictTVar m (ChainDB.GetLoEFragment m blk)))
+    lgnkaLoEFragmentTVar :: !(StrictTVar m (ChainDB.GetLoEFragment m blk))
+  , lgnkaGDDRateLimit :: DiffTime
   }
-
 -- | Create the initial 'GenesisNodeKernelArgs" (with a temporary
 -- 'ChainDB.GetLoEFragment' that will be replaced via 'setGetLoEFragment') and a
 -- function to update the 'ChainDbArgs' accordingly.
@@ -87,20 +165,24 @@ mkGenesisNodeKernelArgs ::
      , Complete ChainDbArgs m blk -> Complete ChainDbArgs m blk
      )
 mkGenesisNodeKernelArgs gcfg = do
-    gnkaGetLoEFragment <- for (gcLoEAndGDDConfig gcfg) $ \() ->
-      newTVarIO $ pure $
+    gnkaLoEAndGDDArgs <- for (gcLoEAndGDDConfig gcfg) $ \p -> do
+      loeFragmentTVar <- newTVarIO $ pure $
         -- Use the most conservative LoE fragment until 'setGetLoEFragment'
        -- is called.
         ChainDB.LoEEnabled $ AF.Empty AF.AnchorGenesis
-    let updateChainDbArgs = case gnkaGetLoEFragment of
+      pure LoEAndGDDNodeKernelArgs
+        { lgnkaLoEFragmentTVar = loeFragmentTVar
+        , lgnkaGDDRateLimit = lgpGDDRateLimit p
+        }
+    let updateChainDbArgs = case gnkaLoEAndGDDArgs of
           LoEAndGDDDisabled -> id
-          LoEAndGDDEnabled varGetLoEFragment -> \cfg ->
+          LoEAndGDDEnabled lgnkArgs -> \cfg ->
             cfg { ChainDB.cdbsArgs =
                     (ChainDB.cdbsArgs cfg) { ChainDB.cdbsLoE = getLoEFragment }
                 }
             where
-              getLoEFragment = join $ readTVarIO varGetLoEFragment
-    pure (GenesisNodeKernelArgs {gnkaGetLoEFragment}, updateChainDbArgs)
+              getLoEFragment = join $ readTVarIO $ lgnkaLoEFragmentTVar lgnkArgs
+    pure (GenesisNodeKernelArgs{gnkaLoEAndGDDArgs}, updateChainDbArgs)

 -- | Set 'gnkaGetLoEFragment' to the actual logic for determining the current
 -- LoE fragment.
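
As an illustration (not part of the patch): the new `mkGenesisConfig` turns an optional `GenesisConfigFlags` into a full `GenesisConfig`. Passing `Nothing` disables every Genesis subcomponent, while `Just defaultGenesisConfigFlags` yields exactly `enableGenesisConfigDefault`. A minimal sketch of how a caller might use this API follows; `genesisConfigFromCli` and its parameters are hypothetical, only the imported names come from the module above.

    import Ouroboros.Consensus.Node.Genesis
             (GenesisConfig, GenesisConfigFlags (..), defaultGenesisConfigFlags,
              mkGenesisConfig)

    -- | Hypothetical helper: build a 'GenesisConfig' from an "enable Genesis"
    -- switch and an optional CSJ jump-size override (e.g. parsed from the CLI).
    genesisConfigFromCli :: Bool -> Maybe Integer -> GenesisConfig
    genesisConfigFromCli enableGenesis mJumpSize
      | not enableGenesis = mkGenesisConfig Nothing  -- plain Praos behaviour
      | otherwise =
          mkGenesisConfig $ Just defaultGenesisConfigFlags
            { gcfCSJJumpSize = mJumpSize }  -- 'Nothing' keeps the default (2 * 2160)

Overriding a single field of `defaultGenesisConfigFlags` like this appears to be the intended pattern: any flag left at `Nothing` falls back to the defaults listed in the `where` clause of `mkGenesisConfig`.
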
diff --git a/ouroboros-consensus-diffusion/src/ouroboros-consensus-diffusion/Ouroboros/Consensus/Node/Tracers.hs b/ouroboros-consensus-diffusion/src/ouroboros-consensus-diffusion/Ouroboros/Consensus/Node/Tracers.hs
index 8b93417d0c..97634562cb 100644
--- a/ouroboros-consensus-diffusion/src/ouroboros-consensus-diffusion/Ouroboros/Consensus/Node/Tracers.hs
+++ b/ouroboros-consensus-diffusion/src/ouroboros-consensus-diffusion/Ouroboros/Consensus/Node/Tracers.hs
@@ -31,14 +31,17 @@ import Ouroboros.Consensus.MiniProtocol.BlockFetch.Server
                      (TraceBlockFetchServerEvent)
 import Ouroboros.Consensus.MiniProtocol.ChainSync.Client
                      (InvalidBlockReason, TraceChainSyncClientEvent)
+import qualified Ouroboros.Consensus.MiniProtocol.ChainSync.Client.Jumping as CSJumping
 import Ouroboros.Consensus.MiniProtocol.ChainSync.Server
                      (TraceChainSyncServerEvent)
 import Ouroboros.Consensus.MiniProtocol.LocalTxSubmission.Server
                      (TraceLocalTxSubmissionServerEvent (..))
 import Ouroboros.Consensus.Node.GSM (TraceGsmEvent)
 import Ouroboros.Network.Block (Tip)
-import Ouroboros.Network.BlockFetch (FetchDecision,
-                     TraceFetchClientState, TraceLabelPeer)
+import Ouroboros.Network.BlockFetch (TraceFetchClientState,
+                     TraceLabelPeer)
+import Ouroboros.Network.BlockFetch.Decision.Trace
+                     (TraceDecisionEvent)
 import Ouroboros.Network.KeepAlive (TraceKeepAliveClient)
 import Ouroboros.Network.TxSubmission.Inbound
                      (TraceTxSubmissionInbound)
@@ -53,7 +56,7 @@ data Tracers' remotePeer localPeer blk f = Tracers
   { chainSyncClientTracer :: f (TraceLabelPeer remotePeer (TraceChainSyncClientEvent blk))
   , chainSyncServerHeaderTracer :: f (TraceLabelPeer remotePeer (TraceChainSyncServerEvent blk))
   , chainSyncServerBlockTracer :: f (TraceChainSyncServerEvent blk)
-  , blockFetchDecisionTracer :: f [TraceLabelPeer remotePeer (FetchDecision [Point (Header blk)])]
+  , blockFetchDecisionTracer :: f (TraceDecisionEvent remotePeer (Header blk))
   , blockFetchClientTracer :: f (TraceLabelPeer remotePeer (TraceFetchClientState (Header blk)))
   , blockFetchServerTracer :: f (TraceLabelPeer remotePeer (TraceBlockFetchServerEvent blk))
   , txInboundTracer :: f (TraceLabelPeer remotePeer (TraceTxSubmissionInbound (GenTxId blk) (GenTx blk)))
@@ -68,6 +71,7 @@ data Tracers' remotePeer localPeer blk f = Tracers
   , consensusErrorTracer :: f SomeException
   , gsmTracer :: f (TraceGsmEvent (Tip blk))
   , gddTracer :: f (TraceGDDEvent remotePeer blk)
+  , csjTracer :: f (CSJumping.TraceEvent remotePeer)
   }

 instance (forall a. Semigroup (f a))
@@ -91,6 +95,7 @@ instance (forall a. Semigroup (f a))
     , consensusErrorTracer = f consensusErrorTracer
     , gsmTracer = f gsmTracer
     , gddTracer = f gddTracer
+    , csjTracer = f csjTracer
     }
   where
     f :: forall a. Semigroup a
@@ -122,6 +127,7 @@ nullTracers = Tracers
     , consensusErrorTracer = nullTracer
     , gsmTracer = nullTracer
     , gddTracer = nullTracer
+    , csjTracer = nullTracer
     }

 showTracers :: ( Show blk
@@ -156,6 +162,7 @@ showTracers tr = Tracers
     , consensusErrorTracer = showTracing tr
     , gsmTracer = showTracing tr
     , gddTracer = showTracing tr
+    , csjTracer = showTracing tr
     }

 {-------------------------------------------------------------------------------
diff --git a/ouroboros-consensus-diffusion/src/ouroboros-consensus-diffusion/Ouroboros/Consensus/NodeKernel.hs b/ouroboros-consensus-diffusion/src/ouroboros-consensus-diffusion/Ouroboros/Consensus/NodeKernel.hs
index 1c6f0bdb0f..a56388a3cd 100644
--- a/ouroboros-consensus-diffusion/src/ouroboros-consensus-diffusion/Ouroboros/Consensus/NodeKernel.hs
+++ b/ouroboros-consensus-diffusion/src/ouroboros-consensus-diffusion/Ouroboros/Consensus/NodeKernel.hs
@@ -41,7 +41,6 @@ import Data.Function (on)
 import Data.Functor ((<&>))
 import Data.Hashable (Hashable)
 import Data.List.NonEmpty (NonEmpty)
-import Data.Map.Strict (Map)
 import Data.Maybe (isJust, mapMaybe)
 import Data.Proxy
 import qualified Data.Text as Text
@@ -61,12 +60,14 @@ import Ouroboros.Consensus.Ledger.SupportsProtocol
 import Ouroboros.Consensus.Mempool
 import qualified Ouroboros.Consensus.MiniProtocol.BlockFetch.ClientInterface as BlockFetchClientInterface
 import Ouroboros.Consensus.MiniProtocol.ChainSync.Client
-                     (ChainSyncClientHandle (..), ChainSyncState (..),
-                     viewChainSyncState)
+                     (ChainSyncClientHandle (..),
+                     ChainSyncClientHandleCollection (..), ChainSyncState (..),
+                     newChainSyncClientHandleCollection)
 import Ouroboros.Consensus.MiniProtocol.ChainSync.Client.InFutureCheck
                      (SomeHeaderInFutureCheck)
 import Ouroboros.Consensus.Node.Genesis (GenesisNodeKernelArgs (..),
-                     LoEAndGDDConfig (..), setGetLoEFragment)
+                     LoEAndGDDConfig (..), LoEAndGDDNodeKernelArgs (..),
+                     setGetLoEFragment)
 import Ouroboros.Consensus.Node.GSM (GsmNodeKernelArgs (..))
 import qualified Ouroboros.Consensus.Node.GSM as GSM
 import Ouroboros.Consensus.Node.Run
@@ -92,6 +93,8 @@ import Ouroboros.Network.AnchoredFragment (AnchoredFragment,
 import qualified Ouroboros.Network.AnchoredFragment as AF
 import Ouroboros.Network.Block (castTip, tipFromHeader)
 import Ouroboros.Network.BlockFetch
+import Ouroboros.Network.BlockFetch.ConsensusInterface
+                     (GenesisFetchMode)
 import Ouroboros.Network.Diffusion (PublicPeerSelectionState)
 import Ouroboros.Network.NodeToNode (ConnectionId,
                      MiniProtocolParameters (..))
@@ -132,7 +135,7 @@ data NodeKernel m addrNTN addrNTC blk = NodeKernel {
       -- | The fetch mode, used by diffusion.
       --
-    , getFetchMode :: STM m FetchMode
+    , getFetchMode :: STM m GenesisFetchMode

       -- | The GSM state, used by diffusion. A ledger judgement can be derived
       -- from it with 'GSM.gsmStateToLedgerJudgement'.
@@ -140,7 +143,7 @@
     , getGsmState :: STM m GSM.GsmState

       -- | The kill handle and exposed state for each ChainSync client.
-    , getChainSyncHandles :: StrictTVar m (Map (ConnectionId addrNTN) (ChainSyncClientHandle m blk))
+    , getChainSyncHandles :: ChainSyncClientHandleCollection (ConnectionId addrNTN) m blk

       -- | Read the current peer sharing registry, used for interacting with
       -- the PeerSharing protocol
@@ -242,7 +245,7 @@ initNodeKernel args@NodeKernelArgs { registry, cfg, tracers
             <&> \wd (_headers, lst) ->
               GSM.getDurationUntilTooOld wd (getTipSlot lst)
         , GSM.equivalent = (==) `on` (AF.headPoint . fst)
-        , GSM.getChainSyncStates = fmap cschState <$> readTVar varChainSyncHandles
+        , GSM.getChainSyncStates = fmap cschState <$> cschcMap varChainSyncHandles
         , GSM.getCurrentSelection = do
             headers <- ChainDB.getCurrentChain chainDB
             extLedgerState <- ChainDB.getCurrentLedger chainDB
@@ -254,7 +257,7 @@ initNodeKernel args@NodeKernelArgs { registry, cfg, tracers
         , GSM.writeGsmState = \gsmState ->
             atomicallyWithMonotonicTime $ \time -> do
               writeTVar varGsmState gsmState
-              handles <- readTVar varChainSyncHandles
+              handles <- cschcMap varChainSyncHandles
               traverse_ (($ time) . ($ gsmState) . cschOnGsmStateChanged) handles
         , GSM.isHaaSatisfied = do
             readTVar varOutboundConnectionsState <&> \case
@@ -273,23 +276,24 @@ initNodeKernel args@NodeKernelArgs { registry, cfg, tracers
           ps_POLICY_PEER_SHARE_STICKY_TIME
           ps_POLICY_PEER_SHARE_MAX_PEERS

-    case gnkaGetLoEFragment genesisArgs of
-      LoEAndGDDDisabled -> pure ()
-      LoEAndGDDEnabled varGetLoEFragment -> do
+    case gnkaLoEAndGDDArgs genesisArgs of
+      LoEAndGDDDisabled -> pure ()
+      LoEAndGDDEnabled lgArgs -> do
         varLoEFragment <- newTVarIO $ AF.Empty AF.AnchorGenesis
         setGetLoEFragment
           (readTVar varGsmState)
           (readTVar varLoEFragment)
-          varGetLoEFragment
+          (lgnkaLoEFragmentTVar lgArgs)

         void $ forkLinkedWatcher registry "NodeKernel.GDD" $
           gddWatcher
             cfg
             (gddTracer tracers)
             chainDB
+            (lgnkaGDDRateLimit lgArgs)
             (readTVar varGsmState)
             -- TODO GDD should only consider (big) ledger peers
-            (readTVar varChainSyncHandles)
+            (cschcMap varChainSyncHandles)
             varLoEFragment

     void $ forkLinkedThread registry "NodeKernel.blockForging" $
@@ -345,7 +349,7 @@ data InternalState m addrNTN addrNTC blk = IS {
     , chainDB :: ChainDB m blk
    , blockFetchInterface :: BlockFetchConsensusInterface (ConnectionId addrNTN) (Header blk) blk m
    , fetchClientRegistry :: FetchClientRegistry (ConnectionId addrNTN) (Header blk) blk m
-    , varChainSyncHandles :: StrictTVar m (Map (ConnectionId addrNTN) (ChainSyncClientHandle m blk))
+    , varChainSyncHandles :: ChainSyncClientHandleCollection (ConnectionId addrNTN) m blk
    , varGsmState :: StrictTVar m GSM.GsmState
    , mempool :: Mempool m blk
    , peerSharingRegistry :: PeerSharingRegistry addrNTN m
@@ -364,6 +368,7 @@ initInternalState NodeKernelArgs { tracers, chainDB, registry, cfg
                                  , blockFetchSize, btime
                                  , mempoolCapacityOverride
                                  , gsmArgs, getUseBootstrapPeers
+                                 , genesisArgs
                                  } = do
    varGsmState <- do
      let GsmNodeKernelArgs {..} = gsmArgs
@@ -373,7 +378,7 @@ initInternalState NodeKernelArgs { tracers, chainDB, registry, cfg
          gsmMarkerFileView
      newTVarIO gsmState

-    varChainSyncHandles <- newTVarIO mempty
+    varChainSyncHandles <- atomically newChainSyncClientHandleCollection
    mempool <- openMempool registry
                 (chainDBLedgerInterface chainDB)
                 (configLedger cfg)
@@ -383,20 +388,19 @@ initInternalState NodeKernelArgs { tracers, chainDB, registry, cfg

    fetchClientRegistry <- newFetchClientRegistry

-    let getCandidates :: STM m (Map (ConnectionId addrNTN) (AnchoredFragment (Header blk)))
-        getCandidates = viewChainSyncState varChainSyncHandles csCandidate
-
    slotForgeTimeOracle <- BlockFetchClientInterface.initSlotForgeTimeOracle cfg chainDB
    let readFetchMode = BlockFetchClientInterface.readFetchModeDefault
+          (isGenesisEnabled $ gnkaLoEAndGDDArgs genesisArgs)
          btime
          (ChainDB.getCurrentChain chainDB)
          getUseBootstrapPeers
          (GSM.gsmStateToLedgerJudgement <$> readTVar varGsmState)
        blockFetchInterface :: BlockFetchConsensusInterface (ConnectionId addrNTN) (Header blk) blk m
        blockFetchInterface = BlockFetchClientInterface.mkBlockFetchConsensusInterface
+          (csjTracer tracers)
          (configBlock cfg)
          (BlockFetchClientInterface.defaultChainDbView chainDB)
-          getCandidates
+          varChainSyncHandles
          blockFetchSize
          slotForgeTimeOracle
          readFetchMode
@@ -404,6 +408,11 @@ initInternalState NodeKernelArgs { tracers, chainDB, registry, cfg
    peerSharingRegistry <- newPeerSharingRegistry

    return IS {..}
+  where
+    isGenesisEnabled :: forall a. LoEAndGDDConfig a -> Bool
+    isGenesisEnabled = \case
+      LoEAndGDDDisabled -> False
+      LoEAndGDDEnabled _ -> True

 forkBlockForging ::
     forall m addrNTN addrNTC blk.
diff --git a/ouroboros-consensus-diffusion/src/unstable-diffusion-testlib/Test/ThreadNet/Network.hs b/ouroboros-consensus-diffusion/src/unstable-diffusion-testlib/Test/ThreadNet/Network.hs
index c6ce17dc07..0bab8823e9 100644
--- a/ouroboros-consensus-diffusion/src/unstable-diffusion-testlib/Test/ThreadNet/Network.hs
+++ b/ouroboros-consensus-diffusion/src/unstable-diffusion-testlib/Test/ThreadNet/Network.hs
@@ -1017,10 +1017,11 @@ runThreadNetwork systemTime ThreadNetworkArgs
                   bfcMaxConcurrencyBulkSync = 1
                 , bfcMaxConcurrencyDeadline = 2
                 , bfcMaxRequestsInflight = 10
-                , bfcDecisionLoopInterval = 0.0 -- Mock testsuite can use sub-second slot
-                                                -- interval which doesn't play nice with
-                                                -- blockfetch descision interval.
+                , bfcDecisionLoopIntervalPraos = 0.0 -- Mock testsuite can use sub-second slot
+                , bfcDecisionLoopIntervalGenesis = 0.0 -- interval which doesn't play nice with
+                                                       -- blockfetch decision interval.
                 , bfcSalt = 0
+                , bfcGenesisBFConfig = gcBlockFetchConfig enableGenesisConfigDefault
                 }
             , gsmArgs = GSM.GsmNodeKernelArgs {
                   gsmAntiThunderingHerd = kaRng
@@ -1035,7 +1036,7 @@ runThreadNetwork systemTime ThreadNetworkArgs
             , getUseBootstrapPeers = pure DontUseBootstrapPeers
             , publicPeerSelectionStateVar
             , genesisArgs = GenesisNodeKernelArgs {
-                gnkaGetLoEFragment = LoEAndGDDDisabled
+                gnkaLoEAndGDDArgs = LoEAndGDDDisabled
               }
             }
diff --git a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Setup.hs b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Setup.hs
index a1e661c264..c4cb2aef8b 100644
--- a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Setup.hs
+++ b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Setup.hs
@@ -18,7 +18,7 @@ import Control.Monad.IOSim (IOSim, runSimStrictShutdown)
 import Control.Tracer (debugTracer, traceWith)
 import Data.Maybe (mapMaybe)
 import Ouroboros.Consensus.MiniProtocol.ChainSync.Client
-                     (ChainSyncClientException (DensityTooLow, EmptyBucket))
+                     (ChainSyncClientException (..))
 import Ouroboros.Consensus.Util.Condense
 import Ouroboros.Consensus.Util.IOLike (Exception, fromException)
 import Ouroboros.Network.Driver.Limits
@@ -124,6 +124,7 @@ forAllGenesisTest generator schedulerConfig shrinker mkProperty =
       | Just DensityTooLow <- e = true
       | Just (ExceededTimeLimit _) <- e = true
       | Just AsyncCancelled <- e = true
+      | Just CandidateTooSparse{} <- e = true
       | otherwise = counterexample ("Encountered unexpected exception: " ++ show exn) False
diff --git a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Setup/GenChains.hs b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Setup/GenChains.hs
index 2cc40aafde..3339729db1 100644
--- a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Setup/GenChains.hs
+++ b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Setup/GenChains.hs
@@ -2,7 +2,6 @@
 {-# LANGUAGE DerivingStrategies #-}
 {-# LANGUAGE ExistentialQuantification #-}
 {-# LANGUAGE NamedFieldPuns #-}
-{-# LANGUAGE NumericUnderscores #-}
 {-# LANGUAGE ScopedTypeVariables #-}

 module Test.Consensus.Genesis.Setup.GenChains (
@@ -132,10 +131,11 @@ genChainsWithExtraHonestPeers genNumExtraHonest genNumForks = do
     gtSlotLength,
     gtChainSyncTimeouts = chainSyncTimeouts gtSlotLength asc,
     gtBlockFetchTimeouts = blockFetchTimeouts,
-    gtLoPBucketParams = LoPBucketParams { lbpCapacity = 100_000, lbpRate = 1_000 },
-    -- ^ REVIEW: Do we want to generate those randomly? For now, the chosen
-    -- values carry no special meaning. Someone needs to think about what values
-    -- would make for interesting tests.
+    gtLoPBucketParams = LoPBucketParams { lbpCapacity = 50, lbpRate = 10 },
+    -- ^ REVIEW: Do we want to generate those randomly?
+    -- These values give little enough leeway (5s) so that some adversaries get disconnected
+    -- by the LoP during the stalling attack test. Maybe we should design a way to override
+    -- those values for individual tests?
     gtCSJParams = CSJParams $ fromIntegral scg,
     gtBlockTree = List.foldl' (flip BT.addBranch') (BT.mkTrunk goodChain) $ zipWith (genAdversarialFragment goodBlocks) [1..] alternativeChainSchemas,
     gtExtraHonestPeers,
diff --git a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Tests/CSJ.hs b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Tests/CSJ.hs
index 652b305e19..3fdf598fa3 100644
--- a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Tests/CSJ.hs
+++ b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Tests/CSJ.hs
@@ -6,12 +6,15 @@ module Test.Consensus.Genesis.Tests.CSJ (tests) where
 import Data.List (nub)
 import qualified Data.Map.Strict as Map
 import Data.Maybe (mapMaybe)
-import Ouroboros.Consensus.Block (Header, blockSlot, succWithOrigin)
+import Ouroboros.Consensus.Block (Header, blockSlot, succWithOrigin,
+                     unSlotNo)
 import Ouroboros.Consensus.MiniProtocol.ChainSync.Client
                      (TraceChainSyncClientEvent (..))
 import Ouroboros.Consensus.Util.Condense (PaddingDirection (..),
                      condenseListWithPadding)
 import qualified Ouroboros.Network.AnchoredFragment as AF
+import Ouroboros.Network.Protocol.ChainSync.Codec
+                     (ChainSyncTimeout (mustReplyTimeout), idleTimeout)
 import Test.Consensus.BlockTree (BlockTree (..))
 import Test.Consensus.Genesis.Setup
 import Test.Consensus.Genesis.Tests.Uniform (genUniformSchedulePoints)
@@ -28,10 +31,12 @@ import Test.Tasty.QuickCheck
 import Test.Util.Orphans.IOLike ()
 import Test.Util.PartialAccessors
 import Test.Util.TestBlock (TestBlock)
-import Test.Util.TestEnv (adjustQuickCheckMaxSize)
+import Test.Util.TestEnv (adjustQuickCheckMaxSize,
+                     adjustQuickCheckTests)

 tests :: TestTree
 tests =
+  adjustQuickCheckTests (* 10) $
   adjustQuickCheckMaxSize (`div` 5) $
     testGroup
       "CSJ"
      [
@@ -49,6 +54,7 @@ tests =
 -- | A flag to indicate if properties are tested with adversarial peers
 data WithAdversariesFlag = NoAdversaries | WithAdversaries
+  deriving Eq

 -- | A flag to indicate if properties are tested using the same schedule for the
 -- honest peers, or if each peer should used its own schedule.
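
A quick worked example (not part of the patch) for the leeway mentioned in the GenChains.hs comment above: an idle peer empties a LoP bucket after capacity divided by leak rate seconds, so the test generator's new values give 5 s, while the node-level defaults from Genesis.hs earlier in this patch give 200 s.

    -- | Seconds before an idle peer empties a LoP bucket with the given
    -- capacity (tokens) and leak rate (tokens per second).
    lopLeeway :: Double -> Double -> Double
    lopLeeway capacity rate = capacity / rate

    -- >>> lopLeeway 50 10        -- test generator values above
    -- 5.0
    -- >>> lopLeeway 100000 500   -- node defaults (csbcCapacity / csbcRate)
    -- 200.0
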
@@ -81,7 +87,7 @@ prop_CSJ adversariesFlag numHonestSchedules = do
        NoAdversaries -> pure 0
        WithAdversaries -> choose (2, 4)
  forAllGenesisTest
-    ( case numHonestSchedules of
+    ( disableBoringTimeouts <$> case numHonestSchedules of
        OneScheduleForAllPeers ->
          genChains genForks
            `enrichedWith` genDuplicatedHonestSchedule
@@ -93,6 +99,13 @@ prop_CSJ adversariesFlag numHonestSchedules = do
        { scEnableCSJ = True
        , scEnableLoE = True
        , scEnableLoP = True
+        , scEnableChainSelStarvation = adversariesFlag == NoAdversaries
+          -- ^ NOTE: When there are adversaries and the ChainSel
+          -- starvation detection of BlockFetch is enabled, then our property does
+          -- not actually hold, because peer simulator-based tests have virtually
+          -- infinite CPU, and therefore ChainSel gets starved at every tick, which
+          -- makes us cycle the dynamos, which can lead to some extra headers being
+          -- downloaded.
        }
    )
    shrinkPeerSchedules
@@ -111,8 +124,16 @@ prop_CSJ adversariesFlag numHonestSchedules = do
                _ -> Nothing
            )
            svTrace
+          -- We receive headers at most once from each honest peer. The only
+          -- exception is when an honest peer gets to be the objector, until an
+          -- adversary dies, and then the dynamo. In that specific case, we
+          -- might re-download jumpSize blocks. TODO: If we ever choose to
+          -- promote objectors to dynamo to reuse their state, then we could
+          -- make this bound tighter.
          receivedHeadersAtMostOnceFromHonestPeers =
-            length (nub $ snd <$> headerHonestDownloadEvents) == length headerHonestDownloadEvents
+            length headerHonestDownloadEvents <=
+              length (nub $ snd <$> headerHonestDownloadEvents) +
+              (fromIntegral $ unSlotNo $ csjpJumpSize $ gtCSJParams gt)
       in
        tabulate ""
          [ if headerHonestDownloadEvents == []
@@ -152,3 +173,12 @@ prop_CSJ adversariesFlag numHonestSchedules = do
        in
          -- Sanity check: add @1 +@ after @>@ and watch the World burn.
          hdrSlot + jumpSize >= succWithOrigin tipSlot
+
+    disableBoringTimeouts gt =
+      gt
+        { gtChainSyncTimeouts =
+            (gtChainSyncTimeouts gt)
+              { mustReplyTimeout = Nothing,
+                idleTimeout = Nothing
+              }
+        }
diff --git a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Tests/DensityDisconnect.hs b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Tests/DensityDisconnect.hs
index e4c3376e36..73c2b2c10f 100644
--- a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Tests/DensityDisconnect.hs
+++ b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Tests/DensityDisconnect.hs
@@ -31,8 +31,7 @@ import Ouroboros.Consensus.Config.SecurityParam
 import Ouroboros.Consensus.Genesis.Governor (DensityBounds,
                      densityDisconnect, sharedCandidatePrefix)
 import Ouroboros.Consensus.MiniProtocol.ChainSync.Client
-                     (ChainSyncClientException (DensityTooLow),
-                     ChainSyncState (..))
+                     (ChainSyncClientException (..), ChainSyncState (..))
 import Ouroboros.Consensus.Util.Condense (condense)
 import Ouroboros.Network.AnchoredFragment (AnchoredFragment)
 import qualified Ouroboros.Network.AnchoredFragment as AF
@@ -68,7 +67,7 @@ import Test.Util.TestEnv (adjustQuickCheckMaxSize,

 tests :: TestTree
 tests =
-  adjustQuickCheckTests (* 4) $
+  adjustQuickCheckTests (* 10) $
  adjustQuickCheckMaxSize (`div` 5) $
  testGroup "gdd" [
    testProperty "basic" prop_densityDisconnectStatic,
@@ -474,9 +473,10 @@ prop_densityDisconnectTriggersChainSel =
      let
        othersCount = Map.size (adversarialPeers $ psSchedule gtSchedule)
        exnCorrect = case exceptionsByComponent ChainSyncClient stateView of
-          [fromException -> Just DensityTooLow] -> True
-          [] | othersCount == 0 -> True
-          _ -> False
+          [fromException -> Just DensityTooLow] -> True
+          [fromException -> Just CandidateTooSparse{}] -> True
+          [] | othersCount == 0 -> True
+          _ -> False
        tipPointCorrect = Just (getTrunkTip gtBlockTree) == svTipBlock
      in counterexample "Unexpected exceptions" exnCorrect
         .&&.
@@ -499,7 +499,8 @@ prop_densityDisconnectTriggersChainSel =
          (AF.Empty _) -> Origin
          (_ AF.:> tipBlock) -> At tipBlock
        advTip = getOnlyBranchTip tree
-      in mkPointSchedule $ peers'
+      in PointSchedule {
+        psSchedule = peers'
          -- Eagerly serve the honest tree, but after the adversary has
          -- advertised its chain up to the intersection.
          [[(Time 0, scheduleTipPoint trunkTip),
@@ -514,4 +515,7 @@ prop_densityDisconnectTriggersChainSel =
            (Time 0, ScheduleBlockPoint intersect),
            (Time 1, scheduleHeaderPoint advTip),
            (Time 1, scheduleBlockPoint advTip)
-            ]]
+            ]],
+        psStartOrder = [],
+        psMinEndTime = Time 0
+        }
diff --git a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Tests/LoE.hs b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Tests/LoE.hs
index 9dca4e004d..f26ffb0c91 100644
--- a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Tests/LoE.hs
+++ b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Tests/LoE.hs
@@ -26,21 +26,30 @@ import Test.Tasty
 import Test.Tasty.QuickCheck
 import Test.Util.Orphans.IOLike ()
 import Test.Util.PartialAccessors
-import Test.Util.TestEnv (adjustQuickCheckTests)
+import Test.Util.TestEnv (adjustQuickCheckMaxSize,
+                     adjustQuickCheckTests)

 tests :: TestTree
 tests =
+  adjustQuickCheckTests (* 10) $
    testGroup
    "LoE"
    [
-      adjustQuickCheckTests (`div` 5) $
+      adjustQuickCheckMaxSize (`div` 5) $
        testProperty "adversary does not hit timeouts" (prop_adversaryHitsTimeouts False),
-      adjustQuickCheckTests (`div` 5) $
+      adjustQuickCheckMaxSize (`div` 5) $
        testProperty "adversary hits timeouts" (prop_adversaryHitsTimeouts True)
    ]

 -- | Tests that the selection advances in presence of the LoE when a peer is
--- killed by something that is not LoE-aware, eg. the timeouts.
+-- killed by something that is not LoE-aware, eg. the timeouts. This test
+-- features an honest peer behaving normally and an adversarial peer behaving
+-- such that it will get killed by timeouts. We check that, after the adversary
+-- gets disconnected, the LoE gets updated to stop taking it into account. There
+-- are two variants of the test: one with timeouts enabled, and one without. In
+-- the case where timeouts are disabled, we check that we do in fact remain
+-- stuck at the intersection between trunk and other chain.
+--
 -- NOTE: Same as 'LoP.prop_delayAttack' with timeouts instead of LoP.
 prop_adversaryHitsTimeouts :: Bool -> Property
 prop_adversaryHitsTimeouts timeoutsEnabled =
@@ -115,4 +124,4 @@ prop_adversaryHitsTimeouts timeoutsEnabled =
            ]
        -- We want to wait more than the short wait timeout
        psMinEndTime = Time 11
-       in PointSchedule {psSchedule, psMinEndTime}
+       in PointSchedule {psSchedule, psStartOrder = [], psMinEndTime}
diff --git a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Tests/LoP.hs b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Tests/LoP.hs
index 552a10696d..6e633e98a4 100644
--- a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Tests/LoP.hs
+++ b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Tests/LoP.hs
@@ -22,7 +22,8 @@ import Test.Consensus.PeerSimulator.Run (SchedulerConfig (..),
                      defaultSchedulerConfig)
 import Test.Consensus.PeerSimulator.StateView
 import Test.Consensus.PointSchedule
-import Test.Consensus.PointSchedule.Peers (peers', peersOnlyHonest)
+import Test.Consensus.PointSchedule.Peers (peers', peersOnlyAdversary,
+                     peersOnlyHonest)
 import Test.Consensus.PointSchedule.Shrinking (shrinkPeerSchedules)
 import Test.Consensus.PointSchedule.SinglePeer (scheduleBlockPoint,
                      scheduleHeaderPoint, scheduleTipPoint)
@@ -30,10 +31,12 @@ import Test.Tasty
 import Test.Tasty.QuickCheck
 import Test.Util.Orphans.IOLike ()
 import Test.Util.PartialAccessors
-import Test.Util.TestEnv (adjustQuickCheckTests)
+import Test.Util.TestEnv (adjustQuickCheckMaxSize,
+                     adjustQuickCheckTests)

 tests :: TestTree
 tests =
+  adjustQuickCheckTests (* 10) $
    testGroup
    "LoP"
    [ -- \| NOTE: Running the test that must _not_ timeout (@prop_smoke False@) takes
      -- does all the computation (serving the headers, validating them, serving the
      -- block, validating them) while the former does nothing, because it timeouts
      -- before reaching the last tick of the point schedule.
-      adjustQuickCheckTests (`div` 10) $
+      adjustQuickCheckMaxSize (`div` 5) $
        testProperty "wait just enough" (prop_wait False),
        testProperty "wait too much" (prop_wait True),
+      adjustQuickCheckMaxSize (`div` 5) $
        testProperty "wait behind forecast horizon" prop_waitBehindForecastHorizon,
-      adjustQuickCheckTests (`div` 5) $
        testProperty "serve just fast enough" (prop_serve False),
+      adjustQuickCheckMaxSize (`div` 5) $
        testProperty "serve too slow" (prop_serve True),
-      adjustQuickCheckTests (`div` 5) $
+      adjustQuickCheckMaxSize (`div` 5) $
        testProperty "delaying attack succeeds without LoP" (prop_delayAttack False),
-      adjustQuickCheckTests (`div` 5) $
+      adjustQuickCheckMaxSize (`div` 5) $
        testProperty "delaying attack fails with LoP" (prop_delayAttack True)
    ]

+-- | Simple test in which we connect to only one peer, who advertises the tip of
+-- the block tree trunk and then does nothing. If the given boolean,
+-- @mustTimeout@, is @True@, then we wait just long enough for the LoP bucket to
+-- empty; we expect to observe an 'EmptyBucket' exception in the ChainSync
+-- client. If @mustTimeout@ is @False@, then we wait not quite as long, so the
+-- LoP bucket should not be empty at the end of the test and we should observe
+-- no exception in the ChainSync client.
 prop_wait :: Bool -> Property
 prop_wait mustTimeout =
  forAllGenesisTest
@@ -78,10 +90,20 @@ prop_wait mustTimeout =
    dullSchedule timeout (_ AF.:> tipBlock) =
      let offset :: DiffTime = if mustTimeout then 1 else -1
       in PointSchedule
-           { psSchedule = peersOnlyHonest [(Time 0, scheduleTipPoint tipBlock)]
+           { psSchedule =
+               (if mustTimeout then peersOnlyAdversary else peersOnlyHonest)
+                 [(Time 0, scheduleTipPoint tipBlock)]
+           , psStartOrder = []
           , psMinEndTime = Time $ timeout + offset
           }

+-- | Simple test in which we connect to only one peer, who advertises the tip of
+-- the block tree trunk, serves all of its headers, and then does nothing.
+-- Because the peer does not send its blocks, then the ChainSync client will end
+-- up stuck, waiting behind the forecast horizon. We expect that the LoP will
+-- then be disabled and that, therefore, one could wait forever in this state.
+-- We disable the timeouts and check that, indeed, the ChainSync client observes
+-- no exception.
 prop_waitBehindForecastHorizon :: Property
 prop_waitBehindForecastHorizon =
  forAllGenesisTest
@@ -108,6 +130,7 @@ prop_waitBehindForecastHorizon =
               [ (Time 0, scheduleTipPoint tipBlock)
               , (Time 0, scheduleHeaderPoint tipBlock)
               ]
+           , psStartOrder = []
           , psMinEndTime = Time 11
           }

@@ -166,13 +189,18 @@ prop_serve mustTimeout =
    makeSchedule :: (HasHeader blk) => AnchoredFragment blk -> PointSchedule blk
    makeSchedule (AF.Empty _) = error "fragment must have at least one block"
    makeSchedule fragment@(_ AF.:> tipBlock) =
-      mkPointSchedule $ peersOnlyHonest $
+      PointSchedule {
+        psSchedule =
+          (if mustTimeout then peersOnlyAdversary else peersOnlyHonest) $
            (Time 0, scheduleTipPoint tipBlock)
              : ( flip concatMap (zip [1 ..] (AF.toOldestFirst fragment)) $ \(i, block) ->
                    [ (Time (secondsRationalToDiffTime (i * timeBetweenBlocks)), scheduleHeaderPoint block),
                      (Time (secondsRationalToDiffTime (i * timeBetweenBlocks)), scheduleBlockPoint block)
                    ]
-              )
+              ),
+        psStartOrder = [],
+        psMinEndTime = Time 0
+        }

 -- NOTE: Same as 'LoE.prop_adversaryHitsTimeouts' with LoP instead of timeouts.
 prop_delayAttack :: Bool -> Property
 prop_delayAttack lopEnabled =
@@ -249,4 +277,4 @@ prop_delayAttack lopEnabled =
            ]
        -- Wait for LoP bucket to empty
        psMinEndTime = Time 11
-       in PointSchedule {psSchedule, psMinEndTime}
+       in PointSchedule {psSchedule, psStartOrder = [], psMinEndTime}
diff --git a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Tests/LongRangeAttack.hs b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Tests/LongRangeAttack.hs
index 67a6846c0f..dcf37b1b8e 100644
--- a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Tests/LongRangeAttack.hs
+++ b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Tests/LongRangeAttack.hs
@@ -34,6 +34,12 @@ tests =
      testProperty "one adversary" prop_longRangeAttack
    ]

+-- | This test case features a long-range attack with one adversary. The honest
+-- peer serves the block tree trunk, while the adversary serves its own chain,
+-- forking off the trunk by at least @k@ blocks, but less good than the trunk.
+-- The adversary serves the chain more rapidly than the honest peer. We check at
+-- the end that the selection is honest. This property does not hold with Praos,
+-- but should hold with Genesis.
 prop_longRangeAttack :: Property
 prop_longRangeAttack =
  -- NOTE: `shrinkPeerSchedules` only makes sense for tests that expect the
diff --git a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Tests/Uniform.hs b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Tests/Uniform.hs
index 7ebdd0a84e..26533421b6 100644
--- a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Tests/Uniform.hs
+++ b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Tests/Uniform.hs
@@ -18,11 +18,12 @@ module Test.Consensus.Genesis.Tests.Uniform (

 import Cardano.Slotting.Slot (SlotNo (SlotNo), WithOrigin (..))
 import Control.Monad (replicateM)
-import Control.Monad.Class.MonadTime.SI (Time, addTime)
-import Data.List (intercalate, sort)
+import Control.Monad.Class.MonadTime.SI (Time (..), addTime)
+import Data.List (intercalate, sort, uncons)
 import qualified Data.List.NonEmpty as NE
 import qualified Data.Map.Strict as Map
 import Data.Maybe (fromMaybe, mapMaybe)
+import qualified Data.Set as Set
 import Data.Word (Word64)
 import GHC.Stack (HasCallStack)
 import Ouroboros.Consensus.Block.Abstract (WithOrigin (NotOrigin))
@@ -40,7 +41,8 @@ import Test.Consensus.PeerSimulator.Run (SchedulerConfig (..),
                      defaultSchedulerConfig)
 import Test.Consensus.PeerSimulator.StateView
 import Test.Consensus.PointSchedule
-import Test.Consensus.PointSchedule.Peers (Peers (..), isHonestPeerId)
+import Test.Consensus.PointSchedule.Peers (Peers (..), getPeerIds,
+                     isHonestPeerId, peers')
 import Test.Consensus.PointSchedule.Shrinking
                      (shrinkByRemovingAdversaries, shrinkPeerSchedules)
 import Test.Consensus.PointSchedule.SinglePeer
@@ -72,9 +74,15 @@ tests =
    -- because this test writes the immutable chain to disk and `instance Binary TestBlock`
    -- chokes on long chains.
    adjustQuickCheckMaxSize (const 10) $
-      testProperty "the node is shut down and restarted after some time" prop_downtime
+      testProperty "the node is shut down and restarted after some time" prop_downtime,
+    testProperty "block fetch leashing attack" prop_blockFetchLeashingAttack
  ]

+-- | The conjunction of
+--
+-- * no honest peer has been disconnected,
+-- * the immutable tip is on the best chain, and
+-- * the immutable tip is no older than s + d + 1 slots
 theProperty ::
  GenesisTestFull TestBlock ->
  StateView TestBlock ->
@@ -89,8 +97,8 @@ theProperty genesisTest stateView@StateView{svSelectedChain} =
  -- to the governor that the density is too low.
  longerThanGenesisWindow ==>
  conjoin [
-    counterexample "An honest peer was disconnected" (not $ any isHonestPeerId disconnected),
-    counterexample ("The immutable tip is not honest: " ++ show immutableTip) $
+    counterexample "Honest peers shouldn't be disconnected" (not $ any isHonestPeerId disconnected),
+    counterexample ("The immutable tip should be honest: " ++ show immutableTip) $
      property (isHonest immutableTipHash),
    immutableTipIsRecent
  ]
  where
    advCount = Map.size (adversarialPeers (psSchedule $ gtSchedule genesisTest))

    immutableTipIsRecent =
-      counterexample ("Age of the immutable tip: " ++ show immutableTipAge) $
+      counterexample ("The immutable tip is too old: " ++ show immutableTipAge) $
        immutableTipAge `le` s + fromIntegral d + 1

    SlotNo immutableTipAge = case (honestTipSlot, immutableTipSlot) of
@@ -210,6 +218,7 @@ prop_leashingAttackStalling =
      , scEnableLoE = True
      , scEnableLoP = True
      , scEnableCSJ = True
+      , scEnableBlockFetchTimeouts = False
      }
    shrinkPeerSchedules
@@ -228,47 +237,39 @@ prop_leashingAttackStalling =
      advs <- mapM dropRandomPoints $ adversarialPeers sch
      pure $ ps {psSchedule = sch {adversarialPeers = advs}}

-    disableBoringTimeouts gt =
-      gt { gtChainSyncTimeouts = (gtChainSyncTimeouts gt)
-             { mustReplyTimeout = Nothing
-             , idleTimeout = Nothing
-             }
-         }
-
-    dropRandomPoints :: [(Time, SchedulePoint blk)] -> QC.Gen [(Time, SchedulePoint blk)]
-    dropRandomPoints ps = do
+dropRandomPoints :: [(Time, SchedulePoint blk)] -> QC.Gen [(Time, SchedulePoint blk)]
+dropRandomPoints ps = do
    let lenps = length ps
-    dropCount <- QC.choose (0, max 1 $ div lenps 5)
+        dropsMax = max 1 $ lenps - 1
+    dropCount <- QC.choose (div dropsMax 2, dropsMax)
    let dedup = map NE.head . NE.group
    is <- fmap (dedup . sort) $ replicateM dropCount $ QC.choose (0, lenps - 1)
    pure $ dropElemsAt ps is
-
+  where
    dropElemsAt :: [a] -> [Int] -> [a]
-    dropElemsAt xs [] = xs
-    dropElemsAt xs (i:is) =
-      let (ys, zs) = splitAt i xs
-      in ys ++ dropElemsAt (drop 1 zs) is
+    dropElemsAt xs is' =
+      let is = Set.fromList is'
+       in map fst $ filter (\(_, i) -> not $ i `Set.member` is) (zip xs [0..])

 -- | Test that the leashing attacks do not delay the immutable tip after. The
 -- immutable tip needs to be advanced enough when the honest peer has offered
 -- all of its ticks.
 --
--- This test is expected to fail because we don't test a genesis implementation
--- yet.
--
 -- See Note [Leashing attacks]
 prop_leashingAttackTimeLimited :: Property
 prop_leashingAttackTimeLimited =
  forAllGenesisTest
-    (disableBoringTimeouts <$> genChains (QC.choose (1, 4)) `enrichedWith` genTimeLimitedSchedule)
+    (disableCanAwaitTimeout . disableBoringTimeouts <$>
+      genChains (QC.choose (1, 4)) `enrichedWith` genTimeLimitedSchedule
+    )
    defaultSchedulerConfig
      { scTrace = False
      , scEnableLoE = True
      , scEnableLoP = True
-      , scEnableBlockFetchTimeouts = False
      , scEnableCSJ = True
+      , scEnableBlockFetchTimeouts = False
      }
    shrinkPeerSchedules
@@ -285,22 +286,16 @@ prop_leashingAttackTimeLimited =
          (gtLoPBucketParams genesisTest)
          (getHonestPeer honests)
          (Map.elems advs0)
-        advs = fmap (takePointsUntil timeLimit) advs0
+        advs1 = fmap (takePointsUntil timeLimit) advs0
+      advs <- mapM dropRandomPoints advs1
      pure $ PointSchedule
        { psSchedule = Peers honests advs
-        , psMinEndTime = timeLimit
+        , psStartOrder = []
+        , psMinEndTime = addGracePeriodDelay (length advs) timeLimit
        }

    takePointsUntil limit = takeWhile ((<= limit) . fst)
-    disableBoringTimeouts gt =
-      gt { gtChainSyncTimeouts = (gtChainSyncTimeouts gt)
-             { canAwaitTimeout = Nothing
-             , mustReplyTimeout = Nothing
-             , idleTimeout = Nothing
-             }
-         }
-
    estimateTimeBound ::
      AF.HasHeader blk =>
      ChainSyncTimeout
@@ -341,16 +336,22 @@ prop_leashingAttackTimeLimited =
    fromTipPoint (t, ScheduleTipPoint bp) = Just (t, bp)
    fromTipPoint _ = Nothing

+    disableCanAwaitTimeout :: GenesisTest blk schedule -> GenesisTest blk schedule
+    disableCanAwaitTimeout gt =
+      gt
+        { gtChainSyncTimeouts =
+            (gtChainSyncTimeouts gt)
+              { canAwaitTimeout = Nothing
+              }
+        }
+
 headCallStack :: HasCallStack => [a] -> a
 headCallStack = \case
  x:_ -> x
  _ -> error "headCallStack: empty list"

--- | Test that enabling the LoE using the updater that sets the LoE fragment to
--- the shared prefix (as used by the GDDG) causes the selection to remain at
+-- | Test that enabling the LoE causes the selection to remain at
 -- the first fork intersection (keeping the immutable tip honest).
---
--- This is pretty slow since it relies on timeouts to terminate the test.
 prop_loeStalling :: Property
 prop_loeStalling =
  forAllGenesisTest
@@ -363,7 +364,8 @@ prop_loeStalling =
    defaultSchedulerConfig
      {
        scEnableLoE = True,
-        scEnableCSJ = True
+        scEnableCSJ = True,
+        scEnableBlockFetchTimeouts = False
      }
    shrinkPeerSchedules
@@ -396,7 +398,7 @@ prop_loeStalling =
 prop_downtime :: Property
 prop_downtime = forAllGenesisTest
-    (genChains (QC.choose (1, 4)) `enrichedWith` \ gt ->
+    (disableBoringTimeouts <$> genChains (QC.choose (1, 4)) `enrichedWith` \ gt ->
      ensureScheduleDuration gt <$> stToGen (uniformPoints (pointsGeneratorParams gt) (gtBlockTree gt)))
    defaultSchedulerConfig
@@ -404,14 +406,88 @@ prop_downtime = forAllGenesisTest
      , scEnableLoP = True
      , scDowntime = Just 11
      , scEnableCSJ = True
+      , scEnableBlockFetchTimeouts = False
      }
    shrinkPeerSchedules
-    theProperty
+    (\genesisTest stateView ->
+      counterexample (unlines
+        [ "TODO: Shutting down the node inserts delays in the simulation that"
+        , "are not reflected in the point schedule table. Reporting these delays"
+        , "correctly is still to be done."
+        ]) $
+      theProperty genesisTest stateView
+    )
  where
    pointsGeneratorParams gt = PointsGeneratorParams
      { pgpExtraHonestPeers = fromIntegral (gtExtraHonestPeers gt)
      , pgpDowntime = DowntimeWithSecurityParam (gtSecurityParam gt)
      }
+
+-- | Test that the block fetch leashing attack does not delay the immutable tip.
+-- This leashing attack consists in having adversarial peers that behave
+-- honestly when it comes to ChainSync but refuse to send blocks. A proper node
+-- under test should detect those behaviours as adversarial and find a way to
+-- make progress.
+prop_blockFetchLeashingAttack :: Property
+prop_blockFetchLeashingAttack =
+  forAllGenesisTest
+    (disableBoringTimeouts <$> genChains (pure 0) `enrichedWith` genBlockFetchLeashingSchedule)
+    defaultSchedulerConfig
+      { scEnableLoE = True,
+        scEnableLoP = True,
+        scEnableCSJ = True,
+        scEnableBlockFetchTimeouts = False
+      }
+    shrinkPeerSchedules
+    theProperty
+  where
+    genBlockFetchLeashingSchedule :: GenesisTest TestBlock () -> QC.Gen (PointSchedule TestBlock)
+    genBlockFetchLeashingSchedule genesisTest = do
+      -- A schedule with several honest peers and no adversaries. We will then
+      -- keep one of those as honest and remove the block points from the
+      -- others, hence producing one honest peer and several adversaries.
+      PointSchedule {psSchedule} <-
+        stToGen $
+          uniformPoints
+            (PointsGeneratorParams {pgpExtraHonestPeers = 1, pgpDowntime = NoDowntime})
+            (gtBlockTree genesisTest)
+      peers <- QC.shuffle $ Map.elems $ honestPeers psSchedule
+      let (honest, adversaries) = fromMaybe (error "blockFetchLeashingAttack") $ uncons peers
+          adversaries' = map (filter (not . isBlockPoint . snd)) adversaries
+          psSchedule' = peers' [honest] adversaries'
+      -- Important to shuffle the order in which the peers start, otherwise the
+      -- honest peer starts first and systematically becomes dynamo.
+      psStartOrder <- shuffle $ getPeerIds psSchedule'
+      let maxTime = addGracePeriodDelay (length adversaries') $ maximum $
+            Time 0 : [ pt | s <- honest : adversaries', (pt, _) <- take 1 (reverse s) ]
+      pure $ PointSchedule {
+          psSchedule = psSchedule',
+          psStartOrder,
+          -- Allow the blockfetch decision logic to run after the last tick.
+          -- 11 is the grace period for unresponsive peers that should send
+          -- blocks
+          psMinEndTime = addTime 11 maxTime
+        }
+
+    isBlockPoint :: SchedulePoint blk -> Bool
+    isBlockPoint (ScheduleBlockPoint _) = True
+    isBlockPoint _ = False
+
+-- | Add a delay at the end of tests to account for retention of blocks
+-- by adversarial peers in blockfetch. This delay is 10 seconds per
+-- adversarial peer.
+addGracePeriodDelay :: Int -> Time -> Time
+addGracePeriodDelay adversaryCount = addTime (fromIntegral adversaryCount * 10)
+
+disableBoringTimeouts :: GenesisTest blk schedule -> GenesisTest blk schedule
+disableBoringTimeouts gt =
+  gt
+    { gtChainSyncTimeouts =
+        (gtChainSyncTimeouts gt)
+          { mustReplyTimeout = Nothing
+          , idleTimeout = Nothing
+          }
+    }
diff --git a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/BlockFetch.hs b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/BlockFetch.hs
index 536a49f2fc..1b3958a4e3 100644
--- a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/BlockFetch.hs
+++ b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/BlockFetch.hs
@@ -23,25 +23,29 @@ import Control.Monad.Class.MonadTime
 import Control.Monad.Class.MonadTimer.SI (MonadTimer)
 import Control.Tracer (Tracer, nullTracer, traceWith)
 import Data.Functor.Contravariant ((>$<))
-import Data.Map.Strict (Map)
 import Network.TypedProtocol.Codec (AnyMessage, PeerHasAgency (..),
                      PeerRole)
 import Ouroboros.Consensus.Block (HasHeader)
 import Ouroboros.Consensus.Block.Abstract (Header, Point (..))
 import qualified Ouroboros.Consensus.MiniProtocol.BlockFetch.ClientInterface as BlockFetchClientInterface
+import Ouroboros.Consensus.MiniProtocol.ChainSync.Client
+                     (ChainSyncClientHandleCollection)
+import Ouroboros.Consensus.Node.Genesis (GenesisConfig (..),
+                     enableGenesisConfigDefault)
 import Ouroboros.Consensus.Node.ProtocolInfo
                      (NumCoreNodes (NumCoreNodes))
 import Ouroboros.Consensus.Storage.ChainDB.API
 import Ouroboros.Consensus.Util (ShowProxy)
 import Ouroboros.Consensus.Util.IOLike (DiffTime,
-                     Exception (fromException), IOLike, STM, atomically, retry,
-                     try)
+                     Exception (fromException), IOLike, atomically, retry, try)
 import Ouroboros.Consensus.Util.ResourceRegistry
-import Ouroboros.Network.AnchoredFragment (AnchoredFragment)
 import Ouroboros.Network.BlockFetch (BlockFetchConfiguration (..),
-                     FetchClientRegistry, FetchMode (..), blockFetchLogic,
-                     bracketFetchClient, bracketKeepAliveClient)
+                     FetchClientRegistry, GenesisBlockFetchConfiguration (..),
+                     blockFetchLogic, bracketFetchClient,
+                     bracketKeepAliveClient)
 import Ouroboros.Network.BlockFetch.Client (blockFetchClient)
+import Ouroboros.Network.BlockFetch.ConsensusInterface
+                     (GenesisFetchMode (..))
 import Ouroboros.Network.Channel (Channel)
 import Ouroboros.Network.ControlMessage (ControlMessageSTM)
 import Ouroboros.Network.Driver (runPeer)
@@ -73,49 +77,51 @@ import Test.Util.Time (dawnOfTime)

 startBlockFetchLogic ::
     forall m.
-     (IOLike m)
-  => ResourceRegistry m
+     (IOLike m, MonadTimer m)
+  => Bool -- ^ Whether to enable chain selection starvation
+  -> ResourceRegistry m
  -> Tracer m (TraceEvent TestBlock)
  -> ChainDB m TestBlock
  -> FetchClientRegistry PeerId (Header TestBlock) TestBlock m
-  -> STM m (Map PeerId (AnchoredFragment (Header TestBlock)))
+  -> ChainSyncClientHandleCollection PeerId m TestBlock
  -> m ()
-startBlockFetchLogic registry tracer chainDb fetchClientRegistry getCandidates = do
+startBlockFetchLogic enableChainSelStarvation registry tracer chainDb fetchClientRegistry csHandlesCol = do
    let slotForgeTime :: BlockFetchClientInterface.SlotForgeTimeOracle m blk
        slotForgeTime _ = pure dawnOfTime

        blockFetchConsensusInterface =
          BlockFetchClientInterface.mkBlockFetchConsensusInterface
+            nullTracer -- FIXME
            (TestBlockConfig $ NumCoreNodes 0) -- Only needed when minting blocks
            (BlockFetchClientInterface.defaultChainDbView chainDb)
-            getCandidates
+            csHandlesCol
            -- The size of headers in bytes is irrelevant because our tests
            -- do not serialize the blocks.
            (\_hdr -> 1000)
            slotForgeTime
-            -- Initially, we tried FetchModeBulkSync, but adversaries had the
-            -- opportunity to delay syncing by not responding to block requests.
-            -- The BlockFetch logic would then wait for the timeout to expire
-            -- before trying to download the block from another peer.
-            (pure FetchModeDeadline)
+            -- This is a syncing test, so we use 'FetchModeGenesis'.
+            (pure FetchModeGenesis)
+
+        bfcGenesisBFConfig = if enableChainSelStarvation
+          then GenesisBlockFetchConfiguration
+            { gbfcGracePeriod =
+                if enableChainSelStarvation then
+                  10 -- default value for cardano-node at the time of writing
+                else
+                  1000000 -- (more than 11 days)
+            }
+          else gcBlockFetchConfig enableGenesisConfigDefault

        -- Values taken from
        -- ouroboros-consensus-diffusion/src/unstable-diffusion-testlib/Test/ThreadNet/Network.hs
        blockFetchCfg = BlockFetchConfiguration
-          { -- We set a higher value here to allow downloading blocks from all
-            -- peers.
-            --
-            -- If the value is too low, block downloads from a peer may prevent
-            -- blocks from being downloaded from other peers. This can be
-            -- problematic, since the batch download of a simulated BlockFetch
-            -- server can last serveral ticks if the block pointer is not
-            -- advanced to allow completion of the batch.
-            --
-            bfcMaxConcurrencyBulkSync = 50
-          , bfcMaxConcurrencyDeadline = 50
+          { bfcMaxConcurrencyBulkSync = 50
+          , bfcMaxConcurrencyDeadline = 50 -- unused because of @pure FetchModeBulkSync@ above
          , bfcMaxRequestsInflight = 10
-          , bfcDecisionLoopInterval = 0
+          , bfcDecisionLoopIntervalPraos = 0
+          , bfcDecisionLoopIntervalGenesis = 0
          , bfcSalt = 0
+          , bfcGenesisBFConfig
          }

    void $ forkLinkedThread registry "BlockFetchLogic" $
diff --git a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/CSJInvariants.hs b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/CSJInvariants.hs
index 7b83e04101..0d66e1f154 100644
--- a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/CSJInvariants.hs
+++ b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/CSJInvariants.hs
@@ -19,7 +19,7 @@ import Data.Typeable (Typeable)
 import Ouroboros.Consensus.Block (Point, StandardHash, castPoint)
 import qualified Ouroboros.Consensus.MiniProtocol.ChainSync.Client.State as CSState
 import Ouroboros.Consensus.Util.IOLike (Exception, MonadSTM (STM),
-                     MonadThrow (throwIO), StrictTVar, readTVar)
+                     MonadThrow (throwIO), readTVar)
 import Ouroboros.Consensus.Util.STM (Watcher (..))

 --------------------------------------------------------------------------------
@@ -109,10 +109,10 @@ readAndView ::
  forall m peer blk.
  ( MonadSTM m
  ) =>
-  StrictTVar m (Map peer (CSState.ChainSyncClientHandle m blk)) ->
+  STM m (Map peer (CSState.ChainSyncClientHandle m blk)) ->
  STM m (View peer blk)
-readAndView handles =
-  traverse (fmap idealiseState . readTVar . CSState.cschJumping) =<< readTVar handles
+readAndView readHandles =
+  traverse (fmap idealiseState . readTVar . CSState.cschJumping) =<< readHandles
  where
    -- Idealise the state of a ChainSync peer with respect to ChainSync jumping.
    -- In particular, we get rid of non-comparable information such as the TVars
@@ -170,7 +170,7 @@ watcher ::
    Typeable blk,
    StandardHash blk
  ) =>
-  StrictTVar m (Map peer (CSState.ChainSyncClientHandle m blk)) ->
+  STM m (Map peer (CSState.ChainSyncClientHandle m blk)) ->
  Watcher m (View peer blk) (View peer blk)
 watcher handles =
  Watcher
diff --git a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/ChainSync.hs b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/ChainSync.hs
index 2197bea732..5678b4d7c9 100644
--- a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/ChainSync.hs
+++ b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/ChainSync.hs
@@ -13,7 +13,6 @@ module Test.Consensus.PeerSimulator.ChainSync (
 import Control.Exception (SomeException)
 import Control.Monad.Class.MonadTimer.SI (MonadTimer)
 import Control.Tracer (Tracer (Tracer), nullTracer, traceWith)
-import Data.Map.Strict (Map)
 import Data.Proxy (Proxy (..))
 import Network.TypedProtocol.Codec (AnyMessage)
 import Ouroboros.Consensus.Block (Header, Point)
@@ -21,15 +20,16 @@ import Ouroboros.Consensus.Config (TopLevelConfig (..))
 import Ouroboros.Consensus.Ledger.SupportsProtocol
                      (LedgerSupportsProtocol)
 import Ouroboros.Consensus.MiniProtocol.ChainSync.Client
-                     (CSJConfig (..), ChainDbView, ChainSyncClientHandle,
-                     ChainSyncLoPBucketConfig, ChainSyncStateView (..),
-                     Consensus, bracketChainSyncClient, chainSyncClient)
+                     (CSJConfig (..), ChainDbView,
+                     ChainSyncClientHandleCollection, ChainSyncLoPBucketConfig,
+                     ChainSyncStateView (..), Consensus, bracketChainSyncClient,
+                     chainSyncClient)
 import qualified Ouroboros.Consensus.MiniProtocol.ChainSync.Client as CSClient
 import qualified Ouroboros.Consensus.MiniProtocol.ChainSync.Client.InFutureCheck as InFutureCheck
 import Ouroboros.Consensus.Node.GsmState (GsmState (Syncing))
 import Ouroboros.Consensus.Util (ShowProxy)
 import Ouroboros.Consensus.Util.IOLike (Exception (fromException),
-                     IOLike, MonadCatch (try), StrictTVar)
+                     IOLike, MonadCatch (try))
 import Ouroboros.Network.Block (Tip)
 import Ouroboros.Network.Channel (Channel)
 import Ouroboros.Network.ControlMessage (ControlMessage (..))
@@ -124,7 +124,7 @@ runChainSyncClient ::
  -- ^ Configuration for ChainSync Jumping
  StateViewTracers blk m ->
  -- ^ Tracers used to record information for the future 'StateView'.
-  StrictTVar m (Map PeerId (ChainSyncClientHandle m blk)) ->
+  ChainSyncClientHandleCollection PeerId m blk ->
  -- ^ A TVar containing a map of states for each peer. This
  -- function will (via 'bracketChainSyncClient') register and de-register a
  -- TVar for the state of the peer.
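
As a usage sketch (not part of the patch): several files in this patch replace a `StrictTVar m (Map peer (ChainSyncClientHandle m blk))` with the `ChainSyncClientHandleCollection` interface, reading the map through `cschcMap` and clearing it with `cschcRemoveAllHandles` (see NodeKernel.hs, Resources.hs and NodeLifecycle.hs). The snippet below assumes only the names that appear in the patch; `countChainSyncPeers` is a hypothetical helper.

    import qualified Data.Map.Strict as Map
    import           Ouroboros.Consensus.MiniProtocol.ChainSync.Client
                         (ChainSyncClientHandleCollection (..))
    import           Ouroboros.Consensus.Util.IOLike (IOLike, atomically)

    -- | Hypothetical helper: count the currently registered ChainSync peers by
    -- reading the handle map atomically through the collection interface.
    countChainSyncPeers ::
         IOLike m
      => ChainSyncClientHandleCollection peer m blk
      -> m Int
    countChainSyncPeers handles = Map.size <$> atomically (cschcMap handles)

    -- Allocation and teardown mirror Resources.hs and NodeLifecycle.hs:
    --   psrHandles <- atomically newChainSyncClientHandleCollection
    --   ...
    --   atomically $ cschcRemoveAllHandles psrHandles
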
diff --git a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/NodeLifecycle.hs b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/NodeLifecycle.hs index 3d6ea7d04e..993d1b1263 100644 --- a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/NodeLifecycle.hs +++ b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/NodeLifecycle.hs @@ -19,6 +19,8 @@ import Data.Set (Set) import qualified Data.Set as Set import Ouroboros.Consensus.Block import Ouroboros.Consensus.Config (TopLevelConfig (..)) +import Ouroboros.Consensus.MiniProtocol.ChainSync.Client + (ChainSyncClientHandleCollection (..)) import Ouroboros.Consensus.Storage.ChainDB.API import qualified Ouroboros.Consensus.Storage.ChainDB.API as ChainDB import qualified Ouroboros.Consensus.Storage.ChainDB.Impl as ChainDB @@ -204,7 +206,7 @@ lifecycleStop resources LiveNode {lnStateViewTracers, lnCopyToImmDb, lnPeers} = releaseAll lrRegistry -- Reset the resources in TVars that were allocated by the simulator atomically $ do - modifyTVar psrHandles (const mempty) + cschcRemoveAllHandles psrHandles case lrLoEVar of LoEEnabled var -> modifyTVar var (const (AF.Empty AF.AnchorGenesis)) LoEDisabled -> pure () diff --git a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/Resources.hs b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/Resources.hs index c4fe394a60..a594d9059c 100644 --- a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/Resources.hs +++ b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/Resources.hs @@ -25,7 +25,8 @@ import Data.Traversable (for) import Ouroboros.Consensus.Block (WithOrigin (Origin)) import Ouroboros.Consensus.Block.Abstract (Header, Point (..)) import Ouroboros.Consensus.MiniProtocol.ChainSync.Client - (ChainSyncClientHandle) + (ChainSyncClientHandleCollection, + newChainSyncClientHandleCollection) import Ouroboros.Consensus.Util.IOLike (IOLike, MonadSTM (STM), StrictTVar, readTVar, uncheckedNewTVarM, writeTVar) import qualified Ouroboros.Network.AnchoredFragment as AF @@ -115,7 +116,7 @@ data PeerSimulatorResources m blk = -- | Handles to interact with the ChainSync client of each peer. -- See 'ChainSyncClientHandle' for more details. - psrHandles :: StrictTVar m (Map PeerId (ChainSyncClientHandle m TestBlock)) + psrHandles :: ChainSyncClientHandleCollection PeerId m TestBlock } -- | Create 'ChainSyncServerHandlers' for our default implementation using 'NodeState'. 
@@ -233,5 +234,5 @@ makePeerSimulatorResources tracer blockTree peers = do resources <- for peers $ \ peerId -> do peerResources <- makePeerResources tracer blockTree peerId pure (peerId, peerResources) - psrHandles <- uncheckedNewTVarM mempty + psrHandles <- atomically newChainSyncClientHandleCollection pure PeerSimulatorResources {psrPeers = Map.fromList $ toList resources, psrHandles} diff --git a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/Run.hs b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/Run.hs index 586f7776ca..6f88312ed2 100644 --- a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/Run.hs +++ b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/Run.hs @@ -10,12 +10,13 @@ module Test.Consensus.PeerSimulator.Run ( , runPointSchedule ) where -import Control.Monad (foldM, forM, void) +import Control.Monad (foldM, forM, void, when) import Control.Monad.Class.MonadTime (MonadTime) import Control.Monad.Class.MonadTimer.SI (MonadTimer) import Control.Tracer (Tracer (..), nullTracer, traceWith) import Data.Coerce (coerce) import Data.Foldable (for_) +import Data.List (sort) import qualified Data.List.NonEmpty as NonEmpty import Data.Map.Strict (Map) import qualified Data.Map.Strict as Map @@ -26,7 +27,9 @@ import Ouroboros.Consensus.Ledger.SupportsProtocol (LedgerSupportsProtocol) import Ouroboros.Consensus.MiniProtocol.ChainSync.Client (CSJConfig (..), CSJEnabledConfig (..), ChainDbView, - ChainSyncClientHandle, ChainSyncLoPBucketConfig (..), + ChainSyncClientHandle, + ChainSyncClientHandleCollection (..), + ChainSyncLoPBucketConfig (..), ChainSyncLoPBucketEnabledConfig (..), viewChainSyncState) import qualified Ouroboros.Consensus.MiniProtocol.ChainSync.Client as CSClient import qualified Ouroboros.Consensus.Node.GsmState as GSM @@ -102,6 +105,11 @@ data SchedulerConfig = -- duration to trigger it. , scDowntime :: Maybe DiffTime + -- | Enable the use of ChainSel starvation information in the block fetch + -- decision logic. It is never actually disabled, but rather the grace + -- period is made virtually infinite. + , scEnableChainSelStarvation :: Bool + -- | Whether to enable ChainSync Jumping. The parameters come from -- 'GenesisTest'. 
, scEnableCSJ :: Bool @@ -119,6 +127,7 @@ defaultSchedulerConfig = scEnableLoE = False, scEnableLoP = False, scDowntime = Nothing, + scEnableChainSelStarvation = True, scEnableCSJ = False } @@ -147,7 +156,7 @@ startChainSyncConnectionThread :: ChainSyncLoPBucketConfig -> CSJConfig -> StateViewTracers blk m -> - StrictTVar m (Map PeerId (ChainSyncClientHandle m blk)) -> + ChainSyncClientHandleCollection PeerId m blk -> m (Thread m (), Thread m ()) startChainSyncConnectionThread registry @@ -212,8 +221,8 @@ smartDelay :: LiveNode blk m -> DiffTime -> m (LiveNode blk m) -smartDelay NodeLifecycle {nlMinDuration, nlStart, nlShutdown} node duration - | Just minInterval <- nlMinDuration, duration > minInterval = do +smartDelay lifecycle@NodeLifecycle {nlStart, nlShutdown} node duration + | itIsTimeToRestartTheNode lifecycle duration = do results <- nlShutdown node threadDelay duration nlStart results @@ -221,6 +230,12 @@ smartDelay _ node duration = do threadDelay duration pure node +itIsTimeToRestartTheNode :: NodeLifecycle blk m -> DiffTime -> Bool +itIsTimeToRestartTheNode NodeLifecycle {nlMinDuration} duration = + case nlMinDuration of + Just minInterval -> duration > minInterval + Nothing -> False + -- | The 'Tick' contains a state update for a specific peer. -- If the peer has not terminated by protocol rules, this will update its TMVar -- with the new state, thereby unblocking the handler that's currently waiting @@ -230,7 +245,7 @@ smartDelay _ node duration = do dispatchTick :: forall m blk. IOLike m => Tracer m (TraceSchedulerEvent blk) -> - StrictTVar m (Map PeerId (ChainSyncClientHandle m blk)) -> + STM m (Map PeerId (ChainSyncClientHandle m blk)) -> Map PeerId (PeerResources m blk) -> NodeLifecycle blk m -> LiveNode blk m -> @@ -250,7 +265,7 @@ dispatchTick tracer varHandles peers lifecycle node (number, (duration, Peer pid traceNewTick = do currentChain <- atomically $ ChainDB.getCurrentChain (lnChainDb node) (csState, jumpingStates) <- atomically $ do - m <- readTVar varHandles + m <- varHandles csState <- traverse (readTVar . CSClient.cschState) (m Map.!? 
pid) jumpingStates <- forM (Map.toList m) $ \(peer, h) -> do st <- readTVar (CSClient.cschJumping h) @@ -272,7 +287,7 @@ dispatchTick tracer varHandles peers lifecycle node (number, (duration, Peer pid runScheduler :: IOLike m => Tracer m (TraceSchedulerEvent blk) -> - StrictTVar m (Map PeerId (ChainSyncClientHandle m blk)) -> + STM m (Map PeerId (ChainSyncClientHandle m blk)) -> PointSchedule blk -> Map PeerId (PeerResources m blk) -> NodeLifecycle blk m -> @@ -287,7 +302,16 @@ runScheduler tracer varHandles ps@PointSchedule{psMinEndTime} peers lifecycle@No else Nothing _ -> Just $ coerce psMinEndTime LiveNode{lnChainDb, lnStateViewTracers} <- - maybe (pure nodeEnd) (smartDelay lifecycle nodeEnd) extraDelay + case extraDelay of + Just duration -> do + nodeEnd' <- smartDelay lifecycle nodeEnd duration + -- Give an opportunity to the node to finish whatever it was doing at + -- shutdown + when (itIsTimeToRestartTheNode lifecycle duration) $ + threadDelay $ coerce psMinEndTime + pure nodeEnd' + Nothing -> + pure nodeEnd traceWith tracer TraceEndOfTime pure (lnChainDb, lnStateViewTracers) where @@ -314,7 +338,7 @@ mkStateTracer :: m (Tracer m ()) mkStateTracer schedulerConfig GenesisTest {gtBlockTree} PeerSimulatorResources {psrHandles, psrPeers} chainDb | scTraceState schedulerConfig - , let getCandidates = viewChainSyncState psrHandles CSClient.csCandidate + , let getCandidates = viewChainSyncState (cschcMap psrHandles) CSClient.csCandidate getCurrentChain = ChainDB.getCurrentChain chainDb getPoints = traverse readTVar (srCurrentState . prShared <$> psrPeers) = peerSimStateDiagramSTMTracerDebug gtBlockTree getCurrentChain getCandidates getPoints @@ -333,13 +357,18 @@ startNode :: LiveInterval TestBlock m -> m () startNode schedulerConfig genesisTest interval = do - let - handles = psrHandles lrPeerSim - getCandidates = viewChainSyncState handles CSClient.csCandidate + let handles = psrHandles lrPeerSim fetchClientRegistry <- newFetchClientRegistry let chainDbView = CSClient.defaultChainDbView lnChainDb - activePeers = Map.restrictKeys (psrPeers lrPeerSim) (lirActive liveResult) - for_ activePeers $ \PeerResources {prShared, prChainSync, prBlockFetch} -> do + activePeers = Map.toList $ Map.restrictKeys (psrPeers lrPeerSim) (lirActive liveResult) + peersStartOrder = psStartOrder ++ sort [pid | (pid, _) <- activePeers, pid `notElem` psStartOrder] + activePeersOrdered = [ + peerResources + | pid <- peersStartOrder + , (pid', peerResources) <- activePeers + , pid == pid' + ] + for_ activePeersOrdered $ \PeerResources {prShared, prChainSync, prBlockFetch} -> do let pid = srPeerId prShared forkLinkedThread lrRegistry ("Peer overview " ++ show pid) $ -- The peerRegistry helps ensuring that if any thread fails, then @@ -375,7 +404,13 @@ startNode schedulerConfig genesisTest interval = do -- The block fetch logic needs to be started after the block fetch clients -- otherwise, an internal assertion fails because getCandidates yields more -- peer fragments than registered clients. 
- BlockFetch.startBlockFetchLogic lrRegistry lrTracer lnChainDb fetchClientRegistry getCandidates + BlockFetch.startBlockFetchLogic + (scEnableChainSelStarvation schedulerConfig) + lrRegistry + lrTracer + lnChainDb + fetchClientRegistry + handles for_ lrLoEVar $ \ var -> do forkLinkedWatcher lrRegistry "LoE updater background" $ @@ -383,11 +418,16 @@ startNode schedulerConfig genesisTest interval = do lrConfig (mkGDDTracerTestBlock lrTracer) lnChainDb + 0.0 -- The rate limit makes simpler the calculations of how long tests + -- should run and still should produce interesting interleavings. + -- It is similar to the setting of bfcDecisionLoopInterval in + -- Test.Consensus.PeerSimulator.BlockFetch (pure GSM.Syncing) -- TODO actually run GSM - (readTVar handles) + (cschcMap handles) var - void $ forkLinkedWatcher lrRegistry "CSJ invariants watcher" $ CSJInvariants.watcher handles + void $ forkLinkedWatcher lrRegistry "CSJ invariants watcher" $ + CSJInvariants.watcher (cschcMap handles) where LiveResources {lrRegistry, lrTracer, lrConfig, lrPeerSim, lrLoEVar} = resources @@ -402,6 +442,7 @@ startNode schedulerConfig genesisTest interval = do , gtBlockFetchTimeouts , gtLoPBucketParams = LoPBucketParams { lbpCapacity, lbpRate } , gtCSJParams = CSJParams { csjpJumpSize } + , gtSchedule = PointSchedule {psStartOrder} } = genesisTest StateViewTracers{svtTraceTracer} = lnStateViewTracers @@ -483,7 +524,7 @@ runPointSchedule schedulerConfig genesisTest tracer0 = lifecycle <- nodeLifecycle schedulerConfig genesisTest tracer registry peerSim (chainDb, stateViewTracers) <- runScheduler (Tracer $ traceWith tracer . TraceSchedulerEvent) - (psrHandles peerSim) + (cschcMap (psrHandles peerSim)) gtSchedule (psrPeers peerSim) lifecycle diff --git a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/Tests/LinkedThreads.hs b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/Tests/LinkedThreads.hs index a94c69a968..e023e24335 100644 --- a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/Tests/LinkedThreads.hs +++ b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/Tests/LinkedThreads.hs @@ -72,4 +72,4 @@ prop_chainSyncKillsBlockFetch = do (Time 0, scheduleHeaderPoint firstBlock) ] psMinEndTime = Time $ timeout + 1 - in PointSchedule {psSchedule, psMinEndTime} + in PointSchedule {psSchedule, psStartOrder = [], psMinEndTime} diff --git a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/Tests/Rollback.hs b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/Tests/Rollback.hs index 17509dc458..e2f31d8919 100644 --- a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/Tests/Rollback.hs +++ b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/Tests/Rollback.hs @@ -95,7 +95,11 @@ rollbackSchedule n blockTree = , banalSchedulePoints trunkSuffix , banalSchedulePoints (btbSuffix branch) ] - in mkPointSchedule $ peersOnlyHonest $ zip (map (Time . (/30)) [0..]) schedulePoints + in PointSchedule { + psSchedule = peersOnlyHonest $ zip (map (Time . (/30)) [0..]) schedulePoints, + psStartOrder = [], + psMinEndTime = Time 0 + } where banalSchedulePoints :: AnchoredFragment blk -> [SchedulePoint blk] banalSchedulePoints = concatMap banalSchedulePoints' . 
toOldestFirst diff --git a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/Tests/Timeouts.hs b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/Tests/Timeouts.hs index 0c594c67e3..e33ac3154b 100644 --- a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/Tests/Timeouts.hs +++ b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/Tests/Timeouts.hs @@ -18,7 +18,8 @@ import Test.Consensus.Genesis.Setup import Test.Consensus.PeerSimulator.Run (defaultSchedulerConfig) import Test.Consensus.PeerSimulator.StateView import Test.Consensus.PointSchedule -import Test.Consensus.PointSchedule.Peers (peersOnlyHonest) +import Test.Consensus.PointSchedule.Peers (peersOnlyAdversary, + peersOnlyHonest) import Test.Consensus.PointSchedule.SinglePeer (scheduleBlockPoint, scheduleHeaderPoint, scheduleTipPoint) import Test.QuickCheck @@ -63,11 +64,11 @@ prop_timeouts mustTimeout = do dullSchedule _ (AF.Empty _) = error "requires a non-empty block tree" dullSchedule timeout (_ AF.:> tipBlock) = let offset :: DiffTime = if mustTimeout then 1 else -1 - psSchedule = peersOnlyHonest $ [ + psSchedule = (if mustTimeout then peersOnlyAdversary else peersOnlyHonest) $ [ (Time 0, scheduleTipPoint tipBlock), (Time 0, scheduleHeaderPoint tipBlock), (Time 0, scheduleBlockPoint tipBlock) ] -- This keeps the test running long enough to pass the timeout by 'offset'. psMinEndTime = Time $ timeout + offset - in PointSchedule {psSchedule, psMinEndTime} + in PointSchedule {psSchedule, psStartOrder = [], psMinEndTime} diff --git a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/Trace.hs b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/Trace.hs index f003dfe447..199f9d35df 100644 --- a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/Trace.hs +++ b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/Trace.hs @@ -190,7 +190,7 @@ traceSchedulerEventTestBlockWith :: Tracer m String -> TraceSchedulerEvent TestBlock -> m () -traceSchedulerEventTestBlockWith setTickTime tracer0 _tracer = \case +traceSchedulerEventTestBlockWith setTickTime tracer0 tracer = \case TraceBeginningOfTime -> traceWith tracer0 "Running point schedule ..." 
TraceEndOfTime -> @@ -221,13 +221,13 @@ traceSchedulerEventTestBlockWith setTickTime tracer0 _tracer = \case " jumping states:\n" ++ traceJumpingStates jumpingStates ] TraceNodeShutdownStart immTip -> - traceWith tracer0 (" Initiating node shutdown with immutable tip at slot " ++ condense immTip) + traceWith tracer (" Initiating node shutdown with immutable tip at slot " ++ condense immTip) TraceNodeShutdownComplete -> - traceWith tracer0 " Node shutdown complete" + traceWith tracer " Node shutdown complete" TraceNodeStartupStart -> - traceWith tracer0 " Initiating node startup" + traceWith tracer " Initiating node startup" TraceNodeStartupComplete selection -> - traceWith tracer0 (" Node startup complete with selection " ++ terseHFragment selection) + traceWith tracer (" Node startup complete with selection " ++ terseHFragment selection) where traceJumpingStates :: [(PeerId, ChainSyncJumpingState m TestBlock)] -> String @@ -369,6 +369,10 @@ traceChainDBEventTestBlockWith tracer = \case AddedReprocessLoEBlocksToQueue -> trace $ "Requested ChainSel run" _ -> pure () + ChainDB.TraceChainSelStarvationEvent (ChainDB.ChainSelStarvationStarted time) -> + trace $ "ChainSel starvation started at " ++ prettyTime time + ChainDB.TraceChainSelStarvationEvent (ChainDB.ChainSelStarvationEnded time pt) -> + trace $ "ChainSel starvation ended at " ++ prettyTime time ++ " thanks to " ++ terseRealPoint pt _ -> pure () where trace = traceUnitWith tracer "ChainDB" diff --git a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PointSchedule.hs b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PointSchedule.hs index 52990a0f9a..8460882828 100644 --- a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PointSchedule.hs +++ b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PointSchedule.hs @@ -37,7 +37,6 @@ module Test.Consensus.PointSchedule ( , ensureScheduleDuration , genesisNodeState , longRangeAttack - , mkPointSchedule , peerSchedulesBlocks , peerStates , peersStates @@ -53,6 +52,7 @@ import Control.Monad (replicateM) import Control.Monad.Class.MonadTime.SI (Time (Time), addTime, diffTime) import Control.Monad.ST (ST) +import Data.Bifunctor (first) import Data.Functor (($>)) import Data.List (mapAccumL, partition, scanl') import qualified Data.Map.Strict as Map @@ -77,8 +77,8 @@ import Test.Consensus.BlockTree (BlockTree (..), BlockTreeBranch (..), import Test.Consensus.PeerSimulator.StateView (StateView) import Test.Consensus.PointSchedule.NodeState (NodeState (..), genesisNodeState) -import Test.Consensus.PointSchedule.Peers (Peer (..), Peers (..), - peers', peersList) +import Test.Consensus.PointSchedule.Peers (Peer (..), PeerId, + Peers (..), getPeerIds, peers', peersList) import Test.Consensus.PointSchedule.SinglePeer (IsTrunk (IsBranch, IsTrunk), PeerScheduleParams (..), SchedulePoint (..), defaultPeerScheduleParams, mergeOn, @@ -97,21 +97,24 @@ prettyPointSchedule :: (CondenseList (NodeState blk)) => PointSchedule blk -> [String] -prettyPointSchedule peers = - [ "honest peers: " ++ show (Map.size (honestPeers $ psSchedule peers)) - , "adversaries: " ++ show (Map.size (adversarialPeers $ psSchedule peers)) - , "minimal duration: " ++ show (psMinEndTime peers) - ] ++ - zipWith3 - (\number time peerState -> - number ++ ": " ++ peerState ++ " @ " ++ time - ) - (condenseListWithPadding PadLeft $ fst <$> numberedPeersStates) - (showDT . fst . snd <$> numberedPeersStates) - (condenseList $ (snd . 
snd) <$> numberedPeersStates) +prettyPointSchedule ps@PointSchedule {psStartOrder, psMinEndTime} = + [] + ++ [ "psSchedule =" + ] + ++ ( zipWith3 + ( \number time peerState -> + " " ++ number ++ ": " ++ peerState ++ " @ " ++ time + ) + (condenseListWithPadding PadLeft $ fst <$> numberedPeersStates) + (showDT . fst . snd <$> numberedPeersStates) + (condenseList $ (snd . snd) <$> numberedPeersStates) + ) + ++ [ "psStartOrder = " ++ show psStartOrder, + "psMinEndTime = " ++ show psMinEndTime + ] where numberedPeersStates :: [(Int, (Time, Peer (NodeState blk)))] - numberedPeersStates = zip [0..] (peersStates peers) + numberedPeersStates = zip [0 ..] (peersStates ps) showDT :: Time -> String showDT (Time dt) = printf "%.6f" (realToFrac dt :: Double) @@ -125,12 +128,6 @@ prettyPointSchedule peers = -- Accumulates the new points in each tick into the previous state, starting with a set of all -- 'Origin' points. -- --- Also shifts all tick start times so that the first tip point is announced at the very beginning --- of the test, keeping the relative delays of the schedule intact. --- This is a preliminary measure to make the long range attack test work, since that relies on the --- honest node sending headers later than the adversary, which is not possible if the adversary's --- first tip point is delayed by 20 or more seconds due to being in a later slot. --- -- Finally, drops the first state, since all points being 'Origin' (in particular the tip) has no -- useful effects in the simulator, but it could set the tip in the GDD governor to 'Origin', which -- causes slow nodes to be disconnected right away. @@ -138,14 +135,8 @@ prettyPointSchedule peers = -- TODO Remove dropping the first state in favor of better GDD logic peerStates :: Peer (PeerSchedule blk) -> [(Time, Peer (NodeState blk))] peerStates Peer {name, value = schedulePoints} = - drop 1 (zip (Time 0 : (map shiftTime times)) (Peer name <$> scanl' modPoint genesisNodeState points)) + drop 1 (zip (Time 0 : times) (Peer name <$> scanl' modPoint genesisNodeState points)) where - shiftTime :: Time -> Time - shiftTime t = addTime (- firstTipOffset) t - - firstTipOffset :: DiffTime - firstTipOffset = case times of [] -> 0; (Time dt : _) -> dt - modPoint z = \case ScheduleTipPoint nsTip -> z {nsTip} ScheduleHeaderPoint nsHeader -> z {nsHeader} @@ -177,15 +168,17 @@ peerScheduleBlocks = mapMaybe (withOriginToMaybe . schedulePointToBlock . snd) data PointSchedule blk = PointSchedule { -- | The actual point schedule psSchedule :: Peers (PeerSchedule blk), + -- | The order in which the peers start and connect to the node under test. + -- The peers that are absent from 'psSchedule' are ignored; the peers from + -- 'psSchedule' that are absent of 'psStartOrder' are started in the end in + -- the order of 'PeerId'. + psStartOrder :: [PeerId], -- | Minimum duration for the simulation of this point schedule. -- If no point in the schedule is larger than 'psMinEndTime', -- the simulation will still run until this time is reached. psMinEndTime :: Time } -mkPointSchedule :: Peers (PeerSchedule blk) -> PointSchedule blk -mkPointSchedule sch = PointSchedule sch $ Time 0 - -- | List of all blocks appearing in the schedules. peerSchedulesBlocks :: Peers (PeerSchedule blk) -> [blk] peerSchedulesBlocks = concatMap (peerScheduleBlocks . value) . 
peersList @@ -208,7 +201,11 @@ longRangeAttack :: longRangeAttack BlockTree {btTrunk, btBranches = [branch]} g = do honest <- peerScheduleFromTipPoints g honParams [(IsTrunk, [AF.length btTrunk - 1])] btTrunk [] adv <- peerScheduleFromTipPoints g advParams [(IsBranch, [AF.length (btbFull branch) - 1])] btTrunk [btbFull branch] - pure $ mkPointSchedule $ peers' [honest] [adv] + pure $ shiftPointSchedule $ PointSchedule { + psSchedule = peers' [honest] [adv], + psStartOrder = [], + psMinEndTime = Time 0 + } where honParams = defaultPeerScheduleParams {pspHeaderDelayInterval = (0.3, 0.4)} advParams = defaultPeerScheduleParams {pspTipDelayInterval = (0, 0.1)} @@ -229,9 +226,33 @@ uniformPoints :: BlockTree blk -> g -> m (PointSchedule blk) -uniformPoints PointsGeneratorParams {pgpExtraHonestPeers, pgpDowntime} = case pgpDowntime of - NoDowntime -> uniformPointsWithExtraHonestPeers pgpExtraHonestPeers - DowntimeWithSecurityParam k -> uniformPointsWithExtraHonestPeersAndDowntime pgpExtraHonestPeers k +uniformPoints PointsGeneratorParams {pgpExtraHonestPeers, pgpDowntime} bt = + fmap shiftPointSchedule . case pgpDowntime of + NoDowntime -> + uniformPointsWithExtraHonestPeers pgpExtraHonestPeers bt + DowntimeWithSecurityParam k -> + uniformPointsWithExtraHonestPeersAndDowntime pgpExtraHonestPeers k bt + +-- | Shifts all tick start times so that the first tip point is announced at +-- the very beginning of the test, keeping the relative delays of the schedule +-- intact. +-- +-- This is a measure to make the long range attack test work, since that +-- relies on the honest node sending headers later than the adversary, which +-- is not possible if the adversary's first tip point is delayed by 20 or +-- more seconds due to being in a later slot. +shiftPointSchedule :: PointSchedule blk -> PointSchedule blk +shiftPointSchedule s = s {psSchedule = shiftPeerSchedule <$> psSchedule s} + where + shiftPeerSchedule :: PeerSchedule blk -> PeerSchedule blk + shiftPeerSchedule times = map (first shiftTime) times + where + shiftTime :: Time -> Time + shiftTime t = addTime (- firstTipOffset) t + + firstTipOffset :: DiffTime + firstTipOffset = case times of [] -> 0; ((Time dt, _) : _) -> dt + -- | Generate a schedule in which the trunk is served by @pgpExtraHonestPeers + 1@ peers, -- and extra branches are served by one peer each, using a single tip point, @@ -240,6 +261,7 @@ uniformPoints PointsGeneratorParams {pgpExtraHonestPeers, pgpDowntime} = case pg -- Include rollbacks in a percentage of adversaries, in which case that peer uses two branchs. -- uniformPointsWithExtraHonestPeers :: + forall g m blk. (StatefulGen g m, AF.HasHeader blk) => Int -> BlockTree blk -> @@ -254,7 +276,9 @@ uniformPointsWithExtraHonestPeers honests <- replicateM (extraHonestPeers + 1) $ mkSchedule [(IsTrunk, [honestTip0 .. AF.length btTrunk - 1])] [] advs <- takeBranches btBranches - pure $ mkPointSchedule $ peers' honests advs + let psSchedule = peers' honests advs + psStartOrder <- shuffle (getPeerIds psSchedule) + pure $ PointSchedule {psSchedule, psStartOrder, psMinEndTime = Time 0} where takeBranches = \case [] -> pure [] @@ -305,6 +329,15 @@ uniformPointsWithExtraHonestPeers rollbackProb = 0.2 + -- Inefficient implementation, but sufficient for small lists. + shuffle :: [a] -> m [a] + shuffle [] = pure [] + shuffle xs = do + i <- Random.uniformRM (0, length xs - 1) g + let x = xs !! 
i + xs' = take i xs ++ drop (i+1) xs + (x :) <$> shuffle xs' + minusClamp :: (Ord a, Num a) => a -> a -> a minusClamp a b | a <= b = 0 | otherwise = a - b @@ -361,6 +394,7 @@ syncTips honests advs = -- -- Includes rollbacks in some schedules. uniformPointsWithExtraHonestPeersAndDowntime :: + forall g m blk. (StatefulGen g m, AF.HasHeader blk) => Int -> SecurityParam -> @@ -383,7 +417,9 @@ uniformPointsWithExtraHonestPeersAndDowntime mkSchedule [(IsTrunk, [honestTip0, minusClamp (AF.length btTrunk) 1])] [] advs <- takeBranches pauseSlot btBranches let (honests', advs') = syncTips honests advs - pure $ mkPointSchedule $ peers' honests' advs' + psSchedule = peers' honests' advs' + psStartOrder <- shuffle $ getPeerIds psSchedule + pure $ PointSchedule {psSchedule, psStartOrder, psMinEndTime = Time 0} where takeBranches pause = \case [] -> pure [] @@ -438,6 +474,15 @@ uniformPointsWithExtraHonestPeersAndDowntime rollbackProb = 0.2 + -- Inefficient implementation, but sufficient for small lists. + shuffle :: [a] -> m [a] + shuffle [] = pure [] + shuffle xs = do + i <- Random.uniformRM (0, length xs - 1) g + let x = xs !! i + xs' = take i xs ++ drop (i+1) xs + (x :) <$> shuffle xs' + newtype ForecastRange = ForecastRange { unForecastRange :: Word64 } deriving (Show) @@ -545,19 +590,22 @@ stToGen gen = do pure (runSTGen_ seed gen) ensureScheduleDuration :: GenesisTest blk a -> PointSchedule blk -> PointSchedule blk -ensureScheduleDuration gt PointSchedule{psSchedule, psMinEndTime} = +ensureScheduleDuration gt PointSchedule{psSchedule, psStartOrder, psMinEndTime} = PointSchedule { psSchedule + , psStartOrder , psMinEndTime = max psMinEndTime (Time endingDelay) } where endingDelay = let cst = gtChainSyncTimeouts gt bft = gtBlockFetchTimeouts gt - in 1 + fromIntegral peerCount * maximum (0 : catMaybes + bfGracePeriodDelay = fromIntegral adversaryCount * 10 + in 1 + bfGracePeriodDelay + fromIntegral peerCount * maximum (0 : catMaybes [ canAwaitTimeout cst , intersectTimeout cst , busyTimeout bft , streamingTimeout bft ]) peerCount = length (peersList psSchedule) + adversaryCount = Map.size (adversarialPeers psSchedule) diff --git a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PointSchedule/Peers.hs b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PointSchedule/Peers.hs index 973de5ef3a..168729dd20 100644 --- a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PointSchedule/Peers.hs +++ b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PointSchedule/Peers.hs @@ -32,6 +32,7 @@ module Test.Consensus.PointSchedule.Peers ( , peersFromPeerIdList' , peersFromPeerList , peersList + , peersOnlyAdversary , peersOnlyHonest , toMap , toMap' @@ -147,6 +148,13 @@ peersOnlyHonest value = adversarialPeers = Map.empty } +peersOnlyAdversary :: a -> Peers a +peersOnlyAdversary value = + Peers + { adversarialPeers = Map.singleton 1 value, + honestPeers = Map.empty + } + -- | Extract all 'PeerId's. 
getPeerIds :: Peers a -> [PeerId] getPeerIds Peers {honestPeers, adversarialPeers} = diff --git a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PointSchedule/Shrinking.hs b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PointSchedule/Shrinking.hs index 820f844b22..eb24ccf6e0 100644 --- a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PointSchedule/Shrinking.hs +++ b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PointSchedule/Shrinking.hs @@ -41,7 +41,7 @@ shrinkPeerSchedules :: StateView TestBlock -> [GenesisTestFull TestBlock] shrinkPeerSchedules genesisTest@GenesisTest{gtBlockTree, gtSchedule} _stateView = - let PointSchedule {psSchedule} = gtSchedule + let PointSchedule {psSchedule, psStartOrder} = gtSchedule simulationDuration = duration gtSchedule trimmedBlockTree sch = trimBlockTree' sch gtBlockTree shrunkAdversarialPeers = @@ -50,6 +50,7 @@ shrinkPeerSchedules genesisTest@GenesisTest{gtBlockTree, gtSchedule} _stateView genesisTest { gtSchedule = PointSchedule { psSchedule = shrunkSchedule + , psStartOrder , psMinEndTime = simulationDuration } , gtBlockTree = trimmedBlockTree shrunkSchedule @@ -61,6 +62,7 @@ shrinkPeerSchedules genesisTest@GenesisTest{gtBlockTree, gtSchedule} _stateView <&> \shrunkSchedule -> genesisTest { gtSchedule = PointSchedule { psSchedule = shrunkSchedule + , psStartOrder , psMinEndTime = simulationDuration } } @@ -81,6 +83,7 @@ shrinkByRemovingAdversaries genesisTest@GenesisTest{gtSchedule, gtBlockTree} _st in genesisTest { gtSchedule = PointSchedule { psSchedule = shrunkSchedule + , psStartOrder = psStartOrder gtSchedule , psMinEndTime = simulationDuration } , gtBlockTree = trimmedBlockTree diff --git a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PointSchedule/Shrinking/Tests.hs b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PointSchedule/Shrinking/Tests.hs index d715375a3f..b375a8ee94 100644 --- a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PointSchedule/Shrinking/Tests.hs +++ b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PointSchedule/Shrinking/Tests.hs @@ -82,14 +82,14 @@ checkShrinkProperty :: (Peers (PeerSchedule TestBlock) -> Peers (PeerSchedule Te checkShrinkProperty prop = forAllBlind (genChains (choose (1, 4)) >>= genUniformSchedulePoints) - (\sch@PointSchedule{psSchedule, psMinEndTime} -> + (\sch@PointSchedule{psSchedule, psStartOrder, psMinEndTime} -> conjoin $ map (\shrunk -> counterexample ( "Original schedule:\n" ++ unlines (map (" " ++) $ prettyPointSchedule sch) ++ "\nShrunk schedule:\n" - ++ unlines (map (" " ++) $ prettyPointSchedule $ PointSchedule shrunk psMinEndTime) + ++ unlines (map (" " ++) $ prettyPointSchedule $ PointSchedule {psSchedule = shrunk, psStartOrder, psMinEndTime}) ) (prop psSchedule shrunk) ) diff --git a/ouroboros-consensus/changelog.d/20240807_095933_alexander.esgen_milestone_1.md b/ouroboros-consensus/changelog.d/20240807_095933_alexander.esgen_milestone_1.md new file mode 100644 index 0000000000..bca96bf61b --- /dev/null +++ b/ouroboros-consensus/changelog.d/20240807_095933_alexander.esgen_milestone_1.md @@ -0,0 +1,11 @@ +### Breaking + +- Integrated new bulk sync BlockFetch logic. + +- CSJ: implemented rotation of dynamos. 
+ +- ChainDB: let the BlockFetch client add blocks asynchronously + +- GDD: added rate limit + +- Tweaked certain edge cases in the GDD and ChainSync client ([#1179](https://github.com/IntersectMBO/ouroboros-consensus/pull/1179)) diff --git a/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Genesis/Governor.hs b/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Genesis/Governor.hs index 4edbc85b9d..5d5080ea39 100644 --- a/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Genesis/Governor.hs +++ b/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Genesis/Governor.hs @@ -40,7 +40,7 @@ import Data.Foldable (for_, toList) import Data.Functor.Compose (Compose (..)) import Data.Map.Strict (Map) import qualified Data.Map.Strict as Map -import Data.Maybe (mapMaybe, maybeToList) +import Data.Maybe (maybeToList) import Data.Maybe.Strict (StrictMaybe) import Data.Word (Word64) import Ouroboros.Consensus.Block @@ -83,6 +83,9 @@ gddWatcher :: => TopLevelConfig blk -> Tracer m (TraceGDDEvent peer blk) -> ChainDB m blk + -> DiffTime -- ^ How often to evaluate GDD. 0 means as soon as possible. + -- Otherwise, no faster than once every T seconds, where T is + -- the provided value. -> STM m GsmState -> STM m (Map peer (ChainSyncClientHandle m blk)) -- ^ The ChainSync handles. We trigger the GDD whenever our 'GsmState' @@ -95,7 +98,7 @@ gddWatcher :: -> Watcher m (GsmState, GDDStateView m blk peer) (Map peer (StrictMaybe (WithOrigin SlotNo), Bool)) -gddWatcher cfg tracer chainDb getGsmState getHandles varLoEFrag = +gddWatcher cfg tracer chainDb rateLimit getGsmState getHandles varLoEFrag = Watcher { wInitial = Nothing , wReader = (,) <$> getGsmState <*> getGDDStateView @@ -137,12 +140,17 @@ gddWatcher cfg tracer chainDb getGsmState getHandles varLoEFrag = wNotify :: (GsmState, GDDStateView m blk peer) -> m () wNotify (_gsmState, stateView) = do + t0 <- getMonotonicTime loeFrag <- evaluateGDD cfg tracer stateView oldLoEFrag <- atomically $ swapTVar varLoEFrag loeFrag -- The chain selection only depends on the LoE tip, so there -- is no point in retriggering it if the LoE tip hasn't changed. when (AF.headHash oldLoEFrag /= AF.headHash loeFrag) $ ChainDB.triggerChainSelectionAsync chainDb + tf <- getMonotonicTime + -- We limit the rate at which GDD is evaluated, otherwise it would + -- be called every time a new header is validated. + threadDelay $ rateLimit - diffTime tf t0 -- | Pure snapshot of the dynamic data the GDD operates on. data GDDStateView m blk peer = GDDStateView { @@ -241,16 +249,41 @@ sharedCandidatePrefix curChain candidates = immutableTip = AF.anchorPoint curChain splitAfterImmutableTip (peer, frag) = - (,) peer . snd <$> AF.splitAfterPoint frag immutableTip + case AF.splitAfterPoint frag immutableTip of + -- When there is no intersection, we assume the candidate fragment is + -- empty and anchored at the immutable tip. + -- See Note [CSJ truncates the candidate fragments]. + Nothing -> (peer, AF.takeOldest 0 curChain) + Just (_, suffix) -> (peer, suffix) immutableTipSuffixes = - -- If a ChainSync client's candidate forks off before the - -- immutable tip, then this transaction is currently winning an - -- innocuous race versus the thread that will fatally raise - -- 'InvalidIntersection' within that ChainSync client, so it's - -- sound to pre-emptively discard their candidate from this - -- 'Map' via 'mapMaybe'. 
- mapMaybe splitAfterImmutableTip candidates + map splitAfterImmutableTip candidates + +-- Note [CSJ truncates the candidate fragments] +-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +-- +-- Before CSJ, only rollback could cause truncation of a candidate fragment. +-- Truncation is a serious business to GDD because the LoE might have allowed +-- the selection to advance, based on the tips of the candidate fragments. +-- +-- Truncating a candidate fragment risks moving the LoE back, which could be +-- earlier than the anchor of the latest selection. When rollbacks were the +-- only mechanism to truncate, it was fine to ignore candidate fragments that +-- don't intersect with the current selection. This could only happen if the +-- peer is rolling back more than k blocks, which is dishonest behavior. +-- +-- With CSJ, however, the candidate fragments can recede without a rollback. +-- A former objector might be asked to jump back when it becomes a jumper again. +-- The jump point might still be a descendant of the immutable tip. But by the +-- time the jump is accepted, the immutable tip might have advanced, and the +-- candidate fragment of the otherwise honest peer might be ignored by GDD. +-- +-- Therefore, at the moment, when there is no intersection with the current +-- selection, the GDD assumes that the candidate fragment is empty and anchored +-- at the immutable tip. It is the job of the ChainSync client to update the +-- candidate fragment so it intersects with the selection or to disconnect the +-- peer if no such fragment can be established. +-- data DensityBounds blk = DensityBounds { @@ -350,11 +383,7 @@ densityDisconnect (GenesisWindow sgen) (SecurityParam k) states candidateSuffixe , upperBound = ub0 , hasBlockAfter = hasBlockAfter0 , idling = idling0 - }) -> - -- If the density is 0, the peer should be disconnected. This affects - -- ChainSync jumping, where genesis windows with no headers prevent jumps - -- from happening. - if ub0 == 0 then pure peer0 else do + }) -> do (_peer1, DensityBounds {clippedFragment = frag1, offersMoreThanK, lowerBound = lb1 }) <- densityBounds -- Don't disconnect peer0 if it sent no headers after the intersection yet -- -- See Note [Chain disagreement] -- - -- Note: hasBlockAfter0 is False if frag0 is empty and ub0>0. - -- But we leave it here as a reminder that we care about it.
guard $ idling0 || not (AF.null frag0) || hasBlockAfter0 -- ensure that the two peer fragments don't share any -- headers after the LoE diff --git a/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/MiniProtocol/BlockFetch/ClientInterface.hs b/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/MiniProtocol/BlockFetch/ClientInterface.hs index 2d8a8e1b1e..06d49a4a17 100644 --- a/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/MiniProtocol/BlockFetch/ClientInterface.hs +++ b/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/MiniProtocol/BlockFetch/ClientInterface.hs @@ -14,6 +14,7 @@ module Ouroboros.Consensus.MiniProtocol.BlockFetch.ClientInterface ( ) where import Control.Monad +import Control.Tracer (Tracer) import Data.Map.Strict (Map) import Data.Time.Clock (UTCTime) import GHC.Stack (HasCallStack) @@ -26,7 +27,12 @@ import qualified Ouroboros.Consensus.HardFork.Abstract as History import qualified Ouroboros.Consensus.HardFork.History as History import Ouroboros.Consensus.Ledger.Abstract import Ouroboros.Consensus.Ledger.Extended -import Ouroboros.Consensus.Storage.ChainDB.API (ChainDB) +import Ouroboros.Consensus.Ledger.SupportsProtocol + (LedgerSupportsProtocol) +import qualified Ouroboros.Consensus.MiniProtocol.ChainSync.Client as CSClient +import qualified Ouroboros.Consensus.MiniProtocol.ChainSync.Client.Jumping as CSJumping +import Ouroboros.Consensus.Storage.ChainDB.API (AddBlockPromise, + ChainDB) import qualified Ouroboros.Consensus.Storage.ChainDB.API as ChainDB import Ouroboros.Consensus.Storage.ChainDB.API.Types.InvalidBlockPunishment (InvalidBlockPunishment) @@ -38,8 +44,10 @@ import Ouroboros.Network.AnchoredFragment (AnchoredFragment) import qualified Ouroboros.Network.AnchoredFragment as AF import Ouroboros.Network.Block (MaxSlotNo) import Ouroboros.Network.BlockFetch.ConsensusInterface - (BlockFetchConsensusInterface (..), FetchMode (..), - FromConsensus (..), WhetherReceivingTentativeBlocks (..)) + (BlockFetchConsensusInterface (..), + ChainSelStarvation (..), FetchMode (..), + FromConsensus (..), GenesisFetchMode (..), + WhetherReceivingTentativeBlocks (..)) import Ouroboros.Network.PeerSelection.Bootstrap (UseBootstrapPeers, requiresBootstrapPeers) import Ouroboros.Network.PeerSelection.LedgerPeers.Type @@ -51,15 +59,17 @@ data ChainDbView m blk = ChainDbView { getCurrentChain :: STM m (AnchoredFragment (Header blk)) , getIsFetched :: STM m (Point blk -> Bool) , getMaxSlotNo :: STM m MaxSlotNo - , addBlockWaitWrittenToDisk :: InvalidBlockPunishment m -> blk -> m Bool + , addBlockAsync :: InvalidBlockPunishment m -> blk -> m (AddBlockPromise m blk) + , getChainSelStarvation :: STM m ChainSelStarvation } -defaultChainDbView :: IOLike m => ChainDB m blk -> ChainDbView m blk +defaultChainDbView :: ChainDB m blk -> ChainDbView m blk defaultChainDbView chainDB = ChainDbView { getCurrentChain = ChainDB.getCurrentChain chainDB , getIsFetched = ChainDB.getIsFetched chainDB , getMaxSlotNo = ChainDB.getMaxSlotNo chainDB - , addBlockWaitWrittenToDisk = ChainDB.addBlockWaitWrittenToDisk chainDB + , addBlockAsync = ChainDB.addBlockAsync chainDB + , getChainSelStarvation = ChainDB.getChainSelStarvation chainDB } -- | How to get the wall-clock time of a slot. Note that this is a very @@ -133,24 +143,28 @@ initSlotForgeTimeOracle cfg chainDB = do readFetchModeDefault :: (MonadSTM m, HasHeader blk) - => BlockchainTime m + => Bool -- Is genesis enabled? 
+ -> BlockchainTime m -> STM m (AnchoredFragment blk) -> STM m UseBootstrapPeers -> STM m LedgerStateJudgement - -> STM m FetchMode -readFetchModeDefault btime getCurrentChain + -> STM m GenesisFetchMode +readFetchModeDefault genesisEnabled btime getCurrentChain getUseBootstrapPeers getLedgerStateJudgement = do mCurSlot <- getCurrentSlot btime usingBootstrapPeers <- requiresBootstrapPeers <$> getUseBootstrapPeers <*> getLedgerStateJudgement + let nonDeadlineFetchMode = if genesisEnabled + then FetchModeGenesis + else PraosFetchMode FetchModeBulkSync -- This logic means that when the node is using bootstrap peers and is in -- TooOld state it will always return BulkSync. Otherwise if the node -- isn't using bootstrap peers (i.e. has them disabled it will use the old -- logic of returning BulkSync if behind 1000 slots case (usingBootstrapPeers, mCurSlot) of - (True, _) -> return FetchModeBulkSync - (False, CurrentSlotUnknown) -> return FetchModeBulkSync + (True, _) -> return nonDeadlineFetchMode + (False, CurrentSlotUnknown) -> return nonDeadlineFetchMode (False, CurrentSlot curSlot) -> do curChainSlot <- AF.headSlot <$> getCurrentChain let slotsBehind = case curChainSlot of @@ -162,28 +176,33 @@ readFetchModeDefault btime getCurrentChain return $ if slotsBehind < maxSlotsBehind -- When the current chain is near to "now", use deadline mode, -- when it is far away, use bulk sync mode. - then FetchModeDeadline - else FetchModeBulkSync + then PraosFetchMode FetchModeDeadline + else nonDeadlineFetchMode mkBlockFetchConsensusInterface :: forall m peer blk. ( IOLike m , BlockSupportsDiffusionPipelining blk - , BlockSupportsProtocol blk + , Ord peer + , LedgerSupportsProtocol blk ) - => BlockConfig blk + => Tracer m (CSJumping.TraceEvent peer) + -> BlockConfig blk -> ChainDbView m blk - -> STM m (Map peer (AnchoredFragment (Header blk))) + -> CSClient.ChainSyncClientHandleCollection peer m blk -> (Header blk -> SizeInBytes) -> SlotForgeTimeOracle m blk -- ^ Slot forge time, see 'headerForgeUTCTime' and 'blockForgeUTCTime'. - -> STM m FetchMode + -> STM m GenesisFetchMode -- ^ See 'readFetchMode'. -> BlockFetchConsensusInterface peer (Header blk) blk m mkBlockFetchConsensusInterface - bcfg chainDB getCandidates blockFetchSize slotForgeTime readFetchMode = + csjTracer bcfg chainDB csHandlesCol blockFetchSize slotForgeTime readFetchMode = BlockFetchConsensusInterface {..} where + getCandidates :: STM m (Map peer (AnchoredFragment (Header blk))) + getCandidates = CSClient.viewChainSyncState (CSClient.cschcMap csHandlesCol) CSClient.csCandidate + blockMatchesHeader :: Header blk -> blk -> Bool blockMatchesHeader = Block.blockMatchesHeader @@ -204,8 +223,8 @@ mkBlockFetchConsensusInterface pipeliningPunishment <- InvalidBlockPunishment.mkForDiffusionPipelining pure $ mkAddFetchedBlock_ pipeliningPunishment enabledPipelining - -- Waits until the block has been written to disk, but not until chain - -- selection has processed the block. + -- Hand over the block to the ChainDB, but don't wait until it has been + -- written to disk or processed. mkAddFetchedBlock_ :: ( BlockConfig blk -> Header blk @@ -249,7 +268,7 @@ mkBlockFetchConsensusInterface NotReceivingTentativeBlocks -> disconnect ReceivingTentativeBlocks -> pipeliningPunishment bcfg (getHeader blk) disconnect - addBlockWaitWrittenToDisk + addBlockAsync chainDB punishment blk @@ -340,3 +359,8 @@ mkBlockFetchConsensusInterface headerForgeUTCTime = slotForgeTime . headerRealPoint . unFromConsensus blockForgeUTCTime = slotForgeTime . blockRealPoint . 
unFromConsensus + + readChainSelStarvation = getChainSelStarvation chainDB + + demoteCSJDynamo :: peer -> m () + demoteCSJDynamo = CSJumping.rotateDynamo csjTracer csHandlesCol diff --git a/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/MiniProtocol/ChainSync/Client.hs b/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/MiniProtocol/ChainSync/Client.hs index 452ae24930..76e433c051 100644 --- a/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/MiniProtocol/ChainSync/Client.hs +++ b/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/MiniProtocol/ChainSync/Client.hs @@ -64,10 +64,12 @@ module Ouroboros.Consensus.MiniProtocol.ChainSync.Client ( , TraceChainSyncClientEvent (..) -- * State shared with other components , ChainSyncClientHandle (..) + , ChainSyncClientHandleCollection (..) , ChainSyncState (..) , ChainSyncStateView (..) , Jumping.noJumping , chainSyncStateFor + , newChainSyncClientHandleCollection , noIdling , noLoPBucket , viewChainSyncState @@ -111,7 +113,8 @@ import Ouroboros.Consensus.Storage.ChainDB (ChainDB, InvalidBlockReason) import qualified Ouroboros.Consensus.Storage.ChainDB as ChainDB import Ouroboros.Consensus.Util -import Ouroboros.Consensus.Util.AnchoredFragment (cross) +import Ouroboros.Consensus.Util.AnchoredFragment (cross, + preferAnchoredCandidate) import Ouroboros.Consensus.Util.Assert (assertWithMsg) import Ouroboros.Consensus.Util.EarlyExit (WithEarlyExit, exitEarly) import qualified Ouroboros.Consensus.Util.EarlyExit as EarlyExit @@ -163,7 +166,7 @@ data ChainSyncLoPBucketEnabledConfig = ChainSyncLoPBucketEnabledConfig { csbcCapacity :: Integer, -- | The rate of the bucket (think tokens per second). csbcRate :: Rational - } + } deriving stock (Eq, Generic, Show) -- | Configuration of the leaky bucket. data ChainSyncLoPBucketConfig @@ -174,6 +177,7 @@ data ChainSyncLoPBucketConfig | -- | Enable the leaky bucket. ChainSyncLoPBucketEnabled ChainSyncLoPBucketEnabledConfig + deriving stock (Eq, Generic, Show) -- | Configuration of ChainSync Jumping data CSJConfig @@ -184,6 +188,7 @@ data CSJConfig | -- | Enable ChainSync Jumping CSJEnabled CSJEnabledConfig + deriving stock (Eq, Generic, Show) newtype CSJEnabledConfig = CSJEnabledConfig { -- | The _ideal_ size for ChainSync jumps. Note that the algorithm @@ -203,7 +208,7 @@ newtype CSJEnabledConfig = CSJEnabledConfig { -- window has a higher change that dishonest peers can delay syncing by a -- small margin (around 2 minutes per dishonest peer with mainnet parameters). csjcJumpSize :: SlotNo -} +} deriving stock (Eq, Generic, Show) defaultChainDbView :: (IOLike m, LedgerSupportsProtocol blk) @@ -229,11 +234,11 @@ newtype Our a = Our { unOur :: a } -- data from 'ChainSyncState'. viewChainSyncState :: IOLike m => - StrictTVar m (Map peer (ChainSyncClientHandle m blk)) -> + STM m (Map peer (ChainSyncClientHandle m blk)) -> (ChainSyncState blk -> a) -> STM m (Map peer a) -viewChainSyncState varHandles f = - Map.map f <$> (traverse (readTVar . cschState) =<< readTVar varHandles) +viewChainSyncState readHandles f = + Map.map f <$> (traverse (readTVar . cschState) =<< readHandles) -- | Convenience function for reading the 'ChainSyncState' for a single peer -- from a nested set of TVars. 
@@ -327,7 +332,7 @@ bracketChainSyncClient :: ) => Tracer m (TraceChainSyncClientEvent blk) -> ChainDbView m blk - -> StrictTVar m (Map peer (ChainSyncClientHandle m blk)) + -> ChainSyncClientHandleCollection peer m blk -- ^ The kill handle and states for each peer, we need the whole map because we -- (de)register nodes (@peer@). -> STM m GsmState @@ -400,8 +405,8 @@ bracketChainSyncClient insertHandle = atomicallyWithMonotonicTime $ \time -> do initialGsmState <- getGsmState updateLopBucketConfig lopBucket initialGsmState time - modifyTVar varHandles $ Map.insert peer handle - deleteHandle = atomically $ modifyTVar varHandles $ Map.delete peer + cschcAddHandle varHandles peer handle + deleteHandle = atomically $ cschcRemoveHandle varHandles peer bracket_ insertHandle deleteHandle $ f Jumping.noJumping withCSJCallbacks lopBucket csHandleState (CSJEnabled csjEnabledConfig) f = @@ -1613,7 +1618,8 @@ checkKnownInvalid cfgEnv dynEnv intEnv hdr = case scrutinee of -- Finally, the client will block on the intersection a second time, if -- necessary, since it's possible for a ledger state to determine the slot's -- onset's timestamp without also determining the slot's 'LedgerView'. During --- this pause, the LoP bucket is paused. +-- this pause, the LoP bucket is paused. If we need to block and their fragment +-- is not preferable to ours, we disconnect. checkTime :: forall m blk arrival judgment. ( IOLike m @@ -1722,10 +1728,43 @@ checkTime cfgEnv dynEnv intEnv = ) $ getPastLedger mostRecentIntersection case prj lst of - Nothing -> retry + Nothing -> do + checkPreferTheirsOverOurs kis' + retry Just ledgerView -> return $ return $ Intersects kis' ledgerView + -- Note [Candidate comparing beyond the forecast horizon] + -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + -- + -- When a header is beyond the forecast horizon and their fragment is not + -- preferable to our selection (ourFrag), then we disconnect, as we will + -- never end up selecting it. + -- + -- In the context of Genesis, one can think of the candidate losing a + -- density comparison against the selection. See the Genesis documentation + -- for why this check is necessary. + -- + -- In particular, this means that we will disconnect from peers who offer us + -- a chain containing a slot gap larger than a forecast window. + checkPreferTheirsOverOurs :: KnownIntersectionState blk -> STM m () + checkPreferTheirsOverOurs kis + | -- Precondition is fulfilled as ourFrag and theirFrag intersect by + -- construction. + preferAnchoredCandidate (configBlock cfg) ourFrag theirFrag + = pure () + | otherwise + = throwSTM $ CandidateTooSparse + mostRecentIntersection + (ourTipFromChain ourFrag) + (theirTipFromChain theirFrag) + where + KnownIntersectionState { + mostRecentIntersection + , ourFrag + , theirFrag + } = kis + -- Returns 'Nothing' if the ledger state cannot forecast the ledger view -- that far into the future. projectLedgerView :: @@ -1909,6 +1948,12 @@ ourTipFromChain :: -> Our (Tip blk) ourTipFromChain = Our . AF.anchorToTip . AF.headAnchor +theirTipFromChain :: + HasHeader (Header blk) + => AnchoredFragment (Header blk) + -> Their (Tip blk) +theirTipFromChain = Their . AF.anchorToTip . AF.headAnchor + -- | A type-legos auxillary function used in 'readLedgerState'. castM :: Monad m => m (WithEarlyExit m x) -> WithEarlyExit m x castM = join . EarlyExit.lift @@ -2126,6 +2171,14 @@ data ChainSyncClientException = -- different from the previous argument.
(InvalidBlockReason blk) -- ^ The upstream node's chain contained a block that we know is invalid. + | + forall blk. BlockSupportsProtocol blk => + CandidateTooSparse + (Point blk) -- ^ Intersection + (Our (Tip blk)) + (Their (Tip blk)) + -- ^ The upstream node's chain was so sparse that it was worse than our + -- selection despite being blocked on the forecast horizon. | InFutureHeaderExceedsClockSkew !InFutureCheck.HeaderArrivalException -- ^ A header arrived from the far future. @@ -2159,6 +2212,12 @@ instance Eq ChainSyncClientException where | Just Refl <- eqT @blk @blk' = (a, b, c) == (a', b', c') + (==) + (CandidateTooSparse (a :: Point blk ) b c ) + (CandidateTooSparse (a' :: Point blk') b' c') + | Just Refl <- eqT @blk @blk' + = (a, b, c) == (a', b', c') + (==) (InFutureHeaderExceedsClockSkew a ) (InFutureHeaderExceedsClockSkew a') @@ -2176,6 +2235,7 @@ instance Eq ChainSyncClientException where HeaderError{} == _ = False InvalidIntersection{} == _ = False InvalidBlock{} == _ = False + CandidateTooSparse{} == _ = False InFutureHeaderExceedsClockSkew{} == _ = False EmptyBucket == _ = False InvalidJumpResponse == _ = False diff --git a/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/MiniProtocol/ChainSync/Client/Jumping.hs b/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/MiniProtocol/ChainSync/Client/Jumping.hs index bd150c93d4..9441525862 100644 --- a/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/MiniProtocol/ChainSync/Client/Jumping.hs +++ b/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/MiniProtocol/ChainSync/Client/Jumping.hs @@ -74,6 +74,21 @@ -- when the client should pause, download headers, or ask about agreement with -- a given point (jumping). See the 'Jumping' type for more details. -- +-- CSJ depends on the ChainSync client to disconnect dynamos that have an empty +-- genesis window after their intersection with the selection. This is necessary +-- because otherwise there are no points to jump to, and CSJ would get +-- stuck when the dynamo blocks on the forecast horizon. See +-- Note [Candidate comparing beyond the forecast horizon] in +-- "Ouroboros.Consensus.MiniProtocol.ChainSync.Client". +-- +-- Interactions with the BlockFetch logic +-- -------------------------------------- +-- +-- When syncing, the BlockFetch logic might request to change the dynamo with +-- a call to 'rotateDynamo'. This is because the choice of dynamo influences +-- which peer is selected to download blocks. See the note "Interactions with +-- ChainSync Jumping" in "Ouroboros.Network.BlockFetch.Decision.BulkSync".
+-- -- Interactions with the Limit on Patience -- --------------------------------------- -- @@ -100,15 +115,15 @@ -- -- > j ╔════════╗ -- > ╭────────── ║ Dynamo ║ ◀─────────╮ --- > │ ╚════════╝ │f --- > ▼ ▲ │ --- > ┌────────────┐ │ k ┌──────────┐ --- > │ Disengaged │ ◀───────────│────────── │ Objector │ --- > └────────────┘ ╭─────│────────── └──────────┘ --- > │ │ ▲ ▲ │ --- > g│ │e b │ │ │ --- > │ │ ╭─────╯ i│ │c --- > ╭╌╌╌╌╌╌╌▼╌╌╌╌╌╌╌╌╌╌╌╌╌│╌╌╌╌╌╌╌╌╌╌│╌▼╌╌╌╮ +-- > │ ╭──╚════════╝ │f +-- > ▼ │ ▲ │ +-- > ┌────────────┐ │ │ k ┌──────────┐ +-- > │ Disengaged │ ◀─│─────────│────────── │ Objector │ +-- > └────────────┘ │ ╭─────│────────── └──────────┘ +-- > │ │ │ ▲ ▲ │ +-- > l│ g│ │e b │ │ │ +-- > │ │ │ ╭─────╯ i│ │c +-- > ╭╌╌╌▼╌╌╌▼╌╌╌╌╌╌╌╌╌╌╌╌╌│╌╌╌╌╌╌╌╌╌╌│╌▼╌╌╌╮ -- > ┆ ╔═══════╗ a ┌──────┐ d ┌─────┐ | -- > ┆ ║ Happy ║ ───▶ │ LFI* │ ───▶ │ FI* │ | -- > ┆ ╚═══════╝ ◀─╮ └──────┘ └─────┘ | @@ -147,6 +162,11 @@ -- If dynamo or objector claim to have no more headers, they are disengaged -- (j|k). -- +-- The BlockFetch logic can ask to change the dynamo if it is not serving blocks +-- fast enough. If there are other non-disengaged peers, the dynamo (and the +-- objector if there is one) is demoted to a jumper (l+g) and a new dynamo is +-- elected. +-- module Ouroboros.Consensus.MiniProtocol.ChainSync.Client.Jumping ( Context , ContextWith (..) @@ -154,20 +174,26 @@ module Ouroboros.Consensus.MiniProtocol.ChainSync.Client.Jumping ( , JumpInstruction (..) , JumpResult (..) , Jumping (..) + , TraceEvent (..) + , getDynamo , makeContext , mkJumping , noJumping , registerClient + , rotateDynamo , unregisterClient ) where import Cardano.Slotting.Slot (SlotNo (..), WithOrigin (..)) -import Control.Monad (forM, forM_, when) +import Control.Monad (forM, forM_, void, when) +import Control.Tracer (Tracer, traceWith) +import Data.Foldable (toList, traverse_) import Data.List (sortOn) -import Data.Map.Strict (Map) -import qualified Data.Map.Strict as Map +import qualified Data.Map as Map import Data.Maybe (catMaybes, fromMaybe) import Data.Maybe.Strict (StrictMaybe (..)) +import Data.Sequence.Strict (StrictSeq) +import qualified Data.Sequence.Strict as Seq import GHC.Generics (Generic) import Ouroboros.Consensus.Block (HasHeader (getHeaderFields), Header, Point (..), castPoint, pointSlot, succWithOrigin) @@ -175,6 +201,7 @@ import Ouroboros.Consensus.Ledger.SupportsProtocol (LedgerSupportsProtocol) import Ouroboros.Consensus.MiniProtocol.ChainSync.Client.State (ChainSyncClientHandle (..), + ChainSyncClientHandleCollection (..), ChainSyncJumpingJumperState (..), ChainSyncJumpingState (..), ChainSyncState (..), DisengagedInitState (..), DynamoInitState (..), @@ -257,16 +284,16 @@ mkJumping peerContext = Jumping -- -- Invariants: -- --- - If 'handlesVar' is not empty, then there is exactly one dynamo in it. --- - There is at most one objector in 'handlesVar'. --- - If there exist 'FoundIntersection' jumpers in 'handlesVar', then there +-- - If 'handlesCol' is not empty, then there is exactly one dynamo in it. +-- - There is at most one objector in 'handlesCol'. +-- - If there exist 'FoundIntersection' jumpers in 'handlesCol', then there -- is an objector and the intersection of the objector with the dynamo is -- at least as old as the oldest intersection of the `FoundIntersection` jumpers -- with the dynamo. 
data ContextWith peerField handleField m peer blk = Context { peer :: !peerField, handle :: !handleField, - handlesVar :: !(StrictTVar m (Map peer (ChainSyncClientHandle m blk))), + handlesCol :: !(ChainSyncClientHandleCollection peer m blk), jumpSize :: !SlotNo } @@ -276,12 +303,12 @@ type Context = ContextWith () () -- | A peer-specific context for ChainSync jumping. This is a 'ContextWith' -- pointing on the handler of the peer in question. -- --- Invariant: The binding from 'peer' to 'handle' is present in 'handlesVar'. +-- Invariant: The binding from 'peer' to 'handle' is present in 'handlesCol'. type PeerContext m peer blk = ContextWith peer (ChainSyncClientHandle m blk) m peer blk makeContext :: MonadSTM m => - StrictTVar m (Map peer (ChainSyncClientHandle m blk)) -> + ChainSyncClientHandleCollection peer m blk -> SlotNo -> -- ^ The size of jumps, in number of slots. STM m (Context m peer blk) @@ -427,8 +454,8 @@ onRollForward context point = setJumps (Just jumpInfo) = do writeTVar (cschJumping (handle context)) $ Dynamo DynamoStarted $ pointSlot $ AF.headPoint $ jTheirFragment jumpInfo - handles <- readTVar (handlesVar context) - forM_ (Map.elems handles) $ \h -> + handles <- cschcSeq (handlesCol context) + forM_ handles $ \(_, h) -> readTVar (cschJumping h) >>= \case Jumper nextJumpVar Happy{} -> writeTVar nextJumpVar (Just jumpInfo) _ -> pure () @@ -459,7 +486,7 @@ onRollBackward context slot = Dynamo _ lastJumpSlot | slot < lastJumpSlot -> do disengage (handle context) - electNewDynamo (stripContext context) + void $ electNewDynamo (stripContext context) | otherwise -> pure () -- | This function is called when we receive a 'MsgAwaitReply' message. @@ -477,7 +504,7 @@ onAwaitReply context = readTVar (cschJumping (handle context)) >>= \case Dynamo{} -> do disengage (handle context) - electNewDynamo (stripContext context) + void $ electNewDynamo (stripContext context) Objector{} -> do disengage (handle context) electNewObjector (stripContext context) @@ -510,7 +537,7 @@ processJumpResult context jumpResult = updateChainSyncState (handle context) jumpInfo RejectedJump JumpToGoodPoint{} -> do startDisengaging (handle context) - electNewDynamo (stripContext context) + void $ electNewDynamo (stripContext context) -- Not interesting in the dynamo state AcceptedJump JumpTo{} -> pure () @@ -660,11 +687,11 @@ updateJumpInfo context jumpInfo = -- of the dynamo, or 'Nothing' if there is none. getDynamo :: (MonadSTM m) => - StrictTVar m (Map peer (ChainSyncClientHandle m blk)) -> - STM m (Maybe (ChainSyncClientHandle m blk)) -getDynamo handlesVar = do - handles <- Map.elems <$> readTVar handlesVar - findM (\handle -> isDynamo <$> readTVar (cschJumping handle)) handles + ChainSyncClientHandleCollection peer m blk -> + STM m (Maybe (peer, ChainSyncClientHandle m blk)) +getDynamo handlesCol = do + handles <- cschcSeq handlesCol + findM (\(_, handle) -> isDynamo <$> readTVar (cschJumping handle)) handles where isDynamo Dynamo{} = True isDynamo _ = False @@ -705,8 +732,7 @@ newJumper jumpInfo jumperState = do -- that peer. If there is no dynamo, the peer starts as dynamo; otherwise, it -- starts as a jumper. 
registerClient :: - ( Ord peer, - LedgerSupportsProtocol blk, + ( LedgerSupportsProtocol blk, IOLike m ) => Context m peer blk -> @@ -716,16 +742,16 @@ registerClient :: (StrictTVar m (ChainSyncJumpingState m blk) -> ChainSyncClientHandle m blk) -> STM m (PeerContext m peer blk) registerClient context peer csState mkHandle = do - csjState <- getDynamo (handlesVar context) >>= \case + csjState <- getDynamo (handlesCol context) >>= \case Nothing -> do fragment <- csCandidate <$> readTVar csState pure $ Dynamo DynamoStarted $ pointSlot $ AF.anchorPoint fragment - Just handle -> do + Just (_, handle) -> do mJustInfo <- readTVar (cschJumpInfo handle) newJumper mJustInfo (Happy FreshJumper Nothing) cschJumping <- newTVar csjState let handle = mkHandle cschJumping - modifyTVar (handlesVar context) $ Map.insert peer handle + cschcAddHandle (handlesCol context) peer handle pure $ context {peer, handle} -- | Unregister a client from a 'PeerContext'; this might trigger the election @@ -738,13 +764,63 @@ unregisterClient :: PeerContext m peer blk -> STM m () unregisterClient context = do - modifyTVar (handlesVar context) $ Map.delete (peer context) + cschcRemoveHandle (handlesCol context) (peer context) let context' = stripContext context readTVar (cschJumping (handle context)) >>= \case Disengaged{} -> pure () Jumper{} -> pure () Objector{} -> electNewObjector context' - Dynamo{} -> electNewDynamo context' + Dynamo{} -> void $ electNewDynamo context' + +-- | Rotate the dynamo: demote the given dynamo (and the objector, if there +-- is one) to a jumper, move the peer to the end of the queue of ChainSync +-- handles, and elect a new dynamo. +-- +-- It does nothing if there is no other engaged peer to elect or if the given +-- peer is not the dynamo. +-- +-- Traces a 'RotatedDynamo' event when a new dynamo is elected. +rotateDynamo :: + ( Ord peer, + LedgerSupportsProtocol blk, + MonadSTM m + ) => + Tracer m (TraceEvent peer) -> + ChainSyncClientHandleCollection peer m blk -> + peer -> + m () +rotateDynamo tracer handlesCol peer = do + traceEvent <- atomically $ do + handles <- cschcMap handlesCol + case handles Map.!? peer of + Nothing -> + -- Do not re-elect a dynamo if the peer has been disconnected. + pure Nothing + Just oldDynHandle -> + readTVar (cschJumping oldDynHandle) >>= \case + Dynamo{} -> do + cschcRotateHandle handlesCol peer + peerStates <- cschcSeq handlesCol + mEngaged <- findNonDisengaged peerStates + case mEngaged of + Nothing -> + -- There are no engaged peers. This case cannot happen, as the + -- dynamo is always engaged. + error "rotateDynamo: no engaged peer found" + Just (newDynamoId, newDynHandle) + | newDynamoId == peer -> + -- The old dynamo is the only engaged peer left. + pure Nothing + | otherwise -> do + newJumper Nothing (Happy FreshJumper Nothing) + >>= writeTVar (cschJumping oldDynHandle) + promoteToDynamo peerStates newDynamoId newDynHandle + pure $ Just $ RotatedDynamo peer newDynamoId + _ -> + -- Do not re-elect a dynamo if the peer is not the dynamo. + pure Nothing + traverse_ (traceWith tracer) traceEvent -- | Choose an unspecified new non-idling dynamo and demote all other peers to -- jumpers.
@@ -754,49 +830,68 @@ electNewDynamo :: LedgerSupportsProtocol blk ) => Context m peer blk -> - STM m () + STM m (Maybe (peer, ChainSyncClientHandle m blk)) electNewDynamo context = do - peerStates <- Map.toList <$> readTVar (handlesVar context) + peerStates <- cschcSeq (handlesCol context) mDynamo <- findNonDisengaged peerStates case mDynamo of - Nothing -> pure () + Nothing -> pure Nothing Just (dynId, dynamo) -> do - fragment <- csCandidate <$> readTVar (cschState dynamo) - mJumpInfo <- readTVar (cschJumpInfo dynamo) - -- If there is no jump info, the dynamo must be just starting and - -- there is no need to set the intersection of the ChainSync server. - let dynamoInitState = maybe DynamoStarted DynamoStarting mJumpInfo - writeTVar (cschJumping dynamo) $ - Dynamo dynamoInitState $ pointSlot $ AF.headPoint fragment - -- Demote all other peers to jumpers - forM_ peerStates $ \(peer, st) -> - when (peer /= dynId) $ do - jumpingState <- readTVar (cschJumping st) - when (not (isDisengaged jumpingState)) $ - newJumper mJumpInfo (Happy FreshJumper Nothing) - >>= writeTVar (cschJumping st) - where - findNonDisengaged = - findM $ \(_, st) -> not . isDisengaged <$> readTVar (cschJumping st) - isDisengaged Disengaged{} = True - isDisengaged _ = False + promoteToDynamo peerStates dynId dynamo + pure $ Just (dynId, dynamo) + +-- | Promote the given peer to dynamo and demote all other peers to jumpers. +promoteToDynamo :: + ( MonadSTM m, + Eq peer, + LedgerSupportsProtocol blk + ) => + StrictSeq (peer, ChainSyncClientHandle m blk) -> + peer -> + ChainSyncClientHandle m blk -> + STM m () +promoteToDynamo peerStates dynId dynamo = do + fragment <- csCandidate <$> readTVar (cschState dynamo) + mJumpInfo <- readTVar (cschJumpInfo dynamo) + -- If there is no jump info, the dynamo must be just starting and + -- there is no need to set the intersection of the ChainSync server. + let dynamoInitState = maybe DynamoStarted DynamoStarting mJumpInfo + writeTVar (cschJumping dynamo) $ + Dynamo dynamoInitState $ pointSlot $ AF.headPoint fragment + -- Demote all other peers to jumpers + forM_ peerStates $ \(peer, st) -> + when (peer /= dynId) $ do + jumpingState <- readTVar (cschJumping st) + when (not (isDisengaged jumpingState)) $ + newJumper mJumpInfo (Happy FreshJumper Nothing) + >>= writeTVar (cschJumping st) + +-- | Find a non-disengaged peer in the given sequence +findNonDisengaged :: + (MonadSTM m) => + StrictSeq (peer, ChainSyncClientHandle m blk) -> + STM m (Maybe (peer, ChainSyncClientHandle m blk)) +findNonDisengaged = + findM $ \(_, st) -> not . isDisengaged <$> readTVar (cschJumping st) + +isDisengaged :: ChainSyncJumpingState m blk -> Bool +isDisengaged Disengaged{} = True +isDisengaged _ = False -findM :: Monad m => (a -> m Bool) -> [a] -> m (Maybe a) -findM _ [] = pure Nothing -findM p (x : xs) = p x >>= \case - True -> pure (Just x) - False -> findM p xs +findM :: (Foldable f, Monad m) => (a -> m Bool) -> f a -> m (Maybe a) +findM p = + foldr (\x mb -> p x >>= \case True -> pure (Just x); False -> mb) (pure Nothing) -- | Find the objector in a context, if there is one. findObjector :: (MonadSTM m) => Context m peer blk -> STM m (Maybe (ObjectorInitState, JumpInfo blk, Point (Header blk), ChainSyncClientHandle m blk)) -findObjector context = do - readTVar (handlesVar context) >>= go . 
Map.toList +findObjector context = + cschcSeq (handlesCol context) >>= go where - go [] = pure Nothing - go ((_, handle):xs) = + go Seq.Empty = pure Nothing + go ((_, handle) Seq.:<| xs) = readTVar (cschJumping handle) >>= \case Objector initState goodJump badPoint -> pure $ Just (initState, goodJump, badPoint, handle) @@ -809,7 +904,7 @@ electNewObjector :: Context m peer blk -> STM m () electNewObjector context = do - peerStates <- Map.toList <$> readTVar (handlesVar context) + peerStates <- toList <$> cschcSeq (handlesCol context) dissentingJumpers <- collectDissentingJumpers peerStates let sortedJumpers = sortOn (pointSlot . fst) dissentingJumpers case sortedJumpers of @@ -826,3 +921,7 @@ electNewObjector context = do pure $ Just (badPoint, (initState, goodJumpInfo, handle)) _ -> pure Nothing + +data TraceEvent peer + = RotatedDynamo peer peer + deriving (Show) diff --git a/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/MiniProtocol/ChainSync/Client/State.hs b/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/MiniProtocol/ChainSync/Client/State.hs index 909e0ff829..763e79fd89 100644 --- a/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/MiniProtocol/ChainSync/Client/State.hs +++ b/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/MiniProtocol/ChainSync/Client/State.hs @@ -9,6 +9,7 @@ module Ouroboros.Consensus.MiniProtocol.ChainSync.Client.State ( ChainSyncClientHandle (..) + , ChainSyncClientHandleCollection (..) , ChainSyncJumpingJumperState (..) , ChainSyncJumpingState (..) , ChainSyncState (..) @@ -17,11 +18,16 @@ module Ouroboros.Consensus.MiniProtocol.ChainSync.Client.State ( , JumpInfo (..) , JumperInitState (..) , ObjectorInitState (..) + , newChainSyncClientHandleCollection ) where import Cardano.Slotting.Slot (SlotNo, WithOrigin) import Data.Function (on) +import Data.Map.Strict (Map) +import qualified Data.Map.Strict as Map import Data.Maybe.Strict (StrictMaybe (..)) +import Data.Sequence.Strict (StrictSeq) +import qualified Data.Sequence.Strict as Seq import Data.Typeable (Proxy (..), typeRep) import GHC.Generics (Generic) import Ouroboros.Consensus.Block (HasHeader, Header, Point) @@ -30,7 +36,7 @@ import Ouroboros.Consensus.Ledger.SupportsProtocol (LedgerSupportsProtocol) import Ouroboros.Consensus.Node.GsmState (GsmState) import Ouroboros.Consensus.Util.IOLike (IOLike, NoThunks (..), STM, - StrictTVar, Time) + StrictTVar, Time, modifyTVar, newTVar, readTVar) import Ouroboros.Network.AnchoredFragment (AnchoredFragment, headPoint) @@ -96,9 +102,74 @@ deriving anyclass instance ( NoThunks (Header blk) ) => NoThunks (ChainSyncClientHandle m blk) +-- | A collection of ChainSync client handles for the peers of this node. +-- +-- Sometimes we want to see the collection as a Map, and sometimes as a sequence. +-- The implementation keeps both views in sync. 
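+--
+-- A hedged usage sketch (the function name and the peer/handle arguments are
+-- placeholders; the collection operations are the real record fields below):
+-- rotating a peer only reorders the sequence view, the map view is unchanged.
+--
+-- > demoDualView (pA, hA) (pB, hB) = atomically $ do
+-- >     col <- newChainSyncClientHandleCollection
+-- >     cschcAddHandle col pA hA
+-- >     cschcAddHandle col pB hB
+-- >     cschcRotateHandle col pA        -- Seq view becomes [(pB, hB), (pA, hA)]
+-- >     (,) <$> cschcMap col <*> cschcSeq col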
+data ChainSyncClientHandleCollection peer m blk = ChainSyncClientHandleCollection { + -- | A map containing the handles for the peers in the collection + cschcMap :: !(STM m (Map peer (ChainSyncClientHandle m blk))) + -- | A sequence containing the handles for the peers in the collection + , cschcSeq :: !(STM m (StrictSeq (peer, ChainSyncClientHandle m blk))) + -- | Add the handle for the given peer to the collection + -- PRECONDITION: The peer is not already in the collection + , cschcAddHandle :: !(peer -> ChainSyncClientHandle m blk -> STM m ()) + -- | Remove the handle for the given peer from the collection + , cschcRemoveHandle :: !(peer -> STM m ()) + -- | Moves the handle for the given peer to the end of the sequence + , cschcRotateHandle :: !(peer -> STM m ()) + -- | Remove all the handles from the collection + , cschcRemoveAllHandles :: !(STM m ()) + } + deriving stock (Generic) + +deriving anyclass instance ( + IOLike m, + HasHeader blk, + LedgerSupportsProtocol blk, + NoThunks (STM m ()), + NoThunks (Header blk), + NoThunks (STM m (Map peer (ChainSyncClientHandle m blk))), + NoThunks (STM m (StrictSeq (peer, ChainSyncClientHandle m blk))) + ) => NoThunks (ChainSyncClientHandleCollection peer m blk) + +newChainSyncClientHandleCollection :: + ( Ord peer, + IOLike m, + LedgerSupportsProtocol blk, + NoThunks peer + ) + => STM m (ChainSyncClientHandleCollection peer m blk) +newChainSyncClientHandleCollection = do + handlesMap <- newTVar mempty + handlesSeq <- newTVar mempty + + return ChainSyncClientHandleCollection { + cschcMap = readTVar handlesMap + , cschcSeq = readTVar handlesSeq + , cschcAddHandle = \peer handle -> do + modifyTVar handlesMap (Map.insert peer handle) + modifyTVar handlesSeq (Seq.|> (peer, handle)) + , cschcRemoveHandle = \peer -> do + modifyTVar handlesMap (Map.delete peer) + modifyTVar handlesSeq $ \s -> + let (xs, ys) = Seq.spanl ((/= peer) . fst) s + in xs Seq.>< Seq.drop 1 ys + , cschcRotateHandle = \peer -> + modifyTVar handlesSeq $ \s -> + let (xs, ys) = Seq.spanl ((/= peer) . fst) s + in xs Seq.>< Seq.drop 1 ys Seq.>< Seq.take 1 ys + , cschcRemoveAllHandles = do + modifyTVar handlesMap (const mempty) + modifyTVar handlesSeq (const mempty) + } + data DynamoInitState blk - = -- | The dynamo has not yet started jumping and we first need to jump to the - -- given jump info to set the intersection of the ChainSync server. + = -- | The dynamo still has to set the intersection of the ChainSync server + -- before it can resume downloading headers. This is because + -- the message pipeline might be drained to do jumps, and this causes + -- the intersection on the ChainSync server to diverge from the tip of + -- the candidate fragment. DynamoStarting !(JumpInfo blk) | DynamoStarted deriving (Generic) @@ -111,7 +182,10 @@ deriving anyclass instance data ObjectorInitState = -- | The objector still needs to set the intersection of the ChainSync - -- server before resuming retrieval of headers. + -- server before resuming retrieval of headers. This is mainly because + -- the message pipeline might be drained to do jumps, and this causes + -- the intersection on the ChainSync server to diverge from the tip of + -- the candidate fragment. 
Starting | Started deriving (Generic, Show, NoThunks) diff --git a/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/API.hs b/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/API.hs index 441c598c6a..eec1e930af 100644 --- a/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/API.hs +++ b/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/API.hs @@ -91,6 +91,8 @@ import qualified Ouroboros.Network.AnchoredFragment as AF import Ouroboros.Network.Block (ChainUpdate, MaxSlotNo, Serialised (..)) import qualified Ouroboros.Network.Block as Network +import Ouroboros.Network.BlockFetch.ConsensusInterface + (ChainSelStarvation (..)) import Ouroboros.Network.Mock.Chain (Chain (..)) import qualified Ouroboros.Network.Mock.Chain as Chain import System.FS.API.Types (FsError) @@ -334,6 +336,10 @@ data ChainDB m blk = ChainDB { -- invalid block is detected. These blocks are likely to be valid. , getIsInvalidBlock :: STM m (WithFingerprint (HeaderHash blk -> Maybe (InvalidBlockReason blk))) + -- | Whether ChainSel is currently starved, or when was last time it + -- stopped being starved. + , getChainSelStarvation :: STM m ChainSelStarvation + , closeDB :: m () -- | Return 'True' when the database is open. diff --git a/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/Impl.hs b/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/Impl.hs index 03f9a1ef58..15cf79fdd7 100644 --- a/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/Impl.hs +++ b/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/Impl.hs @@ -16,6 +16,7 @@ module Ouroboros.Consensus.Storage.ChainDB.Impl ( , LgrDB.TraceReplayEvent , SelectionChangedInfo (..) , TraceAddBlockEvent (..) + , TraceChainSelStarvationEvent (..) , TraceCopyToImmutableDBEvent (..) , TraceEvent (..) , TraceFollowerEvent (..) 
@@ -69,6 +70,8 @@ import Ouroboros.Consensus.Util.ResourceRegistry (WithTempRegistry, import Ouroboros.Consensus.Util.STM (Fingerprint (..), WithFingerprint (..)) import qualified Ouroboros.Network.AnchoredFragment as AF +import Ouroboros.Network.BlockFetch.ConsensusInterface + (ChainSelStarvation (..)) {------------------------------------------------------------------------------- Initialization @@ -177,6 +180,7 @@ openDBInternal args launchBgTasks = runWithTempRegistry $ do copyFuse <- newFuse "copy to immutable db" chainSelFuse <- newFuse "chain selection" chainSelQueue <- newChainSelQueue (Args.cdbsBlocksToAddSize cdbSpecificArgs) + varChainSelStarvation <- newTVarIO ChainSelStarvationOngoing let env = CDB { cdbImmutableDB = immutableDB , cdbVolatileDB = volatileDB @@ -201,6 +205,7 @@ openDBInternal args launchBgTasks = runWithTempRegistry $ do , cdbChainSelQueue = chainSelQueue , cdbFutureBlocks = varFutureBlocks , cdbLoE = Args.cdbsLoE cdbSpecificArgs + , cdbChainSelStarvation = varChainSelStarvation } h <- fmap CDBHandle $ newTVarIO $ ChainDbOpen env let chainDB = API.ChainDB @@ -218,6 +223,7 @@ openDBInternal args launchBgTasks = runWithTempRegistry $ do , stream = Iterator.stream h , newFollower = Follower.newFollower h , getIsInvalidBlock = getEnvSTM h Query.getIsInvalidBlock + , getChainSelStarvation = getEnvSTM h Query.getChainSelStarvation , closeDB = closeDB h , isOpen = isOpen h } diff --git a/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/Impl/Background.hs b/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/Impl/Background.hs index 9a6fdcb374..82a0a228be 100644 --- a/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/Impl/Background.hs +++ b/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/Impl/Background.hs @@ -522,7 +522,7 @@ addBlockRunner fuse cdb@CDB{..} = forever $ do -- exception (or it errored), notify the blocked thread withFuse fuse $ bracketOnError - (lift $ getChainSelMessage cdbChainSelQueue) + (lift $ getChainSelMessage starvationTracer cdbChainSelStarvation cdbChainSelQueue) (\message -> lift $ atomically $ do case message of ChainSelReprocessLoEBlocks -> pure () @@ -540,4 +540,11 @@ addBlockRunner fuse cdb@CDB{..} = forever $ do ChainSelAddBlock BlockToAdd{blockToAdd} -> trace $ PoppedBlockFromQueue $ FallingEdgeWith $ blockRealPoint blockToAdd - chainSelSync cdb message) + chainSelSync cdb message + lift $ case message of + ChainSelAddBlock blockToAdd -> + deleteBlockToAdd blockToAdd cdbChainSelQueue + _ -> pure () + ) + where + starvationTracer = Tracer $ traceWith cdbTracer . TraceChainSelStarvationEvent diff --git a/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/Impl/ChainSel.hs b/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/Impl/ChainSel.hs index f6a8971ee8..2ae269f8d8 100644 --- a/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/Impl/ChainSel.hs +++ b/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/Impl/ChainSel.hs @@ -334,13 +334,11 @@ chainSelSync cdb@CDB {..} (ChainSelAddBlock BlockToAdd { blockToAdd = b, .. 
}) = let immBlockNo = AF.anchorBlockNo curChain -- We follow the steps from section "## Adding a block" in ChainDB.md - - -- Note: we call 'chainSelectionForFutureBlocks' in all branches instead - -- of once, before branching, because we want to do it /after/ writing the - -- block to the VolatileDB and delivering the 'varBlockWrittenToDisk' - -- promise, as this is the promise the BlockFetch client waits for. - -- Otherwise, the BlockFetch client would have to wait for - -- 'chainSelectionForFutureBlocks'. + -- + -- Note: we call 'chainSelectionForFutureBlocks' in all branches instead of + -- once, before branching, because we want to do it /after/ writing the + -- block to the VolatileDB so that any threads waiting on the + -- 'varBlockWrittenToDisk' promise don't have to wait for the result of + -- 'chainSelectionForFutureBlocks'. -- ### Ignore newTip <- if diff --git a/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/Impl/Query.hs b/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/Impl/Query.hs index d47182bd43..c697f11804 100644 --- a/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/Impl/Query.hs +++ b/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/Impl/Query.hs @@ -21,6 +21,7 @@ module Ouroboros.Consensus.Storage.ChainDB.Impl.Query ( , getAnyBlockComponent , getAnyKnownBlock , getAnyKnownBlockComponent + , getChainSelStarvation ) where import qualified Data.Map.Strict as Map @@ -42,6 +43,8 @@ import Ouroboros.Consensus.Util.STM (WithFingerprint (..)) import Ouroboros.Network.AnchoredFragment (AnchoredFragment) import qualified Ouroboros.Network.AnchoredFragment as AF import Ouroboros.Network.Block (MaxSlotNo, maxSlotNoFromWithOrigin) +import Ouroboros.Network.BlockFetch.ConsensusInterface + (ChainSelStarvation (..)) -- | Return the last @k@ headers. -- @@ -128,18 +131,15 @@ getBlockComponent :: getBlockComponent CDB{..} = getAnyBlockComponent cdbImmutableDB cdbVolatileDB getIsFetched :: - forall m blk. IOLike m + forall m blk. (IOLike m, HasHeader blk) => ChainDbEnv m blk -> STM m (Point blk -> Bool) -getIsFetched CDB{..} = basedOnHash <$> VolatileDB.getIsMember cdbVolatileDB - where - -- The volatile DB indexes by hash only, not by points. However, it should - -- not be possible to have two points with the same hash but different - -- slot numbers. - basedOnHash :: (HeaderHash blk -> Bool) -> Point blk -> Bool - basedOnHash f p = - case pointHash p of - BlockHash hash -> f hash - GenesisHash -> False +getIsFetched CDB{..} = do + checkBlocksToAdd <- memberBlocksToAdd cdbChainSelQueue + checkVolDb <- VolatileDB.getIsMember cdbVolatileDB + return $ \pt -> + case pointToWithOriginRealPoint pt of + Origin -> False + NotOrigin pt' -> checkBlocksToAdd pt' || checkVolDb (realPointHash pt') getIsInvalidBlock :: forall m blk. (IOLike m, HasHeader blk) @@ -148,6 +148,12 @@ getIsInvalidBlock :: getIsInvalidBlock CDB{..} = fmap (fmap (fmap invalidBlockReason) . flip Map.lookup) <$> readTVar cdbInvalid +getChainSelStarvation :: + forall m blk. IOLike m + => ChainDbEnv m blk + -> STM m ChainSelStarvation +getChainSelStarvation CDB {..} = readTVar cdbChainSelStarvation + getIsValid :: forall m blk. (IOLike m, HasHeader blk) => ChainDbEnv m blk @@ -176,10 +182,13 @@ getMaxSlotNo CDB{..} = do -- contains block 9'. The ImmutableDB contains blocks 1-10. The max slot -- of the current chain will be 10 (being the anchor point of the empty -- current chain), while the max slot of the VolatileDB will be 9.
- curChainMaxSlotNo <- maxSlotNoFromWithOrigin . AF.headSlot - <$> readTVar cdbChain - volatileDbMaxSlotNo <- VolatileDB.getMaxSlotNo cdbVolatileDB - return $ curChainMaxSlotNo `max` volatileDbMaxSlotNo + -- + -- Moreover, we have to look in 'ChainSelQueue' too. + curChainMaxSlotNo <- + maxSlotNoFromWithOrigin . AF.headSlot <$> readTVar cdbChain + volatileDbMaxSlotNo <- VolatileDB.getMaxSlotNo cdbVolatileDB + blocksToAddMaxSlotNo <- getBlocksToAddMaxSlotNo cdbChainSelQueue + return $ curChainMaxSlotNo `max` volatileDbMaxSlotNo `max` blocksToAddMaxSlotNo {------------------------------------------------------------------------------- Unifying interface over the immutable DB and volatile DB, but independent diff --git a/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/Impl/Types.hs b/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/Impl/Types.hs index a42ed3ad86..165ddbad69 100644 --- a/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/Impl/Types.hs +++ b/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/Impl/Types.hs @@ -43,15 +43,19 @@ module Ouroboros.Consensus.Storage.ChainDB.Impl.Types ( -- * Blocks to add , BlockToAdd (..) , ChainSelMessage (..) - , ChainSelQueue + , ChainSelQueue -- opaque , addBlockToAdd , addReprocessLoEBlocks , closeChainSelQueue + , deleteBlockToAdd + , getBlocksToAddMaxSlotNo , getChainSelMessage + , memberBlocksToAdd , newChainSelQueue -- * Trace types , SelectionChangedInfo (..) , TraceAddBlockEvent (..) + , TraceChainSelStarvationEvent (..) , TraceCopyToImmutableDBEvent (..) , TraceEvent (..) , TraceFollowerEvent (..) @@ -63,10 +67,11 @@ module Ouroboros.Consensus.Storage.ChainDB.Impl.Types ( , TraceValidationEvent (..) ) where +import Control.Monad (join, when) import Control.Tracer -import Data.Foldable (traverse_) +import Data.Foldable (for_) import Data.Map.Strict (Map) -import Data.Maybe (mapMaybe) +import qualified Data.Map.Strict as Map import Data.Maybe.Strict (StrictMaybe (..)) import Data.Set (Set) import Data.Typeable @@ -106,7 +111,9 @@ import Ouroboros.Consensus.Util.IOLike import Ouroboros.Consensus.Util.ResourceRegistry import Ouroboros.Consensus.Util.STM (WithFingerprint) import Ouroboros.Network.AnchoredFragment (AnchoredFragment) -import Ouroboros.Network.Block (MaxSlotNo) +import Ouroboros.Network.Block (MaxSlotNo (..)) +import Ouroboros.Network.BlockFetch.ConsensusInterface + (ChainSelStarvation (..)) -- | All the serialisation related constraints needed by the ChainDB. class ( ImmutableDbSerialiseConstraints blk @@ -250,6 +257,17 @@ data ChainDbEnv m blk = CDB , cdbCheckInFuture :: !(CheckInFuture m blk) , cdbChainSelQueue :: !(ChainSelQueue m blk) -- ^ Queue of blocks that still have to be added. + -- + -- NOTE: the set of blocks in this queue is /not/ disjoint from the set of + -- blocks in the VolatileDB. When processing the next block in the queue, we + -- do not remove the block from the queue /until/ it has been added to the + -- VolatileDB and processed by chain selection. This means the block + -- currently being added will be both in the queue and the VolatileDB for a + -- short while. + -- + -- If we removed the block from the queue before adding it to the + -- VolatileDB, then it would be in /neither/ for a short time, and + -- 'getIsFetched' would incorrectly return 'False'.
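+--
+-- As a sketch of the resulting processing order (the wrapper name is made up;
+-- 'getChainSelMessage' and 'deleteBlockToAdd' are the real operations, and
+-- 'runChainSel' stands in for running chain selection on the message):
+--
+-- > processNext starvationTracer starvationVar queue runChainSel = do
+-- >     message <- getChainSelMessage starvationTracer starvationVar queue
+-- >     runChainSel message           -- block is still visible to 'getIsFetched'
+-- >     case message of
+-- >       ChainSelAddBlock blockToAdd -> deleteBlockToAdd blockToAdd queue
+-- >       ChainSelReprocessLoEBlocks  -> pure ()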
, cdbFutureBlocks :: !(StrictTVar m (FutureBlocks m blk)) -- ^ Blocks from the future -- @@ -275,6 +293,9 @@ data ChainDbEnv m blk = CDB -- switch back to a chain containing it. The fragment is usually anchored at -- a recent immutable tip; if it does not, it will conservatively be treated -- as the empty fragment anchored in the current immutable tip. + , cdbChainSelStarvation :: !(StrictTVar m ChainSelStarvation) + -- ^ Information on the last starvation of ChainSel, whether ongoing or + -- ended recently. } deriving (Generic) -- | We include @blk@ in 'showTypeOf' because it helps resolving type families @@ -442,8 +463,21 @@ type FutureBlocks m blk = Map (HeaderHash blk) (Header blk, InvalidBlockPunishme -- | FIFO queue used to add blocks asynchronously to the ChainDB. Blocks are -- read from this queue by a background thread, which processes the blocks -- synchronously. -newtype ChainSelQueue m blk = ChainSelQueue (TBQueue m (ChainSelMessage m blk)) - deriving NoThunks via OnlyCheckWhnfNamed "ChainSelQueue" (ChainSelQueue m blk) +data ChainSelQueue m blk = ChainSelQueue { + -- TODO use a better data structure, e.g., a heap from the @heaps@ + -- package. Wish list: + -- + O(1) pop min value + -- + O(log n) insert + -- + O(n) get all + -- + Bounded in size + -- + -- TODO join consecutive blocks into a fragment that can be added at + -- once. + varChainSelQueue :: !(StrictTVar m (Map (RealPoint blk) (BlockToAdd m blk))) + , chainSelQueueCapacity :: !Word + , varChainSelReprocessLoEBlocks :: !(StrictTVar m Bool) + } + deriving (NoThunks) via OnlyCheckWhnfNamed "ChainSelQueue" (ChainSelQueue m blk) -- | Entry in the 'ChainSelQueue' queue: a block together with the 'TMVar's used -- to implement 'AddBlockPromise'. @@ -457,6 +491,7 @@ data BlockToAdd m blk = BlockToAdd , varBlockProcessed :: !(StrictTMVar m (AddBlockResult blk)) -- ^ Used for the 'blockProcessed' field of 'AddBlockPromise'. } + deriving NoThunks via OnlyCheckWhnfNamed "BlockToAdd" (BlockToAdd m blk) -- | Different async tasks for triggering ChainSel data ChainSelMessage m blk @@ -466,9 +501,11 @@ data ChainSelMessage m blk | ChainSelReprocessLoEBlocks -- | Create a new 'ChainSelQueue' with the given size. -newChainSelQueue :: IOLike m => Word -> m (ChainSelQueue m blk) -newChainSelQueue queueSize = ChainSelQueue <$> - atomically (newTBQueue (fromIntegral queueSize)) +newChainSelQueue :: (IOLike m, StandardHash blk, Typeable blk) => Word -> m (ChainSelQueue m blk) +newChainSelQueue chainSelQueueCapacity = do + varChainSelQueue <- newTVarIO mempty + varChainSelReprocessLoEBlocks <- newTVarIO False + return $ ChainSelQueue {varChainSelQueue, chainSelQueueCapacity, varChainSelReprocessLoEBlocks} -- | Add a block to the 'ChainSelQueue' queue. Can block when the queue is full. 
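+--
+-- The blocking relies on the usual STM back-pressure pattern: the transaction
+-- retries until the queue map has room. Roughly (illustrative helper; 'check'
+-- is the same combinator used in the implementation below):
+--
+-- > waitForRoom capacity var = do
+-- >     queue <- readTVar var
+-- >     check (fromIntegral (Map.size queue) < capacity)  -- retries while full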
addBlockToAdd :: @@ -478,7 +515,7 @@ addBlockToAdd :: -> InvalidBlockPunishment m -> blk -> m (AddBlockPromise m blk) -addBlockToAdd tracer (ChainSelQueue queue) punish blk = do +addBlockToAdd tracer (ChainSelQueue {varChainSelQueue, chainSelQueueCapacity}) punish blk = do varBlockWrittenToDisk <- newEmptyTMVarIO varBlockProcessed <- newEmptyTMVarIO let !toAdd = BlockToAdd @@ -489,8 +526,12 @@ addBlockToAdd tracer (ChainSelQueue queue) punish blk = do } traceWith tracer $ AddedBlockToQueue (blockRealPoint blk) RisingEdge queueSize <- atomically $ do - writeTBQueue queue (ChainSelAddBlock toAdd) - lengthTBQueue queue + chainSelQueue <- readTVar varChainSelQueue + let chainSelQueue' = Map.insert (blockRealPoint blk) toAdd chainSelQueue + chainSelQueueSize = Map.size chainSelQueue' + check (fromIntegral chainSelQueueSize <= chainSelQueueCapacity) + writeTVar varChainSelQueue chainSelQueue' + return chainSelQueueSize traceWith tracer $ AddedBlockToQueue (blockRealPoint blk) (FallingEdgeWith (fromIntegral queueSize)) return AddBlockPromise @@ -504,28 +545,102 @@ addReprocessLoEBlocks => Tracer m (TraceAddBlockEvent blk) -> ChainSelQueue m blk -> m () -addReprocessLoEBlocks tracer (ChainSelQueue queue) = do +addReprocessLoEBlocks tracer (ChainSelQueue {varChainSelReprocessLoEBlocks}) = do traceWith tracer $ AddedReprocessLoEBlocksToQueue - atomically $ writeTBQueue queue ChainSelReprocessLoEBlocks + atomically $ writeTVar varChainSelReprocessLoEBlocks True -- | Get the oldest message from the 'ChainSelQueue' queue. Can block when the --- queue is empty. -getChainSelMessage :: IOLike m => ChainSelQueue m blk -> m (ChainSelMessage m blk) -getChainSelMessage (ChainSelQueue queue) = atomically $ readTBQueue queue +-- queue is empty; in that case, reports the starvation (and its end) to the +-- callback. +getChainSelMessage + :: forall m blk. (HasHeader blk, IOLike m) + => Tracer m (TraceChainSelStarvationEvent blk) + -> StrictTVar m ChainSelStarvation + -> ChainSelQueue m blk + -> m (ChainSelMessage m blk) +getChainSelMessage starvationTracer starvationVar queue = go + where + go = join $ atomically $ + readTVar varChainSelReprocessLoEBlocks >>= \case + True -> do + writeTVar varChainSelReprocessLoEBlocks False + pure $ pure ChainSelReprocessLoEBlocks + False -> do + chainSelQueue <- readTVar varChainSelQueue + case Map.minView chainSelQueue of + Just (blockToAdd, chainSelQueue') -> do + writeTVar varChainSelQueue chainSelQueue' + pure $ do + terminateStarvationMeasure blockToAdd + pure $ ChainSelAddBlock blockToAdd + Nothing -> pure $ do + startStarvationMeasure + blockUntilMoreWork + go + + ChainSelQueue {varChainSelQueue, varChainSelReprocessLoEBlocks} = queue + + -- Wait until we either need to reprocess blocks due to the LoE, or until a + -- new block arrives. + blockUntilMoreWork :: m () + blockUntilMoreWork = atomically $ do + reprocessLoEBlocks <- readTVar varChainSelReprocessLoEBlocks + chainSelQueue <- readTVar varChainSelQueue + check $ reprocessLoEBlocks || not (Map.null chainSelQueue) + + startStarvationMeasure = do + prevStarvation <- atomically $ swapTVar starvationVar ChainSelStarvationOngoing + when (prevStarvation /= ChainSelStarvationOngoing) $ + traceWith starvationTracer . 
ChainSelStarvationStarted =<< getMonotonicTime + + terminateStarvationMeasure BlockToAdd{blockToAdd=block} = do + prevStarvation <- readTVarIO starvationVar + when (prevStarvation == ChainSelStarvationOngoing) $ do + tf <- getMonotonicTime + traceWith starvationTracer (ChainSelStarvationEnded tf $ blockRealPoint block) + atomically $ writeTVar starvationVar (ChainSelStarvationEndedAt tf) -- | Flush the 'ChainSelQueue' queue and notify the waiting threads. -- +-- REVIEW: What about all the threads that are waiting to write in the queue and +-- will write after the flush?! closeChainSelQueue :: IOLike m => ChainSelQueue m blk -> STM m () -closeChainSelQueue (ChainSelQueue queue) = do - as <- mapMaybe blockAdd <$> flushTBQueue queue - traverse_ (\a -> tryPutTMVar (varBlockProcessed a) - (FailedToAddBlock "Queue flushed")) - as - where - blockAdd = \case - ChainSelAddBlock ab -> Just ab - ChainSelReprocessLoEBlocks -> Nothing +closeChainSelQueue ChainSelQueue {varChainSelQueue} = do + chainSelQueue <- swapTVar varChainSelQueue Map.empty + for_ chainSelQueue $ \BlockToAdd {varBlockProcessed} -> + putTMVar varBlockProcessed $ FailedToAddBlock "Queue flushed" + +-- | Delete the given 'BlockToAdd' from the 'ChainSelQueue'. +-- +-- PRECONDITION: the given 'BlockToAdd' is in 'ChainSelQueue'. +deleteBlockToAdd :: + (IOLike m, HasHeader blk) + => BlockToAdd m blk + -> ChainSelQueue m blk + -> m () +deleteBlockToAdd (BlockToAdd _ blk _ _) (ChainSelQueue {varChainSelQueue}) = + atomically $ modifyTVar varChainSelQueue $ Map.delete (blockRealPoint blk) +-- | Return a function to test the membership for the given 'BlocksToAdd'. +memberBlocksToAdd :: + (IOLike m, HasHeader blk) + => ChainSelQueue m blk + -> STM m (RealPoint blk -> Bool) +memberBlocksToAdd (ChainSelQueue {varChainSelQueue}) = + flip Map.member <$> readTVar varChainSelQueue + +getBlocksToAddMaxSlotNo :: + IOLike m + => ChainSelQueue m blk + -> STM m MaxSlotNo +getBlocksToAddMaxSlotNo (ChainSelQueue {varChainSelQueue}) = aux <$> readTVar varChainSelQueue + where + -- | The 'Ord' instance of 'RealPoint' orders by 'SlotNo' first, so the + -- maximal key of the map has the greatest 'SlotNo'. + aux :: Map (RealPoint blk) (BlockToAdd m blk) -> MaxSlotNo + aux queue = case Map.lookupMax queue of + Nothing -> NoMaxSlotNo + Just (RealPoint s _, _) -> MaxSlotNo s {------------------------------------------------------------------------------- Trace types @@ -545,6 +660,7 @@ data TraceEvent blk | TraceImmutableDBEvent (ImmutableDB.TraceEvent blk) | TraceVolatileDBEvent (VolatileDB.TraceEvent blk) | TraceLastShutdownUnclean + | TraceChainSelStarvationEvent(TraceChainSelStarvationEvent blk) deriving (Generic) @@ -877,3 +993,8 @@ data TraceIteratorEvent blk -- next block we're looking for. 
| SwitchBackToVolatileDB deriving (Generic, Eq, Show) + +data TraceChainSelStarvationEvent blk + = ChainSelStarvationStarted Time + | ChainSelStarvationEnded Time (RealPoint blk) + deriving (Generic, Eq, Show) diff --git a/ouroboros-consensus/test/consensus-test/Test/Consensus/MiniProtocol/BlockFetch/Client.hs b/ouroboros-consensus/test/consensus-test/Test/Consensus/MiniProtocol/BlockFetch/Client.hs index b3aa0efe1e..cebcf616ec 100644 --- a/ouroboros-consensus/test/consensus-test/Test/Consensus/MiniProtocol/BlockFetch/Client.hs +++ b/ouroboros-consensus/test/consensus-test/Test/Consensus/MiniProtocol/BlockFetch/Client.hs @@ -8,6 +8,7 @@ {-# LANGUAGE RecordWildCards #-} {-# LANGUAGE ScopedTypeVariables #-} {-# LANGUAGE TupleSections #-} +{-# LANGUAGE TypeApplications #-} -- | A test for the consensus-specific parts of the BlockFetch client. -- @@ -51,11 +52,13 @@ import Ouroboros.Consensus.Util.STM (blockUntilJust, import Ouroboros.Network.AnchoredFragment (AnchoredFragment) import qualified Ouroboros.Network.AnchoredFragment as AF import Ouroboros.Network.BlockFetch (BlockFetchConfiguration (..), - BlockFetchConsensusInterface, FetchMode (..), - blockFetchLogic, bracketFetchClient, - bracketKeepAliveClient, bracketSyncWithFetchClient, - newFetchClientRegistry) + BlockFetchConsensusInterface (..), FetchMode (..), + GenesisBlockFetchConfiguration (..), blockFetchLogic, + bracketFetchClient, bracketKeepAliveClient, + bracketSyncWithFetchClient, newFetchClientRegistry) import Ouroboros.Network.BlockFetch.Client (blockFetchClient) +import Ouroboros.Network.BlockFetch.ConsensusInterface + (GenesisFetchMode (..)) import Ouroboros.Network.ControlMessage (ControlMessage (..)) import Ouroboros.Network.Mock.Chain (Chain) import qualified Ouroboros.Network.Mock.Chain as Chain @@ -96,7 +99,10 @@ prop_blockFetch bfcts@BlockFetchClientTestSetup{..} = ] <> [ Map.keysSet bfcoBlockFetchResults === Map.keysSet peerUpdates , counterexample ("Fetched blocks per peer: " <> condense bfcoFetchedBlocks) $ - property $ all (> 0) bfcoFetchedBlocks + property $ case blockFetchMode of + PraosFetchMode FetchModeDeadline -> all (> 0) bfcoFetchedBlocks + PraosFetchMode FetchModeBulkSync -> all (> 0) bfcoFetchedBlocks + FetchModeGenesis -> any (> 0) bfcoFetchedBlocks ] where BlockFetchClientOutcome{..} = runSimOrThrow $ runBlockFetchTest bfcts @@ -256,10 +262,11 @@ runBlockFetchTest BlockFetchClientTestSetup{..} = withRegistry \registry -> do let -- Always return the empty chain such that the BlockFetch logic -- downloads all chains. 
- getCurrentChain = pure $ AF.Empty AF.AnchorGenesis - getIsFetched = ChainDB.getIsFetched chainDB - getMaxSlotNo = ChainDB.getMaxSlotNo chainDB - addBlockWaitWrittenToDisk = ChainDB.addBlockWaitWrittenToDisk chainDB + getCurrentChain = pure $ AF.Empty AF.AnchorGenesis + getIsFetched = ChainDB.getIsFetched chainDB + getMaxSlotNo = ChainDB.getMaxSlotNo chainDB + addBlockAsync = ChainDB.addBlockAsync chainDB + getChainSelStarvation = ChainDB.getChainSelStarvation chainDB pure BlockFetchClientInterface.ChainDbView {..} where -- Needs to be larger than any chain length in this test, to ensure that @@ -278,13 +285,17 @@ runBlockFetchTest BlockFetchClientTestSetup{..} = withRegistry \registry -> do -> BlockFetchClientInterface.ChainDbView m TestBlock -> BlockFetchConsensusInterface PeerId (Header TestBlock) TestBlock m mkTestBlockFetchConsensusInterface getCandidates chainDbView = - BlockFetchClientInterface.mkBlockFetchConsensusInterface + (BlockFetchClientInterface.mkBlockFetchConsensusInterface @m @PeerId + nullTracer (TestBlockConfig numCoreNodes) chainDbView - getCandidates + (error "ChainSyncClientHandleCollection not provided to mkBlockFetchConsensusInterface") (\_hdr -> 1000) -- header size, only used for peer prioritization slotForgeTime - (pure blockFetchMode) + (pure blockFetchMode)) + { readCandidateChains = getCandidates + , demoteCSJDynamo = const (pure ()) + } where -- Bogus implementation; this is fine as this is only used for -- enriching tracing information ATM. @@ -322,7 +333,7 @@ data BlockFetchClientTestSetup = BlockFetchClientTestSetup { -- the candidate fragments provided by the ChainSync client. peerUpdates :: Map PeerId (Schedule ChainUpdate) -- | BlockFetch 'FetchMode' - , blockFetchMode :: FetchMode + , blockFetchMode :: GenesisFetchMode , blockFetchCfg :: BlockFetchConfiguration } deriving stock (Show) @@ -350,7 +361,11 @@ instance Arbitrary BlockFetchClientTestSetup where peerUpdates <- Map.fromList . zip peerIds <$> replicateM numPeers genUpdateSchedule - blockFetchMode <- elements [FetchModeBulkSync, FetchModeDeadline] + blockFetchMode <- elements + [ PraosFetchMode FetchModeBulkSync + , PraosFetchMode FetchModeDeadline + , FetchModeGenesis + ] blockFetchCfg <- do let -- ensure that we can download blocks from all peers bfcMaxConcurrencyBulkSync = fromIntegral numPeers @@ -358,9 +373,12 @@ instance Arbitrary BlockFetchClientTestSetup where -- This is used to introduce a minimal delay between BlockFetch -- logic iterations in case the monitored state vars change too -- fast, which we don't have to worry about in this test. 
- bfcDecisionLoopInterval = 0 + bfcDecisionLoopIntervalGenesis = 0 + bfcDecisionLoopIntervalPraos = 0 bfcMaxRequestsInflight <- chooseEnum (2, 10) bfcSalt <- arbitrary + gbfcGracePeriod <- fromIntegral <$> chooseInteger (5, 60) + let bfcGenesisBFConfig = GenesisBlockFetchConfiguration {..} pure BlockFetchConfiguration {..} pure BlockFetchClientTestSetup {..} where diff --git a/ouroboros-consensus/test/consensus-test/Test/Consensus/MiniProtocol/ChainSync/Client.hs b/ouroboros-consensus/test/consensus-test/Test/Consensus/MiniProtocol/ChainSync/Client.hs index 868ca695dd..511f204835 100644 --- a/ouroboros-consensus/test/consensus-test/Test/Consensus/MiniProtocol/ChainSync/Client.hs +++ b/ouroboros-consensus/test/consensus-test/Test/Consensus/MiniProtocol/ChainSync/Client.hs @@ -82,12 +82,14 @@ import Ouroboros.Consensus.Ledger.Abstract import Ouroboros.Consensus.Ledger.Extended hiding (ledgerState) import Ouroboros.Consensus.MiniProtocol.ChainSync.Client (CSJConfig (..), ChainDbView (..), - ChainSyncClientException, ChainSyncClientResult (..), - ChainSyncLoPBucketConfig (..), ChainSyncState (..), - ChainSyncStateView (..), ConfigEnv (..), Consensus, - DynamicEnv (..), Our (..), Their (..), - TraceChainSyncClientEvent (..), bracketChainSyncClient, - chainSyncClient, chainSyncStateFor, viewChainSyncState) + ChainSyncClientException, + ChainSyncClientHandleCollection (..), + ChainSyncClientResult (..), ChainSyncLoPBucketConfig (..), + ChainSyncState (..), ChainSyncStateView (..), + ConfigEnv (..), Consensus, DynamicEnv (..), Our (..), + Their (..), TraceChainSyncClientEvent (..), + bracketChainSyncClient, chainSyncClient, chainSyncStateFor, + newChainSyncClientHandleCollection, viewChainSyncState) import qualified Ouroboros.Consensus.MiniProtocol.ChainSync.Client.InFutureCheck as InFutureCheck import Ouroboros.Consensus.Node.GsmState (GsmState (Syncing)) import Ouroboros.Consensus.Node.NetworkProtocolVersion @@ -357,7 +359,7 @@ runChainSync skew securityParam (ClientUpdates clientUpdates) -- separate map too, one that isn't emptied. We can use this map to look -- at the final state of each candidate. varFinalCandidates <- uncheckedNewTVarM Map.empty - varHandles <- uncheckedNewTVarM Map.empty + cschCol <- atomically newChainSyncClientHandleCollection (tracer, getTrace) <- do (tracer', getTrace) <- recordingTracerTVar @@ -501,7 +503,7 @@ runChainSync skew securityParam (ClientUpdates clientUpdates) bracketChainSyncClient chainSyncTracer chainDbView - varHandles + cschCol -- 'Syncing' only ever impacts the LoP, which is disabled in -- this test, so any value would do. (pure Syncing) @@ -511,7 +513,7 @@ runChainSync skew securityParam (ClientUpdates clientUpdates) csjConfig $ \csState -> do atomically $ do - handles <- readTVar varHandles + handles <- cschcMap cschCol modifyTVar varFinalCandidates $ Map.insert serverId (handles Map.! 
serverId) result <- runPipelinedPeer protocolTracer codecChainSyncId clientChannel $ @@ -532,7 +534,7 @@ runChainSync skew securityParam (ClientUpdates clientUpdates) let checkTipTime :: m () checkTipTime = do now <- systemTimeCurrent clientSystemTime - candidates <- atomically $ viewChainSyncState varHandles csCandidate + candidates <- atomically $ viewChainSyncState (cschcMap cschCol) csCandidate forM_ candidates $ \candidate -> do let p = castPoint $ AF.headPoint candidate :: Point TestBlock case pointSlot p of diff --git a/ouroboros-consensus/test/storage-test/Test/Ouroboros/Storage/ChainDB/StateMachine.hs b/ouroboros-consensus/test/storage-test/Test/Ouroboros/Storage/ChainDB/StateMachine.hs index 7017b78ce5..c675772289 100644 --- a/ouroboros-consensus/test/storage-test/Test/Ouroboros/Storage/ChainDB/StateMachine.hs +++ b/ouroboros-consensus/test/storage-test/Test/Ouroboros/Storage/ChainDB/StateMachine.hs @@ -1252,6 +1252,8 @@ deriving instance SOP.Generic (ImmutableDB.TraceEvent blk) deriving instance SOP.HasDatatypeInfo (ImmutableDB.TraceEvent blk) deriving instance SOP.Generic (VolatileDB.TraceEvent blk) deriving instance SOP.HasDatatypeInfo (VolatileDB.TraceEvent blk) +deriving instance SOP.Generic (TraceChainSelStarvationEvent blk) +deriving instance SOP.HasDatatypeInfo (TraceChainSelStarvationEvent blk) data Tag = TagGetIsValidJust @@ -1632,6 +1634,7 @@ traceEventName = \case TraceImmutableDBEvent ev -> "ImmutableDB." <> constrName ev TraceVolatileDBEvent ev -> "VolatileDB." <> constrName ev TraceLastShutdownUnclean -> "LastShutdownUnclean" + TraceChainSelStarvationEvent ev -> "ChainSelStarvation." <> constrName ev mkArgs :: IOLike m => TopLevelConfig Blk