From 1369102a78937be90e37dc9fe1d43954dacaca1f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Facundo=20Dom=C3=ADnguez?= Date: Mon, 27 May 2024 18:27:28 +0000 Subject: [PATCH 01/76] Introduce a collection of chainsync handles that synchronizes a map and a queue --- .../Ouroboros/Consensus/NodeKernel.hs | 19 ++--- .../Consensus/PeerSimulator/CSJInvariants.hs | 10 +-- .../Test/Consensus/PeerSimulator/ChainSync.hs | 12 ++-- .../Consensus/PeerSimulator/NodeLifecycle.hs | 4 +- .../Test/Consensus/PeerSimulator/Resources.hs | 7 +- .../Test/Consensus/PeerSimulator/Run.hs | 23 +++--- .../MiniProtocol/ChainSync/Client.hs | 12 ++-- .../MiniProtocol/ChainSync/Client/Jumping.hs | 58 ++++++++------- .../MiniProtocol/ChainSync/Client/State.hs | 70 ++++++++++++++++++- 9 files changed, 145 insertions(+), 70 deletions(-) diff --git a/ouroboros-consensus-diffusion/src/ouroboros-consensus-diffusion/Ouroboros/Consensus/NodeKernel.hs b/ouroboros-consensus-diffusion/src/ouroboros-consensus-diffusion/Ouroboros/Consensus/NodeKernel.hs index 1c6f0bdb0f..05c36a0c0e 100644 --- a/ouroboros-consensus-diffusion/src/ouroboros-consensus-diffusion/Ouroboros/Consensus/NodeKernel.hs +++ b/ouroboros-consensus-diffusion/src/ouroboros-consensus-diffusion/Ouroboros/Consensus/NodeKernel.hs @@ -61,8 +61,9 @@ import Ouroboros.Consensus.Ledger.SupportsProtocol import Ouroboros.Consensus.Mempool import qualified Ouroboros.Consensus.MiniProtocol.BlockFetch.ClientInterface as BlockFetchClientInterface import Ouroboros.Consensus.MiniProtocol.ChainSync.Client - (ChainSyncClientHandle (..), ChainSyncState (..), - viewChainSyncState) + (ChainSyncClientHandle (..), + ChainSyncClientHandleCollection (..), ChainSyncState (..), + newChainSyncClientHandleCollection, viewChainSyncState) import Ouroboros.Consensus.MiniProtocol.ChainSync.Client.InFutureCheck (SomeHeaderInFutureCheck) import Ouroboros.Consensus.Node.Genesis (GenesisNodeKernelArgs (..), @@ -140,7 +141,7 @@ data NodeKernel m addrNTN addrNTC blk = NodeKernel { , getGsmState :: STM m GSM.GsmState -- | The kill handle and exposed state for each ChainSync client. - , getChainSyncHandles :: StrictTVar m (Map (ConnectionId addrNTN) (ChainSyncClientHandle m blk)) + , getChainSyncHandles :: ChainSyncClientHandleCollection (ConnectionId addrNTN) m blk -- | Read the current peer sharing registry, used for interacting with -- the PeerSharing protocol @@ -242,7 +243,7 @@ initNodeKernel args@NodeKernelArgs { registry, cfg, tracers <&> \wd (_headers, lst) -> GSM.getDurationUntilTooOld wd (getTipSlot lst) , GSM.equivalent = (==) `on` (AF.headPoint . fst) - , GSM.getChainSyncStates = fmap cschState <$> readTVar varChainSyncHandles + , GSM.getChainSyncStates = fmap cschState <$> cschcMap varChainSyncHandles , GSM.getCurrentSelection = do headers <- ChainDB.getCurrentChain chainDB extLedgerState <- ChainDB.getCurrentLedger chainDB @@ -254,7 +255,7 @@ initNodeKernel args@NodeKernelArgs { registry, cfg, tracers , GSM.writeGsmState = \gsmState -> atomicallyWithMonotonicTime $ \time -> do writeTVar varGsmState gsmState - handles <- readTVar varChainSyncHandles + handles <- cschcMap varChainSyncHandles traverse_ (($ time) . ($ gsmState) . 
cschOnGsmStateChanged) handles , GSM.isHaaSatisfied = do readTVar varOutboundConnectionsState <&> \case @@ -289,7 +290,7 @@ initNodeKernel args@NodeKernelArgs { registry, cfg, tracers chainDB (readTVar varGsmState) -- TODO GDD should only consider (big) ledger peers - (readTVar varChainSyncHandles) + (cschcMap varChainSyncHandles) varLoEFragment void $ forkLinkedThread registry "NodeKernel.blockForging" $ @@ -345,7 +346,7 @@ data InternalState m addrNTN addrNTC blk = IS { , chainDB :: ChainDB m blk , blockFetchInterface :: BlockFetchConsensusInterface (ConnectionId addrNTN) (Header blk) blk m , fetchClientRegistry :: FetchClientRegistry (ConnectionId addrNTN) (Header blk) blk m - , varChainSyncHandles :: StrictTVar m (Map (ConnectionId addrNTN) (ChainSyncClientHandle m blk)) + , varChainSyncHandles :: ChainSyncClientHandleCollection (ConnectionId addrNTN) m blk , varGsmState :: StrictTVar m GSM.GsmState , mempool :: Mempool m blk , peerSharingRegistry :: PeerSharingRegistry addrNTN m @@ -373,7 +374,7 @@ initInternalState NodeKernelArgs { tracers, chainDB, registry, cfg gsmMarkerFileView newTVarIO gsmState - varChainSyncHandles <- newTVarIO mempty + varChainSyncHandles <- atomically newChainSyncClientHandleCollection mempool <- openMempool registry (chainDBLedgerInterface chainDB) (configLedger cfg) @@ -384,7 +385,7 @@ initInternalState NodeKernelArgs { tracers, chainDB, registry, cfg fetchClientRegistry <- newFetchClientRegistry let getCandidates :: STM m (Map (ConnectionId addrNTN) (AnchoredFragment (Header blk))) - getCandidates = viewChainSyncState varChainSyncHandles csCandidate + getCandidates = viewChainSyncState (cschcMap varChainSyncHandles) csCandidate slotForgeTimeOracle <- BlockFetchClientInterface.initSlotForgeTimeOracle cfg chainDB let readFetchMode = BlockFetchClientInterface.readFetchModeDefault diff --git a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/CSJInvariants.hs b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/CSJInvariants.hs index 7b83e04101..0d66e1f154 100644 --- a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/CSJInvariants.hs +++ b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/CSJInvariants.hs @@ -19,7 +19,7 @@ import Data.Typeable (Typeable) import Ouroboros.Consensus.Block (Point, StandardHash, castPoint) import qualified Ouroboros.Consensus.MiniProtocol.ChainSync.Client.State as CSState import Ouroboros.Consensus.Util.IOLike (Exception, MonadSTM (STM), - MonadThrow (throwIO), StrictTVar, readTVar) + MonadThrow (throwIO), readTVar) import Ouroboros.Consensus.Util.STM (Watcher (..)) -------------------------------------------------------------------------------- @@ -109,10 +109,10 @@ readAndView :: forall m peer blk. ( MonadSTM m ) => - StrictTVar m (Map peer (CSState.ChainSyncClientHandle m blk)) -> + STM m (Map peer (CSState.ChainSyncClientHandle m blk)) -> STM m (View peer blk) -readAndView handles = - traverse (fmap idealiseState . readTVar . CSState.cschJumping) =<< readTVar handles +readAndView readHandles = + traverse (fmap idealiseState . readTVar . CSState.cschJumping) =<< readHandles where -- Idealise the state of a ChainSync peer with respect to ChainSync jumping. 
-- In particular, we get rid of non-comparable information such as the TVars @@ -170,7 +170,7 @@ watcher :: Typeable blk, StandardHash blk ) => - StrictTVar m (Map peer (CSState.ChainSyncClientHandle m blk)) -> + STM m (Map peer (CSState.ChainSyncClientHandle m blk)) -> Watcher m (View peer blk) (View peer blk) watcher handles = Watcher diff --git a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/ChainSync.hs b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/ChainSync.hs index 2197bea732..5678b4d7c9 100644 --- a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/ChainSync.hs +++ b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/ChainSync.hs @@ -13,7 +13,6 @@ module Test.Consensus.PeerSimulator.ChainSync ( import Control.Exception (SomeException) import Control.Monad.Class.MonadTimer.SI (MonadTimer) import Control.Tracer (Tracer (Tracer), nullTracer, traceWith) -import Data.Map.Strict (Map) import Data.Proxy (Proxy (..)) import Network.TypedProtocol.Codec (AnyMessage) import Ouroboros.Consensus.Block (Header, Point) @@ -21,15 +20,16 @@ import Ouroboros.Consensus.Config (TopLevelConfig (..)) import Ouroboros.Consensus.Ledger.SupportsProtocol (LedgerSupportsProtocol) import Ouroboros.Consensus.MiniProtocol.ChainSync.Client - (CSJConfig (..), ChainDbView, ChainSyncClientHandle, - ChainSyncLoPBucketConfig, ChainSyncStateView (..), - Consensus, bracketChainSyncClient, chainSyncClient) + (CSJConfig (..), ChainDbView, + ChainSyncClientHandleCollection, ChainSyncLoPBucketConfig, + ChainSyncStateView (..), Consensus, bracketChainSyncClient, + chainSyncClient) import qualified Ouroboros.Consensus.MiniProtocol.ChainSync.Client as CSClient import qualified Ouroboros.Consensus.MiniProtocol.ChainSync.Client.InFutureCheck as InFutureCheck import Ouroboros.Consensus.Node.GsmState (GsmState (Syncing)) import Ouroboros.Consensus.Util (ShowProxy) import Ouroboros.Consensus.Util.IOLike (Exception (fromException), - IOLike, MonadCatch (try), StrictTVar) + IOLike, MonadCatch (try)) import Ouroboros.Network.Block (Tip) import Ouroboros.Network.Channel (Channel) import Ouroboros.Network.ControlMessage (ControlMessage (..)) @@ -124,7 +124,7 @@ runChainSyncClient :: -- ^ Configuration for ChainSync Jumping StateViewTracers blk m -> -- ^ Tracers used to record information for the future 'StateView'. - StrictTVar m (Map PeerId (ChainSyncClientHandle m blk)) -> + ChainSyncClientHandleCollection PeerId m blk -> -- ^ A TVar containing a map of states for each peer. This -- function will (via 'bracketChainSyncClient') register and de-register a -- TVar for the state of the peer. 
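The pattern throughout this patch is that read-only consumers now take the 'STM m (Map peer (ChainSyncClientHandle m blk))' action itself (usually 'cschcMap' applied to a collection) instead of a 'StrictTVar' holding the map. A minimal sketch of such a consumer, assuming only the interface introduced above; 'peerCount' is an illustrative helper, not part of the patch:

import qualified Data.Map.Strict as Map
import Ouroboros.Consensus.Util.IOLike (MonadSTM (STM))

-- Count the connected peers via the Map view of the collection.
-- Callers that used to do @Map.size <$> readTVar varHandles@ can pass
-- @cschcMap collection@ and keep the same STM transaction structure.
peerCount ::
     MonadSTM m
  => ChainSyncClientHandleCollection peer m blk
  -> STM m Int
peerCount handlesCol = Map.size <$> cschcMap handlesCol

Handing out the STM action rather than the TVar means consumers such as 'readAndView' and the GDD watcher above cannot accidentally write to the collection and do not care how the two views are kept in sync.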
diff --git a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/NodeLifecycle.hs b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/NodeLifecycle.hs index 3d6ea7d04e..993d1b1263 100644 --- a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/NodeLifecycle.hs +++ b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/NodeLifecycle.hs @@ -19,6 +19,8 @@ import Data.Set (Set) import qualified Data.Set as Set import Ouroboros.Consensus.Block import Ouroboros.Consensus.Config (TopLevelConfig (..)) +import Ouroboros.Consensus.MiniProtocol.ChainSync.Client + (ChainSyncClientHandleCollection (..)) import Ouroboros.Consensus.Storage.ChainDB.API import qualified Ouroboros.Consensus.Storage.ChainDB.API as ChainDB import qualified Ouroboros.Consensus.Storage.ChainDB.Impl as ChainDB @@ -204,7 +206,7 @@ lifecycleStop resources LiveNode {lnStateViewTracers, lnCopyToImmDb, lnPeers} = releaseAll lrRegistry -- Reset the resources in TVars that were allocated by the simulator atomically $ do - modifyTVar psrHandles (const mempty) + cschcRemoveAllHandles psrHandles case lrLoEVar of LoEEnabled var -> modifyTVar var (const (AF.Empty AF.AnchorGenesis)) LoEDisabled -> pure () diff --git a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/Resources.hs b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/Resources.hs index c4fe394a60..a594d9059c 100644 --- a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/Resources.hs +++ b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/Resources.hs @@ -25,7 +25,8 @@ import Data.Traversable (for) import Ouroboros.Consensus.Block (WithOrigin (Origin)) import Ouroboros.Consensus.Block.Abstract (Header, Point (..)) import Ouroboros.Consensus.MiniProtocol.ChainSync.Client - (ChainSyncClientHandle) + (ChainSyncClientHandleCollection, + newChainSyncClientHandleCollection) import Ouroboros.Consensus.Util.IOLike (IOLike, MonadSTM (STM), StrictTVar, readTVar, uncheckedNewTVarM, writeTVar) import qualified Ouroboros.Network.AnchoredFragment as AF @@ -115,7 +116,7 @@ data PeerSimulatorResources m blk = -- | Handles to interact with the ChainSync client of each peer. -- See 'ChainSyncClientHandle' for more details. - psrHandles :: StrictTVar m (Map PeerId (ChainSyncClientHandle m TestBlock)) + psrHandles :: ChainSyncClientHandleCollection PeerId m TestBlock } -- | Create 'ChainSyncServerHandlers' for our default implementation using 'NodeState'. 
@@ -233,5 +234,5 @@ makePeerSimulatorResources tracer blockTree peers = do resources <- for peers $ \ peerId -> do peerResources <- makePeerResources tracer blockTree peerId pure (peerId, peerResources) - psrHandles <- uncheckedNewTVarM mempty + psrHandles <- atomically newChainSyncClientHandleCollection pure PeerSimulatorResources {psrPeers = Map.fromList $ toList resources, psrHandles} diff --git a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/Run.hs b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/Run.hs index 586f7776ca..63c8897e33 100644 --- a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/Run.hs +++ b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/Run.hs @@ -26,7 +26,9 @@ import Ouroboros.Consensus.Ledger.SupportsProtocol (LedgerSupportsProtocol) import Ouroboros.Consensus.MiniProtocol.ChainSync.Client (CSJConfig (..), CSJEnabledConfig (..), ChainDbView, - ChainSyncClientHandle, ChainSyncLoPBucketConfig (..), + ChainSyncClientHandle, + ChainSyncClientHandleCollection (..), + ChainSyncLoPBucketConfig (..), ChainSyncLoPBucketEnabledConfig (..), viewChainSyncState) import qualified Ouroboros.Consensus.MiniProtocol.ChainSync.Client as CSClient import qualified Ouroboros.Consensus.Node.GsmState as GSM @@ -147,7 +149,7 @@ startChainSyncConnectionThread :: ChainSyncLoPBucketConfig -> CSJConfig -> StateViewTracers blk m -> - StrictTVar m (Map PeerId (ChainSyncClientHandle m blk)) -> + ChainSyncClientHandleCollection PeerId m blk -> m (Thread m (), Thread m ()) startChainSyncConnectionThread registry @@ -230,7 +232,7 @@ smartDelay _ node duration = do dispatchTick :: forall m blk. IOLike m => Tracer m (TraceSchedulerEvent blk) -> - StrictTVar m (Map PeerId (ChainSyncClientHandle m blk)) -> + STM m (Map PeerId (ChainSyncClientHandle m blk)) -> Map PeerId (PeerResources m blk) -> NodeLifecycle blk m -> LiveNode blk m -> @@ -250,7 +252,7 @@ dispatchTick tracer varHandles peers lifecycle node (number, (duration, Peer pid traceNewTick = do currentChain <- atomically $ ChainDB.getCurrentChain (lnChainDb node) (csState, jumpingStates) <- atomically $ do - m <- readTVar varHandles + m <- varHandles csState <- traverse (readTVar . CSClient.cschState) (m Map.!? pid) jumpingStates <- forM (Map.toList m) $ \(peer, h) -> do st <- readTVar (CSClient.cschJumping h) @@ -272,7 +274,7 @@ dispatchTick tracer varHandles peers lifecycle node (number, (duration, Peer pid runScheduler :: IOLike m => Tracer m (TraceSchedulerEvent blk) -> - StrictTVar m (Map PeerId (ChainSyncClientHandle m blk)) -> + STM m (Map PeerId (ChainSyncClientHandle m blk)) -> PointSchedule blk -> Map PeerId (PeerResources m blk) -> NodeLifecycle blk m -> @@ -314,7 +316,7 @@ mkStateTracer :: m (Tracer m ()) mkStateTracer schedulerConfig GenesisTest {gtBlockTree} PeerSimulatorResources {psrHandles, psrPeers} chainDb | scTraceState schedulerConfig - , let getCandidates = viewChainSyncState psrHandles CSClient.csCandidate + , let getCandidates = viewChainSyncState (cschcMap psrHandles) CSClient.csCandidate getCurrentChain = ChainDB.getCurrentChain chainDb getPoints = traverse readTVar (srCurrentState . 
prShared <$> psrPeers) = peerSimStateDiagramSTMTracerDebug gtBlockTree getCurrentChain getCandidates getPoints @@ -335,7 +337,7 @@ startNode :: startNode schedulerConfig genesisTest interval = do let handles = psrHandles lrPeerSim - getCandidates = viewChainSyncState handles CSClient.csCandidate + getCandidates = viewChainSyncState (cschcMap handles) CSClient.csCandidate fetchClientRegistry <- newFetchClientRegistry let chainDbView = CSClient.defaultChainDbView lnChainDb activePeers = Map.restrictKeys (psrPeers lrPeerSim) (lirActive liveResult) @@ -384,10 +386,11 @@ startNode schedulerConfig genesisTest interval = do (mkGDDTracerTestBlock lrTracer) lnChainDb (pure GSM.Syncing) -- TODO actually run GSM - (readTVar handles) + (cschcMap handles) var - void $ forkLinkedWatcher lrRegistry "CSJ invariants watcher" $ CSJInvariants.watcher handles + void $ forkLinkedWatcher lrRegistry "CSJ invariants watcher" $ + CSJInvariants.watcher (cschcMap handles) where LiveResources {lrRegistry, lrTracer, lrConfig, lrPeerSim, lrLoEVar} = resources @@ -483,7 +486,7 @@ runPointSchedule schedulerConfig genesisTest tracer0 = lifecycle <- nodeLifecycle schedulerConfig genesisTest tracer registry peerSim (chainDb, stateViewTracers) <- runScheduler (Tracer $ traceWith tracer . TraceSchedulerEvent) - (psrHandles peerSim) + (cschcMap (psrHandles peerSim)) gtSchedule (psrPeers peerSim) lifecycle diff --git a/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/MiniProtocol/ChainSync/Client.hs b/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/MiniProtocol/ChainSync/Client.hs index 452ae24930..9ea02cc2d0 100644 --- a/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/MiniProtocol/ChainSync/Client.hs +++ b/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/MiniProtocol/ChainSync/Client.hs @@ -64,10 +64,12 @@ module Ouroboros.Consensus.MiniProtocol.ChainSync.Client ( , TraceChainSyncClientEvent (..) -- * State shared with other components , ChainSyncClientHandle (..) + , ChainSyncClientHandleCollection (..) , ChainSyncState (..) , ChainSyncStateView (..) , Jumping.noJumping , chainSyncStateFor + , newChainSyncClientHandleCollection , noIdling , noLoPBucket , viewChainSyncState @@ -229,11 +231,11 @@ newtype Our a = Our { unOur :: a } -- data from 'ChainSyncState'. viewChainSyncState :: IOLike m => - StrictTVar m (Map peer (ChainSyncClientHandle m blk)) -> + STM m (Map peer (ChainSyncClientHandle m blk)) -> (ChainSyncState blk -> a) -> STM m (Map peer a) viewChainSyncState varHandles f = - Map.map f <$> (traverse (readTVar . cschState) =<< readTVar varHandles) + Map.map f <$> (traverse (readTVar . cschState) =<< varHandles) -- | Convenience function for reading the 'ChainSyncState' for a single peer -- from a nested set of TVars. @@ -327,7 +329,7 @@ bracketChainSyncClient :: ) => Tracer m (TraceChainSyncClientEvent blk) -> ChainDbView m blk - -> StrictTVar m (Map peer (ChainSyncClientHandle m blk)) + -> ChainSyncClientHandleCollection peer m blk -- ^ The kill handle and states for each peer, we need the whole map because we -- (de)register nodes (@peer@). 
-> STM m GsmState @@ -400,8 +402,8 @@ bracketChainSyncClient insertHandle = atomicallyWithMonotonicTime $ \time -> do initialGsmState <- getGsmState updateLopBucketConfig lopBucket initialGsmState time - modifyTVar varHandles $ Map.insert peer handle - deleteHandle = atomically $ modifyTVar varHandles $ Map.delete peer + cschcAddHandle varHandles peer handle + deleteHandle = atomically $ cschcRemoveHandle varHandles peer bracket_ insertHandle deleteHandle $ f Jumping.noJumping withCSJCallbacks lopBucket csHandleState (CSJEnabled csjEnabledConfig) f = diff --git a/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/MiniProtocol/ChainSync/Client/Jumping.hs b/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/MiniProtocol/ChainSync/Client/Jumping.hs index bd150c93d4..6e3283a14e 100644 --- a/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/MiniProtocol/ChainSync/Client/Jumping.hs +++ b/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/MiniProtocol/ChainSync/Client/Jumping.hs @@ -163,11 +163,11 @@ module Ouroboros.Consensus.MiniProtocol.ChainSync.Client.Jumping ( import Cardano.Slotting.Slot (SlotNo (..), WithOrigin (..)) import Control.Monad (forM, forM_, when) +import Data.Foldable (toList) import Data.List (sortOn) -import Data.Map.Strict (Map) -import qualified Data.Map.Strict as Map import Data.Maybe (catMaybes, fromMaybe) import Data.Maybe.Strict (StrictMaybe (..)) +import qualified Data.Sequence.Strict as Seq import GHC.Generics (Generic) import Ouroboros.Consensus.Block (HasHeader (getHeaderFields), Header, Point (..), castPoint, pointSlot, succWithOrigin) @@ -175,6 +175,7 @@ import Ouroboros.Consensus.Ledger.SupportsProtocol (LedgerSupportsProtocol) import Ouroboros.Consensus.MiniProtocol.ChainSync.Client.State (ChainSyncClientHandle (..), + ChainSyncClientHandleCollection (..), ChainSyncJumpingJumperState (..), ChainSyncJumpingState (..), ChainSyncState (..), DisengagedInitState (..), DynamoInitState (..), @@ -257,16 +258,16 @@ mkJumping peerContext = Jumping -- -- Invariants: -- --- - If 'handlesVar' is not empty, then there is exactly one dynamo in it. --- - There is at most one objector in 'handlesVar'. --- - If there exist 'FoundIntersection' jumpers in 'handlesVar', then there +-- - If 'handlesCol is not empty, then there is exactly one dynamo in it. +-- - There is at most one objector in 'handlesCol. +-- - If there exist 'FoundIntersection' jumpers in 'handlesCol, then there -- is an objector and the intersection of the objector with the dynamo is -- at least as old as the oldest intersection of the `FoundIntersection` jumpers -- with the dynamo. data ContextWith peerField handleField m peer blk = Context { peer :: !peerField, handle :: !handleField, - handlesVar :: !(StrictTVar m (Map peer (ChainSyncClientHandle m blk))), + handlesCol :: !(ChainSyncClientHandleCollection peer m blk), jumpSize :: !SlotNo } @@ -276,12 +277,12 @@ type Context = ContextWith () () -- | A peer-specific context for ChainSync jumping. This is a 'ContextWith' -- pointing on the handler of the peer in question. -- --- Invariant: The binding from 'peer' to 'handle' is present in 'handlesVar'. +-- Invariant: The binding from 'peer' to 'handle' is present in 'handlesCol'. type PeerContext m peer blk = ContextWith peer (ChainSyncClientHandle m blk) m peer blk makeContext :: MonadSTM m => - StrictTVar m (Map peer (ChainSyncClientHandle m blk)) -> + ChainSyncClientHandleCollection peer m blk -> SlotNo -> -- ^ The size of jumps, in number of slots. 
STM m (Context m peer blk) @@ -427,8 +428,8 @@ onRollForward context point = setJumps (Just jumpInfo) = do writeTVar (cschJumping (handle context)) $ Dynamo DynamoStarted $ pointSlot $ AF.headPoint $ jTheirFragment jumpInfo - handles <- readTVar (handlesVar context) - forM_ (Map.elems handles) $ \h -> + handles <- cschcSeq (handlesCol context) + forM_ handles $ \(_, h) -> readTVar (cschJumping h) >>= \case Jumper nextJumpVar Happy{} -> writeTVar nextJumpVar (Just jumpInfo) _ -> pure () @@ -660,11 +661,11 @@ updateJumpInfo context jumpInfo = -- of the dynamo, or 'Nothing' if there is none. getDynamo :: (MonadSTM m) => - StrictTVar m (Map peer (ChainSyncClientHandle m blk)) -> + ChainSyncClientHandleCollection peer m blk -> STM m (Maybe (ChainSyncClientHandle m blk)) -getDynamo handlesVar = do - handles <- Map.elems <$> readTVar handlesVar - findM (\handle -> isDynamo <$> readTVar (cschJumping handle)) handles +getDynamo handlesCol = do + handles <- cschcSeq handlesCol + fmap snd <$> findM (\(_, handle) -> isDynamo <$> readTVar (cschJumping handle)) handles where isDynamo Dynamo{} = True isDynamo _ = False @@ -705,8 +706,7 @@ newJumper jumpInfo jumperState = do -- that peer. If there is no dynamo, the peer starts as dynamo; otherwise, it -- starts as a jumper. registerClient :: - ( Ord peer, - LedgerSupportsProtocol blk, + ( LedgerSupportsProtocol blk, IOLike m ) => Context m peer blk -> @@ -716,7 +716,7 @@ registerClient :: (StrictTVar m (ChainSyncJumpingState m blk) -> ChainSyncClientHandle m blk) -> STM m (PeerContext m peer blk) registerClient context peer csState mkHandle = do - csjState <- getDynamo (handlesVar context) >>= \case + csjState <- getDynamo (handlesCol context) >>= \case Nothing -> do fragment <- csCandidate <$> readTVar csState pure $ Dynamo DynamoStarted $ pointSlot $ AF.anchorPoint fragment @@ -725,7 +725,7 @@ registerClient context peer csState mkHandle = do newJumper mJustInfo (Happy FreshJumper Nothing) cschJumping <- newTVar csjState let handle = mkHandle cschJumping - modifyTVar (handlesVar context) $ Map.insert peer handle + cschcAddHandle (handlesCol context) peer handle pure $ context {peer, handle} -- | Unregister a client from a 'PeerContext'; this might trigger the election @@ -738,7 +738,7 @@ unregisterClient :: PeerContext m peer blk -> STM m () unregisterClient context = do - modifyTVar (handlesVar context) $ Map.delete (peer context) + cschcRemoveHandle (handlesCol context) (peer context) let context' = stripContext context readTVar (cschJumping (handle context)) >>= \case Disengaged{} -> pure () @@ -756,7 +756,7 @@ electNewDynamo :: Context m peer blk -> STM m () electNewDynamo context = do - peerStates <- Map.toList <$> readTVar (handlesVar context) + peerStates <- cschcSeq (handlesCol context) mDynamo <- findNonDisengaged peerStates case mDynamo of Nothing -> pure () @@ -781,22 +781,20 @@ electNewDynamo context = do isDisengaged Disengaged{} = True isDisengaged _ = False -findM :: Monad m => (a -> m Bool) -> [a] -> m (Maybe a) -findM _ [] = pure Nothing -findM p (x : xs) = p x >>= \case - True -> pure (Just x) - False -> findM p xs +findM :: (Foldable f, Monad m) => (a -> m Bool) -> f a -> m (Maybe a) +findM p = + foldr (\x mb -> p x >>= \case True -> pure (Just x); False -> mb) (pure Nothing) -- | Find the objector in a context, if there is one. 
findObjector :: (MonadSTM m) => Context m peer blk -> STM m (Maybe (ObjectorInitState, JumpInfo blk, Point (Header blk), ChainSyncClientHandle m blk)) -findObjector context = do - readTVar (handlesVar context) >>= go . Map.toList +findObjector context = + cschcSeq (handlesCol context) >>= go where - go [] = pure Nothing - go ((_, handle):xs) = + go Seq.Empty = pure Nothing + go ((_, handle) Seq.:<| xs) = readTVar (cschJumping handle) >>= \case Objector initState goodJump badPoint -> pure $ Just (initState, goodJump, badPoint, handle) @@ -809,7 +807,7 @@ electNewObjector :: Context m peer blk -> STM m () electNewObjector context = do - peerStates <- Map.toList <$> readTVar (handlesVar context) + peerStates <- toList <$> cschcSeq (handlesCol context) dissentingJumpers <- collectDissentingJumpers peerStates let sortedJumpers = sortOn (pointSlot . fst) dissentingJumpers case sortedJumpers of diff --git a/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/MiniProtocol/ChainSync/Client/State.hs b/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/MiniProtocol/ChainSync/Client/State.hs index 909e0ff829..69fa5ea7cd 100644 --- a/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/MiniProtocol/ChainSync/Client/State.hs +++ b/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/MiniProtocol/ChainSync/Client/State.hs @@ -9,6 +9,7 @@ module Ouroboros.Consensus.MiniProtocol.ChainSync.Client.State ( ChainSyncClientHandle (..) + , ChainSyncClientHandleCollection (..) , ChainSyncJumpingJumperState (..) , ChainSyncJumpingState (..) , ChainSyncState (..) @@ -17,11 +18,16 @@ module Ouroboros.Consensus.MiniProtocol.ChainSync.Client.State ( , JumpInfo (..) , JumperInitState (..) , ObjectorInitState (..) + , newChainSyncClientHandleCollection ) where import Cardano.Slotting.Slot (SlotNo, WithOrigin) import Data.Function (on) +import Data.Map.Strict (Map) +import qualified Data.Map.Strict as Map import Data.Maybe.Strict (StrictMaybe (..)) +import Data.Sequence.Strict (StrictSeq) +import qualified Data.Sequence.Strict as Seq import Data.Typeable (Proxy (..), typeRep) import GHC.Generics (Generic) import Ouroboros.Consensus.Block (HasHeader, Header, Point) @@ -30,7 +36,7 @@ import Ouroboros.Consensus.Ledger.SupportsProtocol (LedgerSupportsProtocol) import Ouroboros.Consensus.Node.GsmState (GsmState) import Ouroboros.Consensus.Util.IOLike (IOLike, NoThunks (..), STM, - StrictTVar, Time) + StrictTVar, Time, modifyTVar, newTVar, readTVar) import Ouroboros.Network.AnchoredFragment (AnchoredFragment, headPoint) @@ -96,6 +102,68 @@ deriving anyclass instance ( NoThunks (Header blk) ) => NoThunks (ChainSyncClientHandle m blk) +-- | A collection of ChainSync client handles for the peers of this node. +-- +-- Sometimes we want to see the collection as a Map, and sometimes as a sequence. +-- The implementation keeps both views in sync. 
+data ChainSyncClientHandleCollection peer m blk = ChainSyncClientHandleCollection { + -- | A map containing the handles for the peers in the collection + cschcMap :: !(STM m (Map peer (ChainSyncClientHandle m blk))) + -- | A sequence containing the handles for the peers in the collection + , cschcSeq :: !(STM m (StrictSeq (peer, ChainSyncClientHandle m blk))) + -- | Add the handle for the given peer to the collection + -- PRECONDITION: The peer is not already in the collection + , cschcAddHandle :: !(peer -> ChainSyncClientHandle m blk -> STM m ()) + -- | Remove the handle for the given peer from the collection + , cschcRemoveHandle :: !(peer -> STM m ()) + -- | Moves the handle for the given peer to the end of the sequence + , cschcRotateHandle :: !(peer -> STM m ()) + -- | Remove all the handles from the collection + , cschcRemoveAllHandles :: !(STM m ()) + } + deriving stock (Generic) + +deriving anyclass instance ( + IOLike m, + HasHeader blk, + LedgerSupportsProtocol blk, + NoThunks (STM m ()), + NoThunks (Header blk), + NoThunks (STM m (Map peer (ChainSyncClientHandle m blk))), + NoThunks (STM m (StrictSeq (peer, ChainSyncClientHandle m blk))) + ) => NoThunks (ChainSyncClientHandleCollection peer m blk) + +newChainSyncClientHandleCollection :: + ( Ord peer, + IOLike m, + LedgerSupportsProtocol blk, + NoThunks peer + ) + => STM m (ChainSyncClientHandleCollection peer m blk) +newChainSyncClientHandleCollection = do + handlesMap <- newTVar mempty + handlesSeq <- newTVar mempty + + return ChainSyncClientHandleCollection { + cschcMap = readTVar handlesMap + , cschcSeq = readTVar handlesSeq + , cschcAddHandle = \peer handle -> do + modifyTVar handlesMap (Map.insert peer handle) + modifyTVar handlesSeq (Seq.|> (peer, handle)) + , cschcRemoveHandle = \peer -> do + modifyTVar handlesMap (Map.delete peer) + modifyTVar handlesSeq $ \s -> + let (xs, ys) = Seq.spanl ((/= peer) . fst) s + in xs Seq.>< Seq.drop 1 ys + , cschcRotateHandle = \peer -> + modifyTVar handlesSeq $ \s -> + let (xs, ys) = Seq.spanl ((/= peer) . fst) s + in xs Seq.>< Seq.drop 1 ys Seq.>< Seq.take 1 ys + , cschcRemoveAllHandles = do + modifyTVar handlesMap (const mempty) + modifyTVar handlesSeq (const mempty) + } + data DynamoInitState blk = -- | The dynamo has not yet started jumping and we first need to jump to the -- given jump info to set the intersection of the ChainSync server. From e3be2f123cc75cdfce19fc411d8c1b46b484988e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Facundo=20Dom=C3=ADnguez?= Date: Mon, 27 May 2024 19:08:54 +0000 Subject: [PATCH 02/76] Implement a call to rotate dynamos in CSJ --- .../MiniProtocol/ChainSync/Client/Jumping.hs | 157 +++++++++++++----- 1 file changed, 119 insertions(+), 38 deletions(-) diff --git a/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/MiniProtocol/ChainSync/Client/Jumping.hs b/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/MiniProtocol/ChainSync/Client/Jumping.hs index 6e3283a14e..af05bf27c3 100644 --- a/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/MiniProtocol/ChainSync/Client/Jumping.hs +++ b/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/MiniProtocol/ChainSync/Client/Jumping.hs @@ -74,6 +74,13 @@ -- when the client should pause, download headers, or ask about agreement with -- a given point (jumping). See the 'Jumping' type for more details. -- +-- Interactions with the BlockFetch logic +-- -------------------------------------- +-- +-- When syncing, the BlockFetch logic will fetch blocks from the dynamo. 
If the +-- dynamo is responding too slowly, the BlockFetch logic can ask to change the +-- dynamo with a call to 'rotateDynamo'. +-- -- Interactions with the Limit on Patience -- --------------------------------------- -- @@ -100,15 +107,15 @@ -- -- > j ╔════════╗ -- > ╭────────── ║ Dynamo ║ ◀─────────╮ --- > │ ╚════════╝ │f --- > ▼ ▲ │ --- > ┌────────────┐ │ k ┌──────────┐ --- > │ Disengaged │ ◀───────────│────────── │ Objector │ --- > └────────────┘ ╭─────│────────── └──────────┘ --- > │ │ ▲ ▲ │ --- > g│ │e b │ │ │ --- > │ │ ╭─────╯ i│ │c --- > ╭╌╌╌╌╌╌╌▼╌╌╌╌╌╌╌╌╌╌╌╌╌│╌╌╌╌╌╌╌╌╌╌│╌▼╌╌╌╮ +-- > │ ╭──╚════════╝ │f +-- > ▼ │ ▲ │ +-- > ┌────────────┐ │ │ k ┌──────────┐ +-- > │ Disengaged │ ◀─│─────────│────────── │ Objector │ +-- > └────────────┘ │ ╭─────│────────── └──────────┘ +-- > │ │ │ ▲ ▲ │ +-- > l│ g│ │e b │ │ │ +-- > │ │ │ ╭─────╯ i│ │c +-- > ╭╌╌╌▼╌╌╌▼╌╌╌╌╌╌╌╌╌╌╌╌╌│╌╌╌╌╌╌╌╌╌╌│╌▼╌╌╌╮ -- > ┆ ╔═══════╗ a ┌──────┐ d ┌─────┐ | -- > ┆ ║ Happy ║ ───▶ │ LFI* │ ───▶ │ FI* │ | -- > ┆ ╚═══════╝ ◀─╮ └──────┘ └─────┘ | @@ -147,6 +154,10 @@ -- If dynamo or objector claim to have no more headers, they are disengaged -- (j|k). -- +-- The BlockFetch logic can ask to change the dynamo if it is not serving blocks +-- fast enough. If there are other non-disengaged peers the dynamo is demoted to +-- a jumper (l) and a new dynamo is elected. +-- module Ouroboros.Consensus.MiniProtocol.ChainSync.Client.Jumping ( Context , ContextWith (..) @@ -154,19 +165,23 @@ module Ouroboros.Consensus.MiniProtocol.ChainSync.Client.Jumping ( , JumpInstruction (..) , JumpResult (..) , Jumping (..) + , getDynamo , makeContext , mkJumping , noJumping , registerClient + , rotateDynamo , unregisterClient ) where import Cardano.Slotting.Slot (SlotNo (..), WithOrigin (..)) -import Control.Monad (forM, forM_, when) +import Control.Monad (forM, forM_, void, when) import Data.Foldable (toList) import Data.List (sortOn) +import qualified Data.Map as Map import Data.Maybe (catMaybes, fromMaybe) import Data.Maybe.Strict (StrictMaybe (..)) +import Data.Sequence.Strict (StrictSeq) import qualified Data.Sequence.Strict as Seq import GHC.Generics (Generic) import Ouroboros.Consensus.Block (HasHeader (getHeaderFields), Header, @@ -460,7 +475,7 @@ onRollBackward context slot = Dynamo _ lastJumpSlot | slot < lastJumpSlot -> do disengage (handle context) - electNewDynamo (stripContext context) + void $ electNewDynamo (stripContext context) | otherwise -> pure () -- | This function is called when we receive a 'MsgAwaitReply' message. 
@@ -478,7 +493,7 @@ onAwaitReply context = readTVar (cschJumping (handle context)) >>= \case Dynamo{} -> do disengage (handle context) - electNewDynamo (stripContext context) + void $ electNewDynamo (stripContext context) Objector{} -> do disengage (handle context) electNewObjector (stripContext context) @@ -511,7 +526,7 @@ processJumpResult context jumpResult = updateChainSyncState (handle context) jumpInfo RejectedJump JumpToGoodPoint{} -> do startDisengaging (handle context) - electNewDynamo (stripContext context) + void $ electNewDynamo (stripContext context) -- Not interesting in the dynamo state AcceptedJump JumpTo{} -> pure () @@ -662,10 +677,10 @@ updateJumpInfo context jumpInfo = getDynamo :: (MonadSTM m) => ChainSyncClientHandleCollection peer m blk -> - STM m (Maybe (ChainSyncClientHandle m blk)) + STM m (Maybe (peer, ChainSyncClientHandle m blk)) getDynamo handlesCol = do handles <- cschcSeq handlesCol - fmap snd <$> findM (\(_, handle) -> isDynamo <$> readTVar (cschJumping handle)) handles + findM (\(_, handle) -> isDynamo <$> readTVar (cschJumping handle)) handles where isDynamo Dynamo{} = True isDynamo _ = False @@ -720,7 +735,7 @@ registerClient context peer csState mkHandle = do Nothing -> do fragment <- csCandidate <$> readTVar csState pure $ Dynamo DynamoStarted $ pointSlot $ AF.anchorPoint fragment - Just handle -> do + Just (_, handle) -> do mJustInfo <- readTVar (cschJumpInfo handle) newJumper mJustInfo (Happy FreshJumper Nothing) cschJumping <- newTVar csjState @@ -744,7 +759,52 @@ unregisterClient context = do Disengaged{} -> pure () Jumper{} -> pure () Objector{} -> electNewObjector context' - Dynamo{} -> electNewDynamo context' + Dynamo{} -> void $ electNewDynamo context' + +-- | Elects a new dynamo by demoting the given dynamo to a jumper, moving the +-- peer to the end of the queue of chain sync handles and electing a new dynamo. +-- +-- It does nothing if there is no other engaged peer to elect or if the given +-- peer is not the dynamo. +-- +-- Yields the new dynamo, if there is one. +rotateDynamo :: + ( Ord peer, + LedgerSupportsProtocol blk, + MonadSTM m + ) => + Context m peer blk -> + peer -> + STM m (Maybe (peer, ChainSyncClientHandle m blk)) +rotateDynamo context peer = do + handles <- cschcMap (handlesCol context) + case handles Map.!? peer of + Nothing -> + -- Do not re-elect a dynamo if the peer has been disconnected. + getDynamo (handlesCol context) + Just oldDynHandle -> + readTVar (cschJumping oldDynHandle) >>= \case + Dynamo{} -> do + cschcRotateHandle (handlesCol context) peer + peerStates <- cschcSeq (handlesCol context) + mEngaged <- findNonDisengaged peerStates + case mEngaged of + Nothing -> + -- There are no engaged peers. This case cannot happen, as the + -- dynamo is always engaged. + error "rotateDynamo: no engaged peer found" + Just (newDynamoId, newDynHandle) + | newDynamoId == peer -> + -- The old dynamo is the only engaged peer left. + pure $ Just (newDynamoId, newDynHandle) + | otherwise -> do + newJumper Nothing (Happy FreshJumper Nothing) + >>= writeTVar (cschJumping oldDynHandle) + promoteToDynamo peerStates newDynamoId newDynHandle + pure $ Just (newDynamoId, newDynHandle) + _ -> + -- Do not re-elect a dynamo if the peer is not the dynamo. + getDynamo (handlesCol context) -- | Choose an unspecified new non-idling dynamo and demote all other peers to -- jumpers. 
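A caller-side sketch of the new rotation entry point: only 'rotateDynamo' and its 'Context' come from this patch; the 'onSlowDynamo' wrapper, its name, and the direct use of 'atomically' are illustrative assumptions about how the BlockFetch logic might drive it.

-- Hypothetical BlockFetch-side hook: rotate the dynamo away from a peer
-- that is serving blocks too slowly, in a single STM transaction, and
-- report which peer (if any) is the dynamo afterwards.
onSlowDynamo ::
     (Ord peer, LedgerSupportsProtocol blk, IOLike m)
  => Context m peer blk
  -> peer            -- ^ the peer BlockFetch found too slow
  -> m (Maybe peer)  -- ^ the dynamo after the call, if any
onSlowDynamo context slowPeer =
  fmap fst <$> atomically (rotateDynamo context slowPeer)

Because 'rotateDynamo' re-reads the collection inside the same transaction, the wrapper is safe to call even if the slow peer disconnected concurrently: in that case it merely reports the current dynamo.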
@@ -754,32 +814,53 @@ electNewDynamo :: LedgerSupportsProtocol blk ) => Context m peer blk -> - STM m () + STM m (Maybe (peer, ChainSyncClientHandle m blk)) electNewDynamo context = do peerStates <- cschcSeq (handlesCol context) mDynamo <- findNonDisengaged peerStates case mDynamo of - Nothing -> pure () + Nothing -> pure Nothing Just (dynId, dynamo) -> do - fragment <- csCandidate <$> readTVar (cschState dynamo) - mJumpInfo <- readTVar (cschJumpInfo dynamo) - -- If there is no jump info, the dynamo must be just starting and - -- there is no need to set the intersection of the ChainSync server. - let dynamoInitState = maybe DynamoStarted DynamoStarting mJumpInfo - writeTVar (cschJumping dynamo) $ - Dynamo dynamoInitState $ pointSlot $ AF.headPoint fragment - -- Demote all other peers to jumpers - forM_ peerStates $ \(peer, st) -> - when (peer /= dynId) $ do - jumpingState <- readTVar (cschJumping st) - when (not (isDisengaged jumpingState)) $ - newJumper mJumpInfo (Happy FreshJumper Nothing) - >>= writeTVar (cschJumping st) - where - findNonDisengaged = - findM $ \(_, st) -> not . isDisengaged <$> readTVar (cschJumping st) - isDisengaged Disengaged{} = True - isDisengaged _ = False + promoteToDynamo peerStates dynId dynamo + pure $ Just (dynId, dynamo) + +-- | Promote the given peer to dynamo and demote all other peers to jumpers. +promoteToDynamo :: + ( MonadSTM m, + Eq peer, + LedgerSupportsProtocol blk + ) => + StrictSeq (peer, ChainSyncClientHandle m blk) -> + peer -> + ChainSyncClientHandle m blk -> + STM m () +promoteToDynamo peerStates dynId dynamo = do + fragment <- csCandidate <$> readTVar (cschState dynamo) + mJumpInfo <- readTVar (cschJumpInfo dynamo) + -- If there is no jump info, the dynamo must be just starting and + -- there is no need to set the intersection of the ChainSync server. + let dynamoInitState = maybe DynamoStarted DynamoStarting mJumpInfo + writeTVar (cschJumping dynamo) $ + Dynamo dynamoInitState $ pointSlot $ AF.headPoint fragment + -- Demote all other peers to jumpers + forM_ peerStates $ \(peer, st) -> + when (peer /= dynId) $ do + jumpingState <- readTVar (cschJumping st) + when (not (isDisengaged jumpingState)) $ + newJumper mJumpInfo (Happy FreshJumper Nothing) + >>= writeTVar (cschJumping st) + +-- | Find a non-disengaged peer in the given sequence +findNonDisengaged :: + (MonadSTM m) => + StrictSeq (peer, ChainSyncClientHandle m blk) -> + STM m (Maybe (peer, ChainSyncClientHandle m blk)) +findNonDisengaged = + findM $ \(_, st) -> not . 
isDisengaged <$> readTVar (cschJumping st) + +isDisengaged :: ChainSyncJumpingState m blk -> Bool +isDisengaged Disengaged{} = True +isDisengaged _ = False findM :: (Foldable f, Monad m) => (a -> m Bool) -> f a -> m (Maybe a) findM p = From 3c42b51ccabd4a02bebaf282b931e4632a488b27 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Facundo=20Dom=C3=ADnguez?= Date: Wed, 26 Jun 2024 12:39:26 -0300 Subject: [PATCH 03/76] Comment formatting Co-authored-by: Nicolas Jeannerod --- .../Consensus/MiniProtocol/ChainSync/Client/Jumping.hs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/MiniProtocol/ChainSync/Client/Jumping.hs b/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/MiniProtocol/ChainSync/Client/Jumping.hs index af05bf27c3..85442363d5 100644 --- a/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/MiniProtocol/ChainSync/Client/Jumping.hs +++ b/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/MiniProtocol/ChainSync/Client/Jumping.hs @@ -273,9 +273,9 @@ mkJumping peerContext = Jumping -- -- Invariants: -- --- - If 'handlesCol is not empty, then there is exactly one dynamo in it. --- - There is at most one objector in 'handlesCol. --- - If there exist 'FoundIntersection' jumpers in 'handlesCol, then there +-- - If 'handlesCol' is not empty, then there is exactly one dynamo in it. +-- - There is at most one objector in 'handlesCol'. +-- - If there exist 'FoundIntersection' jumpers in 'handlesCol', then there -- is an objector and the intersection of the objector with the dynamo is -- at least as old as the oldest intersection of the `FoundIntersection` jumpers -- with the dynamo. From 407db3b43f1dc586d620af2a1725d5a9861608d5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nicolas=20=E2=80=9CNiols=E2=80=9D=20Jeannerod?= Date: Thu, 27 Jun 2024 10:14:30 +0000 Subject: [PATCH 04/76] Rename `varHandles` to `readHandles` for consistency --- .../Ouroboros/Consensus/MiniProtocol/ChainSync/Client.hs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/MiniProtocol/ChainSync/Client.hs b/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/MiniProtocol/ChainSync/Client.hs index 9ea02cc2d0..bfba898e9b 100644 --- a/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/MiniProtocol/ChainSync/Client.hs +++ b/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/MiniProtocol/ChainSync/Client.hs @@ -234,8 +234,8 @@ viewChainSyncState :: STM m (Map peer (ChainSyncClientHandle m blk)) -> (ChainSyncState blk -> a) -> STM m (Map peer a) -viewChainSyncState varHandles f = - Map.map f <$> (traverse (readTVar . cschState) =<< varHandles) +viewChainSyncState readHandles f = + Map.map f <$> (traverse (readTVar . cschState) =<< readHandles) -- | Convenience function for reading the 'ChainSyncState' for a single peer -- from a nested set of TVars. 
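The 'findM' generalisation introduced in patch 02 folds with a short-circuiting bind: it stops at the first element for which the monadic predicate returns True, and now works over any Foldable (lists as well as the StrictSeq returned by 'cschcSeq'). A small sketch of its behaviour using Identity as the monad, assuming access to the internal 'findM'; the 'findEven' helper is illustrative only:

import Data.Functor.Identity (Identity (..))

-- In the Identity monad, findM coincides with Data.List.find.
findEven :: [Int] -> Maybe Int
findEven = runIdentity . findM (Identity . even)

-- >>> findEven [1, 3, 4, 6]
-- Just 4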
From 92ff25224cc8323561e5734d76f7cfb67dd22926 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nicolas=20=E2=80=9CNiols=E2=80=9D=20Jeannerod?= Date: Thu, 27 Jun 2024 10:16:00 +0000 Subject: [PATCH 05/76] Mention that the objector also gets demoted --- .../Consensus/MiniProtocol/ChainSync/Client/Jumping.hs | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/MiniProtocol/ChainSync/Client/Jumping.hs b/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/MiniProtocol/ChainSync/Client/Jumping.hs index 85442363d5..bfc9011bdc 100644 --- a/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/MiniProtocol/ChainSync/Client/Jumping.hs +++ b/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/MiniProtocol/ChainSync/Client/Jumping.hs @@ -155,8 +155,9 @@ -- (j|k). -- -- The BlockFetch logic can ask to change the dynamo if it is not serving blocks --- fast enough. If there are other non-disengaged peers the dynamo is demoted to --- a jumper (l) and a new dynamo is elected. +-- fast enough. If there are other non-disengaged peers, the dynamo (and the +-- objector if there is one) is demoted to a jumper (l+g) and a new dynamo is +-- elected. -- module Ouroboros.Consensus.MiniProtocol.ChainSync.Client.Jumping ( Context @@ -761,8 +762,9 @@ unregisterClient context = do Objector{} -> electNewObjector context' Dynamo{} -> void $ electNewDynamo context' --- | Elects a new dynamo by demoting the given dynamo to a jumper, moving the --- peer to the end of the queue of chain sync handles and electing a new dynamo. +-- | Elects a new dynamo by demoting the given dynamo (and the objector if there +-- is one) to a jumper, moving the peer to the end of the queue of chain sync +-- handles and electing a new dynamo. -- -- It does nothing if there is no other engaged peer to elect or if the given -- peer is not the dynamo. 
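Since 'promoteToDynamo' demotes every non-disengaged peer other than the new dynamo, objector included, to a fresh happy jumper, a successful rotation leaves exactly one dynamo and no objector among the registered peers. A sketch of that post-condition as an STM check, assuming the imports of the modules above; 'postRotationInvariant' is an assumption for illustration, not part of these patches:

import Data.Foldable (toList)

-- Immediately after a rotation there is exactly one Dynamo and no
-- Objector; Disengaged peers and Jumpers are unconstrained here.
postRotationInvariant ::
     MonadSTM m
  => ChainSyncClientHandleCollection peer m blk
  -> STM m Bool
postRotationInvariant handlesCol = do
  handles <- cschcSeq handlesCol
  states  <- traverse (readTVar . cschJumping . snd) handles
  let dynamos   = [() | Dynamo{}   <- toList states]
      objectors = [() | Objector{} <- toList states]
  pure (length dynamos == 1 && null objectors)

This is the same flavour of property that the CSJInvariants watcher from patch 01 monitors continuously in the peer simulator.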
From d7d7accc16093ecbff1a1fb942a5d8d35d624057 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nicolas=20=E2=80=9CNiols=E2=80=9D=20Jeannerod?= Date: Wed, 19 Jun 2024 14:20:57 +0200 Subject: [PATCH 06/76] Specify the order in which to start the peers --- .../Genesis/Tests/DensityDisconnect.hs | 8 +- .../Test/Consensus/Genesis/Tests/LoE.hs | 2 +- .../Test/Consensus/Genesis/Tests/LoP.hs | 13 ++- .../Test/Consensus/Genesis/Tests/Uniform.hs | 1 + .../Test/Consensus/PeerSimulator/Run.hs | 13 ++- .../PeerSimulator/Tests/LinkedThreads.hs | 2 +- .../Consensus/PeerSimulator/Tests/Rollback.hs | 6 +- .../Consensus/PeerSimulator/Tests/Timeouts.hs | 2 +- .../Test/Consensus/PointSchedule.hs | 80 +++++++++++++------ .../Test/Consensus/PointSchedule/Shrinking.hs | 5 +- .../PointSchedule/Shrinking/Tests.hs | 4 +- 11 files changed, 98 insertions(+), 38 deletions(-) diff --git a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Tests/DensityDisconnect.hs b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Tests/DensityDisconnect.hs index e4c3376e36..5dcf3c06d6 100644 --- a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Tests/DensityDisconnect.hs +++ b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Tests/DensityDisconnect.hs @@ -499,7 +499,8 @@ prop_densityDisconnectTriggersChainSel = (AF.Empty _) -> Origin (_ AF.:> tipBlock) -> At tipBlock advTip = getOnlyBranchTip tree - in mkPointSchedule $ peers' + in PointSchedule { + psSchedule = peers' -- Eagerly serve the honest tree, but after the adversary has -- advertised its chain up to the intersection. [[(Time 0, scheduleTipPoint trunkTip), @@ -514,4 +515,7 @@ prop_densityDisconnectTriggersChainSel = (Time 0, ScheduleBlockPoint intersect), (Time 1, scheduleHeaderPoint advTip), (Time 1, scheduleBlockPoint advTip) - ]] + ]], + psStartOrder = [], + psMinEndTime = Time 0 + } diff --git a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Tests/LoE.hs b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Tests/LoE.hs index 9dca4e004d..f4930a8aaf 100644 --- a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Tests/LoE.hs +++ b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Tests/LoE.hs @@ -115,4 +115,4 @@ prop_adversaryHitsTimeouts timeoutsEnabled = ] -- We want to wait more than the short wait timeout psMinEndTime = Time 11 - in PointSchedule {psSchedule, psMinEndTime} + in PointSchedule {psSchedule, psStartOrder = [], psMinEndTime} diff --git a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Tests/LoP.hs b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Tests/LoP.hs index 552a10696d..1b1bbacc03 100644 --- a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Tests/LoP.hs +++ b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Tests/LoP.hs @@ -79,6 +79,7 @@ prop_wait mustTimeout = let offset :: DiffTime = if mustTimeout then 1 else -1 in PointSchedule { psSchedule = peersOnlyHonest [(Time 0, scheduleTipPoint tipBlock)] + , psStartOrder = [] , psMinEndTime = Time $ timeout + offset } @@ -108,6 +109,7 @@ prop_waitBehindForecastHorizon = [ (Time 0, scheduleTipPoint tipBlock) , (Time 0, scheduleHeaderPoint tipBlock) ] + , psStartOrder = [] , psMinEndTime = Time 11 } @@ -166,13 +168,18 @@ prop_serve mustTimeout = makeSchedule :: (HasHeader blk) => AnchoredFragment blk -> PointSchedule blk makeSchedule 
(AF.Empty _) = error "fragment must have at least one block" makeSchedule fragment@(_ AF.:> tipBlock) = - mkPointSchedule $ peersOnlyHonest $ + PointSchedule { + psSchedule = + peersOnlyHonest $ (Time 0, scheduleTipPoint tipBlock) : ( flip concatMap (zip [1 ..] (AF.toOldestFirst fragment)) $ \(i, block) -> [ (Time (secondsRationalToDiffTime (i * timeBetweenBlocks)), scheduleHeaderPoint block), (Time (secondsRationalToDiffTime (i * timeBetweenBlocks)), scheduleBlockPoint block) ] - ) + ), + psStartOrder = [], + psMinEndTime = Time 0 + } -- NOTE: Same as 'LoE.prop_adversaryHitsTimeouts' with LoP instead of timeouts. prop_delayAttack :: Bool -> Property @@ -249,4 +256,4 @@ prop_delayAttack lopEnabled = ] -- Wait for LoP bucket to empty psMinEndTime = Time 11 - in PointSchedule {psSchedule, psMinEndTime} + in PointSchedule {psSchedule, psStartOrder = [], psMinEndTime} diff --git a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Tests/Uniform.hs b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Tests/Uniform.hs index 7ebdd0a84e..a3400a0184 100644 --- a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Tests/Uniform.hs +++ b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Tests/Uniform.hs @@ -288,6 +288,7 @@ prop_leashingAttackTimeLimited = advs = fmap (takePointsUntil timeLimit) advs0 pure $ PointSchedule { psSchedule = Peers honests advs + , psStartOrder = [] , psMinEndTime = timeLimit } diff --git a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/Run.hs b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/Run.hs index 63c8897e33..8f35e322b3 100644 --- a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/Run.hs +++ b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/Run.hs @@ -16,6 +16,7 @@ import Control.Monad.Class.MonadTimer.SI (MonadTimer) import Control.Tracer (Tracer (..), nullTracer, traceWith) import Data.Coerce (coerce) import Data.Foldable (for_) +import Data.List (sort) import qualified Data.List.NonEmpty as NonEmpty import Data.Map.Strict (Map) import qualified Data.Map.Strict as Map @@ -340,8 +341,15 @@ startNode schedulerConfig genesisTest interval = do getCandidates = viewChainSyncState (cschcMap handles) CSClient.csCandidate fetchClientRegistry <- newFetchClientRegistry let chainDbView = CSClient.defaultChainDbView lnChainDb - activePeers = Map.restrictKeys (psrPeers lrPeerSim) (lirActive liveResult) - for_ activePeers $ \PeerResources {prShared, prChainSync, prBlockFetch} -> do + activePeers = Map.toList $ Map.restrictKeys (psrPeers lrPeerSim) (lirActive liveResult) + peersStartOrder = psStartOrder ++ sort [pid | (pid, _) <- activePeers, pid `notElem` psStartOrder] + activePeersOrdered = [ + peerResources + | pid <- peersStartOrder + , (pid', peerResources) <- activePeers + , pid == pid' + ] + for_ activePeersOrdered $ \PeerResources {prShared, prChainSync, prBlockFetch} -> do let pid = srPeerId prShared forkLinkedThread lrRegistry ("Peer overview " ++ show pid) $ -- The peerRegistry helps ensuring that if any thread fails, then @@ -405,6 +413,7 @@ startNode schedulerConfig genesisTest interval = do , gtBlockFetchTimeouts , gtLoPBucketParams = LoPBucketParams { lbpCapacity, lbpRate } , gtCSJParams = CSJParams { csjpJumpSize } + , gtSchedule = PointSchedule {psStartOrder} } = genesisTest StateViewTracers{svtTraceTracer} = lnStateViewTracers diff --git 
a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/Tests/LinkedThreads.hs b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/Tests/LinkedThreads.hs index a94c69a968..e023e24335 100644 --- a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/Tests/LinkedThreads.hs +++ b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/Tests/LinkedThreads.hs @@ -72,4 +72,4 @@ prop_chainSyncKillsBlockFetch = do (Time 0, scheduleHeaderPoint firstBlock) ] psMinEndTime = Time $ timeout + 1 - in PointSchedule {psSchedule, psMinEndTime} + in PointSchedule {psSchedule, psStartOrder = [], psMinEndTime} diff --git a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/Tests/Rollback.hs b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/Tests/Rollback.hs index 17509dc458..e2f31d8919 100644 --- a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/Tests/Rollback.hs +++ b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/Tests/Rollback.hs @@ -95,7 +95,11 @@ rollbackSchedule n blockTree = , banalSchedulePoints trunkSuffix , banalSchedulePoints (btbSuffix branch) ] - in mkPointSchedule $ peersOnlyHonest $ zip (map (Time . (/30)) [0..]) schedulePoints + in PointSchedule { + psSchedule = peersOnlyHonest $ zip (map (Time . (/30)) [0..]) schedulePoints, + psStartOrder = [], + psMinEndTime = Time 0 + } where banalSchedulePoints :: AnchoredFragment blk -> [SchedulePoint blk] banalSchedulePoints = concatMap banalSchedulePoints' . toOldestFirst diff --git a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/Tests/Timeouts.hs b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/Tests/Timeouts.hs index 0c594c67e3..8e218df6fa 100644 --- a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/Tests/Timeouts.hs +++ b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/Tests/Timeouts.hs @@ -70,4 +70,4 @@ prop_timeouts mustTimeout = do ] -- This keeps the test running long enough to pass the timeout by 'offset'. 
psMinEndTime = Time $ timeout + offset - in PointSchedule {psSchedule, psMinEndTime} + in PointSchedule {psSchedule, psStartOrder = [], psMinEndTime} diff --git a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PointSchedule.hs b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PointSchedule.hs index 52990a0f9a..bbb11c4253 100644 --- a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PointSchedule.hs +++ b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PointSchedule.hs @@ -37,7 +37,6 @@ module Test.Consensus.PointSchedule ( , ensureScheduleDuration , genesisNodeState , longRangeAttack - , mkPointSchedule , peerSchedulesBlocks , peerStates , peersStates @@ -55,7 +54,6 @@ import Control.Monad.Class.MonadTime.SI (Time (Time), addTime, import Control.Monad.ST (ST) import Data.Functor (($>)) import Data.List (mapAccumL, partition, scanl') -import qualified Data.Map.Strict as Map import Data.Maybe (catMaybes, fromMaybe, mapMaybe) import Data.Time (DiffTime) import Data.Word (Word64) @@ -77,8 +75,8 @@ import Test.Consensus.BlockTree (BlockTree (..), BlockTreeBranch (..), import Test.Consensus.PeerSimulator.StateView (StateView) import Test.Consensus.PointSchedule.NodeState (NodeState (..), genesisNodeState) -import Test.Consensus.PointSchedule.Peers (Peer (..), Peers (..), - peers', peersList) +import Test.Consensus.PointSchedule.Peers (Peer (..), PeerId, + Peers (..), getPeerIds, peers', peersList) import Test.Consensus.PointSchedule.SinglePeer (IsTrunk (IsBranch, IsTrunk), PeerScheduleParams (..), SchedulePoint (..), defaultPeerScheduleParams, mergeOn, @@ -97,21 +95,24 @@ prettyPointSchedule :: (CondenseList (NodeState blk)) => PointSchedule blk -> [String] -prettyPointSchedule peers = - [ "honest peers: " ++ show (Map.size (honestPeers $ psSchedule peers)) - , "adversaries: " ++ show (Map.size (adversarialPeers $ psSchedule peers)) - , "minimal duration: " ++ show (psMinEndTime peers) - ] ++ - zipWith3 - (\number time peerState -> - number ++ ": " ++ peerState ++ " @ " ++ time - ) - (condenseListWithPadding PadLeft $ fst <$> numberedPeersStates) - (showDT . fst . snd <$> numberedPeersStates) - (condenseList $ (snd . snd) <$> numberedPeersStates) +prettyPointSchedule ps@PointSchedule {psStartOrder, psMinEndTime} = + [] + ++ [ "psSchedule =" + ] + ++ ( zipWith3 + ( \number time peerState -> + " " ++ number ++ ": " ++ peerState ++ " @ " ++ time + ) + (condenseListWithPadding PadLeft $ fst <$> numberedPeersStates) + (showDT . fst . snd <$> numberedPeersStates) + (condenseList $ (snd . snd) <$> numberedPeersStates) + ) + ++ [ "psStartOrder = " ++ show psStartOrder, + "psMinEndTime = " ++ show psMinEndTime + ] where numberedPeersStates :: [(Int, (Time, Peer (NodeState blk)))] - numberedPeersStates = zip [0..] (peersStates peers) + numberedPeersStates = zip [0 ..] (peersStates ps) showDT :: Time -> String showDT (Time dt) = printf "%.6f" (realToFrac dt :: Double) @@ -177,15 +178,17 @@ peerScheduleBlocks = mapMaybe (withOriginToMaybe . schedulePointToBlock . snd) data PointSchedule blk = PointSchedule { -- | The actual point schedule psSchedule :: Peers (PeerSchedule blk), + -- | The order in which the peers start and connect to the node under test. + -- The peers that are absent from 'psSchedule' are ignored; the peers from + -- 'psSchedule' that are absent of 'psStartOrder' are started in the end in + -- the order of 'PeerId'. + psStartOrder :: [PeerId], -- | Minimum duration for the simulation of this point schedule. 
    -- If no point in the schedule is larger than 'psMinEndTime',
    -- the simulation will still run until this time is reached.
    psMinEndTime :: Time
  }

-mkPointSchedule :: Peers (PeerSchedule blk) -> PointSchedule blk
-mkPointSchedule sch = PointSchedule sch $ Time 0
-
 -- | List of all blocks appearing in the schedules.
 peerSchedulesBlocks :: Peers (PeerSchedule blk) -> [blk]
 peerSchedulesBlocks = concatMap (peerScheduleBlocks . value) . peersList
@@ -208,7 +211,11 @@ longRangeAttack ::
 longRangeAttack BlockTree {btTrunk, btBranches = [branch]} g = do
   honest <- peerScheduleFromTipPoints g honParams [(IsTrunk, [AF.length btTrunk - 1])] btTrunk []
   adv <- peerScheduleFromTipPoints g advParams [(IsBranch, [AF.length (btbFull branch) - 1])] btTrunk [btbFull branch]
-  pure $ mkPointSchedule $ peers' [honest] [adv]
+  pure $ PointSchedule {
+    psSchedule = peers' [honest] [adv],
+    psStartOrder = [],
+    psMinEndTime = Time 0
+  }
   where
     honParams = defaultPeerScheduleParams {pspHeaderDelayInterval = (0.3, 0.4)}
     advParams = defaultPeerScheduleParams {pspTipDelayInterval = (0, 0.1)}
@@ -240,6 +247,7 @@ uniformPoints PointsGeneratorParams {pgpExtraHonestPeers, pgpDowntime} = case pg
 -- Include rollbacks in a percentage of adversaries, in which case that peer uses two branches.
 --
 uniformPointsWithExtraHonestPeers ::
+  forall g m blk.
   (StatefulGen g m, AF.HasHeader blk) =>
   Int ->
   BlockTree blk ->
@@ -254,7 +262,9 @@ uniformPointsWithExtraHonestPeers
   honests <- replicateM (extraHonestPeers + 1) $
     mkSchedule [(IsTrunk, [honestTip0 .. AF.length btTrunk - 1])] []
   advs <- takeBranches btBranches
-  pure $ mkPointSchedule $ peers' honests advs
+  let psSchedule = peers' honests advs
+  psStartOrder <- shuffle (getPeerIds psSchedule)
+  pure $ PointSchedule {psSchedule, psStartOrder, psMinEndTime = Time 0}
   where
     takeBranches = \case
       [] -> pure []
@@ -305,6 +315,15 @@ uniformPointsWithExtraHonestPeers
     rollbackProb = 0.2

+    -- Inefficient implementation, but sufficient for small lists.
+    shuffle :: [a] -> m [a]
+    shuffle [] = pure []
+    shuffle xs = do
+      i <- Random.uniformRM (0, length xs - 1) g
+      let x = xs !! i
+          xs' = take i xs ++ drop (i+1) xs
+      (x :) <$> shuffle xs'
+
 minusClamp :: (Ord a, Num a) => a -> a -> a
 minusClamp a b | a <= b = 0
                | otherwise = a - b
@@ -361,6 +380,7 @@ syncTips honests advs =
 --
 -- Includes rollbacks in some schedules.
 uniformPointsWithExtraHonestPeersAndDowntime ::
+  forall g m blk.
   (StatefulGen g m, AF.HasHeader blk) =>
   Int ->
   SecurityParam ->
@@ -383,7 +403,9 @@ uniformPointsWithExtraHonestPeersAndDowntime
       mkSchedule [(IsTrunk, [honestTip0, minusClamp (AF.length btTrunk) 1])] []
   advs <- takeBranches pauseSlot btBranches
   let (honests', advs') = syncTips honests advs
-  pure $ mkPointSchedule $ peers' honests' advs'
+      psSchedule = peers' honests' advs'
+  psStartOrder <- shuffle $ getPeerIds psSchedule
+  pure $ PointSchedule {psSchedule, psStartOrder, psMinEndTime = Time 0}
   where
     takeBranches pause = \case
       [] -> pure []
@@ -438,6 +460,15 @@ uniformPointsWithExtraHonestPeersAndDowntime
     rollbackProb = 0.2

+    -- Inefficient implementation, but sufficient for small lists.
+    shuffle :: [a] -> m [a]
+    shuffle [] = pure []
+    shuffle xs = do
+      i <- Random.uniformRM (0, length xs - 1) g
+      let x = xs !!
i + xs' = take i xs ++ drop (i+1) xs + (x :) <$> shuffle xs' + newtype ForecastRange = ForecastRange { unForecastRange :: Word64 } deriving (Show) @@ -545,9 +576,10 @@ stToGen gen = do pure (runSTGen_ seed gen) ensureScheduleDuration :: GenesisTest blk a -> PointSchedule blk -> PointSchedule blk -ensureScheduleDuration gt PointSchedule{psSchedule, psMinEndTime} = +ensureScheduleDuration gt PointSchedule{psSchedule, psStartOrder, psMinEndTime} = PointSchedule { psSchedule + , psStartOrder , psMinEndTime = max psMinEndTime (Time endingDelay) } where diff --git a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PointSchedule/Shrinking.hs b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PointSchedule/Shrinking.hs index 820f844b22..eb24ccf6e0 100644 --- a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PointSchedule/Shrinking.hs +++ b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PointSchedule/Shrinking.hs @@ -41,7 +41,7 @@ shrinkPeerSchedules :: StateView TestBlock -> [GenesisTestFull TestBlock] shrinkPeerSchedules genesisTest@GenesisTest{gtBlockTree, gtSchedule} _stateView = - let PointSchedule {psSchedule} = gtSchedule + let PointSchedule {psSchedule, psStartOrder} = gtSchedule simulationDuration = duration gtSchedule trimmedBlockTree sch = trimBlockTree' sch gtBlockTree shrunkAdversarialPeers = @@ -50,6 +50,7 @@ shrinkPeerSchedules genesisTest@GenesisTest{gtBlockTree, gtSchedule} _stateView genesisTest { gtSchedule = PointSchedule { psSchedule = shrunkSchedule + , psStartOrder , psMinEndTime = simulationDuration } , gtBlockTree = trimmedBlockTree shrunkSchedule @@ -61,6 +62,7 @@ shrinkPeerSchedules genesisTest@GenesisTest{gtBlockTree, gtSchedule} _stateView <&> \shrunkSchedule -> genesisTest { gtSchedule = PointSchedule { psSchedule = shrunkSchedule + , psStartOrder , psMinEndTime = simulationDuration } } @@ -81,6 +83,7 @@ shrinkByRemovingAdversaries genesisTest@GenesisTest{gtSchedule, gtBlockTree} _st in genesisTest { gtSchedule = PointSchedule { psSchedule = shrunkSchedule + , psStartOrder = psStartOrder gtSchedule , psMinEndTime = simulationDuration } , gtBlockTree = trimmedBlockTree diff --git a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PointSchedule/Shrinking/Tests.hs b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PointSchedule/Shrinking/Tests.hs index d715375a3f..b375a8ee94 100644 --- a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PointSchedule/Shrinking/Tests.hs +++ b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PointSchedule/Shrinking/Tests.hs @@ -82,14 +82,14 @@ checkShrinkProperty :: (Peers (PeerSchedule TestBlock) -> Peers (PeerSchedule Te checkShrinkProperty prop = forAllBlind (genChains (choose (1, 4)) >>= genUniformSchedulePoints) - (\sch@PointSchedule{psSchedule, psMinEndTime} -> + (\sch@PointSchedule{psSchedule, psStartOrder, psMinEndTime} -> conjoin $ map (\shrunk -> counterexample ( "Original schedule:\n" ++ unlines (map (" " ++) $ prettyPointSchedule sch) ++ "\nShrunk schedule:\n" - ++ unlines (map (" " ++) $ prettyPointSchedule $ PointSchedule shrunk psMinEndTime) + ++ unlines (map (" " ++) $ prettyPointSchedule $ PointSchedule {psSchedule = shrunk, psStartOrder, psMinEndTime}) ) (prop psSchedule shrunk) ) From 705daf4ff114b42231e2028618d174f1246fad2f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nicolas=20=E2=80=9CNiols=E2=80=9D=20Jeannerod?= Date: Tue, 18 Jun 2024 19:07:28 +0200 Subject: [PATCH 07/76] Add a BlockFetch 
leashing attack test --- .../Test/Consensus/Genesis/Tests/Uniform.hs | 49 +++++++++++++++++-- 1 file changed, 46 insertions(+), 3 deletions(-) diff --git a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Tests/Uniform.hs b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Tests/Uniform.hs index a3400a0184..3a4b3b7814 100644 --- a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Tests/Uniform.hs +++ b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Tests/Uniform.hs @@ -19,7 +19,7 @@ module Test.Consensus.Genesis.Tests.Uniform ( import Cardano.Slotting.Slot (SlotNo (SlotNo), WithOrigin (..)) import Control.Monad (replicateM) import Control.Monad.Class.MonadTime.SI (Time, addTime) -import Data.List (intercalate, sort) +import Data.List (intercalate, sort, uncons) import qualified Data.List.NonEmpty as NE import qualified Data.Map.Strict as Map import Data.Maybe (fromMaybe, mapMaybe) @@ -40,7 +40,8 @@ import Test.Consensus.PeerSimulator.Run (SchedulerConfig (..), defaultSchedulerConfig) import Test.Consensus.PeerSimulator.StateView import Test.Consensus.PointSchedule -import Test.Consensus.PointSchedule.Peers (Peers (..), isHonestPeerId) +import Test.Consensus.PointSchedule.Peers (Peers (..), getPeerIds, + isHonestPeerId, peers') import Test.Consensus.PointSchedule.Shrinking (shrinkByRemovingAdversaries, shrinkPeerSchedules) import Test.Consensus.PointSchedule.SinglePeer @@ -72,7 +73,8 @@ tests = -- because this test writes the immutable chain to disk and `instance Binary TestBlock` -- chokes on long chains. adjustQuickCheckMaxSize (const 10) $ - testProperty "the node is shut down and restarted after some time" prop_downtime + testProperty "the node is shut down and restarted after some time" prop_downtime, + testProperty "block fetch leashing attack" prop_blockFetchLeashingAttack ] theProperty :: @@ -416,3 +418,44 @@ prop_downtime = forAllGenesisTest { pgpExtraHonestPeers = fromIntegral (gtExtraHonestPeers gt) , pgpDowntime = DowntimeWithSecurityParam (gtSecurityParam gt) } + +prop_blockFetchLeashingAttack :: Property +prop_blockFetchLeashingAttack = + forAllGenesisTest + (disableBoringTimeouts <$> genChains (pure 0) `enrichedWith` genBlockFetchLeashingSchedule) + defaultSchedulerConfig + { scEnableLoE = True, + scEnableLoP = True, + scEnableCSJ = True + } + shrinkPeerSchedules + theProperty + where + genBlockFetchLeashingSchedule :: GenesisTest TestBlock () -> QC.Gen (PointSchedule TestBlock) + genBlockFetchLeashingSchedule genesisTest = do + PointSchedule {psSchedule, psMinEndTime} <- + stToGen $ + uniformPoints + (PointsGeneratorParams {pgpExtraHonestPeers = 1, pgpDowntime = NoDowntime}) + (gtBlockTree genesisTest) + peers <- QC.shuffle $ Map.elems $ honestPeers psSchedule + let (honest, adversaries) = fromMaybe (error "blockFetchLeashingAttack") $ uncons peers + adversaries' = map (filter (not . isBlockPoint . snd)) adversaries + psSchedule' = peers' [honest] adversaries' + -- Important to shuffle the order in which the peers start, otherwise the + -- honest peer starts first and systematically becomes dynamo. 
+ psStartOrder <- shuffle $ getPeerIds psSchedule' + pure $ PointSchedule {psSchedule = psSchedule', psStartOrder, psMinEndTime} + + isBlockPoint :: SchedulePoint blk -> Bool + isBlockPoint (ScheduleBlockPoint _) = True + isBlockPoint _ = False + + disableBoringTimeouts gt = + gt + { gtChainSyncTimeouts = + (gtChainSyncTimeouts gt) + { mustReplyTimeout = Nothing, + idleTimeout = Nothing + } + } From 8bd6815adff9d629e80bf8229fc01a5e868ac35c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nicolas=20=E2=80=9CNiols=E2=80=9D=20Jeannerod?= Date: Wed, 5 Jun 2024 10:22:17 +0000 Subject: [PATCH 08/76] Switch peer simulator to `FetchModeBulkSync` --- .../Test/Consensus/PeerSimulator/BlockFetch.hs | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/BlockFetch.hs b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/BlockFetch.hs index 536a49f2fc..a6820448e2 100644 --- a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/BlockFetch.hs +++ b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/BlockFetch.hs @@ -93,11 +93,8 @@ startBlockFetchLogic registry tracer chainDb fetchClientRegistry getCandidates = -- do not serialize the blocks. (\_hdr -> 1000) slotForgeTime - -- Initially, we tried FetchModeBulkSync, but adversaries had the - -- opportunity to delay syncing by not responding to block requests. - -- The BlockFetch logic would then wait for the timeout to expire - -- before trying to download the block from another peer. - (pure FetchModeDeadline) + -- This is a syncing test, so we use 'FetchModeBulkSync'. + (pure FetchModeBulkSync) -- Values taken from -- ouroboros-consensus-diffusion/src/unstable-diffusion-testlib/Test/ThreadNet/Network.hs From 31705f2c546322bcbdc925fe1866863981ea6137 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nicolas=20=E2=80=9CNiols=E2=80=9D=20Jeannerod?= Date: Thu, 20 Jun 2024 16:55:37 +0200 Subject: [PATCH 09/76] Track the last time the ChainDB thread was starved --- .../MiniProtocol/BlockFetch/ClientInterface.hs | 4 ++++ .../Ouroboros/Consensus/Storage/ChainDB/API.hs | 4 ++++ .../Ouroboros/Consensus/Storage/ChainDB/Impl.hs | 3 +++ .../Consensus/Storage/ChainDB/Impl/Background.hs | 2 +- .../Consensus/Storage/ChainDB/Impl/Query.hs | 4 ++++ .../Consensus/Storage/ChainDB/Impl/Types.hs | 14 +++++++++++--- 6 files changed, 27 insertions(+), 4 deletions(-) diff --git a/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/MiniProtocol/BlockFetch/ClientInterface.hs b/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/MiniProtocol/BlockFetch/ClientInterface.hs index 2d8a8e1b1e..823f1d6bb6 100644 --- a/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/MiniProtocol/BlockFetch/ClientInterface.hs +++ b/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/MiniProtocol/BlockFetch/ClientInterface.hs @@ -52,6 +52,7 @@ data ChainDbView m blk = ChainDbView { , getIsFetched :: STM m (Point blk -> Bool) , getMaxSlotNo :: STM m MaxSlotNo , addBlockWaitWrittenToDisk :: InvalidBlockPunishment m -> blk -> m Bool + , getLastTimeStarved :: STM m Time } defaultChainDbView :: IOLike m => ChainDB m blk -> ChainDbView m blk @@ -60,6 +61,7 @@ defaultChainDbView chainDB = ChainDbView { , getIsFetched = ChainDB.getIsFetched chainDB , getMaxSlotNo = ChainDB.getMaxSlotNo chainDB , addBlockWaitWrittenToDisk = ChainDB.addBlockWaitWrittenToDisk chainDB + , getLastTimeStarved = 
ChainDB.getLastTimeStarved chainDB
    }

-- | How to get the wall-clock time of a slot. Note that this is a very
@@ -340,3 +342,5 @@ mkBlockFetchConsensusInterface
      headerForgeUTCTime = slotForgeTime . headerRealPoint . unFromConsensus
      blockForgeUTCTime = slotForgeTime . blockRealPoint . unFromConsensus
+
+      lastChainSelStarvation = getLastTimeStarved chainDB
diff --git a/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/API.hs b/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/API.hs
index 441c598c6a..1cb0d3199b 100644
--- a/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/API.hs
+++ b/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/API.hs
@@ -334,6 +334,10 @@ data ChainDB m blk = ChainDB {
      -- invalid block is detected. These blocks are likely to be valid.
    , getIsInvalidBlock :: STM m (WithFingerprint (HeaderHash blk -> Maybe (InvalidBlockReason blk)))

+      -- | The last time we starved the ChainSel thread. This is used by the
+      -- BlockFetch decision logic to demote peers.
+    , getLastTimeStarved :: STM m Time
+
    , closeDB :: m ()

      -- | Return 'True' when the database is open.
diff --git a/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/Impl.hs b/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/Impl.hs
index 03f9a1ef58..4d154aa13b 100644
--- a/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/Impl.hs
+++ b/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/Impl.hs
@@ -177,6 +177,7 @@ openDBInternal args launchBgTasks = runWithTempRegistry $ do
    copyFuse <- newFuse "copy to immutable db"
    chainSelFuse <- newFuse "chain selection"
    chainSelQueue <- newChainSelQueue (Args.cdbsBlocksToAddSize cdbSpecificArgs)
+    varLastTimeStarved <- newTVarIO =<< getMonotonicTime
    let env = CDB { cdbImmutableDB = immutableDB
                  , cdbVolatileDB = volatileDB
@@ -201,6 +202,7 @@ openDBInternal args launchBgTasks = runWithTempRegistry $ do
                  , cdbChainSelQueue = chainSelQueue
                  , cdbFutureBlocks = varFutureBlocks
                  , cdbLoE = Args.cdbsLoE cdbSpecificArgs
+                 , cdbLastTimeStarved = varLastTimeStarved
                  }
    h <- fmap CDBHandle $ newTVarIO $ ChainDbOpen env
    let chainDB = API.ChainDB
@@ -218,6 +220,7 @@ openDBInternal args launchBgTasks = runWithTempRegistry $ do
          , stream = Iterator.stream h
          , newFollower = Follower.newFollower h
          , getIsInvalidBlock = getEnvSTM h Query.getIsInvalidBlock
+          , getLastTimeStarved = getEnvSTM h Query.getLastTimeStarved
          , closeDB = closeDB h
          , isOpen = isOpen h
          }
diff --git a/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/Impl/Background.hs b/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/Impl/Background.hs
index 9a6fdcb374..2edcb1e826 100644
--- a/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/Impl/Background.hs
+++ b/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/Impl/Background.hs
@@ -522,7 +522,7 @@ addBlockRunner fuse cdb@CDB{..} = forever $ do
    -- exception (or it errored), notify the blocked thread
    withFuse fuse $
      bracketOnError
-        (lift $ getChainSelMessage cdbChainSelQueue)
+        (lift $ getChainSelMessage (writeTVar cdbLastTimeStarved) cdbChainSelQueue)
        (\message -> lift $ atomically $ do
          case message of
            ChainSelReprocessLoEBlocks -> pure ()
diff --git
a/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/Impl/Query.hs b/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/Impl/Query.hs index d47182bd43..324cddfdd0 100644 --- a/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/Impl/Query.hs +++ b/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/Impl/Query.hs @@ -21,6 +21,7 @@ module Ouroboros.Consensus.Storage.ChainDB.Impl.Query ( , getAnyBlockComponent , getAnyKnownBlock , getAnyKnownBlockComponent + , getLastTimeStarved ) where import qualified Data.Map.Strict as Map @@ -148,6 +149,9 @@ getIsInvalidBlock :: getIsInvalidBlock CDB{..} = fmap (fmap (fmap invalidBlockReason) . flip Map.lookup) <$> readTVar cdbInvalid +getLastTimeStarved :: forall m blk. IOLike m => ChainDbEnv m blk -> STM m Time +getLastTimeStarved CDB{..} = readTVar cdbLastTimeStarved + getIsValid :: forall m blk. (IOLike m, HasHeader blk) => ChainDbEnv m blk diff --git a/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/Impl/Types.hs b/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/Impl/Types.hs index feb94c2dbf..4667fdf585 100644 --- a/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/Impl/Types.hs +++ b/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/Impl/Types.hs @@ -63,6 +63,7 @@ module Ouroboros.Consensus.Storage.ChainDB.Impl.Types ( , TraceValidationEvent (..) ) where +import Cardano.Prelude (whenM) import Control.Tracer import Data.Foldable (traverse_) import Data.Map.Strict (Map) @@ -275,6 +276,9 @@ data ChainDbEnv m blk = CDB -- switch back to a chain containing it. The fragment is usually anchored at -- a recent immutable tip; if it does not, it will conservatively be treated -- as the empty fragment anchored in the current immutable tip. + , cdbLastTimeStarved :: !(StrictTVar m Time) + -- ^ The last time we starved the ChainSel thread. This is used by the + -- BlockFetch decision logic to demote peers. } deriving (Generic) -- | We include @blk@ in 'showTypeOf' because it helps resolving type families @@ -509,9 +513,13 @@ addReprocessLoEBlocks tracer (ChainSelQueue queue) = do atomically $ writeTBQueue queue ChainSelReprocessLoEBlocks -- | Get the oldest message from the 'ChainSelQueue' queue. Can block when the --- queue is empty. -getChainSelMessage :: IOLike m => ChainSelQueue m blk -> m (ChainSelMessage m blk) -getChainSelMessage (ChainSelQueue queue) = atomically $ readTBQueue queue +-- queue is empty; in that case, reports the current time to the given callback. +getChainSelMessage :: IOLike m => (Time -> STM m ()) -> ChainSelQueue m blk -> m (ChainSelMessage m blk) +getChainSelMessage whenEmpty (ChainSelQueue queue) = do + time <- getMonotonicTime + -- NOTE: The two following lines are in different `atomically` on purpose. + atomically $ whenM (isEmptyTBQueue queue) (whenEmpty time) + atomically $ readTBQueue queue -- | Flush the 'ChainSelQueue' queue and notify the waiting threads. 
-- From b20f71155ef081bc11e70d9aaa847be0f8d957d5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nicolas=20=E2=80=9CNiols=E2=80=9D=20Jeannerod?= Date: Wed, 26 Jun 2024 15:47:13 +0200 Subject: [PATCH 10/76] Plug `rotateDynamo` into `BlockFetchConsensusInterface` --- .../Ouroboros/Consensus/NodeKernel.hs | 8 ++------ .../Test/Consensus/PeerSimulator/BlockFetch.hs | 13 ++++++------- .../Test/Consensus/PeerSimulator/Run.hs | 6 ++---- .../MiniProtocol/BlockFetch/ClientInterface.hs | 17 ++++++++++++++--- .../MiniProtocol/ChainSync/Client/Jumping.hs | 14 +++++++------- 5 files changed, 31 insertions(+), 27 deletions(-) diff --git a/ouroboros-consensus-diffusion/src/ouroboros-consensus-diffusion/Ouroboros/Consensus/NodeKernel.hs b/ouroboros-consensus-diffusion/src/ouroboros-consensus-diffusion/Ouroboros/Consensus/NodeKernel.hs index 05c36a0c0e..8b32d65f14 100644 --- a/ouroboros-consensus-diffusion/src/ouroboros-consensus-diffusion/Ouroboros/Consensus/NodeKernel.hs +++ b/ouroboros-consensus-diffusion/src/ouroboros-consensus-diffusion/Ouroboros/Consensus/NodeKernel.hs @@ -41,7 +41,6 @@ import Data.Function (on) import Data.Functor ((<&>)) import Data.Hashable (Hashable) import Data.List.NonEmpty (NonEmpty) -import Data.Map.Strict (Map) import Data.Maybe (isJust, mapMaybe) import Data.Proxy import qualified Data.Text as Text @@ -63,7 +62,7 @@ import qualified Ouroboros.Consensus.MiniProtocol.BlockFetch.ClientInterface as import Ouroboros.Consensus.MiniProtocol.ChainSync.Client (ChainSyncClientHandle (..), ChainSyncClientHandleCollection (..), ChainSyncState (..), - newChainSyncClientHandleCollection, viewChainSyncState) + newChainSyncClientHandleCollection) import Ouroboros.Consensus.MiniProtocol.ChainSync.Client.InFutureCheck (SomeHeaderInFutureCheck) import Ouroboros.Consensus.Node.Genesis (GenesisNodeKernelArgs (..), @@ -384,9 +383,6 @@ initInternalState NodeKernelArgs { tracers, chainDB, registry, cfg fetchClientRegistry <- newFetchClientRegistry - let getCandidates :: STM m (Map (ConnectionId addrNTN) (AnchoredFragment (Header blk))) - getCandidates = viewChainSyncState (cschcMap varChainSyncHandles) csCandidate - slotForgeTimeOracle <- BlockFetchClientInterface.initSlotForgeTimeOracle cfg chainDB let readFetchMode = BlockFetchClientInterface.readFetchModeDefault btime @@ -397,7 +393,7 @@ initInternalState NodeKernelArgs { tracers, chainDB, registry, cfg blockFetchInterface = BlockFetchClientInterface.mkBlockFetchConsensusInterface (configBlock cfg) (BlockFetchClientInterface.defaultChainDbView chainDB) - getCandidates + varChainSyncHandles blockFetchSize slotForgeTimeOracle readFetchMode diff --git a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/BlockFetch.hs b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/BlockFetch.hs index a6820448e2..9a6b1c18c6 100644 --- a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/BlockFetch.hs +++ b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/BlockFetch.hs @@ -23,21 +23,20 @@ import Control.Monad.Class.MonadTime import Control.Monad.Class.MonadTimer.SI (MonadTimer) import Control.Tracer (Tracer, nullTracer, traceWith) import Data.Functor.Contravariant ((>$<)) -import Data.Map.Strict (Map) import Network.TypedProtocol.Codec (AnyMessage, PeerHasAgency (..), PeerRole) import Ouroboros.Consensus.Block (HasHeader) import Ouroboros.Consensus.Block.Abstract (Header, Point (..)) import qualified 
Ouroboros.Consensus.MiniProtocol.BlockFetch.ClientInterface as BlockFetchClientInterface +import Ouroboros.Consensus.MiniProtocol.ChainSync.Client + (ChainSyncClientHandleCollection) import Ouroboros.Consensus.Node.ProtocolInfo (NumCoreNodes (NumCoreNodes)) import Ouroboros.Consensus.Storage.ChainDB.API import Ouroboros.Consensus.Util (ShowProxy) import Ouroboros.Consensus.Util.IOLike (DiffTime, - Exception (fromException), IOLike, STM, atomically, retry, - try) + Exception (fromException), IOLike, atomically, retry, try) import Ouroboros.Consensus.Util.ResourceRegistry -import Ouroboros.Network.AnchoredFragment (AnchoredFragment) import Ouroboros.Network.BlockFetch (BlockFetchConfiguration (..), FetchClientRegistry, FetchMode (..), blockFetchLogic, bracketFetchClient, bracketKeepAliveClient) @@ -78,9 +77,9 @@ startBlockFetchLogic :: -> Tracer m (TraceEvent TestBlock) -> ChainDB m TestBlock -> FetchClientRegistry PeerId (Header TestBlock) TestBlock m - -> STM m (Map PeerId (AnchoredFragment (Header TestBlock))) + -> ChainSyncClientHandleCollection PeerId m TestBlock -> m () -startBlockFetchLogic registry tracer chainDb fetchClientRegistry getCandidates = do +startBlockFetchLogic registry tracer chainDb fetchClientRegistry csHandlesCol = do let slotForgeTime :: BlockFetchClientInterface.SlotForgeTimeOracle m blk slotForgeTime _ = pure dawnOfTime @@ -88,7 +87,7 @@ startBlockFetchLogic registry tracer chainDb fetchClientRegistry getCandidates = BlockFetchClientInterface.mkBlockFetchConsensusInterface (TestBlockConfig $ NumCoreNodes 0) -- Only needed when minting blocks (BlockFetchClientInterface.defaultChainDbView chainDb) - getCandidates + csHandlesCol -- The size of headers in bytes is irrelevant because our tests -- do not serialize the blocks. (\_hdr -> 1000) diff --git a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/Run.hs b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/Run.hs index 8f35e322b3..111af6a6a8 100644 --- a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/Run.hs +++ b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/Run.hs @@ -336,9 +336,7 @@ startNode :: LiveInterval TestBlock m -> m () startNode schedulerConfig genesisTest interval = do - let - handles = psrHandles lrPeerSim - getCandidates = viewChainSyncState (cschcMap handles) CSClient.csCandidate + let handles = psrHandles lrPeerSim fetchClientRegistry <- newFetchClientRegistry let chainDbView = CSClient.defaultChainDbView lnChainDb activePeers = Map.toList $ Map.restrictKeys (psrPeers lrPeerSim) (lirActive liveResult) @@ -385,7 +383,7 @@ startNode schedulerConfig genesisTest interval = do -- The block fetch logic needs to be started after the block fetch clients -- otherwise, an internal assertion fails because getCandidates yields more -- peer fragments than registered clients. 
- BlockFetch.startBlockFetchLogic lrRegistry lrTracer lnChainDb fetchClientRegistry getCandidates + BlockFetch.startBlockFetchLogic lrRegistry lrTracer lnChainDb fetchClientRegistry handles for_ lrLoEVar $ \ var -> do forkLinkedWatcher lrRegistry "LoE updater background" $ diff --git a/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/MiniProtocol/BlockFetch/ClientInterface.hs b/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/MiniProtocol/BlockFetch/ClientInterface.hs index 823f1d6bb6..d91487a43c 100644 --- a/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/MiniProtocol/BlockFetch/ClientInterface.hs +++ b/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/MiniProtocol/BlockFetch/ClientInterface.hs @@ -26,6 +26,10 @@ import qualified Ouroboros.Consensus.HardFork.Abstract as History import qualified Ouroboros.Consensus.HardFork.History as History import Ouroboros.Consensus.Ledger.Abstract import Ouroboros.Consensus.Ledger.Extended +import Ouroboros.Consensus.Ledger.SupportsProtocol + (LedgerSupportsProtocol) +import qualified Ouroboros.Consensus.MiniProtocol.ChainSync.Client as CSClient +import qualified Ouroboros.Consensus.MiniProtocol.ChainSync.Client.Jumping as Jumping import Ouroboros.Consensus.Storage.ChainDB.API (ChainDB) import qualified Ouroboros.Consensus.Storage.ChainDB.API as ChainDB import Ouroboros.Consensus.Storage.ChainDB.API.Types.InvalidBlockPunishment @@ -171,11 +175,12 @@ mkBlockFetchConsensusInterface :: forall m peer blk. ( IOLike m , BlockSupportsDiffusionPipelining blk - , BlockSupportsProtocol blk + , Ord peer + , LedgerSupportsProtocol blk ) => BlockConfig blk -> ChainDbView m blk - -> STM m (Map peer (AnchoredFragment (Header blk))) + -> CSClient.ChainSyncClientHandleCollection peer m blk -> (Header blk -> SizeInBytes) -> SlotForgeTimeOracle m blk -- ^ Slot forge time, see 'headerForgeUTCTime' and 'blockForgeUTCTime'. @@ -183,9 +188,12 @@ mkBlockFetchConsensusInterface :: -- ^ See 'readFetchMode'. -> BlockFetchConsensusInterface peer (Header blk) blk m mkBlockFetchConsensusInterface - bcfg chainDB getCandidates blockFetchSize slotForgeTime readFetchMode = + bcfg chainDB csHandlesCol blockFetchSize slotForgeTime readFetchMode = BlockFetchConsensusInterface {..} where + getCandidates :: STM m (Map peer (AnchoredFragment (Header blk))) + getCandidates = CSClient.viewChainSyncState (CSClient.cschcMap csHandlesCol) CSClient.csCandidate + blockMatchesHeader :: Header blk -> blk -> Bool blockMatchesHeader = Block.blockMatchesHeader @@ -344,3 +352,6 @@ mkBlockFetchConsensusInterface blockForgeUTCTime = slotForgeTime . blockRealPoint . unFromConsensus lastChainSelStarvation = getLastTimeStarved chainDB + + demoteCSJDynamo :: peer -> m () + demoteCSJDynamo = void . atomically . 
Jumping.rotateDynamo csHandlesCol
diff --git a/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/MiniProtocol/ChainSync/Client/Jumping.hs b/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/MiniProtocol/ChainSync/Client/Jumping.hs
index bfc9011bdc..8655046760 100644
--- a/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/MiniProtocol/ChainSync/Client/Jumping.hs
+++ b/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/MiniProtocol/ChainSync/Client/Jumping.hs
@@ -775,20 +775,20 @@ rotateDynamo ::
    LedgerSupportsProtocol blk,
    MonadSTM m
  ) =>
-  Context m peer blk ->
+  ChainSyncClientHandleCollection peer m blk ->
  peer ->
  STM m (Maybe (peer, ChainSyncClientHandle m blk))
-rotateDynamo context peer = do
-  handles <- cschcMap (handlesCol context)
+rotateDynamo handlesCol peer = do
+  handles <- cschcMap handlesCol
  case handles Map.!? peer of
    Nothing ->
      -- Do not re-elect a dynamo if the peer has been disconnected.
-      getDynamo (handlesCol context)
+      getDynamo handlesCol
    Just oldDynHandle ->
      readTVar (cschJumping oldDynHandle) >>= \case
        Dynamo{} -> do
-          cschcRotateHandle (handlesCol context) peer
-          peerStates <- cschcSeq (handlesCol context)
+          cschcRotateHandle handlesCol peer
+          peerStates <- cschcSeq handlesCol
          mEngaged <- findNonDisengaged peerStates
          case mEngaged of
            Nothing ->
@@ -806,7 +806,7 @@
          pure $ Just (newDynamoId, newDynHandle)
        _ ->
          -- Do not re-elect a dynamo if the peer is not the dynamo.
-          getDynamo (handlesCol context)
+          getDynamo handlesCol

 -- | Choose an unspecified new non-idling dynamo and demote all other peers to
 -- jumpers.

From e894140a38c07a59bc9a258573830af818e2e782 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Nicolas=20=E2=80=9CNiols=E2=80=9D=20Jeannerod?=
Date: Thu, 27 Jun 2024 09:18:20 +0000
Subject: [PATCH 11/76] Change how last starvation is recorded

Previously, we only recorded the time at which a starvation started. That is
not enough: if the peer has still not ended the starvation once the grace
period expires, we will not detect it. Instead, we now record the last
starvation as either ongoing or as having ended at a specific time.
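
For reference, the starvation information consumed by the BlockFetch decision
logic is the ChainSelStarvation type from
Ouroboros.Network.BlockFetch.ConsensusInterface. As used in this series it is,
roughly (a sketch, not the authoritative definition; only the two constructors
matched on in the diffs below are assumed):

    data ChainSelStarvation
      = ChainSelStarvationOngoing      -- ChainSel is starved right now
      | ChainSelStarvationEndedAt Time -- when the last starvation ended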
--- .../Test/Consensus/PeerSimulator/Trace.hs | 4 +++ .../BlockFetch/ClientInterface.hs | 9 ++--- .../Consensus/Storage/ChainDB/API.hs | 8 +++-- .../Consensus/Storage/ChainDB/Impl.hs | 8 +++-- .../Storage/ChainDB/Impl/Background.hs | 6 +++- .../Consensus/Storage/ChainDB/Impl/Query.hs | 11 +++++-- .../Consensus/Storage/ChainDB/Impl/Types.hs | 33 ++++++++++++------- 7 files changed, 54 insertions(+), 25 deletions(-) diff --git a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/Trace.hs b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/Trace.hs index f003dfe447..f9797d2ebc 100644 --- a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/Trace.hs +++ b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/Trace.hs @@ -48,6 +48,8 @@ import Ouroboros.Network.AnchoredFragment (AnchoredFragment, headPoint) import qualified Ouroboros.Network.AnchoredFragment as AF import Ouroboros.Network.Block (SlotNo (SlotNo), Tip, castPoint) +import Ouroboros.Network.BlockFetch.ConsensusInterface + (ChainSelStarvation (..)) import Test.Consensus.PointSchedule.NodeState (NodeState) import Test.Consensus.PointSchedule.Peers (Peer (Peer), PeerId) import Test.Util.TersePrinting (terseAnchor, terseBlock, @@ -369,6 +371,8 @@ traceChainDBEventTestBlockWith tracer = \case AddedReprocessLoEBlocksToQueue -> trace $ "Requested ChainSel run" _ -> pure () + ChainDB.TraceChainSelStarvation ChainSelStarvationOngoing -> trace "ChainSel starved" + ChainDB.TraceChainSelStarvation (ChainSelStarvationEndedAt time) -> trace $ "ChainSel starvation ended at " ++ prettyTime time _ -> pure () where trace = traceUnitWith tracer "ChainDB" diff --git a/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/MiniProtocol/BlockFetch/ClientInterface.hs b/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/MiniProtocol/BlockFetch/ClientInterface.hs index d91487a43c..93750475a9 100644 --- a/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/MiniProtocol/BlockFetch/ClientInterface.hs +++ b/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/MiniProtocol/BlockFetch/ClientInterface.hs @@ -42,7 +42,8 @@ import Ouroboros.Network.AnchoredFragment (AnchoredFragment) import qualified Ouroboros.Network.AnchoredFragment as AF import Ouroboros.Network.Block (MaxSlotNo) import Ouroboros.Network.BlockFetch.ConsensusInterface - (BlockFetchConsensusInterface (..), FetchMode (..), + (BlockFetchConsensusInterface (..), + ChainSelStarvation (..), FetchMode (..), FromConsensus (..), WhetherReceivingTentativeBlocks (..)) import Ouroboros.Network.PeerSelection.Bootstrap (UseBootstrapPeers, requiresBootstrapPeers) @@ -56,7 +57,7 @@ data ChainDbView m blk = ChainDbView { , getIsFetched :: STM m (Point blk -> Bool) , getMaxSlotNo :: STM m MaxSlotNo , addBlockWaitWrittenToDisk :: InvalidBlockPunishment m -> blk -> m Bool - , getLastTimeStarved :: STM m Time + , getChainSelStarvation :: STM m ChainSelStarvation } defaultChainDbView :: IOLike m => ChainDB m blk -> ChainDbView m blk @@ -65,7 +66,7 @@ defaultChainDbView chainDB = ChainDbView { , getIsFetched = ChainDB.getIsFetched chainDB , getMaxSlotNo = ChainDB.getMaxSlotNo chainDB , addBlockWaitWrittenToDisk = ChainDB.addBlockWaitWrittenToDisk chainDB - , getLastTimeStarved = ChainDB.getLastTimeStarved chainDB + , getChainSelStarvation = ChainDB.getChainSelStarvation chainDB } -- | How to get the wall-clock time of a slot. 
Note that this is a very
@@ -351,7 +352,7 @@ mkBlockFetchConsensusInterface
      headerForgeUTCTime = slotForgeTime . headerRealPoint . unFromConsensus
      blockForgeUTCTime = slotForgeTime . blockRealPoint . unFromConsensus

-      lastChainSelStarvation = getLastTimeStarved chainDB
+      readChainSelStarvation = getChainSelStarvation chainDB

      demoteCSJDynamo :: peer -> m ()
      demoteCSJDynamo = void . atomically . Jumping.rotateDynamo csHandlesCol
diff --git a/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/API.hs b/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/API.hs
index 1cb0d3199b..eec1e930af 100644
--- a/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/API.hs
+++ b/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/API.hs
@@ -91,6 +91,8 @@ import qualified Ouroboros.Network.AnchoredFragment as AF
 import Ouroboros.Network.Block (ChainUpdate, MaxSlotNo,
                     Serialised (..))
 import qualified Ouroboros.Network.Block as Network
+import Ouroboros.Network.BlockFetch.ConsensusInterface
+                    (ChainSelStarvation (..))
 import Ouroboros.Network.Mock.Chain (Chain (..))
 import qualified Ouroboros.Network.Mock.Chain as Chain
 import System.FS.API.Types (FsError)
@@ -334,9 +336,9 @@ data ChainDB m blk = ChainDB {
      -- invalid block is detected. These blocks are likely to be valid.
    , getIsInvalidBlock :: STM m (WithFingerprint (HeaderHash blk -> Maybe (InvalidBlockReason blk)))

-      -- | The last time we starved the ChainSel thread. This is used by the
-      -- BlockFetch decision logic to demote peers.
-    , getLastTimeStarved :: STM m Time
+      -- | Whether ChainSel is currently starved, or when it last stopped
+      -- being starved.
+    , getChainSelStarvation :: STM m ChainSelStarvation

    , closeDB :: m ()
diff --git a/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/Impl.hs b/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/Impl.hs
index 4d154aa13b..9d368412ec 100644
--- a/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/Impl.hs
+++ b/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/Impl.hs
@@ -69,6 +69,8 @@ import Ouroboros.Consensus.Util.ResourceRegistry (WithTempRegistry,
 import Ouroboros.Consensus.Util.STM (Fingerprint (..),
                     WithFingerprint (..))
 import qualified Ouroboros.Network.AnchoredFragment as AF
+import Ouroboros.Network.BlockFetch.ConsensusInterface
+                    (ChainSelStarvation (..))

 {-------------------------------------------------------------------------------
   Initialization
@@ -177,7 +179,7 @@ openDBInternal args launchBgTasks = runWithTempRegistry $ do
    copyFuse <- newFuse "copy to immutable db"
    chainSelFuse <- newFuse "chain selection"
    chainSelQueue <- newChainSelQueue (Args.cdbsBlocksToAddSize cdbSpecificArgs)
-    varLastTimeStarved <- newTVarIO =<< getMonotonicTime
+    varChainSelStarvation <- newTVarIO ChainSelStarvationOngoing
    let env = CDB { cdbImmutableDB = immutableDB
                  , cdbVolatileDB = volatileDB
@@ -202,7 +204,7 @@ openDBInternal args launchBgTasks = runWithTempRegistry $ do
                  , cdbChainSelQueue = chainSelQueue
                  , cdbFutureBlocks = varFutureBlocks
                  , cdbLoE = Args.cdbsLoE cdbSpecificArgs
-                  , cdbLastTimeStarved = varLastTimeStarved
+                  , cdbChainSelStarvation = varChainSelStarvation
                  }
    h <- fmap CDBHandle $ newTVarIO $ ChainDbOpen env
    let chainDB = API.ChainDB
@@ -220,7 +222,7 @@ openDBInternal args launchBgTasks = runWithTempRegistry $ do
          , stream = Iterator.stream h
          , newFollower =
Follower.newFollower h , getIsInvalidBlock = getEnvSTM h Query.getIsInvalidBlock - , getLastTimeStarved = getEnvSTM h Query.getLastTimeStarved + , getChainSelStarvation = getEnvSTM h Query.getChainSelStarvation , closeDB = closeDB h , isOpen = isOpen h } diff --git a/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/Impl/Background.hs b/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/Impl/Background.hs index 2edcb1e826..af6081a3f7 100644 --- a/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/Impl/Background.hs +++ b/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/Impl/Background.hs @@ -522,7 +522,7 @@ addBlockRunner fuse cdb@CDB{..} = forever $ do -- exception (or it errored), notify the blocked thread withFuse fuse $ bracketOnError - (lift $ getChainSelMessage (writeTVar cdbLastTimeStarved) cdbChainSelQueue) + (lift $ getChainSelMessage reportChainSelStarvation cdbChainSelQueue) (\message -> lift $ atomically $ do case message of ChainSelReprocessLoEBlocks -> pure () @@ -541,3 +541,7 @@ addBlockRunner fuse cdb@CDB{..} = forever $ do trace $ PoppedBlockFromQueue $ FallingEdgeWith $ blockRealPoint blockToAdd chainSelSync cdb message) + where + reportChainSelStarvation s = do + traceWith cdbTracer $ TraceChainSelStarvation s + atomically $ writeTVar cdbChainSelStarvation s diff --git a/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/Impl/Query.hs b/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/Impl/Query.hs index 324cddfdd0..96bb57eac3 100644 --- a/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/Impl/Query.hs +++ b/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/Impl/Query.hs @@ -21,7 +21,7 @@ module Ouroboros.Consensus.Storage.ChainDB.Impl.Query ( , getAnyBlockComponent , getAnyKnownBlock , getAnyKnownBlockComponent - , getLastTimeStarved + , getChainSelStarvation ) where import qualified Data.Map.Strict as Map @@ -43,6 +43,8 @@ import Ouroboros.Consensus.Util.STM (WithFingerprint (..)) import Ouroboros.Network.AnchoredFragment (AnchoredFragment) import qualified Ouroboros.Network.AnchoredFragment as AF import Ouroboros.Network.Block (MaxSlotNo, maxSlotNoFromWithOrigin) +import Ouroboros.Network.BlockFetch.ConsensusInterface + (ChainSelStarvation (..)) -- | Return the last @k@ headers. -- @@ -149,8 +151,11 @@ getIsInvalidBlock :: getIsInvalidBlock CDB{..} = fmap (fmap (fmap invalidBlockReason) . flip Map.lookup) <$> readTVar cdbInvalid -getLastTimeStarved :: forall m blk. IOLike m => ChainDbEnv m blk -> STM m Time -getLastTimeStarved CDB{..} = readTVar cdbLastTimeStarved +getChainSelStarvation :: + forall m blk. IOLike m + => ChainDbEnv m blk + -> STM m ChainSelStarvation +getChainSelStarvation CDB {..} = readTVar cdbChainSelStarvation getIsValid :: forall m blk. (IOLike m, HasHeader blk) diff --git a/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/Impl/Types.hs b/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/Impl/Types.hs index 4667fdf585..58ed2dc19d 100644 --- a/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/Impl/Types.hs +++ b/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/Impl/Types.hs @@ -63,7 +63,7 @@ module Ouroboros.Consensus.Storage.ChainDB.Impl.Types ( , TraceValidationEvent (..) 
) where -import Cardano.Prelude (whenM) +import Control.Monad (when) import Control.Tracer import Data.Foldable (traverse_) import Data.Map.Strict (Map) @@ -108,6 +108,8 @@ import Ouroboros.Consensus.Util.ResourceRegistry import Ouroboros.Consensus.Util.STM (WithFingerprint) import Ouroboros.Network.AnchoredFragment (AnchoredFragment) import Ouroboros.Network.Block (MaxSlotNo) +import Ouroboros.Network.BlockFetch.ConsensusInterface + (ChainSelStarvation (..)) -- | All the serialisation related constraints needed by the ChainDB. class ( ImmutableDbSerialiseConstraints blk @@ -276,9 +278,9 @@ data ChainDbEnv m blk = CDB -- switch back to a chain containing it. The fragment is usually anchored at -- a recent immutable tip; if it does not, it will conservatively be treated -- as the empty fragment anchored in the current immutable tip. - , cdbLastTimeStarved :: !(StrictTVar m Time) - -- ^ The last time we starved the ChainSel thread. This is used by the - -- BlockFetch decision logic to demote peers. + , cdbChainSelStarvation :: !(StrictTVar m ChainSelStarvation) + -- ^ Information on the last starvation of ChainSel, whether ongoing or + -- ended recently. } deriving (Generic) -- | We include @blk@ in 'showTypeOf' because it helps resolving type families @@ -513,13 +515,21 @@ addReprocessLoEBlocks tracer (ChainSelQueue queue) = do atomically $ writeTBQueue queue ChainSelReprocessLoEBlocks -- | Get the oldest message from the 'ChainSelQueue' queue. Can block when the --- queue is empty; in that case, reports the current time to the given callback. -getChainSelMessage :: IOLike m => (Time -> STM m ()) -> ChainSelQueue m blk -> m (ChainSelMessage m blk) -getChainSelMessage whenEmpty (ChainSelQueue queue) = do - time <- getMonotonicTime - -- NOTE: The two following lines are in different `atomically` on purpose. - atomically $ whenM (isEmptyTBQueue queue) (whenEmpty time) - atomically $ readTBQueue queue +-- queue is empty; in that case, reports the starvation (and its end) to the +-- callback. +getChainSelMessage + :: IOLike m + => (ChainSelStarvation -> m ()) + -> ChainSelQueue m blk + -> m (ChainSelMessage m blk) +getChainSelMessage report (ChainSelQueue queue) = do + -- NOTE: The test of emptiness and the blocking read are in different STM + -- transactions on purpose. + starved <- atomically $ isEmptyTBQueue queue + when starved $ report ChainSelStarvationOngoing + message <- atomically $ readTBQueue queue + when starved $ report =<< ChainSelStarvationEndedAt <$> getMonotonicTime + return message -- | Flush the 'ChainSelQueue' queue and notify the waiting threads. -- @@ -552,6 +562,7 @@ data TraceEvent blk | TraceLedgerReplayEvent (LgrDB.TraceReplayEvent blk) | TraceImmutableDBEvent (ImmutableDB.TraceEvent blk) | TraceVolatileDBEvent (VolatileDB.TraceEvent blk) + | TraceChainSelStarvation ChainSelStarvation deriving (Generic) From ebc49ce0d1af90cfdd9d1dfc194510d3fb67bd22 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Facundo=20Dom=C3=ADnguez?= Date: Thu, 20 Jun 2024 18:17:16 +0000 Subject: [PATCH 12/76] Revert "GSM: use diffusion layer info for HAA" This reverts commit 4946b3b6edeb9621b03d0c5a8da2f49fccf1d1c8. 
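
Concretely, after this revert the honest availability assumption is treated
as unconditionally satisfied and the diffusion callback becomes a no-op,
until consensus stores information on whether the node is connected to peers
other than local roots (see the diff below):

    GSM.isHaaSatisfied = pure True
    Diffusion.daUpdateOutboundConnectionsState = \_ -> return ()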
--- .../Ouroboros/Consensus/Node.hs | 8 +++----- .../Ouroboros/Consensus/NodeKernel.hs | 16 +--------------- 2 files changed, 4 insertions(+), 20 deletions(-) diff --git a/ouroboros-consensus-diffusion/src/ouroboros-consensus-diffusion/Ouroboros/Consensus/Node.hs b/ouroboros-consensus-diffusion/src/ouroboros-consensus-diffusion/Ouroboros/Consensus/Node.hs index f6cc42eea1..485be33dbb 100644 --- a/ouroboros-consensus-diffusion/src/ouroboros-consensus-diffusion/Ouroboros/Consensus/Node.hs +++ b/ouroboros-consensus-diffusion/src/ouroboros-consensus-diffusion/Ouroboros/Consensus/Node.hs @@ -60,7 +60,6 @@ import qualified Codec.CBOR.Encoding as CBOR import Codec.Serialise (DeserialiseFailure) import qualified Control.Concurrent.Class.MonadSTM.Strict as StrictSTM import Control.DeepSeq (NFData) -import Control.Monad (when) import Control.Monad.Class.MonadTime.SI (MonadTime) import Control.Monad.Class.MonadTimer.SI (MonadTimer) import Control.Tracer (Tracer, contramap, traceWith) @@ -634,10 +633,9 @@ runWith RunNodeArgs{..} encAddrNtN decAddrNtN LowLevelRunNodeArgs{..} = lpGetLedgerPeers = fromMaybe [] <$> getPeersFromCurrentLedger kernel (const True), lpGetLedgerStateJudgement = GSM.gsmStateToLedgerJudgement <$> getGsmState kernel }, - Diffusion.daUpdateOutboundConnectionsState = - let varOcs = getOutboundConnectionsState kernel in \newOcs -> do - oldOcs <- readTVar varOcs - when (newOcs /= oldOcs) $ writeTVar varOcs newOcs + -- TODO: consensus can use this callback to store information if the + -- node is connected to peers other than local roots. + Diffusion.daUpdateOutboundConnectionsState = \_ -> return () } localRethrowPolicy :: RethrowPolicy diff --git a/ouroboros-consensus-diffusion/src/ouroboros-consensus-diffusion/Ouroboros/Consensus/NodeKernel.hs b/ouroboros-consensus-diffusion/src/ouroboros-consensus-diffusion/Ouroboros/Consensus/NodeKernel.hs index 8b32d65f14..11423e65e2 100644 --- a/ouroboros-consensus-diffusion/src/ouroboros-consensus-diffusion/Ouroboros/Consensus/NodeKernel.hs +++ b/ouroboros-consensus-diffusion/src/ouroboros-consensus-diffusion/Ouroboros/Consensus/NodeKernel.hs @@ -98,8 +98,6 @@ import Ouroboros.Network.NodeToNode (ConnectionId, import Ouroboros.Network.PeerSelection.Bootstrap (UseBootstrapPeers) import Ouroboros.Network.PeerSelection.LedgerPeers.Type (LedgerStateJudgement (..)) -import Ouroboros.Network.PeerSelection.LocalRootPeers - (OutboundConnectionsState (..)) import Ouroboros.Network.PeerSharing (PeerSharingAPI, PeerSharingRegistry, newPeerSharingAPI, newPeerSharingRegistry, ps_POLICY_PEER_SHARE_MAX_PEERS, @@ -156,9 +154,6 @@ data NodeKernel m addrNTN addrNTC blk = NodeKernel { , setBlockForging :: [BlockForging m blk] -> m () , getPeerSharingAPI :: PeerSharingAPI addrNTN StdGen m - - , getOutboundConnectionsState - :: StrictTVar m OutboundConnectionsState } -- | Arguments required when initializing a node @@ -216,8 +211,6 @@ initNodeKernel args@NodeKernelArgs { registry, cfg, tracers , varGsmState } = st - varOutboundConnectionsState <- newTVarIO UntrustedState - do let GsmNodeKernelArgs {..} = gsmArgs gsmTracerArgs = ( castTip . either AF.anchorToTip tipFromHeader . AF.head . fst @@ -256,12 +249,7 @@ initNodeKernel args@NodeKernelArgs { registry, cfg, tracers writeTVar varGsmState gsmState handles <- cschcMap varChainSyncHandles traverse_ (($ time) . ($ gsmState) . 
cschOnGsmStateChanged) handles - , GSM.isHaaSatisfied = do - readTVar varOutboundConnectionsState <&> \case - -- See the upstream Haddocks for the exact conditions under - -- which the diffusion layer is in this state. - TrustedStateWithExternalPeers -> True - UntrustedState -> False + , GSM.isHaaSatisfied = pure True } judgment <- GSM.gsmStateToLedgerJudgement <$> readTVarIO varGsmState void $ forkLinkedThread registry "NodeKernel.GSM" $ case judgment of @@ -317,8 +305,6 @@ initNodeKernel args@NodeKernelArgs { registry, cfg, tracers , getTracers = tracers , setBlockForging = \a -> atomically . LazySTM.putTMVar blockForgingVar $! a , getPeerSharingAPI = peerSharingAPI - , getOutboundConnectionsState - = varOutboundConnectionsState } where blockForgingController :: InternalState m remotePeer localPeer blk From 7beffdcf8966af0f17dfb2d6d116027688ed680f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nicolas=20=E2=80=9CNiols=E2=80=9D=20Jeannerod?= Date: Fri, 5 Jul 2024 13:05:48 +0200 Subject: [PATCH 13/76] Fix starvation detection; improve tracing --- .../Consensus/Storage/ChainDB/Impl.hs | 1 + .../Storage/ChainDB/Impl/Background.hs | 6 ++-- .../Consensus/Storage/ChainDB/Impl/Types.hs | 33 ++++++++++++++----- 3 files changed, 28 insertions(+), 12 deletions(-) diff --git a/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/Impl.hs b/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/Impl.hs index 9d368412ec..d20dcc7502 100644 --- a/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/Impl.hs +++ b/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/Impl.hs @@ -25,6 +25,7 @@ module Ouroboros.Consensus.Storage.ChainDB.Impl ( , TraceOpenEvent (..) , TracePipeliningEvent (..) , TraceValidationEvent (..) + , TraceChainSelStarvationEvent (..) -- * Re-exported for convenience , Args.RelativeMountPoint (..) , ImmutableDB.ImmutableDbSerialiseConstraints diff --git a/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/Impl/Background.hs b/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/Impl/Background.hs index af6081a3f7..a854904cae 100644 --- a/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/Impl/Background.hs +++ b/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/Impl/Background.hs @@ -522,7 +522,7 @@ addBlockRunner fuse cdb@CDB{..} = forever $ do -- exception (or it errored), notify the blocked thread withFuse fuse $ bracketOnError - (lift $ getChainSelMessage reportChainSelStarvation cdbChainSelQueue) + (lift $ getChainSelMessage starvationTracer cdbChainSelStarvation cdbChainSelQueue) (\message -> lift $ atomically $ do case message of ChainSelReprocessLoEBlocks -> pure () @@ -542,6 +542,4 @@ addBlockRunner fuse cdb@CDB{..} = forever $ do blockRealPoint blockToAdd chainSelSync cdb message) where - reportChainSelStarvation s = do - traceWith cdbTracer $ TraceChainSelStarvation s - atomically $ writeTVar cdbChainSelStarvation s + starvationTracer = Tracer $ traceWith cdbTracer . 
TraceChainSelStarvationEvent diff --git a/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/Impl/Types.hs b/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/Impl/Types.hs index 58ed2dc19d..26fd33ab19 100644 --- a/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/Impl/Types.hs +++ b/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/Impl/Types.hs @@ -61,9 +61,10 @@ module Ouroboros.Consensus.Storage.ChainDB.Impl.Types ( , TraceOpenEvent (..) , TracePipeliningEvent (..) , TraceValidationEvent (..) + , TraceChainSelStarvationEvent (..) ) where -import Control.Monad (when) +import Cardano.Prelude (whenM) import Control.Tracer import Data.Foldable (traverse_) import Data.Map.Strict (Map) @@ -518,18 +519,29 @@ addReprocessLoEBlocks tracer (ChainSelQueue queue) = do -- queue is empty; in that case, reports the starvation (and its end) to the -- callback. getChainSelMessage - :: IOLike m - => (ChainSelStarvation -> m ()) + :: (IOLike m, HasHeader blk) + => Tracer m (TraceChainSelStarvationEvent blk) + -> StrictTVar m ChainSelStarvation -> ChainSelQueue m blk -> m (ChainSelMessage m blk) -getChainSelMessage report (ChainSelQueue queue) = do +getChainSelMessage starvationTracer starvationVar (ChainSelQueue queue) = do -- NOTE: The test of emptiness and the blocking read are in different STM -- transactions on purpose. - starved <- atomically $ isEmptyTBQueue queue - when starved $ report ChainSelStarvationOngoing + whenM (atomically $ isEmptyTBQueue queue) $ do + writeTVarIO starvationVar ChainSelStarvationOngoing + traceWith starvationTracer ChainSelStarvationStarted message <- atomically $ readTBQueue queue - when starved $ report =<< ChainSelStarvationEndedAt <$> getMonotonicTime + -- If there was a starvation ongoing, we need to report that it is done. + whenM ((== ChainSelStarvationOngoing) <$> readTVarIO starvationVar) $ + case message of + ChainSelAddBlock BlockToAdd {blockToAdd} -> do + time <- getMonotonicTime + traceWith starvationTracer $ ChainSelStarvationEnded time $ blockRealPoint blockToAdd + writeTVarIO starvationVar $ ChainSelStarvationEndedAt time + ChainSelReprocessLoEBlocks -> pure () return message + where + writeTVarIO v x = atomically $ writeTVar v x -- | Flush the 'ChainSelQueue' queue and notify the waiting threads. -- @@ -562,7 +574,7 @@ data TraceEvent blk | TraceLedgerReplayEvent (LgrDB.TraceReplayEvent blk) | TraceImmutableDBEvent (ImmutableDB.TraceEvent blk) | TraceVolatileDBEvent (VolatileDB.TraceEvent blk) - | TraceChainSelStarvation ChainSelStarvation + | TraceChainSelStarvationEvent(TraceChainSelStarvationEvent blk) deriving (Generic) @@ -895,3 +907,8 @@ data TraceIteratorEvent blk -- next block we're looking for. 
| SwitchBackToVolatileDB
  deriving (Generic, Eq, Show)
+
+data TraceChainSelStarvationEvent blk
+  = ChainSelStarvationStarted
+  | ChainSelStarvationEnded Time (RealPoint blk)
+  deriving (Generic, Eq, Show)

From d3dbdf85ca85ba9d85f5484ba8895da32cc919e0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Nicolas=20=E2=80=9CNiols=E2=80=9D=20Jeannerod?=
Date: Fri, 5 Jul 2024 14:18:49 +0200
Subject: [PATCH 14/76] Follow changes of tracing in peer simulator

---
 .../consensus-test/Test/Consensus/PeerSimulator/Trace.hs | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/Trace.hs b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/Trace.hs
index f9797d2ebc..2d7d243d2d 100644
--- a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/Trace.hs
+++ b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/Trace.hs
@@ -48,8 +48,6 @@ import Ouroboros.Network.AnchoredFragment (AnchoredFragment,
                     headPoint)
 import qualified Ouroboros.Network.AnchoredFragment as AF
 import Ouroboros.Network.Block (SlotNo (SlotNo), Tip, castPoint)
-import Ouroboros.Network.BlockFetch.ConsensusInterface
-                    (ChainSelStarvation (..))
 import Test.Consensus.PointSchedule.NodeState (NodeState)
 import Test.Consensus.PointSchedule.Peers (Peer (Peer), PeerId)
 import Test.Util.TersePrinting (terseAnchor, terseBlock,
@@ -371,8 +369,10 @@ traceChainDBEventTestBlockWith tracer = \case
      AddedReprocessLoEBlocksToQueue ->
        trace $ "Requested ChainSel run"
      _ -> pure ()
-    ChainDB.TraceChainSelStarvation ChainSelStarvationOngoing -> trace "ChainSel starved"
-    ChainDB.TraceChainSelStarvation (ChainSelStarvationEndedAt time) -> trace $ "ChainSel starvation ended at " ++ prettyTime time
+    ChainDB.TraceChainSelStarvationEvent ChainDB.ChainSelStarvationStarted ->
+      trace "ChainSel starvation started"
+    ChainDB.TraceChainSelStarvationEvent (ChainDB.ChainSelStarvationEnded time pt) ->
+      trace $ "ChainSel starvation ended at " ++ prettyTime time ++ " thanks to " ++ terseRealPoint pt
    _ -> pure ()
  where
    trace = traceUnitWith tracer "ChainDB"

From b0946d3607b3c9a13b5d39e0f4ab8faaf89112b2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Nicolas=20=E2=80=9CNiols=E2=80=9D=20Jeannerod?=
Date: Fri, 5 Jul 2024 14:29:48 +0200
Subject: [PATCH 15/76] Follow configurable grace period

---
 .../src/unstable-diffusion-testlib/Test/ThreadNet/Network.hs  | 1 +
 .../consensus-test/Test/Consensus/PeerSimulator/BlockFetch.hs | 1 +
 2 files changed, 2 insertions(+)

diff --git a/ouroboros-consensus-diffusion/src/unstable-diffusion-testlib/Test/ThreadNet/Network.hs b/ouroboros-consensus-diffusion/src/unstable-diffusion-testlib/Test/ThreadNet/Network.hs
index eb4b978a1f..e2f51f99e0 100644
--- a/ouroboros-consensus-diffusion/src/unstable-diffusion-testlib/Test/ThreadNet/Network.hs
+++ b/ouroboros-consensus-diffusion/src/unstable-diffusion-testlib/Test/ThreadNet/Network.hs
@@ -1021,6 +1021,7 @@ runThreadNetwork systemTime ThreadNetworkArgs
                          -- interval which doesn't play nice with
                          -- BlockFetch decision interval.
, bfcSalt = 0 + , bfcBulkSyncGracePeriod = 10 -- seconds } , gsmArgs = GSM.GsmNodeKernelArgs { gsmAntiThunderingHerd = kaRng diff --git a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/BlockFetch.hs b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/BlockFetch.hs index 9a6b1c18c6..3392ceff0e 100644 --- a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/BlockFetch.hs +++ b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/BlockFetch.hs @@ -112,6 +112,7 @@ startBlockFetchLogic registry tracer chainDb fetchClientRegistry csHandlesCol = , bfcMaxRequestsInflight = 10 , bfcDecisionLoopInterval = 0 , bfcSalt = 0 + , bfcBulkSyncGracePeriod = 10 } void $ forkLinkedThread registry "BlockFetchLogic" $ From 2d51933dd0fdf99ee09e855610df52c444e0b491 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nicolas=20=E2=80=9CNiols=E2=80=9D=20Jeannerod?= Date: Fri, 5 Jul 2024 16:17:33 +0200 Subject: [PATCH 16/76] Allow parameterizing whether chainsel starvation is handled --- .../Test/Consensus/PeerSimulator/BlockFetch.hs | 8 +++++--- .../Test/Consensus/PeerSimulator/Run.hs | 14 +++++++++++++- 2 files changed, 18 insertions(+), 4 deletions(-) diff --git a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/BlockFetch.hs b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/BlockFetch.hs index 3392ceff0e..0a76a49796 100644 --- a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/BlockFetch.hs +++ b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/BlockFetch.hs @@ -73,13 +73,14 @@ import Test.Util.Time (dawnOfTime) startBlockFetchLogic :: forall m. (IOLike m) - => ResourceRegistry m + => Bool -- ^ Whether to enable chain selection starvation + -> ResourceRegistry m -> Tracer m (TraceEvent TestBlock) -> ChainDB m TestBlock -> FetchClientRegistry PeerId (Header TestBlock) TestBlock m -> ChainSyncClientHandleCollection PeerId m TestBlock -> m () -startBlockFetchLogic registry tracer chainDb fetchClientRegistry csHandlesCol = do +startBlockFetchLogic enableChainSelStarvation registry tracer chainDb fetchClientRegistry csHandlesCol = do let slotForgeTime :: BlockFetchClientInterface.SlotForgeTimeOracle m blk slotForgeTime _ = pure dawnOfTime @@ -112,7 +113,8 @@ startBlockFetchLogic registry tracer chainDb fetchClientRegistry csHandlesCol = , bfcMaxRequestsInflight = 10 , bfcDecisionLoopInterval = 0 , bfcSalt = 0 - , bfcBulkSyncGracePeriod = 10 + , bfcBulkSyncGracePeriod = + if enableChainSelStarvation then 10 else 1000000 -- (more than 11 days) } void $ forkLinkedThread registry "BlockFetchLogic" $ diff --git a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/Run.hs b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/Run.hs index 111af6a6a8..997cb4e011 100644 --- a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/Run.hs +++ b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/Run.hs @@ -105,6 +105,11 @@ data SchedulerConfig = -- duration to trigger it. , scDowntime :: Maybe DiffTime + -- | Enable the use of ChainSel starvation information in the block fetch + -- decision logic. It is never actually disabled, but rather the grace + -- period is made virtually infinite. + , scEnableChainSelStarvation :: Bool + -- | Whether to enable ChainSync Jumping. 
The parameters come from -- 'GenesisTest'. , scEnableCSJ :: Bool @@ -122,6 +127,7 @@ defaultSchedulerConfig = scEnableLoE = False, scEnableLoP = False, scDowntime = Nothing, + scEnableChainSelStarvation = True, scEnableCSJ = False } @@ -383,7 +389,13 @@ startNode schedulerConfig genesisTest interval = do -- The block fetch logic needs to be started after the block fetch clients -- otherwise, an internal assertion fails because getCandidates yields more -- peer fragments than registered clients. - BlockFetch.startBlockFetchLogic lrRegistry lrTracer lnChainDb fetchClientRegistry handles + BlockFetch.startBlockFetchLogic + (scEnableChainSelStarvation schedulerConfig) + lrRegistry + lrTracer + lnChainDb + fetchClientRegistry + handles for_ lrLoEVar $ \ var -> do forkLinkedWatcher lrRegistry "LoE updater background" $ From 90d613c398b3c0ed6e7cf0572b17f8492962a098 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nicolas=20=E2=80=9CNiols=E2=80=9D=20Jeannerod?= Date: Fri, 5 Jul 2024 17:42:05 +0200 Subject: [PATCH 17/76] Trace the time at which starvation started --- .../Ouroboros/Consensus/Storage/ChainDB/Impl/Types.hs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/Impl/Types.hs b/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/Impl/Types.hs index 26fd33ab19..9ed08981f8 100644 --- a/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/Impl/Types.hs +++ b/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/Impl/Types.hs @@ -529,7 +529,7 @@ getChainSelMessage starvationTracer starvationVar (ChainSelQueue queue) = do -- transactions on purpose. whenM (atomically $ isEmptyTBQueue queue) $ do writeTVarIO starvationVar ChainSelStarvationOngoing - traceWith starvationTracer ChainSelStarvationStarted + traceWith starvationTracer . ChainSelStarvationStarted =<< getMonotonicTime message <- atomically $ readTBQueue queue -- If there was a starvation ongoing, we need to report that it is done. whenM ((== ChainSelStarvationOngoing) <$> readTVarIO starvationVar) $ @@ -909,6 +909,6 @@ data TraceIteratorEvent blk deriving (Generic, Eq, Show) data TraceChainSelStarvationEvent blk - = ChainSelStarvationStarted + = ChainSelStarvationStarted Time | ChainSelStarvationEnded Time (RealPoint blk) deriving (Generic, Eq, Show) From 50ac9b1dec613bc88c5c32e6371270a9526acea2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nicolas=20=E2=80=9CNiols=E2=80=9D=20Jeannerod?= Date: Fri, 5 Jul 2024 17:46:37 +0200 Subject: [PATCH 18/76] stylish haskell --- .../Ouroboros/Consensus/Storage/ChainDB/Impl.hs | 2 +- .../Ouroboros/Consensus/Storage/ChainDB/Impl/Types.hs | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/Impl.hs b/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/Impl.hs index d20dcc7502..15cf79fdd7 100644 --- a/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/Impl.hs +++ b/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/Impl.hs @@ -16,6 +16,7 @@ module Ouroboros.Consensus.Storage.ChainDB.Impl ( , LgrDB.TraceReplayEvent , SelectionChangedInfo (..) , TraceAddBlockEvent (..) + , TraceChainSelStarvationEvent (..) , TraceCopyToImmutableDBEvent (..) , TraceEvent (..) , TraceFollowerEvent (..) 
@@ -25,7 +26,6 @@ module Ouroboros.Consensus.Storage.ChainDB.Impl ( , TraceOpenEvent (..) , TracePipeliningEvent (..) , TraceValidationEvent (..) - , TraceChainSelStarvationEvent (..) -- * Re-exported for convenience , Args.RelativeMountPoint (..) , ImmutableDB.ImmutableDbSerialiseConstraints diff --git a/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/Impl/Types.hs b/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/Impl/Types.hs index 9ed08981f8..5f97c417bf 100644 --- a/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/Impl/Types.hs +++ b/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/Impl/Types.hs @@ -52,6 +52,7 @@ module Ouroboros.Consensus.Storage.ChainDB.Impl.Types ( -- * Trace types , SelectionChangedInfo (..) , TraceAddBlockEvent (..) + , TraceChainSelStarvationEvent (..) , TraceCopyToImmutableDBEvent (..) , TraceEvent (..) , TraceFollowerEvent (..) @@ -61,7 +62,6 @@ module Ouroboros.Consensus.Storage.ChainDB.Impl.Types ( , TraceOpenEvent (..) , TracePipeliningEvent (..) , TraceValidationEvent (..) - , TraceChainSelStarvationEvent (..) ) where import Cardano.Prelude (whenM) From 60d0d62b602f6b7f2de91694a2d225e03300443d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nicolas=20=E2=80=9CNiols=E2=80=9D=20Jeannerod?= Date: Mon, 8 Jul 2024 11:41:14 +0200 Subject: [PATCH 19/76] Follow change in trace chainselstarvationstarted --- .../test/consensus-test/Test/Consensus/PeerSimulator/Trace.hs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/Trace.hs b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/Trace.hs index 2d7d243d2d..49398dc67d 100644 --- a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/Trace.hs +++ b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/Trace.hs @@ -369,8 +369,8 @@ traceChainDBEventTestBlockWith tracer = \case AddedReprocessLoEBlocksToQueue -> trace $ "Requested ChainSel run" _ -> pure () - ChainDB.TraceChainSelStarvationEvent ChainDB.ChainSelStarvationStarted -> - trace "ChainSel starvation started" + ChainDB.TraceChainSelStarvationEvent (ChainDB.ChainSelStarvationStarted time) -> + trace $ "ChainSel starvation started at " ++ prettyTime time ChainDB.TraceChainSelStarvationEvent (ChainDB.ChainSelStarvationEnded time pt) -> trace $ "ChainSel starvation ended at " ++ prettyTime time ++ " thanks to " ++ terseRealPoint pt _ -> pure () From 1efae68881dc0e2441c571e83564cdd5dad60830 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nicolas=20=E2=80=9CNiols=E2=80=9D=20Jeannerod?= Date: Thu, 11 Jul 2024 07:15:52 +0200 Subject: [PATCH 20/76] ChainDB: let the BlockFetch client add blocks asynchronously This is a port of PR https://github.com/IntersectMBO/ouroboros-network/pull/2721 to the new ChainSelQueue. 
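
For callers of the ChainDB, the change looks roughly as follows (a sketch,
not part of this diff; it assumes the 'blockWrittenToDisk' STM field that
'AddBlockPromise' fills in from the queued block's TMVar):

    -- Before: block until the block has been written to disk.
    written <- addBlockWaitWrittenToDisk chainDB punishment blk

    -- After: hand the block over and continue immediately; waiting
    -- becomes explicit and opt-in.
    promise <- addBlockAsync chainDB punishment blk
    written <- atomically $ blockWrittenToDisk promise
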
Co-authored-by: mrBliss --- .../BlockFetch/ClientInterface.hs | 15 +- .../Storage/ChainDB/Impl/Background.hs | 7 +- .../Storage/ChainDB/Impl/ChainSel.hs | 12 +- .../Consensus/Storage/ChainDB/Impl/Query.hs | 30 ++-- .../Consensus/Storage/ChainDB/Impl/Types.hs | 151 +++++++++++++----- 5 files changed, 143 insertions(+), 72 deletions(-) diff --git a/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/MiniProtocol/BlockFetch/ClientInterface.hs b/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/MiniProtocol/BlockFetch/ClientInterface.hs index 93750475a9..6bebea9d1f 100644 --- a/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/MiniProtocol/BlockFetch/ClientInterface.hs +++ b/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/MiniProtocol/BlockFetch/ClientInterface.hs @@ -30,7 +30,8 @@ import Ouroboros.Consensus.Ledger.SupportsProtocol (LedgerSupportsProtocol) import qualified Ouroboros.Consensus.MiniProtocol.ChainSync.Client as CSClient import qualified Ouroboros.Consensus.MiniProtocol.ChainSync.Client.Jumping as Jumping -import Ouroboros.Consensus.Storage.ChainDB.API (ChainDB) +import Ouroboros.Consensus.Storage.ChainDB.API (AddBlockPromise, + ChainDB) import qualified Ouroboros.Consensus.Storage.ChainDB.API as ChainDB import Ouroboros.Consensus.Storage.ChainDB.API.Types.InvalidBlockPunishment (InvalidBlockPunishment) @@ -56,16 +57,16 @@ data ChainDbView m blk = ChainDbView { getCurrentChain :: STM m (AnchoredFragment (Header blk)) , getIsFetched :: STM m (Point blk -> Bool) , getMaxSlotNo :: STM m MaxSlotNo - , addBlockWaitWrittenToDisk :: InvalidBlockPunishment m -> blk -> m Bool + , addBlockAsync :: InvalidBlockPunishment m -> blk -> m (AddBlockPromise m blk) , getChainSelStarvation :: STM m ChainSelStarvation } -defaultChainDbView :: IOLike m => ChainDB m blk -> ChainDbView m blk +defaultChainDbView :: ChainDB m blk -> ChainDbView m blk defaultChainDbView chainDB = ChainDbView { getCurrentChain = ChainDB.getCurrentChain chainDB , getIsFetched = ChainDB.getIsFetched chainDB , getMaxSlotNo = ChainDB.getMaxSlotNo chainDB - , addBlockWaitWrittenToDisk = ChainDB.addBlockWaitWrittenToDisk chainDB + , addBlockAsync = ChainDB.addBlockAsync chainDB , getChainSelStarvation = ChainDB.getChainSelStarvation chainDB } @@ -215,8 +216,8 @@ mkBlockFetchConsensusInterface pipeliningPunishment <- InvalidBlockPunishment.mkForDiffusionPipelining pure $ mkAddFetchedBlock_ pipeliningPunishment enabledPipelining - -- Waits until the block has been written to disk, but not until chain - -- selection has processed the block. + -- Hand over the block to the ChainDB, but don't wait until it has been + -- written to disk or processed. 
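+  -- Callers that do still need the old blocking behaviour can wait on the
+  -- returned 'AddBlockPromise' themselves: its 'blockWrittenToDisk' field
+  -- is completed once the block has been written to the VolatileDB.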
mkAddFetchedBlock_ :: ( BlockConfig blk -> Header blk @@ -260,7 +261,7 @@ mkBlockFetchConsensusInterface NotReceivingTentativeBlocks -> disconnect ReceivingTentativeBlocks -> pipeliningPunishment bcfg (getHeader blk) disconnect - addBlockWaitWrittenToDisk + addBlockAsync chainDB punishment blk diff --git a/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/Impl/Background.hs b/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/Impl/Background.hs index a854904cae..82a0a228be 100644 --- a/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/Impl/Background.hs +++ b/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/Impl/Background.hs @@ -540,6 +540,11 @@ addBlockRunner fuse cdb@CDB{..} = forever $ do ChainSelAddBlock BlockToAdd{blockToAdd} -> trace $ PoppedBlockFromQueue $ FallingEdgeWith $ blockRealPoint blockToAdd - chainSelSync cdb message) + chainSelSync cdb message + lift $ case message of + ChainSelAddBlock blockToAdd -> + deleteBlockToAdd blockToAdd cdbChainSelQueue + _ -> pure () + ) where starvationTracer = Tracer $ traceWith cdbTracer . TraceChainSelStarvationEvent diff --git a/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/Impl/ChainSel.hs b/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/Impl/ChainSel.hs index f6a8971ee8..2ae269f8d8 100644 --- a/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/Impl/ChainSel.hs +++ b/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/Impl/ChainSel.hs @@ -334,13 +334,11 @@ chainSelSync cdb@CDB {..} (ChainSelAddBlock BlockToAdd { blockToAdd = b, .. }) = let immBlockNo = AF.anchorBlockNo curChain -- We follow the steps from section "## Adding a block" in ChainDB.md - - -- Note: we call 'chainSelectionForFutureBlocks' in all branches instead - -- of once, before branching, because we want to do it /after/ writing the - -- block to the VolatileDB and delivering the 'varBlockWrittenToDisk' - -- promise, as this is the promise the BlockFetch client waits for. - -- Otherwise, the BlockFetch client would have to wait for - -- 'chainSelectionForFutureBlocks'. + -- + -- Note: we call 'chainSelectionForFutureBlocks' in all branches instead of + -- once, before branching, because we want to do it /after/ writing the + -- block to the VolatileDB so that any threads waiting on the + -- 'varBlockWrittenToDisk' promise don't have to wait for the result of -- ### Ignore newTip <- if diff --git a/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/Impl/Query.hs b/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/Impl/Query.hs index 96bb57eac3..c697f11804 100644 --- a/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/Impl/Query.hs +++ b/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/Impl/Query.hs @@ -131,18 +131,15 @@ getBlockComponent :: getBlockComponent CDB{..} = getAnyBlockComponent cdbImmutableDB cdbVolatileDB getIsFetched :: - forall m blk. IOLike m + forall m blk. (IOLike m, HasHeader blk) => ChainDbEnv m blk -> STM m (Point blk -> Bool) -getIsFetched CDB{..} = basedOnHash <$> VolatileDB.getIsMember cdbVolatileDB - where - -- The volatile DB indexes by hash only, not by points. However, it should - -- not be possible to have two points with the same hash but different - -- slot numbers. 
- basedOnHash :: (HeaderHash blk -> Bool) -> Point blk -> Bool - basedOnHash f p = - case pointHash p of - BlockHash hash -> f hash - GenesisHash -> False +getIsFetched CDB{..} = do + checkBlocksToAdd <- memberBlocksToAdd cdbChainSelQueue + checkVolDb <- VolatileDB.getIsMember cdbVolatileDB + return $ \pt -> + case pointToWithOriginRealPoint pt of + Origin -> False + NotOrigin pt' -> checkBlocksToAdd pt' || checkVolDb (realPointHash pt') getIsInvalidBlock :: forall m blk. (IOLike m, HasHeader blk) @@ -185,10 +182,13 @@ getMaxSlotNo CDB{..} = do -- contains block 9'. The ImmutableDB contains blocks 1-10. The max slot -- of the current chain will be 10 (being the anchor point of the empty -- current chain), while the max slot of the VolatileDB will be 9. - curChainMaxSlotNo <- maxSlotNoFromWithOrigin . AF.headSlot - <$> readTVar cdbChain - volatileDbMaxSlotNo <- VolatileDB.getMaxSlotNo cdbVolatileDB - return $ curChainMaxSlotNo `max` volatileDbMaxSlotNo + -- + -- Moreover, we have to look in 'ChainSelQueue' too. + curChainMaxSlotNo <- + maxSlotNoFromWithOrigin . AF.headSlot <$> readTVar cdbChain + volatileDbMaxSlotNo <- VolatileDB.getMaxSlotNo cdbVolatileDB + blocksToAddMaxSlotNo <- getBlocksToAddMaxSlotNo cdbChainSelQueue + return $ curChainMaxSlotNo `max` volatileDbMaxSlotNo `max` blocksToAddMaxSlotNo {------------------------------------------------------------------------------- Unifying interface over the immutable DB and volatile DB, but independent diff --git a/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/Impl/Types.hs b/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/Impl/Types.hs index 5f97c417bf..f39932275f 100644 --- a/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/Impl/Types.hs +++ b/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/Impl/Types.hs @@ -43,11 +43,14 @@ module Ouroboros.Consensus.Storage.ChainDB.Impl.Types ( -- * Blocks to add , BlockToAdd (..) , ChainSelMessage (..) - , ChainSelQueue + , ChainSelQueue -- opaque , addBlockToAdd , addReprocessLoEBlocks , closeChainSelQueue + , deleteBlockToAdd + , getBlocksToAddMaxSlotNo , getChainSelMessage + , memberBlocksToAdd , newChainSelQueue -- * Trace types , SelectionChangedInfo (..) @@ -64,11 +67,12 @@ module Ouroboros.Consensus.Storage.ChainDB.Impl.Types ( , TraceValidationEvent (..) 
) where -import Cardano.Prelude (whenM) +import Cardano.Prelude (Bifunctor (second)) +import Control.Monad (void) import Control.Tracer -import Data.Foldable (traverse_) +import Data.Foldable (for_) import Data.Map.Strict (Map) -import Data.Maybe (mapMaybe) +import qualified Data.Map.Strict as Map import Data.Maybe.Strict (StrictMaybe (..)) import Data.Set (Set) import Data.Typeable @@ -106,9 +110,10 @@ import Ouroboros.Consensus.Util.CallStack import Ouroboros.Consensus.Util.Enclose (Enclosing, Enclosing' (..)) import Ouroboros.Consensus.Util.IOLike import Ouroboros.Consensus.Util.ResourceRegistry -import Ouroboros.Consensus.Util.STM (WithFingerprint) +import Ouroboros.Consensus.Util.STM (WithFingerprint, + blockUntilChanged) import Ouroboros.Network.AnchoredFragment (AnchoredFragment) -import Ouroboros.Network.Block (MaxSlotNo) +import Ouroboros.Network.Block (MaxSlotNo (..)) import Ouroboros.Network.BlockFetch.ConsensusInterface (ChainSelStarvation (..)) @@ -254,6 +259,17 @@ data ChainDbEnv m blk = CDB , cdbCheckInFuture :: !(CheckInFuture m blk) , cdbChainSelQueue :: !(ChainSelQueue m blk) -- ^ Queue of blocks that still have to be added. + -- + -- NOTE: the set of blocks in this queue are /not/ disjoint from the set of + -- blocks in the VolatileDB. When processing the next block in the queue, we + -- do not remove the block from the queue /until/ it has been added to the + -- VolatileDB and processed by chain selection. This means the block + -- currently being added will be both in the queue and the VolatileDB for a + -- short while. + -- + -- If we would remove the block from the queue before adding it to the + -- VolatileDB, then it would be in /neither/ for a short time, and + -- 'getIsFetched' would incorrectly return 'False'. , cdbFutureBlocks :: !(StrictTVar m (FutureBlocks m blk)) -- ^ Blocks from the future -- @@ -449,8 +465,21 @@ type FutureBlocks m blk = Map (HeaderHash blk) (Header blk, InvalidBlockPunishme -- | FIFO queue used to add blocks asynchronously to the ChainDB. Blocks are -- read from this queue by a background thread, which processes the blocks -- synchronously. -newtype ChainSelQueue m blk = ChainSelQueue (TBQueue m (ChainSelMessage m blk)) - deriving NoThunks via OnlyCheckWhnfNamed "ChainSelQueue" (ChainSelQueue m blk) +data ChainSelQueue m blk = ChainSelQueue { + -- TODO use a better data structure, e.g., a heap from the @heaps@ + -- package. Wish list: + -- + O(1) pop min value + -- + O(log n) insert + -- + O(n) get all + -- + Bounded in size + -- + -- TODO join consecutive blocks into a fragment that can be added at + -- once. + varChainSelQueue :: !(StrictTVar m (Map (RealPoint blk) (BlockToAdd m blk))) + , chainSelQueueCapacity :: !Word + , varChainSelReprocessLoEBlocks :: !(StrictTVar m Bool) + } + deriving (NoThunks) via OnlyCheckWhnfNamed "ChainSelQueue" (ChainSelQueue m blk) -- | Entry in the 'ChainSelQueue' queue: a block together with the 'TMVar's used -- to implement 'AddBlockPromise'. @@ -464,6 +493,7 @@ data BlockToAdd m blk = BlockToAdd , varBlockProcessed :: !(StrictTMVar m (AddBlockResult blk)) -- ^ Used for the 'blockProcessed' field of 'AddBlockPromise'. } + deriving NoThunks via OnlyCheckWhnfNamed "BlockToAdd" (BlockToAdd m blk) -- | Different async tasks for triggering ChainSel data ChainSelMessage m blk @@ -473,9 +503,11 @@ data ChainSelMessage m blk | ChainSelReprocessLoEBlocks -- | Create a new 'ChainSelQueue' with the given size. 
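+-- Blocks are buffered in a 'Map' keyed by 'RealPoint'. Since 'RealPoint'
+-- orders by 'SlotNo' first, 'getChainSelMessage' pops the queued block with
+-- the lowest slot via 'Map.minView'.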
-newChainSelQueue :: IOLike m => Word -> m (ChainSelQueue m blk) -newChainSelQueue queueSize = ChainSelQueue <$> - atomically (newTBQueue (fromIntegral queueSize)) +newChainSelQueue :: (IOLike m, StandardHash blk, Typeable blk) => Word -> m (ChainSelQueue m blk) +newChainSelQueue chainSelQueueCapacity = do + varChainSelQueue <- newTVarIO mempty + varChainSelReprocessLoEBlocks <- newTVarIO False + return $ ChainSelQueue {varChainSelQueue, chainSelQueueCapacity, varChainSelReprocessLoEBlocks} -- | Add a block to the 'ChainSelQueue' queue. Can block when the queue is full. addBlockToAdd :: @@ -485,7 +517,7 @@ addBlockToAdd :: -> InvalidBlockPunishment m -> blk -> m (AddBlockPromise m blk) -addBlockToAdd tracer (ChainSelQueue queue) punish blk = do +addBlockToAdd tracer (ChainSelQueue {varChainSelQueue, chainSelQueueCapacity}) punish blk = do varBlockWrittenToDisk <- newEmptyTMVarIO varBlockProcessed <- newEmptyTMVarIO let !toAdd = BlockToAdd @@ -496,8 +528,12 @@ addBlockToAdd tracer (ChainSelQueue queue) punish blk = do } traceWith tracer $ AddedBlockToQueue (blockRealPoint blk) RisingEdge queueSize <- atomically $ do - writeTBQueue queue (ChainSelAddBlock toAdd) - lengthTBQueue queue + chainSelQueue <- readTVar varChainSelQueue + let chainSelQueue' = Map.insert (blockRealPoint blk) toAdd chainSelQueue + chainSelQueueSize = Map.size chainSelQueue' + check (fromIntegral chainSelQueueSize <= chainSelQueueCapacity) + writeTVar varChainSelQueue chainSelQueue' + return chainSelQueueSize traceWith tracer $ AddedBlockToQueue (blockRealPoint blk) (FallingEdgeWith (fromIntegral queueSize)) return AddBlockPromise @@ -511,51 +547,82 @@ addReprocessLoEBlocks => Tracer m (TraceAddBlockEvent blk) -> ChainSelQueue m blk -> m () -addReprocessLoEBlocks tracer (ChainSelQueue queue) = do +addReprocessLoEBlocks tracer (ChainSelQueue {varChainSelReprocessLoEBlocks}) = do traceWith tracer $ AddedReprocessLoEBlocksToQueue - atomically $ writeTBQueue queue ChainSelReprocessLoEBlocks + atomically $ writeTVar varChainSelReprocessLoEBlocks True -- | Get the oldest message from the 'ChainSelQueue' queue. Can block when the -- queue is empty; in that case, reports the starvation (and its end) to the -- callback. getChainSelMessage - :: (IOLike m, HasHeader blk) + :: IOLike m => Tracer m (TraceChainSelStarvationEvent blk) -> StrictTVar m ChainSelStarvation -> ChainSelQueue m blk -> m (ChainSelMessage m blk) -getChainSelMessage starvationTracer starvationVar (ChainSelQueue queue) = do - -- NOTE: The test of emptiness and the blocking read are in different STM - -- transactions on purpose. - whenM (atomically $ isEmptyTBQueue queue) $ do - writeTVarIO starvationVar ChainSelStarvationOngoing - traceWith starvationTracer . ChainSelStarvationStarted =<< getMonotonicTime - message <- atomically $ readTBQueue queue - -- If there was a starvation ongoing, we need to report that it is done. 
- whenM ((== ChainSelStarvationOngoing) <$> readTVarIO starvationVar) $ - case message of - ChainSelAddBlock BlockToAdd {blockToAdd} -> do - time <- getMonotonicTime - traceWith starvationTracer $ ChainSelStarvationEnded time $ blockRealPoint blockToAdd - writeTVarIO starvationVar $ ChainSelStarvationEndedAt time - ChainSelReprocessLoEBlocks -> pure () - return message +getChainSelMessage starvationTracer starvationVar queue = go where + go = do + (reprocessLoEBlocks, chainSelQueue) <- atomically readBoth + case reprocessLoEBlocks of + True -> do + writeTVarIO varChainSelReprocessLoEBlocks False + return ChainSelReprocessLoEBlocks + False -> + case Map.minView chainSelQueue of + Just (blockToAdd, chainSelQueue') -> do + writeTVarIO varChainSelQueue chainSelQueue' + return $ ChainSelAddBlock blockToAdd + Nothing -> do + writeTVarIO starvationVar ChainSelStarvationOngoing + traceWith starvationTracer . ChainSelStarvationStarted =<< getMonotonicTime -- FIXME: only trace if first time + void $ atomically $ blockUntilChanged (second Map.null) (False, True) readBoth + go + ChainSelQueue {varChainSelQueue, varChainSelReprocessLoEBlocks} = queue writeTVarIO v x = atomically $ writeTVar v x + readBoth = (,) <$> readTVar varChainSelReprocessLoEBlocks <*> readTVar varChainSelQueue -- | Flush the 'ChainSelQueue' queue and notify the waiting threads. -- +-- REVIEW: What about all the threads that are waiting to write in the queue and +-- will write after the flush?! closeChainSelQueue :: IOLike m => ChainSelQueue m blk -> STM m () -closeChainSelQueue (ChainSelQueue queue) = do - as <- mapMaybe blockAdd <$> flushTBQueue queue - traverse_ (\a -> tryPutTMVar (varBlockProcessed a) - (FailedToAddBlock "Queue flushed")) - as - where - blockAdd = \case - ChainSelAddBlock ab -> Just ab - ChainSelReprocessLoEBlocks -> Nothing +closeChainSelQueue ChainSelQueue {varChainSelQueue} = do + chainSelQueue <- readTVar varChainSelQueue + for_ chainSelQueue $ \BlockToAdd {varBlockProcessed} -> + putTMVar varBlockProcessed $ FailedToAddBlock "Queue flushed" +-- | Delete the given 'BlockToAdd' from the 'ChainSelQueue'. +-- +-- PRECONDITION: the given 'BlockToAdd' is in 'ChainSelQueue'. +deleteBlockToAdd :: + (IOLike m, HasHeader blk) + => BlockToAdd m blk + -> ChainSelQueue m blk + -> m () +deleteBlockToAdd (BlockToAdd _ blk _ _) (ChainSelQueue {varChainSelQueue}) = + atomically $ modifyTVar varChainSelQueue $ Map.delete (blockRealPoint blk) + +-- | Return a function to test the membership for the given 'BlocksToAdd'. +memberBlocksToAdd :: + (IOLike m, HasHeader blk) + => ChainSelQueue m blk + -> STM m (RealPoint blk -> Bool) +memberBlocksToAdd (ChainSelQueue {varChainSelQueue}) = + flip Map.member <$> readTVar varChainSelQueue + +getBlocksToAddMaxSlotNo :: + IOLike m + => ChainSelQueue m blk + -> STM m MaxSlotNo +getBlocksToAddMaxSlotNo (ChainSelQueue {varChainSelQueue}) = aux <$> readTVar varChainSelQueue + where + -- | The 'Ord' instance of 'RealPoint' orders by 'SlotNo' first, so the + -- maximal key of the map has the greatest 'SlotNo'. 
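+    -- For instance (a sketch): with queued points at slots 2, 5 and 3,
+    -- 'Map.lookupMax' yields the slot-5 entry, so the result is
+    -- @MaxSlotNo 5@.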
+    aux :: Map (RealPoint blk) (BlockToAdd m blk) -> MaxSlotNo
+    aux queue = case Map.lookupMax queue of
+      Nothing                 -> NoMaxSlotNo
+      Just (RealPoint s _, _) -> MaxSlotNo s

{-------------------------------------------------------------------------------
  Trace types

From 91bf46d430de2629a044e84a455e993f22c9afe2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Nicolas=20=E2=80=9CNiols=E2=80=9D=20Jeannerod?=
Date: Thu, 11 Jul 2024 09:27:31 +0200
Subject: [PATCH 21/76] Some wiggle room for duplicate headers

---
 .../Test/Consensus/Genesis/Tests/CSJ.hs       | 13 +++++++++++--
 1 file changed, 11 insertions(+), 2 deletions(-)

diff --git a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Tests/CSJ.hs b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Tests/CSJ.hs
index 652b305e19..53acf5b02f 100644
--- a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Tests/CSJ.hs
+++ b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Tests/CSJ.hs
@@ -6,7 +6,8 @@ module Test.Consensus.Genesis.Tests.CSJ (tests) where
 import Data.List (nub)
 import qualified Data.Map.Strict as Map
 import Data.Maybe (mapMaybe)
-import Ouroboros.Consensus.Block (Header, blockSlot, succWithOrigin)
+import Ouroboros.Consensus.Block (Header, blockSlot, succWithOrigin,
+    unSlotNo)
 import Ouroboros.Consensus.MiniProtocol.ChainSync.Client
     (TraceChainSyncClientEvent (..))
 import Ouroboros.Consensus.Util.Condense (PaddingDirection (..),
@@ -111,8 +112,16 @@ prop_CSJ adversariesFlag numHonestSchedules = do
             _ -> Nothing
         )
         svTrace
+      -- We receive headers at most once from each honest peer. The only
+      -- exception is when an honest peer first gets to be the objector and
+      -- then, once an adversary dies, the dynamo. In that specific case, we
+      -- might re-download up to jumpSize headers. TODO: If we ever choose to
+      -- promote objectors to dynamo to reuse their state, then we could
+      -- make this bound tighter.
receivedHeadersAtMostOnceFromHonestPeers = - length (nub $ snd <$> headerHonestDownloadEvents) == length headerHonestDownloadEvents + length headerHonestDownloadEvents <= + length (nub $ snd <$> headerHonestDownloadEvents) + + (fromIntegral $ unSlotNo $ csjpJumpSize $ gtCSJParams gt) in tabulate "" [ if headerHonestDownloadEvents == [] From bc445da12e2677284605d16d02dc1aaab121d575 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nicolas=20=E2=80=9CNiols=E2=80=9D=20Jeannerod?= Date: Fri, 5 Jul 2024 17:24:11 +0200 Subject: [PATCH 22/76] Disable chainsel starvation in CSJ test --- .../Test/Consensus/Genesis/Tests/CSJ.hs | 21 ++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Tests/CSJ.hs b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Tests/CSJ.hs index 53acf5b02f..70bb0eeb71 100644 --- a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Tests/CSJ.hs +++ b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Tests/CSJ.hs @@ -13,6 +13,8 @@ import Ouroboros.Consensus.MiniProtocol.ChainSync.Client import Ouroboros.Consensus.Util.Condense (PaddingDirection (..), condenseListWithPadding) import qualified Ouroboros.Network.AnchoredFragment as AF +import Ouroboros.Network.Protocol.ChainSync.Codec + (ChainSyncTimeout (mustReplyTimeout), idleTimeout) import Test.Consensus.BlockTree (BlockTree (..)) import Test.Consensus.Genesis.Setup import Test.Consensus.Genesis.Tests.Uniform (genUniformSchedulePoints) @@ -50,6 +52,7 @@ tests = -- | A flag to indicate if properties are tested with adversarial peers data WithAdversariesFlag = NoAdversaries | WithAdversaries + deriving Eq -- | A flag to indicate if properties are tested using the same schedule for the -- honest peers, or if each peer should used its own schedule. @@ -82,7 +85,7 @@ prop_CSJ adversariesFlag numHonestSchedules = do NoAdversaries -> pure 0 WithAdversaries -> choose (2, 4) forAllGenesisTest - ( case numHonestSchedules of + ( disableBoringTimeouts <$> case numHonestSchedules of OneScheduleForAllPeers -> genChains genForks `enrichedWith` genDuplicatedHonestSchedule @@ -94,6 +97,13 @@ prop_CSJ adversariesFlag numHonestSchedules = do { scEnableCSJ = True , scEnableLoE = True , scEnableLoP = True + , scEnableChainSelStarvation = adversariesFlag == NoAdversaries + -- ^ NOTE: When there are adversaries and the ChainSel + -- starvation detection of BlockFetch is enabled, then our property does + -- not actually hold, because peer simulator-based tests have virtually + -- infinite CPU, and therefore ChainSel gets starved at every tick, which + -- makes us cycle the dynamos, which can lead to some extra headers being + -- downloaded. } ) shrinkPeerSchedules @@ -161,3 +171,12 @@ prop_CSJ adversariesFlag numHonestSchedules = do in -- Sanity check: add @1 +@ after @>@ and watch the World burn. 
         hdrSlot + jumpSize >= succWithOrigin tipSlot
+
+    disableBoringTimeouts gt =
+      gt
+        { gtChainSyncTimeouts =
+            (gtChainSyncTimeouts gt)
+              { mustReplyTimeout = Nothing,
+                idleTimeout = Nothing
+              }
+        }

From e93b423cb5a5bc885ac8ae0834d7265cb0208f4e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Facundo=20Dom=C3=ADnguez?=
Date: Thu, 11 Jul 2024 17:55:30 +0000
Subject: [PATCH 23/76] Depend on the ouroboros-network fork with the latest
 BlockFetch

---
 cabal.project | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/cabal.project b/cabal.project
index fa4707da0a..a08d97ef82 100644
--- a/cabal.project
+++ b/cabal.project
@@ -38,3 +38,13 @@ import: ./asserts.cabal
 
 if(os(windows))
   constraints: bitvec -simd
+
+source-repository-package
+  type: git
+  location: https://github.com/IntersectMBO/ouroboros-network
+  tag: f71d0126ff5d84af44b644460b4e08cf3c256914
+  --sha256: 1a62hqddpnc0j5r7nl54q79nrxyw77dphpsgp68hxij210dkpvca
+  subdir:
+    ouroboros-network
+    ouroboros-network-api
+    ouroboros-network-protocols

From 5e5b37d66f7889e5bb75e289bf44f5cb8736b5cd Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Facundo=20Dom=C3=ADnguez?=
Date: Thu, 11 Jul 2024 19:00:37 +0000
Subject: [PATCH 24/76] Accommodate the addition of
 ChainSyncClientHandleCollection and the new grace period and starvation
 event in BlockFetch

---
 .../MiniProtocol/BlockFetch/Client.hs         | 22 ++++++++++++-------
 .../MiniProtocol/ChainSync/Client.hs          | 16 +++++++++-----
 .../Ouroboros/Storage/ChainDB/StateMachine.hs |  1 +
 3 files changed, 25 insertions(+), 14 deletions(-)

diff --git a/ouroboros-consensus/test/consensus-test/Test/Consensus/MiniProtocol/BlockFetch/Client.hs b/ouroboros-consensus/test/consensus-test/Test/Consensus/MiniProtocol/BlockFetch/Client.hs
index b3aa0efe1e..fb1e61fb0d 100644
--- a/ouroboros-consensus/test/consensus-test/Test/Consensus/MiniProtocol/BlockFetch/Client.hs
+++ b/ouroboros-consensus/test/consensus-test/Test/Consensus/MiniProtocol/BlockFetch/Client.hs
@@ -8,6 +8,7 @@
 {-# LANGUAGE RecordWildCards #-}
 {-# LANGUAGE ScopedTypeVariables #-}
 {-# LANGUAGE TupleSections #-}
+{-# LANGUAGE TypeApplications #-}
 
 -- | A test for the consensus-specific parts of the BlockFetch client.
 --
@@ -51,7 +52,7 @@ import Ouroboros.Consensus.Util.STM (blockUntilJust,
 import Ouroboros.Network.AnchoredFragment (AnchoredFragment)
 import qualified Ouroboros.Network.AnchoredFragment as AF
 import Ouroboros.Network.BlockFetch (BlockFetchConfiguration (..),
-    BlockFetchConsensusInterface, FetchMode (..),
+    BlockFetchConsensusInterface (..), FetchMode (..),
     blockFetchLogic, bracketFetchClient,
     bracketKeepAliveClient, bracketSyncWithFetchClient,
     newFetchClientRegistry)
@@ -256,10 +257,11 @@ runBlockFetchTest BlockFetchClientTestSetup{..} = withRegistry \registry -> do
       let -- Always return the empty chain such that the BlockFetch logic
           -- downloads all chains.
- getCurrentChain = pure $ AF.Empty AF.AnchorGenesis - getIsFetched = ChainDB.getIsFetched chainDB - getMaxSlotNo = ChainDB.getMaxSlotNo chainDB - addBlockWaitWrittenToDisk = ChainDB.addBlockWaitWrittenToDisk chainDB + getCurrentChain = pure $ AF.Empty AF.AnchorGenesis + getIsFetched = ChainDB.getIsFetched chainDB + getMaxSlotNo = ChainDB.getMaxSlotNo chainDB + addBlockAsync = ChainDB.addBlockAsync chainDB + getChainSelStarvation = ChainDB.getChainSelStarvation chainDB pure BlockFetchClientInterface.ChainDbView {..} where -- Needs to be larger than any chain length in this test, to ensure that @@ -278,13 +280,16 @@ runBlockFetchTest BlockFetchClientTestSetup{..} = withRegistry \registry -> do -> BlockFetchClientInterface.ChainDbView m TestBlock -> BlockFetchConsensusInterface PeerId (Header TestBlock) TestBlock m mkTestBlockFetchConsensusInterface getCandidates chainDbView = - BlockFetchClientInterface.mkBlockFetchConsensusInterface + (BlockFetchClientInterface.mkBlockFetchConsensusInterface @m @PeerId (TestBlockConfig numCoreNodes) chainDbView - getCandidates + (error "ChainSyncClientHandleCollection not provided to mkBlockFetchConsensusInterface") (\_hdr -> 1000) -- header size, only used for peer prioritization slotForgeTime - (pure blockFetchMode) + (pure blockFetchMode)) + { readCandidateChains = getCandidates + , demoteCSJDynamo = const (pure ()) + } where -- Bogus implementation; this is fine as this is only used for -- enriching tracing information ATM. @@ -359,6 +364,7 @@ instance Arbitrary BlockFetchClientTestSetup where -- logic iterations in case the monitored state vars change too -- fast, which we don't have to worry about in this test. bfcDecisionLoopInterval = 0 + bfcBulkSyncGracePeriod = 10 bfcMaxRequestsInflight <- chooseEnum (2, 10) bfcSalt <- arbitrary pure BlockFetchConfiguration {..} diff --git a/ouroboros-consensus/test/consensus-test/Test/Consensus/MiniProtocol/ChainSync/Client.hs b/ouroboros-consensus/test/consensus-test/Test/Consensus/MiniProtocol/ChainSync/Client.hs index 868ca695dd..e674013c6d 100644 --- a/ouroboros-consensus/test/consensus-test/Test/Consensus/MiniProtocol/ChainSync/Client.hs +++ b/ouroboros-consensus/test/consensus-test/Test/Consensus/MiniProtocol/ChainSync/Client.hs @@ -82,12 +82,16 @@ import Ouroboros.Consensus.Ledger.Abstract import Ouroboros.Consensus.Ledger.Extended hiding (ledgerState) import Ouroboros.Consensus.MiniProtocol.ChainSync.Client (CSJConfig (..), ChainDbView (..), - ChainSyncClientException, ChainSyncClientResult (..), + ChainSyncClientException, + ChainSyncClientHandleCollection (..), + ChainSyncClientResult (..), ChainSyncLoPBucketConfig (..), ChainSyncState (..), ChainSyncStateView (..), ConfigEnv (..), Consensus, DynamicEnv (..), Our (..), Their (..), TraceChainSyncClientEvent (..), bracketChainSyncClient, - chainSyncClient, chainSyncStateFor, viewChainSyncState) + chainSyncClient, chainSyncStateFor, + newChainSyncClientHandleCollection, + viewChainSyncState) import qualified Ouroboros.Consensus.MiniProtocol.ChainSync.Client.InFutureCheck as InFutureCheck import Ouroboros.Consensus.Node.GsmState (GsmState (Syncing)) import Ouroboros.Consensus.Node.NetworkProtocolVersion @@ -357,7 +361,7 @@ runChainSync skew securityParam (ClientUpdates clientUpdates) -- separate map too, one that isn't emptied. We can use this map to look -- at the final state of each candidate. 
varFinalCandidates <- uncheckedNewTVarM Map.empty - varHandles <- uncheckedNewTVarM Map.empty + cschCol <- atomically newChainSyncClientHandleCollection (tracer, getTrace) <- do (tracer', getTrace) <- recordingTracerTVar @@ -501,7 +505,7 @@ runChainSync skew securityParam (ClientUpdates clientUpdates) bracketChainSyncClient chainSyncTracer chainDbView - varHandles + cschCol -- 'Syncing' only ever impacts the LoP, which is disabled in -- this test, so any value would do. (pure Syncing) @@ -511,7 +515,7 @@ runChainSync skew securityParam (ClientUpdates clientUpdates) csjConfig $ \csState -> do atomically $ do - handles <- readTVar varHandles + handles <- cschcMap cschCol modifyTVar varFinalCandidates $ Map.insert serverId (handles Map.! serverId) result <- runPipelinedPeer protocolTracer codecChainSyncId clientChannel $ @@ -532,7 +536,7 @@ runChainSync skew securityParam (ClientUpdates clientUpdates) let checkTipTime :: m () checkTipTime = do now <- systemTimeCurrent clientSystemTime - candidates <- atomically $ viewChainSyncState varHandles csCandidate + candidates <- atomically $ viewChainSyncState (cschcMap cschCol) csCandidate forM_ candidates $ \candidate -> do let p = castPoint $ AF.headPoint candidate :: Point TestBlock case pointSlot p of diff --git a/ouroboros-consensus/test/storage-test/Test/Ouroboros/Storage/ChainDB/StateMachine.hs b/ouroboros-consensus/test/storage-test/Test/Ouroboros/Storage/ChainDB/StateMachine.hs index 9ca672dfb4..dc2a74d20d 100644 --- a/ouroboros-consensus/test/storage-test/Test/Ouroboros/Storage/ChainDB/StateMachine.hs +++ b/ouroboros-consensus/test/storage-test/Test/Ouroboros/Storage/ChainDB/StateMachine.hs @@ -1631,6 +1631,7 @@ traceEventName = \case TraceLedgerReplayEvent ev -> "LedgerReplay." <> constrName ev TraceImmutableDBEvent ev -> "ImmutableDB." <> constrName ev TraceVolatileDBEvent ev -> "VolatileDB." <> constrName ev + TraceChainSelStarvationEvent _ -> "TraceChainSelStarvationEvent" mkArgs :: IOLike m => TopLevelConfig Blk From 5aaa882a8f60b5399fb5fe9d8b50fc9a4cf865db Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nicolas=20=E2=80=9CNiols=E2=80=9D=20Jeannerod?= Date: Fri, 12 Jul 2024 12:16:31 +0200 Subject: [PATCH 25/76] Follow removal of `bfcMaxConcurrencyBulkSync` --- .../Ouroboros/Consensus/Node.hs | 6 +----- .../Test/ThreadNet/Network.hs | 3 +-- .../Test/Consensus/PeerSimulator/BlockFetch.hs | 12 +----------- 3 files changed, 3 insertions(+), 18 deletions(-) diff --git a/ouroboros-consensus-diffusion/src/ouroboros-consensus-diffusion/Ouroboros/Consensus/Node.hs b/ouroboros-consensus-diffusion/src/ouroboros-consensus-diffusion/Ouroboros/Consensus/Node.hs index 485be33dbb..a26eed133b 100644 --- a/ouroboros-consensus-diffusion/src/ouroboros-consensus-diffusion/Ouroboros/Consensus/Node.hs +++ b/ouroboros-consensus-diffusion/src/ouroboros-consensus-diffusion/Ouroboros/Consensus/Node.hs @@ -288,8 +288,7 @@ data LowLevelRunNodeArgs m addrNTN addrNTC versionDataNTN versionDataNTC blk -- -- See 'stdLowLevelRunNodeArgsIO'. data StdRunNodeArgs m blk (p2p :: Diffusion.P2P) = StdRunNodeArgs - { srnBfcMaxConcurrencyBulkSync :: Maybe Word - , srnBfcMaxConcurrencyDeadline :: Maybe Word + { srnBfcMaxConcurrencyDeadline :: Maybe Word , srnChainDbValidateOverride :: Bool -- ^ If @True@, validate the ChainDB on init no matter what , srnDiskPolicyArgs :: DiskPolicyArgs @@ -936,9 +935,6 @@ stdLowLevelRunNodeArgsIO RunNodeArgs{ rnProtocolInfo maybe id (\mc bfc -> bfc { bfcMaxConcurrencyDeadline = mc }) srnBfcMaxConcurrencyDeadline - . 
maybe id - (\mc bfc -> bfc { bfcMaxConcurrencyBulkSync = mc }) - srnBfcMaxConcurrencyBulkSync modifyMempoolCapacityOverride = maybe id (\mc nka -> nka { mempoolCapacityOverride = mc }) diff --git a/ouroboros-consensus-diffusion/src/unstable-diffusion-testlib/Test/ThreadNet/Network.hs b/ouroboros-consensus-diffusion/src/unstable-diffusion-testlib/Test/ThreadNet/Network.hs index e2f51f99e0..d1d21e6562 100644 --- a/ouroboros-consensus-diffusion/src/unstable-diffusion-testlib/Test/ThreadNet/Network.hs +++ b/ouroboros-consensus-diffusion/src/unstable-diffusion-testlib/Test/ThreadNet/Network.hs @@ -1014,8 +1014,7 @@ runThreadNetwork systemTime ThreadNetworkArgs txSubmissionMaxUnacked = 1000 -- TODO ? } , blockFetchConfiguration = BlockFetchConfiguration { - bfcMaxConcurrencyBulkSync = 1 - , bfcMaxConcurrencyDeadline = 2 + bfcMaxConcurrencyDeadline = 2 , bfcMaxRequestsInflight = 10 , bfcDecisionLoopInterval = 0.0 -- Mock testsuite can use sub-second slot -- interval which doesn't play nice with diff --git a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/BlockFetch.hs b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/BlockFetch.hs index 0a76a49796..aaf582555b 100644 --- a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/BlockFetch.hs +++ b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/BlockFetch.hs @@ -99,17 +99,7 @@ startBlockFetchLogic enableChainSelStarvation registry tracer chainDb fetchClien -- Values taken from -- ouroboros-consensus-diffusion/src/unstable-diffusion-testlib/Test/ThreadNet/Network.hs blockFetchCfg = BlockFetchConfiguration - { -- We set a higher value here to allow downloading blocks from all - -- peers. - -- - -- If the value is too low, block downloads from a peer may prevent - -- blocks from being downloaded from other peers. This can be - -- problematic, since the batch download of a simulated BlockFetch - -- server can last serveral ticks if the block pointer is not - -- advanced to allow completion of the batch. 
- -- - bfcMaxConcurrencyBulkSync = 50 - , bfcMaxConcurrencyDeadline = 50 + { bfcMaxConcurrencyDeadline = 50 -- unused because of @pure FetchModeBulkSync@ above , bfcMaxRequestsInflight = 10 , bfcDecisionLoopInterval = 0 , bfcSalt = 0 From 7ac6a88be1db290d484ce214f4398a3abd121206 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nicolas=20=E2=80=9CNiols=E2=80=9D=20Jeannerod?= Date: Fri, 12 Jul 2024 15:45:22 +0200 Subject: [PATCH 26/76] Only log starvations when they actually start --- .../Ouroboros/Consensus/Storage/ChainDB/Impl/Types.hs | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/Impl/Types.hs b/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/Impl/Types.hs index f39932275f..24f0706364 100644 --- a/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/Impl/Types.hs +++ b/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/Impl/Types.hs @@ -68,7 +68,7 @@ module Ouroboros.Consensus.Storage.ChainDB.Impl.Types ( ) where import Cardano.Prelude (Bifunctor (second)) -import Control.Monad (void) +import Control.Monad (void, when) import Control.Tracer import Data.Foldable (for_) import Data.Map.Strict (Map) @@ -574,12 +574,14 @@ getChainSelMessage starvationTracer starvationVar queue = go writeTVarIO varChainSelQueue chainSelQueue' return $ ChainSelAddBlock blockToAdd Nothing -> do - writeTVarIO starvationVar ChainSelStarvationOngoing - traceWith starvationTracer . ChainSelStarvationStarted =<< getMonotonicTime -- FIXME: only trace if first time + prevStarvation <- swapTVarIO starvationVar ChainSelStarvationOngoing + when (prevStarvation /= ChainSelStarvationOngoing) $ + traceWith starvationTracer . ChainSelStarvationStarted =<< getMonotonicTime void $ atomically $ blockUntilChanged (second Map.null) (False, True) readBoth go ChainSelQueue {varChainSelQueue, varChainSelReprocessLoEBlocks} = queue writeTVarIO v x = atomically $ writeTVar v x + swapTVarIO v x = atomically $ swapTVar v x readBoth = (,) <$> readTVar varChainSelReprocessLoEBlocks <*> readTVar varChainSelQueue -- | Flush the 'ChainSelQueue' queue and notify the waiting threads. From 956332f02216bced46b4f40c02912924ef722032 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nicolas=20=E2=80=9CNiols=E2=80=9D=20Jeannerod?= Date: Fri, 12 Jul 2024 23:27:28 +0200 Subject: [PATCH 27/76] Revert "ChainDB: let the BlockFetch client add blocks asynchronously" This reverts commit 09e98372890d3566fde05d2bb64c4c14aec7ae78. 
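
This restores the synchronous interface in 'ChainDbView' (the signatures
as they appear in the diff below):

    addBlockWaitWrittenToDisk :: InvalidBlockPunishment m -> blk -> m Bool

in place of the asynchronous

    addBlockAsync :: InvalidBlockPunishment m -> blk -> m (AddBlockPromise m blk)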
--- .../BlockFetch/ClientInterface.hs | 15 +- .../Storage/ChainDB/Impl/Background.hs | 7 +- .../Storage/ChainDB/Impl/ChainSel.hs | 12 +- .../Consensus/Storage/ChainDB/Impl/Query.hs | 30 ++-- .../Consensus/Storage/ChainDB/Impl/Types.hs | 155 +++++------------- 5 files changed, 75 insertions(+), 144 deletions(-) diff --git a/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/MiniProtocol/BlockFetch/ClientInterface.hs b/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/MiniProtocol/BlockFetch/ClientInterface.hs index 6bebea9d1f..93750475a9 100644 --- a/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/MiniProtocol/BlockFetch/ClientInterface.hs +++ b/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/MiniProtocol/BlockFetch/ClientInterface.hs @@ -30,8 +30,7 @@ import Ouroboros.Consensus.Ledger.SupportsProtocol (LedgerSupportsProtocol) import qualified Ouroboros.Consensus.MiniProtocol.ChainSync.Client as CSClient import qualified Ouroboros.Consensus.MiniProtocol.ChainSync.Client.Jumping as Jumping -import Ouroboros.Consensus.Storage.ChainDB.API (AddBlockPromise, - ChainDB) +import Ouroboros.Consensus.Storage.ChainDB.API (ChainDB) import qualified Ouroboros.Consensus.Storage.ChainDB.API as ChainDB import Ouroboros.Consensus.Storage.ChainDB.API.Types.InvalidBlockPunishment (InvalidBlockPunishment) @@ -57,16 +56,16 @@ data ChainDbView m blk = ChainDbView { getCurrentChain :: STM m (AnchoredFragment (Header blk)) , getIsFetched :: STM m (Point blk -> Bool) , getMaxSlotNo :: STM m MaxSlotNo - , addBlockAsync :: InvalidBlockPunishment m -> blk -> m (AddBlockPromise m blk) + , addBlockWaitWrittenToDisk :: InvalidBlockPunishment m -> blk -> m Bool , getChainSelStarvation :: STM m ChainSelStarvation } -defaultChainDbView :: ChainDB m blk -> ChainDbView m blk +defaultChainDbView :: IOLike m => ChainDB m blk -> ChainDbView m blk defaultChainDbView chainDB = ChainDbView { getCurrentChain = ChainDB.getCurrentChain chainDB , getIsFetched = ChainDB.getIsFetched chainDB , getMaxSlotNo = ChainDB.getMaxSlotNo chainDB - , addBlockAsync = ChainDB.addBlockAsync chainDB + , addBlockWaitWrittenToDisk = ChainDB.addBlockWaitWrittenToDisk chainDB , getChainSelStarvation = ChainDB.getChainSelStarvation chainDB } @@ -216,8 +215,8 @@ mkBlockFetchConsensusInterface pipeliningPunishment <- InvalidBlockPunishment.mkForDiffusionPipelining pure $ mkAddFetchedBlock_ pipeliningPunishment enabledPipelining - -- Hand over the block to the ChainDB, but don't wait until it has been - -- written to disk or processed. + -- Waits until the block has been written to disk, but not until chain + -- selection has processed the block. 
mkAddFetchedBlock_ :: ( BlockConfig blk -> Header blk @@ -261,7 +260,7 @@ mkBlockFetchConsensusInterface NotReceivingTentativeBlocks -> disconnect ReceivingTentativeBlocks -> pipeliningPunishment bcfg (getHeader blk) disconnect - addBlockAsync + addBlockWaitWrittenToDisk chainDB punishment blk diff --git a/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/Impl/Background.hs b/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/Impl/Background.hs index 82a0a228be..a854904cae 100644 --- a/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/Impl/Background.hs +++ b/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/Impl/Background.hs @@ -540,11 +540,6 @@ addBlockRunner fuse cdb@CDB{..} = forever $ do ChainSelAddBlock BlockToAdd{blockToAdd} -> trace $ PoppedBlockFromQueue $ FallingEdgeWith $ blockRealPoint blockToAdd - chainSelSync cdb message - lift $ case message of - ChainSelAddBlock blockToAdd -> - deleteBlockToAdd blockToAdd cdbChainSelQueue - _ -> pure () - ) + chainSelSync cdb message) where starvationTracer = Tracer $ traceWith cdbTracer . TraceChainSelStarvationEvent diff --git a/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/Impl/ChainSel.hs b/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/Impl/ChainSel.hs index 2ae269f8d8..f6a8971ee8 100644 --- a/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/Impl/ChainSel.hs +++ b/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/Impl/ChainSel.hs @@ -334,11 +334,13 @@ chainSelSync cdb@CDB {..} (ChainSelAddBlock BlockToAdd { blockToAdd = b, .. }) = let immBlockNo = AF.anchorBlockNo curChain -- We follow the steps from section "## Adding a block" in ChainDB.md - -- - -- Note: we call 'chainSelectionForFutureBlocks' in all branches instead of - -- once, before branching, because we want to do it /after/ writing the - -- block to the VolatileDB so that any threads waiting on the - -- 'varBlockWrittenToDisk' promise don't have to wait for the result of + + -- Note: we call 'chainSelectionForFutureBlocks' in all branches instead + -- of once, before branching, because we want to do it /after/ writing the + -- block to the VolatileDB and delivering the 'varBlockWrittenToDisk' + -- promise, as this is the promise the BlockFetch client waits for. + -- Otherwise, the BlockFetch client would have to wait for + -- 'chainSelectionForFutureBlocks'. -- ### Ignore newTip <- if diff --git a/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/Impl/Query.hs b/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/Impl/Query.hs index c697f11804..96bb57eac3 100644 --- a/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/Impl/Query.hs +++ b/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/Impl/Query.hs @@ -131,15 +131,18 @@ getBlockComponent :: getBlockComponent CDB{..} = getAnyBlockComponent cdbImmutableDB cdbVolatileDB getIsFetched :: - forall m blk. (IOLike m, HasHeader blk) + forall m blk. 
IOLike m => ChainDbEnv m blk -> STM m (Point blk -> Bool) -getIsFetched CDB{..} = do - checkBlocksToAdd <- memberBlocksToAdd cdbChainSelQueue - checkVolDb <- VolatileDB.getIsMember cdbVolatileDB - return $ \pt -> - case pointToWithOriginRealPoint pt of - Origin -> False - NotOrigin pt' -> checkBlocksToAdd pt' || checkVolDb (realPointHash pt') +getIsFetched CDB{..} = basedOnHash <$> VolatileDB.getIsMember cdbVolatileDB + where + -- The volatile DB indexes by hash only, not by points. However, it should + -- not be possible to have two points with the same hash but different + -- slot numbers. + basedOnHash :: (HeaderHash blk -> Bool) -> Point blk -> Bool + basedOnHash f p = + case pointHash p of + BlockHash hash -> f hash + GenesisHash -> False getIsInvalidBlock :: forall m blk. (IOLike m, HasHeader blk) @@ -182,13 +185,10 @@ getMaxSlotNo CDB{..} = do -- contains block 9'. The ImmutableDB contains blocks 1-10. The max slot -- of the current chain will be 10 (being the anchor point of the empty -- current chain), while the max slot of the VolatileDB will be 9. - -- - -- Moreover, we have to look in 'ChainSelQueue' too. - curChainMaxSlotNo <- - maxSlotNoFromWithOrigin . AF.headSlot <$> readTVar cdbChain - volatileDbMaxSlotNo <- VolatileDB.getMaxSlotNo cdbVolatileDB - blocksToAddMaxSlotNo <- getBlocksToAddMaxSlotNo cdbChainSelQueue - return $ curChainMaxSlotNo `max` volatileDbMaxSlotNo `max` blocksToAddMaxSlotNo + curChainMaxSlotNo <- maxSlotNoFromWithOrigin . AF.headSlot + <$> readTVar cdbChain + volatileDbMaxSlotNo <- VolatileDB.getMaxSlotNo cdbVolatileDB + return $ curChainMaxSlotNo `max` volatileDbMaxSlotNo {------------------------------------------------------------------------------- Unifying interface over the immutable DB and volatile DB, but independent diff --git a/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/Impl/Types.hs b/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/Impl/Types.hs index 24f0706364..14d59080bd 100644 --- a/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/Impl/Types.hs +++ b/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/Impl/Types.hs @@ -43,14 +43,11 @@ module Ouroboros.Consensus.Storage.ChainDB.Impl.Types ( -- * Blocks to add , BlockToAdd (..) , ChainSelMessage (..) - , ChainSelQueue -- opaque + , ChainSelQueue , addBlockToAdd , addReprocessLoEBlocks , closeChainSelQueue - , deleteBlockToAdd - , getBlocksToAddMaxSlotNo , getChainSelMessage - , memberBlocksToAdd , newChainSelQueue -- * Trace types , SelectionChangedInfo (..) @@ -67,12 +64,12 @@ module Ouroboros.Consensus.Storage.ChainDB.Impl.Types ( , TraceValidationEvent (..) 
) where -import Cardano.Prelude (Bifunctor (second)) -import Control.Monad (void, when) +import Control.Monad (when) +import Cardano.Prelude (whenM) import Control.Tracer -import Data.Foldable (for_) +import Data.Foldable (traverse_) import Data.Map.Strict (Map) -import qualified Data.Map.Strict as Map +import Data.Maybe (mapMaybe) import Data.Maybe.Strict (StrictMaybe (..)) import Data.Set (Set) import Data.Typeable @@ -110,10 +107,9 @@ import Ouroboros.Consensus.Util.CallStack import Ouroboros.Consensus.Util.Enclose (Enclosing, Enclosing' (..)) import Ouroboros.Consensus.Util.IOLike import Ouroboros.Consensus.Util.ResourceRegistry -import Ouroboros.Consensus.Util.STM (WithFingerprint, - blockUntilChanged) +import Ouroboros.Consensus.Util.STM (WithFingerprint) import Ouroboros.Network.AnchoredFragment (AnchoredFragment) -import Ouroboros.Network.Block (MaxSlotNo (..)) +import Ouroboros.Network.Block (MaxSlotNo) import Ouroboros.Network.BlockFetch.ConsensusInterface (ChainSelStarvation (..)) @@ -259,17 +255,6 @@ data ChainDbEnv m blk = CDB , cdbCheckInFuture :: !(CheckInFuture m blk) , cdbChainSelQueue :: !(ChainSelQueue m blk) -- ^ Queue of blocks that still have to be added. - -- - -- NOTE: the set of blocks in this queue are /not/ disjoint from the set of - -- blocks in the VolatileDB. When processing the next block in the queue, we - -- do not remove the block from the queue /until/ it has been added to the - -- VolatileDB and processed by chain selection. This means the block - -- currently being added will be both in the queue and the VolatileDB for a - -- short while. - -- - -- If we would remove the block from the queue before adding it to the - -- VolatileDB, then it would be in /neither/ for a short time, and - -- 'getIsFetched' would incorrectly return 'False'. , cdbFutureBlocks :: !(StrictTVar m (FutureBlocks m blk)) -- ^ Blocks from the future -- @@ -465,21 +450,8 @@ type FutureBlocks m blk = Map (HeaderHash blk) (Header blk, InvalidBlockPunishme -- | FIFO queue used to add blocks asynchronously to the ChainDB. Blocks are -- read from this queue by a background thread, which processes the blocks -- synchronously. -data ChainSelQueue m blk = ChainSelQueue { - -- TODO use a better data structure, e.g., a heap from the @heaps@ - -- package. Wish list: - -- + O(1) pop min value - -- + O(log n) insert - -- + O(n) get all - -- + Bounded in size - -- - -- TODO join consecutive blocks into a fragment that can be added at - -- once. - varChainSelQueue :: !(StrictTVar m (Map (RealPoint blk) (BlockToAdd m blk))) - , chainSelQueueCapacity :: !Word - , varChainSelReprocessLoEBlocks :: !(StrictTVar m Bool) - } - deriving (NoThunks) via OnlyCheckWhnfNamed "ChainSelQueue" (ChainSelQueue m blk) +newtype ChainSelQueue m blk = ChainSelQueue (TBQueue m (ChainSelMessage m blk)) + deriving NoThunks via OnlyCheckWhnfNamed "ChainSelQueue" (ChainSelQueue m blk) -- | Entry in the 'ChainSelQueue' queue: a block together with the 'TMVar's used -- to implement 'AddBlockPromise'. @@ -493,7 +465,6 @@ data BlockToAdd m blk = BlockToAdd , varBlockProcessed :: !(StrictTMVar m (AddBlockResult blk)) -- ^ Used for the 'blockProcessed' field of 'AddBlockPromise'. } - deriving NoThunks via OnlyCheckWhnfNamed "BlockToAdd" (BlockToAdd m blk) -- | Different async tasks for triggering ChainSel data ChainSelMessage m blk @@ -503,11 +474,9 @@ data ChainSelMessage m blk | ChainSelReprocessLoEBlocks -- | Create a new 'ChainSelQueue' with the given size. 
-newChainSelQueue :: (IOLike m, StandardHash blk, Typeable blk) => Word -> m (ChainSelQueue m blk) -newChainSelQueue chainSelQueueCapacity = do - varChainSelQueue <- newTVarIO mempty - varChainSelReprocessLoEBlocks <- newTVarIO False - return $ ChainSelQueue {varChainSelQueue, chainSelQueueCapacity, varChainSelReprocessLoEBlocks} +newChainSelQueue :: IOLike m => Word -> m (ChainSelQueue m blk) +newChainSelQueue queueSize = ChainSelQueue <$> + atomically (newTBQueue (fromIntegral queueSize)) -- | Add a block to the 'ChainSelQueue' queue. Can block when the queue is full. addBlockToAdd :: @@ -517,7 +486,7 @@ addBlockToAdd :: -> InvalidBlockPunishment m -> blk -> m (AddBlockPromise m blk) -addBlockToAdd tracer (ChainSelQueue {varChainSelQueue, chainSelQueueCapacity}) punish blk = do +addBlockToAdd tracer (ChainSelQueue queue) punish blk = do varBlockWrittenToDisk <- newEmptyTMVarIO varBlockProcessed <- newEmptyTMVarIO let !toAdd = BlockToAdd @@ -528,12 +497,8 @@ addBlockToAdd tracer (ChainSelQueue {varChainSelQueue, chainSelQueueCapacity}) p } traceWith tracer $ AddedBlockToQueue (blockRealPoint blk) RisingEdge queueSize <- atomically $ do - chainSelQueue <- readTVar varChainSelQueue - let chainSelQueue' = Map.insert (blockRealPoint blk) toAdd chainSelQueue - chainSelQueueSize = Map.size chainSelQueue' - check (fromIntegral chainSelQueueSize <= chainSelQueueCapacity) - writeTVar varChainSelQueue chainSelQueue' - return chainSelQueueSize + writeTBQueue queue (ChainSelAddBlock toAdd) + lengthTBQueue queue traceWith tracer $ AddedBlockToQueue (blockRealPoint blk) (FallingEdgeWith (fromIntegral queueSize)) return AddBlockPromise @@ -547,84 +512,54 @@ addReprocessLoEBlocks => Tracer m (TraceAddBlockEvent blk) -> ChainSelQueue m blk -> m () -addReprocessLoEBlocks tracer (ChainSelQueue {varChainSelReprocessLoEBlocks}) = do +addReprocessLoEBlocks tracer (ChainSelQueue queue) = do traceWith tracer $ AddedReprocessLoEBlocksToQueue - atomically $ writeTVar varChainSelReprocessLoEBlocks True + atomically $ writeTBQueue queue ChainSelReprocessLoEBlocks -- | Get the oldest message from the 'ChainSelQueue' queue. Can block when the -- queue is empty; in that case, reports the starvation (and its end) to the -- callback. getChainSelMessage - :: IOLike m + :: (IOLike m, HasHeader blk) => Tracer m (TraceChainSelStarvationEvent blk) -> StrictTVar m ChainSelStarvation -> ChainSelQueue m blk -> m (ChainSelMessage m blk) -getChainSelMessage starvationTracer starvationVar queue = go +getChainSelMessage starvationTracer starvationVar (ChainSelQueue queue) = do + -- NOTE: The test of emptiness and the blocking read are in different STM + -- transactions on purpose. + whenM (isEmptyTBQueueIO queue) $ do + prevStarvation <- swapTVarIO starvationVar ChainSelStarvationOngoing + when (prevStarvation /= ChainSelStarvationOngoing) $ + traceWith starvationTracer . ChainSelStarvationStarted =<< getMonotonicTime + message <- atomically $ readTBQueue queue + -- If there was a starvation ongoing, we need to report that it is done. 
+ whenM ((== ChainSelStarvationOngoing) <$> readTVarIO starvationVar) $ + case message of + ChainSelAddBlock BlockToAdd {blockToAdd} -> do + time <- getMonotonicTime + traceWith starvationTracer $ ChainSelStarvationEnded time $ blockRealPoint blockToAdd + writeTVarIO starvationVar $ ChainSelStarvationEndedAt time + ChainSelReprocessLoEBlocks -> pure () + return message where - go = do - (reprocessLoEBlocks, chainSelQueue) <- atomically readBoth - case reprocessLoEBlocks of - True -> do - writeTVarIO varChainSelReprocessLoEBlocks False - return ChainSelReprocessLoEBlocks - False -> - case Map.minView chainSelQueue of - Just (blockToAdd, chainSelQueue') -> do - writeTVarIO varChainSelQueue chainSelQueue' - return $ ChainSelAddBlock blockToAdd - Nothing -> do - prevStarvation <- swapTVarIO starvationVar ChainSelStarvationOngoing - when (prevStarvation /= ChainSelStarvationOngoing) $ - traceWith starvationTracer . ChainSelStarvationStarted =<< getMonotonicTime - void $ atomically $ blockUntilChanged (second Map.null) (False, True) readBoth - go - ChainSelQueue {varChainSelQueue, varChainSelReprocessLoEBlocks} = queue writeTVarIO v x = atomically $ writeTVar v x swapTVarIO v x = atomically $ swapTVar v x - readBoth = (,) <$> readTVar varChainSelReprocessLoEBlocks <*> readTVar varChainSelQueue + isEmptyTBQueueIO q = atomically $ isEmptyTBQueue q -- | Flush the 'ChainSelQueue' queue and notify the waiting threads. -- --- REVIEW: What about all the threads that are waiting to write in the queue and --- will write after the flush?! closeChainSelQueue :: IOLike m => ChainSelQueue m blk -> STM m () -closeChainSelQueue ChainSelQueue {varChainSelQueue} = do - chainSelQueue <- readTVar varChainSelQueue - for_ chainSelQueue $ \BlockToAdd {varBlockProcessed} -> - putTMVar varBlockProcessed $ FailedToAddBlock "Queue flushed" - --- | Delete the given 'BlockToAdd' from the 'ChainSelQueue'. --- --- PRECONDITION: the given 'BlockToAdd' is in 'ChainSelQueue'. -deleteBlockToAdd :: - (IOLike m, HasHeader blk) - => BlockToAdd m blk - -> ChainSelQueue m blk - -> m () -deleteBlockToAdd (BlockToAdd _ blk _ _) (ChainSelQueue {varChainSelQueue}) = - atomically $ modifyTVar varChainSelQueue $ Map.delete (blockRealPoint blk) - --- | Return a function to test the membership for the given 'BlocksToAdd'. -memberBlocksToAdd :: - (IOLike m, HasHeader blk) - => ChainSelQueue m blk - -> STM m (RealPoint blk -> Bool) -memberBlocksToAdd (ChainSelQueue {varChainSelQueue}) = - flip Map.member <$> readTVar varChainSelQueue - -getBlocksToAddMaxSlotNo :: - IOLike m - => ChainSelQueue m blk - -> STM m MaxSlotNo -getBlocksToAddMaxSlotNo (ChainSelQueue {varChainSelQueue}) = aux <$> readTVar varChainSelQueue +closeChainSelQueue (ChainSelQueue queue) = do + as <- mapMaybe blockAdd <$> flushTBQueue queue + traverse_ (\a -> tryPutTMVar (varBlockProcessed a) + (FailedToAddBlock "Queue flushed")) + as where - -- | The 'Ord' instance of 'RealPoint' orders by 'SlotNo' first, so the - -- maximal key of the map has the greatest 'SlotNo'. 
- aux :: Map (RealPoint blk) (BlockToAdd m blk) -> MaxSlotNo - aux queue = case Map.lookupMax queue of - Nothing -> NoMaxSlotNo - Just (RealPoint s _, _) -> MaxSlotNo s + blockAdd = \case + ChainSelAddBlock ab -> Just ab + ChainSelReprocessLoEBlocks -> Nothing + {------------------------------------------------------------------------------- Trace types From e742b01880fd215bb6d7d55c989e6c9de62c8d2c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nicolas=20=E2=80=9CNiols=E2=80=9D=20Jeannerod?= Date: Mon, 15 Jul 2024 12:36:11 +0200 Subject: [PATCH 28/76] Add explicit tracing events for CSJ --- .../Ouroboros/Consensus/Node/Tracers.hs | 5 ++ .../Ouroboros/Consensus/NodeKernel.hs | 1 + .../Consensus/PeerSimulator/BlockFetch.hs | 1 + .../BlockFetch/ClientInterface.hs | 10 +-- .../MiniProtocol/ChainSync/Client/Jumping.hs | 72 +++++++++++-------- .../Consensus/Storage/ChainDB/Impl/Types.hs | 2 +- .../MiniProtocol/BlockFetch/Client.hs | 1 + 7 files changed, 56 insertions(+), 36 deletions(-) diff --git a/ouroboros-consensus-diffusion/src/ouroboros-consensus-diffusion/Ouroboros/Consensus/Node/Tracers.hs b/ouroboros-consensus-diffusion/src/ouroboros-consensus-diffusion/Ouroboros/Consensus/Node/Tracers.hs index 59e4052d92..8123eda4ea 100644 --- a/ouroboros-consensus-diffusion/src/ouroboros-consensus-diffusion/Ouroboros/Consensus/Node/Tracers.hs +++ b/ouroboros-consensus-diffusion/src/ouroboros-consensus-diffusion/Ouroboros/Consensus/Node/Tracers.hs @@ -31,6 +31,7 @@ import Ouroboros.Consensus.MiniProtocol.BlockFetch.Server (TraceBlockFetchServerEvent) import Ouroboros.Consensus.MiniProtocol.ChainSync.Client (InvalidBlockReason, TraceChainSyncClientEvent) +import qualified Ouroboros.Consensus.MiniProtocol.ChainSync.Client.Jumping as CSJumping import Ouroboros.Consensus.MiniProtocol.ChainSync.Server (TraceChainSyncServerEvent) import Ouroboros.Consensus.MiniProtocol.LocalTxSubmission.Server @@ -67,6 +68,7 @@ data Tracers' remotePeer localPeer blk f = Tracers , consensusErrorTracer :: f SomeException , gsmTracer :: f (TraceGsmEvent (Tip blk)) , gddTracer :: f (TraceGDDEvent remotePeer blk) + , csjTracer :: f (CSJumping.TraceEvent remotePeer) } instance (forall a. Semigroup (f a)) @@ -89,6 +91,7 @@ instance (forall a. Semigroup (f a)) , consensusErrorTracer = f consensusErrorTracer , gsmTracer = f gsmTracer , gddTracer = f gddTracer + , csjTracer = f csjTracer } where f :: forall a. 
Semigroup a @@ -119,6 +122,7 @@ nullTracers = Tracers , consensusErrorTracer = nullTracer , gsmTracer = nullTracer , gddTracer = nullTracer + , csjTracer = nullTracer } showTracers :: ( Show blk @@ -152,6 +156,7 @@ showTracers tr = Tracers , consensusErrorTracer = showTracing tr , gsmTracer = showTracing tr , gddTracer = showTracing tr + , csjTracer = showTracing tr } {------------------------------------------------------------------------------- diff --git a/ouroboros-consensus-diffusion/src/ouroboros-consensus-diffusion/Ouroboros/Consensus/NodeKernel.hs b/ouroboros-consensus-diffusion/src/ouroboros-consensus-diffusion/Ouroboros/Consensus/NodeKernel.hs index 11423e65e2..729bcbd0c4 100644 --- a/ouroboros-consensus-diffusion/src/ouroboros-consensus-diffusion/Ouroboros/Consensus/NodeKernel.hs +++ b/ouroboros-consensus-diffusion/src/ouroboros-consensus-diffusion/Ouroboros/Consensus/NodeKernel.hs @@ -377,6 +377,7 @@ initInternalState NodeKernelArgs { tracers, chainDB, registry, cfg (GSM.gsmStateToLedgerJudgement <$> readTVar varGsmState) blockFetchInterface :: BlockFetchConsensusInterface (ConnectionId addrNTN) (Header blk) blk m blockFetchInterface = BlockFetchClientInterface.mkBlockFetchConsensusInterface + (csjTracer tracers) (configBlock cfg) (BlockFetchClientInterface.defaultChainDbView chainDB) varChainSyncHandles diff --git a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/BlockFetch.hs b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/BlockFetch.hs index aaf582555b..0fcc5b723a 100644 --- a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/BlockFetch.hs +++ b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/BlockFetch.hs @@ -86,6 +86,7 @@ startBlockFetchLogic enableChainSelStarvation registry tracer chainDb fetchClien blockFetchConsensusInterface = BlockFetchClientInterface.mkBlockFetchConsensusInterface + nullTracer -- FIXME (TestBlockConfig $ NumCoreNodes 0) -- Only needed when minting blocks (BlockFetchClientInterface.defaultChainDbView chainDb) csHandlesCol diff --git a/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/MiniProtocol/BlockFetch/ClientInterface.hs b/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/MiniProtocol/BlockFetch/ClientInterface.hs index 93750475a9..276626e26c 100644 --- a/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/MiniProtocol/BlockFetch/ClientInterface.hs +++ b/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/MiniProtocol/BlockFetch/ClientInterface.hs @@ -14,6 +14,7 @@ module Ouroboros.Consensus.MiniProtocol.BlockFetch.ClientInterface ( ) where import Control.Monad +import Control.Tracer (Tracer) import Data.Map.Strict (Map) import Data.Time.Clock (UTCTime) import GHC.Stack (HasCallStack) @@ -29,7 +30,7 @@ import Ouroboros.Consensus.Ledger.Extended import Ouroboros.Consensus.Ledger.SupportsProtocol (LedgerSupportsProtocol) import qualified Ouroboros.Consensus.MiniProtocol.ChainSync.Client as CSClient -import qualified Ouroboros.Consensus.MiniProtocol.ChainSync.Client.Jumping as Jumping +import qualified Ouroboros.Consensus.MiniProtocol.ChainSync.Client.Jumping as CSJumping import Ouroboros.Consensus.Storage.ChainDB.API (ChainDB) import qualified Ouroboros.Consensus.Storage.ChainDB.API as ChainDB import Ouroboros.Consensus.Storage.ChainDB.API.Types.InvalidBlockPunishment @@ -179,7 +180,8 @@ mkBlockFetchConsensusInterface :: , Ord peer , LedgerSupportsProtocol blk ) - => 
BlockConfig blk + => Tracer m (CSJumping.TraceEvent peer) + -> BlockConfig blk -> ChainDbView m blk -> CSClient.ChainSyncClientHandleCollection peer m blk -> (Header blk -> SizeInBytes) @@ -189,7 +191,7 @@ mkBlockFetchConsensusInterface :: -- ^ See 'readFetchMode'. -> BlockFetchConsensusInterface peer (Header blk) blk m mkBlockFetchConsensusInterface - bcfg chainDB csHandlesCol blockFetchSize slotForgeTime readFetchMode = + csjTracer bcfg chainDB csHandlesCol blockFetchSize slotForgeTime readFetchMode = BlockFetchConsensusInterface {..} where getCandidates :: STM m (Map peer (AnchoredFragment (Header blk))) @@ -355,4 +357,4 @@ mkBlockFetchConsensusInterface readChainSelStarvation = getChainSelStarvation chainDB demoteCSJDynamo :: peer -> m () - demoteCSJDynamo = void . atomically . Jumping.rotateDynamo csHandlesCol + demoteCSJDynamo = CSJumping.rotateDynamo csjTracer csHandlesCol diff --git a/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/MiniProtocol/ChainSync/Client/Jumping.hs b/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/MiniProtocol/ChainSync/Client/Jumping.hs index 8655046760..a54bcb0711 100644 --- a/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/MiniProtocol/ChainSync/Client/Jumping.hs +++ b/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/MiniProtocol/ChainSync/Client/Jumping.hs @@ -166,6 +166,7 @@ module Ouroboros.Consensus.MiniProtocol.ChainSync.Client.Jumping ( , JumpInstruction (..) , JumpResult (..) , Jumping (..) + , TraceEvent (..) , getDynamo , makeContext , mkJumping @@ -177,7 +178,8 @@ module Ouroboros.Consensus.MiniProtocol.ChainSync.Client.Jumping ( import Cardano.Slotting.Slot (SlotNo (..), WithOrigin (..)) import Control.Monad (forM, forM_, void, when) -import Data.Foldable (toList) +import Control.Tracer (Tracer, traceWith) +import Data.Foldable (toList, traverse_) import Data.List (sortOn) import qualified Data.Map as Map import Data.Maybe (catMaybes, fromMaybe) @@ -775,38 +777,42 @@ rotateDynamo :: LedgerSupportsProtocol blk, MonadSTM m ) => + Tracer m (TraceEvent peer) -> ChainSyncClientHandleCollection peer m blk -> peer -> - STM m (Maybe (peer, ChainSyncClientHandle m blk)) -rotateDynamo handlesCol peer = do - handles <- cschcMap handlesCol - case handles Map.!? peer of - Nothing -> - -- Do not re-elect a dynamo if the peer has been disconnected. - getDynamo handlesCol - Just oldDynHandle -> - readTVar (cschJumping oldDynHandle) >>= \case - Dynamo{} -> do - cschcRotateHandle handlesCol peer - peerStates <- cschcSeq handlesCol - mEngaged <- findNonDisengaged peerStates - case mEngaged of - Nothing -> - -- There are no engaged peers. This case cannot happen, as the - -- dynamo is always engaged. - error "rotateDynamo: no engaged peer found" - Just (newDynamoId, newDynHandle) - | newDynamoId == peer -> - -- The old dynamo is the only engaged peer left. - pure $ Just (newDynamoId, newDynHandle) - | otherwise -> do - newJumper Nothing (Happy FreshJumper Nothing) - >>= writeTVar (cschJumping oldDynHandle) - promoteToDynamo peerStates newDynamoId newDynHandle - pure $ Just (newDynamoId, newDynHandle) - _ -> - -- Do not re-elect a dynamo if the peer is not the dynamo. - getDynamo handlesCol + m () + -- STM m (Maybe (peer, ChainSyncClientHandle m blk)) +rotateDynamo tracer handlesCol peer = do + traceEvent <- atomically $ do + handles <- cschcMap handlesCol + case handles Map.!? peer of + Nothing -> + -- Do not re-elect a dynamo if the peer has been disconnected. 
+ pure Nothing + Just oldDynHandle -> + readTVar (cschJumping oldDynHandle) >>= \case + Dynamo{} -> do + cschcRotateHandle handlesCol peer + peerStates <- cschcSeq handlesCol + mEngaged <- findNonDisengaged peerStates + case mEngaged of + Nothing -> + -- There are no engaged peers. This case cannot happen, as the + -- dynamo is always engaged. + error "rotateDynamo: no engaged peer found" + Just (newDynamoId, newDynHandle) + | newDynamoId == peer -> + -- The old dynamo is the only engaged peer left. + pure Nothing + | otherwise -> do + newJumper Nothing (Happy FreshJumper Nothing) + >>= writeTVar (cschJumping oldDynHandle) + promoteToDynamo peerStates newDynamoId newDynHandle + pure $ Just $ RotatedDynamo peer newDynamoId + _ -> + -- Do not re-elect a dynamo if the peer is not the dynamo. + pure Nothing + traverse_ (traceWith tracer) traceEvent -- | Choose an unspecified new non-idling dynamo and demote all other peers to -- jumpers. @@ -907,3 +913,7 @@ electNewObjector context = do pure $ Just (badPoint, (initState, goodJumpInfo, handle)) _ -> pure Nothing + +data TraceEvent peer + = RotatedDynamo peer peer + deriving (Show) diff --git a/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/Impl/Types.hs b/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/Impl/Types.hs index 14d59080bd..686abf6086 100644 --- a/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/Impl/Types.hs +++ b/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/Impl/Types.hs @@ -64,8 +64,8 @@ module Ouroboros.Consensus.Storage.ChainDB.Impl.Types ( , TraceValidationEvent (..) ) where -import Control.Monad (when) import Cardano.Prelude (whenM) +import Control.Monad (when) import Control.Tracer import Data.Foldable (traverse_) import Data.Map.Strict (Map) diff --git a/ouroboros-consensus/test/consensus-test/Test/Consensus/MiniProtocol/BlockFetch/Client.hs b/ouroboros-consensus/test/consensus-test/Test/Consensus/MiniProtocol/BlockFetch/Client.hs index fb1e61fb0d..b982fbf4f7 100644 --- a/ouroboros-consensus/test/consensus-test/Test/Consensus/MiniProtocol/BlockFetch/Client.hs +++ b/ouroboros-consensus/test/consensus-test/Test/Consensus/MiniProtocol/BlockFetch/Client.hs @@ -281,6 +281,7 @@ runBlockFetchTest BlockFetchClientTestSetup{..} = withRegistry \registry -> do -> BlockFetchConsensusInterface PeerId (Header TestBlock) TestBlock m mkTestBlockFetchConsensusInterface getCandidates chainDbView = (BlockFetchClientInterface.mkBlockFetchConsensusInterface @m @PeerId + nullTracer (TestBlockConfig numCoreNodes) chainDbView (error "ChainSyncClientHandleCollection not provided to mkBlockFetchConsensusInterface") From a12cd89e7888da77e9e1a43dc58c5d068ab6d82d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nicolas=20=E2=80=9CNiols=E2=80=9D=20Jeannerod?= Date: Mon, 15 Jul 2024 13:12:35 +0200 Subject: [PATCH 29/76] Follow changes in blockfetch decision tracing --- .../Ouroboros/Consensus/Node/Tracers.hs | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/ouroboros-consensus-diffusion/src/ouroboros-consensus-diffusion/Ouroboros/Consensus/Node/Tracers.hs b/ouroboros-consensus-diffusion/src/ouroboros-consensus-diffusion/Ouroboros/Consensus/Node/Tracers.hs index 8123eda4ea..dba2da3df7 100644 --- a/ouroboros-consensus-diffusion/src/ouroboros-consensus-diffusion/Ouroboros/Consensus/Node/Tracers.hs +++ b/ouroboros-consensus-diffusion/src/ouroboros-consensus-diffusion/Ouroboros/Consensus/Node/Tracers.hs @@ -38,8 
+38,10 @@ import Ouroboros.Consensus.MiniProtocol.LocalTxSubmission.Server (TraceLocalTxSubmissionServerEvent (..)) import Ouroboros.Consensus.Node.GSM (TraceGsmEvent) import Ouroboros.Network.Block (Tip) -import Ouroboros.Network.BlockFetch (FetchDecision, - TraceFetchClientState, TraceLabelPeer) +import Ouroboros.Network.BlockFetch (TraceFetchClientState, + TraceLabelPeer) +import Ouroboros.Network.BlockFetch.Decision.Trace + (TraceDecisionEvent) import Ouroboros.Network.KeepAlive (TraceKeepAliveClient) import Ouroboros.Network.TxSubmission.Inbound (TraceTxSubmissionInbound) @@ -54,7 +56,7 @@ data Tracers' remotePeer localPeer blk f = Tracers { chainSyncClientTracer :: f (TraceLabelPeer remotePeer (TraceChainSyncClientEvent blk)) , chainSyncServerHeaderTracer :: f (TraceLabelPeer remotePeer (TraceChainSyncServerEvent blk)) , chainSyncServerBlockTracer :: f (TraceChainSyncServerEvent blk) - , blockFetchDecisionTracer :: f [TraceLabelPeer remotePeer (FetchDecision [Point (Header blk)])] + , blockFetchDecisionTracer :: f (TraceDecisionEvent remotePeer (Header blk)) , blockFetchClientTracer :: f (TraceLabelPeer remotePeer (TraceFetchClientState (Header blk))) , blockFetchServerTracer :: f (TraceLabelPeer remotePeer (TraceBlockFetchServerEvent blk)) , txInboundTracer :: f (TraceLabelPeer remotePeer (TraceTxSubmissionInbound (GenTxId blk) (GenTx blk))) From eafa6a4ce1ec280768bbcb0636eb7c7627492b9f Mon Sep 17 00:00:00 2001 From: Nicolas BACQUEY Date: Tue, 16 Jul 2024 19:04:29 +0200 Subject: [PATCH 30/76] Move Genesis-specific BlockFetch config to GenesisConfig --- .../Ouroboros/Consensus/Node/Genesis.hs | 63 +++++++++++++++---- .../Test/ThreadNet/Network.hs | 2 +- .../Consensus/PeerSimulator/BlockFetch.hs | 9 ++- .../MiniProtocol/BlockFetch/Client.hs | 5 +- 4 files changed, 63 insertions(+), 16 deletions(-) diff --git a/ouroboros-consensus-diffusion/src/ouroboros-consensus-diffusion/Ouroboros/Consensus/Node/Genesis.hs b/ouroboros-consensus-diffusion/src/ouroboros-consensus-diffusion/Ouroboros/Consensus/Node/Genesis.hs index 27a83a7ad0..c7564b4f59 100644 --- a/ouroboros-consensus-diffusion/src/ouroboros-consensus-diffusion/Ouroboros/Consensus/Node/Genesis.hs +++ b/ouroboros-consensus-diffusion/src/ouroboros-consensus-diffusion/Ouroboros/Consensus/Node/Genesis.hs @@ -18,6 +18,7 @@ module Ouroboros.Consensus.Node.Genesis ( ) where import Control.Monad (join) +import Data.Maybe (isJust) import Data.Traversable (for) import Ouroboros.Consensus.Block import Ouroboros.Consensus.MiniProtocol.ChainSync.Client @@ -32,6 +33,10 @@ import Ouroboros.Consensus.Util.Args import Ouroboros.Consensus.Util.IOLike import Ouroboros.Network.AnchoredFragment (AnchoredFragment) import qualified Ouroboros.Network.AnchoredFragment as AF +import Ouroboros.Network.BlockFetch + (GenesisBlockFetchConfiguration (..)) +import System.Environment (lookupEnv) +import System.IO.Unsafe (unsafePerformIO) -- | Whether to en-/disable the Limit on Eagerness and the Genesis Density -- Disconnector. @@ -41,29 +46,63 @@ data LoEAndGDDConfig a = deriving stock (Show, Functor, Foldable, Traversable) -- | Aggregating the various configs for Genesis-related subcomponents. 
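
The rewrite that follows temporarily sources these flags from environment variables such as ENABLE_GENESIS and LOP_RATE (a later patch in this series replaces this mechanism with explicit 'GenesisConfigFlags'). The lookup pattern can be sketched in isolation; unlike the hunk below, which uses 'read' and therefore crashes on malformed values, this sketch falls back to the default via 'readMaybe'. The helper names are illustrative only:

import Data.Maybe (fromMaybe, isJust)
import System.Environment (lookupEnv)
import Text.Read (readMaybe)

-- A variable acts as a boolean feature flag: being set at all means enabled.
isEnabled :: String -> IO Bool
isEnabled var = isJust <$> lookupEnv var

-- A numeric tunable falls back to its default when unset or unparseable.
numericWithDefault :: Integer -> String -> IO Integer
numericWithDefault def var = do
  mval <- lookupEnv var
  pure $ fromMaybe def (readMaybe =<< mval)

main :: IO ()
main = do
  genesis <- isEnabled "ENABLE_GENESIS"
  rate    <- numericWithDefault 500 "LOP_RATE"   -- default taken from this patch
  print (genesis, rate)

Note that the hunk below performs these lookups under 'unsafePerformIO' with a NOINLINE pragma so that they happen at most once; doing them in ordinary IO, as here, avoids that caveat.
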
-data GenesisConfig = GenesisConfig { - gcChainSyncLoPBucketConfig :: !ChainSyncLoPBucketConfig +data GenesisConfig = GenesisConfig + { gcBlockFetchConfig :: !GenesisBlockFetchConfiguration + , gcChainSyncLoPBucketConfig :: !ChainSyncLoPBucketConfig , gcCSJConfig :: !CSJConfig , gcLoEAndGDDConfig :: !(LoEAndGDDConfig ()) } -- TODO justification/derivation from other parameters enableGenesisConfigDefault :: GenesisConfig -enableGenesisConfigDefault = GenesisConfig { - gcChainSyncLoPBucketConfig = ChainSyncLoPBucketEnabled ChainSyncLoPBucketEnabledConfig { - csbcCapacity = 100_000 -- number of tokens - , csbcRate = 500 -- tokens per second leaking, 1/2ms - } - , gcCSJConfig = CSJEnabled CSJEnabledConfig { - csjcJumpSize = 3 * 2160 * 20 -- mainnet forecast range +enableGenesisConfigDefault = unsafePerformIO $ do + enableGenesis <- isJust <$> lookupEnv "ENABLE_GENESIS" + enableLoP <- (enableGenesis ||) . isJust <$> lookupEnv "ENABLE_LoP" + enableCSJ <- (enableGenesis ||) . isJust <$> lookupEnv "ENABLE_CSJ" + enableLoEAndGDD <- (enableGenesis ||) . isJust <$> lookupEnv "ENABLE_LoEGDD" + + let defaultBulkSyncGracePeriod = 10 -- seconds + defaultCapacity = 100_000 -- number of tokens + defaultRate = 500 -- tokens per second leaking, 1/2ms + defaultCSJJumpSize = 3 * 2160 * 20 -- mainnet forecast range + + gbfcBulkSyncGracePeriod <- maybe defaultBulkSyncGracePeriod + (fromInteger . read) <$> lookupEnv "BLOCKFETCH_GRACE_PERIOD" + csbcCapacity <- maybe defaultCapacity + (fromInteger . read) <$> lookupEnv "LOP_CAPACITY" + csbcRate <- maybe defaultRate + (fromInteger . read) <$> lookupEnv "LOP_RATE" + csjcJumpSize <- maybe defaultCSJJumpSize + (fromInteger . read) <$> lookupEnv "CSJ_JUMP_SIZE" + + pure $ GenesisConfig + { gcBlockFetchConfig = GenesisBlockFetchConfiguration + { gbfcBulkSyncGracePeriod } - , gcLoEAndGDDConfig = LoEAndGDDEnabled () + , gcChainSyncLoPBucketConfig = if enableLoP + then ChainSyncLoPBucketEnabled ChainSyncLoPBucketEnabledConfig + { csbcCapacity + , csbcRate + } + else ChainSyncLoPBucketDisabled + , gcCSJConfig = if enableCSJ + then CSJEnabled CSJEnabledConfig + { csjcJumpSize + } + else CSJDisabled + , gcLoEAndGDDConfig = if enableLoEAndGDD + then LoEAndGDDEnabled () + else LoEAndGDDDisabled } +{-# NOINLINE enableGenesisConfigDefault #-} -- | Disable all Genesis components, yielding Praos behavior. disableGenesisConfig :: GenesisConfig -disableGenesisConfig = GenesisConfig { - gcChainSyncLoPBucketConfig = ChainSyncLoPBucketDisabled +disableGenesisConfig = GenesisConfig + { gcBlockFetchConfig = GenesisBlockFetchConfiguration + { gbfcBulkSyncGracePeriod = 0 -- no grace period when Genesis is disabled + } + , gcChainSyncLoPBucketConfig = ChainSyncLoPBucketDisabled , gcCSJConfig = CSJDisabled , gcLoEAndGDDConfig = LoEAndGDDDisabled } diff --git a/ouroboros-consensus-diffusion/src/unstable-diffusion-testlib/Test/ThreadNet/Network.hs b/ouroboros-consensus-diffusion/src/unstable-diffusion-testlib/Test/ThreadNet/Network.hs index d1d21e6562..73ccb384e9 100644 --- a/ouroboros-consensus-diffusion/src/unstable-diffusion-testlib/Test/ThreadNet/Network.hs +++ b/ouroboros-consensus-diffusion/src/unstable-diffusion-testlib/Test/ThreadNet/Network.hs @@ -1020,7 +1020,7 @@ runThreadNetwork systemTime ThreadNetworkArgs -- interval which doesn't play nice with -- blockfetch descision interval. 
, bfcSalt = 0
-                  , bfcBulkSyncGracePeriod = 10 -- seconds
+                  , bfcGenesisBFConfig = gcBlockFetchConfig enableGenesisConfigDefault
                   }
               , gsmArgs = GSM.GsmNodeKernelArgs {
                     gsmAntiThunderingHerd = kaRng
diff --git a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/BlockFetch.hs b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/BlockFetch.hs
index 0fcc5b723a..4b1fc6450c 100644
--- a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/BlockFetch.hs
+++ b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/BlockFetch.hs
@@ -97,6 +97,12 @@ startBlockFetchLogic enableChainSelStarvation registry tracer chainDb fetchClien
       -- This is a syncing test, so we use 'FetchModeBulkSync'.
       (pure FetchModeBulkSync)

+      bfcGenesisBFConfig = if enableChainSelStarvation
+        then GenesisBlockFetchConfiguration
+          { gbfcBulkSyncGracePeriod = 1000000 -- (more than 11 days)
+          }
+        else gcBlockFetchConfig enableGenesisConfigDefault
+
       -- Values taken from
       -- ouroboros-consensus-diffusion/src/unstable-diffusion-testlib/Test/ThreadNet/Network.hs
       blockFetchCfg = BlockFetchConfiguration
@@ -104,8 +110,7 @@ startBlockFetchLogic enableChainSelStarvation registry tracer chainDb fetchClien
           , bfcMaxRequestsInflight = 10
           , bfcDecisionLoopInterval = 0
           , bfcSalt = 0
-          , bfcBulkSyncGracePeriod =
-              if enableChainSelStarvation then 10 else 1000000 -- (more than 11 days)
+          , bfcBulkSyncGracePeriod
           }

       void $ forkLinkedThread registry "BlockFetchLogic" $
diff --git a/ouroboros-consensus/test/consensus-test/Test/Consensus/MiniProtocol/BlockFetch/Client.hs b/ouroboros-consensus/test/consensus-test/Test/Consensus/MiniProtocol/BlockFetch/Client.hs
index b982fbf4f7..705fc8ff8e 100644
--- a/ouroboros-consensus/test/consensus-test/Test/Consensus/MiniProtocol/BlockFetch/Client.hs
+++ b/ouroboros-consensus/test/consensus-test/Test/Consensus/MiniProtocol/BlockFetch/Client.hs
@@ -5,6 +5,7 @@
 {-# LANGUAGE GeneralizedNewtypeDeriving #-}
 {-# LANGUAGE LambdaCase                 #-}
 {-# LANGUAGE NamedFieldPuns             #-}
+{-# LANGUAGE NumericUnderscores         #-}
 {-# LANGUAGE RecordWildCards            #-}
 {-# LANGUAGE ScopedTypeVariables        #-}
 {-# LANGUAGE TupleSections              #-}
@@ -51,7 +52,7 @@ import Ouroboros.Consensus.Util.STM (blockUntilJust,
                      forkLinkedWatcher)
 import Ouroboros.Network.AnchoredFragment (AnchoredFragment)
 import qualified Ouroboros.Network.AnchoredFragment as AF
-import Ouroboros.Network.BlockFetch (BlockFetchConfiguration (..),
+import Ouroboros.Network.BlockFetch (BlockFetchConfiguration (..), GenesisBlockFetchConfiguration (..),
                      BlockFetchConsensusInterface (..), FetchMode (..),
                      blockFetchLogic, bracketFetchClient, bracketKeepAliveClient,
                      bracketSyncWithFetchClient,
@@ -368,6 +369,8 @@ instance Arbitrary BlockFetchClientTestSetup where
             bfcBulkSyncGracePeriod = 10
         bfcMaxRequestsInflight <- chooseEnum (2, 10)
         bfcSalt <- arbitrary
+        gbfcBulkSyncGracePeriod <- fromIntegral <$> chooseInteger (5, 60)
+        let bfcGenesisBFConfig = GenesisBlockFetchConfiguration {..}
         pure BlockFetchConfiguration {..}
       pure BlockFetchClientTestSetup {..}
     where

From 4500891ddefb7477f0945305207056f9c558d06e Mon Sep 17 00:00:00 2001
From: Nicolas BACQUEY
Date: Wed, 17 Jul 2024 10:19:27 +0200
Subject: [PATCH 31/76] Add missing instances for TraceChainSelStarvationEvent

---
 .../Test/Ouroboros/Storage/ChainDB/StateMachine.hs | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/ouroboros-consensus/test/storage-test/Test/Ouroboros/Storage/ChainDB/StateMachine.hs
b/ouroboros-consensus/test/storage-test/Test/Ouroboros/Storage/ChainDB/StateMachine.hs index dc2a74d20d..378a4220b8 100644 --- a/ouroboros-consensus/test/storage-test/Test/Ouroboros/Storage/ChainDB/StateMachine.hs +++ b/ouroboros-consensus/test/storage-test/Test/Ouroboros/Storage/ChainDB/StateMachine.hs @@ -1252,6 +1252,8 @@ deriving instance SOP.Generic (ImmutableDB.TraceEvent blk) deriving instance SOP.HasDatatypeInfo (ImmutableDB.TraceEvent blk) deriving instance SOP.Generic (VolatileDB.TraceEvent blk) deriving instance SOP.HasDatatypeInfo (VolatileDB.TraceEvent blk) +deriving instance SOP.Generic (TraceChainSelStarvationEvent blk) +deriving instance SOP.HasDatatypeInfo (TraceChainSelStarvationEvent blk) data Tag = TagGetIsValidJust @@ -1631,7 +1633,7 @@ traceEventName = \case TraceLedgerReplayEvent ev -> "LedgerReplay." <> constrName ev TraceImmutableDBEvent ev -> "ImmutableDB." <> constrName ev TraceVolatileDBEvent ev -> "VolatileDB." <> constrName ev - TraceChainSelStarvationEvent _ -> "TraceChainSelStarvationEvent" + TraceChainSelStarvationEvent ev -> "ChainSelStarvation." <> constrName ev mkArgs :: IOLike m => TopLevelConfig Blk From ff5f0408cede21698ff3407c6ff2010b88535fe4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Facundo=20Dom=C3=ADnguez?= Date: Thu, 18 Jul 2024 18:28:57 +0000 Subject: [PATCH 32/76] Reapply "ChainDB: let the BlockFetch client add blocks asynchronously" This reverts commit 310d3a9ee9f783ab6c01337a96a92b3205423ad6. --- .../BlockFetch/ClientInterface.hs | 15 +- .../Storage/ChainDB/Impl/Background.hs | 7 +- .../Storage/ChainDB/Impl/ChainSel.hs | 12 +- .../Consensus/Storage/ChainDB/Impl/Query.hs | 30 ++-- .../Consensus/Storage/ChainDB/Impl/Types.hs | 155 +++++++++++++----- 5 files changed, 144 insertions(+), 75 deletions(-) diff --git a/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/MiniProtocol/BlockFetch/ClientInterface.hs b/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/MiniProtocol/BlockFetch/ClientInterface.hs index 276626e26c..1a3697a182 100644 --- a/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/MiniProtocol/BlockFetch/ClientInterface.hs +++ b/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/MiniProtocol/BlockFetch/ClientInterface.hs @@ -31,7 +31,8 @@ import Ouroboros.Consensus.Ledger.SupportsProtocol (LedgerSupportsProtocol) import qualified Ouroboros.Consensus.MiniProtocol.ChainSync.Client as CSClient import qualified Ouroboros.Consensus.MiniProtocol.ChainSync.Client.Jumping as CSJumping -import Ouroboros.Consensus.Storage.ChainDB.API (ChainDB) +import Ouroboros.Consensus.Storage.ChainDB.API (AddBlockPromise, + ChainDB) import qualified Ouroboros.Consensus.Storage.ChainDB.API as ChainDB import Ouroboros.Consensus.Storage.ChainDB.API.Types.InvalidBlockPunishment (InvalidBlockPunishment) @@ -57,16 +58,16 @@ data ChainDbView m blk = ChainDbView { getCurrentChain :: STM m (AnchoredFragment (Header blk)) , getIsFetched :: STM m (Point blk -> Bool) , getMaxSlotNo :: STM m MaxSlotNo - , addBlockWaitWrittenToDisk :: InvalidBlockPunishment m -> blk -> m Bool + , addBlockAsync :: InvalidBlockPunishment m -> blk -> m (AddBlockPromise m blk) , getChainSelStarvation :: STM m ChainSelStarvation } -defaultChainDbView :: IOLike m => ChainDB m blk -> ChainDbView m blk +defaultChainDbView :: ChainDB m blk -> ChainDbView m blk defaultChainDbView chainDB = ChainDbView { getCurrentChain = ChainDB.getCurrentChain chainDB , getIsFetched = ChainDB.getIsFetched chainDB , getMaxSlotNo = ChainDB.getMaxSlotNo chainDB - , 
addBlockWaitWrittenToDisk = ChainDB.addBlockWaitWrittenToDisk chainDB + , addBlockAsync = ChainDB.addBlockAsync chainDB , getChainSelStarvation = ChainDB.getChainSelStarvation chainDB } @@ -217,8 +218,8 @@ mkBlockFetchConsensusInterface pipeliningPunishment <- InvalidBlockPunishment.mkForDiffusionPipelining pure $ mkAddFetchedBlock_ pipeliningPunishment enabledPipelining - -- Waits until the block has been written to disk, but not until chain - -- selection has processed the block. + -- Hand over the block to the ChainDB, but don't wait until it has been + -- written to disk or processed. mkAddFetchedBlock_ :: ( BlockConfig blk -> Header blk @@ -262,7 +263,7 @@ mkBlockFetchConsensusInterface NotReceivingTentativeBlocks -> disconnect ReceivingTentativeBlocks -> pipeliningPunishment bcfg (getHeader blk) disconnect - addBlockWaitWrittenToDisk + addBlockAsync chainDB punishment blk diff --git a/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/Impl/Background.hs b/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/Impl/Background.hs index a854904cae..82a0a228be 100644 --- a/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/Impl/Background.hs +++ b/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/Impl/Background.hs @@ -540,6 +540,11 @@ addBlockRunner fuse cdb@CDB{..} = forever $ do ChainSelAddBlock BlockToAdd{blockToAdd} -> trace $ PoppedBlockFromQueue $ FallingEdgeWith $ blockRealPoint blockToAdd - chainSelSync cdb message) + chainSelSync cdb message + lift $ case message of + ChainSelAddBlock blockToAdd -> + deleteBlockToAdd blockToAdd cdbChainSelQueue + _ -> pure () + ) where starvationTracer = Tracer $ traceWith cdbTracer . TraceChainSelStarvationEvent diff --git a/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/Impl/ChainSel.hs b/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/Impl/ChainSel.hs index f6a8971ee8..2ae269f8d8 100644 --- a/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/Impl/ChainSel.hs +++ b/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/Impl/ChainSel.hs @@ -334,13 +334,11 @@ chainSelSync cdb@CDB {..} (ChainSelAddBlock BlockToAdd { blockToAdd = b, .. }) = let immBlockNo = AF.anchorBlockNo curChain -- We follow the steps from section "## Adding a block" in ChainDB.md - - -- Note: we call 'chainSelectionForFutureBlocks' in all branches instead - -- of once, before branching, because we want to do it /after/ writing the - -- block to the VolatileDB and delivering the 'varBlockWrittenToDisk' - -- promise, as this is the promise the BlockFetch client waits for. - -- Otherwise, the BlockFetch client would have to wait for - -- 'chainSelectionForFutureBlocks'. 
+  --
+  -- Note: we call 'chainSelectionForFutureBlocks' in all branches instead of
+  -- once, before branching, because we want to do it /after/ writing the
+  -- block to the VolatileDB so that any threads waiting on the
+  -- 'varBlockWrittenToDisk' promise don't have to wait for the result of 'chainSelectionForFutureBlocks'.

     -- ### Ignore
     newTip <- if
diff --git a/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/Impl/Query.hs b/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/Impl/Query.hs
index 96bb57eac3..c697f11804 100644
--- a/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/Impl/Query.hs
+++ b/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/Impl/Query.hs
@@ -131,18 +131,15 @@ getBlockComponent ::
 getBlockComponent CDB{..} = getAnyBlockComponent cdbImmutableDB cdbVolatileDB

 getIsFetched ::
-  forall m blk. IOLike m
+  forall m blk. (IOLike m, HasHeader blk)
   => ChainDbEnv m blk -> STM m (Point blk -> Bool)
-getIsFetched CDB{..} = basedOnHash <$> VolatileDB.getIsMember cdbVolatileDB
-  where
-    -- The volatile DB indexes by hash only, not by points. However, it should
-    -- not be possible to have two points with the same hash but different
-    -- slot numbers.
-    basedOnHash :: (HeaderHash blk -> Bool) -> Point blk -> Bool
-    basedOnHash f p =
-      case pointHash p of
-        BlockHash hash -> f hash
-        GenesisHash    -> False
+getIsFetched CDB{..} = do
+  checkBlocksToAdd <- memberBlocksToAdd cdbChainSelQueue
+  checkVolDb <- VolatileDB.getIsMember cdbVolatileDB
+  return $ \pt ->
+    case pointToWithOriginRealPoint pt of
+      Origin -> False
+      NotOrigin pt' -> checkBlocksToAdd pt' || checkVolDb (realPointHash pt')

 getIsInvalidBlock ::
   forall m blk. (IOLike m, HasHeader blk)
@@ -185,10 +182,13 @@ getMaxSlotNo CDB{..} = do
     -- contains block 9'. The ImmutableDB contains blocks 1-10. The max slot
     -- of the current chain will be 10 (being the anchor point of the empty
     -- current chain), while the max slot of the VolatileDB will be 9.
-    curChainMaxSlotNo <- maxSlotNoFromWithOrigin . AF.headSlot
-                     <$> readTVar cdbChain
-    volatileDbMaxSlotNo <- VolatileDB.getMaxSlotNo cdbVolatileDB
-    return $ curChainMaxSlotNo `max` volatileDbMaxSlotNo
+    --
+    -- Moreover, we have to look in 'ChainSelQueue' too.
+    curChainMaxSlotNo <-
+      maxSlotNoFromWithOrigin . AF.headSlot <$> readTVar cdbChain
+    volatileDbMaxSlotNo <- VolatileDB.getMaxSlotNo cdbVolatileDB
+    blocksToAddMaxSlotNo <- getBlocksToAddMaxSlotNo cdbChainSelQueue
+    return $ curChainMaxSlotNo `max` volatileDbMaxSlotNo `max` blocksToAddMaxSlotNo

 {-------------------------------------------------------------------------------
   Unifying interface over the immutable DB and volatile DB, but independent
diff --git a/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/Impl/Types.hs b/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/Impl/Types.hs
index 686abf6086..24f0706364 100644
--- a/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/Impl/Types.hs
+++ b/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/Impl/Types.hs
@@ -43,11 +43,14 @@ module Ouroboros.Consensus.Storage.ChainDB.Impl.Types (
     -- * Blocks to add
   , BlockToAdd (..)
   , ChainSelMessage (..)
- , ChainSelQueue + , ChainSelQueue -- opaque , addBlockToAdd , addReprocessLoEBlocks , closeChainSelQueue + , deleteBlockToAdd + , getBlocksToAddMaxSlotNo , getChainSelMessage + , memberBlocksToAdd , newChainSelQueue -- * Trace types , SelectionChangedInfo (..) @@ -64,12 +67,12 @@ module Ouroboros.Consensus.Storage.ChainDB.Impl.Types ( , TraceValidationEvent (..) ) where -import Cardano.Prelude (whenM) -import Control.Monad (when) +import Cardano.Prelude (Bifunctor (second)) +import Control.Monad (void, when) import Control.Tracer -import Data.Foldable (traverse_) +import Data.Foldable (for_) import Data.Map.Strict (Map) -import Data.Maybe (mapMaybe) +import qualified Data.Map.Strict as Map import Data.Maybe.Strict (StrictMaybe (..)) import Data.Set (Set) import Data.Typeable @@ -107,9 +110,10 @@ import Ouroboros.Consensus.Util.CallStack import Ouroboros.Consensus.Util.Enclose (Enclosing, Enclosing' (..)) import Ouroboros.Consensus.Util.IOLike import Ouroboros.Consensus.Util.ResourceRegistry -import Ouroboros.Consensus.Util.STM (WithFingerprint) +import Ouroboros.Consensus.Util.STM (WithFingerprint, + blockUntilChanged) import Ouroboros.Network.AnchoredFragment (AnchoredFragment) -import Ouroboros.Network.Block (MaxSlotNo) +import Ouroboros.Network.Block (MaxSlotNo (..)) import Ouroboros.Network.BlockFetch.ConsensusInterface (ChainSelStarvation (..)) @@ -255,6 +259,17 @@ data ChainDbEnv m blk = CDB , cdbCheckInFuture :: !(CheckInFuture m blk) , cdbChainSelQueue :: !(ChainSelQueue m blk) -- ^ Queue of blocks that still have to be added. + -- + -- NOTE: the set of blocks in this queue are /not/ disjoint from the set of + -- blocks in the VolatileDB. When processing the next block in the queue, we + -- do not remove the block from the queue /until/ it has been added to the + -- VolatileDB and processed by chain selection. This means the block + -- currently being added will be both in the queue and the VolatileDB for a + -- short while. + -- + -- If we would remove the block from the queue before adding it to the + -- VolatileDB, then it would be in /neither/ for a short time, and + -- 'getIsFetched' would incorrectly return 'False'. , cdbFutureBlocks :: !(StrictTVar m (FutureBlocks m blk)) -- ^ Blocks from the future -- @@ -450,8 +465,21 @@ type FutureBlocks m blk = Map (HeaderHash blk) (Header blk, InvalidBlockPunishme -- | FIFO queue used to add blocks asynchronously to the ChainDB. Blocks are -- read from this queue by a background thread, which processes the blocks -- synchronously. -newtype ChainSelQueue m blk = ChainSelQueue (TBQueue m (ChainSelMessage m blk)) - deriving NoThunks via OnlyCheckWhnfNamed "ChainSelQueue" (ChainSelQueue m blk) +data ChainSelQueue m blk = ChainSelQueue { + -- TODO use a better data structure, e.g., a heap from the @heaps@ + -- package. Wish list: + -- + O(1) pop min value + -- + O(log n) insert + -- + O(n) get all + -- + Bounded in size + -- + -- TODO join consecutive blocks into a fragment that can be added at + -- once. + varChainSelQueue :: !(StrictTVar m (Map (RealPoint blk) (BlockToAdd m blk))) + , chainSelQueueCapacity :: !Word + , varChainSelReprocessLoEBlocks :: !(StrictTVar m Bool) + } + deriving (NoThunks) via OnlyCheckWhnfNamed "ChainSelQueue" (ChainSelQueue m blk) -- | Entry in the 'ChainSelQueue' queue: a block together with the 'TMVar's used -- to implement 'AddBlockPromise'. 
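
The 'chainSelQueueCapacity' field reintroduced above is enforced with STM's 'check': the insert transaction retries until the map is back under capacity, which is what makes 'addBlockToAdd' below block when the queue is full. A minimal self-contained sketch of that pattern (the names are illustrative, not identifiers from this patch):

import Control.Concurrent.STM (STM, TVar, check, readTVar, writeTVar)
import Data.Map.Strict (Map)
import qualified Data.Map.Strict as Map

-- Insert into a TVar-held map, retrying (blocking) until the result fits
-- within the given capacity; returns the size after the insert.
insertBounded :: Ord k => Word -> TVar (Map k v) -> k -> v -> STM Int
insertBounded capacity var k v = do
  m <- readTVar var
  let m' = Map.insert k v m
  check (fromIntegral (Map.size m') <= capacity)
  writeTVar var m'
  pure (Map.size m')

-- Pop the entry with the minimal key. For 'RealPoint' keys the 'Ord'
-- instance compares the 'SlotNo' first, so this pops an oldest-slot block.
popMin :: Ord k => TVar (Map k v) -> STM (Maybe v)
popMin var = do
  m <- readTVar var
  case Map.minView m of
    Nothing      -> pure Nothing
    Just (v, m') -> writeTVar var m' >> pure (Just v)

Unlike a 'TBQueue', this representation also supports the membership and max-slot queries ('memberBlocksToAdd', 'getBlocksToAddMaxSlotNo') restored further down.
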
@@ -465,6 +493,7 @@ data BlockToAdd m blk = BlockToAdd , varBlockProcessed :: !(StrictTMVar m (AddBlockResult blk)) -- ^ Used for the 'blockProcessed' field of 'AddBlockPromise'. } + deriving NoThunks via OnlyCheckWhnfNamed "BlockToAdd" (BlockToAdd m blk) -- | Different async tasks for triggering ChainSel data ChainSelMessage m blk @@ -474,9 +503,11 @@ data ChainSelMessage m blk | ChainSelReprocessLoEBlocks -- | Create a new 'ChainSelQueue' with the given size. -newChainSelQueue :: IOLike m => Word -> m (ChainSelQueue m blk) -newChainSelQueue queueSize = ChainSelQueue <$> - atomically (newTBQueue (fromIntegral queueSize)) +newChainSelQueue :: (IOLike m, StandardHash blk, Typeable blk) => Word -> m (ChainSelQueue m blk) +newChainSelQueue chainSelQueueCapacity = do + varChainSelQueue <- newTVarIO mempty + varChainSelReprocessLoEBlocks <- newTVarIO False + return $ ChainSelQueue {varChainSelQueue, chainSelQueueCapacity, varChainSelReprocessLoEBlocks} -- | Add a block to the 'ChainSelQueue' queue. Can block when the queue is full. addBlockToAdd :: @@ -486,7 +517,7 @@ addBlockToAdd :: -> InvalidBlockPunishment m -> blk -> m (AddBlockPromise m blk) -addBlockToAdd tracer (ChainSelQueue queue) punish blk = do +addBlockToAdd tracer (ChainSelQueue {varChainSelQueue, chainSelQueueCapacity}) punish blk = do varBlockWrittenToDisk <- newEmptyTMVarIO varBlockProcessed <- newEmptyTMVarIO let !toAdd = BlockToAdd @@ -497,8 +528,12 @@ addBlockToAdd tracer (ChainSelQueue queue) punish blk = do } traceWith tracer $ AddedBlockToQueue (blockRealPoint blk) RisingEdge queueSize <- atomically $ do - writeTBQueue queue (ChainSelAddBlock toAdd) - lengthTBQueue queue + chainSelQueue <- readTVar varChainSelQueue + let chainSelQueue' = Map.insert (blockRealPoint blk) toAdd chainSelQueue + chainSelQueueSize = Map.size chainSelQueue' + check (fromIntegral chainSelQueueSize <= chainSelQueueCapacity) + writeTVar varChainSelQueue chainSelQueue' + return chainSelQueueSize traceWith tracer $ AddedBlockToQueue (blockRealPoint blk) (FallingEdgeWith (fromIntegral queueSize)) return AddBlockPromise @@ -512,54 +547,84 @@ addReprocessLoEBlocks => Tracer m (TraceAddBlockEvent blk) -> ChainSelQueue m blk -> m () -addReprocessLoEBlocks tracer (ChainSelQueue queue) = do +addReprocessLoEBlocks tracer (ChainSelQueue {varChainSelReprocessLoEBlocks}) = do traceWith tracer $ AddedReprocessLoEBlocksToQueue - atomically $ writeTBQueue queue ChainSelReprocessLoEBlocks + atomically $ writeTVar varChainSelReprocessLoEBlocks True -- | Get the oldest message from the 'ChainSelQueue' queue. Can block when the -- queue is empty; in that case, reports the starvation (and its end) to the -- callback. getChainSelMessage - :: (IOLike m, HasHeader blk) + :: IOLike m => Tracer m (TraceChainSelStarvationEvent blk) -> StrictTVar m ChainSelStarvation -> ChainSelQueue m blk -> m (ChainSelMessage m blk) -getChainSelMessage starvationTracer starvationVar (ChainSelQueue queue) = do - -- NOTE: The test of emptiness and the blocking read are in different STM - -- transactions on purpose. - whenM (isEmptyTBQueueIO queue) $ do - prevStarvation <- swapTVarIO starvationVar ChainSelStarvationOngoing - when (prevStarvation /= ChainSelStarvationOngoing) $ - traceWith starvationTracer . ChainSelStarvationStarted =<< getMonotonicTime - message <- atomically $ readTBQueue queue - -- If there was a starvation ongoing, we need to report that it is done. 
- whenM ((== ChainSelStarvationOngoing) <$> readTVarIO starvationVar) $ - case message of - ChainSelAddBlock BlockToAdd {blockToAdd} -> do - time <- getMonotonicTime - traceWith starvationTracer $ ChainSelStarvationEnded time $ blockRealPoint blockToAdd - writeTVarIO starvationVar $ ChainSelStarvationEndedAt time - ChainSelReprocessLoEBlocks -> pure () - return message +getChainSelMessage starvationTracer starvationVar queue = go where + go = do + (reprocessLoEBlocks, chainSelQueue) <- atomically readBoth + case reprocessLoEBlocks of + True -> do + writeTVarIO varChainSelReprocessLoEBlocks False + return ChainSelReprocessLoEBlocks + False -> + case Map.minView chainSelQueue of + Just (blockToAdd, chainSelQueue') -> do + writeTVarIO varChainSelQueue chainSelQueue' + return $ ChainSelAddBlock blockToAdd + Nothing -> do + prevStarvation <- swapTVarIO starvationVar ChainSelStarvationOngoing + when (prevStarvation /= ChainSelStarvationOngoing) $ + traceWith starvationTracer . ChainSelStarvationStarted =<< getMonotonicTime + void $ atomically $ blockUntilChanged (second Map.null) (False, True) readBoth + go + ChainSelQueue {varChainSelQueue, varChainSelReprocessLoEBlocks} = queue writeTVarIO v x = atomically $ writeTVar v x swapTVarIO v x = atomically $ swapTVar v x - isEmptyTBQueueIO q = atomically $ isEmptyTBQueue q + readBoth = (,) <$> readTVar varChainSelReprocessLoEBlocks <*> readTVar varChainSelQueue -- | Flush the 'ChainSelQueue' queue and notify the waiting threads. -- +-- REVIEW: What about all the threads that are waiting to write in the queue and +-- will write after the flush?! closeChainSelQueue :: IOLike m => ChainSelQueue m blk -> STM m () -closeChainSelQueue (ChainSelQueue queue) = do - as <- mapMaybe blockAdd <$> flushTBQueue queue - traverse_ (\a -> tryPutTMVar (varBlockProcessed a) - (FailedToAddBlock "Queue flushed")) - as - where - blockAdd = \case - ChainSelAddBlock ab -> Just ab - ChainSelReprocessLoEBlocks -> Nothing +closeChainSelQueue ChainSelQueue {varChainSelQueue} = do + chainSelQueue <- readTVar varChainSelQueue + for_ chainSelQueue $ \BlockToAdd {varBlockProcessed} -> + putTMVar varBlockProcessed $ FailedToAddBlock "Queue flushed" +-- | Delete the given 'BlockToAdd' from the 'ChainSelQueue'. +-- +-- PRECONDITION: the given 'BlockToAdd' is in 'ChainSelQueue'. +deleteBlockToAdd :: + (IOLike m, HasHeader blk) + => BlockToAdd m blk + -> ChainSelQueue m blk + -> m () +deleteBlockToAdd (BlockToAdd _ blk _ _) (ChainSelQueue {varChainSelQueue}) = + atomically $ modifyTVar varChainSelQueue $ Map.delete (blockRealPoint blk) + +-- | Return a function to test the membership for the given 'BlocksToAdd'. +memberBlocksToAdd :: + (IOLike m, HasHeader blk) + => ChainSelQueue m blk + -> STM m (RealPoint blk -> Bool) +memberBlocksToAdd (ChainSelQueue {varChainSelQueue}) = + flip Map.member <$> readTVar varChainSelQueue + +getBlocksToAddMaxSlotNo :: + IOLike m + => ChainSelQueue m blk + -> STM m MaxSlotNo +getBlocksToAddMaxSlotNo (ChainSelQueue {varChainSelQueue}) = aux <$> readTVar varChainSelQueue + where + -- | The 'Ord' instance of 'RealPoint' orders by 'SlotNo' first, so the + -- maximal key of the map has the greatest 'SlotNo'. 
+ aux :: Map (RealPoint blk) (BlockToAdd m blk) -> MaxSlotNo + aux queue = case Map.lookupMax queue of + Nothing -> NoMaxSlotNo + Just (RealPoint s _, _) -> MaxSlotNo s {------------------------------------------------------------------------------- Trace types From ea486e787862ceda66dafe4a6a1ea4dc4e6d7a1d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Facundo=20Dom=C3=ADnguez?= Date: Fri, 19 Jul 2024 16:45:11 +0000 Subject: [PATCH 33/76] Stop measuring starvation when blocks arrive --- .../Consensus/Storage/ChainDB/Impl/Types.hs | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/Impl/Types.hs b/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/Impl/Types.hs index 24f0706364..cd128d3e42 100644 --- a/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/Impl/Types.hs +++ b/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/Impl/Types.hs @@ -555,7 +555,7 @@ addReprocessLoEBlocks tracer (ChainSelQueue {varChainSelReprocessLoEBlocks}) = d -- queue is empty; in that case, reports the starvation (and its end) to the -- callback. getChainSelMessage - :: IOLike m + :: (HasHeader blk, IOLike m) => Tracer m (TraceChainSelStarvationEvent blk) -> StrictTVar m ChainSelStarvation -> ChainSelQueue m blk @@ -572,11 +572,10 @@ getChainSelMessage starvationTracer starvationVar queue = go case Map.minView chainSelQueue of Just (blockToAdd, chainSelQueue') -> do writeTVarIO varChainSelQueue chainSelQueue' + terminateStarvationMeasure blockToAdd return $ ChainSelAddBlock blockToAdd Nothing -> do - prevStarvation <- swapTVarIO starvationVar ChainSelStarvationOngoing - when (prevStarvation /= ChainSelStarvationOngoing) $ - traceWith starvationTracer . ChainSelStarvationStarted =<< getMonotonicTime + startStarvationMeasure void $ atomically $ blockUntilChanged (second Map.null) (False, True) readBoth go ChainSelQueue {varChainSelQueue, varChainSelReprocessLoEBlocks} = queue @@ -584,6 +583,18 @@ getChainSelMessage starvationTracer starvationVar queue = go swapTVarIO v x = atomically $ swapTVar v x readBoth = (,) <$> readTVar varChainSelReprocessLoEBlocks <*> readTVar varChainSelQueue + startStarvationMeasure = do + prevStarvation <- swapTVarIO starvationVar ChainSelStarvationOngoing + when (prevStarvation /= ChainSelStarvationOngoing) $ + traceWith starvationTracer . ChainSelStarvationStarted =<< getMonotonicTime + + terminateStarvationMeasure BlockToAdd{blockToAdd=block} = do + prevStarvation <- readTVarIO starvationVar + when (prevStarvation == ChainSelStarvationOngoing) $ do + tf <- getMonotonicTime + traceWith starvationTracer (ChainSelStarvationEnded tf $ blockRealPoint block) + writeTVarIO starvationVar (ChainSelStarvationEndedAt tf) + -- | Flush the 'ChainSelQueue' queue and notify the waiting threads. 
-- -- REVIEW: What about all the threads that are waiting to write in the queue and From 8226db345d65667e47cf6c53fd433d73739e1599 Mon Sep 17 00:00:00 2001 From: Nicolas BACQUEY Date: Mon, 22 Jul 2024 17:44:39 +0200 Subject: [PATCH 34/76] Introduce GenesisConfigFlags for interaction with config files/CLI --- .../Ouroboros/Consensus/Node/Genesis.hs | 89 +++++++++++-------- 1 file changed, 52 insertions(+), 37 deletions(-) diff --git a/ouroboros-consensus-diffusion/src/ouroboros-consensus-diffusion/Ouroboros/Consensus/Node/Genesis.hs b/ouroboros-consensus-diffusion/src/ouroboros-consensus-diffusion/Ouroboros/Consensus/Node/Genesis.hs index c7564b4f59..b369d08feb 100644 --- a/ouroboros-consensus-diffusion/src/ouroboros-consensus-diffusion/Ouroboros/Consensus/Node/Genesis.hs +++ b/ouroboros-consensus-diffusion/src/ouroboros-consensus-diffusion/Ouroboros/Consensus/Node/Genesis.hs @@ -3,14 +3,17 @@ {-# LANGUAGE LambdaCase #-} {-# LANGUAGE NamedFieldPuns #-} {-# LANGUAGE NumericUnderscores #-} +{-# LANGUAGE RecordWildCards #-} {-# LANGUAGE ScopedTypeVariables #-} module Ouroboros.Consensus.Node.Genesis ( -- * 'GenesisConfig' GenesisConfig (..) + , GenesisConfigFlags (..) , LoEAndGDDConfig (..) , disableGenesisConfig , enableGenesisConfigDefault + , mkGenesisConfig -- * NodeKernel helpers , GenesisNodeKernelArgs (..) , mkGenesisNodeKernelArgs @@ -18,7 +21,7 @@ module Ouroboros.Consensus.Node.Genesis ( ) where import Control.Monad (join) -import Data.Maybe (isJust) +import Data.Maybe (fromMaybe) import Data.Traversable (for) import Ouroboros.Consensus.Block import Ouroboros.Consensus.MiniProtocol.ChainSync.Client @@ -35,8 +38,6 @@ import Ouroboros.Network.AnchoredFragment (AnchoredFragment) import qualified Ouroboros.Network.AnchoredFragment as AF import Ouroboros.Network.BlockFetch (GenesisBlockFetchConfiguration (..)) -import System.Environment (lookupEnv) -import System.IO.Unsafe (unsafePerformIO) -- | Whether to en-/disable the Limit on Eagerness and the Genesis Density -- Disconnector. @@ -53,59 +54,73 @@ data GenesisConfig = GenesisConfig , gcLoEAndGDDConfig :: !(LoEAndGDDConfig ()) } --- TODO justification/derivation from other parameters -enableGenesisConfigDefault :: GenesisConfig -enableGenesisConfigDefault = unsafePerformIO $ do - enableGenesis <- isJust <$> lookupEnv "ENABLE_GENESIS" - enableLoP <- (enableGenesis ||) . isJust <$> lookupEnv "ENABLE_LoP" - enableCSJ <- (enableGenesis ||) . isJust <$> lookupEnv "ENABLE_CSJ" - enableLoEAndGDD <- (enableGenesis ||) . isJust <$> lookupEnv "ENABLE_LoEGDD" +-- | Genesis configuration flags and low-level args, as parsed from config file or CLI +data GenesisConfigFlags = GenesisConfigFlags + { gcfEnableCSJ :: Bool + , gcfEnableLoEAndGDD :: Bool + , gcfEnableLoP :: Bool + , gcfBulkSyncGracePeriod :: Maybe Integer + , gcfBucketCapacity :: Maybe Integer + , gcfBucketRate :: Maybe Integer + , gcfCSJJumpSize :: Maybe Integer + } - let defaultBulkSyncGracePeriod = 10 -- seconds - defaultCapacity = 100_000 -- number of tokens - defaultRate = 500 -- tokens per second leaking, 1/2ms - defaultCSJJumpSize = 3 * 2160 * 20 -- mainnet forecast range +enableGenesisConfigDefault :: GenesisConfig +enableGenesisConfigDefault = mkGenesisConfig $ Just $ GenesisConfigFlags + { gcfEnableCSJ = True + , gcfEnableLoEAndGDD = True + , gcfEnableLoP = True + , gcfBulkSyncGracePeriod = Nothing + , gcfBucketCapacity = Nothing + , gcfBucketRate = Nothing + , gcfCSJJumpSize = Nothing + } - gbfcBulkSyncGracePeriod <- maybe defaultBulkSyncGracePeriod - (fromInteger . 
read) <$> lookupEnv "BLOCKFETCH_GRACE_PERIOD" - csbcCapacity <- maybe defaultCapacity - (fromInteger . read) <$> lookupEnv "LOP_CAPACITY" - csbcRate <- maybe defaultRate - (fromInteger . read) <$> lookupEnv "LOP_RATE" - csjcJumpSize <- maybe defaultCSJJumpSize - (fromInteger . read) <$> lookupEnv "CSJ_JUMP_SIZE" +-- | Disable all Genesis components, yielding Praos behavior. +disableGenesisConfig :: GenesisConfig +disableGenesisConfig = mkGenesisConfig Nothing - pure $ GenesisConfig +mkGenesisConfig :: Maybe GenesisConfigFlags -> GenesisConfig +mkGenesisConfig Nothing = -- disable Genesis + GenesisConfig + { gcBlockFetchConfig = GenesisBlockFetchConfiguration + { gbfcBulkSyncGracePeriod = 0 -- no grace period when Genesis is disabled + } + , gcChainSyncLoPBucketConfig = ChainSyncLoPBucketDisabled + , gcCSJConfig = CSJDisabled + , gcLoEAndGDDConfig = LoEAndGDDDisabled + } +mkGenesisConfig (Just GenesisConfigFlags{..}) = + GenesisConfig { gcBlockFetchConfig = GenesisBlockFetchConfiguration { gbfcBulkSyncGracePeriod } - , gcChainSyncLoPBucketConfig = if enableLoP + , gcChainSyncLoPBucketConfig = if gcfEnableLoP then ChainSyncLoPBucketEnabled ChainSyncLoPBucketEnabledConfig { csbcCapacity , csbcRate } else ChainSyncLoPBucketDisabled - , gcCSJConfig = if enableCSJ + , gcCSJConfig = if gcfEnableCSJ then CSJEnabled CSJEnabledConfig { csjcJumpSize } else CSJDisabled - , gcLoEAndGDDConfig = if enableLoEAndGDD + , gcLoEAndGDDConfig = if gcfEnableLoEAndGDD then LoEAndGDDEnabled () else LoEAndGDDDisabled } -{-# NOINLINE enableGenesisConfigDefault #-} + where + -- TODO justification/derivation from other parameters + defaultBulkSyncGracePeriod = 10 -- seconds + defaultCapacity = 100_000 -- number of tokens + defaultRate = 500 -- tokens per second leaking, 1/2ms + defaultCSJJumpSize = 3 * 2160 * 20 -- mainnet forecast range --- | Disable all Genesis components, yielding Praos behavior. -disableGenesisConfig :: GenesisConfig -disableGenesisConfig = GenesisConfig - { gcBlockFetchConfig = GenesisBlockFetchConfiguration - { gbfcBulkSyncGracePeriod = 0 -- no grace period when Genesis is disabled - } - , gcChainSyncLoPBucketConfig = ChainSyncLoPBucketDisabled - , gcCSJConfig = CSJDisabled - , gcLoEAndGDDConfig = LoEAndGDDDisabled - } + gbfcBulkSyncGracePeriod = fromInteger $ fromMaybe defaultBulkSyncGracePeriod gcfBulkSyncGracePeriod + csbcCapacity = fromInteger $ fromMaybe defaultCapacity gcfBucketCapacity + csbcRate = fromInteger $ fromMaybe defaultRate gcfBucketRate + csjcJumpSize = fromInteger $ fromMaybe defaultCSJJumpSize gcfCSJJumpSize -- | Genesis-related arguments needed by the NodeKernel initialization logic. 
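
For illustration, a caller (e.g. CLI or config-file parsing in the node) could drive 'mkGenesisConfig' like this: every component enabled, the numeric tunables left at the defaults above, and only the CSJ jump size overridden. This usage sketch is hypothetical and not part of the patch:

customGenesisConfig :: GenesisConfig
customGenesisConfig = mkGenesisConfig $ Just GenesisConfigFlags
  { gcfEnableCSJ           = True
  , gcfEnableLoEAndGDD     = True
  , gcfEnableLoP           = True
  , gcfBulkSyncGracePeriod = Nothing               -- keep the 10s default
  , gcfBucketCapacity      = Nothing               -- keep 100_000 tokens
  , gcfBucketRate          = Nothing               -- keep 500 tokens/s
  , gcfCSJJumpSize         = Just (2 * 2160 * 20)  -- hypothetical override
  }

Passing 'Nothing' instead of 'Just flags' yields the same fully disabled configuration as 'disableGenesisConfig'.
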
data GenesisNodeKernelArgs m blk = GenesisNodeKernelArgs { From eb8de6e0ebe662db3dc4954127a0e378c90a9ac1 Mon Sep 17 00:00:00 2001 From: Nicolas BACQUEY Date: Mon, 22 Jul 2024 19:49:42 +0200 Subject: [PATCH 35/76] Add missing instances for Genesis configuration --- .../Ouroboros/Consensus/Node/Genesis.hs | 8 +++++--- .../Ouroboros/Consensus/MiniProtocol/ChainSync/Client.hs | 6 ++++-- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/ouroboros-consensus-diffusion/src/ouroboros-consensus-diffusion/Ouroboros/Consensus/Node/Genesis.hs b/ouroboros-consensus-diffusion/src/ouroboros-consensus-diffusion/Ouroboros/Consensus/Node/Genesis.hs index b369d08feb..47a72ee697 100644 --- a/ouroboros-consensus-diffusion/src/ouroboros-consensus-diffusion/Ouroboros/Consensus/Node/Genesis.hs +++ b/ouroboros-consensus-diffusion/src/ouroboros-consensus-diffusion/Ouroboros/Consensus/Node/Genesis.hs @@ -1,4 +1,5 @@ {-# LANGUAGE DeriveTraversable #-} +{-# LANGUAGE DeriveGeneric #-} {-# LANGUAGE DerivingStrategies #-} {-# LANGUAGE LambdaCase #-} {-# LANGUAGE NamedFieldPuns #-} @@ -23,6 +24,7 @@ module Ouroboros.Consensus.Node.Genesis ( import Control.Monad (join) import Data.Maybe (fromMaybe) import Data.Traversable (for) +import GHC.Generics (Generic) import Ouroboros.Consensus.Block import Ouroboros.Consensus.MiniProtocol.ChainSync.Client (CSJConfig (..), CSJEnabledConfig (..), @@ -44,7 +46,7 @@ import Ouroboros.Network.BlockFetch data LoEAndGDDConfig a = LoEAndGDDEnabled !a | LoEAndGDDDisabled - deriving stock (Show, Functor, Foldable, Traversable) + deriving stock (Eq, Generic, Show, Functor, Foldable, Traversable) -- | Aggregating the various configs for Genesis-related subcomponents. data GenesisConfig = GenesisConfig @@ -52,7 +54,7 @@ data GenesisConfig = GenesisConfig , gcChainSyncLoPBucketConfig :: !ChainSyncLoPBucketConfig , gcCSJConfig :: !CSJConfig , gcLoEAndGDDConfig :: !(LoEAndGDDConfig ()) - } + } deriving stock (Eq, Generic, Show) -- | Genesis configuration flags and low-level args, as parsed from config file or CLI data GenesisConfigFlags = GenesisConfigFlags @@ -63,7 +65,7 @@ data GenesisConfigFlags = GenesisConfigFlags , gcfBucketCapacity :: Maybe Integer , gcfBucketRate :: Maybe Integer , gcfCSJJumpSize :: Maybe Integer - } + } deriving stock (Eq, Generic, Show) enableGenesisConfigDefault :: GenesisConfig enableGenesisConfigDefault = mkGenesisConfig $ Just $ GenesisConfigFlags diff --git a/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/MiniProtocol/ChainSync/Client.hs b/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/MiniProtocol/ChainSync/Client.hs index bfba898e9b..54c5cb49e3 100644 --- a/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/MiniProtocol/ChainSync/Client.hs +++ b/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/MiniProtocol/ChainSync/Client.hs @@ -165,7 +165,7 @@ data ChainSyncLoPBucketEnabledConfig = ChainSyncLoPBucketEnabledConfig { csbcCapacity :: Integer, -- | The rate of the bucket (think tokens per second). csbcRate :: Rational - } + } deriving stock (Eq, Generic, Show) -- | Configuration of the leaky bucket. data ChainSyncLoPBucketConfig @@ -176,6 +176,7 @@ data ChainSyncLoPBucketConfig | -- | Enable the leaky bucket. 
ChainSyncLoPBucketEnabled ChainSyncLoPBucketEnabledConfig
+  deriving stock (Eq, Generic, Show)
 
 -- | Configuration of ChainSync Jumping
 data CSJConfig
@@ -186,6 +187,7 @@ data CSJConfig
   | -- | Enable ChainSync Jumping
     CSJEnabled CSJEnabledConfig
+  deriving stock (Eq, Generic, Show)
 
 newtype CSJEnabledConfig = CSJEnabledConfig {
   -- | The _ideal_ size for ChainSync jumps. Note that the algorithm
@@ -205,7 +207,7 @@ newtype CSJEnabledConfig = CSJEnabledConfig {
   -- window has a higher change that dishonest peers can delay syncing by a
   -- small margin (around 2 minutes per dishonest peer with mainnet parameters).
   csjcJumpSize :: SlotNo
-}
+} deriving stock (Eq, Generic, Show)
 
 defaultChainDbView ::
   (IOLike m, LedgerSupportsProtocol blk)

From 70f671333bfdf771923a5fc453279c1cf1d3b115 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Facundo=20Dom=C3=ADnguez?=
Date: Tue, 23 Jul 2024 10:38:32 +0000
Subject: [PATCH 36/76] Set the jump size to a smaller size for Byron

---
 .../Ouroboros/Consensus/Node/Genesis.hs | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/ouroboros-consensus-diffusion/src/ouroboros-consensus-diffusion/Ouroboros/Consensus/Node/Genesis.hs b/ouroboros-consensus-diffusion/src/ouroboros-consensus-diffusion/Ouroboros/Consensus/Node/Genesis.hs
index 47a72ee697..650e8ac49d 100644
--- a/ouroboros-consensus-diffusion/src/ouroboros-consensus-diffusion/Ouroboros/Consensus/Node/Genesis.hs
+++ b/ouroboros-consensus-diffusion/src/ouroboros-consensus-diffusion/Ouroboros/Consensus/Node/Genesis.hs
@@ -117,7 +117,9 @@ mkGenesisConfig (Just GenesisConfigFlags{..}) =
     defaultBulkSyncGracePeriod = 10 -- seconds
     defaultCapacity = 100_000 -- number of tokens
     defaultRate = 500 -- tokens per second leaking, 1/2ms
-    defaultCSJJumpSize = 3 * 2160 * 20 -- mainnet forecast range
+    -- 3 * 2160 * 20 works in more recent ranges of slots, but causes syncing to
+    -- block in byron.
+    defaultCSJJumpSize = 3 * 2160 - 1

From 315ea7eab83178f19a7cd98638a2cbc7ffe05c1a Mon Sep 17 00:00:00 2001
From: Nicolas BACQUEY
Date: Tue, 23 Jul 2024 13:26:57 +0200
Subject: [PATCH 37/76] Export default Genesis config flags

---
 .../Ouroboros/Consensus/Node/Genesis.hs | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/ouroboros-consensus-diffusion/src/ouroboros-consensus-diffusion/Ouroboros/Consensus/Node/Genesis.hs b/ouroboros-consensus-diffusion/src/ouroboros-consensus-diffusion/Ouroboros/Consensus/Node/Genesis.hs
index 650e8ac49d..68ccdde6a4 100644
--- a/ouroboros-consensus-diffusion/src/ouroboros-consensus-diffusion/Ouroboros/Consensus/Node/Genesis.hs
+++ b/ouroboros-consensus-diffusion/src/ouroboros-consensus-diffusion/Ouroboros/Consensus/Node/Genesis.hs
@@ -12,6 +12,7 @@ module Ouroboros.Consensus.Node.Genesis (
     GenesisConfig (..)
   , GenesisConfigFlags (..)
   , LoEAndGDDConfig (..)
+ , defaultGenesisConfigFlags , disableGenesisConfig , enableGenesisConfigDefault , mkGenesisConfig @@ -67,8 +68,8 @@ data GenesisConfigFlags = GenesisConfigFlags , gcfCSJJumpSize :: Maybe Integer } deriving stock (Eq, Generic, Show) -enableGenesisConfigDefault :: GenesisConfig -enableGenesisConfigDefault = mkGenesisConfig $ Just $ GenesisConfigFlags +defaultGenesisConfigFlags :: GenesisConfigFlags +defaultGenesisConfigFlags = GenesisConfigFlags { gcfEnableCSJ = True , gcfEnableLoEAndGDD = True , gcfEnableLoP = True @@ -78,6 +79,9 @@ enableGenesisConfigDefault = mkGenesisConfig $ Just $ GenesisConfigFlags , gcfCSJJumpSize = Nothing } +enableGenesisConfigDefault :: GenesisConfig +enableGenesisConfigDefault = mkGenesisConfig $ Just $ defaultGenesisConfigFlags + -- | Disable all Genesis components, yielding Praos behavior. disableGenesisConfig :: GenesisConfig disableGenesisConfig = mkGenesisConfig Nothing From 07dab6b6eecad0039b6c5ca136c6e1db46bdb63b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Facundo=20Dom=C3=ADnguez?= Date: Tue, 23 Jul 2024 13:45:02 +0000 Subject: [PATCH 38/76] Fix ouroboros-consensus-diffusion:test:consensus-test --- .../Test/Consensus/PeerSimulator/BlockFetch.hs | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/BlockFetch.hs b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/BlockFetch.hs index 4b1fc6450c..3fd245939f 100644 --- a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/BlockFetch.hs +++ b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/BlockFetch.hs @@ -30,6 +30,8 @@ import Ouroboros.Consensus.Block.Abstract (Header, Point (..)) import qualified Ouroboros.Consensus.MiniProtocol.BlockFetch.ClientInterface as BlockFetchClientInterface import Ouroboros.Consensus.MiniProtocol.ChainSync.Client (ChainSyncClientHandleCollection) +import Ouroboros.Consensus.Node.Genesis (GenesisConfig (..), + enableGenesisConfigDefault) import Ouroboros.Consensus.Node.ProtocolInfo (NumCoreNodes (NumCoreNodes)) import Ouroboros.Consensus.Storage.ChainDB.API @@ -38,7 +40,8 @@ import Ouroboros.Consensus.Util.IOLike (DiffTime, Exception (fromException), IOLike, atomically, retry, try) import Ouroboros.Consensus.Util.ResourceRegistry import Ouroboros.Network.BlockFetch (BlockFetchConfiguration (..), - FetchClientRegistry, FetchMode (..), blockFetchLogic, + FetchClientRegistry, FetchMode (..), + GenesisBlockFetchConfiguration (..), blockFetchLogic, bracketFetchClient, bracketKeepAliveClient) import Ouroboros.Network.BlockFetch.Client (blockFetchClient) import Ouroboros.Network.Channel (Channel) @@ -110,7 +113,7 @@ startBlockFetchLogic enableChainSelStarvation registry tracer chainDb fetchClien , bfcMaxRequestsInflight = 10 , bfcDecisionLoopInterval = 0 , bfcSalt = 0 - , bfcBulkSyncGracePeriod + , bfcGenesisBFConfig } void $ forkLinkedThread registry "BlockFetchLogic" $ From 8b510cedbc3f6aebadbd395bc76b9ded72953c87 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Facundo=20Dom=C3=ADnguez?= Date: Tue, 23 Jul 2024 13:45:15 +0000 Subject: [PATCH 39/76] Fix ouroboros-consensus:test:consensus-test --- .../Test/Consensus/MiniProtocol/BlockFetch/Client.hs | 2 -- 1 file changed, 2 deletions(-) diff --git a/ouroboros-consensus/test/consensus-test/Test/Consensus/MiniProtocol/BlockFetch/Client.hs b/ouroboros-consensus/test/consensus-test/Test/Consensus/MiniProtocol/BlockFetch/Client.hs index 705fc8ff8e..ab0c68994b 100644 --- 
a/ouroboros-consensus/test/consensus-test/Test/Consensus/MiniProtocol/BlockFetch/Client.hs
+++ b/ouroboros-consensus/test/consensus-test/Test/Consensus/MiniProtocol/BlockFetch/Client.hs
@@ -360,13 +360,11 @@ instance Arbitrary BlockFetchClientTestSetup where
         blockFetchMode <- elements [FetchModeBulkSync, FetchModeDeadline]
         blockFetchCfg <- do
           let -- ensure that we can download blocks from all peers
-              bfcMaxConcurrencyBulkSync = fromIntegral numPeers
               bfcMaxConcurrencyDeadline = fromIntegral numPeers
               -- This is used to introduce a minimal delay between BlockFetch
               -- logic iterations in case the monitored state vars change too
               -- fast, which we don't have to worry about in this test.
               bfcDecisionLoopInterval = 0
-              bfcBulkSyncGracePeriod = 10
           bfcMaxRequestsInflight <- chooseEnum (2, 10)
           bfcSalt <- arbitrary
           gbfcBulkSyncGracePeriod <- fromIntegral <$> chooseInteger (5, 60)

From d9ea9878df2c7bf9350e78ffe1614565cf402385 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Facundo=20Dom=C3=ADnguez?=
Date: Thu, 11 Jul 2024 19:00:37 +0000
Subject: [PATCH 40/76] Accommodate the addition of
 ChainSyncClientHandleCollection and grace period and starvation event in
 BlockFetch

---
 .../Test/Consensus/MiniProtocol/BlockFetch/Client.hs | 1 +
 1 file changed, 1 insertion(+)

diff --git a/ouroboros-consensus/test/consensus-test/Test/Consensus/MiniProtocol/BlockFetch/Client.hs b/ouroboros-consensus/test/consensus-test/Test/Consensus/MiniProtocol/BlockFetch/Client.hs
index ab0c68994b..f59c02dc54 100644
--- a/ouroboros-consensus/test/consensus-test/Test/Consensus/MiniProtocol/BlockFetch/Client.hs
+++ b/ouroboros-consensus/test/consensus-test/Test/Consensus/MiniProtocol/BlockFetch/Client.hs
@@ -365,6 +365,7 @@ instance Arbitrary BlockFetchClientTestSetup where
           -- logic iterations in case the monitored state vars change too
           -- fast, which we don't have to worry about in this test.
           bfcDecisionLoopInterval = 0
+          bfcBulkSyncGracePeriod = 10
           bfcMaxRequestsInflight <- chooseEnum (2, 10)
           bfcSalt <- arbitrary
           gbfcBulkSyncGracePeriod <- fromIntegral <$> chooseInteger (5, 60)

From 6bb240e8990bbdc966a521f31b0e07d0da4edc6c Mon Sep 17 00:00:00 2001
From: Alexander Esgen
Date: Fri, 26 Jul 2024 17:13:13 +0200
Subject: [PATCH 41/76] Reapply "GSM: use diffusion layer info for HAA"

This reverts commit 66ab269cf5d53c02094f9aaed36f84337d627d3e.
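
For context, the HAA ("Honest Availability Assumption") judgement now comes
from the diffusion layer instead of being hard-coded to True. A minimal
sketch of the check, assuming only the OutboundConnectionsState type that
appears in the diff below (roughly: whether we are connected to trusted
peers other than local roots):

    -- Sketch only: HAA is considered satisfied exactly when the diffusion
    -- layer reports a trusted state that includes external peers.
    haaFromOcs :: OutboundConnectionsState -> Bool
    haaFromOcs TrustedStateWithExternalPeers = True
    haaFromOcs UntrustedState                = False

The node kernel keeps the latest reported state in a TVar, updated via
Diffusion.daUpdateOutboundConnectionsState, and GSM.isHaaSatisfied reads it,
so the judgement changes as soon as the diffusion layer pushes a new state.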
--- .../Ouroboros/Consensus/Node.hs | 8 +++++--- .../Ouroboros/Consensus/NodeKernel.hs | 16 +++++++++++++++- 2 files changed, 20 insertions(+), 4 deletions(-) diff --git a/ouroboros-consensus-diffusion/src/ouroboros-consensus-diffusion/Ouroboros/Consensus/Node.hs b/ouroboros-consensus-diffusion/src/ouroboros-consensus-diffusion/Ouroboros/Consensus/Node.hs index a26eed133b..3e812b66c6 100644 --- a/ouroboros-consensus-diffusion/src/ouroboros-consensus-diffusion/Ouroboros/Consensus/Node.hs +++ b/ouroboros-consensus-diffusion/src/ouroboros-consensus-diffusion/Ouroboros/Consensus/Node.hs @@ -60,6 +60,7 @@ import qualified Codec.CBOR.Encoding as CBOR import Codec.Serialise (DeserialiseFailure) import qualified Control.Concurrent.Class.MonadSTM.Strict as StrictSTM import Control.DeepSeq (NFData) +import Control.Monad (when) import Control.Monad.Class.MonadTime.SI (MonadTime) import Control.Monad.Class.MonadTimer.SI (MonadTimer) import Control.Tracer (Tracer, contramap, traceWith) @@ -632,9 +633,10 @@ runWith RunNodeArgs{..} encAddrNtN decAddrNtN LowLevelRunNodeArgs{..} = lpGetLedgerPeers = fromMaybe [] <$> getPeersFromCurrentLedger kernel (const True), lpGetLedgerStateJudgement = GSM.gsmStateToLedgerJudgement <$> getGsmState kernel }, - -- TODO: consensus can use this callback to store information if the - -- node is connected to peers other than local roots. - Diffusion.daUpdateOutboundConnectionsState = \_ -> return () + Diffusion.daUpdateOutboundConnectionsState = + let varOcs = getOutboundConnectionsState kernel in \newOcs -> do + oldOcs <- readTVar varOcs + when (newOcs /= oldOcs) $ writeTVar varOcs newOcs } localRethrowPolicy :: RethrowPolicy diff --git a/ouroboros-consensus-diffusion/src/ouroboros-consensus-diffusion/Ouroboros/Consensus/NodeKernel.hs b/ouroboros-consensus-diffusion/src/ouroboros-consensus-diffusion/Ouroboros/Consensus/NodeKernel.hs index 729bcbd0c4..9882154066 100644 --- a/ouroboros-consensus-diffusion/src/ouroboros-consensus-diffusion/Ouroboros/Consensus/NodeKernel.hs +++ b/ouroboros-consensus-diffusion/src/ouroboros-consensus-diffusion/Ouroboros/Consensus/NodeKernel.hs @@ -98,6 +98,8 @@ import Ouroboros.Network.NodeToNode (ConnectionId, import Ouroboros.Network.PeerSelection.Bootstrap (UseBootstrapPeers) import Ouroboros.Network.PeerSelection.LedgerPeers.Type (LedgerStateJudgement (..)) +import Ouroboros.Network.PeerSelection.LocalRootPeers + (OutboundConnectionsState (..)) import Ouroboros.Network.PeerSharing (PeerSharingAPI, PeerSharingRegistry, newPeerSharingAPI, newPeerSharingRegistry, ps_POLICY_PEER_SHARE_MAX_PEERS, @@ -154,6 +156,9 @@ data NodeKernel m addrNTN addrNTC blk = NodeKernel { , setBlockForging :: [BlockForging m blk] -> m () , getPeerSharingAPI :: PeerSharingAPI addrNTN StdGen m + + , getOutboundConnectionsState + :: StrictTVar m OutboundConnectionsState } -- | Arguments required when initializing a node @@ -211,6 +216,8 @@ initNodeKernel args@NodeKernelArgs { registry, cfg, tracers , varGsmState } = st + varOutboundConnectionsState <- newTVarIO UntrustedState + do let GsmNodeKernelArgs {..} = gsmArgs gsmTracerArgs = ( castTip . either AF.anchorToTip tipFromHeader . AF.head . fst @@ -249,7 +256,12 @@ initNodeKernel args@NodeKernelArgs { registry, cfg, tracers writeTVar varGsmState gsmState handles <- cschcMap varChainSyncHandles traverse_ (($ time) . ($ gsmState) . 
cschOnGsmStateChanged) handles - , GSM.isHaaSatisfied = pure True + , GSM.isHaaSatisfied = do + readTVar varOutboundConnectionsState <&> \case + -- See the upstream Haddocks for the exact conditions under + -- which the diffusion layer is in this state. + TrustedStateWithExternalPeers -> True + UntrustedState -> False } judgment <- GSM.gsmStateToLedgerJudgement <$> readTVarIO varGsmState void $ forkLinkedThread registry "NodeKernel.GSM" $ case judgment of @@ -305,6 +317,8 @@ initNodeKernel args@NodeKernelArgs { registry, cfg, tracers , getTracers = tracers , setBlockForging = \a -> atomically . LazySTM.putTMVar blockForgingVar $! a , getPeerSharingAPI = peerSharingAPI + , getOutboundConnectionsState + = varOutboundConnectionsState } where blockForgingController :: InternalState m remotePeer localPeer blk From a8111f787a9406b29c6f1322f6c95143efc9d6b3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Facundo=20Dom=C3=ADnguez?= Date: Wed, 24 Jul 2024 17:33:34 +0000 Subject: [PATCH 42/76] Limit the rate at which GDD is evaluated --- .../Ouroboros/Consensus/Node/Genesis.hs | 40 ++++++++++++++----- .../Ouroboros/Consensus/NodeKernel.hs | 11 ++--- .../Test/Consensus/PeerSimulator/Run.hs | 1 + .../Ouroboros/Consensus/Genesis/Governor.hs | 10 ++++- 4 files changed, 46 insertions(+), 16 deletions(-) diff --git a/ouroboros-consensus-diffusion/src/ouroboros-consensus-diffusion/Ouroboros/Consensus/Node/Genesis.hs b/ouroboros-consensus-diffusion/src/ouroboros-consensus-diffusion/Ouroboros/Consensus/Node/Genesis.hs index 68ccdde6a4..2427f7189d 100644 --- a/ouroboros-consensus-diffusion/src/ouroboros-consensus-diffusion/Ouroboros/Consensus/Node/Genesis.hs +++ b/ouroboros-consensus-diffusion/src/ouroboros-consensus-diffusion/Ouroboros/Consensus/Node/Genesis.hs @@ -18,6 +18,7 @@ module Ouroboros.Consensus.Node.Genesis ( , mkGenesisConfig -- * NodeKernel helpers , GenesisNodeKernelArgs (..) + , LoEAndGDDNodeKernelArgs (..) , mkGenesisNodeKernelArgs , setGetLoEFragment ) where @@ -54,7 +55,7 @@ data GenesisConfig = GenesisConfig { gcBlockFetchConfig :: !GenesisBlockFetchConfiguration , gcChainSyncLoPBucketConfig :: !ChainSyncLoPBucketConfig , gcCSJConfig :: !CSJConfig - , gcLoEAndGDDConfig :: !(LoEAndGDDConfig ()) + , gcLoEAndGDDConfig :: !(LoEAndGDDConfig LoEAndGDDParams) } deriving stock (Eq, Generic, Show) -- | Genesis configuration flags and low-level args, as parsed from config file or CLI @@ -66,6 +67,7 @@ data GenesisConfigFlags = GenesisConfigFlags , gcfBucketCapacity :: Maybe Integer , gcfBucketRate :: Maybe Integer , gcfCSJJumpSize :: Maybe Integer + , gcfGDDRateLimit :: Maybe DiffTime } deriving stock (Eq, Generic, Show) defaultGenesisConfigFlags :: GenesisConfigFlags @@ -77,6 +79,7 @@ defaultGenesisConfigFlags = GenesisConfigFlags , gcfBucketCapacity = Nothing , gcfBucketRate = Nothing , gcfCSJJumpSize = Nothing + , gcfGDDRateLimit = Nothing } enableGenesisConfigDefault :: GenesisConfig @@ -113,7 +116,7 @@ mkGenesisConfig (Just GenesisConfigFlags{..}) = } else CSJDisabled , gcLoEAndGDDConfig = if gcfEnableLoEAndGDD - then LoEAndGDDEnabled () + then LoEAndGDDEnabled LoEAndGDDParams{lgpGDDRateLimit} else LoEAndGDDDisabled } where @@ -124,21 +127,34 @@ mkGenesisConfig (Just GenesisConfigFlags{..}) = -- 3 * 2160 * 20 works in more recent ranges of slots, but causes syncing to -- block in byron. 
defaultCSJJumpSize = 3 * 2160 - 1 + defaultGDDRateLimit = 1.0 -- seconds gbfcBulkSyncGracePeriod = fromInteger $ fromMaybe defaultBulkSyncGracePeriod gcfBulkSyncGracePeriod csbcCapacity = fromInteger $ fromMaybe defaultCapacity gcfBucketCapacity csbcRate = fromInteger $ fromMaybe defaultRate gcfBucketRate csjcJumpSize = fromInteger $ fromMaybe defaultCSJJumpSize gcfCSJJumpSize + lgpGDDRateLimit = fromMaybe defaultGDDRateLimit gcfGDDRateLimit + +newtype LoEAndGDDParams = LoEAndGDDParams + { -- | How often to evaluate GDD. 0 means as soon as possible. + -- Otherwise, no faster than once every T seconds, where T is the + -- value of the field. + lgpGDDRateLimit :: DiffTime + } deriving stock (Eq, Generic, Show) -- | Genesis-related arguments needed by the NodeKernel initialization logic. data GenesisNodeKernelArgs m blk = GenesisNodeKernelArgs { + gnkaLoEAndGDDArgs :: !(LoEAndGDDConfig (LoEAndGDDNodeKernelArgs m blk)) + } + +data LoEAndGDDNodeKernelArgs m blk = LoEAndGDDNodeKernelArgs { -- | A TVar containing an action that returns the 'ChainDB.GetLoEFragment' -- action. We use this extra indirection to update this action after we -- opened the ChainDB (which happens before we initialize the NodeKernel). -- After that, this TVar will not be modified again. - gnkaGetLoEFragment :: !(LoEAndGDDConfig (StrictTVar m (ChainDB.GetLoEFragment m blk))) + lgnkaLoEFragmentTVar :: !(StrictTVar m (ChainDB.GetLoEFragment m blk)) + , lgnkaGDDRateLimit :: DiffTime } - -- | Create the initial 'GenesisNodeKernelArgs" (with a temporary -- 'ChainDB.GetLoEFragment' that will be replaced via 'setGetLoEFragment') and a -- function to update the 'ChainDbArgs' accordingly. @@ -149,20 +165,24 @@ mkGenesisNodeKernelArgs :: , Complete ChainDbArgs m blk -> Complete ChainDbArgs m blk ) mkGenesisNodeKernelArgs gcfg = do - gnkaGetLoEFragment <- for (gcLoEAndGDDConfig gcfg) $ \() -> - newTVarIO $ pure $ + gnkaLoEAndGDDArgs <- for (gcLoEAndGDDConfig gcfg) $ \p -> do + loeFragmentTVar <- newTVarIO $ pure $ -- Use the most conservative LoE fragment until 'setGetLoEFragment' -- is called. ChainDB.LoEEnabled $ AF.Empty AF.AnchorGenesis - let updateChainDbArgs = case gnkaGetLoEFragment of + pure LoEAndGDDNodeKernelArgs + { lgnkaLoEFragmentTVar = loeFragmentTVar + , lgnkaGDDRateLimit = lgpGDDRateLimit p + } + let updateChainDbArgs = case gnkaLoEAndGDDArgs of LoEAndGDDDisabled -> id - LoEAndGDDEnabled varGetLoEFragment -> \cfg -> + LoEAndGDDEnabled lgnkArgs -> \cfg -> cfg { ChainDB.cdbsArgs = (ChainDB.cdbsArgs cfg) { ChainDB.cdbsLoE = getLoEFragment } } where - getLoEFragment = join $ readTVarIO varGetLoEFragment - pure (GenesisNodeKernelArgs {gnkaGetLoEFragment}, updateChainDbArgs) + getLoEFragment = join $ readTVarIO $ lgnkaLoEFragmentTVar lgnkArgs + pure (GenesisNodeKernelArgs{gnkaLoEAndGDDArgs}, updateChainDbArgs) -- | Set 'gnkaGetLoEFragment' to the actual logic for determining the current -- LoE fragment. 
diff --git a/ouroboros-consensus-diffusion/src/ouroboros-consensus-diffusion/Ouroboros/Consensus/NodeKernel.hs b/ouroboros-consensus-diffusion/src/ouroboros-consensus-diffusion/Ouroboros/Consensus/NodeKernel.hs index 9882154066..f14c9a2983 100644 --- a/ouroboros-consensus-diffusion/src/ouroboros-consensus-diffusion/Ouroboros/Consensus/NodeKernel.hs +++ b/ouroboros-consensus-diffusion/src/ouroboros-consensus-diffusion/Ouroboros/Consensus/NodeKernel.hs @@ -66,7 +66,7 @@ import Ouroboros.Consensus.MiniProtocol.ChainSync.Client import Ouroboros.Consensus.MiniProtocol.ChainSync.Client.InFutureCheck (SomeHeaderInFutureCheck) import Ouroboros.Consensus.Node.Genesis (GenesisNodeKernelArgs (..), - LoEAndGDDConfig (..), setGetLoEFragment) + LoEAndGDDConfig (..), LoEAndGDDNodeKernelArgs (..), setGetLoEFragment) import Ouroboros.Consensus.Node.GSM (GsmNodeKernelArgs (..)) import qualified Ouroboros.Consensus.Node.GSM as GSM import Ouroboros.Consensus.Node.Run @@ -273,20 +273,21 @@ initNodeKernel args@NodeKernelArgs { registry, cfg, tracers ps_POLICY_PEER_SHARE_STICKY_TIME ps_POLICY_PEER_SHARE_MAX_PEERS - case gnkaGetLoEFragment genesisArgs of - LoEAndGDDDisabled -> pure () - LoEAndGDDEnabled varGetLoEFragment -> do + case gnkaLoEAndGDDArgs genesisArgs of + LoEAndGDDDisabled -> pure () + LoEAndGDDEnabled lgArgs -> do varLoEFragment <- newTVarIO $ AF.Empty AF.AnchorGenesis setGetLoEFragment (readTVar varGsmState) (readTVar varLoEFragment) - varGetLoEFragment + (lgnkaLoEFragmentTVar lgArgs) void $ forkLinkedWatcher registry "NodeKernel.GDD" $ gddWatcher cfg (gddTracer tracers) chainDB + (lgnkaGDDRateLimit lgArgs) (readTVar varGsmState) -- TODO GDD should only consider (big) ledger peers (cschcMap varChainSyncHandles) diff --git a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/Run.hs b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/Run.hs index 997cb4e011..6999891bfa 100644 --- a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/Run.hs +++ b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/Run.hs @@ -403,6 +403,7 @@ startNode schedulerConfig genesisTest interval = do lrConfig (mkGDDTracerTestBlock lrTracer) lnChainDb + 1.0 -- Default config value in NodeKernel.hs at the time or writing (pure GSM.Syncing) -- TODO actually run GSM (cschcMap handles) var diff --git a/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Genesis/Governor.hs b/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Genesis/Governor.hs index 4edbc85b9d..185fcd5d20 100644 --- a/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Genesis/Governor.hs +++ b/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Genesis/Governor.hs @@ -83,6 +83,9 @@ gddWatcher :: => TopLevelConfig blk -> Tracer m (TraceGDDEvent peer blk) -> ChainDB m blk + -> DiffTime -- ^ How often to evaluate GDD. 0 means as soon as possible. + -- Otherwise, no faster than once every T seconds, where T is + -- the provided value. -> STM m GsmState -> STM m (Map peer (ChainSyncClientHandle m blk)) -- ^ The ChainSync handles. 
We trigger the GDD whenever our 'GsmState' @@ -95,7 +98,7 @@ gddWatcher :: -> Watcher m (GsmState, GDDStateView m blk peer) (Map peer (StrictMaybe (WithOrigin SlotNo), Bool)) -gddWatcher cfg tracer chainDb getGsmState getHandles varLoEFrag = +gddWatcher cfg tracer chainDb rateLimit getGsmState getHandles varLoEFrag = Watcher { wInitial = Nothing , wReader = (,) <$> getGsmState <*> getGDDStateView @@ -137,12 +140,17 @@ gddWatcher cfg tracer chainDb getGsmState getHandles varLoEFrag = wNotify :: (GsmState, GDDStateView m blk peer) -> m () wNotify (_gsmState, stateView) = do + t0 <- getMonotonicTime loeFrag <- evaluateGDD cfg tracer stateView oldLoEFrag <- atomically $ swapTVar varLoEFrag loeFrag -- The chain selection only depends on the LoE tip, so there -- is no point in retriggering it if the LoE tip hasn't changed. when (AF.headHash oldLoEFrag /= AF.headHash loeFrag) $ ChainDB.triggerChainSelectionAsync chainDb + tf <- getMonotonicTime + -- We limit the rate at which GDD is evaluated, otherwise it would + -- be called every time a new header is validated. + threadDelay $ rateLimit - diffTime tf t0 -- | Pure snapshot of the dynamic data the GDD operates on. data GDDStateView m blk peer = GDDStateView { From 6961bd4dbf8cc65da9ba2b03c00266872bb82f4b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Facundo=20Dom=C3=ADnguez?= Date: Wed, 24 Jul 2024 19:16:26 +0000 Subject: [PATCH 43/76] Edit note on Interactions with the BlockFetch logic --- .../Consensus/MiniProtocol/ChainSync/Client/Jumping.hs | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/MiniProtocol/ChainSync/Client/Jumping.hs b/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/MiniProtocol/ChainSync/Client/Jumping.hs index a54bcb0711..ad76dba48c 100644 --- a/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/MiniProtocol/ChainSync/Client/Jumping.hs +++ b/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/MiniProtocol/ChainSync/Client/Jumping.hs @@ -77,9 +77,10 @@ -- Interactions with the BlockFetch logic -- -------------------------------------- -- --- When syncing, the BlockFetch logic will fetch blocks from the dynamo. If the --- dynamo is responding too slowly, the BlockFetch logic can ask to change the --- dynamo with a call to 'rotateDynamo'. +-- When syncing, the BlockFetch logic might request to change the dynamo with +-- a call to 'rotateDynamo'. This is because the choice of dynamo influences +-- which peer is selected to download blocks. See the note "Interactions with +-- ChainSync Jumping" in "Ouroboros.Network.BlockFetch.Decision.BulkSync". 
--
-- Interactions with the Limit on Patience
-- ---------------------------------------

From b605fed9a9cfcc2f9ea6c0198f5f39068b0cc9aa Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Facundo=20Dom=C3=ADnguez?=
Date: Thu, 25 Jul 2024 12:31:39 +0000
Subject: [PATCH 44/76] Accommodate separate decision loop intervals for fetch
 modes

---
 .../unstable-diffusion-testlib/Test/ThreadNet/Network.hs | 6 +++---
 .../Test/Consensus/PeerSimulator/BlockFetch.hs | 3 ++-
 .../Test/Consensus/MiniProtocol/BlockFetch/Client.hs | 3 ++-
 3 files changed, 7 insertions(+), 5 deletions(-)

diff --git a/ouroboros-consensus-diffusion/src/unstable-diffusion-testlib/Test/ThreadNet/Network.hs b/ouroboros-consensus-diffusion/src/unstable-diffusion-testlib/Test/ThreadNet/Network.hs
index 73ccb384e9..671b75a331 100644
--- a/ouroboros-consensus-diffusion/src/unstable-diffusion-testlib/Test/ThreadNet/Network.hs
+++ b/ouroboros-consensus-diffusion/src/unstable-diffusion-testlib/Test/ThreadNet/Network.hs
@@ -1016,9 +1016,9 @@ runThreadNetwork systemTime ThreadNetworkArgs
       , blockFetchConfiguration = BlockFetchConfiguration
           { bfcMaxConcurrencyDeadline = 2
           , bfcMaxRequestsInflight = 10
-          , bfcDecisionLoopInterval = 0.0 -- Mock testsuite can use sub-second slot
-                                          -- interval which doesn't play nice with
-                                          -- blockfetch descision interval.
+          , bfcDecisionLoopIntervalBulkSync = 0.0 -- Mock testsuite can use sub-second slot
+          , bfcDecisionLoopIntervalDeadline = 0.0 -- interval which doesn't play nice with
+                                                  -- blockfetch decision interval.
           , bfcSalt = 0
           , bfcGenesisBFConfig = gcBlockFetchConfig enableGenesisConfigDefault
           }

diff --git a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/BlockFetch.hs b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/BlockFetch.hs
index 3fd245939f..eaab0c4a39 100644
--- a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/BlockFetch.hs
+++ b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/BlockFetch.hs
@@ -111,7 +111,8 @@ startBlockFetchLogic enableChainSelStarvation registry tracer chainDb fetchClien
     blockFetchCfg = BlockFetchConfiguration
       { bfcMaxConcurrencyDeadline = 50 -- unused because of @pure FetchModeBulkSync@ above
       , bfcMaxRequestsInflight = 10
-      , bfcDecisionLoopInterval = 0
+      , bfcDecisionLoopIntervalBulkSync = 0
+      , bfcDecisionLoopIntervalDeadline = 0
       , bfcSalt = 0
       , bfcGenesisBFConfig
       }

diff --git a/ouroboros-consensus/test/consensus-test/Test/Consensus/MiniProtocol/BlockFetch/Client.hs b/ouroboros-consensus/test/consensus-test/Test/Consensus/MiniProtocol/BlockFetch/Client.hs
index f59c02dc54..b6f755240f 100644
--- a/ouroboros-consensus/test/consensus-test/Test/Consensus/MiniProtocol/BlockFetch/Client.hs
+++ b/ouroboros-consensus/test/consensus-test/Test/Consensus/MiniProtocol/BlockFetch/Client.hs
@@ -364,7 +364,8 @@ instance Arbitrary BlockFetchClientTestSetup where
           -- This is used to introduce a minimal delay between BlockFetch
           -- logic iterations in case the monitored state vars change too
           -- fast, which we don't have to worry about in this test.
- bfcDecisionLoopInterval = 0 + bfcDecisionLoopIntervalBulkSync = 0 + bfcDecisionLoopIntervalDeadline = 0 bfcBulkSyncGracePeriod = 10 bfcMaxRequestsInflight <- chooseEnum (2, 10) bfcSalt <- arbitrary From 7841fb53d3bfeb1c04b9674d1b17836ef5d80d9e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Facundo=20Dom=C3=ADnguez?= Date: Fri, 26 Jul 2024 22:32:00 +0000 Subject: [PATCH 45/76] fixup: Adding rate limit to GDD --- .../src/unstable-diffusion-testlib/Test/ThreadNet/Network.hs | 2 +- .../Test/Consensus/MiniProtocol/BlockFetch/Client.hs | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/ouroboros-consensus-diffusion/src/unstable-diffusion-testlib/Test/ThreadNet/Network.hs b/ouroboros-consensus-diffusion/src/unstable-diffusion-testlib/Test/ThreadNet/Network.hs index 671b75a331..c7d99ebfd7 100644 --- a/ouroboros-consensus-diffusion/src/unstable-diffusion-testlib/Test/ThreadNet/Network.hs +++ b/ouroboros-consensus-diffusion/src/unstable-diffusion-testlib/Test/ThreadNet/Network.hs @@ -1035,7 +1035,7 @@ runThreadNetwork systemTime ThreadNetworkArgs , getUseBootstrapPeers = pure DontUseBootstrapPeers , publicPeerSelectionStateVar , genesisArgs = GenesisNodeKernelArgs { - gnkaGetLoEFragment = LoEAndGDDDisabled + gnkaLoEAndGDDArgs = LoEAndGDDDisabled } } diff --git a/ouroboros-consensus/test/consensus-test/Test/Consensus/MiniProtocol/BlockFetch/Client.hs b/ouroboros-consensus/test/consensus-test/Test/Consensus/MiniProtocol/BlockFetch/Client.hs index b6f755240f..b0c9e53b63 100644 --- a/ouroboros-consensus/test/consensus-test/Test/Consensus/MiniProtocol/BlockFetch/Client.hs +++ b/ouroboros-consensus/test/consensus-test/Test/Consensus/MiniProtocol/BlockFetch/Client.hs @@ -366,7 +366,6 @@ instance Arbitrary BlockFetchClientTestSetup where -- fast, which we don't have to worry about in this test. bfcDecisionLoopIntervalBulkSync = 0 bfcDecisionLoopIntervalDeadline = 0 - bfcBulkSyncGracePeriod = 10 bfcMaxRequestsInflight <- chooseEnum (2, 10) bfcSalt <- arbitrary gbfcBulkSyncGracePeriod <- fromIntegral <$> chooseInteger (5, 60) From 314616fea8025b7a72aaeace29fb4709b0168204 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Facundo=20Dom=C3=ADnguez?= Date: Thu, 25 Jul 2024 14:31:36 +0000 Subject: [PATCH 46/76] Set the default jump size to 2k --- .../Ouroboros/Consensus/Node/Genesis.hs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ouroboros-consensus-diffusion/src/ouroboros-consensus-diffusion/Ouroboros/Consensus/Node/Genesis.hs b/ouroboros-consensus-diffusion/src/ouroboros-consensus-diffusion/Ouroboros/Consensus/Node/Genesis.hs index 2427f7189d..4e01697bad 100644 --- a/ouroboros-consensus-diffusion/src/ouroboros-consensus-diffusion/Ouroboros/Consensus/Node/Genesis.hs +++ b/ouroboros-consensus-diffusion/src/ouroboros-consensus-diffusion/Ouroboros/Consensus/Node/Genesis.hs @@ -126,7 +126,7 @@ mkGenesisConfig (Just GenesisConfigFlags{..}) = defaultRate = 500 -- tokens per second leaking, 1/2ms -- 3 * 2160 * 20 works in more recent ranges of slots, but causes syncing to -- block in byron. 
-    defaultCSJJumpSize = 3 * 2160 - 1
+    defaultCSJJumpSize = 2 * 2160
     defaultGDDRateLimit = 1.0 -- seconds
 
     gbfcBulkSyncGracePeriod = fromInteger $ fromMaybe defaultBulkSyncGracePeriod gcfBulkSyncGracePeriod

From 889a149177781e61958cd43d8dfe63e87f017fe7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Facundo=20Dom=C3=ADnguez?=
Date: Mon, 29 Jul 2024 16:46:47 +0000
Subject: [PATCH 47/76] Relax expectations of test blockFetch in the BulkSync
 case

---
 .../Test/Consensus/MiniProtocol/BlockFetch/Client.hs | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/ouroboros-consensus/test/consensus-test/Test/Consensus/MiniProtocol/BlockFetch/Client.hs b/ouroboros-consensus/test/consensus-test/Test/Consensus/MiniProtocol/BlockFetch/Client.hs
index b0c9e53b63..9b58d13246 100644
--- a/ouroboros-consensus/test/consensus-test/Test/Consensus/MiniProtocol/BlockFetch/Client.hs
+++ b/ouroboros-consensus/test/consensus-test/Test/Consensus/MiniProtocol/BlockFetch/Client.hs
@@ -98,7 +98,9 @@ prop_blockFetch bfcts@BlockFetchClientTestSetup{..} =
       ] <>
       [ Map.keysSet bfcoBlockFetchResults === Map.keysSet peerUpdates
       , counterexample ("Fetched blocks per peer: " <> condense bfcoFetchedBlocks) $
-          property $ all (> 0) bfcoFetchedBlocks
+          property $ case blockFetchMode of
+            FetchModeDeadline -> all (> 0) bfcoFetchedBlocks
+            FetchModeBulkSync -> any (> 0) bfcoFetchedBlocks
       ]
   where
     BlockFetchClientOutcome{..} = runSimOrThrow $ runBlockFetchTest bfcts

From c93d7dfa93938b07c63a3f6cd33c9c687eb49c50 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Facundo=20Dom=C3=ADnguez?=
Date: Tue, 30 Jul 2024 19:52:02 +0000
Subject: [PATCH 48/76] Set the GDD rate limit to 0 in the peer simulator

---
 .../test/consensus-test/Test/Consensus/PeerSimulator/Run.hs | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/Run.hs b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/Run.hs
index 6999891bfa..3865a87119 100644
--- a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/Run.hs
+++ b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/Run.hs
@@ -403,7 +403,10 @@ startNode schedulerConfig genesisTest interval = do
           lrConfig
           (mkGDDTracerTestBlock lrTracer)
           lnChainDb
-          1.0 -- Default config value in NodeKernel.hs at the time or writing
+          0.0 -- A zero rate limit simplifies calculating how long tests should
+              -- run while still producing interesting interleavings.
+              -- It is similar to the setting of bfcDecisionLoopInterval in
+              -- Test.Consensus.PeerSimulator.BlockFetch
           (pure GSM.Syncing) -- TODO actually run GSM
           (cschcMap handles)
           var

From bc2bb70abf05caefffa6f7d60563c546da73a79c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Facundo=20Dom=C3=ADnguez?=
Date: Tue, 30 Jul 2024 19:57:31 +0000
Subject: [PATCH 49/76] Accommodate the timer added in blockFetchLogic

---
 .../consensus-test/Test/Consensus/PeerSimulator/BlockFetch.hs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/BlockFetch.hs b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/BlockFetch.hs
index eaab0c4a39..29f7c90dd1 100644
--- a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/BlockFetch.hs
+++ b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/BlockFetch.hs
@@ -75,7 +75,7 @@ import Test.Util.Time (dawnOfTime)
 
 startBlockFetchLogic ::
   forall m.
-  (IOLike m)
+  (IOLike m, MonadTimer m)
   => Bool -- ^ Whether to enable chain selection starvation
   -> ResourceRegistry m
   -> Tracer m (TraceEvent TestBlock)

From 62981dbfe3c6b06e89afbf11df7bf4ee8842968e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Facundo=20Dom=C3=ADnguez?=
Date: Tue, 30 Jul 2024 19:58:05 +0000
Subject: [PATCH 50/76] Have the peer simulator use the default grace period
 for chainsel starvations

---
 .../consensus-test/Test/Consensus/PeerSimulator/BlockFetch.hs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/BlockFetch.hs b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/BlockFetch.hs
index 29f7c90dd1..9e92c07a8b 100644
--- a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/BlockFetch.hs
+++ b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/BlockFetch.hs
@@ -102,7 +102,7 @@ startBlockFetchLogic enableChainSelStarvation registry tracer chainDb fetchClien
     bfcGenesisBFConfig =
       if enableChainSelStarvation
       then GenesisBlockFetchConfiguration
-             { gbfcBulkSyncGracePeriod = 1000000 -- (more than 11 days)
+             { gbfcBulkSyncGracePeriod = 10 -- default value for cardano-node at the time of writing
             }
       else gcBlockFetchConfig enableGenesisConfigDefault

From 1c6498e79d86a1537c3f0ced59733dc5c5727336 Mon Sep 17 00:00:00 2001
From: Alexander Esgen
Date: Wed, 31 Jul 2024 19:16:18 +0200
Subject: [PATCH 51/76] ChainSync client: disconnect if stuck and not better
 than selection

TODO better docs

---
 .../Consensus/MiniProtocol/ChainSync/Client.hs | 14 +++++++++++---
 1 file changed, 11 insertions(+), 3 deletions(-)

diff --git a/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/MiniProtocol/ChainSync/Client.hs b/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/MiniProtocol/ChainSync/Client.hs
index 54c5cb49e3..609462071f 100644
--- a/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/MiniProtocol/ChainSync/Client.hs
+++ b/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/MiniProtocol/ChainSync/Client.hs
@@ -113,7 +113,8 @@ import Ouroboros.Consensus.Storage.ChainDB (ChainDB,
     InvalidBlockReason)
 import qualified Ouroboros.Consensus.Storage.ChainDB as ChainDB
 import Ouroboros.Consensus.Util
-import Ouroboros.Consensus.Util.AnchoredFragment (cross)
+import Ouroboros.Consensus.Util.AnchoredFragment (cross,
+    preferAnchoredCandidate)
 import 
Ouroboros.Consensus.Util.Assert (assertWithMsg) import Ouroboros.Consensus.Util.EarlyExit (WithEarlyExit, exitEarly) import qualified Ouroboros.Consensus.Util.EarlyExit as EarlyExit @@ -1422,7 +1423,7 @@ knownIntersectionStateTop cfgEnv dynEnv intEnv = Jumping.jgOnRollForward jumping (blockPoint hdr) atomically (setLatestSlot dynEnv (NotOrigin slotNo)) - checkTime cfgEnv dynEnv intEnv kis arrival slotNo >>= \case + checkTime cfgEnv dynEnv intEnv kis arrival slotNo >>= \case NoLongerIntersects -> continueWithState () $ drainThePipe n @@ -1714,6 +1715,8 @@ checkTime cfgEnv dynEnv intEnv = StillIntersects () kis' -> do let KnownIntersectionState { mostRecentIntersection + , ourFrag + , theirFrag } = kis' lst <- fmap @@ -1726,7 +1729,12 @@ checkTime cfgEnv dynEnv intEnv = ) $ getPastLedger mostRecentIntersection case prj lst of - Nothing -> retry + Nothing -> + -- Precondition is fulfilled as ourFrag and theirFrag + -- intersect by construction. + if preferAnchoredCandidate (configBlock cfg) ourFrag theirFrag + then retry + else throwSTM DensityTooLow Just ledgerView -> return $ return $ Intersects kis' ledgerView From 4c212aa37788fde2ed86b3dfceb8e5863eb463c4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Facundo=20Dom=C3=ADnguez?= Date: Wed, 31 Jul 2024 18:50:46 +0000 Subject: [PATCH 52/76] Shift point schedule times before giving the schedules to tests --- .../Test/Consensus/PointSchedule.hs | 47 ++++++++++++------- 1 file changed, 30 insertions(+), 17 deletions(-) diff --git a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PointSchedule.hs b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PointSchedule.hs index bbb11c4253..41e3984bf0 100644 --- a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PointSchedule.hs +++ b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PointSchedule.hs @@ -52,6 +52,7 @@ import Control.Monad (replicateM) import Control.Monad.Class.MonadTime.SI (Time (Time), addTime, diffTime) import Control.Monad.ST (ST) +import Data.Bifunctor (first) import Data.Functor (($>)) import Data.List (mapAccumL, partition, scanl') import Data.Maybe (catMaybes, fromMaybe, mapMaybe) @@ -126,12 +127,6 @@ prettyPointSchedule ps@PointSchedule {psStartOrder, psMinEndTime} = -- Accumulates the new points in each tick into the previous state, starting with a set of all -- 'Origin' points. -- --- Also shifts all tick start times so that the first tip point is announced at the very beginning --- of the test, keeping the relative delays of the schedule intact. --- This is a preliminary measure to make the long range attack test work, since that relies on the --- honest node sending headers later than the adversary, which is not possible if the adversary's --- first tip point is delayed by 20 or more seconds due to being in a later slot. --- -- Finally, drops the first state, since all points being 'Origin' (in particular the tip) has no -- useful effects in the simulator, but it could set the tip in the GDD governor to 'Origin', which -- causes slow nodes to be disconnected right away. 
@@ -139,14 +134,8 @@ prettyPointSchedule ps@PointSchedule {psStartOrder, psMinEndTime} = -- TODO Remove dropping the first state in favor of better GDD logic peerStates :: Peer (PeerSchedule blk) -> [(Time, Peer (NodeState blk))] peerStates Peer {name, value = schedulePoints} = - drop 1 (zip (Time 0 : (map shiftTime times)) (Peer name <$> scanl' modPoint genesisNodeState points)) + drop 1 (zip (Time 0 : times) (Peer name <$> scanl' modPoint genesisNodeState points)) where - shiftTime :: Time -> Time - shiftTime t = addTime (- firstTipOffset) t - - firstTipOffset :: DiffTime - firstTipOffset = case times of [] -> 0; (Time dt : _) -> dt - modPoint z = \case ScheduleTipPoint nsTip -> z {nsTip} ScheduleHeaderPoint nsHeader -> z {nsHeader} @@ -211,7 +200,7 @@ longRangeAttack :: longRangeAttack BlockTree {btTrunk, btBranches = [branch]} g = do honest <- peerScheduleFromTipPoints g honParams [(IsTrunk, [AF.length btTrunk - 1])] btTrunk [] adv <- peerScheduleFromTipPoints g advParams [(IsBranch, [AF.length (btbFull branch) - 1])] btTrunk [btbFull branch] - pure $ PointSchedule { + pure $ shiftPointSchedule $ PointSchedule { psSchedule = peers' [honest] [adv], psStartOrder = [], psMinEndTime = Time 0 @@ -236,9 +225,33 @@ uniformPoints :: BlockTree blk -> g -> m (PointSchedule blk) -uniformPoints PointsGeneratorParams {pgpExtraHonestPeers, pgpDowntime} = case pgpDowntime of - NoDowntime -> uniformPointsWithExtraHonestPeers pgpExtraHonestPeers - DowntimeWithSecurityParam k -> uniformPointsWithExtraHonestPeersAndDowntime pgpExtraHonestPeers k +uniformPoints PointsGeneratorParams {pgpExtraHonestPeers, pgpDowntime} bt = + fmap shiftPointSchedule . case pgpDowntime of + NoDowntime -> + uniformPointsWithExtraHonestPeers pgpExtraHonestPeers bt + DowntimeWithSecurityParam k -> + uniformPointsWithExtraHonestPeersAndDowntime pgpExtraHonestPeers k bt + +-- | Shifts all tick start times so that the first tip point is announced at +-- the very beginning of the test, keeping the relative delays of the schedule +-- intact. +-- +-- This is a measure to make the long range attack test work, since that +-- relies on the honest node sending headers later than the adversary, which +-- is not possible if the adversary's first tip point is delayed by 20 or +-- more seconds due to being in a later slot. 
+shiftPointSchedule :: PointSchedule blk -> PointSchedule blk
+shiftPointSchedule s = s {psSchedule = shiftPeerSchedule <$> psSchedule s}
+  where
+    shiftPeerSchedule :: PeerSchedule blk -> PeerSchedule blk
+    shiftPeerSchedule times = map (first shiftTime) times
+      where
+        shiftTime :: Time -> Time
+        shiftTime t = addTime (- firstTipOffset) t
+
+        firstTipOffset :: DiffTime
+        firstTipOffset = case times of [] -> 0; ((Time dt, _) : _) -> dt
+
 
 -- | Generate a schedule in which the trunk is served by @pgpExtraHonestPeers + 1@ peers,
 -- and extra branches are served by one peer each, using a single tip point,

From 42b9ff4a0577ed2b3fd179f338801ce21e600fce Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Facundo=20Dom=C3=ADnguez?=
Date: Wed, 31 Jul 2024 19:19:10 +0000
Subject: [PATCH 53/76] Allow running the decision logic once after the last
 tick in the BlockFetch leashing attack

---
 .../Test/Consensus/Genesis/Tests/Uniform.hs | 15 ++++++++++++---
 1 file changed, 12 insertions(+), 3 deletions(-)

diff --git a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Tests/Uniform.hs b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Tests/Uniform.hs
index 3a4b3b7814..95fce81366 100644
--- a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Tests/Uniform.hs
+++ b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Tests/Uniform.hs
@@ -18,7 +18,7 @@ module Test.Consensus.Genesis.Tests.Uniform (
 
 import Cardano.Slotting.Slot (SlotNo (SlotNo), WithOrigin (..))
 import Control.Monad (replicateM)
-import Control.Monad.Class.MonadTime.SI (Time, addTime)
+import Control.Monad.Class.MonadTime.SI (Time (..), addTime)
 import Data.List (intercalate, sort, uncons)
 import qualified Data.List.NonEmpty as NE
 import qualified Data.Map.Strict as Map
@@ -433,7 +433,7 @@ prop_blockFetchLeashingAttack =
   where
     genBlockFetchLeashingSchedule :: GenesisTest TestBlock () -> QC.Gen (PointSchedule TestBlock)
     genBlockFetchLeashingSchedule genesisTest = do
-      PointSchedule {psSchedule, psMinEndTime} <-
+      PointSchedule {psSchedule} <-
         stToGen $
           uniformPoints
             (PointsGeneratorParams {pgpExtraHonestPeers = 1, pgpDowntime = NoDowntime})
@@ -445,7 +445,16 @@ prop_blockFetchLeashingAttack =
       -- Important to shuffle the order in which the peers start, otherwise the
       -- honest peer starts first and systematically becomes dynamo.
      psStartOrder <- shuffle $ getPeerIds psSchedule'
-      pure $ PointSchedule {psSchedule = psSchedule', psStartOrder, psMinEndTime}
+      let maxTime = maximum $
+            Time 0 : [ pt | s <- honest : adversaries', (pt, _) <- take 1 (reverse s) ]
+      pure $ PointSchedule {
+          psSchedule = psSchedule',
+          psStartOrder,
+          -- Allow the BlockFetch decision logic to run after the last tick.
+          -- 11 seconds is the grace period for unresponsive peers that
+          -- should send blocks.
+          psMinEndTime = addTime 11 maxTime
+        }
 
     isBlockPoint :: SchedulePoint blk -> Bool
     isBlockPoint (ScheduleBlockPoint _) = True

From e73e43a0b8c5d92ed9b58a0bc63cc6d42f96396d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Facundo=20Dom=C3=ADnguez?=
Date: Thu, 1 Aug 2024 11:53:59 +0000
Subject: [PATCH 54/76] Update LoP tests to run 1000 times each

---
 .../Test/Consensus/Genesis/Tests/LoP.hs | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Tests/LoP.hs b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Tests/LoP.hs
index 1b1bbacc03..a13ca54a01 100644
--- a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Tests/LoP.hs
+++ b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Tests/LoP.hs
@@ -30,10 +30,11 @@ import Test.Tasty
 import Test.Tasty.QuickCheck
 import Test.Util.Orphans.IOLike ()
 import Test.Util.PartialAccessors
-import Test.Util.TestEnv (adjustQuickCheckTests)
+import Test.Util.TestEnv (adjustQuickCheckMaxSize, adjustQuickCheckTests)
 
 tests :: TestTree
 tests =
+  adjustQuickCheckTests (* 10) $
   testGroup
     "LoP"
     [ -- \| NOTE: Running the test that must _not_ timeout (@prop_smoke False@) takes
       -- does all the computation (serving the headers, validating them, serving the
      -- block, validating them) while the former does nothing, because it timeouts
      -- before reaching the last tick of the point schedule.
- adjustQuickCheckTests (`div` 10) $ + adjustQuickCheckMaxSize (`div` 5) $ testProperty "wait just enough" (prop_wait False), testProperty "wait too much" (prop_wait True), + adjustQuickCheckMaxSize (`div` 5) $ testProperty "wait behind forecast horizon" prop_waitBehindForecastHorizon, - adjustQuickCheckTests (`div` 5) $ + adjustQuickCheckMaxSize (`div` 5) $ testProperty "serve just fast enough" (prop_serve False), + adjustQuickCheckMaxSize (`div` 5) $ testProperty "serve too slow" (prop_serve True), - adjustQuickCheckTests (`div` 5) $ + adjustQuickCheckMaxSize (`div` 5) $ testProperty "delaying attack succeeds without LoP" (prop_delayAttack False), - adjustQuickCheckTests (`div` 5) $ + adjustQuickCheckMaxSize (`div` 5) $ testProperty "delaying attack fails with LoP" (prop_delayAttack True) ] From aeb3290dfe21178568e96c2e3ab48f83bd0eae46 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Facundo=20Dom=C3=ADnguez?= Date: Thu, 1 Aug 2024 11:58:49 +0000 Subject: [PATCH 55/76] Run more repetitions of LoE tests --- .../consensus-test/Test/Consensus/Genesis/Tests/LoE.hs | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Tests/LoE.hs b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Tests/LoE.hs index f4930a8aaf..82313829c9 100644 --- a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Tests/LoE.hs +++ b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Tests/LoE.hs @@ -26,16 +26,17 @@ import Test.Tasty import Test.Tasty.QuickCheck import Test.Util.Orphans.IOLike () import Test.Util.PartialAccessors -import Test.Util.TestEnv (adjustQuickCheckTests) +import Test.Util.TestEnv (adjustQuickCheckMaxSize, adjustQuickCheckTests) tests :: TestTree tests = + adjustQuickCheckTests (* 10) $ testGroup "LoE" [ - adjustQuickCheckTests (`div` 5) $ + adjustQuickCheckMaxSize (`div` 5) $ testProperty "adversary does not hit timeouts" (prop_adversaryHitsTimeouts False), - adjustQuickCheckTests (`div` 5) $ + adjustQuickCheckMaxSize (`div` 5) $ testProperty "adversary hits timeouts" (prop_adversaryHitsTimeouts True) ] From 1ff5343c376246eea2389f9c4f43456398b48a83 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Facundo=20Dom=C3=ADnguez?= Date: Thu, 1 Aug 2024 16:14:43 +0000 Subject: [PATCH 56/76] Print timestamps for node restarts --- .../Test/Consensus/PeerSimulator/Trace.hs | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/Trace.hs b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/Trace.hs index 49398dc67d..199f9d35df 100644 --- a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/Trace.hs +++ b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/Trace.hs @@ -190,7 +190,7 @@ traceSchedulerEventTestBlockWith :: Tracer m String -> TraceSchedulerEvent TestBlock -> m () -traceSchedulerEventTestBlockWith setTickTime tracer0 _tracer = \case +traceSchedulerEventTestBlockWith setTickTime tracer0 tracer = \case TraceBeginningOfTime -> traceWith tracer0 "Running point schedule ..." 
TraceEndOfTime -> @@ -221,13 +221,13 @@ traceSchedulerEventTestBlockWith setTickTime tracer0 _tracer = \case " jumping states:\n" ++ traceJumpingStates jumpingStates ] TraceNodeShutdownStart immTip -> - traceWith tracer0 (" Initiating node shutdown with immutable tip at slot " ++ condense immTip) + traceWith tracer (" Initiating node shutdown with immutable tip at slot " ++ condense immTip) TraceNodeShutdownComplete -> - traceWith tracer0 " Node shutdown complete" + traceWith tracer " Node shutdown complete" TraceNodeStartupStart -> - traceWith tracer0 " Initiating node startup" + traceWith tracer " Initiating node startup" TraceNodeStartupComplete selection -> - traceWith tracer0 (" Node startup complete with selection " ++ terseHFragment selection) + traceWith tracer (" Node startup complete with selection " ++ terseHFragment selection) where traceJumpingStates :: [(PeerId, ChainSyncJumpingState m TestBlock)] -> String From 99edbede0a2109e5e7ccfb4aa39ebf9fad6bd5b2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Facundo=20Dom=C3=ADnguez?= Date: Thu, 1 Aug 2024 17:47:04 +0000 Subject: [PATCH 57/76] Disable boring timeouts in the node restart test --- .../Test/Consensus/Genesis/Tests/Uniform.hs | 35 ++++++------------- 1 file changed, 11 insertions(+), 24 deletions(-) diff --git a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Tests/Uniform.hs b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Tests/Uniform.hs index 95fce81366..74fe9ec4f0 100644 --- a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Tests/Uniform.hs +++ b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Tests/Uniform.hs @@ -230,13 +230,6 @@ prop_leashingAttackStalling = advs <- mapM dropRandomPoints $ adversarialPeers sch pure $ ps {psSchedule = sch {adversarialPeers = advs}} - disableBoringTimeouts gt = - gt { gtChainSyncTimeouts = (gtChainSyncTimeouts gt) - { mustReplyTimeout = Nothing - , idleTimeout = Nothing - } - } - dropRandomPoints :: [(Time, SchedulePoint blk)] -> QC.Gen [(Time, SchedulePoint blk)] dropRandomPoints ps = do let lenps = length ps @@ -296,14 +289,6 @@ prop_leashingAttackTimeLimited = takePointsUntil limit = takeWhile ((<= limit) . 
fst) - disableBoringTimeouts gt = - gt { gtChainSyncTimeouts = (gtChainSyncTimeouts gt) - { canAwaitTimeout = Nothing - , mustReplyTimeout = Nothing - , idleTimeout = Nothing - } - } - estimateTimeBound :: AF.HasHeader blk => ChainSyncTimeout @@ -399,7 +384,7 @@ prop_loeStalling = prop_downtime :: Property prop_downtime = forAllGenesisTest - (genChains (QC.choose (1, 4)) `enrichedWith` \ gt -> + (disableBoringTimeouts <$> genChains (QC.choose (1, 4)) `enrichedWith` \ gt -> ensureScheduleDuration gt <$> stToGen (uniformPoints (pointsGeneratorParams gt) (gtBlockTree gt))) defaultSchedulerConfig @@ -460,11 +445,13 @@ prop_blockFetchLeashingAttack = isBlockPoint (ScheduleBlockPoint _) = True isBlockPoint _ = False - disableBoringTimeouts gt = - gt - { gtChainSyncTimeouts = - (gtChainSyncTimeouts gt) - { mustReplyTimeout = Nothing, - idleTimeout = Nothing - } - } +disableBoringTimeouts :: GenesisTest blk schedule -> GenesisTest blk schedule +disableBoringTimeouts gt = + gt + { gtChainSyncTimeouts = + (gtChainSyncTimeouts gt) + { mustReplyTimeout = Nothing + , idleTimeout = Nothing + , canAwaitTimeout = Nothing + } + } From 499a95d1343501dfd7afe577b5763208bde58a38 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Facundo=20Dom=C3=ADnguez?= Date: Thu, 1 Aug 2024 18:43:06 +0000 Subject: [PATCH 58/76] Wait sufficiently long after restarting the node at the end of a test --- .../Test/Consensus/PeerSimulator/Run.hs | 23 +++++++++++++++---- 1 file changed, 19 insertions(+), 4 deletions(-) diff --git a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/Run.hs b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/Run.hs index 3865a87119..e91c71eac6 100644 --- a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/Run.hs +++ b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/Run.hs @@ -10,7 +10,7 @@ module Test.Consensus.PeerSimulator.Run ( , runPointSchedule ) where -import Control.Monad (foldM, forM, void) +import Control.Monad (foldM, forM, void, when) import Control.Monad.Class.MonadTime (MonadTime) import Control.Monad.Class.MonadTimer.SI (MonadTimer) import Control.Tracer (Tracer (..), nullTracer, traceWith) @@ -221,8 +221,8 @@ smartDelay :: LiveNode blk m -> DiffTime -> m (LiveNode blk m) -smartDelay NodeLifecycle {nlMinDuration, nlStart, nlShutdown} node duration - | Just minInterval <- nlMinDuration, duration > minInterval = do +smartDelay lifecycle@NodeLifecycle {nlStart, nlShutdown} node duration + | itIsTimeToRestartTheNode lifecycle duration = do results <- nlShutdown node threadDelay duration nlStart results @@ -230,6 +230,12 @@ smartDelay _ node duration = do threadDelay duration pure node +itIsTimeToRestartTheNode :: NodeLifecycle blk m -> DiffTime -> Bool +itIsTimeToRestartTheNode NodeLifecycle {nlMinDuration} duration = + case nlMinDuration of + Just minInterval -> duration > minInterval + Nothing -> False + -- | The 'Tick' contains a state update for a specific peer. 
-- If the peer has not terminated by protocol rules, this will update its TMVar
-- with the new state, thereby unblocking the handler that's currently waiting
@@ -296,7 +302,16 @@ runScheduler tracer varHandles ps@PointSchedule{psMinEndTime} peers lifecycle@No
             else Nothing
         _ -> Just $ coerce psMinEndTime
   LiveNode{lnChainDb, lnStateViewTracers} <-
-    maybe (pure nodeEnd) (smartDelay lifecycle nodeEnd) extraDelay
+    case extraDelay of
+      Just duration -> do
+        nodeEnd' <- smartDelay lifecycle nodeEnd duration
+        -- Give the node an opportunity to finish whatever it was doing at
+        -- shutdown.
+        when (itIsTimeToRestartTheNode lifecycle duration) $
+          threadDelay $ coerce psMinEndTime
+        pure nodeEnd'
+      Nothing ->
+        pure nodeEnd
   traceWith tracer TraceEndOfTime
   pure (lnChainDb, lnStateViewTracers)
   where

From db3eb9f42820a54252727a08b272c4cdb48dead2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Facundo=20Dom=C3=ADnguez?=
Date: Thu, 1 Aug 2024 20:23:13 +0000
Subject: [PATCH 59/76] Don't let GDD drop candidates that do not intersect
 with the selection

---
 .../Ouroboros/Consensus/Genesis/Governor.hs   | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Genesis/Governor.hs b/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Genesis/Governor.hs
index 185fcd5d20..0c9eb1a8b2 100644
--- a/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Genesis/Governor.hs
+++ b/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Genesis/Governor.hs
@@ -40,7 +40,7 @@ import Data.Foldable (for_, toList)
 import Data.Functor.Compose (Compose (..))
 import Data.Map.Strict (Map)
 import qualified Data.Map.Strict as Map
-import Data.Maybe (mapMaybe, maybeToList)
+import Data.Maybe (maybeToList)
 import Data.Maybe.Strict (StrictMaybe)
 import Data.Word (Word64)
 import Ouroboros.Consensus.Block
@@ -249,16 +249,16 @@ sharedCandidatePrefix curChain candidates =
     immutableTip = AF.anchorPoint curChain
 
     splitAfterImmutableTip (peer, frag) =
-      (,) peer . snd <$> AF.splitAfterPoint frag immutableTip
+      case AF.splitAfterPoint frag immutableTip of
+        -- If there is no intersection, it might be the case that the candidate
+        -- is behind the immutable tip. In that case we give the peer the
+        -- benefit of the doubt, and consider its fragment as anchored at the
+        -- immutable tip.
+        Nothing -> (peer, AF.takeOldest 0 curChain)
+        Just (_, suffix) -> (peer, suffix)
 
     immutableTipSuffixes =
-      -- If a ChainSync client's candidate forks off before the
-      -- immutable tip, then this transaction is currently winning an
-      -- innocuous race versus the thread that will fatally raise
-      -- 'InvalidIntersection' within that ChainSync client, so it's
-      -- sound to pre-emptively discard their candidate from this
-      -- 'Map' via 'mapMaybe'.
- mapMaybe splitAfterImmutableTip candidates + map splitAfterImmutableTip candidates data DensityBounds blk = DensityBounds { From 8eb4ea2c4e662d0d2b0987bb7b6e96fef7038649 Mon Sep 17 00:00:00 2001 From: Alexander Esgen Date: Fri, 2 Aug 2024 09:49:47 +0200 Subject: [PATCH 60/76] Genesis tests: remove outdated comment --- .../consensus-test/Test/Consensus/Genesis/Tests/Uniform.hs | 3 --- 1 file changed, 3 deletions(-) diff --git a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Tests/Uniform.hs b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Tests/Uniform.hs index 74fe9ec4f0..77dc1afd25 100644 --- a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Tests/Uniform.hs +++ b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Tests/Uniform.hs @@ -248,9 +248,6 @@ prop_leashingAttackStalling = -- immutable tip needs to be advanced enough when the honest peer has offered -- all of its ticks. -- --- This test is expected to fail because we don't test a genesis implementation --- yet. --- -- See Note [Leashing attacks] prop_leashingAttackTimeLimited :: Property prop_leashingAttackTimeLimited = From 833d2ec952c722c5891b9d2b8e3b1701b3fd70de Mon Sep 17 00:00:00 2001 From: Alexander Esgen Date: Fri, 2 Aug 2024 10:13:07 +0200 Subject: [PATCH 61/76] disableBoringTimeouts: don't consider `canAwaitTimeout` boring This is relatively tight by default (10s), and applies eg to adversaries that simply stop sending headers --- .../test/consensus-test/Test/Consensus/Genesis/Tests/Uniform.hs | 1 - 1 file changed, 1 deletion(-) diff --git a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Tests/Uniform.hs b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Tests/Uniform.hs index 77dc1afd25..44294c9b49 100644 --- a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Tests/Uniform.hs +++ b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Tests/Uniform.hs @@ -449,6 +449,5 @@ disableBoringTimeouts gt = (gtChainSyncTimeouts gt) { mustReplyTimeout = Nothing , idleTimeout = Nothing - , canAwaitTimeout = Nothing } } From 83ff29e912d38aaced0cd87a001e9b643dbce037 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Facundo=20Dom=C3=ADnguez?= Date: Fri, 2 Aug 2024 11:00:30 +0000 Subject: [PATCH 62/76] Disable canAwaitTimeout again in the timed leashing attack --- .../Test/Consensus/Genesis/Tests/Uniform.hs | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Tests/Uniform.hs b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Tests/Uniform.hs index 44294c9b49..6dbaf06384 100644 --- a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Tests/Uniform.hs +++ b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Tests/Uniform.hs @@ -253,7 +253,9 @@ prop_leashingAttackTimeLimited :: Property prop_leashingAttackTimeLimited = forAllGenesisTest - (disableBoringTimeouts <$> genChains (QC.choose (1, 4)) `enrichedWith` genTimeLimitedSchedule) + (disableCanAwaitTimeout . 
disableBoringTimeouts <$>
+      genChains (QC.choose (1, 4)) `enrichedWith` genTimeLimitedSchedule
+    )
     defaultSchedulerConfig
       { scTrace = False
@@ -326,6 +328,15 @@ prop_leashingAttackTimeLimited =
       fromTipPoint (t, ScheduleTipPoint bp) = Just (t, bp)
       fromTipPoint _ = Nothing
 
+    disableCanAwaitTimeout :: GenesisTest blk schedule -> GenesisTest blk schedule
+    disableCanAwaitTimeout gt =
+      gt
+        { gtChainSyncTimeouts =
+            (gtChainSyncTimeouts gt)
+              { canAwaitTimeout = Nothing
+              }
+        }
+
 headCallStack :: HasCallStack => [a] -> a
 headCallStack = \case
   x:_ -> x

From 3d65fb1e5d22bd8c83b454eabb4f40e17db61473 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Facundo=20Dom=C3=ADnguez?=
Date: Fri, 2 Aug 2024 12:27:31 +0000
Subject: [PATCH 63/76] Document treatment of fragments with no intersection
 with the selection in GDD

---
 .../Ouroboros/Consensus/Genesis/Governor.hs   | 35 ++++++++++++++++---
 1 file changed, 31 insertions(+), 4 deletions(-)

diff --git a/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Genesis/Governor.hs b/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Genesis/Governor.hs
index 0c9eb1a8b2..fe9220db5d 100644
--- a/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Genesis/Governor.hs
+++ b/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Genesis/Governor.hs
@@ -250,16 +250,43 @@ sharedCandidatePrefix curChain candidates =
 
     splitAfterImmutableTip (peer, frag) =
       case AF.splitAfterPoint frag immutableTip of
-        -- If there is no intersection, it might be the case that the candidate
-        -- is behind the immutable tip. In that case we give the peer the
-        -- benefit of the doubt, and consider its fragment as anchored at the
-        -- immutable tip.
+        -- When there is no intersection, we assume the candidate fragment is
+        -- empty and anchored at the immutable tip.
+        -- See Note [CSJ can recede the candidate fragments]
        Nothing -> (peer, AF.takeOldest 0 curChain)
        Just (_, suffix) -> (peer, suffix)
 
   immutableTipSuffixes =
     map splitAfterImmutableTip candidates
 
+-- Note [CSJ can recede the candidate fragments]
+-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+--
+-- Before CSJ, only rollback could cause trimming of the suffix of a candidate
+-- fragment. Trimming suffixes is a serious business for GDD because the LoE
+-- might have allowed the selection to advance, based on the tips of the
+-- candidate fragments.
+--
+-- Trimming a suffix risks moving the LoE back, which could be earlier than the
+-- anchor of the latest selection. When rollbacks were the only mechanism to
+-- trim suffixes, it was fine to ignore candidate fragments that don't intersect
+-- with the current selection. This could only happen if the peer is rolling
+-- back more than k blocks, which is dishonest behavior.
+--
+-- With CSJ, however, the candidate fragments can recede without a rollback.
+-- A former objector might be asked to jump back when it becomes a jumper again.
+-- The jump back might still be to a point that is a descendent of the immutable
+-- tip. But by the time the jump is accepted, the immutable tip might have
+-- advanced, and the candidate fragment of the otherwise honest peer might be
+-- ignored by GDD.
+--
+-- Therefore, at the moment, when there is no intersection with the current
+-- selection, the GDD assumes that the candidate fragment is empty and anchored
+-- at the immutable tip. It is the job of the ChainSync client to update the
+-- candidate fragment so it intersects with the selection or to disconnect the
+-- peer if no such fragment can be established.
+--
+
 data DensityBounds blk =
   DensityBounds {
     clippedFragment :: AnchoredFragment (Header blk),

From 6a184e59b9edf0b4815e82aecc4785109b15cb57 Mon Sep 17 00:00:00 2001
From: Alexander Esgen
Date: Fri, 2 Aug 2024 18:12:07 +0200
Subject: [PATCH 64/76] fixup! ChainSync client: disconnect if stuck and not
 better than selection

---
 .../Test/Consensus/Genesis/Setup.hs           |  3 +-
 .../MiniProtocol/ChainSync/Client.hs          | 65 ++++++++++++++++---
 2 files changed, 57 insertions(+), 11 deletions(-)

diff --git a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Setup.hs b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Setup.hs
index a1e661c264..c4cb2aef8b 100644
--- a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Setup.hs
+++ b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Setup.hs
@@ -18,7 +18,7 @@ import Control.Monad.IOSim (IOSim, runSimStrictShutdown)
 import Control.Tracer (debugTracer, traceWith)
 import Data.Maybe (mapMaybe)
 import Ouroboros.Consensus.MiniProtocol.ChainSync.Client
-    (ChainSyncClientException (DensityTooLow, EmptyBucket))
+    (ChainSyncClientException (..))
 import Ouroboros.Consensus.Util.Condense
 import Ouroboros.Consensus.Util.IOLike (Exception, fromException)
 import Ouroboros.Network.Driver.Limits
@@ -124,6 +124,7 @@ forAllGenesisTest generator schedulerConfig shrinker mkProperty =
       | Just DensityTooLow <- e = true
       | Just (ExceededTimeLimit _) <- e = true
       | Just AsyncCancelled <- e = true
+      | Just CandidateTooSparse{} <- e = true
       | otherwise = counterexample ("Encountered unexpected exception: " ++ show exn) False

diff --git a/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/MiniProtocol/ChainSync/Client.hs b/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/MiniProtocol/ChainSync/Client.hs
index 609462071f..8210549871 100644
--- a/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/MiniProtocol/ChainSync/Client.hs
+++ b/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/MiniProtocol/ChainSync/Client.hs
@@ -1423,7 +1423,7 @@ knownIntersectionStateTop cfgEnv dynEnv intEnv =
             Jumping.jgOnRollForward jumping (blockPoint hdr)
             atomically (setLatestSlot dynEnv (NotOrigin slotNo))
 
-            checkTime cfgEnv dynEnv intEnv kis arrival slotNo >>= \case
+            checkTime cfgEnv dynEnv intEnv kis arrival slotNo >>= \case
               NoLongerIntersects ->
                 continueWithState () $ drainThePipe n
@@ -1618,7 +1618,8 @@ checkKnownInvalid cfgEnv dynEnv intEnv hdr = case scrutinee of
 -- Finally, the client will block on the intersection a second time, if
 -- necessary, since it's possible for a ledger state to determine the slot's
 -- onset's timestamp without also determining the slot's 'LedgerView'. During
--- this pause, the LoP bucket is paused.
+-- this pause, the LoP bucket is paused. If we need to block and their fragment
+-- is not preferable to ours, we disconnect.
 checkTime ::
   forall m blk arrival judgment.
   ( IOLike m
@@ -1715,8 +1716,6 @@ checkTime cfgEnv dynEnv intEnv =
           StillIntersects () kis' -> do
             let KnownIntersectionState {
                     mostRecentIntersection
-                  , ourFrag
-                  , theirFrag
                   } = kis'
             lst <-
               fmap
@@ -1729,15 +1728,40 @@ checkTime cfgEnv dynEnv intEnv =
                 )
               $ getPastLedger mostRecentIntersection
             case prj lst of
-              Nothing ->
-                -- Precondition is fulfilled as ourFrag and theirFrag
-                -- intersect by construction.
-                if preferAnchoredCandidate (configBlock cfg) ourFrag theirFrag
-                then retry
-                else throwSTM DensityTooLow
+              Nothing -> do
+                checkPreferTheirsOverOurs kis'
+                retry
               Just ledgerView ->
                 return $ return $ Intersects kis' ledgerView
 
+  -- When a header is beyond the forecast horizon and their fragment is not
+  -- preferable to our selection (ourFrag), then we disconnect, as we will
+  -- never end up selecting it.
+  --
+  -- In the context of Genesis, one can think of the candidate losing a
+  -- density comparison against the selection. See the Genesis documentation
+  -- for why this check is necessary.
+  --
+  -- In particular, this means that we will disconnect from peers who offer us
+  -- a chain containing a slot gap larger than a forecast window.
+  checkPreferTheirsOverOurs :: KnownIntersectionState blk -> STM m ()
+  checkPreferTheirsOverOurs kis
+    | -- Precondition is fulfilled as ourFrag and theirFrag intersect by
+      -- construction.
+      preferAnchoredCandidate (configBlock cfg) ourFrag theirFrag
+    = pure ()
+    | otherwise
+    = throwSTM $ CandidateTooSparse
+        mostRecentIntersection
+        (ourTipFromChain ourFrag)
+        (theirTipFromChain theirFrag)
+    where
+      KnownIntersectionState {
+          mostRecentIntersection
+        , ourFrag
+        , theirFrag
+        } = kis
+
   -- Returns 'Nothing' if the ledger state cannot forecast the ledger view
   -- that far into the future.
   projectLedgerView ::
@@ -1921,6 +1945,12 @@ ourTipFromChain ::
   -> Our (Tip blk)
 ourTipFromChain = Our . AF.anchorToTip . AF.headAnchor
 
+theirTipFromChain ::
+  HasHeader (Header blk)
+  => AnchoredFragment (Header blk)
+  -> Their (Tip blk)
+theirTipFromChain = Their . AF.anchorToTip . AF.headAnchor
+
 -- | A type-legos auxillary function used in 'readLedgerState'.
 castM :: Monad m => m (WithEarlyExit m x) -> WithEarlyExit m x
 castM = join . EarlyExit.lift
@@ -2138,6 +2168,14 @@ data ChainSyncClientException =
       -- different from the previous argument.
      (InvalidBlockReason blk)
      -- ^ The upstream node's chain contained a block that we know is invalid.
+  |
+    forall blk. BlockSupportsProtocol blk =>
+    CandidateTooSparse
+      (Point blk) -- ^ Intersection
+      (Our (Tip blk))
+      (Their (Tip blk))
+      -- ^ The upstream node's chain was so sparse that it was worse than our
+      -- selection despite being blocked on the forecast horizon.
   |
     InFutureHeaderExceedsClockSkew !InFutureCheck.HeaderArrivalException
     -- ^ A header arrived from the far future.
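Downstream code is expected to treat the new exception like the existing density-based disconnect. A minimal sketch of that pattern, assuming only the constructors visible in this series (the helper name is hypothetical; the real classification lives in Setup.hs above and in the gdd tests):

    import Ouroboros.Consensus.MiniProtocol.ChainSync.Client
             (ChainSyncClientException (..))

    -- Hypothetical helper: a disconnection is an expected Genesis outcome
    -- if the peer was dropped for offering a sparse chain, either by GDD
    -- (DensityTooLow) or by the new check at the forecast horizon
    -- (CandidateTooSparse).
    isExpectedDisconnect :: ChainSyncClientException -> Bool
    isExpectedDisconnect DensityTooLow        = True
    isExpectedDisconnect CandidateTooSparse{} = True
    isExpectedDisconnect _                    = False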
@@ -2171,6 +2209,12 @@ instance Eq ChainSyncClientException where
     | Just Refl <- eqT @blk @blk'
     = (a, b, c) == (a', b', c')
 
+  (==)
+    (CandidateTooSparse (a :: Point blk ) b c )
+    (CandidateTooSparse (a' :: Point blk') b' c')
+    | Just Refl <- eqT @blk @blk'
+    = (a, b, c) == (a', b', c')
+
   (==)
     (InFutureHeaderExceedsClockSkew a )
     (InFutureHeaderExceedsClockSkew a')
@@ -2188,6 +2232,7 @@ instance Eq ChainSyncClientException where
   HeaderError{} == _ = False
   InvalidIntersection{} == _ = False
   InvalidBlock{} == _ = False
+  CandidateTooSparse{} == _ = False
   InFutureHeaderExceedsClockSkew{} == _ = False
   EmptyBucket == _ = False
   InvalidJumpResponse == _ = False

From c7507644e32b52f7d60f4f24d75122944dc4180f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Facundo=20Dom=C3=ADnguez?=
Date: Fri, 2 Aug 2024 17:08:45 +0000
Subject: [PATCH 65/76] Edit note on truncation of candidate fragments

---
 .../Ouroboros/Consensus/Genesis/Governor.hs | 30 +++++++++----------
 1 file changed, 14 insertions(+), 16 deletions(-)

diff --git a/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Genesis/Governor.hs b/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Genesis/Governor.hs
index fe9220db5d..52a58b78b0 100644
--- a/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Genesis/Governor.hs
+++ b/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Genesis/Governor.hs
@@ -252,33 +252,31 @@ sharedCandidatePrefix curChain candidates =
       case AF.splitAfterPoint frag immutableTip of
         -- When there is no intersection, we assume the candidate fragment is
         -- empty and anchored at the immutable tip.
-        -- See Note [CSJ can recede the candidate fragments]
+        -- See Note [CSJ truncates the candidate fragments].
         Nothing -> (peer, AF.takeOldest 0 curChain)
         Just (_, suffix) -> (peer, suffix)
 
     immutableTipSuffixes =
       map splitAfterImmutableTip candidates
 
--- Note [CSJ can recede the candidate fragments]
--- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+-- Note [CSJ truncates the candidate fragments]
+-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 --
--- Before CSJ, only rollback could cause trimming of the suffix of a candidate
--- fragment. Trimming suffixes is a serious business for GDD because the LoE
--- might have allowed the selection to advance, based on the tips of the
--- candidate fragments.
+-- Before CSJ, only rollback could cause truncation of a candidate fragment.
+-- Truncation is a serious business for GDD because the LoE might have allowed
+-- the selection to advance, based on the tips of the candidate fragments.
 --
--- Trimming a suffix risks moving the LoE back, which could be earlier than the
--- anchor of the latest selection. When rollbacks were the only mechanism to
--- trim suffixes, it was fine to ignore candidate fragments that don't intersect
--- with the current selection. This could only happen if the peer is rolling
--- back more than k blocks, which is dishonest behavior.
+-- Truncating a candidate fragment risks moving the LoE back, which could be
+-- earlier than the anchor of the latest selection. When rollbacks were the
+-- only mechanism to truncate, it was fine to ignore candidate fragments that
+-- don't intersect with the current selection. This could only happen if the
+-- peer is rolling back more than k blocks, which is dishonest behavior.
 --
 -- With CSJ, however, the candidate fragments can recede without a rollback.
 -- A former objector might be asked to jump back when it becomes a jumper again.
--- The jump back might still be to a point that is a descendent of the immutable --- tip. But by the time the jump is accepted, the immutable tip might have --- advanced, and the candidate fragment of the otherwise honest peer might be --- ignored by GDD. +-- The jump point might still be a descendent of the immutable tip. But by the +-- time the jump is accepted, the immutable tip might have advanced, and the +-- candidate fragment of the otherwise honest peer might be ignored by GDD. -- -- Therefore, at the moment, when there is no intersection with the current -- selection, the GDD assumes that the candidate fragment is empty and anchored From 8eaf5dc2597ac0c10f136e34d0878c46d1bbc771 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Facundo=20Dom=C3=ADnguez?= Date: Fri, 2 Aug 2024 19:00:55 +0000 Subject: [PATCH 66/76] Expand the comments motivating DynamoInitState and ObjectorInitState --- .../Consensus/MiniProtocol/ChainSync/Client/State.hs | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/MiniProtocol/ChainSync/Client/State.hs b/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/MiniProtocol/ChainSync/Client/State.hs index 69fa5ea7cd..763e79fd89 100644 --- a/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/MiniProtocol/ChainSync/Client/State.hs +++ b/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/MiniProtocol/ChainSync/Client/State.hs @@ -165,8 +165,11 @@ newChainSyncClientHandleCollection = do } data DynamoInitState blk - = -- | The dynamo has not yet started jumping and we first need to jump to the - -- given jump info to set the intersection of the ChainSync server. + = -- | The dynamo still has to set the intersection of the ChainSync server + -- before it can resume downloading headers. This is because + -- the message pipeline might be drained to do jumps, and this causes + -- the intersection on the ChainSync server to diverge from the tip of + -- the candidate fragment. DynamoStarting !(JumpInfo blk) | DynamoStarted deriving (Generic) @@ -179,7 +182,10 @@ deriving anyclass instance data ObjectorInitState = -- | The objector still needs to set the intersection of the ChainSync - -- server before resuming retrieval of headers. + -- server before resuming retrieval of headers. This is mainly because + -- the message pipeline might be drained to do jumps, and this causes + -- the intersection on the ChainSync server to diverge from the tip of + -- the candidate fragment. 
Starting | Started deriving (Generic, Show, NoThunks) From c94e8ef5489e387d9ac2ee54c543d724668270b7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Facundo=20Dom=C3=ADnguez?= Date: Fri, 2 Aug 2024 21:53:45 +0000 Subject: [PATCH 67/76] Expect CandidateTooSparse in gdd tests --- .../Test/Consensus/Genesis/Tests/DensityDisconnect.hs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Tests/DensityDisconnect.hs b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Tests/DensityDisconnect.hs index 5dcf3c06d6..8810392aec 100644 --- a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Tests/DensityDisconnect.hs +++ b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Tests/DensityDisconnect.hs @@ -31,7 +31,7 @@ import Ouroboros.Consensus.Config.SecurityParam import Ouroboros.Consensus.Genesis.Governor (DensityBounds, densityDisconnect, sharedCandidatePrefix) import Ouroboros.Consensus.MiniProtocol.ChainSync.Client - (ChainSyncClientException (DensityTooLow), + (ChainSyncClientException (..), ChainSyncState (..)) import Ouroboros.Consensus.Util.Condense (condense) import Ouroboros.Network.AnchoredFragment (AnchoredFragment) @@ -475,6 +475,7 @@ prop_densityDisconnectTriggersChainSel = othersCount = Map.size (adversarialPeers $ psSchedule gtSchedule) exnCorrect = case exceptionsByComponent ChainSyncClient stateView of [fromException -> Just DensityTooLow] -> True + [fromException -> Just CandidateTooSparse{}] -> True [] | othersCount == 0 -> True _ -> False tipPointCorrect = Just (getTrunkTip gtBlockTree) == svTipBlock From 39b487765b37ab348fa37119321e1d3a5fbe6469 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Facundo=20Dom=C3=ADnguez?= Date: Mon, 5 Aug 2024 02:05:34 +0000 Subject: [PATCH 68/76] Run CSJ and gdd tests more times by default --- .../test/consensus-test/Test/Consensus/Genesis/Tests/CSJ.hs | 3 ++- .../Test/Consensus/Genesis/Tests/DensityDisconnect.hs | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Tests/CSJ.hs b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Tests/CSJ.hs index 70bb0eeb71..f468b763c5 100644 --- a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Tests/CSJ.hs +++ b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Tests/CSJ.hs @@ -31,10 +31,11 @@ import Test.Tasty.QuickCheck import Test.Util.Orphans.IOLike () import Test.Util.PartialAccessors import Test.Util.TestBlock (TestBlock) -import Test.Util.TestEnv (adjustQuickCheckMaxSize) +import Test.Util.TestEnv (adjustQuickCheckMaxSize, adjustQuickCheckTests) tests :: TestTree tests = + adjustQuickCheckTests (* 10) $ adjustQuickCheckMaxSize (`div` 5) $ testGroup "CSJ" diff --git a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Tests/DensityDisconnect.hs b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Tests/DensityDisconnect.hs index 8810392aec..2453907644 100644 --- a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Tests/DensityDisconnect.hs +++ b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Tests/DensityDisconnect.hs @@ -68,7 +68,7 @@ import Test.Util.TestEnv (adjustQuickCheckMaxSize, tests :: TestTree tests = - adjustQuickCheckTests (* 4) $ + adjustQuickCheckTests (* 10) $ adjustQuickCheckMaxSize (`div` 5) $ testGroup 
"gdd" [ testProperty "basic" prop_densityDisconnectStatic, From 0f22ff16ad934dcf970d0753a2b1b2e210890066 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Facundo=20Dom=C3=ADnguez?= Date: Mon, 5 Aug 2024 13:54:04 +0000 Subject: [PATCH 69/76] Add a notice about untracked delays in the node restart test --- .../Test/Consensus/Genesis/Tests/Uniform.hs | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Tests/Uniform.hs b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Tests/Uniform.hs index 6dbaf06384..25a0f5c99d 100644 --- a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Tests/Uniform.hs +++ b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Tests/Uniform.hs @@ -404,7 +404,14 @@ prop_downtime = forAllGenesisTest shrinkPeerSchedules - theProperty + (\genesisTest stateView -> + counterexample (unlines + [ "TODO: Shutting down the node inserts delays in the simulation that" + , "are not reflected in the point schedule table. Reporting these delays" + , "correctly is still to be done." + ]) $ + theProperty genesisTest stateView + ) where pointsGeneratorParams gt = PointsGeneratorParams From ab33a039040c77a325368beaec551a560757e9b1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Facundo=20Dom=C3=ADnguez?= Date: Mon, 5 Aug 2024 17:07:42 +0000 Subject: [PATCH 70/76] Remove redundant empty window check from the GDD --- .../Ouroboros/Consensus/Genesis/Governor.hs | 8 +------- .../Ouroboros/Consensus/MiniProtocol/ChainSync/Client.hs | 3 +++ .../Consensus/MiniProtocol/ChainSync/Client/Jumping.hs | 7 +++++++ 3 files changed, 11 insertions(+), 7 deletions(-) diff --git a/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Genesis/Governor.hs b/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Genesis/Governor.hs index 52a58b78b0..39f3a3b488 100644 --- a/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Genesis/Governor.hs +++ b/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Genesis/Governor.hs @@ -383,11 +383,7 @@ densityDisconnect (GenesisWindow sgen) (SecurityParam k) states candidateSuffixe , upperBound = ub0 , hasBlockAfter = hasBlockAfter0 , idling = idling0 - }) -> - -- If the density is 0, the peer should be disconnected. This affects - -- ChainSync jumping, where genesis windows with no headers prevent jumps - -- from happening. - if ub0 == 0 then pure peer0 else do + }) -> do (_peer1, DensityBounds {clippedFragment = frag1, offersMoreThanK, lowerBound = lb1 }) <- densityBounds -- Don't disconnect peer0 if it sent no headers after the intersection yet @@ -395,8 +391,6 @@ densityDisconnect (GenesisWindow sgen) (SecurityParam k) states candidateSuffixe -- -- See Note [Chain disagreement] -- - -- Note: hasBlockAfter0 is False if frag0 is empty and ub0>0. - -- But we leave it here as a reminder that we care about it. 
guard $ idling0 || not (AF.null frag0) || hasBlockAfter0
         -- ensure that the two peer fragments don't share any
         -- headers after the LoE
diff --git a/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/MiniProtocol/ChainSync/Client.hs b/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/MiniProtocol/ChainSync/Client.hs
index 8210549871..76e433c051 100644
--- a/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/MiniProtocol/ChainSync/Client.hs
+++ b/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/MiniProtocol/ChainSync/Client.hs
@@ -1734,6 +1734,9 @@ checkTime cfgEnv dynEnv intEnv =
               Just ledgerView ->
                 return $ return $ Intersects kis' ledgerView
 
+  -- Note [Candidate comparing beyond the forecast horizon]
+  -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  --
   -- When a header is beyond the forecast horizon and their fragment is not
   -- preferable to our selection (ourFrag), then we disconnect, as we will
   -- never end up selecting it.
diff --git a/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/MiniProtocol/ChainSync/Client/Jumping.hs b/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/MiniProtocol/ChainSync/Client/Jumping.hs
index ad76dba48c..9441525862 100644
--- a/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/MiniProtocol/ChainSync/Client/Jumping.hs
+++ b/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/MiniProtocol/ChainSync/Client/Jumping.hs
@@ -74,6 +74,13 @@
 -- when the client should pause, download headers, or ask about agreement with
 -- a given point (jumping). See the 'Jumping' type for more details.
 --
+-- CSJ depends on the ChainSync client to disconnect dynamos that have an empty
+-- genesis window after their intersection with the selection. This is necessary
+-- because otherwise there are no points to jump to, and CSJ would get
+-- stuck when the dynamo blocks on the forecast horizon. See
+-- Note [Candidate comparing beyond the forecast horizon] in
+-- "Ouroboros.Consensus.MiniProtocol.ChainSync.Client".
+--
 -- Interactions with the BlockFetch logic
 -- --------------------------------------
 --

From 23542f9d25b10c696bb967050eefed0e4a139abe Mon Sep 17 00:00:00 2001
From: Alexander Esgen
Date: Tue, 6 Aug 2024 16:15:43 +0200
Subject: [PATCH 71/76] fixup! ChainDB: let the BlockFetch client add blocks
 asynchronously

---
 .../Consensus/Storage/ChainDB/Impl/Types.hs   | 49 ++++++++++---------
 1 file changed, 27 insertions(+), 22 deletions(-)

diff --git a/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/Impl/Types.hs b/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/Impl/Types.hs
index cd128d3e42..a448ab62a9 100644
--- a/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/Impl/Types.hs
+++ b/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Storage/ChainDB/Impl/Types.hs
@@ -67,8 +67,7 @@ module Ouroboros.Consensus.Storage.ChainDB.Impl.Types (
   , TraceValidationEvent (..)
) where -import Cardano.Prelude (Bifunctor (second)) -import Control.Monad (void, when) +import Control.Monad (join, when) import Control.Tracer import Data.Foldable (for_) import Data.Map.Strict (Map) @@ -110,8 +109,7 @@ import Ouroboros.Consensus.Util.CallStack import Ouroboros.Consensus.Util.Enclose (Enclosing, Enclosing' (..)) import Ouroboros.Consensus.Util.IOLike import Ouroboros.Consensus.Util.ResourceRegistry -import Ouroboros.Consensus.Util.STM (WithFingerprint, - blockUntilChanged) +import Ouroboros.Consensus.Util.STM (WithFingerprint) import Ouroboros.Network.AnchoredFragment (AnchoredFragment) import Ouroboros.Network.Block (MaxSlotNo (..)) import Ouroboros.Network.BlockFetch.ConsensusInterface @@ -555,36 +553,43 @@ addReprocessLoEBlocks tracer (ChainSelQueue {varChainSelReprocessLoEBlocks}) = d -- queue is empty; in that case, reports the starvation (and its end) to the -- callback. getChainSelMessage - :: (HasHeader blk, IOLike m) + :: forall m blk. (HasHeader blk, IOLike m) => Tracer m (TraceChainSelStarvationEvent blk) -> StrictTVar m ChainSelStarvation -> ChainSelQueue m blk -> m (ChainSelMessage m blk) getChainSelMessage starvationTracer starvationVar queue = go where - go = do - (reprocessLoEBlocks, chainSelQueue) <- atomically readBoth - case reprocessLoEBlocks of + go = join $ atomically $ + readTVar varChainSelReprocessLoEBlocks >>= \case True -> do - writeTVarIO varChainSelReprocessLoEBlocks False - return ChainSelReprocessLoEBlocks - False -> + writeTVar varChainSelReprocessLoEBlocks False + pure $ pure ChainSelReprocessLoEBlocks + False -> do + chainSelQueue <- readTVar varChainSelQueue case Map.minView chainSelQueue of Just (blockToAdd, chainSelQueue') -> do - writeTVarIO varChainSelQueue chainSelQueue' - terminateStarvationMeasure blockToAdd - return $ ChainSelAddBlock blockToAdd - Nothing -> do + writeTVar varChainSelQueue chainSelQueue' + pure $ do + terminateStarvationMeasure blockToAdd + pure $ ChainSelAddBlock blockToAdd + Nothing -> pure $ do startStarvationMeasure - void $ atomically $ blockUntilChanged (second Map.null) (False, True) readBoth + blockUntilMoreWork go + ChainSelQueue {varChainSelQueue, varChainSelReprocessLoEBlocks} = queue - writeTVarIO v x = atomically $ writeTVar v x - swapTVarIO v x = atomically $ swapTVar v x - readBoth = (,) <$> readTVar varChainSelReprocessLoEBlocks <*> readTVar varChainSelQueue + + -- Wait until we either need to reprocess blocks due to the LoE, or until a + -- new block arrives. + blockUntilMoreWork :: m () + blockUntilMoreWork = atomically $ do + reprocessLoEBlocks <- readTVar varChainSelReprocessLoEBlocks + chainSelQueue <- readTVar varChainSelQueue + check $ reprocessLoEBlocks || not (Map.null chainSelQueue) startStarvationMeasure = do - prevStarvation <- swapTVarIO starvationVar ChainSelStarvationOngoing + prevStarvation <- atomically $ swapTVar starvationVar ChainSelStarvationOngoing when (prevStarvation /= ChainSelStarvationOngoing) $ traceWith starvationTracer . ChainSelStarvationStarted =<< getMonotonicTime @@ -593,7 +598,7 @@ getChainSelMessage starvationTracer starvationVar queue = go when (prevStarvation == ChainSelStarvationOngoing) $ do tf <- getMonotonicTime traceWith starvationTracer (ChainSelStarvationEnded tf $ blockRealPoint block) - writeTVarIO starvationVar (ChainSelStarvationEndedAt tf) + atomically $ writeTVar starvationVar (ChainSelStarvationEndedAt tf) -- | Flush the 'ChainSelQueue' queue and notify the waiting threads. 
-- @@ -601,7 +606,7 @@ getChainSelMessage starvationTracer starvationVar queue = go -- will write after the flush?! closeChainSelQueue :: IOLike m => ChainSelQueue m blk -> STM m () closeChainSelQueue ChainSelQueue {varChainSelQueue} = do - chainSelQueue <- readTVar varChainSelQueue + chainSelQueue <- swapTVar varChainSelQueue Map.empty for_ chainSelQueue $ \BlockToAdd {varBlockProcessed} -> putTMVar varBlockProcessed $ FailedToAddBlock "Queue flushed" From d15230ef40e614d483acc575a88abd9a58fb2113 Mon Sep 17 00:00:00 2001 From: Nicolas BACQUEY Date: Tue, 6 Aug 2024 16:27:42 +0200 Subject: [PATCH 72/76] Introduce `peersOnlyAdversary` and classify abnormal test peers as adversarial --- .../consensus-test/Test/Consensus/Genesis/Tests/LoP.hs | 9 ++++++--- .../Test/Consensus/PeerSimulator/Tests/Timeouts.hs | 5 +++-- .../consensus-test/Test/Consensus/PointSchedule/Peers.hs | 8 ++++++++ 3 files changed, 17 insertions(+), 5 deletions(-) diff --git a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Tests/LoP.hs b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Tests/LoP.hs index a13ca54a01..88532ea924 100644 --- a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Tests/LoP.hs +++ b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Tests/LoP.hs @@ -22,7 +22,8 @@ import Test.Consensus.PeerSimulator.Run (SchedulerConfig (..), defaultSchedulerConfig) import Test.Consensus.PeerSimulator.StateView import Test.Consensus.PointSchedule -import Test.Consensus.PointSchedule.Peers (peers', peersOnlyHonest) +import Test.Consensus.PointSchedule.Peers (peers', peersOnlyAdversary, + peersOnlyHonest) import Test.Consensus.PointSchedule.Shrinking (shrinkPeerSchedules) import Test.Consensus.PointSchedule.SinglePeer (scheduleBlockPoint, scheduleHeaderPoint, scheduleTipPoint) @@ -81,7 +82,9 @@ prop_wait mustTimeout = dullSchedule timeout (_ AF.:> tipBlock) = let offset :: DiffTime = if mustTimeout then 1 else -1 in PointSchedule - { psSchedule = peersOnlyHonest [(Time 0, scheduleTipPoint tipBlock)] + { psSchedule = + (if mustTimeout then peersOnlyAdversary else peersOnlyHonest) + [(Time 0, scheduleTipPoint tipBlock)] , psStartOrder = [] , psMinEndTime = Time $ timeout + offset } @@ -173,7 +176,7 @@ prop_serve mustTimeout = makeSchedule fragment@(_ AF.:> tipBlock) = PointSchedule { psSchedule = - peersOnlyHonest $ + (if mustTimeout then peersOnlyAdversary else peersOnlyHonest) $ (Time 0, scheduleTipPoint tipBlock) : ( flip concatMap (zip [1 ..] 
(AF.toOldestFirst fragment)) $ \(i, block) -> [ (Time (secondsRationalToDiffTime (i * timeBetweenBlocks)), scheduleHeaderPoint block), diff --git a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/Tests/Timeouts.hs b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/Tests/Timeouts.hs index 8e218df6fa..e33ac3154b 100644 --- a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/Tests/Timeouts.hs +++ b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/Tests/Timeouts.hs @@ -18,7 +18,8 @@ import Test.Consensus.Genesis.Setup import Test.Consensus.PeerSimulator.Run (defaultSchedulerConfig) import Test.Consensus.PeerSimulator.StateView import Test.Consensus.PointSchedule -import Test.Consensus.PointSchedule.Peers (peersOnlyHonest) +import Test.Consensus.PointSchedule.Peers (peersOnlyAdversary, + peersOnlyHonest) import Test.Consensus.PointSchedule.SinglePeer (scheduleBlockPoint, scheduleHeaderPoint, scheduleTipPoint) import Test.QuickCheck @@ -63,7 +64,7 @@ prop_timeouts mustTimeout = do dullSchedule _ (AF.Empty _) = error "requires a non-empty block tree" dullSchedule timeout (_ AF.:> tipBlock) = let offset :: DiffTime = if mustTimeout then 1 else -1 - psSchedule = peersOnlyHonest $ [ + psSchedule = (if mustTimeout then peersOnlyAdversary else peersOnlyHonest) $ [ (Time 0, scheduleTipPoint tipBlock), (Time 0, scheduleHeaderPoint tipBlock), (Time 0, scheduleBlockPoint tipBlock) diff --git a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PointSchedule/Peers.hs b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PointSchedule/Peers.hs index 973de5ef3a..168729dd20 100644 --- a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PointSchedule/Peers.hs +++ b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PointSchedule/Peers.hs @@ -32,6 +32,7 @@ module Test.Consensus.PointSchedule.Peers ( , peersFromPeerIdList' , peersFromPeerList , peersList + , peersOnlyAdversary , peersOnlyHonest , toMap , toMap' @@ -147,6 +148,13 @@ peersOnlyHonest value = adversarialPeers = Map.empty } +peersOnlyAdversary :: a -> Peers a +peersOnlyAdversary value = + Peers + { adversarialPeers = Map.singleton 1 value, + honestPeers = Map.empty + } + -- | Extract all 'PeerId's. getPeerIds :: Peers a -> [PeerId] getPeerIds Peers {honestPeers, adversarialPeers} = From bcb2c111ae86fb338b37a4501ad42cfc0435b804 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nicolas=20=E2=80=9CNiols=E2=80=9D=20Jeannerod?= Date: Tue, 6 Aug 2024 16:53:31 +0200 Subject: [PATCH 73/76] Document all tests that did not have documentation --- .../Test/Consensus/Genesis/Tests/LoE.hs | 9 ++++++++- .../Test/Consensus/Genesis/Tests/LoP.hs | 14 ++++++++++++++ .../Consensus/Genesis/Tests/LongRangeAttack.hs | 6 ++++++ 3 files changed, 28 insertions(+), 1 deletion(-) diff --git a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Tests/LoE.hs b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Tests/LoE.hs index 82313829c9..644707ff2e 100644 --- a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Tests/LoE.hs +++ b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Tests/LoE.hs @@ -41,7 +41,14 @@ tests = ] -- | Tests that the selection advances in presence of the LoE when a peer is --- killed by something that is not LoE-aware, eg. the timeouts. +-- killed by something that is not LoE-aware, eg. the timeouts. 
This test
+-- features an honest peer behaving normally and an adversarial peer behaving
+-- in such a way that it will get killed by timeouts. We check that, after the
+-- adversary gets disconnected, the LoE gets updated to stop taking it into
+-- account. There are two variants of the test: one with timeouts enabled, and
+-- one without. In the case where timeouts are disabled, we check that we do in
+-- fact remain stuck at the intersection between the trunk and the other chain.
+--
 -- NOTE: Same as 'LoP.prop_delayAttack' with timeouts instead of LoP.
 prop_adversaryHitsTimeouts :: Bool -> Property
 prop_adversaryHitsTimeouts timeoutsEnabled =
diff --git a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Tests/LoP.hs b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Tests/LoP.hs
index 88532ea924..0ac19660be 100644
--- a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Tests/LoP.hs
+++ b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Tests/LoP.hs
@@ -58,6 +58,13 @@ tests =
       testProperty "delaying attack fails with LoP" (prop_delayAttack True)
     ]
 
+-- | Simple test in which we connect to only one peer, who advertises the tip of
+-- the block tree trunk and then does nothing. If the given boolean,
+-- @mustTimeout@, is @True@, then we wait just long enough for the LoP bucket to
+-- empty; we expect to observe an 'EmptyBucket' exception in the ChainSync
+-- client. If @mustTimeout@ is @False@, then we wait not quite as long, so the
+-- LoP bucket should not be empty at the end of the test and we should observe
+-- no exception in the ChainSync client.
 prop_wait :: Bool -> Property
 prop_wait mustTimeout =
   forAllGenesisTest
@@ -89,6 +96,13 @@ prop_wait mustTimeout =
       , psMinEndTime = Time $ timeout + offset
      }
 
+-- | Simple test in which we connect to only one peer, who advertises the tip of
+-- the block tree trunk, serves all of its headers, and then does nothing.
+-- Because the peer does not send its blocks, the ChainSync client will end
+-- up stuck, waiting behind the forecast horizon. We expect that the LoP will
+-- then be disabled and that, therefore, one could wait forever in this state.
+-- We disable the timeouts and check that, indeed, the ChainSync client observes
+-- no exception.
 prop_waitBehindForecastHorizon :: Property
 prop_waitBehindForecastHorizon =
   forAllGenesisTest
diff --git a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Tests/LongRangeAttack.hs b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Tests/LongRangeAttack.hs
index 67a6846c0f..dcf37b1b8e 100644
--- a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Tests/LongRangeAttack.hs
+++ b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Tests/LongRangeAttack.hs
@@ -34,6 +34,12 @@ tests =
       testProperty "one adversary" prop_longRangeAttack
     ]
 
+-- | This test case features a long-range attack with one adversary. The honest
+-- peer serves the block tree trunk, while the adversary serves its own chain,
+-- forking off the trunk by at least @k@ blocks, but worse than the trunk.
+-- The adversary serves the chain more rapidly than the honest peer. We check at
+-- the end that the selection is honest. This property does not hold with Praos,
+-- but should hold with Genesis.
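Concretely, the honesty check reduces to comparing the node's final tip against the trunk tip. A minimal sketch, assuming 'svTipBlock' is the StateView's final-tip field and reusing 'getTrunkTip'/'gtBlockTree' as in the gdd tests (the helper name is hypothetical; the property below drives the peer simulator instead):

    -- Hypothetical helper: the selection is honest iff the final tip is
    -- the tip of the block tree trunk, i.e. the adversarial branch lost.
    selectionIsHonest :: GenesisTest TestBlock s -> StateView TestBlock -> Bool
    selectionIsHonest gt sv =
      svTipBlock sv == Just (getTrunkTip (gtBlockTree gt))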
prop_longRangeAttack :: Property prop_longRangeAttack = -- NOTE: `shrinkPeerSchedules` only makes sense for tests that expect the From 88c10cd86fc68fec6945cb17fc4c7c850ee654e3 Mon Sep 17 00:00:00 2001 From: Nicolas BACQUEY Date: Tue, 6 Aug 2024 17:31:31 +0200 Subject: [PATCH 74/76] lint: stylish-haskell --- .../Ouroboros/Consensus/Node/Genesis.hs | 20 +++++++++---------- .../Ouroboros/Consensus/NodeKernel.hs | 3 ++- .../Test/Consensus/Genesis/Tests/CSJ.hs | 3 ++- .../Genesis/Tests/DensityDisconnect.hs | 9 ++++----- .../Test/Consensus/Genesis/Tests/LoE.hs | 3 ++- .../Test/Consensus/Genesis/Tests/LoP.hs | 3 ++- .../Test/Consensus/PeerSimulator/Run.hs | 2 +- .../Ouroboros/Consensus/Genesis/Governor.hs | 2 +- .../MiniProtocol/BlockFetch/Client.hs | 8 ++++---- .../MiniProtocol/ChainSync/Client.hs | 14 ++++++------- 10 files changed, 34 insertions(+), 33 deletions(-) diff --git a/ouroboros-consensus-diffusion/src/ouroboros-consensus-diffusion/Ouroboros/Consensus/Node/Genesis.hs b/ouroboros-consensus-diffusion/src/ouroboros-consensus-diffusion/Ouroboros/Consensus/Node/Genesis.hs index 4e01697bad..c599f78676 100644 --- a/ouroboros-consensus-diffusion/src/ouroboros-consensus-diffusion/Ouroboros/Consensus/Node/Genesis.hs +++ b/ouroboros-consensus-diffusion/src/ouroboros-consensus-diffusion/Ouroboros/Consensus/Node/Genesis.hs @@ -1,5 +1,5 @@ -{-# LANGUAGE DeriveTraversable #-} {-# LANGUAGE DeriveGeneric #-} +{-# LANGUAGE DeriveTraversable #-} {-# LANGUAGE DerivingStrategies #-} {-# LANGUAGE LambdaCase #-} {-# LANGUAGE NamedFieldPuns #-} @@ -60,14 +60,14 @@ data GenesisConfig = GenesisConfig -- | Genesis configuration flags and low-level args, as parsed from config file or CLI data GenesisConfigFlags = GenesisConfigFlags - { gcfEnableCSJ :: Bool - , gcfEnableLoEAndGDD :: Bool - , gcfEnableLoP :: Bool - , gcfBulkSyncGracePeriod :: Maybe Integer - , gcfBucketCapacity :: Maybe Integer - , gcfBucketRate :: Maybe Integer - , gcfCSJJumpSize :: Maybe Integer - , gcfGDDRateLimit :: Maybe DiffTime + { gcfEnableCSJ :: Bool + , gcfEnableLoEAndGDD :: Bool + , gcfEnableLoP :: Bool + , gcfBulkSyncGracePeriod :: Maybe Integer + , gcfBucketCapacity :: Maybe Integer + , gcfBucketRate :: Maybe Integer + , gcfCSJJumpSize :: Maybe Integer + , gcfGDDRateLimit :: Maybe DiffTime } deriving stock (Eq, Generic, Show) defaultGenesisConfigFlags :: GenesisConfigFlags @@ -153,7 +153,7 @@ data LoEAndGDDNodeKernelArgs m blk = LoEAndGDDNodeKernelArgs { -- opened the ChainDB (which happens before we initialize the NodeKernel). -- After that, this TVar will not be modified again. 
lgnkaLoEFragmentTVar :: !(StrictTVar m (ChainDB.GetLoEFragment m blk)) - , lgnkaGDDRateLimit :: DiffTime + , lgnkaGDDRateLimit :: DiffTime } -- | Create the initial 'GenesisNodeKernelArgs" (with a temporary -- 'ChainDB.GetLoEFragment' that will be replaced via 'setGetLoEFragment') and a diff --git a/ouroboros-consensus-diffusion/src/ouroboros-consensus-diffusion/Ouroboros/Consensus/NodeKernel.hs b/ouroboros-consensus-diffusion/src/ouroboros-consensus-diffusion/Ouroboros/Consensus/NodeKernel.hs index f14c9a2983..cfdba7929b 100644 --- a/ouroboros-consensus-diffusion/src/ouroboros-consensus-diffusion/Ouroboros/Consensus/NodeKernel.hs +++ b/ouroboros-consensus-diffusion/src/ouroboros-consensus-diffusion/Ouroboros/Consensus/NodeKernel.hs @@ -66,7 +66,8 @@ import Ouroboros.Consensus.MiniProtocol.ChainSync.Client import Ouroboros.Consensus.MiniProtocol.ChainSync.Client.InFutureCheck (SomeHeaderInFutureCheck) import Ouroboros.Consensus.Node.Genesis (GenesisNodeKernelArgs (..), - LoEAndGDDConfig (..), LoEAndGDDNodeKernelArgs (..), setGetLoEFragment) + LoEAndGDDConfig (..), LoEAndGDDNodeKernelArgs (..), + setGetLoEFragment) import Ouroboros.Consensus.Node.GSM (GsmNodeKernelArgs (..)) import qualified Ouroboros.Consensus.Node.GSM as GSM import Ouroboros.Consensus.Node.Run diff --git a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Tests/CSJ.hs b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Tests/CSJ.hs index f468b763c5..3fdf598fa3 100644 --- a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Tests/CSJ.hs +++ b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Tests/CSJ.hs @@ -31,7 +31,8 @@ import Test.Tasty.QuickCheck import Test.Util.Orphans.IOLike () import Test.Util.PartialAccessors import Test.Util.TestBlock (TestBlock) -import Test.Util.TestEnv (adjustQuickCheckMaxSize, adjustQuickCheckTests) +import Test.Util.TestEnv (adjustQuickCheckMaxSize, + adjustQuickCheckTests) tests :: TestTree tests = diff --git a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Tests/DensityDisconnect.hs b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Tests/DensityDisconnect.hs index 2453907644..73c2b2c10f 100644 --- a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Tests/DensityDisconnect.hs +++ b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Tests/DensityDisconnect.hs @@ -31,8 +31,7 @@ import Ouroboros.Consensus.Config.SecurityParam import Ouroboros.Consensus.Genesis.Governor (DensityBounds, densityDisconnect, sharedCandidatePrefix) import Ouroboros.Consensus.MiniProtocol.ChainSync.Client - (ChainSyncClientException (..), - ChainSyncState (..)) + (ChainSyncClientException (..), ChainSyncState (..)) import Ouroboros.Consensus.Util.Condense (condense) import Ouroboros.Network.AnchoredFragment (AnchoredFragment) import qualified Ouroboros.Network.AnchoredFragment as AF @@ -474,10 +473,10 @@ prop_densityDisconnectTriggersChainSel = let othersCount = Map.size (adversarialPeers $ psSchedule gtSchedule) exnCorrect = case exceptionsByComponent ChainSyncClient stateView of - [fromException -> Just DensityTooLow] -> True + [fromException -> Just DensityTooLow] -> True [fromException -> Just CandidateTooSparse{}] -> True - [] | othersCount == 0 -> True - _ -> False + [] | othersCount == 0 -> True + _ -> False tipPointCorrect = Just (getTrunkTip gtBlockTree) == svTipBlock in counterexample "Unexpected exceptions" exnCorrect .&&. 
diff --git a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Tests/LoE.hs b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Tests/LoE.hs index 644707ff2e..f26ffb0c91 100644 --- a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Tests/LoE.hs +++ b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Tests/LoE.hs @@ -26,7 +26,8 @@ import Test.Tasty import Test.Tasty.QuickCheck import Test.Util.Orphans.IOLike () import Test.Util.PartialAccessors -import Test.Util.TestEnv (adjustQuickCheckMaxSize, adjustQuickCheckTests) +import Test.Util.TestEnv (adjustQuickCheckMaxSize, + adjustQuickCheckTests) tests :: TestTree tests = diff --git a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Tests/LoP.hs b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Tests/LoP.hs index 0ac19660be..6e633e98a4 100644 --- a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Tests/LoP.hs +++ b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Tests/LoP.hs @@ -31,7 +31,8 @@ import Test.Tasty import Test.Tasty.QuickCheck import Test.Util.Orphans.IOLike () import Test.Util.PartialAccessors -import Test.Util.TestEnv (adjustQuickCheckMaxSize, adjustQuickCheckTests) +import Test.Util.TestEnv (adjustQuickCheckMaxSize, + adjustQuickCheckTests) tests :: TestTree tests = diff --git a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/Run.hs b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/Run.hs index e91c71eac6..6f88312ed2 100644 --- a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/Run.hs +++ b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/PeerSimulator/Run.hs @@ -234,7 +234,7 @@ itIsTimeToRestartTheNode :: NodeLifecycle blk m -> DiffTime -> Bool itIsTimeToRestartTheNode NodeLifecycle {nlMinDuration} duration = case nlMinDuration of Just minInterval -> duration > minInterval - Nothing -> False + Nothing -> False -- | The 'Tick' contains a state update for a specific peer. -- If the peer has not terminated by protocol rules, this will update its TMVar diff --git a/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Genesis/Governor.hs b/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Genesis/Governor.hs index 39f3a3b488..5d5080ea39 100644 --- a/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Genesis/Governor.hs +++ b/ouroboros-consensus/src/ouroboros-consensus/Ouroboros/Consensus/Genesis/Governor.hs @@ -253,7 +253,7 @@ sharedCandidatePrefix curChain candidates = -- When there is no intersection, we assume the candidate fragment is -- empty and anchored at the immutable tip. -- See Note [CSJ truncates the candidate fragments]. 
- Nothing -> (peer, AF.takeOldest 0 curChain) + Nothing -> (peer, AF.takeOldest 0 curChain) Just (_, suffix) -> (peer, suffix) immutableTipSuffixes = diff --git a/ouroboros-consensus/test/consensus-test/Test/Consensus/MiniProtocol/BlockFetch/Client.hs b/ouroboros-consensus/test/consensus-test/Test/Consensus/MiniProtocol/BlockFetch/Client.hs index 9b58d13246..56abcb3238 100644 --- a/ouroboros-consensus/test/consensus-test/Test/Consensus/MiniProtocol/BlockFetch/Client.hs +++ b/ouroboros-consensus/test/consensus-test/Test/Consensus/MiniProtocol/BlockFetch/Client.hs @@ -52,11 +52,11 @@ import Ouroboros.Consensus.Util.STM (blockUntilJust, forkLinkedWatcher) import Ouroboros.Network.AnchoredFragment (AnchoredFragment) import qualified Ouroboros.Network.AnchoredFragment as AF -import Ouroboros.Network.BlockFetch (BlockFetchConfiguration (..), GenesisBlockFetchConfiguration (..), +import Ouroboros.Network.BlockFetch (BlockFetchConfiguration (..), BlockFetchConsensusInterface (..), FetchMode (..), - blockFetchLogic, bracketFetchClient, - bracketKeepAliveClient, bracketSyncWithFetchClient, - newFetchClientRegistry) + GenesisBlockFetchConfiguration (..), blockFetchLogic, + bracketFetchClient, bracketKeepAliveClient, + bracketSyncWithFetchClient, newFetchClientRegistry) import Ouroboros.Network.BlockFetch.Client (blockFetchClient) import Ouroboros.Network.ControlMessage (ControlMessage (..)) import Ouroboros.Network.Mock.Chain (Chain) diff --git a/ouroboros-consensus/test/consensus-test/Test/Consensus/MiniProtocol/ChainSync/Client.hs b/ouroboros-consensus/test/consensus-test/Test/Consensus/MiniProtocol/ChainSync/Client.hs index e674013c6d..511f204835 100644 --- a/ouroboros-consensus/test/consensus-test/Test/Consensus/MiniProtocol/ChainSync/Client.hs +++ b/ouroboros-consensus/test/consensus-test/Test/Consensus/MiniProtocol/ChainSync/Client.hs @@ -84,14 +84,12 @@ import Ouroboros.Consensus.MiniProtocol.ChainSync.Client (CSJConfig (..), ChainDbView (..), ChainSyncClientException, ChainSyncClientHandleCollection (..), - ChainSyncClientResult (..), - ChainSyncLoPBucketConfig (..), ChainSyncState (..), - ChainSyncStateView (..), ConfigEnv (..), Consensus, - DynamicEnv (..), Our (..), Their (..), - TraceChainSyncClientEvent (..), bracketChainSyncClient, - chainSyncClient, chainSyncStateFor, - newChainSyncClientHandleCollection, - viewChainSyncState) + ChainSyncClientResult (..), ChainSyncLoPBucketConfig (..), + ChainSyncState (..), ChainSyncStateView (..), + ConfigEnv (..), Consensus, DynamicEnv (..), Our (..), + Their (..), TraceChainSyncClientEvent (..), + bracketChainSyncClient, chainSyncClient, chainSyncStateFor, + newChainSyncClientHandleCollection, viewChainSyncState) import qualified Ouroboros.Consensus.MiniProtocol.ChainSync.Client.InFutureCheck as InFutureCheck import Ouroboros.Consensus.Node.GsmState (GsmState (Syncing)) import Ouroboros.Consensus.Node.NetworkProtocolVersion From bfc0cac89a1afb4ceaea3997137b194e4598dd90 Mon Sep 17 00:00:00 2001 From: Nicolas BACQUEY Date: Tue, 6 Aug 2024 18:24:47 +0200 Subject: [PATCH 75/76] Update `source-repository-package` to `blockfetch/milestone-1-rebased` in `ouroboros-network` --- cabal.project | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cabal.project b/cabal.project index a08d97ef82..e23317fb39 100644 --- a/cabal.project +++ b/cabal.project @@ -42,8 +42,8 @@ if(os(windows)) source-repository-package type: git location: https://github.com/IntersectMBO/ouroboros-network - tag: f71d0126ff5d84af44b644460b4e08cf3c256914 - --sha256: 
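The transformation this test relies on — keeping a peer honest for ChainSync while starving BlockFetch — amounts to dropping the block points from an otherwise honest schedule. A minimal sketch (standalone and hypothetical; the real generator below works on the whole Peers structure and keeps one peer fully honest):

    -- Hypothetical helper: strip every block point from a peer's schedule,
    -- leaving tip and header points intact, so the peer announces tips and
    -- serves headers but never sends a block.
    dropBlockPoints :: [(Time, SchedulePoint blk)] -> [(Time, SchedulePoint blk)]
    dropBlockPoints = filter (not . isBlockPoint . snd)
      where
        isBlockPoint (ScheduleBlockPoint _) = True
        isBlockPoint _                      = False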
1a62hqddpnc0j5r7nl54q79nrxyw77dphpsgp68hxij210dkpvca + tag: fcb842fcd6f32b43a7cdf18a4301c1659a8bb879 + --sha256: kjwUrduwwxC+5QRQNJa4stEBzz7kqDJyyHOgGMfDw7s= subdir: ouroboros-network ouroboros-network-api From f907373da947b9e46c365df000931822b5aa5c03 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nicolas=20=E2=80=9CNiols=E2=80=9D=20Jeannerod?= Date: Thu, 8 Aug 2024 17:42:50 +0200 Subject: [PATCH 76/76] Document prop_blockFetchLeashingAttack --- .../Test/Consensus/Genesis/Tests/Uniform.hs | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Tests/Uniform.hs b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Tests/Uniform.hs index 25a0f5c99d..bfe69adb0e 100644 --- a/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Tests/Uniform.hs +++ b/ouroboros-consensus-diffusion/test/consensus-test/Test/Consensus/Genesis/Tests/Uniform.hs @@ -419,6 +419,11 @@ prop_downtime = forAllGenesisTest , pgpDowntime = DowntimeWithSecurityParam (gtSecurityParam gt) } +-- | Test that the block fetch leashing attack does not delay the immutable tip. +-- This leashing attack consists in having adversarial peers that behave +-- honestly when it comes to ChainSync but refuse to send blocks. A proper node +-- under test should detect those behaviours as adversarial and find a way to +-- make progress. prop_blockFetchLeashingAttack :: Property prop_blockFetchLeashingAttack = forAllGenesisTest @@ -433,6 +438,9 @@ prop_blockFetchLeashingAttack = where genBlockFetchLeashingSchedule :: GenesisTest TestBlock () -> QC.Gen (PointSchedule TestBlock) genBlockFetchLeashingSchedule genesisTest = do + -- A schedule with several honest peers and no adversaries. We will then + -- keep one of those as honest and remove the block points from the + -- others, hence producing one honest peer and several adversaries. PointSchedule {psSchedule} <- stToGen $ uniformPoints