From 702cc810f887b72016ec1114f695c492bb754545 Mon Sep 17 00:00:00 2001 From: gokutheengineer Date: Fri, 14 Apr 2023 01:40:24 +0300 Subject: [PATCH 001/100] WIP debugging --- consensus/e2e_tests/state_sync_test.go | 271 ++++++++++++----------- consensus/e2e_tests/utils_test.go | 113 ++++++++-- consensus/events.go | 12 + consensus/fsm_handler.go | 9 +- consensus/module_consensus_debugging.go | 9 + consensus/module_consensus_state_sync.go | 144 +++++++++++- consensus/state_sync/helpers.go | 17 ++ consensus/state_sync/module.go | 99 ++++++++- runtime/bus.go | 4 + shared/modules/bus_module.go | 3 + shared/node.go | 2 + state_machine/module.go | 3 +- 12 files changed, 529 insertions(+), 157 deletions(-) diff --git a/consensus/e2e_tests/state_sync_test.go b/consensus/e2e_tests/state_sync_test.go index d46eb8c25..269205a24 100644 --- a/consensus/e2e_tests/state_sync_test.go +++ b/consensus/e2e_tests/state_sync_test.go @@ -1,6 +1,7 @@ package e2e_tests import ( + "fmt" "reflect" "testing" "time" @@ -8,174 +9,181 @@ import ( "github.com/benbjohnson/clock" "github.com/pokt-network/pocket/consensus" typesCons "github.com/pokt-network/pocket/consensus/types" - "github.com/pokt-network/pocket/shared/codec" "github.com/pokt-network/pocket/shared/modules" "github.com/stretchr/testify/require" - "google.golang.org/protobuf/types/known/anypb" ) func TestStateSync_ServerGetMetaDataReq_Success(t *testing.T) { - // Test preparation - clockMock := clock.NewMock() - timeReminder(t, clockMock, time.Second) - - runtimeMgrs := GenerateNodeRuntimeMgrs(t, numValidators, clockMock) - buses := GenerateBuses(t, runtimeMgrs) - - // Create & start test pocket nodes - eventsChannel := make(modules.EventsChannel, 100) - pocketNodes := CreateTestConsensusPocketNodes(t, buses, eventsChannel) - err := StartAllTestPocketNodes(t, pocketNodes) - require.NoError(t, err) + t.Skip() + /* + // Test preparation + clockMock := clock.NewMock() + timeReminder(t, clockMock, time.Second) + + runtimeMgrs := GenerateNodeRuntimeMgrs(t, numValidators, clockMock) + buses := GenerateBuses(t, runtimeMgrs) + + // Create & start test pocket nodes + eventsChannel := make(modules.EventsChannel, 100) + pocketNodes := CreateTestConsensusPocketNodes(t, buses, eventsChannel) + err := StartAllTestPocketNodes(t, pocketNodes) + require.NoError(t, err) - testHeight := uint64(4) + testHeight := uint64(4) - // Choose node 1 as the server node - // Set server node's height to test height. - serverNode := pocketNodes[1] - serverNodePeerId := serverNode.GetBus().GetConsensusModule().GetNodeAddress() - serverNode.GetBus().GetConsensusModule().SetHeight(testHeight) + // Choose node 1 as the server node + // Set server node's height to test height. + serverNode := pocketNodes[1] + serverNodePeerId := serverNode.GetBus().GetConsensusModule().GetNodeAddress() + serverNode.GetBus().GetConsensusModule().SetHeight(testHeight) - // Choose node 2 as the requester node. - requesterNode := pocketNodes[2] - requesterNodePeerAddress := requesterNode.GetBus().GetConsensusModule().GetNodeAddress() + // Choose node 2 as the requester node. + requesterNode := pocketNodes[2] + requesterNodePeerAddress := requesterNode.GetBus().GetConsensusModule().GetNodeAddress() - // Test MetaData Req - stateSyncMetaDataReqMessage := &typesCons.StateSyncMessage{ - Message: &typesCons.StateSyncMessage_MetadataReq{ - MetadataReq: &typesCons.StateSyncMetadataRequest{ - PeerAddress: requesterNodePeerAddress, + // Test MetaData Req + stateSyncMetaDataReqMessage := &typesCons.StateSyncMessage{ + Message: &typesCons.StateSyncMessage_MetadataReq{ + MetadataReq: &typesCons.StateSyncMetadataRequest{ + PeerAddress: requesterNodePeerAddress, + }, }, - }, - } - anyProto, err := anypb.New(stateSyncMetaDataReqMessage) - require.NoError(t, err) + } + anyProto, err := anypb.New(stateSyncMetaDataReqMessage) + require.NoError(t, err) - // Send metadata request to the server node - P2PSend(t, serverNode, anyProto) + // Send metadata request to the server node + P2PSend(t, serverNode, anyProto) - // Start waiting for the metadata request on server node, - errMsg := "StateSync Metadata Request" - receivedMsg, err := WaitForNetworkStateSyncEvents(t, clockMock, eventsChannel, errMsg, 1, 500, false) - require.NoError(t, err) + // Start waiting for the metadata request on server node, + errMsg := "StateSync Metadata Request" + receivedMsg, err := WaitForNetworkStateSyncEvents(t, clockMock, eventsChannel, errMsg, 1, 500, false) + require.NoError(t, err) - msg, err := codec.GetCodec().FromAny(receivedMsg[0]) - require.NoError(t, err) + msg, err := codec.GetCodec().FromAny(receivedMsg[0]) + require.NoError(t, err) - stateSyncMetaDataResMessage, ok := msg.(*typesCons.StateSyncMessage) - require.True(t, ok) + stateSyncMetaDataResMessage, ok := msg.(*typesCons.StateSyncMessage) + require.True(t, ok) - metaDataRes := stateSyncMetaDataResMessage.GetMetadataRes() - require.NotEmpty(t, metaDataRes) + metaDataRes := stateSyncMetaDataResMessage.GetMetadataRes() + require.NotEmpty(t, metaDataRes) - require.Equal(t, uint64(4), metaDataRes.MaxHeight) - require.Equal(t, uint64(1), metaDataRes.MinHeight) - require.Equal(t, serverNodePeerId, metaDataRes.PeerAddress) + require.Equal(t, uint64(4), metaDataRes.MaxHeight) + require.Equal(t, uint64(1), metaDataRes.MinHeight) + require.Equal(t, serverNodePeerId, metaDataRes.PeerAddress) + */ } func TestStateSync_ServerGetBlock_Success(t *testing.T) { - // Test preparation - clockMock := clock.NewMock() - timeReminder(t, clockMock, time.Second) - - // Test configs - runtimeMgrs := GenerateNodeRuntimeMgrs(t, numValidators, clockMock) - buses := GenerateBuses(t, runtimeMgrs) - - // Create & start test pocket nodes - eventsChannel := make(modules.EventsChannel, 100) - pocketNodes := CreateTestConsensusPocketNodes(t, buses, eventsChannel) - err := StartAllTestPocketNodes(t, pocketNodes) - require.NoError(t, err) + t.Skip() + /* + // Test preparation + clockMock := clock.NewMock() + timeReminder(t, clockMock, time.Second) + + // Test configs + runtimeMgrs := GenerateNodeRuntimeMgrs(t, numValidators, clockMock) + buses := GenerateBuses(t, runtimeMgrs) + + // Create & start test pocket nodes + eventsChannel := make(modules.EventsChannel, 100) + pocketNodes := CreateTestConsensusPocketNodes(t, buses, eventsChannel) + err := StartAllTestPocketNodes(t, pocketNodes) + require.NoError(t, err) - testHeight := uint64(5) - serverNode := pocketNodes[1] - serverNode.GetBus().GetConsensusModule().SetHeight(testHeight) - - // Choose node 2 as the requester node - requesterNode := pocketNodes[2] - requesterNodePeerAddress := requesterNode.GetBus().GetConsensusModule().GetNodeAddress() - - // Passing Test - // Test GetBlock Req - stateSyncGetBlockMessage := &typesCons.StateSyncMessage{ - Message: &typesCons.StateSyncMessage_GetBlockReq{ - GetBlockReq: &typesCons.GetBlockRequest{ - PeerAddress: requesterNodePeerAddress, - Height: 1, + testHeight := uint64(5) + serverNode := pocketNodes[1] + serverNode.GetBus().GetConsensusModule().SetHeight(testHeight) + + // Choose node 2 as the requester node + requesterNode := pocketNodes[2] + requesterNodePeerAddress := requesterNode.GetBus().GetConsensusModule().GetNodeAddress() + + // Passing Test + // Test GetBlock Req + stateSyncGetBlockMessage := &typesCons.StateSyncMessage{ + Message: &typesCons.StateSyncMessage_GetBlockReq{ + GetBlockReq: &typesCons.GetBlockRequest{ + PeerAddress: requesterNodePeerAddress, + Height: 1, + }, }, - }, - } + } - anyProto, err := anypb.New(stateSyncGetBlockMessage) - require.NoError(t, err) + anyProto, err := anypb.New(stateSyncGetBlockMessage) + require.NoError(t, err) - // Send get block request to the server node - P2PSend(t, serverNode, anyProto) + // Send get block request to the server node + P2PSend(t, serverNode, anyProto) - // Start waiting for the get block request on server node, expect to return error - errMsg := "StateSync Get Block Request Message" - receivedMsg, err := WaitForNetworkStateSyncEvents(t, clockMock, eventsChannel, errMsg, 1, 500, false) - require.NoError(t, err) + // Start waiting for the get block request on server node, expect to return error + errMsg := "StateSync Get Block Request Message" + receivedMsg, err := WaitForNetworkStateSyncEvents(t, clockMock, eventsChannel, errMsg, 1, 500, false) + require.NoError(t, err) - msg, err := codec.GetCodec().FromAny(receivedMsg[0]) - require.NoError(t, err) + msg, err := codec.GetCodec().FromAny(receivedMsg[0]) + require.NoError(t, err) - stateSyncGetBlockResMessage, ok := msg.(*typesCons.StateSyncMessage) - require.True(t, ok) + stateSyncGetBlockResMessage, ok := msg.(*typesCons.StateSyncMessage) + require.True(t, ok) - getBlockRes := stateSyncGetBlockResMessage.GetGetBlockRes() - require.NotEmpty(t, getBlockRes) + getBlockRes := stateSyncGetBlockResMessage.GetGetBlockRes() + require.NotEmpty(t, getBlockRes) - require.Equal(t, uint64(1), getBlockRes.Block.GetBlockHeader().Height) + require.Equal(t, uint64(1), getBlockRes.Block.GetBlockHeader().Height) + */ } func TestStateSync_ServerGetBlock_FailNonExistingBlock(t *testing.T) { - // Test preparation - clockMock := clock.NewMock() - timeReminder(t, clockMock, time.Second) - - // Test configs - runtimeMgrs := GenerateNodeRuntimeMgrs(t, numValidators, clockMock) - buses := GenerateBuses(t, runtimeMgrs) - - // Create & start test pocket nodes - eventsChannel := make(modules.EventsChannel, 100) - pocketNodes := CreateTestConsensusPocketNodes(t, buses, eventsChannel) - err := StartAllTestPocketNodes(t, pocketNodes) - require.NoError(t, err) + t.Skip() + /* + // Test preparation + clockMock := clock.NewMock() + timeReminder(t, clockMock, time.Second) + + // Test configs + runtimeMgrs := GenerateNodeRuntimeMgrs(t, numValidators, clockMock) + buses := GenerateBuses(t, runtimeMgrs) + + // Create & start test pocket nodes + eventsChannel := make(modules.EventsChannel, 100) + pocketNodes := CreateTestConsensusPocketNodes(t, buses, eventsChannel) + err := StartAllTestPocketNodes(t, pocketNodes) + require.NoError(t, err) - testHeight := uint64(5) + testHeight := uint64(5) - serverNode := pocketNodes[1] - serverNode.GetBus().GetConsensusModule().SetHeight(testHeight) + serverNode := pocketNodes[1] + serverNode.GetBus().GetConsensusModule().SetHeight(testHeight) - // Choose node 2 as the requester node - requesterNode := pocketNodes[2] - requesterNodePeerAddress := requesterNode.GetBus().GetConsensusModule().GetNodeAddress() + // Choose node 2 as the requester node + requesterNode := pocketNodes[2] + requesterNodePeerAddress := requesterNode.GetBus().GetConsensusModule().GetNodeAddress() - // Failing Test - // Get Block Req is current block height + 1 - requestHeight := testHeight + 1 - stateSyncGetBlockMessage := &typesCons.StateSyncMessage{ - Message: &typesCons.StateSyncMessage_GetBlockReq{ - GetBlockReq: &typesCons.GetBlockRequest{ - PeerAddress: requesterNodePeerAddress, - Height: requestHeight, + // Failing Test + // Get Block Req is current block height + 1 + requestHeight := testHeight + 1 + stateSyncGetBlockMessage := &typesCons.StateSyncMessage{ + Message: &typesCons.StateSyncMessage_GetBlockReq{ + GetBlockReq: &typesCons.GetBlockRequest{ + PeerAddress: requesterNodePeerAddress, + Height: requestHeight, + }, }, - }, - } + } - anyProto, err := anypb.New(stateSyncGetBlockMessage) - require.NoError(t, err) + anyProto, err := anypb.New(stateSyncGetBlockMessage) + require.NoError(t, err) - // Send get block request to the server node - P2PSend(t, serverNode, anyProto) + // Send get block request to the server node + P2PSend(t, serverNode, anyProto) - // Start waiting for the get block request on server node, expect to return error - errMsg := "StateSync Get Block Request Message" - _, err = WaitForNetworkStateSyncEvents(t, clockMock, eventsChannel, errMsg, 1, 500, false) - require.Error(t, err) + // Start waiting for the get block request on server node, expect to return error + errMsg := "StateSync Get Block Request Message" + _, err = WaitForNetworkStateSyncEvents(t, clockMock, eventsChannel, errMsg, 1, 500, false) + require.Error(t, err) + */ } func TestStateSync_UnsyncedPeerSyncs_Success(t *testing.T) { @@ -260,16 +268,19 @@ func TestStateSync_UnsyncedPeerSyncs_Success(t *testing.T) { require.Equal(t, typesCons.NodeId(0), nodeState.LeaderId) } + maxPersistedHeight := testHeight - 1 metadataReceived := &typesCons.StateSyncMetadataResponse{ PeerAddress: "unused_peer_addr_in_tests", MinHeight: uint64(1), - MaxHeight: testHeight, + MaxHeight: maxPersistedHeight, } // Simulate state sync metadata response by pushing metadata to the unsynced node's consensus module consensusModImpl := GetConsensusModImpl(unsyncedNode) consensusModImpl.MethodByName("PushStateSyncMetadataResponse").Call([]reflect.Value{reflect.ValueOf(metadataReceived)}) + fmt.Println("pushed the metadata") + for _, message := range newRoundMessages { P2PBroadcast(t, pocketNodes, message) } diff --git a/consensus/e2e_tests/utils_test.go b/consensus/e2e_tests/utils_test.go index 9558a0125..689977977 100644 --- a/consensus/e2e_tests/utils_test.go +++ b/consensus/e2e_tests/utils_test.go @@ -102,6 +102,10 @@ func CreateTestConsensusPocketNode( bus modules.Bus, eventsChannel modules.EventsChannel, ) *shared.Node { + fmt.Println("before: ", bus.GetEventBus()) + //bus.SetEventBus(eventsChannel) + //fmt.Println("after: ", bus.GetEventBus()) + persistenceMock := basePersistenceMock(t, eventsChannel, bus) bus.RegisterModule(persistenceMock) @@ -110,8 +114,11 @@ func CreateTestConsensusPocketNode( consensusModule, ok := consensusMod.(modules.ConsensusModule) require.True(t, ok) - _, err = state_machine.Create(bus) + stateMachineModule, err := state_machine.Create(bus) require.NoError(t, err) + bus.RegisterModule(stateMachineModule) + + fmt.Println("Events channel: ", eventsChannel) runtimeMgr := (bus).GetRuntimeMgr() // TODO(olshansky): At the moment we are using the same base mocks for all the tests, @@ -156,10 +163,11 @@ func GenerateBuses(t *testing.T, runtimeMgrs []*runtime.Manager) (buses []module // CLEANUP: Reduce package scope visibility in the consensus test module func StartAllTestPocketNodes(t *testing.T, pocketNodes IdToNodeMapping) error { - for _, pocketNode := range pocketNodes { + for id, pocketNode := range pocketNodes { go startNode(t, pocketNode) startEvent := pocketNode.GetBus().GetBusEvent() - require.Equal(t, startEvent.GetContentType(), messaging.NodeStartedEventType) + fmt.Printf("ID: %d, Start event: %s \n", id, startEvent) + require.Equal(t, messaging.NodeStartedEventType, startEvent.GetContentType()) stateMachine := pocketNode.GetBus().GetStateMachineModule() if err := stateMachine.SendEvent(coreTypes.StateMachineEvent_Start); err != nil { return err @@ -211,12 +219,14 @@ func triggerDebugMessage(t *testing.T, node *shared.Node, action messaging.Debug func P2PBroadcast(_ *testing.T, nodes IdToNodeMapping, any *anypb.Any) { e := &messaging.PocketEnvelope{Content: any} for _, node := range nodes { + fmt.Printf("Publishing this event: %s, to: %s \n", e, node.GetP2PAddress()) node.GetBus().PublishEventToBus(e) } } func P2PSend(_ *testing.T, node *shared.Node, any *anypb.Any) { e := &messaging.PocketEnvelope{Content: any} + fmt.Printf("Publishing this event: %s, to: %s \n", e, node.GetP2PAddress()) node.GetBus().PublishEventToBus(e) } @@ -239,6 +249,8 @@ func WaitForNetworkConsensusEvents( millis time.Duration, failOnExtraMessages bool, ) (messages []*anypb.Any, err error) { + fmt.Println("Starting to wait for Consensus events on channel: ", eventsChannel) + includeFilter := func(anyMsg *anypb.Any) bool { msg, err := codec.GetCodec().FromAny(anyMsg) require.NoError(t, err) @@ -264,7 +276,10 @@ func WaitForNetworkStateSyncEvents( maxWaitTime time.Duration, failOnExtraMessages bool, ) (messages []*anypb.Any, err error) { + fmt.Println("Starting to wait for State Sync events on channel: ", eventsChannel) + includeFilter := func(anyMsg *anypb.Any) bool { + fmt.Println("Received this message gok", anyMsg) msg, err := codec.GetCodec().FromAny(anyMsg) require.NoError(t, err) @@ -277,6 +292,32 @@ func WaitForNetworkStateSyncEvents( return waitForEventsInternal(clck, eventsChannel, messaging.StateSyncMessageContentType, numExpectedMsgs, maxWaitTime, includeFilter, errMsg, failOnExtraMessages) } +func WaitForNetworkFSMEvents( + t *testing.T, + clck *clock.Mock, + eventsChannel modules.EventsChannel, + eventType coreTypes.StateMachineEvent, + errMsg string, + numExpectedMsgs int, + maxWaitTime time.Duration, + failOnExtraMessages bool, +) (messages []*anypb.Any, err error) { + fmt.Println("Starting to wait for FSM events on channel: ", eventsChannel) + + includeFilter := func(anyMsg *anypb.Any) bool { + fmt.Println("Received FSM event: ", anyMsg) + msg, err := codec.GetCodec().FromAny(anyMsg) + require.NoError(t, err) + + stateTransitionMessage, ok := msg.(*messaging.StateMachineTransitionEvent) //messaging.StateMachineTransitionEvent + require.True(t, ok) + + return stateTransitionMessage.Event == string(eventType) + } + + return waitForEventsInternal(clck, eventsChannel, messaging.StateMachineTransitionEventType, numExpectedMsgs, maxWaitTime, includeFilter, errMsg, failOnExtraMessages) +} + // RESEARCH(#462): Research ways to eliminate time-based non-determinism from the test framework // IMPROVE: This function can be extended to testing events outside of just the consensus module. func waitForEventsInternal( @@ -391,8 +432,11 @@ func basePersistenceMock(t *testing.T, _ modules.EventsChannel, bus modules.Bus) persistenceMock.EXPECT().GetBlockStore().Return(blockStoreMock).AnyTimes() persistenceReadContextMock.EXPECT().GetMaximumBlockHeight().DoAndReturn(func() (uint64, error) { - height := bus.GetConsensusModule().CurrentHeight() - return height, nil + currentHeight := bus.GetConsensusModule().CurrentHeight() + if currentHeight == 0 { + return 0, nil + } + return currentHeight - 1, nil }).AnyTimes() persistenceReadContextMock.EXPECT().GetMinimumBlockHeight().DoAndReturn(func() (uint64, error) { @@ -674,20 +718,31 @@ func WaitForNodeToSync( allNodes IdToNodeMapping, targetHeight uint64, ) error { + // first block to request currentHeight := unsyncedNode.GetBus().GetConsensusModule().CurrentHeight() + fmt.Println("unsyncedNode address:", unsyncedNode.GetP2PAddress()) + for i := currentHeight; i <= targetHeight; i++ { - blockRequest, err := waitForNodeToRequestMissingBlock(t, clck, eventsChannel, allNodes, currentHeight, targetHeight) + + fmt.Println("Waiting for the node to request missing block for height:", currentHeight) + blockRequest, err := waitForNodeToRequestMissingBlock(t, clck, eventsChannel, currentHeight, targetHeight) if err != nil { return err } - blockResponse, err := waitForNodeToReceiveMissingBlock(t, clck, eventsChannel, allNodes, blockRequest) + // broadcast requeust to all nodes + P2PBroadcast(t, allNodes, blockRequest) + advanceTime(t, clck, 10*time.Millisecond) + + fmt.Println("Receiving replies from all nodes for Block height:", currentHeight) + blockResponse, err := waitForNodesToReplyToBlockRequest(t, clck, eventsChannel, blockRequest) if err != nil { return err } - err = waitForNodeToCatchUp(t, clck, eventsChannel, unsyncedNode, blockResponse, targetHeight) + fmt.Println("calling waitForNodeToCatchUp for Block height:", currentHeight) + err = waitForNodeToCatchUp(t, clck, eventsChannel, unsyncedNode, blockResponse, currentHeight+1) if err != nil { return err } @@ -701,26 +756,45 @@ func waitForNodeToRequestMissingBlock( t *testing.T, clck *clock.Mock, eventsChannel modules.EventsChannel, - allNodes IdToNodeMapping, startingHeight uint64, targetHeight uint64, ) (*anypb.Any, error) { - return &anypb.Any{}, nil + errMsg := "StateSync Block Request Messages" + msgs, err := WaitForNetworkStateSyncEvents(t, clck, eventsChannel, errMsg, numValidators, 250, true) + require.NoError(t, err) + + msg, err := codec.GetCodec().FromAny(msgs[0]) + require.NoError(t, err) + stateSyncBlockReqMessage, ok := msg.(*typesCons.StateSyncMessage) + require.True(t, ok) + blockReq := stateSyncBlockReqMessage.GetGetBlockReq() + require.NotEmpty(t, blockReq) + return msgs[0], nil } // TODO(#352): implement this function. // waitForNodeToReceiveMissingBlock requests block request of the unsynced node // for given node to node to catch up to the target height by sending the requested block. -func waitForNodeToReceiveMissingBlock( +func waitForNodesToReplyToBlockRequest( t *testing.T, clck *clock.Mock, eventsChannel modules.EventsChannel, - allNodes IdToNodeMapping, + //allNodes IdToNodeMapping, blockReq *anypb.Any, ) (*anypb.Any, error) { + errMsg := "StateSync Block Response Messages" + msgs, err := WaitForNetworkStateSyncEvents(t, clck, eventsChannel, errMsg, numValidators-1, 250, true) + require.NoError(t, err) + + // msg, err := codec.GetCodec().FromAny(msgs[0]) + // require.NoError(t, err) + // stateSyncBlockResMessage, ok := msg.(*typesCons.StateSyncMessage) + // require.True(t, ok) + // blockRes := stateSyncBlockResMessage.GetGetBlockRes() + // require.NotEmpty(t, blockReq) - return &anypb.Any{}, nil + return msgs[0], nil } // TODO(#352): implement this function. @@ -734,6 +808,19 @@ func waitForNodeToCatchUp( targetHeight uint64, ) error { + fmt.Println("Sending block response to unsynced node: ", blockResponse) + P2PSend(t, unsyncedNode, blockResponse) + advanceTime(t, clck, 10*time.Millisecond) + + // Try to listen on the channel that state machine uses + //stateMachineEventChannel := unsyncedNode.GetBus().GetEventBus() + //fmt.Println("channel size", len(stateMachineEventChannel)) + //fsmEventMsg, err := WaitForNetworkFSMEvents(t, clck, stateMachineEventChannel, coreTypes.StateMachineEvent_Consensus_IsSyncedValidator, "synced event", 1, 500, false) + + fsmEventMsg, err := WaitForNetworkFSMEvents(t, clck, eventsChannel, coreTypes.StateMachineEvent_Consensus_IsSyncedValidator, "synced event", 1, 500, false) + require.NoError(t, err) + fmt.Println("fsmEventMsg:", fsmEventMsg) + return nil } diff --git a/consensus/events.go b/consensus/events.go index 0e31f8d72..7b803af34 100644 --- a/consensus/events.go +++ b/consensus/events.go @@ -1,6 +1,8 @@ package consensus import ( + "fmt" + "github.com/pokt-network/pocket/shared/messaging" ) @@ -11,4 +13,14 @@ func (m *consensusModule) publishNewHeightEvent(height uint64) { m.logger.Fatal().Err(err).Msg("Failed to pack consensus new height event") } m.GetBus().PublishEventToBus(newHeightEvent) + + fmt.Println("Node address: %s, Event bus in consensus publishNewHeightEvent: ", m.GetNodeAddress(), m.GetBus().GetEventBus()) } + +// func (m *consensusModule) publishFSMEvent(msg *messaging.StateMachineTransitionEvent) { +// fsmEvent, err := messaging.PackMessage(msg) +// if err != nil { +// m.logger.Fatal().Err(err).Msg("Failed to pack consensus new height event") +// } +// m.GetBus().PublishEventToBus(fsmEvent) +// } diff --git a/consensus/fsm_handler.go b/consensus/fsm_handler.go index 1a410c477..198e5a664 100644 --- a/consensus/fsm_handler.go +++ b/consensus/fsm_handler.go @@ -35,6 +35,9 @@ func (m *consensusModule) HandleEvent(transitionMessageAny *anypb.Any) error { func (m *consensusModule) handleStateTransitionEvent(msg *messaging.StateMachineTransitionEvent) error { fsm_state := msg.NewState + // publishing FSM event + //m.publishFSMEvent(msg) + m.logger.Debug().Fields(messaging.TransitionEventToMap(msg)).Msg("Received state machine transition msg") switch coreTypes.StateMachineState(fsm_state) { @@ -84,7 +87,11 @@ func (m *consensusModule) HandleUnsynced(msg *messaging.StateMachineTransitionEv func (m *consensusModule) HandleSyncMode(msg *messaging.StateMachineTransitionEvent) error { m.logger.Debug().Msg("Node is in Sync Mode, starting to sync...") - return m.stateSync.Start() + aggregatedMetadata := m.getAggregatedStateSyncMetadata() + m.stateSync.Set(&aggregatedMetadata) + + go m.stateSync.Start() + return nil } // HandleSynced handles FSM event IsSyncedNonValidator for Non-Validators, and Synced is the destination state. diff --git a/consensus/module_consensus_debugging.go b/consensus/module_consensus_debugging.go index a302f1444..31c8a82b0 100644 --- a/consensus/module_consensus_debugging.go +++ b/consensus/module_consensus_debugging.go @@ -72,3 +72,12 @@ func (m *consensusModule) GetLeaderForView(height, round uint64, step uint8) uin func (m *consensusModule) PushStateSyncMetadataResponse(metadataRes *typesCons.StateSyncMetadataResponse) { m.metadataReceived <- metadataRes } + +// func (m *consensusModule) WaitForFSMSyncedEvent() coreTypes.StateMachineEvent { +// event := <-m.DebugFSMEventsChannel +// return event +// } + +// func (m *consensusModule) PushDebugFSMSyncedEvent(event coreTypes.StateMachineEvent) { +// m.DebugFSMEventsChannel <- event +// } diff --git a/consensus/module_consensus_state_sync.go b/consensus/module_consensus_state_sync.go index 7bdba7d75..07ebc2553 100644 --- a/consensus/module_consensus_state_sync.go +++ b/consensus/module_consensus_state_sync.go @@ -1,7 +1,10 @@ package consensus import ( + "fmt" + typesCons "github.com/pokt-network/pocket/consensus/types" + coreTypes "github.com/pokt-network/pocket/shared/core/types" "github.com/pokt-network/pocket/shared/modules" ) @@ -25,10 +28,42 @@ func (m *consensusModule) GetNodeAddress() string { // TODO(#352): Implement this function, currently a placeholder. // commitReceivedBlocks commits the blocks received from the blocksReceived channel // it is intended to be run as a background process + +// runs as a background process in consensus module +// listens on the blocksReceived channel +// commits the received block func (m *consensusModule) blockApplicationLoop() { - // runs as a background process in consensus module - // listens on the blocksReceived channel - // commits the received block + for blockResponse := range m.blocksReceived { + block := blockResponse.Block + maxPersistedHeight, err := m.maxPersistedBlockHeight() + if err != nil { + m.logger.Err(err).Msg("couldn't query max persisted height") + return + } + + if block.BlockHeader.Height <= maxPersistedHeight { + m.logger.Info().Msgf("Received block with height: %d, but node already persisted blocks until height: %d, so node will not apply this block", block.BlockHeader.Height, maxPersistedHeight) + return + } else if block.BlockHeader.Height > m.CurrentHeight() { + m.logger.Info().Msgf("Received block with height %d, but node's last persisted height is: %d, so node will not apply this block", block.BlockHeader.Height, maxPersistedHeight) + return + } + + err = m.verifyBlock(block) + if err != nil { + m.logger.Err(err).Msg("failed to verify block") + return + } + + err = m.applyAndCommitBlock(block) + if err != nil { + m.logger.Err(err).Msg("failed to apply and commit block") + return + } + fmt.Println("Applied block: ", block) + m.stateSync.CommittedBlock(m.CurrentHeight()) + } + } // TODO(#352): Implement this function, currently a placeholder. @@ -39,3 +74,106 @@ func (m *consensusModule) metadataSyncLoop() { // requests metadata from peers // sends received metadata to the metadataReceived channel } + +func (m *consensusModule) maxPersistedBlockHeight() (uint64, error) { + readCtx, err := m.GetBus().GetPersistenceModule().NewReadContext(int64(m.CurrentHeight())) + if err != nil { + return 0, err + } + defer readCtx.Release() + + maxHeight, err := readCtx.GetMaximumBlockHeight() + if err != nil { + return 0, err + } + + return maxHeight, nil +} + +func (m *consensusModule) verifyBlock(block *coreTypes.Block) error { + // blockHeader := block.BlockHeader + // qcBytes := blockHeader.GetQuorumCertificate() + + // if qcBytes == nil { + // m.logger.Error().Err(typesCons.ErrNoQcInReceivedBlock).Msg(typesCons.DisregardBlock) + // return typesCons.ErrNoQcInReceivedBlock + // } + + // qc := typesCons.QuorumCertificate{} + // if err := proto.Unmarshal(qcBytes, &qc); err != nil { + // return err + // } + + // m.logger.Info().Msg("verifyBlock, validating Quroum Certificate") + + // if err := m.validateQuorumCertificate(&qc); err != nil { + // m.logger.Error().Err(err).Msg("Couldn't apply block, invalid QC") + // return err + // } + + // m.logger.Info().Msg("verifyBlock, QC is valid, refreshing utility context") + // if err := m.refreshUtilityUnitOfWork(); err != nil { + // m.logger.Error().Err(err).Msg("Could not refresh utility context") + // return err + // } + + // // leaderIdInt, err := m.GetNodeIdFromNodeAddress(string(block.BlockHeader.ProposerAddress)) + // // if err != nil { + // // m.logger.Error().Err(err).Msg("Could not get leader id from leader address") + // // return err + // // } + + // // leaderId := typesCons.NodeId(leaderIdInt) + // // m.leaderId = &leaderId + // // m.logger.Info().Msgf("verifyBlock, leaderId is: %d", leaderId) + + return nil +} + +func (m *consensusModule) applyAndCommitBlock(block *coreTypes.Block) error { + m.logger.Info().Msg("applyAndCommitBlock, applying the block") + m.applyBlock(block) + + m.logger.Info().Msg("applyAndCommitBlock, committing the block") + + if err := m.commitBlock(block); err != nil { + m.logger.Error().Err(err).Msg("Could not commit block, invalid QC") + return nil + } + + m.paceMaker.NewHeight() + + maxPersistedHeight, err := m.maxPersistedBlockHeight() + if err != nil { + return err + } + + m.logger.Info().Msgf("applyAndCommitBlock, Block is Committed, maxPersistedHeight is: %d, current height is :%d", maxPersistedHeight, m.height) + + // Send persisted block height to the state sync module + //m.stateSync.CommittedBlock(block.BlockHeader.Height) + return nil +} + +// TODO! check if min max height initialization is correct +func (m *consensusModule) getAggregatedStateSyncMetadata() typesCons.StateSyncMetadataResponse { + minHeight, maxHeight := uint64(1), uint64(1) + + chanLen := len(m.metadataReceived) + + for i := 0; i < chanLen; i++ { + metadata := <-m.metadataReceived + if metadata.MaxHeight > maxHeight { + maxHeight = metadata.MaxHeight + } + if metadata.MinHeight < minHeight { + minHeight = metadata.MinHeight + } + } + + return typesCons.StateSyncMetadataResponse{ + PeerAddress: "unused_aggregated_metadata_address", + MinHeight: minHeight, + MaxHeight: maxHeight, + } +} diff --git a/consensus/state_sync/helpers.go b/consensus/state_sync/helpers.go index 3a7bb1afc..c9f29483b 100644 --- a/consensus/state_sync/helpers.go +++ b/consensus/state_sync/helpers.go @@ -6,6 +6,23 @@ import ( "google.golang.org/protobuf/types/known/anypb" ) +// Helper function for broadcasting state sync messages to the all peers known to the node: +// +// sends metadata requests, via `metadataSyncLoop()` function +// sends block requests, via `()` function +// func (m *stateSync) broadcastStateSyncMessage(stateSyncMsg *typesCons.StateSyncMessage, validators []coreTypes.Actor) error { +// m.logger.Info().Msg("📣 Broadcasting state sync message... 📣") + +// // TODO: Use RainTree for this +// //IMPROVE: OPtimize so this is not O(n^2) +// for _, val := range validators { +// if err := m.sendStateSyncMessage(stateSyncMsg, cryptoPocket.AddressFromString(val.GetAddress())); err != nil { +// return err +// } +// } +// return nil +// } + // SendStateSyncMessage sends a state sync message after converting to any proto, to the given peer func (m *stateSync) sendStateSyncMessage(msg *typesCons.StateSyncMessage, dst cryptoPocket.Address) error { anyMsg, err := anypb.New(msg) diff --git a/consensus/state_sync/module.go b/consensus/state_sync/module.go index a35a24318..9bdc944fd 100644 --- a/consensus/state_sync/module.go +++ b/consensus/state_sync/module.go @@ -1,18 +1,28 @@ package state_sync import ( + "fmt" + + typesCons "github.com/pokt-network/pocket/consensus/types" "github.com/pokt-network/pocket/logger" coreTypes "github.com/pokt-network/pocket/shared/core/types" + cryptoPocket "github.com/pokt-network/pocket/shared/crypto" "github.com/pokt-network/pocket/shared/modules" ) const ( - stateSyncModuleName = "stateSyncModule" + stateSyncModuleName = "stateSyncModule" + committedBlockHeightChannelSize = 100 ) +// type FSMEventsChannel chan *coreTypes.StateMachineEvent + type StateSyncModule interface { modules.Module StateSyncServerModule + + Set(aggregatedMetaData *typesCons.StateSyncMetadataResponse) + CommittedBlock(uint64) } var ( @@ -22,14 +32,21 @@ var ( ) type stateSync struct { - bus modules.Bus - logger *modules.Logger + bus modules.Bus + logger *modules.Logger + validators []*coreTypes.Actor + aggregatedMetaData *typesCons.StateSyncMetadataResponse + committedBlockHeightChannel chan uint64 } func CreateStateSync(bus modules.Bus, options ...modules.ModuleOption) (modules.Module, error) { return new(stateSync).Create(bus, options...) } +func (m *stateSync) CommittedBlock(height uint64) { + m.committedBlockHeightChannel <- height +} + func (*stateSync) Create(bus modules.Bus, options ...modules.ModuleOption) (modules.Module, error) { m := &stateSync{} @@ -41,23 +58,87 @@ func (*stateSync) Create(bus modules.Bus, options ...modules.ModuleOption) (modu m.logger = logger.Global.CreateLoggerForModule(m.GetModuleName()) + m.committedBlockHeightChannel = make(chan uint64, committedBlockHeightChannelSize) + //m.FSMEventsChannel = make(chan coreTypes.StateMachineEvent, 100) + return m, nil } +func (m *stateSync) Set(aggregatedMetaData *typesCons.StateSyncMetadataResponse) { + m.logger.Info().Msg("State Sync Module Set") + m.aggregatedMetaData = aggregatedMetaData + + // return +} + // TODO(#352): implement this function // Start performs state sync + +// processes and aggregates all metadata collected in metadataReceived channel, +// requests missing blocks starting from its current height to the aggregated metadata's maxHeight, +// once the requested block is received and committed by consensus module, sends the next request for the next block, +// when all blocks are received and committed, stops the state sync process by calling its `Stop()` function. func (m *stateSync) Start() error { - // processes and aggregates all metadata collected in metadataReceived channel, - // requests missing blocks starting from its current height to the aggregated metadata's maxHeight, - // once the requested block is received and committed by consensus module, sends the next request for the next block, - // when all blocks are received and committed, stops the state sync process by calling its `Stop()` function. - return nil + + consensusMod := m.bus.GetConsensusModule() + currentHeight := consensusMod.CurrentHeight() + nodeAddress := consensusMod.GetNodeAddress() + readCtx, err := m.GetBus().GetPersistenceModule().NewReadContext(int64(currentHeight)) + if err != nil { + return err + } + defer readCtx.Release() + + //get the current validators + m.validators, err = readCtx.GetAllValidators(int64(currentHeight)) + if err != nil { + return err + } + + for currentHeight <= m.aggregatedMetaData.MaxHeight { + m.logger.Info().Msgf("Sync is requesting block: %d, ending height: %d", currentHeight, m.aggregatedMetaData.MaxHeight) + + // form the get block request message + stateSyncGetBlockMessage := &typesCons.StateSyncMessage{ + Message: &typesCons.StateSyncMessage_GetBlockReq{ + GetBlockReq: &typesCons.GetBlockRequest{ + PeerAddress: nodeAddress, + Height: currentHeight, + }, + }, + } + + // broadcast the get block request message to all validators + for _, val := range m.validators { + fmt.Printf("Sending state sync message %s to: %s \n", stateSyncGetBlockMessage, val.GetAddress()) + if err := m.sendStateSyncMessage(stateSyncGetBlockMessage, cryptoPocket.AddressFromString(val.GetAddress())); err != nil { + return err + } + } + + fmt.Println("waiting for block to be received and committed by consensus module") + + // wait for the block to be received and committed by consensus module + receivedBlockHeight := <-m.committedBlockHeightChannel + fmt.Println("received and persisted block height: ", receivedBlockHeight) + if receivedBlockHeight != consensusMod.CurrentHeight() { + fmt.Println("This should not happen?") + return fmt.Errorf("received block height %d is not equal to current height %d", receivedBlockHeight, currentHeight) + } + //timer to check if block is received and committed + + currentHeight = consensusMod.CurrentHeight() + + } + + fmt.Println("state sync is completed") + // syncing is complete, stop the state sync module + return m.Stop() } // TODO(#352): check if node is a valdiator, if not send Consensus_IsSyncedNonValidator event // Stop stops the state sync process, and sends `Consensus_IsSyncedValidator` FSM event func (m *stateSync) Stop() error { - return m.GetBus().GetStateMachineModule().SendEvent(coreTypes.StateMachineEvent_Consensus_IsSyncedValidator) } diff --git a/runtime/bus.go b/runtime/bus.go index cea228733..ecdd1c7d2 100644 --- a/runtime/bus.go +++ b/runtime/bus.go @@ -54,6 +54,10 @@ func (m *bus) PublishEventToBus(e *messaging.PocketEnvelope) { m.channel <- e } +func (m *bus) SetEventBus(channel modules.EventsChannel) { + m.channel = channel +} + func (m *bus) GetBusEvent() *messaging.PocketEnvelope { e := <-m.channel return e diff --git a/shared/modules/bus_module.go b/shared/modules/bus_module.go index 62f5f3b74..eeeee2e42 100644 --- a/shared/modules/bus_module.go +++ b/shared/modules/bus_module.go @@ -35,4 +35,7 @@ type Bus interface { // Runtime GetRuntimeMgr() RuntimeMgr + + // Debug + SetEventBus(channel EventsChannel) } diff --git a/shared/node.go b/shared/node.go index c321c051f..f85ae346c 100644 --- a/shared/node.go +++ b/shared/node.go @@ -2,6 +2,7 @@ package shared import ( "context" + "fmt" "time" "github.com/pokt-network/pocket/consensus" @@ -164,6 +165,7 @@ func (m *Node) GetBus() modules.Bus { // TECHDEBT: The `shared` package has dependencies on types in the individual modules. // TODO: Move all message types this is dependant on to the `messaging` package func (node *Node) handleEvent(message *messaging.PocketEnvelope) error { + fmt.Printf("Node: %s, inside handleEvent, with message: %s, bus: %s\n", node.p2pAddress, message, node.bus.GetEventBus()) contentType := message.GetContentType() switch contentType { case messaging.NodeStartedEventType: diff --git a/state_machine/module.go b/state_machine/module.go index b86550846..ef9d47635 100644 --- a/state_machine/module.go +++ b/state_machine/module.go @@ -2,6 +2,7 @@ package state_machine import ( "context" + "fmt" "github.com/looplab/fsm" "github.com/pokt-network/pocket/logger" @@ -45,7 +46,7 @@ func (*stateMachineModule) Create(bus modules.Bus, options ...modules.ModuleOpti if err != nil { m.logger.Fatal().Err(err).Msg("failed to pack state machine transition event") } - + fmt.Println("Event bus in state machine: ", bus.GetEventBus()) bus.PublishEventToBus(newStateMachineTransitionEvent) }, }) From bbf5458276de674ab422dd8c922a1e16d4ae1734 Mon Sep 17 00:00:00 2001 From: gokutheengineer Date: Fri, 14 Apr 2023 01:48:58 +0300 Subject: [PATCH 002/100] wip --- consensus/e2e_tests/utils_test.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/consensus/e2e_tests/utils_test.go b/consensus/e2e_tests/utils_test.go index 689977977..def228bdc 100644 --- a/consensus/e2e_tests/utils_test.go +++ b/consensus/e2e_tests/utils_test.go @@ -114,9 +114,9 @@ func CreateTestConsensusPocketNode( consensusModule, ok := consensusMod.(modules.ConsensusModule) require.True(t, ok) - stateMachineModule, err := state_machine.Create(bus) + _, err = state_machine.Create(bus) require.NoError(t, err) - bus.RegisterModule(stateMachineModule) + //bus.RegisterModule(stateMachineModule) fmt.Println("Events channel: ", eventsChannel) From 6e65731a617d613a415d85d9b4632ec14eeabcd5 Mon Sep 17 00:00:00 2001 From: Daniel Olshansky Date: Thu, 13 Apr 2023 16:41:39 -0700 Subject: [PATCH 003/100] Add debug channels --- consensus/e2e_tests/utils_test.go | 4 ++-- consensus/events.go | 2 +- state_machine/module.go | 20 ++++++++++++++++++-- 3 files changed, 21 insertions(+), 5 deletions(-) diff --git a/consensus/e2e_tests/utils_test.go b/consensus/e2e_tests/utils_test.go index def228bdc..4ef08f906 100644 --- a/consensus/e2e_tests/utils_test.go +++ b/consensus/e2e_tests/utils_test.go @@ -114,9 +114,9 @@ func CreateTestConsensusPocketNode( consensusModule, ok := consensusMod.(modules.ConsensusModule) require.True(t, ok) - _, err = state_machine.Create(bus) + _, err = state_machine.Create(bus, state_machine.WithDebugEventsChannel(eventsChannel)) require.NoError(t, err) - //bus.RegisterModule(stateMachineModule) + // bus.RegisterModule(stateMachineModule) fmt.Println("Events channel: ", eventsChannel) diff --git a/consensus/events.go b/consensus/events.go index 7b803af34..e326460a2 100644 --- a/consensus/events.go +++ b/consensus/events.go @@ -14,7 +14,7 @@ func (m *consensusModule) publishNewHeightEvent(height uint64) { } m.GetBus().PublishEventToBus(newHeightEvent) - fmt.Println("Node address: %s, Event bus in consensus publishNewHeightEvent: ", m.GetNodeAddress(), m.GetBus().GetEventBus()) + fmt.Printf("Node address: %s, Event bus in consensus publishNewHeightEvent: %v\n", m.GetNodeAddress(), m.GetBus().GetEventBus()) } // func (m *consensusModule) publishFSMEvent(msg *messaging.StateMachineTransitionEvent) { diff --git a/state_machine/module.go b/state_machine/module.go index ef9d47635..9ccdf36b2 100644 --- a/state_machine/module.go +++ b/state_machine/module.go @@ -19,7 +19,8 @@ type stateMachineModule struct { base_modules.InterruptableModule *fsm.FSM - logger *modules.Logger + logger *modules.Logger + debugChannels []modules.EventsChannel } func Create(bus modules.Bus, options ...modules.ModuleOption) (modules.Module, error) { @@ -28,7 +29,8 @@ func Create(bus modules.Bus, options ...modules.ModuleOption) (modules.Module, e func (*stateMachineModule) Create(bus modules.Bus, options ...modules.ModuleOption) (modules.Module, error) { m := &stateMachineModule{ - logger: logger.Global.CreateLoggerForModule(modules.StateMachineModuleName), + logger: logger.Global.CreateLoggerForModule(modules.StateMachineModuleName), + debugChannels: make([]modules.EventsChannel, 0), } m.FSM = NewNodeFSM(&fsm.Callbacks{ @@ -48,6 +50,9 @@ func (*stateMachineModule) Create(bus modules.Bus, options ...modules.ModuleOpti } fmt.Println("Event bus in state machine: ", bus.GetEventBus()) bus.PublishEventToBus(newStateMachineTransitionEvent) + for _, channel := range m.debugChannels { + channel <- newStateMachineTransitionEvent + } }, }) @@ -77,3 +82,14 @@ func WithCustomStateMachine(stateMachine *fsm.FSM) modules.ModuleOption { } } } + +// TODO_IN_THIS_COMMIT(gohkan): make sure to document that this is used for debugging purposes. +// We do not want to ever mock the FSM in unit tests because it drives the nodes state and must +// be use as is. However, we need to capture the events form a variety of different nodes. +func WithDebugEventsChannel(eventsChannel modules.EventsChannel) modules.ModuleOption { + return func(m modules.InitializableModule) { + if m, ok := m.(*stateMachineModule); ok { + m.debugChannels = append(m.debugChannels, eventsChannel) + } + } +} From b3a953fc5fb252d97130f51cd3d6e6a34b3d0d16 Mon Sep 17 00:00:00 2001 From: gokutheengineer Date: Fri, 14 Apr 2023 14:59:21 +0300 Subject: [PATCH 004/100] tests pass --- consensus/e2e_tests/state_sync_test.go | 54 +++++++++++- consensus/e2e_tests/utils_test.go | 104 ++++++++++++++---------- consensus/module_consensus_debugging.go | 3 + consensus/pacemaker/module.go | 6 +- consensus/state_sync/module.go | 2 +- runtime/bus.go | 6 +- shared/modules/bus_module.go | 3 - 7 files changed, 123 insertions(+), 55 deletions(-) diff --git a/consensus/e2e_tests/state_sync_test.go b/consensus/e2e_tests/state_sync_test.go index 269205a24..5082aecc6 100644 --- a/consensus/e2e_tests/state_sync_test.go +++ b/consensus/e2e_tests/state_sync_test.go @@ -1,7 +1,6 @@ package e2e_tests import ( - "fmt" "reflect" "testing" "time" @@ -279,8 +278,6 @@ func TestStateSync_UnsyncedPeerSyncs_Success(t *testing.T) { consensusModImpl := GetConsensusModImpl(unsyncedNode) consensusModImpl.MethodByName("PushStateSyncMetadataResponse").Call([]reflect.Value{reflect.ValueOf(metadataReceived)}) - fmt.Println("pushed the metadata") - for _, message := range newRoundMessages { P2PBroadcast(t, pocketNodes, message) } @@ -293,8 +290,57 @@ func TestStateSync_UnsyncedPeerSyncs_Success(t *testing.T) { // TODO(#352): This function will be updated once state sync implementation is complete err = WaitForNodeToSync(t, clockMock, eventsChannel, unsyncedNode, pocketNodes, testHeight) require.NoError(t, err) + //require.Equal(t, unsyncedNode.GetBus().GetConsensusModule().CurrentHeight(), testHeight) + + // for nodeId, pocketNode := range pocketNodes { + // nodeState := GetConsensusNodeState(pocketNode) + // assertHeight(t, nodeId, testHeight, nodeState.Height) + // } + + // advanceTime(t, clockMock, 10*time.Millisecond) + + // // Unsyned node sends new round messages to the rest of the network + // newRoundMessages, err = WaitForNetworkConsensusEvents(t, clockMock, eventsChannel, consensus.NewRound, consensus.Propose, numValidators, 500, true) + // require.NoError(t, err) + + // for _, message := range newRoundMessages { + // //fmt.Println("message: ", message) + // P2PBroadcast(t, pocketNodes, message) + // } + // advanceTime(t, clockMock, 10*time.Millisecond) + + // for nodeId, pocketNode := range pocketNodes { + // nodeState := GetConsensusNodeState(pocketNode) + // assertHeight(t, nodeId, testHeight, nodeState.Height) + // } + + // for _, pocketNode := range pocketNodes { + // TriggerNextView(t, pocketNode) + // } + // advanceTime(t, clockMock, 10*time.Millisecond) + + // newRoundMessages, err = WaitForNetworkConsensusEvents(t, clockMock, eventsChannel, consensus.NewRound, consensus.Propose, numValidators*numValidators, 500, true) + // require.NoError(t, err) + + //PROBLEM + /* + for _, message := range newRoundMessages { + //fmt.Println("NEWROUND message: ", message) + P2PBroadcast(t, pocketNodes, message) + } + advanceTime(t, clockMock, 10*time.Millisecond) + */ + // // 2. Propose + // _, err = WaitForNetworkConsensusEvents(t, clockMock, eventsChannel, consensus.Prepare, consensus.Propose, numValidators, 500, true) + // require.NoError(t, err) + + // for nodeId, pocketNode := range pocketNodes { + // nodeState := GetConsensusNodeState(pocketNode) + // fmt.Println("Node state is h s r: ", nodeState.Height, nodeState.Step, nodeState.Round, "leader is: ", nodeState.LeaderId) + // //fmt.Println("Leader is: ", nodeState.LeaderId) + // assertHeight(t, nodeId, testHeight, nodeState.Height) + // } - // TODO(#352): Add height check once state sync implmentation is complete } // TODO(#352): Implement these tests diff --git a/consensus/e2e_tests/utils_test.go b/consensus/e2e_tests/utils_test.go index 4ef08f906..56eaf468e 100644 --- a/consensus/e2e_tests/utils_test.go +++ b/consensus/e2e_tests/utils_test.go @@ -257,6 +257,7 @@ func WaitForNetworkConsensusEvents( hotstuffMessage, ok := msg.(*typesCons.HotstuffMessage) require.True(t, ok) + //fmt.Println("hotstuff msg:", hotstuffMessage.Block) return hotstuffMessage.Type == msgType && hotstuffMessage.Step == step } @@ -718,34 +719,37 @@ func WaitForNodeToSync( allNodes IdToNodeMapping, targetHeight uint64, ) error { - // first block to request - currentHeight := unsyncedNode.GetBus().GetConsensusModule().CurrentHeight() - - fmt.Println("unsyncedNode address:", unsyncedNode.GetP2PAddress()) - for i := currentHeight; i <= targetHeight; i++ { + // TODO! fix error handling + currentHeight := unsyncedNode.GetBus().GetConsensusModule().CurrentHeight() - fmt.Println("Waiting for the node to request missing block for height:", currentHeight) + for currentHeight < targetHeight { + // waiting for unsynced node to request missing block blockRequest, err := waitForNodeToRequestMissingBlock(t, clck, eventsChannel, currentHeight, targetHeight) - if err != nil { - return err - } + require.NoError(t, err) // broadcast requeust to all nodes P2PBroadcast(t, allNodes, blockRequest) advanceTime(t, clck, 10*time.Millisecond) - fmt.Println("Receiving replies from all nodes for Block height:", currentHeight) + // receiving replies from all nodes blockResponse, err := waitForNodesToReplyToBlockRequest(t, clck, eventsChannel, blockRequest) - if err != nil { - return err - } + require.NoError(t, err) - fmt.Println("calling waitForNodeToCatchUp for Block height:", currentHeight) - err = waitForNodeToCatchUp(t, clck, eventsChannel, unsyncedNode, blockResponse, currentHeight+1) - if err != nil { - return err - } + // sending block response to unsynced node + P2PSend(t, unsyncedNode, blockResponse) + advanceTime(t, clck, 10*time.Millisecond) + + // waiting for node to catch the global height + err = waitForNodeToCatchUpHeight(t, clck, eventsChannel, unsyncedNode, allNodes, blockResponse, currentHeight+1) + require.NoError(t, err) + + advanceTime(t, clck, 10*time.Millisecond) + + err = waitForNodeToCatchupStep(t, clck, eventsChannel, unsyncedNode, allNodes, blockResponse, currentHeight+1) + require.NoError(t, err) + + currentHeight = unsyncedNode.GetBus().GetConsensusModule().CurrentHeight() } return nil } @@ -763,13 +767,6 @@ func waitForNodeToRequestMissingBlock( errMsg := "StateSync Block Request Messages" msgs, err := WaitForNetworkStateSyncEvents(t, clck, eventsChannel, errMsg, numValidators, 250, true) require.NoError(t, err) - - msg, err := codec.GetCodec().FromAny(msgs[0]) - require.NoError(t, err) - stateSyncBlockReqMessage, ok := msg.(*typesCons.StateSyncMessage) - require.True(t, ok) - blockReq := stateSyncBlockReqMessage.GetGetBlockReq() - require.NotEmpty(t, blockReq) return msgs[0], nil } @@ -780,46 +777,67 @@ func waitForNodesToReplyToBlockRequest( t *testing.T, clck *clock.Mock, eventsChannel modules.EventsChannel, - //allNodes IdToNodeMapping, blockReq *anypb.Any, ) (*anypb.Any, error) { errMsg := "StateSync Block Response Messages" msgs, err := WaitForNetworkStateSyncEvents(t, clck, eventsChannel, errMsg, numValidators-1, 250, true) require.NoError(t, err) - - // msg, err := codec.GetCodec().FromAny(msgs[0]) - // require.NoError(t, err) - // stateSyncBlockResMessage, ok := msg.(*typesCons.StateSyncMessage) - // require.True(t, ok) - // blockRes := stateSyncBlockResMessage.GetGetBlockRes() - // require.NotEmpty(t, blockReq) - return msgs[0], nil } // TODO(#352): implement this function. // waitForNodeToCatchUp waits for given node to node to catch up to the target height by sending the requested block. -func waitForNodeToCatchUp( +func waitForNodeToCatchUpHeight( t *testing.T, clck *clock.Mock, eventsChannel modules.EventsChannel, unsyncedNode *shared.Node, + allNodes IdToNodeMapping, blockResponse *anypb.Any, targetHeight uint64, ) error { + // wait for unsynced node to send StateMachineEvent_Consensus_IsSyncedValidator event + _, err := WaitForNetworkFSMEvents(t, clck, eventsChannel, coreTypes.StateMachineEvent_Consensus_IsSyncedValidator, "synced event", 1, 500, false) + require.NoError(t, err) + + for nodeId, pocketNode := range allNodes { + nodeState := GetConsensusNodeState(pocketNode) + assertHeight(t, nodeId, targetHeight, nodeState.Height) + } + + return err +} - fmt.Println("Sending block response to unsynced node: ", blockResponse) - P2PSend(t, unsyncedNode, blockResponse) +func waitForNodeToCatchupStep( + t *testing.T, + clck *clock.Mock, + eventsChannel modules.EventsChannel, + unsyncedNode *shared.Node, + allNodes IdToNodeMapping, + blockResponse *anypb.Any, + targetHeight uint64, +) error { + // Unsynced node sends new round messages to the rest of the network + newRoundMessages, err := WaitForNetworkConsensusEvents(t, clck, eventsChannel, consensus.NewRound, consensus.Propose, numValidators, 500, true) + require.NoError(t, err) + P2PBroadcast(t, allNodes, newRoundMessages[0]) advanceTime(t, clck, 10*time.Millisecond) - // Try to listen on the channel that state machine uses - //stateMachineEventChannel := unsyncedNode.GetBus().GetEventBus() - //fmt.Println("channel size", len(stateMachineEventChannel)) - //fsmEventMsg, err := WaitForNetworkFSMEvents(t, clck, stateMachineEventChannel, coreTypes.StateMachineEvent_Consensus_IsSyncedValidator, "synced event", 1, 500, false) + for _, pocketNode := range allNodes { + TriggerNextView(t, pocketNode) + } + advanceTime(t, clck, 10*time.Millisecond) - fsmEventMsg, err := WaitForNetworkFSMEvents(t, clck, eventsChannel, coreTypes.StateMachineEvent_Consensus_IsSyncedValidator, "synced event", 1, 500, false) + // 1. NewRound + newRoundMessages, err = WaitForNetworkConsensusEvents(t, clck, eventsChannel, consensus.NewRound, consensus.Propose, numValidators*numValidators, 500, true) require.NoError(t, err) - fmt.Println("fsmEventMsg:", fsmEventMsg) + broadcastMessages(t, newRoundMessages, allNodes) + advanceTime(t, clck, 10*time.Millisecond) + + for nodeId, pocketNode := range allNodes { + nodeState := GetConsensusNodeState(pocketNode) + assertHeight(t, nodeId, targetHeight, nodeState.Height) + } return nil } diff --git a/consensus/module_consensus_debugging.go b/consensus/module_consensus_debugging.go index 31c8a82b0..c9d620be2 100644 --- a/consensus/module_consensus_debugging.go +++ b/consensus/module_consensus_debugging.go @@ -1,6 +1,8 @@ package consensus import ( + "fmt" + typesCons "github.com/pokt-network/pocket/consensus/types" coreTypes "github.com/pokt-network/pocket/shared/core/types" "github.com/pokt-network/pocket/shared/messaging" @@ -70,6 +72,7 @@ func (m *consensusModule) GetLeaderForView(height, round uint64, step uint8) uin // TODO(#609): Refactor to use the test-only package and remove reflection func (m *consensusModule) PushStateSyncMetadataResponse(metadataRes *typesCons.StateSyncMetadataResponse) { + fmt.Println("metadata is: ", metadataRes) m.metadataReceived <- metadataRes } diff --git a/consensus/pacemaker/module.go b/consensus/pacemaker/module.go index 2219d82aa..a695f303c 100644 --- a/consensus/pacemaker/module.go +++ b/consensus/pacemaker/module.go @@ -133,7 +133,7 @@ func (m *pacemaker) ShouldHandleMessage(msg *typesCons.HotstuffMessage) (bool, e } // Message is from the past - if msg.Round < currentRound || (msg.Round == currentRound && msg.Step < currentStep) { + if msg.Round < currentRound || (msg.Round == currentRound && msg.Step < currentStep) || (msg.Round >= currentRound && msg.Step < currentStep) { m.logger.Warn().Msgf("⚠️ [DISCARDING] ⚠️ Node at (height, step, round) (%d, %d, %d) > message at (%d, %d, %d)", currentHeight, currentStep, currentRound, msg.Height, msg.Step, msg.Round) return false, nil } @@ -143,6 +143,10 @@ func (m *pacemaker) ShouldHandleMessage(msg *typesCons.HotstuffMessage) (bool, e return true, nil } + // if msg.Step < currentStep { + // return false, nil + // } + // pacemaker catch up! Node is synced to the right height, but on a previous step/round so we just jump to the latest state. if msg.Round > currentRound || (msg.Round == currentRound && msg.Step > currentStep) { m.logger.Info().Msg(pacemakerCatchupLog(currentHeight, uint64(currentStep), currentRound, msg.Height, uint64(msg.Step), msg.Round)) diff --git a/consensus/state_sync/module.go b/consensus/state_sync/module.go index 9bdc944fd..5eac72ac1 100644 --- a/consensus/state_sync/module.go +++ b/consensus/state_sync/module.go @@ -131,7 +131,7 @@ func (m *stateSync) Start() error { } - fmt.Println("state sync is completed") + fmt.Println("state sync is completed, currentHeight is: ", currentHeight) // syncing is complete, stop the state sync module return m.Stop() } diff --git a/runtime/bus.go b/runtime/bus.go index ecdd1c7d2..66805c59c 100644 --- a/runtime/bus.go +++ b/runtime/bus.go @@ -54,9 +54,9 @@ func (m *bus) PublishEventToBus(e *messaging.PocketEnvelope) { m.channel <- e } -func (m *bus) SetEventBus(channel modules.EventsChannel) { - m.channel = channel -} +// func (m *bus) SetEventBus(channel modules.EventsChannel) { +// m.channel = channel +// } func (m *bus) GetBusEvent() *messaging.PocketEnvelope { e := <-m.channel diff --git a/shared/modules/bus_module.go b/shared/modules/bus_module.go index eeeee2e42..62f5f3b74 100644 --- a/shared/modules/bus_module.go +++ b/shared/modules/bus_module.go @@ -35,7 +35,4 @@ type Bus interface { // Runtime GetRuntimeMgr() RuntimeMgr - - // Debug - SetEventBus(channel EventsChannel) } From 89fad5d3307952fdf56602aff53e1c62f073ea37 Mon Sep 17 00:00:00 2001 From: gokutheengineer Date: Fri, 14 Apr 2023 15:06:27 +0300 Subject: [PATCH 005/100] clean --- consensus/e2e_tests/state_sync_test.go | 324 +++++++++++-------------- consensus/e2e_tests/utils_test.go | 8 +- runtime/bus.go | 4 - 3 files changed, 142 insertions(+), 194 deletions(-) diff --git a/consensus/e2e_tests/state_sync_test.go b/consensus/e2e_tests/state_sync_test.go index 5082aecc6..bc66a0098 100644 --- a/consensus/e2e_tests/state_sync_test.go +++ b/consensus/e2e_tests/state_sync_test.go @@ -8,181 +8,182 @@ import ( "github.com/benbjohnson/clock" "github.com/pokt-network/pocket/consensus" typesCons "github.com/pokt-network/pocket/consensus/types" + "github.com/pokt-network/pocket/shared/codec" "github.com/pokt-network/pocket/shared/modules" "github.com/stretchr/testify/require" + "google.golang.org/protobuf/types/known/anypb" ) func TestStateSync_ServerGetMetaDataReq_Success(t *testing.T) { - t.Skip() - /* - // Test preparation - clockMock := clock.NewMock() - timeReminder(t, clockMock, time.Second) - - runtimeMgrs := GenerateNodeRuntimeMgrs(t, numValidators, clockMock) - buses := GenerateBuses(t, runtimeMgrs) - - // Create & start test pocket nodes - eventsChannel := make(modules.EventsChannel, 100) - pocketNodes := CreateTestConsensusPocketNodes(t, buses, eventsChannel) - err := StartAllTestPocketNodes(t, pocketNodes) - require.NoError(t, err) + //t.Skip() - testHeight := uint64(4) + // Test preparation + clockMock := clock.NewMock() + timeReminder(t, clockMock, time.Second) - // Choose node 1 as the server node - // Set server node's height to test height. - serverNode := pocketNodes[1] - serverNodePeerId := serverNode.GetBus().GetConsensusModule().GetNodeAddress() - serverNode.GetBus().GetConsensusModule().SetHeight(testHeight) + runtimeMgrs := GenerateNodeRuntimeMgrs(t, numValidators, clockMock) + buses := GenerateBuses(t, runtimeMgrs) - // Choose node 2 as the requester node. - requesterNode := pocketNodes[2] - requesterNodePeerAddress := requesterNode.GetBus().GetConsensusModule().GetNodeAddress() + // Create & start test pocket nodes + eventsChannel := make(modules.EventsChannel, 100) + pocketNodes := CreateTestConsensusPocketNodes(t, buses, eventsChannel) + err := StartAllTestPocketNodes(t, pocketNodes) + require.NoError(t, err) - // Test MetaData Req - stateSyncMetaDataReqMessage := &typesCons.StateSyncMessage{ - Message: &typesCons.StateSyncMessage_MetadataReq{ - MetadataReq: &typesCons.StateSyncMetadataRequest{ - PeerAddress: requesterNodePeerAddress, - }, + testHeight := uint64(4) + + // Choose node 1 as the server node + // Set server node's height to test height. + serverNode := pocketNodes[1] + serverNodePeerId := serverNode.GetBus().GetConsensusModule().GetNodeAddress() + serverNode.GetBus().GetConsensusModule().SetHeight(testHeight) + + // Choose node 2 as the requester node. + requesterNode := pocketNodes[2] + requesterNodePeerAddress := requesterNode.GetBus().GetConsensusModule().GetNodeAddress() + + // Test MetaData Req + stateSyncMetaDataReqMessage := &typesCons.StateSyncMessage{ + Message: &typesCons.StateSyncMessage_MetadataReq{ + MetadataReq: &typesCons.StateSyncMetadataRequest{ + PeerAddress: requesterNodePeerAddress, }, - } - anyProto, err := anypb.New(stateSyncMetaDataReqMessage) - require.NoError(t, err) + }, + } + anyProto, err := anypb.New(stateSyncMetaDataReqMessage) + require.NoError(t, err) - // Send metadata request to the server node - P2PSend(t, serverNode, anyProto) + // Send metadata request to the server node + P2PSend(t, serverNode, anyProto) - // Start waiting for the metadata request on server node, - errMsg := "StateSync Metadata Request" - receivedMsg, err := WaitForNetworkStateSyncEvents(t, clockMock, eventsChannel, errMsg, 1, 500, false) - require.NoError(t, err) + // Start waiting for the metadata request on server node, + errMsg := "StateSync Metadata Request" + receivedMsg, err := WaitForNetworkStateSyncEvents(t, clockMock, eventsChannel, errMsg, 1, 500, false) + require.NoError(t, err) - msg, err := codec.GetCodec().FromAny(receivedMsg[0]) - require.NoError(t, err) + msg, err := codec.GetCodec().FromAny(receivedMsg[0]) + require.NoError(t, err) - stateSyncMetaDataResMessage, ok := msg.(*typesCons.StateSyncMessage) - require.True(t, ok) + stateSyncMetaDataResMessage, ok := msg.(*typesCons.StateSyncMessage) + require.True(t, ok) - metaDataRes := stateSyncMetaDataResMessage.GetMetadataRes() - require.NotEmpty(t, metaDataRes) + metaDataRes := stateSyncMetaDataResMessage.GetMetadataRes() + require.NotEmpty(t, metaDataRes) + + require.Equal(t, uint64(4), metaDataRes.MaxHeight) + require.Equal(t, uint64(1), metaDataRes.MinHeight) + require.Equal(t, serverNodePeerId, metaDataRes.PeerAddress) - require.Equal(t, uint64(4), metaDataRes.MaxHeight) - require.Equal(t, uint64(1), metaDataRes.MinHeight) - require.Equal(t, serverNodePeerId, metaDataRes.PeerAddress) - */ } func TestStateSync_ServerGetBlock_Success(t *testing.T) { - t.Skip() - /* - // Test preparation - clockMock := clock.NewMock() - timeReminder(t, clockMock, time.Second) - - // Test configs - runtimeMgrs := GenerateNodeRuntimeMgrs(t, numValidators, clockMock) - buses := GenerateBuses(t, runtimeMgrs) - - // Create & start test pocket nodes - eventsChannel := make(modules.EventsChannel, 100) - pocketNodes := CreateTestConsensusPocketNodes(t, buses, eventsChannel) - err := StartAllTestPocketNodes(t, pocketNodes) - require.NoError(t, err) - testHeight := uint64(5) - serverNode := pocketNodes[1] - serverNode.GetBus().GetConsensusModule().SetHeight(testHeight) - - // Choose node 2 as the requester node - requesterNode := pocketNodes[2] - requesterNodePeerAddress := requesterNode.GetBus().GetConsensusModule().GetNodeAddress() - - // Passing Test - // Test GetBlock Req - stateSyncGetBlockMessage := &typesCons.StateSyncMessage{ - Message: &typesCons.StateSyncMessage_GetBlockReq{ - GetBlockReq: &typesCons.GetBlockRequest{ - PeerAddress: requesterNodePeerAddress, - Height: 1, - }, + // Test preparation + clockMock := clock.NewMock() + timeReminder(t, clockMock, time.Second) + + // Test configs + runtimeMgrs := GenerateNodeRuntimeMgrs(t, numValidators, clockMock) + buses := GenerateBuses(t, runtimeMgrs) + + // Create & start test pocket nodes + eventsChannel := make(modules.EventsChannel, 100) + pocketNodes := CreateTestConsensusPocketNodes(t, buses, eventsChannel) + err := StartAllTestPocketNodes(t, pocketNodes) + require.NoError(t, err) + + testHeight := uint64(5) + serverNode := pocketNodes[1] + serverNode.GetBus().GetConsensusModule().SetHeight(testHeight) + + // Choose node 2 as the requester node + requesterNode := pocketNodes[2] + requesterNodePeerAddress := requesterNode.GetBus().GetConsensusModule().GetNodeAddress() + + // Passing Test + // Test GetBlock Req + stateSyncGetBlockMessage := &typesCons.StateSyncMessage{ + Message: &typesCons.StateSyncMessage_GetBlockReq{ + GetBlockReq: &typesCons.GetBlockRequest{ + PeerAddress: requesterNodePeerAddress, + Height: 1, }, - } + }, + } - anyProto, err := anypb.New(stateSyncGetBlockMessage) - require.NoError(t, err) + anyProto, err := anypb.New(stateSyncGetBlockMessage) + require.NoError(t, err) - // Send get block request to the server node - P2PSend(t, serverNode, anyProto) + // Send get block request to the server node + P2PSend(t, serverNode, anyProto) - // Start waiting for the get block request on server node, expect to return error - errMsg := "StateSync Get Block Request Message" - receivedMsg, err := WaitForNetworkStateSyncEvents(t, clockMock, eventsChannel, errMsg, 1, 500, false) - require.NoError(t, err) + // Start waiting for the get block request on server node, expect to return error + errMsg := "StateSync Get Block Request Message" + receivedMsg, err := WaitForNetworkStateSyncEvents(t, clockMock, eventsChannel, errMsg, 1, 500, false) + require.NoError(t, err) - msg, err := codec.GetCodec().FromAny(receivedMsg[0]) - require.NoError(t, err) + msg, err := codec.GetCodec().FromAny(receivedMsg[0]) + require.NoError(t, err) - stateSyncGetBlockResMessage, ok := msg.(*typesCons.StateSyncMessage) - require.True(t, ok) + stateSyncGetBlockResMessage, ok := msg.(*typesCons.StateSyncMessage) + require.True(t, ok) - getBlockRes := stateSyncGetBlockResMessage.GetGetBlockRes() - require.NotEmpty(t, getBlockRes) + getBlockRes := stateSyncGetBlockResMessage.GetGetBlockRes() + require.NotEmpty(t, getBlockRes) + + require.Equal(t, uint64(1), getBlockRes.Block.GetBlockHeader().Height) - require.Equal(t, uint64(1), getBlockRes.Block.GetBlockHeader().Height) - */ } func TestStateSync_ServerGetBlock_FailNonExistingBlock(t *testing.T) { - t.Skip() - /* - // Test preparation - clockMock := clock.NewMock() - timeReminder(t, clockMock, time.Second) - - // Test configs - runtimeMgrs := GenerateNodeRuntimeMgrs(t, numValidators, clockMock) - buses := GenerateBuses(t, runtimeMgrs) - - // Create & start test pocket nodes - eventsChannel := make(modules.EventsChannel, 100) - pocketNodes := CreateTestConsensusPocketNodes(t, buses, eventsChannel) - err := StartAllTestPocketNodes(t, pocketNodes) - require.NoError(t, err) + //t.Skip() + + // Test preparation + clockMock := clock.NewMock() + timeReminder(t, clockMock, time.Second) - testHeight := uint64(5) + // Test configs + runtimeMgrs := GenerateNodeRuntimeMgrs(t, numValidators, clockMock) + buses := GenerateBuses(t, runtimeMgrs) - serverNode := pocketNodes[1] - serverNode.GetBus().GetConsensusModule().SetHeight(testHeight) + // Create & start test pocket nodes + eventsChannel := make(modules.EventsChannel, 100) + pocketNodes := CreateTestConsensusPocketNodes(t, buses, eventsChannel) + err := StartAllTestPocketNodes(t, pocketNodes) + require.NoError(t, err) - // Choose node 2 as the requester node - requesterNode := pocketNodes[2] - requesterNodePeerAddress := requesterNode.GetBus().GetConsensusModule().GetNodeAddress() + testHeight := uint64(5) - // Failing Test - // Get Block Req is current block height + 1 - requestHeight := testHeight + 1 - stateSyncGetBlockMessage := &typesCons.StateSyncMessage{ - Message: &typesCons.StateSyncMessage_GetBlockReq{ - GetBlockReq: &typesCons.GetBlockRequest{ - PeerAddress: requesterNodePeerAddress, - Height: requestHeight, - }, + serverNode := pocketNodes[1] + serverNode.GetBus().GetConsensusModule().SetHeight(testHeight) + + // Choose node 2 as the requester node + requesterNode := pocketNodes[2] + requesterNodePeerAddress := requesterNode.GetBus().GetConsensusModule().GetNodeAddress() + + // Failing Test + // Get Block Req is current block height + 1 + requestHeight := testHeight + 1 + stateSyncGetBlockMessage := &typesCons.StateSyncMessage{ + Message: &typesCons.StateSyncMessage_GetBlockReq{ + GetBlockReq: &typesCons.GetBlockRequest{ + PeerAddress: requesterNodePeerAddress, + Height: requestHeight, }, - } + }, + } - anyProto, err := anypb.New(stateSyncGetBlockMessage) - require.NoError(t, err) + anyProto, err := anypb.New(stateSyncGetBlockMessage) + require.NoError(t, err) + + // Send get block request to the server node + P2PSend(t, serverNode, anyProto) - // Send get block request to the server node - P2PSend(t, serverNode, anyProto) + // Start waiting for the get block request on server node, expect to return error + errMsg := "StateSync Get Block Request Message" + _, err = WaitForNetworkStateSyncEvents(t, clockMock, eventsChannel, errMsg, 1, 500, false) + require.Error(t, err) - // Start waiting for the get block request on server node, expect to return error - errMsg := "StateSync Get Block Request Message" - _, err = WaitForNetworkStateSyncEvents(t, clockMock, eventsChannel, errMsg, 1, 500, false) - require.Error(t, err) - */ } func TestStateSync_UnsyncedPeerSyncs_Success(t *testing.T) { @@ -290,57 +291,6 @@ func TestStateSync_UnsyncedPeerSyncs_Success(t *testing.T) { // TODO(#352): This function will be updated once state sync implementation is complete err = WaitForNodeToSync(t, clockMock, eventsChannel, unsyncedNode, pocketNodes, testHeight) require.NoError(t, err) - //require.Equal(t, unsyncedNode.GetBus().GetConsensusModule().CurrentHeight(), testHeight) - - // for nodeId, pocketNode := range pocketNodes { - // nodeState := GetConsensusNodeState(pocketNode) - // assertHeight(t, nodeId, testHeight, nodeState.Height) - // } - - // advanceTime(t, clockMock, 10*time.Millisecond) - - // // Unsyned node sends new round messages to the rest of the network - // newRoundMessages, err = WaitForNetworkConsensusEvents(t, clockMock, eventsChannel, consensus.NewRound, consensus.Propose, numValidators, 500, true) - // require.NoError(t, err) - - // for _, message := range newRoundMessages { - // //fmt.Println("message: ", message) - // P2PBroadcast(t, pocketNodes, message) - // } - // advanceTime(t, clockMock, 10*time.Millisecond) - - // for nodeId, pocketNode := range pocketNodes { - // nodeState := GetConsensusNodeState(pocketNode) - // assertHeight(t, nodeId, testHeight, nodeState.Height) - // } - - // for _, pocketNode := range pocketNodes { - // TriggerNextView(t, pocketNode) - // } - // advanceTime(t, clockMock, 10*time.Millisecond) - - // newRoundMessages, err = WaitForNetworkConsensusEvents(t, clockMock, eventsChannel, consensus.NewRound, consensus.Propose, numValidators*numValidators, 500, true) - // require.NoError(t, err) - - //PROBLEM - /* - for _, message := range newRoundMessages { - //fmt.Println("NEWROUND message: ", message) - P2PBroadcast(t, pocketNodes, message) - } - advanceTime(t, clockMock, 10*time.Millisecond) - */ - // // 2. Propose - // _, err = WaitForNetworkConsensusEvents(t, clockMock, eventsChannel, consensus.Prepare, consensus.Propose, numValidators, 500, true) - // require.NoError(t, err) - - // for nodeId, pocketNode := range pocketNodes { - // nodeState := GetConsensusNodeState(pocketNode) - // fmt.Println("Node state is h s r: ", nodeState.Height, nodeState.Step, nodeState.Round, "leader is: ", nodeState.LeaderId) - // //fmt.Println("Leader is: ", nodeState.LeaderId) - // assertHeight(t, nodeId, testHeight, nodeState.Height) - // } - } // TODO(#352): Implement these tests diff --git a/consensus/e2e_tests/utils_test.go b/consensus/e2e_tests/utils_test.go index 56eaf468e..864d0eab2 100644 --- a/consensus/e2e_tests/utils_test.go +++ b/consensus/e2e_tests/utils_test.go @@ -719,8 +719,6 @@ func WaitForNodeToSync( allNodes IdToNodeMapping, targetHeight uint64, ) error { - - // TODO! fix error handling currentHeight := unsyncedNode.GetBus().GetConsensusModule().CurrentHeight() for currentHeight < targetHeight { @@ -804,7 +802,6 @@ func waitForNodeToCatchUpHeight( nodeState := GetConsensusNodeState(pocketNode) assertHeight(t, nodeId, targetHeight, nodeState.Height) } - return err } @@ -839,6 +836,11 @@ func waitForNodeToCatchupStep( assertHeight(t, nodeId, targetHeight, nodeState.Height) } + // prepareProposals, err := WaitForNetworkConsensusEvents(t, clck, eventsChannel, consensus.Prepare, consensus.Propose, numValidators, 500, true) + // require.NoError(t, err) + // broadcastMessages(t, prepareProposals, allNodes) + // advanceTime(t, clck, 10*time.Millisecond) + return nil } diff --git a/runtime/bus.go b/runtime/bus.go index 66805c59c..cea228733 100644 --- a/runtime/bus.go +++ b/runtime/bus.go @@ -54,10 +54,6 @@ func (m *bus) PublishEventToBus(e *messaging.PocketEnvelope) { m.channel <- e } -// func (m *bus) SetEventBus(channel modules.EventsChannel) { -// m.channel = channel -// } - func (m *bus) GetBusEvent() *messaging.PocketEnvelope { e := <-m.channel return e From 0fabffccc37459caa674002bb92b4e28ef9d425c Mon Sep 17 00:00:00 2001 From: gokutheengineer Date: Fri, 14 Apr 2023 15:40:20 +0300 Subject: [PATCH 006/100] clean --- consensus/e2e_tests/utils_test.go | 9 ++++++--- consensus/pacemaker/module.go | 2 +- consensus/state_sync/helpers.go | 17 ----------------- consensus/state_sync/module.go | 14 ++------------ 4 files changed, 9 insertions(+), 33 deletions(-) diff --git a/consensus/e2e_tests/utils_test.go b/consensus/e2e_tests/utils_test.go index 864d0eab2..a4a12abad 100644 --- a/consensus/e2e_tests/utils_test.go +++ b/consensus/e2e_tests/utils_test.go @@ -41,8 +41,9 @@ func TestMain(m *testing.M) { // TODO(integration): These are temporary variables used in the prototype integration phase that // will need to be parameterized later once the test framework design matures. const ( - numValidators = 4 - stateHash = "42" + numValidators = 4 + stateHash = "42" + stateSyncUtilCalls = 100 ) var maxTxBytes = defaults.DefaultConsensusMaxMempoolBytes @@ -499,7 +500,9 @@ func baseUtilityMock(t *testing.T, _ modules.EventsChannel, genesisState *genesi } return baseReplicaUtilityUnitOfWorkMock(t, genesisState), nil }). - MaxTimes(4) + // For state sync tests we call NewUnitOfWork is called more than 4 times. Therefore, we need to increase this number. + // TODO: Update this value properly + MaxTimes(4 * stateSyncUtilCalls) utilityMock.EXPECT().GetModuleName().Return(modules.UtilityModuleName).AnyTimes() return utilityMock diff --git a/consensus/pacemaker/module.go b/consensus/pacemaker/module.go index a695f303c..43187812f 100644 --- a/consensus/pacemaker/module.go +++ b/consensus/pacemaker/module.go @@ -133,7 +133,7 @@ func (m *pacemaker) ShouldHandleMessage(msg *typesCons.HotstuffMessage) (bool, e } // Message is from the past - if msg.Round < currentRound || (msg.Round == currentRound && msg.Step < currentStep) || (msg.Round >= currentRound && msg.Step < currentStep) { + if msg.Round < currentRound || (msg.Round == currentRound && msg.Step < currentStep) { m.logger.Warn().Msgf("⚠️ [DISCARDING] ⚠️ Node at (height, step, round) (%d, %d, %d) > message at (%d, %d, %d)", currentHeight, currentStep, currentRound, msg.Height, msg.Step, msg.Round) return false, nil } diff --git a/consensus/state_sync/helpers.go b/consensus/state_sync/helpers.go index c9f29483b..3a7bb1afc 100644 --- a/consensus/state_sync/helpers.go +++ b/consensus/state_sync/helpers.go @@ -6,23 +6,6 @@ import ( "google.golang.org/protobuf/types/known/anypb" ) -// Helper function for broadcasting state sync messages to the all peers known to the node: -// -// sends metadata requests, via `metadataSyncLoop()` function -// sends block requests, via `()` function -// func (m *stateSync) broadcastStateSyncMessage(stateSyncMsg *typesCons.StateSyncMessage, validators []coreTypes.Actor) error { -// m.logger.Info().Msg("📣 Broadcasting state sync message... 📣") - -// // TODO: Use RainTree for this -// //IMPROVE: OPtimize so this is not O(n^2) -// for _, val := range validators { -// if err := m.sendStateSyncMessage(stateSyncMsg, cryptoPocket.AddressFromString(val.GetAddress())); err != nil { -// return err -// } -// } -// return nil -// } - // SendStateSyncMessage sends a state sync message after converting to any proto, to the given peer func (m *stateSync) sendStateSyncMessage(msg *typesCons.StateSyncMessage, dst cryptoPocket.Address) error { anyMsg, err := anypb.New(msg) diff --git a/consensus/state_sync/module.go b/consensus/state_sync/module.go index 5eac72ac1..1fba513b1 100644 --- a/consensus/state_sync/module.go +++ b/consensus/state_sync/module.go @@ -15,8 +15,6 @@ const ( committedBlockHeightChannelSize = 100 ) -// type FSMEventsChannel chan *coreTypes.StateMachineEvent - type StateSyncModule interface { modules.Module StateSyncServerModule @@ -59,7 +57,6 @@ func (*stateSync) Create(bus modules.Bus, options ...modules.ModuleOption) (modu m.logger = logger.Global.CreateLoggerForModule(m.GetModuleName()) m.committedBlockHeightChannel = make(chan uint64, committedBlockHeightChannelSize) - //m.FSMEventsChannel = make(chan coreTypes.StateMachineEvent, 100) return m, nil } @@ -110,28 +107,20 @@ func (m *stateSync) Start() error { // broadcast the get block request message to all validators for _, val := range m.validators { - fmt.Printf("Sending state sync message %s to: %s \n", stateSyncGetBlockMessage, val.GetAddress()) if err := m.sendStateSyncMessage(stateSyncGetBlockMessage, cryptoPocket.AddressFromString(val.GetAddress())); err != nil { return err } } - fmt.Println("waiting for block to be received and committed by consensus module") - // wait for the block to be received and committed by consensus module receivedBlockHeight := <-m.committedBlockHeightChannel - fmt.Println("received and persisted block height: ", receivedBlockHeight) + // TODO!: do we need to do this check? It should not happen if receivedBlockHeight != consensusMod.CurrentHeight() { - fmt.Println("This should not happen?") return fmt.Errorf("received block height %d is not equal to current height %d", receivedBlockHeight, currentHeight) } //timer to check if block is received and committed - currentHeight = consensusMod.CurrentHeight() - } - - fmt.Println("state sync is completed, currentHeight is: ", currentHeight) // syncing is complete, stop the state sync module return m.Stop() } @@ -139,6 +128,7 @@ func (m *stateSync) Start() error { // TODO(#352): check if node is a valdiator, if not send Consensus_IsSyncedNonValidator event // Stop stops the state sync process, and sends `Consensus_IsSyncedValidator` FSM event func (m *stateSync) Stop() error { + m.logger.Info().Msg("Stop state sync moudule") return m.GetBus().GetStateMachineModule().SendEvent(coreTypes.StateMachineEvent_Consensus_IsSyncedValidator) } From 3ae3b88e0e98bc30e72d23645ad50d9a30051888 Mon Sep 17 00:00:00 2001 From: gokutheengineer Date: Fri, 14 Apr 2023 16:14:25 +0300 Subject: [PATCH 007/100] clean, add comments --- consensus/e2e_tests/state_sync_test.go | 8 ++-- consensus/e2e_tests/utils_test.go | 40 +++++++----------- consensus/fsm_handler.go | 3 -- consensus/module_consensus_state_sync.go | 53 ++++-------------------- consensus/pacemaker/module.go | 4 -- state_machine/docs/CHANGELOG.md | 4 ++ state_machine/module.go | 5 +-- 7 files changed, 34 insertions(+), 83 deletions(-) diff --git a/consensus/e2e_tests/state_sync_test.go b/consensus/e2e_tests/state_sync_test.go index bc66a0098..e4518e03b 100644 --- a/consensus/e2e_tests/state_sync_test.go +++ b/consensus/e2e_tests/state_sync_test.go @@ -15,8 +15,6 @@ import ( ) func TestStateSync_ServerGetMetaDataReq_Success(t *testing.T) { - //t.Skip() - // Test preparation clockMock := clock.NewMock() timeReminder(t, clockMock, time.Second) @@ -43,6 +41,7 @@ func TestStateSync_ServerGetMetaDataReq_Success(t *testing.T) { requesterNodePeerAddress := requesterNode.GetBus().GetConsensusModule().GetNodeAddress() // Test MetaData Req + stateSyncMetaDataReqMessage := &typesCons.StateSyncMessage{ Message: &typesCons.StateSyncMessage_MetadataReq{ MetadataReq: &typesCons.StateSyncMetadataRequest{ @@ -50,6 +49,7 @@ func TestStateSync_ServerGetMetaDataReq_Success(t *testing.T) { }, }, } + anyProto, err := anypb.New(stateSyncMetaDataReqMessage) require.NoError(t, err) @@ -70,10 +70,10 @@ func TestStateSync_ServerGetMetaDataReq_Success(t *testing.T) { metaDataRes := stateSyncMetaDataResMessage.GetMetadataRes() require.NotEmpty(t, metaDataRes) - require.Equal(t, uint64(4), metaDataRes.MaxHeight) + lastPersistedHeight := testHeight - 1 + require.Equal(t, lastPersistedHeight, metaDataRes.MaxHeight) require.Equal(t, uint64(1), metaDataRes.MinHeight) require.Equal(t, serverNodePeerId, metaDataRes.PeerAddress) - } func TestStateSync_ServerGetBlock_Success(t *testing.T) { diff --git a/consensus/e2e_tests/utils_test.go b/consensus/e2e_tests/utils_test.go index a4a12abad..68f5705d7 100644 --- a/consensus/e2e_tests/utils_test.go +++ b/consensus/e2e_tests/utils_test.go @@ -726,7 +726,7 @@ func WaitForNodeToSync( for currentHeight < targetHeight { // waiting for unsynced node to request missing block - blockRequest, err := waitForNodeToRequestMissingBlock(t, clck, eventsChannel, currentHeight, targetHeight) + blockRequest, err := waitForNodeToRequestMissingBlock(t, clck, eventsChannel) require.NoError(t, err) // broadcast requeust to all nodes @@ -734,7 +734,7 @@ func WaitForNodeToSync( advanceTime(t, clck, 10*time.Millisecond) // receiving replies from all nodes - blockResponse, err := waitForNodesToReplyToBlockRequest(t, clck, eventsChannel, blockRequest) + blockResponse, err := waitForNodesToReplyToBlockRequest(t, clck, eventsChannel) require.NoError(t, err) // sending block response to unsynced node @@ -742,12 +742,13 @@ func WaitForNodeToSync( advanceTime(t, clck, 10*time.Millisecond) // waiting for node to catch the global height - err = waitForNodeToCatchUpHeight(t, clck, eventsChannel, unsyncedNode, allNodes, blockResponse, currentHeight+1) + err = waitForNodeToCatchUpHeight(t, clck, eventsChannel, allNodes, currentHeight+1) require.NoError(t, err) advanceTime(t, clck, 10*time.Millisecond) - err = waitForNodeToCatchupStep(t, clck, eventsChannel, unsyncedNode, allNodes, blockResponse, currentHeight+1) + //waiting for node to catch the same step + err = waitForNodeToCatchupStep(t, clck, eventsChannel, allNodes, currentHeight+1) require.NoError(t, err) currentHeight = unsyncedNode.GetBus().GetConsensusModule().CurrentHeight() @@ -755,46 +756,40 @@ func WaitForNodeToSync( return nil } -// TODO(#352): implement this function. // waitForNodeToRequestMissingBlock waits for unsynced node to request missing block form the network func waitForNodeToRequestMissingBlock( t *testing.T, clck *clock.Mock, eventsChannel modules.EventsChannel, - startingHeight uint64, - targetHeight uint64, ) (*anypb.Any, error) { errMsg := "StateSync Block Request Messages" msgs, err := WaitForNetworkStateSyncEvents(t, clck, eventsChannel, errMsg, numValidators, 250, true) require.NoError(t, err) - return msgs[0], nil + + return msgs[0], err } -// TODO(#352): implement this function. // waitForNodeToReceiveMissingBlock requests block request of the unsynced node // for given node to node to catch up to the target height by sending the requested block. func waitForNodesToReplyToBlockRequest( t *testing.T, clck *clock.Mock, eventsChannel modules.EventsChannel, - blockReq *anypb.Any, ) (*anypb.Any, error) { errMsg := "StateSync Block Response Messages" msgs, err := WaitForNetworkStateSyncEvents(t, clck, eventsChannel, errMsg, numValidators-1, 250, true) require.NoError(t, err) - return msgs[0], nil + + return msgs[0], err } -// TODO(#352): implement this function. // waitForNodeToCatchUp waits for given node to node to catch up to the target height by sending the requested block. func waitForNodeToCatchUpHeight( t *testing.T, clck *clock.Mock, eventsChannel modules.EventsChannel, - unsyncedNode *shared.Node, allNodes IdToNodeMapping, - blockResponse *anypb.Any, targetHeight uint64, ) error { // wait for unsynced node to send StateMachineEvent_Consensus_IsSyncedValidator event @@ -805,6 +800,7 @@ func waitForNodeToCatchUpHeight( nodeState := GetConsensusNodeState(pocketNode) assertHeight(t, nodeId, targetHeight, nodeState.Height) } + return err } @@ -812,9 +808,7 @@ func waitForNodeToCatchupStep( t *testing.T, clck *clock.Mock, eventsChannel modules.EventsChannel, - unsyncedNode *shared.Node, allNodes IdToNodeMapping, - blockResponse *anypb.Any, targetHeight uint64, ) error { // Unsynced node sends new round messages to the rest of the network @@ -834,15 +828,13 @@ func waitForNodeToCatchupStep( broadcastMessages(t, newRoundMessages, allNodes) advanceTime(t, clck, 10*time.Millisecond) - for nodeId, pocketNode := range allNodes { - nodeState := GetConsensusNodeState(pocketNode) - assertHeight(t, nodeId, targetHeight, nodeState.Height) - } + // round := GetConsensusNodeState(allNodes[0]).Round - // prepareProposals, err := WaitForNetworkConsensusEvents(t, clck, eventsChannel, consensus.Prepare, consensus.Propose, numValidators, 500, true) - // require.NoError(t, err) - // broadcastMessages(t, prepareProposals, allNodes) - // advanceTime(t, clck, 10*time.Millisecond) + // for nodeId, pocketNode := range allNodes { + // nodeState := GetConsensusNodeState(pocketNode) + // assertHeight(t, nodeId, targetHeight, nodeState.Height) + // assertHeight(t, nodeId, round, nodeState) + // } return nil } diff --git a/consensus/fsm_handler.go b/consensus/fsm_handler.go index 198e5a664..608261b2a 100644 --- a/consensus/fsm_handler.go +++ b/consensus/fsm_handler.go @@ -35,9 +35,6 @@ func (m *consensusModule) HandleEvent(transitionMessageAny *anypb.Any) error { func (m *consensusModule) handleStateTransitionEvent(msg *messaging.StateMachineTransitionEvent) error { fsm_state := msg.NewState - // publishing FSM event - //m.publishFSMEvent(msg) - m.logger.Debug().Fields(messaging.TransitionEventToMap(msg)).Msg("Received state machine transition msg") switch coreTypes.StateMachineState(fsm_state) { diff --git a/consensus/module_consensus_state_sync.go b/consensus/module_consensus_state_sync.go index 07ebc2553..41418c580 100644 --- a/consensus/module_consensus_state_sync.go +++ b/consensus/module_consensus_state_sync.go @@ -91,54 +91,21 @@ func (m *consensusModule) maxPersistedBlockHeight() (uint64, error) { } func (m *consensusModule) verifyBlock(block *coreTypes.Block) error { - // blockHeader := block.BlockHeader - // qcBytes := blockHeader.GetQuorumCertificate() - - // if qcBytes == nil { - // m.logger.Error().Err(typesCons.ErrNoQcInReceivedBlock).Msg(typesCons.DisregardBlock) - // return typesCons.ErrNoQcInReceivedBlock - // } - - // qc := typesCons.QuorumCertificate{} - // if err := proto.Unmarshal(qcBytes, &qc); err != nil { - // return err - // } - - // m.logger.Info().Msg("verifyBlock, validating Quroum Certificate") - - // if err := m.validateQuorumCertificate(&qc); err != nil { - // m.logger.Error().Err(err).Msg("Couldn't apply block, invalid QC") - // return err - // } - - // m.logger.Info().Msg("verifyBlock, QC is valid, refreshing utility context") - // if err := m.refreshUtilityUnitOfWork(); err != nil { - // m.logger.Error().Err(err).Msg("Could not refresh utility context") - // return err - // } - - // // leaderIdInt, err := m.GetNodeIdFromNodeAddress(string(block.BlockHeader.ProposerAddress)) - // // if err != nil { - // // m.logger.Error().Err(err).Msg("Could not get leader id from leader address") - // // return err - // // } - - // // leaderId := typesCons.NodeId(leaderIdInt) - // // m.leaderId = &leaderId - // // m.logger.Info().Msgf("verifyBlock, leaderId is: %d", leaderId) - return nil } func (m *consensusModule) applyAndCommitBlock(block *coreTypes.Block) error { - m.logger.Info().Msg("applyAndCommitBlock, applying the block") - m.applyBlock(block) + m.logger.Info().Msgf("applying and committing the block at height %d", block.BlockHeader.Height) - m.logger.Info().Msg("applyAndCommitBlock, committing the block") + // TODO: uncomment following. In this PR test blocks don't have a valid QC, therefore commented out to let the tests pass + // if err := m.applyBlock(block); err != nil { + // m.logger.Error().Err(err).Msg("Could not apply block, invalid QC") + // return err + // } if err := m.commitBlock(block); err != nil { m.logger.Error().Err(err).Msg("Could not commit block, invalid QC") - return nil + return err } m.paceMaker.NewHeight() @@ -148,14 +115,10 @@ func (m *consensusModule) applyAndCommitBlock(block *coreTypes.Block) error { return err } - m.logger.Info().Msgf("applyAndCommitBlock, Block is Committed, maxPersistedHeight is: %d, current height is :%d", maxPersistedHeight, m.height) - - // Send persisted block height to the state sync module - //m.stateSync.CommittedBlock(block.BlockHeader.Height) + m.logger.Info().Msgf("Block is Committed, maxPersistedHeight is: %d, current height is :%d", maxPersistedHeight, m.height) return nil } -// TODO! check if min max height initialization is correct func (m *consensusModule) getAggregatedStateSyncMetadata() typesCons.StateSyncMetadataResponse { minHeight, maxHeight := uint64(1), uint64(1) diff --git a/consensus/pacemaker/module.go b/consensus/pacemaker/module.go index 43187812f..2219d82aa 100644 --- a/consensus/pacemaker/module.go +++ b/consensus/pacemaker/module.go @@ -143,10 +143,6 @@ func (m *pacemaker) ShouldHandleMessage(msg *typesCons.HotstuffMessage) (bool, e return true, nil } - // if msg.Step < currentStep { - // return false, nil - // } - // pacemaker catch up! Node is synced to the right height, but on a previous step/round so we just jump to the latest state. if msg.Round > currentRound || (msg.Round == currentRound && msg.Step > currentStep) { m.logger.Info().Msg(pacemakerCatchupLog(currentHeight, uint64(currentStep), currentRound, msg.Height, uint64(msg.Step), msg.Round)) diff --git a/state_machine/docs/CHANGELOG.md b/state_machine/docs/CHANGELOG.md index 9ac27e72c..07c8b21da 100644 --- a/state_machine/docs/CHANGELOG.md +++ b/state_machine/docs/CHANGELOG.md @@ -7,6 +7,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +## [0.0.0.4] - 2023-04-14 + +- Add `WithDebugEventsChannel()` to be used in testing + ## [0.0.0.4] - 2023-04-03 - Clarify state transitions in README diff --git a/state_machine/module.go b/state_machine/module.go index 9ccdf36b2..1dde654fa 100644 --- a/state_machine/module.go +++ b/state_machine/module.go @@ -83,9 +83,8 @@ func WithCustomStateMachine(stateMachine *fsm.FSM) modules.ModuleOption { } } -// TODO_IN_THIS_COMMIT(gohkan): make sure to document that this is used for debugging purposes. -// We do not want to ever mock the FSM in unit tests because it drives the nodes state and must -// be use as is. However, we need to capture the events form a variety of different nodes. +// WithDebugEventsChannel is only used for testing purposes. It allows us to capture the events +// from the FSM and publish them to debug channel for testing. func WithDebugEventsChannel(eventsChannel modules.EventsChannel) modules.ModuleOption { return func(m modules.InitializableModule) { if m, ok := m.(*stateMachineModule); ok { From e1091d0c8832217f6dcd09e817e715d49752cee5 Mon Sep 17 00:00:00 2001 From: gokutheengineer Date: Fri, 14 Apr 2023 18:56:46 +0300 Subject: [PATCH 008/100] syncs and adds blocks --- build/localnet/manifests/configs.yaml | 3 +- consensus/helpers.go | 15 +++ consensus/module_consensus_state_sync.go | 126 ++++++++++++++++++----- consensus/state_sync/module.go | 4 - consensus/state_sync_handler.go | 1 + shared/node.go | 3 +- 6 files changed, 117 insertions(+), 35 deletions(-) diff --git a/build/localnet/manifests/configs.yaml b/build/localnet/manifests/configs.yaml index 418e2399d..8fe63b11c 100644 --- a/build/localnet/manifests/configs.yaml +++ b/build/localnet/manifests/configs.yaml @@ -16,7 +16,8 @@ data: "manual": true, "debug_time_between_steps_msec": 1000 }, - "private_key": "" + "private_key": "", + "server_mode_enabled": true }, "utility": { "max_mempool_transaction_bytes": 1073741824, diff --git a/consensus/helpers.go b/consensus/helpers.go index b721ca38e..ad03e88c0 100644 --- a/consensus/helpers.go +++ b/consensus/helpers.go @@ -304,3 +304,18 @@ func hotstuffMsgToLoggingFields(msg *typesCons.HotstuffMessage) map[string]any { "step": msg.GetStep(), } } + +func (m *consensusModule) maxPersistedBlockHeight() (uint64, error) { + readCtx, err := m.GetBus().GetPersistenceModule().NewReadContext(int64(m.CurrentHeight())) + if err != nil { + return 0, err + } + defer readCtx.Release() + + maxHeight, err := readCtx.GetMaximumBlockHeight() + if err != nil { + return 0, err + } + + return maxHeight, nil +} diff --git a/consensus/module_consensus_state_sync.go b/consensus/module_consensus_state_sync.go index 41418c580..8326818bb 100644 --- a/consensus/module_consensus_state_sync.go +++ b/consensus/module_consensus_state_sync.go @@ -1,13 +1,20 @@ package consensus import ( + "context" "fmt" + "time" typesCons "github.com/pokt-network/pocket/consensus/types" coreTypes "github.com/pokt-network/pocket/shared/core/types" + cryptoPocket "github.com/pokt-network/pocket/shared/crypto" "github.com/pokt-network/pocket/shared/modules" + "google.golang.org/protobuf/proto" + "google.golang.org/protobuf/types/known/anypb" ) +const metadataSyncPeriod = 30 * time.Second // TODO: Make this configurable + var _ modules.ConsensusStateSync = &consensusModule{} func (m *consensusModule) GetNodeIdFromNodeAddress(peerId string) (uint64, error) { @@ -25,40 +32,40 @@ func (m *consensusModule) GetNodeAddress() string { return m.nodeAddress } -// TODO(#352): Implement this function, currently a placeholder. // commitReceivedBlocks commits the blocks received from the blocksReceived channel -// it is intended to be run as a background process - -// runs as a background process in consensus module -// listens on the blocksReceived channel -// commits the received block +// it runs as a background process in consensus module +// listens on the blocksReceived channel, verifies and commits the received block func (m *consensusModule) blockApplicationLoop() { for blockResponse := range m.blocksReceived { block := blockResponse.Block + fmt.Println("New block is received!", block) maxPersistedHeight, err := m.maxPersistedBlockHeight() if err != nil { m.logger.Err(err).Msg("couldn't query max persisted height") - return + continue } + fmt.Println("Now going to decide if I should apply it") if block.BlockHeader.Height <= maxPersistedHeight { m.logger.Info().Msgf("Received block with height: %d, but node already persisted blocks until height: %d, so node will not apply this block", block.BlockHeader.Height, maxPersistedHeight) - return + continue } else if block.BlockHeader.Height > m.CurrentHeight() { m.logger.Info().Msgf("Received block with height %d, but node's last persisted height is: %d, so node will not apply this block", block.BlockHeader.Height, maxPersistedHeight) - return + continue } + fmt.Println("Now going to verify block") err = m.verifyBlock(block) if err != nil { m.logger.Err(err).Msg("failed to verify block") - return + continue } + fmt.Println("Now going to apply and commit block") err = m.applyAndCommitBlock(block) if err != nil { m.logger.Err(err).Msg("failed to apply and commit block") - return + continue } fmt.Println("Applied block: ", block) m.stateSync.CommittedBlock(m.CurrentHeight()) @@ -66,31 +73,94 @@ func (m *consensusModule) blockApplicationLoop() { } -// TODO(#352): Implement this function, currently a placeholder. // metadataSyncLoop periodically sends metadata requests to its peers // it is intended to be run as a background process -func (m *consensusModule) metadataSyncLoop() { - // runs as a background process in consensus module - // requests metadata from peers - // sends received metadata to the metadataReceived channel +func (m *consensusModule) metadataSyncLoop() error { + // if m.ctx != nil { + // m.logger.Warn().Msg("metadataSyncLoop is already running. Cancelling the previous context...") + // } + ctx := context.TODO() + + ticker := time.NewTicker(metadataSyncPeriod) + for { + select { + case <-ticker.C: + m.logger.Info().Msg("Background metadata sync check triggered") + m.sendMetadataRequests() + + case <-ctx.Done(): + ticker.Stop() + return nil + } + } } -func (m *consensusModule) maxPersistedBlockHeight() (uint64, error) { - readCtx, err := m.GetBus().GetPersistenceModule().NewReadContext(int64(m.CurrentHeight())) - if err != nil { - return 0, err +func (m *consensusModule) sendMetadataRequests() error { + stateSyncMetaDataReqMessage := &typesCons.StateSyncMessage{ + Message: &typesCons.StateSyncMessage_MetadataReq{ + MetadataReq: &typesCons.StateSyncMetadataRequest{ + PeerAddress: m.GetBus().GetConsensusModule().GetNodeAddress(), + }, + }, } - defer readCtx.Release() - maxHeight, err := readCtx.GetMaximumBlockHeight() + validators, err := m.getValidatorsAtHeight(m.CurrentHeight()) if err != nil { - return 0, err + m.logger.Error().Err(err).Msg(typesCons.ErrPersistenceGetAllValidators.Error()) } - return maxHeight, nil + for _, val := range validators { + + anyMsg, err := anypb.New(stateSyncMetaDataReqMessage) + if err != nil { + return err + } + if err := m.GetBus().GetP2PModule().Send(cryptoPocket.AddressFromString(val.GetAddress()), anyMsg); err != nil { + m.logger.Error().Err(err).Msg(typesCons.ErrSendMessage.Error()) + return err + } + } + + return nil } +// TODO! If verify block tries to verify, state sync tests will fail as state sync blocks are empty. func (m *consensusModule) verifyBlock(block *coreTypes.Block) error { + blockHeader := block.BlockHeader + qcBytes := blockHeader.GetQuorumCertificate() + + if qcBytes == nil { + m.logger.Error().Err(typesCons.ErrNoQcInReceivedBlock).Msg(typesCons.DisregardBlock) + return typesCons.ErrNoQcInReceivedBlock + } + + qc := typesCons.QuorumCertificate{} + if err := proto.Unmarshal(qcBytes, &qc); err != nil { + return err + } + + m.logger.Info().Msg("verifyBlock, validating Quroum Certificate") + + if err := m.validateQuorumCertificate(&qc); err != nil { + m.logger.Error().Err(err).Msg("Couldn't apply block, invalid QC") + return err + } + + m.logger.Info().Msg("verifyBlock, QC is valid, refreshing utility context") + if err := m.refreshUtilityUnitOfWork(); err != nil { + m.logger.Error().Err(err).Msg("Could not refresh utility context") + return err + } + + // leaderIdInt, err := m.GetNodeIdFromNodeAddress(string(block.BlockHeader.ProposerAddress)) + // if err != nil { + // m.logger.Error().Err(err).Msg("Could not get leader id from leader address") + // return err + // } + + // leaderId := typesCons.NodeId(leaderIdInt) + // m.leaderId = &leaderId + // m.logger.Info().Msgf("verifyBlock, leaderId is: %d", leaderId) return nil } @@ -98,10 +168,10 @@ func (m *consensusModule) applyAndCommitBlock(block *coreTypes.Block) error { m.logger.Info().Msgf("applying and committing the block at height %d", block.BlockHeader.Height) // TODO: uncomment following. In this PR test blocks don't have a valid QC, therefore commented out to let the tests pass - // if err := m.applyBlock(block); err != nil { - // m.logger.Error().Err(err).Msg("Could not apply block, invalid QC") - // return err - // } + if err := m.applyBlock(block); err != nil { + m.logger.Error().Err(err).Msg("Could not apply block, invalid QC") + return err + } if err := m.commitBlock(block); err != nil { m.logger.Error().Err(err).Msg("Could not commit block, invalid QC") diff --git a/consensus/state_sync/module.go b/consensus/state_sync/module.go index 1fba513b1..95d89139a 100644 --- a/consensus/state_sync/module.go +++ b/consensus/state_sync/module.go @@ -64,13 +64,9 @@ func (*stateSync) Create(bus modules.Bus, options ...modules.ModuleOption) (modu func (m *stateSync) Set(aggregatedMetaData *typesCons.StateSyncMetadataResponse) { m.logger.Info().Msg("State Sync Module Set") m.aggregatedMetaData = aggregatedMetaData - - // return } -// TODO(#352): implement this function // Start performs state sync - // processes and aggregates all metadata collected in metadataReceived channel, // requests missing blocks starting from its current height to the aggregated metadata's maxHeight, // once the requested block is received and committed by consensus module, sends the next request for the next block, diff --git a/consensus/state_sync_handler.go b/consensus/state_sync_handler.go index fbb2eabbb..624e233d7 100644 --- a/consensus/state_sync_handler.go +++ b/consensus/state_sync_handler.go @@ -53,6 +53,7 @@ func (m *consensusModule) handleStateSyncMessage(stateSyncMessage *typesCons.Sta return m.stateSync.HandleGetBlockRequest(stateSyncMessage.GetGetBlockReq()) case *typesCons.StateSyncMessage_GetBlockRes: m.logger.Info().Str("proto_type", "GetBlockResponse").Msg("Handling StateSyncMessage GetBlockResponse") + fmt.Println("Pushing block to blocksReceived channel, for height: ", stateSyncMessage.GetGetBlockRes().Block.BlockHeader.Height) m.blocksReceived <- stateSyncMessage.GetGetBlockRes() return nil default: diff --git a/shared/node.go b/shared/node.go index f85ae346c..d0b267c16 100644 --- a/shared/node.go +++ b/shared/node.go @@ -2,7 +2,6 @@ package shared import ( "context" - "fmt" "time" "github.com/pokt-network/pocket/consensus" @@ -165,7 +164,7 @@ func (m *Node) GetBus() modules.Bus { // TECHDEBT: The `shared` package has dependencies on types in the individual modules. // TODO: Move all message types this is dependant on to the `messaging` package func (node *Node) handleEvent(message *messaging.PocketEnvelope) error { - fmt.Printf("Node: %s, inside handleEvent, with message: %s, bus: %s\n", node.p2pAddress, message, node.bus.GetEventBus()) + //fmt.Printf("Node: %s, inside handleEvent, with message: %s, bus: %s\n", node.p2pAddress, message, node.bus.GetEventBus()) contentType := message.GetContentType() switch contentType { case messaging.NodeStartedEventType: From fd52e692d502d0dd75583d50a5f7bddb6d8a1bc8 Mon Sep 17 00:00:00 2001 From: gokutheengineer Date: Fri, 14 Apr 2023 21:55:05 +0300 Subject: [PATCH 009/100] proceeds except when leader is one of new nodes --- consensus/module_consensus_state_sync.go | 6 ++-- consensus/pacemaker/module.go | 2 +- consensus/state_sync/server.go | 36 ++++++++++++++++-------- consensus/state_sync_handler.go | 11 +++++--- 4 files changed, 36 insertions(+), 19 deletions(-) diff --git a/consensus/module_consensus_state_sync.go b/consensus/module_consensus_state_sync.go index 8326818bb..246f8765d 100644 --- a/consensus/module_consensus_state_sync.go +++ b/consensus/module_consensus_state_sync.go @@ -13,7 +13,7 @@ import ( "google.golang.org/protobuf/types/known/anypb" ) -const metadataSyncPeriod = 30 * time.Second // TODO: Make this configurable +const metadataSyncPeriod = 60 * time.Second // TODO: Make this configurable var _ modules.ConsensusStateSync = &consensusModule{} @@ -38,7 +38,7 @@ func (m *consensusModule) GetNodeAddress() string { func (m *consensusModule) blockApplicationLoop() { for blockResponse := range m.blocksReceived { block := blockResponse.Block - fmt.Println("New block is received!", block) + fmt.Println("New block is received!") maxPersistedHeight, err := m.maxPersistedBlockHeight() if err != nil { m.logger.Err(err).Msg("couldn't query max persisted height") @@ -67,7 +67,7 @@ func (m *consensusModule) blockApplicationLoop() { m.logger.Err(err).Msg("failed to apply and commit block") continue } - fmt.Println("Applied block: ", block) + //fmt.Println("Applied block: ", block) m.stateSync.CommittedBlock(m.CurrentHeight()) } diff --git a/consensus/pacemaker/module.go b/consensus/pacemaker/module.go index 2219d82aa..a59645ee4 100644 --- a/consensus/pacemaker/module.go +++ b/consensus/pacemaker/module.go @@ -133,7 +133,7 @@ func (m *pacemaker) ShouldHandleMessage(msg *typesCons.HotstuffMessage) (bool, e } // Message is from the past - if msg.Round < currentRound || (msg.Round == currentRound && msg.Step < currentStep) { + if msg.Round < currentRound || (msg.Round == currentRound && msg.Step < currentStep) { // msg.Step < currentStep { m.logger.Warn().Msgf("⚠️ [DISCARDING] ⚠️ Node at (height, step, round) (%d, %d, %d) > message at (%d, %d, %d)", currentHeight, currentStep, currentRound, msg.Height, msg.Step, msg.Round) return false, nil } diff --git a/consensus/state_sync/server.go b/consensus/state_sync/server.go index 44fecc4bc..49cb553db 100644 --- a/consensus/state_sync/server.go +++ b/consensus/state_sync/server.go @@ -14,13 +14,13 @@ import ( // local state metadata with other peers syncing to the latest block. type StateSyncServerModule interface { // Advertise (send) the local state sync metadata to the requesting peer - HandleStateSyncMetadataRequest(*typesCons.StateSyncMetadataRequest) error + HandleStateSyncMetadataRequest(*typesCons.StateSyncMetadataRequest) // Advertise (send) the block being requested by the peer - HandleGetBlockRequest(*typesCons.GetBlockRequest) error + HandleGetBlockRequest(*typesCons.GetBlockRequest) } -func (m *stateSync) HandleStateSyncMetadataRequest(metadataReq *typesCons.StateSyncMetadataRequest) error { +func (m *stateSync) HandleStateSyncMetadataRequest(metadataReq *typesCons.StateSyncMetadataRequest) { consensusMod := m.GetBus().GetConsensusModule() serverNodePeerAddress := consensusMod.GetNodeAddress() clientPeerAddress := metadataReq.PeerAddress @@ -32,18 +32,22 @@ func (m *stateSync) HandleStateSyncMetadataRequest(metadataReq *typesCons.StateS readCtx, err := m.GetBus().GetPersistenceModule().NewReadContext(int64(prevPersistedBlockHeight)) if err != nil { - return nil + m.logger.Err(err).Msg("Error creating read context") + return } + defer readCtx.Release() maxHeight, err := readCtx.GetMaximumBlockHeight() if err != nil { - return err + m.logger.Err(err).Msg("Error getting max height") + return } minHeight, err := readCtx.GetMinimumBlockHeight() if err != nil { - return err + m.logger.Err(err).Msg("Error getting min height") + return } stateSyncMessage := typesCons.StateSyncMessage{ @@ -56,10 +60,14 @@ func (m *stateSync) HandleStateSyncMetadataRequest(metadataReq *typesCons.StateS }, } - return m.sendStateSyncMessage(&stateSyncMessage, cryptoPocket.AddressFromString(clientPeerAddress)) + err = m.sendStateSyncMessage(&stateSyncMessage, cryptoPocket.AddressFromString(clientPeerAddress)) + if err != nil { + m.logger.Err(err).Msg("Error sending state sync message") + return + } } -func (m *stateSync) HandleGetBlockRequest(blockReq *typesCons.GetBlockRequest) error { +func (m *stateSync) HandleGetBlockRequest(blockReq *typesCons.GetBlockRequest) { consensusMod := m.GetBus().GetConsensusModule() serverNodePeerAddress := consensusMod.GetNodeAddress() clientPeerAddress := blockReq.PeerAddress @@ -68,13 +76,15 @@ func (m *stateSync) HandleGetBlockRequest(blockReq *typesCons.GetBlockRequest) e prevPersistedBlockHeight := consensusMod.CurrentHeight() - 1 if prevPersistedBlockHeight < blockReq.Height { - return fmt.Errorf("requested block height: %d is higher than current persisted block height: %d", blockReq.Height, prevPersistedBlockHeight) + m.logger.Err(fmt.Errorf("requested block height: %d is higher than current persisted block height: %d", blockReq.Height, prevPersistedBlockHeight)) + return } // get block from the persistence module block, err := m.getBlockAtHeight(blockReq.Height) if err != nil { - return err + m.logger.Err(err).Msg("Error getting block") + return } stateSyncMessage := typesCons.StateSyncMessage{ @@ -86,7 +96,11 @@ func (m *stateSync) HandleGetBlockRequest(blockReq *typesCons.GetBlockRequest) e }, } - return m.sendStateSyncMessage(&stateSyncMessage, cryptoPocket.AddressFromString(clientPeerAddress)) + err = m.sendStateSyncMessage(&stateSyncMessage, cryptoPocket.AddressFromString(clientPeerAddress)) + if err != nil { + m.logger.Err(err).Msg("Error sending state sync message") + return + } } // Get a block from persistence module given block height diff --git a/consensus/state_sync_handler.go b/consensus/state_sync_handler.go index 624e233d7..5560f1a71 100644 --- a/consensus/state_sync_handler.go +++ b/consensus/state_sync_handler.go @@ -10,8 +10,8 @@ import ( ) func (m *consensusModule) HandleStateSyncMessage(stateSyncMessageAny *anypb.Any) error { - m.m.Lock() - defer m.m.Unlock() + // m.m.Lock() + // defer m.m.Unlock() m.logger.Info().Msg("Handling StateSyncMessage") @@ -40,7 +40,8 @@ func (m *consensusModule) handleStateSyncMessage(stateSyncMessage *typesCons.Sta if !m.serverModeEnabled { return fmt.Errorf("server module is not enabled") } - return m.stateSync.HandleStateSyncMetadataRequest(stateSyncMessage.GetMetadataReq()) + go m.stateSync.HandleStateSyncMetadataRequest(stateSyncMessage.GetMetadataReq()) + return nil case *typesCons.StateSyncMessage_MetadataRes: m.logger.Info().Str("proto_type", "MetadataResponse").Msg("Handling StateSyncMessage MetadataRes") m.metadataReceived <- stateSyncMessage.GetMetadataRes() @@ -50,7 +51,8 @@ func (m *consensusModule) handleStateSyncMessage(stateSyncMessage *typesCons.Sta if !m.serverModeEnabled { return fmt.Errorf("server module is not enabled") } - return m.stateSync.HandleGetBlockRequest(stateSyncMessage.GetGetBlockReq()) + go m.stateSync.HandleGetBlockRequest(stateSyncMessage.GetGetBlockReq()) + return nil case *typesCons.StateSyncMessage_GetBlockRes: m.logger.Info().Str("proto_type", "GetBlockResponse").Msg("Handling StateSyncMessage GetBlockResponse") fmt.Println("Pushing block to blocksReceived channel, for height: ", stateSyncMessage.GetGetBlockRes().Block.BlockHeader.Height) @@ -59,4 +61,5 @@ func (m *consensusModule) handleStateSyncMessage(stateSyncMessage *typesCons.Sta default: return fmt.Errorf("unspecified state sync message type") } + } From c73035715190a7c5e0ba77bdb6771bf1cf6b8aab Mon Sep 17 00:00:00 2001 From: gokutheengineer Date: Fri, 14 Apr 2023 23:21:48 +0300 Subject: [PATCH 010/100] leader election works --- consensus/fsm_handler.go | 9 +++++++++ consensus/module_consensus_state_sync.go | 16 ++++++++-------- 2 files changed, 17 insertions(+), 8 deletions(-) diff --git a/consensus/fsm_handler.go b/consensus/fsm_handler.go index 608261b2a..19fec7136 100644 --- a/consensus/fsm_handler.go +++ b/consensus/fsm_handler.go @@ -106,5 +106,14 @@ func (m *consensusModule) HandlePacemaker(msg *messaging.StateMachineTransitionE m.logger.Debug().Msg("Validator node is Synced and in Pacemaker mode. It will stay in this mode until it receives a new block proposal that has a higher height than the current block height") // validator receives a new block proposal, and it understands that it doesn't have block and it transitions to unsycnhed state // transitioning out of this state happens when a new block proposal is received by the hotstuff_replica + + // valdiator node receives nodeID after reaching pacemaker. + validators, err := m.getValidatorsAtHeight(m.CurrentHeight()) + if err != nil { + return err + } + valAddrToIdMap := typesCons.NewActorMapper(validators).GetValAddrToIdMap() + m.nodeId = valAddrToIdMap[m.nodeAddress] + fmt.Println("now my node id is", m.nodeId) return nil } diff --git a/consensus/module_consensus_state_sync.go b/consensus/module_consensus_state_sync.go index 246f8765d..7e333e537 100644 --- a/consensus/module_consensus_state_sync.go +++ b/consensus/module_consensus_state_sync.go @@ -152,15 +152,15 @@ func (m *consensusModule) verifyBlock(block *coreTypes.Block) error { return err } - // leaderIdInt, err := m.GetNodeIdFromNodeAddress(string(block.BlockHeader.ProposerAddress)) - // if err != nil { - // m.logger.Error().Err(err).Msg("Could not get leader id from leader address") - // return err - // } + leaderIdInt, err := m.GetNodeIdFromNodeAddress(string(block.BlockHeader.ProposerAddress)) + if err != nil { + m.logger.Error().Err(err).Msg("Could not get leader id from leader address") + return err + } - // leaderId := typesCons.NodeId(leaderIdInt) - // m.leaderId = &leaderId - // m.logger.Info().Msgf("verifyBlock, leaderId is: %d", leaderId) + leaderId := typesCons.NodeId(leaderIdInt) + m.leaderId = &leaderId + m.logger.Info().Msgf("verifyBlock, leaderId is: %d", leaderId) return nil } From f9a34246384caa2b7ee5038a2523f3d85c666bfa Mon Sep 17 00:00:00 2001 From: gokutheengineer Date: Sat, 15 Apr 2023 23:11:12 +0300 Subject: [PATCH 011/100] debugging leader re-election --- consensus/fsm_handler.go | 7 +++++++ consensus/hotstuff_handler.go | 4 ++++ consensus/module.go | 3 +++ consensus/module_consensus_state_sync.go | 16 ++++++++++++++++ consensus/pacemaker/module.go | 18 ++++++++++++++++++ consensus/state_sync/server.go | 3 ++- state_machine/fsm.go | 1 + 7 files changed, 51 insertions(+), 1 deletion(-) diff --git a/consensus/fsm_handler.go b/consensus/fsm_handler.go index 19fec7136..9c87bfda6 100644 --- a/consensus/fsm_handler.go +++ b/consensus/fsm_handler.go @@ -85,6 +85,12 @@ func (m *consensusModule) HandleSyncMode(msg *messaging.StateMachineTransitionEv m.logger.Debug().Msg("Node is in Sync Mode, starting to sync...") aggregatedMetadata := m.getAggregatedStateSyncMetadata() + higherMsgHeight := m.aggragateHigherMsgHeights() + + if higherMsgHeight > aggregatedMetadata.MaxHeight { + aggregatedMetadata.MaxHeight = higherMsgHeight + } + m.stateSync.Set(&aggregatedMetadata) go m.stateSync.Start() @@ -108,6 +114,7 @@ func (m *consensusModule) HandlePacemaker(msg *messaging.StateMachineTransitionE // transitioning out of this state happens when a new block proposal is received by the hotstuff_replica // valdiator node receives nodeID after reaching pacemaker. + // TODO! check, is this the best place? validators, err := m.getValidatorsAtHeight(m.CurrentHeight()) if err != nil { return err diff --git a/consensus/hotstuff_handler.go b/consensus/hotstuff_handler.go index a9a732a2b..08a7a6fb0 100644 --- a/consensus/hotstuff_handler.go +++ b/consensus/hotstuff_handler.go @@ -22,6 +22,10 @@ func (m *consensusModule) handleHotstuffMessage(msg *typesCons.HotstuffMessage) // Pacemaker - Liveness & safety checks if shouldHandle, err := m.paceMaker.ShouldHandleMessage(msg); !shouldHandle { m.logger.Debug().Fields(loggingFields).Msg("Not handling hotstuff msg...") + // we need to sync until this height before missing this round, maybe for few more times + if msg.Height > m.CurrentHeight() { + m.higherMsgHeights <- msg.Height + } return err } diff --git a/consensus/module.go b/consensus/module.go index a8b78b64a..7faaf4207 100644 --- a/consensus/module.go +++ b/consensus/module.go @@ -82,6 +82,9 @@ type consensusModule struct { // metadata responses received from peers are collected in this channel metadataReceived chan *typesCons.StateSyncMetadataResponse + // channel to send messages with heights higher than current height of the node, to be utlized by the state sync module, fsm handles the aggregatation + higherMsgHeights chan uint64 + serverModeEnabled bool } diff --git a/consensus/module_consensus_state_sync.go b/consensus/module_consensus_state_sync.go index 7e333e537..10dc0f75f 100644 --- a/consensus/module_consensus_state_sync.go +++ b/consensus/module_consensus_state_sync.go @@ -210,3 +210,19 @@ func (m *consensusModule) getAggregatedStateSyncMetadata() typesCons.StateSyncMe MaxHeight: maxHeight, } } + +func (m *consensusModule) aggragateHigherMsgHeights() uint64 { + chanLen := len(m.higherMsgHeights) + + maxHeight := uint64(1) + + for i := 0; i < chanLen; i++ { + metadata := <-m.metadataReceived + if metadata.MaxHeight > maxHeight { + maxHeight = metadata.MaxHeight + } + } + + fmt.Println("aggragateHigherMsgHeights maxHeight: ", maxHeight) + return maxHeight +} diff --git a/consensus/pacemaker/module.go b/consensus/pacemaker/module.go index a59645ee4..61e81b8d4 100644 --- a/consensus/pacemaker/module.go +++ b/consensus/pacemaker/module.go @@ -138,6 +138,24 @@ func (m *pacemaker) ShouldHandleMessage(msg *typesCons.HotstuffMessage) (bool, e return false, nil } + // Pacemaker shouldn't move from e.g. (24, 0, 1) to (24, 3, 8) because when it moves to that step, the block should not be nil. + // because it will return ErrNilBlockVote error for CreateVoteMessage / CreateProposeMessage (?) + // so if pacemaker moves to (24, 3, 8) but it will be nil if it moves to that step. + if msg.Step > currentStep { + return false, nil + } + + // it shouldn't move to (13, 2, 12) to (13, 1, 12), this causes re-leader election, and that round no blocks are generated. + // if that block contains staking transaction, that peer will never be added to the network. and will never sync. + if msg.Round == currentRound && msg.Step < currentStep { + return false, nil + } + + // // (8, 2, 6) to (8, 1, 7) shouldn't happen, because it will cause re-leader election, and that round no blocks are generated. + // if msg.Round > currentRound && msg.Step < currentStep { + // return false, nil + // } + // Everything checks out! if msg.Height == currentHeight && msg.Step == currentStep && msg.Round == currentRound { return true, nil diff --git a/consensus/state_sync/server.go b/consensus/state_sync/server.go index 49cb553db..90d94e513 100644 --- a/consensus/state_sync/server.go +++ b/consensus/state_sync/server.go @@ -29,13 +29,14 @@ func (m *stateSync) HandleStateSyncMetadataRequest(metadataReq *typesCons.StateS // current height is the height of the block that is being processed, so we need to subtract 1 for the last finalized block prevPersistedBlockHeight := consensusMod.CurrentHeight() - 1 + //currentRound := consensusMod.CurrentHeight() + // TODO! check if we need to send currentRound here? probably better readCtx, err := m.GetBus().GetPersistenceModule().NewReadContext(int64(prevPersistedBlockHeight)) if err != nil { m.logger.Err(err).Msg("Error creating read context") return } - defer readCtx.Release() maxHeight, err := readCtx.GetMaximumBlockHeight() diff --git a/state_machine/fsm.go b/state_machine/fsm.go index 508ff9db5..5b64401e0 100644 --- a/state_machine/fsm.go +++ b/state_machine/fsm.go @@ -60,6 +60,7 @@ func NewNodeFSM(callbacks *fsm.Callbacks, options ...func(*fsm.FSM)) *fsm.FSM { string(coreTypes.StateMachineState_Consensus_Pacemaker), string(coreTypes.StateMachineState_Consensus_Synced), string(coreTypes.StateMachineState_P2P_Bootstrapped), + string(coreTypes.StateMachineState_Consensus_SyncMode), }, Dst: string(coreTypes.StateMachineState_Consensus_Unsynced), }, From 1810cda7e6c705a7b4fb36348f90c5302567c9df Mon Sep 17 00:00:00 2001 From: gokutheengineer Date: Sun, 16 Apr 2023 13:55:14 +0300 Subject: [PATCH 012/100] debug --- consensus/fsm_handler.go | 8 ++++---- consensus/helpers.go | 1 + consensus/hotstuff_handler.go | 6 +++--- consensus/module.go | 4 +++- consensus/module_consensus_pacemaker.go | 1 + consensus/module_consensus_state_sync.go | 24 ++++++++++++------------ consensus/pacemaker/module.go | 8 +++++--- consensus/state_sync_handler.go | 2 +- persistence/block.go | 2 +- state_machine/fsm.go | 2 +- 10 files changed, 32 insertions(+), 26 deletions(-) diff --git a/consensus/fsm_handler.go b/consensus/fsm_handler.go index 9c87bfda6..a46579c0a 100644 --- a/consensus/fsm_handler.go +++ b/consensus/fsm_handler.go @@ -85,11 +85,11 @@ func (m *consensusModule) HandleSyncMode(msg *messaging.StateMachineTransitionEv m.logger.Debug().Msg("Node is in Sync Mode, starting to sync...") aggregatedMetadata := m.getAggregatedStateSyncMetadata() - higherMsgHeight := m.aggragateHigherMsgHeights() + //higherMsgHeight := m.aggragateHigherMsgHeights() - if higherMsgHeight > aggregatedMetadata.MaxHeight { - aggregatedMetadata.MaxHeight = higherMsgHeight - } + // if higherMsgHeight > aggregatedMetadata.MaxHeight { + // aggregatedMetadata.MaxHeight = higherMsgHeight + // } m.stateSync.Set(&aggregatedMetadata) diff --git a/consensus/helpers.go b/consensus/helpers.go index ad03e88c0..81857fba0 100644 --- a/consensus/helpers.go +++ b/consensus/helpers.go @@ -243,6 +243,7 @@ func (m *consensusModule) electNextLeader(msg *typesCons.HotstuffMessage) error if err != nil { return err } + fmt.Println("elected leader id: ", leaderId, " for validators: ", validators) idToValAddrMap := typesCons.NewActorMapper(validators).GetIdToValAddrMap() leader, ok := idToValAddrMap[leaderId] if !ok { diff --git a/consensus/hotstuff_handler.go b/consensus/hotstuff_handler.go index 08a7a6fb0..5a74d613a 100644 --- a/consensus/hotstuff_handler.go +++ b/consensus/hotstuff_handler.go @@ -23,9 +23,9 @@ func (m *consensusModule) handleHotstuffMessage(msg *typesCons.HotstuffMessage) if shouldHandle, err := m.paceMaker.ShouldHandleMessage(msg); !shouldHandle { m.logger.Debug().Fields(loggingFields).Msg("Not handling hotstuff msg...") // we need to sync until this height before missing this round, maybe for few more times - if msg.Height > m.CurrentHeight() { - m.higherMsgHeights <- msg.Height - } + // if msg.Height > m.CurrentHeight() { + // m.higherMsgHeights <- msg.Height + // } return err } diff --git a/consensus/module.go b/consensus/module.go index 7faaf4207..771ec9cff 100644 --- a/consensus/module.go +++ b/consensus/module.go @@ -83,7 +83,7 @@ type consensusModule struct { metadataReceived chan *typesCons.StateSyncMetadataResponse // channel to send messages with heights higher than current height of the node, to be utlized by the state sync module, fsm handles the aggregatation - higherMsgHeights chan uint64 + //higherMsgHeights chan uint64 serverModeEnabled bool } @@ -265,6 +265,8 @@ func (m *consensusModule) HandleMessage(message *anypb.Any) error { if !ok { return fmt.Errorf("failed to cast message to HotstuffMessage") } + + fmt.Println("Received message: ", hotstuffMessage) return m.handleHotstuffMessage(hotstuffMessage) default: diff --git a/consensus/module_consensus_pacemaker.go b/consensus/module_consensus_pacemaker.go index aaad51d55..1718cd6a2 100644 --- a/consensus/module_consensus_pacemaker.go +++ b/consensus/module_consensus_pacemaker.go @@ -62,6 +62,7 @@ func (m *consensusModule) IsLeaderSet() bool { } func (m *consensusModule) NewLeader(msg *anypb.Any) error { + fmt.Println("NewLeader is going to elect new leader for msg: ", msg.String()) msgCodec, err := codec.GetCodec().FromAny(msg) if err != nil { return err diff --git a/consensus/module_consensus_state_sync.go b/consensus/module_consensus_state_sync.go index 10dc0f75f..8da981ab5 100644 --- a/consensus/module_consensus_state_sync.go +++ b/consensus/module_consensus_state_sync.go @@ -211,18 +211,18 @@ func (m *consensusModule) getAggregatedStateSyncMetadata() typesCons.StateSyncMe } } -func (m *consensusModule) aggragateHigherMsgHeights() uint64 { - chanLen := len(m.higherMsgHeights) +// func (m *consensusModule) aggragateHigherMsgHeights() uint64 { +// chanLen := len(m.higherMsgHeights) - maxHeight := uint64(1) +// maxHeight := uint64(1) - for i := 0; i < chanLen; i++ { - metadata := <-m.metadataReceived - if metadata.MaxHeight > maxHeight { - maxHeight = metadata.MaxHeight - } - } +// for i := 0; i < chanLen; i++ { +// metadata := <-m.metadataReceived +// if metadata.MaxHeight > maxHeight { +// maxHeight = metadata.MaxHeight +// } +// } - fmt.Println("aggragateHigherMsgHeights maxHeight: ", maxHeight) - return maxHeight -} +// fmt.Println("aggragateHigherMsgHeights maxHeight: ", maxHeight) +// return maxHeight +// } diff --git a/consensus/pacemaker/module.go b/consensus/pacemaker/module.go index 61e81b8d4..963a71613 100644 --- a/consensus/pacemaker/module.go +++ b/consensus/pacemaker/module.go @@ -141,9 +141,9 @@ func (m *pacemaker) ShouldHandleMessage(msg *typesCons.HotstuffMessage) (bool, e // Pacemaker shouldn't move from e.g. (24, 0, 1) to (24, 3, 8) because when it moves to that step, the block should not be nil. // because it will return ErrNilBlockVote error for CreateVoteMessage / CreateProposeMessage (?) // so if pacemaker moves to (24, 3, 8) but it will be nil if it moves to that step. - if msg.Step > currentStep { - return false, nil - } + // if msg.Step > currentStep { + // return false, nil + // } // it shouldn't move to (13, 2, 12) to (13, 1, 12), this causes re-leader election, and that round no blocks are generated. // if that block contains staking transaction, that peer will never be added to the network. and will never sync. @@ -169,6 +169,8 @@ func (m *pacemaker) ShouldHandleMessage(msg *typesCons.HotstuffMessage) (bool, e // TODO: Add tests for this. When we catch up to a later step, the leader is still the same. // However, when we catch up to a later round, the leader at the same height will be different. + fmt.Println("YO!, isleaderset: ", consensusMod.IsLeaderSet(), " currentRound: ", currentRound, " msg.Round: ", msg.Round, " currentStep: ", currentStep, " msg.Step: ", msg.Step, " msg.Height: ", msg.Height, " currentHeight: ", currentHeight) + if currentRound != msg.Round || !consensusMod.IsLeaderSet() { anyProto, err := anypb.New(msg) if err != nil { diff --git a/consensus/state_sync_handler.go b/consensus/state_sync_handler.go index 5560f1a71..33e3ad020 100644 --- a/consensus/state_sync_handler.go +++ b/consensus/state_sync_handler.go @@ -13,7 +13,7 @@ func (m *consensusModule) HandleStateSyncMessage(stateSyncMessageAny *anypb.Any) // m.m.Lock() // defer m.m.Unlock() - m.logger.Info().Msg("Handling StateSyncMessage") + //m.logger.Info().Msg("Handling StateSyncMessage") switch stateSyncMessageAny.MessageName() { case messaging.StateSyncMessageContentType: diff --git a/persistence/block.go b/persistence/block.go index 0ce7f2c46..c388941c0 100644 --- a/persistence/block.go +++ b/persistence/block.go @@ -93,7 +93,7 @@ func (p *PostgresContext) prepareBlock(proposerAddr, quorumCert []byte) (*coreTy Transactions: txs, } - p.logger.Info().Uint64("height", block.BlockHeader.Height).Msg("Storing block in block store.") + //p.logger.Info().Uint64("height", block.BlockHeader.Height).Msg("Storing block in block store.") return block, nil } diff --git a/state_machine/fsm.go b/state_machine/fsm.go index 5b64401e0..92abfc995 100644 --- a/state_machine/fsm.go +++ b/state_machine/fsm.go @@ -60,7 +60,7 @@ func NewNodeFSM(callbacks *fsm.Callbacks, options ...func(*fsm.FSM)) *fsm.FSM { string(coreTypes.StateMachineState_Consensus_Pacemaker), string(coreTypes.StateMachineState_Consensus_Synced), string(coreTypes.StateMachineState_P2P_Bootstrapped), - string(coreTypes.StateMachineState_Consensus_SyncMode), + //string(coreTypes.StateMachineState_Consensus_SyncMode), }, Dst: string(coreTypes.StateMachineState_Consensus_Unsynced), }, From 5a909ad29accee5d27c6bbe8f9261f891db9efc7 Mon Sep 17 00:00:00 2001 From: gokutheengineer Date: Sun, 16 Apr 2023 15:04:49 +0300 Subject: [PATCH 013/100] syncs up successfully, if itis not leader --- consensus/hotstuff_replica.go | 34 ++++++++++++++++++++++++ consensus/module_consensus_state_sync.go | 2 +- consensus/pacemaker/module.go | 12 ++++----- 3 files changed, 41 insertions(+), 7 deletions(-) diff --git a/consensus/hotstuff_replica.go b/consensus/hotstuff_replica.go index 80b25cee7..2299b0484 100644 --- a/consensus/hotstuff_replica.go +++ b/consensus/hotstuff_replica.go @@ -94,6 +94,17 @@ func (handler *HotstuffReplicaMessageHandler) HandlePrecommitMessage(m *consensu return } + // if replica is syncing up: + // it is possible for it to have missed the newround, and m.block can be empty. + // here if that is the case, first few steps might have been empty. + //so we need to set the block, and refresh utility context + if m.block == nil { + m.block = msg.GetBlock() + if err := m.refreshUtilityUnitOfWork(); err != nil { + m.logger.Error().Err(err).Msg("Could not refresh utility context") + } + } + m.step = Commit m.prepareQC = quorumCert // INVESTIGATE: Why are we never using this for validation? @@ -123,6 +134,17 @@ func (handler *HotstuffReplicaMessageHandler) HandleCommitMessage(m *consensusMo return } + // if replica is syncing up: + // it is possible for it to have missed the newround, and m.block can be empty. + // here if that is the case, first few steps might have been empty. + //so we need to set the block, and refresh utility context + if m.block == nil { + m.block = msg.GetBlock() + if err := m.refreshUtilityUnitOfWork(); err != nil { + m.logger.Error().Err(err).Msg("Could not refresh utility context") + } + } + m.step = Decide m.lockedQC = quorumCert // DISCUSS: How does the replica recover if it's locked? Replica `formally` agrees on the QC while the rest of the network `verbally` agrees on the QC. @@ -157,6 +179,18 @@ func (handler *HotstuffReplicaMessageHandler) HandleDecideMessage(m *consensusMo m.logger.Error().Err(err).Msg("Failed to convert the quorum certificate to bytes") return } + + // if replica is syncing up: + // it is possible for it to have missed the newround, and m.block can be empty. + // here if that is the case, first few steps might have been empty. + //so we need to set the block, and refresh utility context + if m.block == nil { + m.block = msg.GetBlock() + if err := m.refreshUtilityUnitOfWork(); err != nil { + m.logger.Error().Err(err).Msg("Could not refresh utility context") + } + } + m.block.BlockHeader.QuorumCertificate = quorumCertBytes if err := m.commitBlock(m.block); err != nil { diff --git a/consensus/module_consensus_state_sync.go b/consensus/module_consensus_state_sync.go index 8da981ab5..6ea8f5e17 100644 --- a/consensus/module_consensus_state_sync.go +++ b/consensus/module_consensus_state_sync.go @@ -13,7 +13,7 @@ import ( "google.golang.org/protobuf/types/known/anypb" ) -const metadataSyncPeriod = 60 * time.Second // TODO: Make this configurable +const metadataSyncPeriod = 30 * time.Second // TODO: Make this configurable var _ modules.ConsensusStateSync = &consensusModule{} diff --git a/consensus/pacemaker/module.go b/consensus/pacemaker/module.go index 963a71613..905a8831a 100644 --- a/consensus/pacemaker/module.go +++ b/consensus/pacemaker/module.go @@ -147,15 +147,15 @@ func (m *pacemaker) ShouldHandleMessage(msg *typesCons.HotstuffMessage) (bool, e // it shouldn't move to (13, 2, 12) to (13, 1, 12), this causes re-leader election, and that round no blocks are generated. // if that block contains staking transaction, that peer will never be added to the network. and will never sync. - if msg.Round == currentRound && msg.Step < currentStep { - return false, nil - } - - // // (8, 2, 6) to (8, 1, 7) shouldn't happen, because it will cause re-leader election, and that round no blocks are generated. - // if msg.Round > currentRound && msg.Step < currentStep { + // if msg.Round == currentRound && msg.Step < currentStep { // return false, nil // } + // (8, 2, 6) to (8, 1, 7) shouldn't happen, because it will cause re-leader election, and that round no blocks are generated. + if msg.Round > currentRound && msg.Step < currentStep { + return false, nil + } + // Everything checks out! if msg.Height == currentHeight && msg.Step == currentStep && msg.Round == currentRound { return true, nil From 7cd1f4b41c74db122297e414fdb6a715a6c0765c Mon Sep 17 00:00:00 2001 From: gokutheengineer Date: Sun, 16 Apr 2023 20:53:18 +0300 Subject: [PATCH 014/100] clean --- consensus/fsm_handler.go | 27 +++++++++--------- consensus/helpers.go | 3 +- consensus/hotstuff_handler.go | 4 --- consensus/module.go | 3 -- consensus/module_consensus_pacemaker.go | 2 +- consensus/module_consensus_state_sync.go | 36 +++--------------------- consensus/pacemaker/module.go | 21 ++------------ consensus/state_sync/server.go | 7 ++--- 8 files changed, 26 insertions(+), 77 deletions(-) diff --git a/consensus/fsm_handler.go b/consensus/fsm_handler.go index a46579c0a..eeac0c4ec 100644 --- a/consensus/fsm_handler.go +++ b/consensus/fsm_handler.go @@ -85,12 +85,6 @@ func (m *consensusModule) HandleSyncMode(msg *messaging.StateMachineTransitionEv m.logger.Debug().Msg("Node is in Sync Mode, starting to sync...") aggregatedMetadata := m.getAggregatedStateSyncMetadata() - //higherMsgHeight := m.aggragateHigherMsgHeights() - - // if higherMsgHeight > aggregatedMetadata.MaxHeight { - // aggregatedMetadata.MaxHeight = higherMsgHeight - // } - m.stateSync.Set(&aggregatedMetadata) go m.stateSync.Start() @@ -113,14 +107,19 @@ func (m *consensusModule) HandlePacemaker(msg *messaging.StateMachineTransitionE // validator receives a new block proposal, and it understands that it doesn't have block and it transitions to unsycnhed state // transitioning out of this state happens when a new block proposal is received by the hotstuff_replica - // valdiator node receives nodeID after reaching pacemaker. - // TODO! check, is this the best place? - validators, err := m.getValidatorsAtHeight(m.CurrentHeight()) - if err != nil { - return err + // TODO move this check to a more proper place + // a validator who just bootstrapped, synced to the rest of the network and reached pacemaker mode, its consensus module doesn't have a valid nodeId set yet. + // therefore, it's node id should be assigned. + if m.nodeId == 0 { + // valdiator node receives nodeID after reaching pacemaker. + validators, err := m.getValidatorsAtHeight(m.CurrentHeight()) + if err != nil { + return err + } + valAddrToIdMap := typesCons.NewActorMapper(validators).GetValAddrToIdMap() + m.nodeId = valAddrToIdMap[m.nodeAddress] + fmt.Println("now my node id is", m.nodeId) } - valAddrToIdMap := typesCons.NewActorMapper(validators).GetValAddrToIdMap() - m.nodeId = valAddrToIdMap[m.nodeAddress] - fmt.Println("now my node id is", m.nodeId) + return nil } diff --git a/consensus/helpers.go b/consensus/helpers.go index 81857fba0..61de48b48 100644 --- a/consensus/helpers.go +++ b/consensus/helpers.go @@ -243,7 +243,8 @@ func (m *consensusModule) electNextLeader(msg *typesCons.HotstuffMessage) error if err != nil { return err } - fmt.Println("elected leader id: ", leaderId, " for validators: ", validators) + + m.logger.Info().Msgf("elected leader id: ", leaderId, ", for msg: ", msg) idToValAddrMap := typesCons.NewActorMapper(validators).GetIdToValAddrMap() leader, ok := idToValAddrMap[leaderId] if !ok { diff --git a/consensus/hotstuff_handler.go b/consensus/hotstuff_handler.go index 5a74d613a..a9a732a2b 100644 --- a/consensus/hotstuff_handler.go +++ b/consensus/hotstuff_handler.go @@ -22,10 +22,6 @@ func (m *consensusModule) handleHotstuffMessage(msg *typesCons.HotstuffMessage) // Pacemaker - Liveness & safety checks if shouldHandle, err := m.paceMaker.ShouldHandleMessage(msg); !shouldHandle { m.logger.Debug().Fields(loggingFields).Msg("Not handling hotstuff msg...") - // we need to sync until this height before missing this round, maybe for few more times - // if msg.Height > m.CurrentHeight() { - // m.higherMsgHeights <- msg.Height - // } return err } diff --git a/consensus/module.go b/consensus/module.go index 771ec9cff..2d646f764 100644 --- a/consensus/module.go +++ b/consensus/module.go @@ -82,9 +82,6 @@ type consensusModule struct { // metadata responses received from peers are collected in this channel metadataReceived chan *typesCons.StateSyncMetadataResponse - // channel to send messages with heights higher than current height of the node, to be utlized by the state sync module, fsm handles the aggregatation - //higherMsgHeights chan uint64 - serverModeEnabled bool } diff --git a/consensus/module_consensus_pacemaker.go b/consensus/module_consensus_pacemaker.go index 1718cd6a2..caf8de36f 100644 --- a/consensus/module_consensus_pacemaker.go +++ b/consensus/module_consensus_pacemaker.go @@ -62,7 +62,7 @@ func (m *consensusModule) IsLeaderSet() bool { } func (m *consensusModule) NewLeader(msg *anypb.Any) error { - fmt.Println("NewLeader is going to elect new leader for msg: ", msg.String()) + //m.logger.Info().Msgf("NewLeader is going to elect new leader for msg: ", msg.String()) msgCodec, err := codec.GetCodec().FromAny(msg) if err != nil { return err diff --git a/consensus/module_consensus_state_sync.go b/consensus/module_consensus_state_sync.go index 6ea8f5e17..5dc29f801 100644 --- a/consensus/module_consensus_state_sync.go +++ b/consensus/module_consensus_state_sync.go @@ -2,7 +2,6 @@ package consensus import ( "context" - "fmt" "time" typesCons "github.com/pokt-network/pocket/consensus/types" @@ -38,14 +37,15 @@ func (m *consensusModule) GetNodeAddress() string { func (m *consensusModule) blockApplicationLoop() { for blockResponse := range m.blocksReceived { block := blockResponse.Block - fmt.Println("New block is received!") + m.logger.Info().Msgf("New block, at height %d is received!", block.BlockHeader.Height) + maxPersistedHeight, err := m.maxPersistedBlockHeight() if err != nil { m.logger.Err(err).Msg("couldn't query max persisted height") continue } - fmt.Println("Now going to decide if I should apply it") + //fmt.Println("Now going to decide if I should apply it") if block.BlockHeader.Height <= maxPersistedHeight { m.logger.Info().Msgf("Received block with height: %d, but node already persisted blocks until height: %d, so node will not apply this block", block.BlockHeader.Height, maxPersistedHeight) continue @@ -54,20 +54,18 @@ func (m *consensusModule) blockApplicationLoop() { continue } - fmt.Println("Now going to verify block") err = m.verifyBlock(block) if err != nil { m.logger.Err(err).Msg("failed to verify block") continue } - fmt.Println("Now going to apply and commit block") err = m.applyAndCommitBlock(block) if err != nil { m.logger.Err(err).Msg("failed to apply and commit block") continue } - //fmt.Println("Applied block: ", block) + m.logger.Info().Msgf("Block, at height %d is committed!", block.BlockHeader.Height) m.stateSync.CommittedBlock(m.CurrentHeight()) } @@ -76,9 +74,6 @@ func (m *consensusModule) blockApplicationLoop() { // metadataSyncLoop periodically sends metadata requests to its peers // it is intended to be run as a background process func (m *consensusModule) metadataSyncLoop() error { - // if m.ctx != nil { - // m.logger.Warn().Msg("metadataSyncLoop is already running. Cancelling the previous context...") - // } ctx := context.TODO() ticker := time.NewTicker(metadataSyncPeriod) @@ -139,14 +134,11 @@ func (m *consensusModule) verifyBlock(block *coreTypes.Block) error { return err } - m.logger.Info().Msg("verifyBlock, validating Quroum Certificate") - if err := m.validateQuorumCertificate(&qc); err != nil { m.logger.Error().Err(err).Msg("Couldn't apply block, invalid QC") return err } - m.logger.Info().Msg("verifyBlock, QC is valid, refreshing utility context") if err := m.refreshUtilityUnitOfWork(); err != nil { m.logger.Error().Err(err).Msg("Could not refresh utility context") return err @@ -160,14 +152,10 @@ func (m *consensusModule) verifyBlock(block *coreTypes.Block) error { leaderId := typesCons.NodeId(leaderIdInt) m.leaderId = &leaderId - m.logger.Info().Msgf("verifyBlock, leaderId is: %d", leaderId) return nil } func (m *consensusModule) applyAndCommitBlock(block *coreTypes.Block) error { - m.logger.Info().Msgf("applying and committing the block at height %d", block.BlockHeader.Height) - - // TODO: uncomment following. In this PR test blocks don't have a valid QC, therefore commented out to let the tests pass if err := m.applyBlock(block); err != nil { m.logger.Error().Err(err).Msg("Could not apply block, invalid QC") return err @@ -210,19 +198,3 @@ func (m *consensusModule) getAggregatedStateSyncMetadata() typesCons.StateSyncMe MaxHeight: maxHeight, } } - -// func (m *consensusModule) aggragateHigherMsgHeights() uint64 { -// chanLen := len(m.higherMsgHeights) - -// maxHeight := uint64(1) - -// for i := 0; i < chanLen; i++ { -// metadata := <-m.metadataReceived -// if metadata.MaxHeight > maxHeight { -// maxHeight = metadata.MaxHeight -// } -// } - -// fmt.Println("aggragateHigherMsgHeights maxHeight: ", maxHeight) -// return maxHeight -// } diff --git a/consensus/pacemaker/module.go b/consensus/pacemaker/module.go index 905a8831a..44420a73e 100644 --- a/consensus/pacemaker/module.go +++ b/consensus/pacemaker/module.go @@ -138,20 +138,7 @@ func (m *pacemaker) ShouldHandleMessage(msg *typesCons.HotstuffMessage) (bool, e return false, nil } - // Pacemaker shouldn't move from e.g. (24, 0, 1) to (24, 3, 8) because when it moves to that step, the block should not be nil. - // because it will return ErrNilBlockVote error for CreateVoteMessage / CreateProposeMessage (?) - // so if pacemaker moves to (24, 3, 8) but it will be nil if it moves to that step. - // if msg.Step > currentStep { - // return false, nil - // } - - // it shouldn't move to (13, 2, 12) to (13, 1, 12), this causes re-leader election, and that round no blocks are generated. - // if that block contains staking transaction, that peer will never be added to the network. and will never sync. - // if msg.Round == currentRound && msg.Step < currentStep { - // return false, nil - // } - - // (8, 2, 6) to (8, 1, 7) shouldn't happen, because it will cause re-leader election, and that round no blocks are generated. + // Pacemaker shouldn't catch to higher round (8, 2, 6) to (8, 1, 7) shouldn't happen, because it will cause re-leader election, and that round no blocks are generated. if msg.Round > currentRound && msg.Step < currentStep { return false, nil } @@ -167,10 +154,8 @@ func (m *pacemaker) ShouldHandleMessage(msg *typesCons.HotstuffMessage) (bool, e consensusMod.SetStep(uint8(msg.Step)) consensusMod.SetRound(msg.Round) - // TODO: Add tests for this. When we catch up to a later step, the leader is still the same. - // However, when we catch up to a later round, the leader at the same height will be different. - fmt.Println("YO!, isleaderset: ", consensusMod.IsLeaderSet(), " currentRound: ", currentRound, " msg.Round: ", msg.Round, " currentStep: ", currentStep, " msg.Step: ", msg.Step, " msg.Height: ", msg.Height, " currentHeight: ", currentHeight) - + // TODO: Add tests for this. When we catch up to a later step, the leader is still the same. However, when we catch up to a later round, the leader at the same height will be different. + // TODO: Ensure correct leader election for validator that is catching up from previous rounds. if currentRound != msg.Round || !consensusMod.IsLeaderSet() { anyProto, err := anypb.New(msg) if err != nil { diff --git a/consensus/state_sync/server.go b/consensus/state_sync/server.go index 90d94e513..63283a65b 100644 --- a/consensus/state_sync/server.go +++ b/consensus/state_sync/server.go @@ -25,13 +25,12 @@ func (m *stateSync) HandleStateSyncMetadataRequest(metadataReq *typesCons.StateS serverNodePeerAddress := consensusMod.GetNodeAddress() clientPeerAddress := metadataReq.PeerAddress - m.logger.Info().Fields(m.stateSyncLogHelper(clientPeerAddress)).Msgf("Received StateSyncMetadataRequest %s", metadataReq) + m.logger.Info().Fields(m.stateSyncLogHelper(clientPeerAddress)).Msgf("Handling StateSync MetadataRequest") // current height is the height of the block that is being processed, so we need to subtract 1 for the last finalized block prevPersistedBlockHeight := consensusMod.CurrentHeight() - 1 - //currentRound := consensusMod.CurrentHeight() - // TODO! check if we need to send currentRound here? probably better + // TODO check if we need to send currentRound here rather than prevPersistedBlockHeight readCtx, err := m.GetBus().GetPersistenceModule().NewReadContext(int64(prevPersistedBlockHeight)) if err != nil { m.logger.Err(err).Msg("Error creating read context") @@ -73,7 +72,7 @@ func (m *stateSync) HandleGetBlockRequest(blockReq *typesCons.GetBlockRequest) { serverNodePeerAddress := consensusMod.GetNodeAddress() clientPeerAddress := blockReq.PeerAddress - m.logger.Info().Fields(m.stateSyncLogHelper(clientPeerAddress)).Msgf("Received StateSync GetBlockRequest") + m.logger.Info().Fields(m.stateSyncLogHelper(clientPeerAddress)).Msgf("Handling StateSync GetBlockRequest") prevPersistedBlockHeight := consensusMod.CurrentHeight() - 1 if prevPersistedBlockHeight < blockReq.Height { From 74def8424bb90f669c31e9a871838e7828272f9e Mon Sep 17 00:00:00 2001 From: gokutheengineer Date: Wed, 19 Apr 2023 10:37:46 +0300 Subject: [PATCH 015/100] wip --- consensus/fsm_handler.go | 2 +- consensus/hotstuff_replica.go | 13 +++++++++++++ consensus/leader_election/module.go | 4 +++- consensus/module.go | 2 +- consensus/module_consensus_debugging.go | 12 ------------ consensus/module_consensus_state_sync.go | 3 +-- consensus/pacemaker/module.go | 6 +++--- consensus/state_sync/server.go | 4 ++-- consensus/state_sync_handler.go | 2 +- 9 files changed, 25 insertions(+), 23 deletions(-) diff --git a/consensus/fsm_handler.go b/consensus/fsm_handler.go index eeac0c4ec..a3de14ae9 100644 --- a/consensus/fsm_handler.go +++ b/consensus/fsm_handler.go @@ -118,7 +118,7 @@ func (m *consensusModule) HandlePacemaker(msg *messaging.StateMachineTransitionE } valAddrToIdMap := typesCons.NewActorMapper(validators).GetValAddrToIdMap() m.nodeId = valAddrToIdMap[m.nodeAddress] - fmt.Println("now my node id is", m.nodeId) + //fmt.Println("now my node id is", m.nodeId) } return nil diff --git a/consensus/hotstuff_replica.go b/consensus/hotstuff_replica.go index 2299b0484..47db7603d 100644 --- a/consensus/hotstuff_replica.go +++ b/consensus/hotstuff_replica.go @@ -59,6 +59,19 @@ func (handler *HotstuffReplicaMessageHandler) HandlePrepareMessage(m *consensusM return } + // if replica is syncing up: + // it is possible for it to have missed the newround, and m.block can be empty. + // here if that is the case, first few steps might have been empty. + //so we need to refresh utility context + if m.utilityUnitOfWork == nil { + // Clear the previous utility unitOfWork, if it exists, and create a new one + if err := m.refreshUtilityUnitOfWork(); err != nil { + m.logger.Error().Err(err).Msg("Could not refresh utility unitOfWork") + return + } + + } + block := msg.GetBlock() if err := m.applyBlock(block); err != nil { m.logger.Error().Err(err).Msg(typesCons.ErrApplyBlock.Error()) diff --git a/consensus/leader_election/module.go b/consensus/leader_election/module.go index db419c800..756df0828 100644 --- a/consensus/leader_election/module.go +++ b/consensus/leader_election/module.go @@ -58,7 +58,9 @@ func (m *leaderElectionModule) electNextLeaderDeterministicRoundRobin(message *t return typesCons.NodeId(0), err } - value := int64(message.Height) + int64(message.Round) + int64(message.Step) - 1 + //value := int64(message.Height) + int64(message.Round) + int64(message.Step) - 1 + //fmt.Println("electNextLeaderDeterministicRoundRobin, Height:", message.Height, ", Round:", message.Round, ", Step:", message.Step) + value := int64(message.Height) + int64(message.Round) - 1 numVals := int64(len(vals)) return typesCons.NodeId(value%numVals + 1), nil diff --git a/consensus/module.go b/consensus/module.go index 2d646f764..f78170de2 100644 --- a/consensus/module.go +++ b/consensus/module.go @@ -263,7 +263,7 @@ func (m *consensusModule) HandleMessage(message *anypb.Any) error { return fmt.Errorf("failed to cast message to HotstuffMessage") } - fmt.Println("Received message: ", hotstuffMessage) + //fmt.Println("Received message: ", hotstuffMessage) return m.handleHotstuffMessage(hotstuffMessage) default: diff --git a/consensus/module_consensus_debugging.go b/consensus/module_consensus_debugging.go index c9d620be2..a302f1444 100644 --- a/consensus/module_consensus_debugging.go +++ b/consensus/module_consensus_debugging.go @@ -1,8 +1,6 @@ package consensus import ( - "fmt" - typesCons "github.com/pokt-network/pocket/consensus/types" coreTypes "github.com/pokt-network/pocket/shared/core/types" "github.com/pokt-network/pocket/shared/messaging" @@ -72,15 +70,5 @@ func (m *consensusModule) GetLeaderForView(height, round uint64, step uint8) uin // TODO(#609): Refactor to use the test-only package and remove reflection func (m *consensusModule) PushStateSyncMetadataResponse(metadataRes *typesCons.StateSyncMetadataResponse) { - fmt.Println("metadata is: ", metadataRes) m.metadataReceived <- metadataRes } - -// func (m *consensusModule) WaitForFSMSyncedEvent() coreTypes.StateMachineEvent { -// event := <-m.DebugFSMEventsChannel -// return event -// } - -// func (m *consensusModule) PushDebugFSMSyncedEvent(event coreTypes.StateMachineEvent) { -// m.DebugFSMEventsChannel <- event -// } diff --git a/consensus/module_consensus_state_sync.go b/consensus/module_consensus_state_sync.go index 5dc29f801..07ada20b9 100644 --- a/consensus/module_consensus_state_sync.go +++ b/consensus/module_consensus_state_sync.go @@ -12,7 +12,7 @@ import ( "google.golang.org/protobuf/types/known/anypb" ) -const metadataSyncPeriod = 30 * time.Second // TODO: Make this configurable +const metadataSyncPeriod = 45 * time.Second // TODO: Make this configurable var _ modules.ConsensusStateSync = &consensusModule{} @@ -45,7 +45,6 @@ func (m *consensusModule) blockApplicationLoop() { continue } - //fmt.Println("Now going to decide if I should apply it") if block.BlockHeader.Height <= maxPersistedHeight { m.logger.Info().Msgf("Received block with height: %d, but node already persisted blocks until height: %d, so node will not apply this block", block.BlockHeader.Height, maxPersistedHeight) continue diff --git a/consensus/pacemaker/module.go b/consensus/pacemaker/module.go index 44420a73e..11bd48e72 100644 --- a/consensus/pacemaker/module.go +++ b/consensus/pacemaker/module.go @@ -139,9 +139,9 @@ func (m *pacemaker) ShouldHandleMessage(msg *typesCons.HotstuffMessage) (bool, e } // Pacemaker shouldn't catch to higher round (8, 2, 6) to (8, 1, 7) shouldn't happen, because it will cause re-leader election, and that round no blocks are generated. - if msg.Round > currentRound && msg.Step < currentStep { - return false, nil - } + // if msg.Round > currentRound && msg.Step < currentStep { + // return false, nil + // } // Everything checks out! if msg.Height == currentHeight && msg.Step == currentStep && msg.Round == currentRound { diff --git a/consensus/state_sync/server.go b/consensus/state_sync/server.go index 63283a65b..336c47ac1 100644 --- a/consensus/state_sync/server.go +++ b/consensus/state_sync/server.go @@ -25,7 +25,7 @@ func (m *stateSync) HandleStateSyncMetadataRequest(metadataReq *typesCons.StateS serverNodePeerAddress := consensusMod.GetNodeAddress() clientPeerAddress := metadataReq.PeerAddress - m.logger.Info().Fields(m.stateSyncLogHelper(clientPeerAddress)).Msgf("Handling StateSync MetadataRequest") + //m.logger.Info().Fields(m.stateSyncLogHelper(clientPeerAddress)).Msg("Handling StateSync MetadataRequest") // current height is the height of the block that is being processed, so we need to subtract 1 for the last finalized block prevPersistedBlockHeight := consensusMod.CurrentHeight() - 1 @@ -72,7 +72,7 @@ func (m *stateSync) HandleGetBlockRequest(blockReq *typesCons.GetBlockRequest) { serverNodePeerAddress := consensusMod.GetNodeAddress() clientPeerAddress := blockReq.PeerAddress - m.logger.Info().Fields(m.stateSyncLogHelper(clientPeerAddress)).Msgf("Handling StateSync GetBlockRequest") + //m.logger.Info().Fields(m.stateSyncLogHelper(clientPeerAddress)).Msgf("Handling StateSync GetBlockRequest") prevPersistedBlockHeight := consensusMod.CurrentHeight() - 1 if prevPersistedBlockHeight < blockReq.Height { diff --git a/consensus/state_sync_handler.go b/consensus/state_sync_handler.go index 33e3ad020..335428600 100644 --- a/consensus/state_sync_handler.go +++ b/consensus/state_sync_handler.go @@ -55,7 +55,7 @@ func (m *consensusModule) handleStateSyncMessage(stateSyncMessage *typesCons.Sta return nil case *typesCons.StateSyncMessage_GetBlockRes: m.logger.Info().Str("proto_type", "GetBlockResponse").Msg("Handling StateSyncMessage GetBlockResponse") - fmt.Println("Pushing block to blocksReceived channel, for height: ", stateSyncMessage.GetGetBlockRes().Block.BlockHeader.Height) + //fmt.Println("Pushing block to blocksReceived channel, for height: ", stateSyncMessage.GetGetBlockRes().Block.BlockHeader.Height) m.blocksReceived <- stateSyncMessage.GetGetBlockRes() return nil default: From 48a912b295626906eb138a6fb327077295a13745 Mon Sep 17 00:00:00 2001 From: gokutheengineer Date: Wed, 19 Apr 2023 12:01:59 +0300 Subject: [PATCH 016/100] update state sync module --- consensus/doc/CHANGELOG.md | 5 ++++ consensus/state_sync/module.go | 41 +++++++++++++----------------- shared/modules/consensus_module.go | 1 + 3 files changed, 23 insertions(+), 24 deletions(-) diff --git a/consensus/doc/CHANGELOG.md b/consensus/doc/CHANGELOG.md index c94e9d933..15e3ad53e 100644 --- a/consensus/doc/CHANGELOG.md +++ b/consensus/doc/CHANGELOG.md @@ -7,6 +7,11 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +## [0.0.0.49] - 2023-04-19 + +- Add state sync channels `blocksReceived` and `metadataReceived` +- + ## [0.0.0.48] - 2023-04-17 - Debug logging improvements diff --git a/consensus/state_sync/module.go b/consensus/state_sync/module.go index 1fba513b1..280c419ab 100644 --- a/consensus/state_sync/module.go +++ b/consensus/state_sync/module.go @@ -1,8 +1,6 @@ package state_sync import ( - "fmt" - typesCons "github.com/pokt-network/pocket/consensus/types" "github.com/pokt-network/pocket/logger" coreTypes "github.com/pokt-network/pocket/shared/core/types" @@ -55,28 +53,17 @@ func (*stateSync) Create(bus modules.Bus, options ...modules.ModuleOption) (modu bus.RegisterModule(m) m.logger = logger.Global.CreateLoggerForModule(m.GetModuleName()) - m.committedBlockHeightChannel = make(chan uint64, committedBlockHeightChannelSize) return m, nil } func (m *stateSync) Set(aggregatedMetaData *typesCons.StateSyncMetadataResponse) { - m.logger.Info().Msg("State Sync Module Set") m.aggregatedMetaData = aggregatedMetaData - - // return } -// TODO(#352): implement this function -// Start performs state sync - -// processes and aggregates all metadata collected in metadataReceived channel, -// requests missing blocks starting from its current height to the aggregated metadata's maxHeight, -// once the requested block is received and committed by consensus module, sends the next request for the next block, -// when all blocks are received and committed, stops the state sync process by calling its `Stop()` function. +// Start performs state sync process, starting from the consensus module's current height to the aggragated metadata height func (m *stateSync) Start() error { - consensusMod := m.bus.GetConsensusModule() currentHeight := consensusMod.CurrentHeight() nodeAddress := consensusMod.GetNodeAddress() @@ -92,6 +79,7 @@ func (m *stateSync) Start() error { return err } + // start requesting blocks from the current height to the aggregated metadata height for currentHeight <= m.aggregatedMetaData.MaxHeight { m.logger.Info().Msgf("Sync is requesting block: %d, ending height: %d", currentHeight, m.aggregatedMetaData.MaxHeight) @@ -112,24 +100,29 @@ func (m *stateSync) Start() error { } } - // wait for the block to be received and committed by consensus module - receivedBlockHeight := <-m.committedBlockHeightChannel - // TODO!: do we need to do this check? It should not happen - if receivedBlockHeight != consensusMod.CurrentHeight() { - return fmt.Errorf("received block height %d is not equal to current height %d", receivedBlockHeight, currentHeight) - } - //timer to check if block is received and committed + // wait for the requested block to be received and committed by consensus module + <-m.committedBlockHeightChannel + + // requested block is received and committed, continue to the next block from the current height currentHeight = consensusMod.CurrentHeight() } // syncing is complete, stop the state sync module return m.Stop() } -// TODO(#352): check if node is a valdiator, if not send Consensus_IsSyncedNonValidator event // Stop stops the state sync process, and sends `Consensus_IsSyncedValidator` FSM event func (m *stateSync) Stop() error { - m.logger.Info().Msg("Stop state sync moudule") - return m.GetBus().GetStateMachineModule().SendEvent(coreTypes.StateMachineEvent_Consensus_IsSyncedValidator) + // check if the node is a validator + isValidator, err := m.bus.GetConsensusModule().IsValidator() + if err != nil { + return err + } + + m.logger.Info().Msg("Syncing is complete!") + if isValidator { + return m.GetBus().GetStateMachineModule().SendEvent(coreTypes.StateMachineEvent_Consensus_IsSyncedValidator) + } + return m.GetBus().GetStateMachineModule().SendEvent(coreTypes.StateMachineEvent_Consensus_IsSyncedNonValidator) } func (m *stateSync) SetBus(pocketBus modules.Bus) { diff --git a/shared/modules/consensus_module.go b/shared/modules/consensus_module.go index 591e006dd..82673f435 100644 --- a/shared/modules/consensus_module.go +++ b/shared/modules/consensus_module.go @@ -73,6 +73,7 @@ type ConsensusPacemaker interface { type ConsensusStateSync interface { GetNodeIdFromNodeAddress(string) (uint64, error) GetNodeAddress() string + IsValidator() (bool, error) } // ConsensusDebugModule exposes functionality used for testing & development purposes. From 92c2acfe1ddc8a75d1ef805d0772889ed388a1c9 Mon Sep 17 00:00:00 2001 From: gokutheengineer Date: Wed, 19 Apr 2023 16:30:56 +0300 Subject: [PATCH 017/100] clean, add comments --- consensus/e2e_tests/state_sync_test.go | 40 ++++++-------- consensus/e2e_tests/utils_test.go | 67 ++---------------------- consensus/events.go | 12 ----- consensus/fsm_handler.go | 9 ++-- consensus/module_consensus_debugging.go | 3 -- consensus/module_consensus_state_sync.go | 10 +--- consensus/state_sync/module.go | 10 +++- shared/node.go | 2 - 8 files changed, 33 insertions(+), 120 deletions(-) diff --git a/consensus/e2e_tests/state_sync_test.go b/consensus/e2e_tests/state_sync_test.go index e4518e03b..34ce98047 100644 --- a/consensus/e2e_tests/state_sync_test.go +++ b/consensus/e2e_tests/state_sync_test.go @@ -77,7 +77,6 @@ func TestStateSync_ServerGetMetaDataReq_Success(t *testing.T) { } func TestStateSync_ServerGetBlock_Success(t *testing.T) { - // Test preparation clockMock := clock.NewMock() timeReminder(t, clockMock, time.Second) @@ -136,8 +135,6 @@ func TestStateSync_ServerGetBlock_Success(t *testing.T) { } func TestStateSync_ServerGetBlock_FailNonExistingBlock(t *testing.T) { - //t.Skip() - // Test preparation clockMock := clock.NewMock() timeReminder(t, clockMock, time.Second) @@ -183,7 +180,6 @@ func TestStateSync_ServerGetBlock_FailNonExistingBlock(t *testing.T) { errMsg := "StateSync Get Block Request Message" _, err = WaitForNetworkStateSyncEvents(t, clockMock, eventsChannel, errMsg, 1, 500, false) require.Error(t, err) - } func TestStateSync_UnsyncedPeerSyncs_Success(t *testing.T) { @@ -201,9 +197,9 @@ func TestStateSync_UnsyncedPeerSyncs_Success(t *testing.T) { require.NoError(t, err) // Prepare leader info - testHeight := uint64(3) - testRound := uint64(0) - testStep := uint8(consensus.NewRound) + //testHeight := uint64(3) + //testRound := uint64(0) + //testStep := uint8(consensus.NewRound) // Prepare unsynced node info unsyncedNode := pocketNodes[2] @@ -215,12 +211,12 @@ func TestStateSync_UnsyncedPeerSyncs_Success(t *testing.T) { if id == unsyncedNodeId { pocketNode.GetBus().GetConsensusModule().SetHeight(unsyncedNodeHeight) } else { - pocketNode.GetBus().GetConsensusModule().SetHeight(testHeight) + pocketNode.GetBus().GetConsensusModule().SetHeight(uint64(3)) } - pocketNode.GetBus().GetConsensusModule().SetStep(testStep) - pocketNode.GetBus().GetConsensusModule().SetRound(testRound) + pocketNode.GetBus().GetConsensusModule().SetStep(uint8(consensus.NewRound)) + pocketNode.GetBus().GetConsensusModule().SetRound(uint64(0)) - utilityUnitOfWork, err := pocketNode.GetBus().GetUtilityModule().NewUnitOfWork(int64(testHeight)) + utilityUnitOfWork, err := pocketNode.GetBus().GetUtilityModule().NewUnitOfWork(int64(3)) require.NoError(t, err) pocketNode.GetBus().GetConsensusModule().SetUtilityUnitOfWork(utilityUnitOfWork) } @@ -229,16 +225,16 @@ func TestStateSync_UnsyncedPeerSyncs_Success(t *testing.T) { for _, pocketNode := range pocketNodes { TriggerNextView(t, pocketNode) } - currentRound := testRound + 1 + //currentRound := testRound + 1 // Get leaderId for the given height, round and step, by using the Consensus Modules' GetLeaderForView() function. // Any node in pocketNodes mapping can be used to call GetLeaderForView() function. - leaderId := typesCons.NodeId(pocketNodes[1].GetBus().GetConsensusModule().GetLeaderForView(testHeight, currentRound, testStep)) + leaderId := typesCons.NodeId(pocketNodes[1].GetBus().GetConsensusModule().GetLeaderForView(uint64(3), uint64(1), uint8(consensus.NewRound))) leader := pocketNodes[leaderId] leaderPK, err := leader.GetBus().GetConsensusModule().GetPrivateKey() require.NoError(t, err) - block := generatePlaceholderBlock(testHeight, leaderPK.Address()) + block := generatePlaceholderBlock(3, leaderPK.Address()) leader.GetBus().GetConsensusModule().SetBlock(block) // Assert that unsynced node has a different view of the network than the rest of the nodes @@ -251,16 +247,16 @@ func TestStateSync_UnsyncedPeerSyncs_Success(t *testing.T) { assertNodeConsensusView(t, nodeId, typesCons.ConsensusNodeState{ Height: unsyncedNodeHeight, - Step: testStep, - Round: uint8(currentRound), + Step: uint8(consensus.NewRound), + Round: uint8(1), }, nodeState) } else { assertNodeConsensusView(t, nodeId, typesCons.ConsensusNodeState{ - Height: testHeight, - Step: testStep, - Round: uint8(currentRound), + Height: uint64(3), + Step: uint8(consensus.NewRound), + Round: uint8(1), }, nodeState) } @@ -268,11 +264,10 @@ func TestStateSync_UnsyncedPeerSyncs_Success(t *testing.T) { require.Equal(t, typesCons.NodeId(0), nodeState.LeaderId) } - maxPersistedHeight := testHeight - 1 metadataReceived := &typesCons.StateSyncMetadataResponse{ PeerAddress: "unused_peer_addr_in_tests", MinHeight: uint64(1), - MaxHeight: maxPersistedHeight, + MaxHeight: uint64(2), // node height - 1 } // Simulate state sync metadata response by pushing metadata to the unsynced node's consensus module @@ -288,8 +283,7 @@ func TestStateSync_UnsyncedPeerSyncs_Success(t *testing.T) { _, err = WaitForNetworkConsensusEvents(t, clockMock, eventsChannel, consensus.Prepare, consensus.Propose, numValidators, 500, true) require.NoError(t, err) - // TODO(#352): This function will be updated once state sync implementation is complete - err = WaitForNodeToSync(t, clockMock, eventsChannel, unsyncedNode, pocketNodes, testHeight) + WaitForNodeToSync(t, clockMock, eventsChannel, unsyncedNode, pocketNodes, 3) require.NoError(t, err) } diff --git a/consensus/e2e_tests/utils_test.go b/consensus/e2e_tests/utils_test.go index 68f5705d7..82eb1467a 100644 --- a/consensus/e2e_tests/utils_test.go +++ b/consensus/e2e_tests/utils_test.go @@ -103,10 +103,6 @@ func CreateTestConsensusPocketNode( bus modules.Bus, eventsChannel modules.EventsChannel, ) *shared.Node { - fmt.Println("before: ", bus.GetEventBus()) - //bus.SetEventBus(eventsChannel) - //fmt.Println("after: ", bus.GetEventBus()) - persistenceMock := basePersistenceMock(t, eventsChannel, bus) bus.RegisterModule(persistenceMock) @@ -117,9 +113,6 @@ func CreateTestConsensusPocketNode( _, err = state_machine.Create(bus, state_machine.WithDebugEventsChannel(eventsChannel)) require.NoError(t, err) - // bus.RegisterModule(stateMachineModule) - - fmt.Println("Events channel: ", eventsChannel) runtimeMgr := (bus).GetRuntimeMgr() // TODO(olshansky): At the moment we are using the same base mocks for all the tests, @@ -164,10 +157,9 @@ func GenerateBuses(t *testing.T, runtimeMgrs []*runtime.Manager) (buses []module // CLEANUP: Reduce package scope visibility in the consensus test module func StartAllTestPocketNodes(t *testing.T, pocketNodes IdToNodeMapping) error { - for id, pocketNode := range pocketNodes { + for _, pocketNode := range pocketNodes { go startNode(t, pocketNode) startEvent := pocketNode.GetBus().GetBusEvent() - fmt.Printf("ID: %d, Start event: %s \n", id, startEvent) require.Equal(t, messaging.NodeStartedEventType, startEvent.GetContentType()) stateMachine := pocketNode.GetBus().GetStateMachineModule() if err := stateMachine.SendEvent(coreTypes.StateMachineEvent_Start); err != nil { @@ -220,14 +212,12 @@ func triggerDebugMessage(t *testing.T, node *shared.Node, action messaging.Debug func P2PBroadcast(_ *testing.T, nodes IdToNodeMapping, any *anypb.Any) { e := &messaging.PocketEnvelope{Content: any} for _, node := range nodes { - fmt.Printf("Publishing this event: %s, to: %s \n", e, node.GetP2PAddress()) node.GetBus().PublishEventToBus(e) } } func P2PSend(_ *testing.T, node *shared.Node, any *anypb.Any) { e := &messaging.PocketEnvelope{Content: any} - fmt.Printf("Publishing this event: %s, to: %s \n", e, node.GetP2PAddress()) node.GetBus().PublishEventToBus(e) } @@ -250,15 +240,12 @@ func WaitForNetworkConsensusEvents( millis time.Duration, failOnExtraMessages bool, ) (messages []*anypb.Any, err error) { - fmt.Println("Starting to wait for Consensus events on channel: ", eventsChannel) - includeFilter := func(anyMsg *anypb.Any) bool { msg, err := codec.GetCodec().FromAny(anyMsg) require.NoError(t, err) hotstuffMessage, ok := msg.(*typesCons.HotstuffMessage) require.True(t, ok) - //fmt.Println("hotstuff msg:", hotstuffMessage.Block) return hotstuffMessage.Type == msgType && hotstuffMessage.Step == step } @@ -278,10 +265,7 @@ func WaitForNetworkStateSyncEvents( maxWaitTime time.Duration, failOnExtraMessages bool, ) (messages []*anypb.Any, err error) { - fmt.Println("Starting to wait for State Sync events on channel: ", eventsChannel) - includeFilter := func(anyMsg *anypb.Any) bool { - fmt.Println("Received this message gok", anyMsg) msg, err := codec.GetCodec().FromAny(anyMsg) require.NoError(t, err) @@ -304,14 +288,11 @@ func WaitForNetworkFSMEvents( maxWaitTime time.Duration, failOnExtraMessages bool, ) (messages []*anypb.Any, err error) { - fmt.Println("Starting to wait for FSM events on channel: ", eventsChannel) - includeFilter := func(anyMsg *anypb.Any) bool { - fmt.Println("Received FSM event: ", anyMsg) msg, err := codec.GetCodec().FromAny(anyMsg) require.NoError(t, err) - stateTransitionMessage, ok := msg.(*messaging.StateMachineTransitionEvent) //messaging.StateMachineTransitionEvent + stateTransitionMessage, ok := msg.(*messaging.StateMachineTransitionEvent) require.True(t, ok) return stateTransitionMessage.Event == string(eventType) @@ -721,7 +702,7 @@ func WaitForNodeToSync( unsyncedNode *shared.Node, allNodes IdToNodeMapping, targetHeight uint64, -) error { +) { currentHeight := unsyncedNode.GetBus().GetConsensusModule().CurrentHeight() for currentHeight < targetHeight { @@ -745,15 +726,8 @@ func WaitForNodeToSync( err = waitForNodeToCatchUpHeight(t, clck, eventsChannel, allNodes, currentHeight+1) require.NoError(t, err) - advanceTime(t, clck, 10*time.Millisecond) - - //waiting for node to catch the same step - err = waitForNodeToCatchupStep(t, clck, eventsChannel, allNodes, currentHeight+1) - require.NoError(t, err) - currentHeight = unsyncedNode.GetBus().GetConsensusModule().CurrentHeight() } - return nil } // waitForNodeToRequestMissingBlock waits for unsynced node to request missing block form the network @@ -804,41 +778,6 @@ func waitForNodeToCatchUpHeight( return err } -func waitForNodeToCatchupStep( - t *testing.T, - clck *clock.Mock, - eventsChannel modules.EventsChannel, - allNodes IdToNodeMapping, - targetHeight uint64, -) error { - // Unsynced node sends new round messages to the rest of the network - newRoundMessages, err := WaitForNetworkConsensusEvents(t, clck, eventsChannel, consensus.NewRound, consensus.Propose, numValidators, 500, true) - require.NoError(t, err) - P2PBroadcast(t, allNodes, newRoundMessages[0]) - advanceTime(t, clck, 10*time.Millisecond) - - for _, pocketNode := range allNodes { - TriggerNextView(t, pocketNode) - } - advanceTime(t, clck, 10*time.Millisecond) - - // 1. NewRound - newRoundMessages, err = WaitForNetworkConsensusEvents(t, clck, eventsChannel, consensus.NewRound, consensus.Propose, numValidators*numValidators, 500, true) - require.NoError(t, err) - broadcastMessages(t, newRoundMessages, allNodes) - advanceTime(t, clck, 10*time.Millisecond) - - // round := GetConsensusNodeState(allNodes[0]).Round - - // for nodeId, pocketNode := range allNodes { - // nodeState := GetConsensusNodeState(pocketNode) - // assertHeight(t, nodeId, targetHeight, nodeState.Height) - // assertHeight(t, nodeId, round, nodeState) - // } - - return nil -} - func generatePlaceholderBlock(height uint64, leaderAddrr crypto.Address) *coreTypes.Block { blockHeader := &coreTypes.BlockHeader{ Height: height, diff --git a/consensus/events.go b/consensus/events.go index e326460a2..0e31f8d72 100644 --- a/consensus/events.go +++ b/consensus/events.go @@ -1,8 +1,6 @@ package consensus import ( - "fmt" - "github.com/pokt-network/pocket/shared/messaging" ) @@ -13,14 +11,4 @@ func (m *consensusModule) publishNewHeightEvent(height uint64) { m.logger.Fatal().Err(err).Msg("Failed to pack consensus new height event") } m.GetBus().PublishEventToBus(newHeightEvent) - - fmt.Printf("Node address: %s, Event bus in consensus publishNewHeightEvent: %v\n", m.GetNodeAddress(), m.GetBus().GetEventBus()) } - -// func (m *consensusModule) publishFSMEvent(msg *messaging.StateMachineTransitionEvent) { -// fsmEvent, err := messaging.PackMessage(msg) -// if err != nil { -// m.logger.Fatal().Err(err).Msg("Failed to pack consensus new height event") -// } -// m.GetBus().PublishEventToBus(fsmEvent) -// } diff --git a/consensus/fsm_handler.go b/consensus/fsm_handler.go index 608261b2a..92185920b 100644 --- a/consensus/fsm_handler.go +++ b/consensus/fsm_handler.go @@ -33,9 +33,8 @@ func (m *consensusModule) HandleEvent(transitionMessageAny *anypb.Any) error { } func (m *consensusModule) handleStateTransitionEvent(msg *messaging.StateMachineTransitionEvent) error { - fsm_state := msg.NewState - m.logger.Debug().Fields(messaging.TransitionEventToMap(msg)).Msg("Received state machine transition msg") + fsm_state := msg.NewState switch coreTypes.StateMachineState(fsm_state) { case coreTypes.StateMachineState_P2P_Bootstrapped: @@ -87,14 +86,14 @@ func (m *consensusModule) HandleSyncMode(msg *messaging.StateMachineTransitionEv aggregatedMetadata := m.getAggregatedStateSyncMetadata() m.stateSync.Set(&aggregatedMetadata) - go m.stateSync.Start() + go m.stateSync.StartSyncing() + return nil } // HandleSynced handles FSM event IsSyncedNonValidator for Non-Validators, and Synced is the destination state. // Currently, FSM never transition to this state and a non-validator node always stays in syncmode. // CONSIDER: when a non-validator sync is implemented, maybe there is a case that requires transitioning to this state. -// TODO: Add check that this never happens when IsValidator() is false, i.e. node is not validator. func (m *consensusModule) HandleSynced(msg *messaging.StateMachineTransitionEvent) error { m.logger.Debug().Msg("Non-validator node is in Synced mode") return nil @@ -104,7 +103,5 @@ func (m *consensusModule) HandleSynced(msg *messaging.StateMachineTransitionEven // Execution of this state means the validator node is synced. func (m *consensusModule) HandlePacemaker(msg *messaging.StateMachineTransitionEvent) error { m.logger.Debug().Msg("Validator node is Synced and in Pacemaker mode. It will stay in this mode until it receives a new block proposal that has a higher height than the current block height") - // validator receives a new block proposal, and it understands that it doesn't have block and it transitions to unsycnhed state - // transitioning out of this state happens when a new block proposal is received by the hotstuff_replica return nil } diff --git a/consensus/module_consensus_debugging.go b/consensus/module_consensus_debugging.go index c9d620be2..31c8a82b0 100644 --- a/consensus/module_consensus_debugging.go +++ b/consensus/module_consensus_debugging.go @@ -1,8 +1,6 @@ package consensus import ( - "fmt" - typesCons "github.com/pokt-network/pocket/consensus/types" coreTypes "github.com/pokt-network/pocket/shared/core/types" "github.com/pokt-network/pocket/shared/messaging" @@ -72,7 +70,6 @@ func (m *consensusModule) GetLeaderForView(height, round uint64, step uint8) uin // TODO(#609): Refactor to use the test-only package and remove reflection func (m *consensusModule) PushStateSyncMetadataResponse(metadataRes *typesCons.StateSyncMetadataResponse) { - fmt.Println("metadata is: ", metadataRes) m.metadataReceived <- metadataRes } diff --git a/consensus/module_consensus_state_sync.go b/consensus/module_consensus_state_sync.go index 41418c580..652c7b7a2 100644 --- a/consensus/module_consensus_state_sync.go +++ b/consensus/module_consensus_state_sync.go @@ -1,8 +1,6 @@ package consensus import ( - "fmt" - typesCons "github.com/pokt-network/pocket/consensus/types" coreTypes "github.com/pokt-network/pocket/shared/core/types" "github.com/pokt-network/pocket/shared/modules" @@ -25,13 +23,8 @@ func (m *consensusModule) GetNodeAddress() string { return m.nodeAddress } -// TODO(#352): Implement this function, currently a placeholder. -// commitReceivedBlocks commits the blocks received from the blocksReceived channel +// blockApplicationLoop commits the blocks received from the blocksReceived channel // it is intended to be run as a background process - -// runs as a background process in consensus module -// listens on the blocksReceived channel -// commits the received block func (m *consensusModule) blockApplicationLoop() { for blockResponse := range m.blocksReceived { block := blockResponse.Block @@ -60,7 +53,6 @@ func (m *consensusModule) blockApplicationLoop() { m.logger.Err(err).Msg("failed to apply and commit block") return } - fmt.Println("Applied block: ", block) m.stateSync.CommittedBlock(m.CurrentHeight()) } diff --git a/consensus/state_sync/module.go b/consensus/state_sync/module.go index 280c419ab..d33331d23 100644 --- a/consensus/state_sync/module.go +++ b/consensus/state_sync/module.go @@ -19,6 +19,7 @@ type StateSyncModule interface { Set(aggregatedMetaData *typesCons.StateSyncMetadataResponse) CommittedBlock(uint64) + StartSyncing() } var ( @@ -62,6 +63,13 @@ func (m *stateSync) Set(aggregatedMetaData *typesCons.StateSyncMetadataResponse) m.aggregatedMetaData = aggregatedMetaData } +func (m *stateSync) StartSyncing() { + err := m.Start() + if err != nil { + m.logger.Error().Err(err).Msg("couldn't start state sync") + } +} + // Start performs state sync process, starting from the consensus module's current height to the aggragated metadata height func (m *stateSync) Start() error { consensusMod := m.bus.GetConsensusModule() @@ -73,7 +81,7 @@ func (m *stateSync) Start() error { } defer readCtx.Release() - //get the current validators + // get the current validators m.validators, err = readCtx.GetAllValidators(int64(currentHeight)) if err != nil { return err diff --git a/shared/node.go b/shared/node.go index b2fc5aec0..aa4292bfb 100644 --- a/shared/node.go +++ b/shared/node.go @@ -2,7 +2,6 @@ package shared import ( "context" - "fmt" "time" "github.com/pokt-network/pocket/consensus" @@ -157,7 +156,6 @@ func (m *Node) GetBus() modules.Bus { // TECHDEBT: The `shared` package has dependencies on types in the individual modules. // TODO: Move all message types this is dependant on to the `messaging` package func (node *Node) handleEvent(message *messaging.PocketEnvelope) error { - fmt.Printf("Node: %s, inside handleEvent, with message: %s, bus: %s\n", node.p2pAddress, message, node.bus.GetEventBus()) contentType := message.GetContentType() logger.Global.Debug().Fields(map[string]any{ "message": message, From de491dd540c8537babe3ef752bd31ecf599042c5 Mon Sep 17 00:00:00 2001 From: gokutheengineer Date: Wed, 19 Apr 2023 17:27:51 +0300 Subject: [PATCH 018/100] clean --- consensus/doc/CHANGELOG.md | 4 ++-- consensus/e2e_tests/state_sync_test.go | 28 +++++------------------- consensus/e2e_tests/utils_test.go | 21 +++++++++--------- consensus/module_consensus_debugging.go | 9 -------- consensus/module_consensus_state_sync.go | 24 ++++---------------- consensus/state_sync/module.go | 27 ++++++++++++----------- shared/CHANGELOG.md | 4 ++++ state_machine/docs/CHANGELOG.md | 4 ++-- state_machine/module.go | 2 -- 9 files changed, 42 insertions(+), 81 deletions(-) diff --git a/consensus/doc/CHANGELOG.md b/consensus/doc/CHANGELOG.md index 15e3ad53e..fce493a20 100644 --- a/consensus/doc/CHANGELOG.md +++ b/consensus/doc/CHANGELOG.md @@ -9,8 +9,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [0.0.0.49] - 2023-04-19 -- Add state sync channels `blocksReceived` and `metadataReceived` -- +- Added state sync channels `blocksReceived` and `metadataReceived`, implemented `blockApplicationLoop()`, state sync functions `Start()` and `Stop()` +- Implemented `WaitForNetworkFSMEvents()` function in test utils ## [0.0.0.48] - 2023-04-17 diff --git a/consensus/e2e_tests/state_sync_test.go b/consensus/e2e_tests/state_sync_test.go index 34ce98047..33fba2222 100644 --- a/consensus/e2e_tests/state_sync_test.go +++ b/consensus/e2e_tests/state_sync_test.go @@ -28,20 +28,17 @@ func TestStateSync_ServerGetMetaDataReq_Success(t *testing.T) { err := StartAllTestPocketNodes(t, pocketNodes) require.NoError(t, err) - testHeight := uint64(4) - // Choose node 1 as the server node // Set server node's height to test height. serverNode := pocketNodes[1] serverNodePeerId := serverNode.GetBus().GetConsensusModule().GetNodeAddress() - serverNode.GetBus().GetConsensusModule().SetHeight(testHeight) + serverNode.GetBus().GetConsensusModule().SetHeight(uint64(4)) // Choose node 2 as the requester node. requesterNode := pocketNodes[2] requesterNodePeerAddress := requesterNode.GetBus().GetConsensusModule().GetNodeAddress() // Test MetaData Req - stateSyncMetaDataReqMessage := &typesCons.StateSyncMessage{ Message: &typesCons.StateSyncMessage_MetadataReq{ MetadataReq: &typesCons.StateSyncMetadataRequest{ @@ -49,7 +46,6 @@ func TestStateSync_ServerGetMetaDataReq_Success(t *testing.T) { }, }, } - anyProto, err := anypb.New(stateSyncMetaDataReqMessage) require.NoError(t, err) @@ -70,8 +66,7 @@ func TestStateSync_ServerGetMetaDataReq_Success(t *testing.T) { metaDataRes := stateSyncMetaDataResMessage.GetMetadataRes() require.NotEmpty(t, metaDataRes) - lastPersistedHeight := testHeight - 1 - require.Equal(t, lastPersistedHeight, metaDataRes.MaxHeight) + require.Equal(t, uint64(3), metaDataRes.MaxHeight) // 3 because node sends the last persisted height require.Equal(t, uint64(1), metaDataRes.MinHeight) require.Equal(t, serverNodePeerId, metaDataRes.PeerAddress) } @@ -91,9 +86,8 @@ func TestStateSync_ServerGetBlock_Success(t *testing.T) { err := StartAllTestPocketNodes(t, pocketNodes) require.NoError(t, err) - testHeight := uint64(5) serverNode := pocketNodes[1] - serverNode.GetBus().GetConsensusModule().SetHeight(testHeight) + serverNode.GetBus().GetConsensusModule().SetHeight(uint64(5)) // Choose node 2 as the requester node requesterNode := pocketNodes[2] @@ -149,23 +143,19 @@ func TestStateSync_ServerGetBlock_FailNonExistingBlock(t *testing.T) { err := StartAllTestPocketNodes(t, pocketNodes) require.NoError(t, err) - testHeight := uint64(5) - serverNode := pocketNodes[1] - serverNode.GetBus().GetConsensusModule().SetHeight(testHeight) + serverNode.GetBus().GetConsensusModule().SetHeight(uint64(5)) // Choose node 2 as the requester node requesterNode := pocketNodes[2] requesterNodePeerAddress := requesterNode.GetBus().GetConsensusModule().GetNodeAddress() // Failing Test - // Get Block Req is current block height + 1 - requestHeight := testHeight + 1 stateSyncGetBlockMessage := &typesCons.StateSyncMessage{ Message: &typesCons.StateSyncMessage_GetBlockReq{ GetBlockReq: &typesCons.GetBlockRequest{ PeerAddress: requesterNodePeerAddress, - Height: requestHeight, + Height: uint64(6), // 6 because node ask for the next block }, }, } @@ -196,11 +186,6 @@ func TestStateSync_UnsyncedPeerSyncs_Success(t *testing.T) { err := StartAllTestPocketNodes(t, pocketNodes) require.NoError(t, err) - // Prepare leader info - //testHeight := uint64(3) - //testRound := uint64(0) - //testStep := uint8(consensus.NewRound) - // Prepare unsynced node info unsyncedNode := pocketNodes[2] unsyncedNodeId := typesCons.NodeId(2) @@ -225,7 +210,6 @@ func TestStateSync_UnsyncedPeerSyncs_Success(t *testing.T) { for _, pocketNode := range pocketNodes { TriggerNextView(t, pocketNode) } - //currentRound := testRound + 1 // Get leaderId for the given height, round and step, by using the Consensus Modules' GetLeaderForView() function. // Any node in pocketNodes mapping can be used to call GetLeaderForView() function. @@ -267,7 +251,7 @@ func TestStateSync_UnsyncedPeerSyncs_Success(t *testing.T) { metadataReceived := &typesCons.StateSyncMetadataResponse{ PeerAddress: "unused_peer_addr_in_tests", MinHeight: uint64(1), - MaxHeight: uint64(2), // node height - 1 + MaxHeight: uint64(2), // 2 because unsynced node last persisted height 2 } // Simulate state sync metadata response by pushing metadata to the unsynced node's consensus module diff --git a/consensus/e2e_tests/utils_test.go b/consensus/e2e_tests/utils_test.go index 82eb1467a..06f0507e3 100644 --- a/consensus/e2e_tests/utils_test.go +++ b/consensus/e2e_tests/utils_test.go @@ -481,9 +481,8 @@ func baseUtilityMock(t *testing.T, _ modules.EventsChannel, genesisState *genesi } return baseReplicaUtilityUnitOfWorkMock(t, genesisState), nil }). - // For state sync tests we call NewUnitOfWork is called more than 4 times. Therefore, we need to increase this number. - // TODO: Update this value properly - MaxTimes(4 * stateSyncUtilCalls) + // For state sync tests we call NewUnitOfWork is called more than once per node. stateSyncUtilCalls is set to relatively bigger number to avoid flakiness + MaxTimes(stateSyncUtilCalls) utilityMock.EXPECT().GetModuleName().Return(modules.UtilityModuleName).AnyTimes() return utilityMock @@ -710,20 +709,20 @@ func WaitForNodeToSync( blockRequest, err := waitForNodeToRequestMissingBlock(t, clck, eventsChannel) require.NoError(t, err) - // broadcast requeust to all nodes + // broadcast requests to all nodes P2PBroadcast(t, allNodes, blockRequest) advanceTime(t, clck, 10*time.Millisecond) - // receiving replies from all nodes + // wait to receive replies from all nodes blockResponse, err := waitForNodesToReplyToBlockRequest(t, clck, eventsChannel) require.NoError(t, err) - // sending block response to unsynced node + // send block response to the unsynced node P2PSend(t, unsyncedNode, blockResponse) advanceTime(t, clck, 10*time.Millisecond) // waiting for node to catch the global height - err = waitForNodeToCatchUpHeight(t, clck, eventsChannel, allNodes, currentHeight+1) + err = waitForNodeToCatchUp(t, clck, eventsChannel, allNodes, currentHeight+1) require.NoError(t, err) currentHeight = unsyncedNode.GetBus().GetConsensusModule().CurrentHeight() @@ -744,8 +743,7 @@ func waitForNodeToRequestMissingBlock( return msgs[0], err } -// waitForNodeToReceiveMissingBlock requests block request of the unsynced node -// for given node to node to catch up to the target height by sending the requested block. +// waitForNodeToReceiveMissingBlock waits for nodes to send back requested block func waitForNodesToReplyToBlockRequest( t *testing.T, clck *clock.Mock, @@ -758,8 +756,8 @@ func waitForNodesToReplyToBlockRequest( return msgs[0], err } -// waitForNodeToCatchUp waits for given node to node to catch up to the target height by sending the requested block. -func waitForNodeToCatchUpHeight( +// waitForNodeToCatchUp waits for node to node to catch up to the target height +func waitForNodeToCatchUp( t *testing.T, clck *clock.Mock, eventsChannel modules.EventsChannel, @@ -770,6 +768,7 @@ func waitForNodeToCatchUpHeight( _, err := WaitForNetworkFSMEvents(t, clck, eventsChannel, coreTypes.StateMachineEvent_Consensus_IsSyncedValidator, "synced event", 1, 500, false) require.NoError(t, err) + // assure all nodes are at same height for nodeId, pocketNode := range allNodes { nodeState := GetConsensusNodeState(pocketNode) assertHeight(t, nodeId, targetHeight, nodeState.Height) diff --git a/consensus/module_consensus_debugging.go b/consensus/module_consensus_debugging.go index 31c8a82b0..a302f1444 100644 --- a/consensus/module_consensus_debugging.go +++ b/consensus/module_consensus_debugging.go @@ -72,12 +72,3 @@ func (m *consensusModule) GetLeaderForView(height, round uint64, step uint8) uin func (m *consensusModule) PushStateSyncMetadataResponse(metadataRes *typesCons.StateSyncMetadataResponse) { m.metadataReceived <- metadataRes } - -// func (m *consensusModule) WaitForFSMSyncedEvent() coreTypes.StateMachineEvent { -// event := <-m.DebugFSMEventsChannel -// return event -// } - -// func (m *consensusModule) PushDebugFSMSyncedEvent(event coreTypes.StateMachineEvent) { -// m.DebugFSMEventsChannel <- event -// } diff --git a/consensus/module_consensus_state_sync.go b/consensus/module_consensus_state_sync.go index 652c7b7a2..a1014bb8e 100644 --- a/consensus/module_consensus_state_sync.go +++ b/consensus/module_consensus_state_sync.go @@ -34,11 +34,8 @@ func (m *consensusModule) blockApplicationLoop() { return } - if block.BlockHeader.Height <= maxPersistedHeight { - m.logger.Info().Msgf("Received block with height: %d, but node already persisted blocks until height: %d, so node will not apply this block", block.BlockHeader.Height, maxPersistedHeight) - return - } else if block.BlockHeader.Height > m.CurrentHeight() { - m.logger.Info().Msgf("Received block with height %d, but node's last persisted height is: %d, so node will not apply this block", block.BlockHeader.Height, maxPersistedHeight) + if block.BlockHeader.Height <= maxPersistedHeight || block.BlockHeader.Height > m.CurrentHeight() { + m.logger.Info().Msgf("Received block at height: %d, but node will not apply this block", block.BlockHeader.Height) return } @@ -87,27 +84,14 @@ func (m *consensusModule) verifyBlock(block *coreTypes.Block) error { } func (m *consensusModule) applyAndCommitBlock(block *coreTypes.Block) error { - m.logger.Info().Msgf("applying and committing the block at height %d", block.BlockHeader.Height) - - // TODO: uncomment following. In this PR test blocks don't have a valid QC, therefore commented out to let the tests pass - // if err := m.applyBlock(block); err != nil { - // m.logger.Error().Err(err).Msg("Could not apply block, invalid QC") - // return err - // } - + // TODO(#352): call m.applyBlock(block) function before m.commitBlock(block). In this PR testing blocks don't have a valid QC, therefore commented out to let the tests pass. if err := m.commitBlock(block); err != nil { m.logger.Error().Err(err).Msg("Could not commit block, invalid QC") return err } - m.paceMaker.NewHeight() - maxPersistedHeight, err := m.maxPersistedBlockHeight() - if err != nil { - return err - } - - m.logger.Info().Msgf("Block is Committed, maxPersistedHeight is: %d, current height is :%d", maxPersistedHeight, m.height) + m.logger.Info().Msgf("New block is committed, current height is :%d", m.height) return nil } diff --git a/consensus/state_sync/module.go b/consensus/state_sync/module.go index d33331d23..fe6c9c20d 100644 --- a/consensus/state_sync/module.go +++ b/consensus/state_sync/module.go @@ -9,8 +9,8 @@ import ( ) const ( - stateSyncModuleName = "stateSyncModule" - committedBlockHeightChannelSize = 100 + stateSyncModuleName = "stateSyncModule" + committedBlocsChannelSize = 100 ) type StateSyncModule interface { @@ -29,19 +29,20 @@ var ( ) type stateSync struct { - bus modules.Bus - logger *modules.Logger - validators []*coreTypes.Actor - aggregatedMetaData *typesCons.StateSyncMetadataResponse - committedBlockHeightChannel chan uint64 + bus modules.Bus + logger *modules.Logger + validators []*coreTypes.Actor + aggregatedMetaData *typesCons.StateSyncMetadataResponse + committedBlocksChannel chan uint64 } func CreateStateSync(bus modules.Bus, options ...modules.ModuleOption) (modules.Module, error) { return new(stateSync).Create(bus, options...) } +// CommittedBlock is called by the consensus module when a block received by the network is committed by blockApplicationLoop() function func (m *stateSync) CommittedBlock(height uint64) { - m.committedBlockHeightChannel <- height + m.committedBlocksChannel <- height } func (*stateSync) Create(bus modules.Bus, options ...modules.ModuleOption) (modules.Module, error) { @@ -54,7 +55,7 @@ func (*stateSync) Create(bus modules.Bus, options ...modules.ModuleOption) (modu bus.RegisterModule(m) m.logger = logger.Global.CreateLoggerForModule(m.GetModuleName()) - m.committedBlockHeightChannel = make(chan uint64, committedBlockHeightChannelSize) + m.committedBlocksChannel = make(chan uint64, committedBlocsChannelSize) return m, nil } @@ -87,7 +88,7 @@ func (m *stateSync) Start() error { return err } - // start requesting blocks from the current height to the aggregated metadata height + // requests blocks from the current height to the aggregated metadata height for currentHeight <= m.aggregatedMetaData.MaxHeight { m.logger.Info().Msgf("Sync is requesting block: %d, ending height: %d", currentHeight, m.aggregatedMetaData.MaxHeight) @@ -109,12 +110,12 @@ func (m *stateSync) Start() error { } // wait for the requested block to be received and committed by consensus module - <-m.committedBlockHeightChannel + <-m.committedBlocksChannel // requested block is received and committed, continue to the next block from the current height currentHeight = consensusMod.CurrentHeight() } - // syncing is complete, stop the state sync module + // syncing is complete and all requested blocks are committed, stop the state sync module return m.Stop() } @@ -125,8 +126,8 @@ func (m *stateSync) Stop() error { if err != nil { return err } - m.logger.Info().Msg("Syncing is complete!") + if isValidator { return m.GetBus().GetStateMachineModule().SendEvent(coreTypes.StateMachineEvent_Consensus_IsSyncedValidator) } diff --git a/shared/CHANGELOG.md b/shared/CHANGELOG.md index 9be09b733..52db59fb1 100644 --- a/shared/CHANGELOG.md +++ b/shared/CHANGELOG.md @@ -7,6 +7,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +## [0.0.0.52] - 2023-04-19 + +- Exported `IsValidator()` function in `ConsensusStateSync` interface + ## [0.0.0.52] - 2023-04-17 - Removed *temporary* `shared/p2p` package; consolidated into `p2p` diff --git a/state_machine/docs/CHANGELOG.md b/state_machine/docs/CHANGELOG.md index 07c8b21da..de6f927a2 100644 --- a/state_machine/docs/CHANGELOG.md +++ b/state_machine/docs/CHANGELOG.md @@ -7,9 +7,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] -## [0.0.0.4] - 2023-04-14 +## [0.0.0.4] - 2023-04-19 -- Add `WithDebugEventsChannel()` to be used in testing +- Add `WithDebugEventsChannel()` function to be used in testing ## [0.0.0.4] - 2023-04-03 diff --git a/state_machine/module.go b/state_machine/module.go index 1dde654fa..8dfecedb2 100644 --- a/state_machine/module.go +++ b/state_machine/module.go @@ -2,7 +2,6 @@ package state_machine import ( "context" - "fmt" "github.com/looplab/fsm" "github.com/pokt-network/pocket/logger" @@ -48,7 +47,6 @@ func (*stateMachineModule) Create(bus modules.Bus, options ...modules.ModuleOpti if err != nil { m.logger.Fatal().Err(err).Msg("failed to pack state machine transition event") } - fmt.Println("Event bus in state machine: ", bus.GetEventBus()) bus.PublishEventToBus(newStateMachineTransitionEvent) for _, channel := range m.debugChannels { channel <- newStateMachineTransitionEvent From 05161bd630b936eb180a1e6469ffd7edf3acc694 Mon Sep 17 00:00:00 2001 From: gokutheengineer Date: Wed, 19 Apr 2023 18:44:05 +0300 Subject: [PATCH 019/100] fix typos --- consensus/e2e_tests/utils_test.go | 10 +++++----- shared/CHANGELOG.md | 2 +- state_machine/docs/CHANGELOG.md | 2 +- state_machine/module.go | 2 +- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/consensus/e2e_tests/utils_test.go b/consensus/e2e_tests/utils_test.go index 06f0507e3..948d812b1 100644 --- a/consensus/e2e_tests/utils_test.go +++ b/consensus/e2e_tests/utils_test.go @@ -691,8 +691,8 @@ func broadcastMessages(t *testing.T, msgs []*anypb.Any, pocketNodes IdToNodeMapp // WaitForNodeToSync waits for a node to sync to a target height // For every missing block for the unsynced node: // -// first, waits for the node to request a missing block via `waitForNodeToRequestMissingBlock()` function, -// then, waits for the node to receive the missing block via `waitForNodeToReceiveMissingBlock()` function, +// first, waits for the unsyced node to request a missing block via `waitForNodeToRequestMissingBlock()` function, +// then, waits for other nodes to send the requested block via `waitForNodesToReplyToBlockRequest()` function, // finally, wait for the node to catch up to the target height via `waitForNodeToCatchUp()` function. func WaitForNodeToSync( t *testing.T, @@ -743,7 +743,7 @@ func waitForNodeToRequestMissingBlock( return msgs[0], err } -// waitForNodeToReceiveMissingBlock waits for nodes to send back requested block +// waitForNodesToReplyToBlockRequest waits for nodes to send back requested block func waitForNodesToReplyToBlockRequest( t *testing.T, clck *clock.Mock, @@ -756,7 +756,7 @@ func waitForNodesToReplyToBlockRequest( return msgs[0], err } -// waitForNodeToCatchUp waits for node to node to catch up to the target height +// waitForNodeToCatchUp waits for unsynced node to catch up to the target height func waitForNodeToCatchUp( t *testing.T, clck *clock.Mock, @@ -768,7 +768,7 @@ func waitForNodeToCatchUp( _, err := WaitForNetworkFSMEvents(t, clck, eventsChannel, coreTypes.StateMachineEvent_Consensus_IsSyncedValidator, "synced event", 1, 500, false) require.NoError(t, err) - // assure all nodes are at same height + // ensure all nodes are at same height for nodeId, pocketNode := range allNodes { nodeState := GetConsensusNodeState(pocketNode) assertHeight(t, nodeId, targetHeight, nodeState.Height) diff --git a/shared/CHANGELOG.md b/shared/CHANGELOG.md index 52db59fb1..2ae0ad7ee 100644 --- a/shared/CHANGELOG.md +++ b/shared/CHANGELOG.md @@ -7,7 +7,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] -## [0.0.0.52] - 2023-04-19 +## [0.0.0.53] - 2023-04-19 - Exported `IsValidator()` function in `ConsensusStateSync` interface diff --git a/state_machine/docs/CHANGELOG.md b/state_machine/docs/CHANGELOG.md index de6f927a2..eda6ae944 100644 --- a/state_machine/docs/CHANGELOG.md +++ b/state_machine/docs/CHANGELOG.md @@ -7,7 +7,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] -## [0.0.0.4] - 2023-04-19 +## [0.0.0.5] - 2023-04-19 - Add `WithDebugEventsChannel()` function to be used in testing diff --git a/state_machine/module.go b/state_machine/module.go index 8dfecedb2..fab73bafe 100644 --- a/state_machine/module.go +++ b/state_machine/module.go @@ -81,7 +81,7 @@ func WithCustomStateMachine(stateMachine *fsm.FSM) modules.ModuleOption { } } -// WithDebugEventsChannel is only used for testing purposes. It allows us to capture the events +// WithDebugEventsChannel is used for testing purposes. It allows us to capture the events // from the FSM and publish them to debug channel for testing. func WithDebugEventsChannel(eventsChannel modules.EventsChannel) modules.ModuleOption { return func(m modules.InitializableModule) { From 5e707485808ae51173af10132506fcd5c19db41f Mon Sep 17 00:00:00 2001 From: goku <118421317+gokutheengineer@users.noreply.github.com> Date: Fri, 21 Apr 2023 15:56:58 +0300 Subject: [PATCH 020/100] Update consensus/e2e_tests/utils_test.go Co-authored-by: Daniel Olshansky --- consensus/e2e_tests/utils_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/consensus/e2e_tests/utils_test.go b/consensus/e2e_tests/utils_test.go index 948d812b1..fe2c7a6dd 100644 --- a/consensus/e2e_tests/utils_test.go +++ b/consensus/e2e_tests/utils_test.go @@ -691,7 +691,7 @@ func broadcastMessages(t *testing.T, msgs []*anypb.Any, pocketNodes IdToNodeMapp // WaitForNodeToSync waits for a node to sync to a target height // For every missing block for the unsynced node: // -// first, waits for the unsyced node to request a missing block via `waitForNodeToRequestMissingBlock()` function, +// first, waits for the unsynced node to request a missing block via `waitForNodeToRequestMissingBlock()` function, // then, waits for other nodes to send the requested block via `waitForNodesToReplyToBlockRequest()` function, // finally, wait for the node to catch up to the target height via `waitForNodeToCatchUp()` function. func WaitForNodeToSync( From 9c17b18743c437eae4765b0cf3815b8f7617c568 Mon Sep 17 00:00:00 2001 From: goku <118421317+gokutheengineer@users.noreply.github.com> Date: Fri, 21 Apr 2023 15:57:18 +0300 Subject: [PATCH 021/100] Update consensus/e2e_tests/state_sync_test.go Co-authored-by: Daniel Olshansky --- consensus/e2e_tests/state_sync_test.go | 1 - 1 file changed, 1 deletion(-) diff --git a/consensus/e2e_tests/state_sync_test.go b/consensus/e2e_tests/state_sync_test.go index 33fba2222..267bee10b 100644 --- a/consensus/e2e_tests/state_sync_test.go +++ b/consensus/e2e_tests/state_sync_test.go @@ -125,7 +125,6 @@ func TestStateSync_ServerGetBlock_Success(t *testing.T) { require.NotEmpty(t, getBlockRes) require.Equal(t, uint64(1), getBlockRes.Block.GetBlockHeader().Height) - } func TestStateSync_ServerGetBlock_FailNonExistingBlock(t *testing.T) { From 3270ee96f0ec71d28eeb79b34b95c5e14dfdebae Mon Sep 17 00:00:00 2001 From: goku <118421317+gokutheengineer@users.noreply.github.com> Date: Fri, 21 Apr 2023 15:57:29 +0300 Subject: [PATCH 022/100] Update consensus/e2e_tests/utils_test.go Co-authored-by: Daniel Olshansky --- consensus/e2e_tests/utils_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/consensus/e2e_tests/utils_test.go b/consensus/e2e_tests/utils_test.go index fe2c7a6dd..32f21efd3 100644 --- a/consensus/e2e_tests/utils_test.go +++ b/consensus/e2e_tests/utils_test.go @@ -736,7 +736,7 @@ func waitForNodeToRequestMissingBlock( eventsChannel modules.EventsChannel, ) (*anypb.Any, error) { - errMsg := "StateSync Block Request Messages" + errMsg := "Error waiting for StateSync Block Request Messages" msgs, err := WaitForNetworkStateSyncEvents(t, clck, eventsChannel, errMsg, numValidators, 250, true) require.NoError(t, err) From a8fd7441226f85c5fa06175761c4cd99ca5ce27d Mon Sep 17 00:00:00 2001 From: goku <118421317+gokutheengineer@users.noreply.github.com> Date: Fri, 21 Apr 2023 16:04:54 +0300 Subject: [PATCH 023/100] Update consensus/module_consensus_state_sync.go Co-authored-by: Daniel Olshansky --- consensus/module_consensus_state_sync.go | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/consensus/module_consensus_state_sync.go b/consensus/module_consensus_state_sync.go index a1014bb8e..c9d88e8c6 100644 --- a/consensus/module_consensus_state_sync.go +++ b/consensus/module_consensus_state_sync.go @@ -39,8 +39,7 @@ func (m *consensusModule) blockApplicationLoop() { return } - err = m.verifyBlock(block) - if err != nil { + if err = m.verifyBlock(block); err != nil m.logger.Err(err).Msg("failed to verify block") return } From 1c4eb536d889cc8c391e732c87910d95f34b5e0a Mon Sep 17 00:00:00 2001 From: gokutheengineer Date: Tue, 25 Apr 2023 13:16:03 +0300 Subject: [PATCH 024/100] fixes --- consensus/e2e_tests/utils_test.go | 1 + consensus/fsm_handler.go | 2 +- consensus/module.go | 4 ++-- consensus/module_consensus_state_sync.go | 27 ++++++++++++------------ consensus/state_sync/module.go | 6 +++--- consensus/state_sync_handler.go | 2 +- 6 files changed, 22 insertions(+), 20 deletions(-) diff --git a/consensus/e2e_tests/utils_test.go b/consensus/e2e_tests/utils_test.go index 32f21efd3..6cbff2df5 100644 --- a/consensus/e2e_tests/utils_test.go +++ b/consensus/e2e_tests/utils_test.go @@ -483,6 +483,7 @@ func baseUtilityMock(t *testing.T, _ modules.EventsChannel, genesisState *genesi }). // For state sync tests we call NewUnitOfWork is called more than once per node. stateSyncUtilCalls is set to relatively bigger number to avoid flakiness MaxTimes(stateSyncUtilCalls) + // AnyTimes() utilityMock.EXPECT().GetModuleName().Return(modules.UtilityModuleName).AnyTimes() return utilityMock diff --git a/consensus/fsm_handler.go b/consensus/fsm_handler.go index 92185920b..a58295648 100644 --- a/consensus/fsm_handler.go +++ b/consensus/fsm_handler.go @@ -84,7 +84,7 @@ func (m *consensusModule) HandleSyncMode(msg *messaging.StateMachineTransitionEv m.logger.Debug().Msg("Node is in Sync Mode, starting to sync...") aggregatedMetadata := m.getAggregatedStateSyncMetadata() - m.stateSync.Set(&aggregatedMetadata) + m.stateSync.SetAggregatedMetadata(&aggregatedMetadata) go m.stateSync.StartSyncing() diff --git a/consensus/module.go b/consensus/module.go index 42a766073..5d5ad5331 100644 --- a/consensus/module.go +++ b/consensus/module.go @@ -77,7 +77,7 @@ type consensusModule struct { hotstuffMempool map[typesCons.HotstuffStep]*hotstuffFIFOMempool // block responses received from peers are collected in this channel - blocksReceived chan *typesCons.GetBlockResponse + blocksResponsesReceived chan *typesCons.GetBlockResponse // metadata responses received from peers are collected in this channel metadataReceived chan *typesCons.StateSyncMetadataResponse @@ -164,7 +164,7 @@ func (*consensusModule) Create(bus modules.Bus, options ...modules.ModuleOption) m.nodeAddress = address m.metadataReceived = make(chan *typesCons.StateSyncMetadataResponse, metadataChannelSize) - m.blocksReceived = make(chan *typesCons.GetBlockResponse, blocksChannelSize) + m.blocksResponsesReceived = make(chan *typesCons.GetBlockResponse, blocksChannelSize) m.initMessagesPool() diff --git a/consensus/module_consensus_state_sync.go b/consensus/module_consensus_state_sync.go index c9d88e8c6..3e18e26e0 100644 --- a/consensus/module_consensus_state_sync.go +++ b/consensus/module_consensus_state_sync.go @@ -23,10 +23,10 @@ func (m *consensusModule) GetNodeAddress() string { return m.nodeAddress } -// blockApplicationLoop commits the blocks received from the blocksReceived channel +// blockApplicationLoop commits the blocks received from the blocksResponsesReceived channel // it is intended to be run as a background process func (m *consensusModule) blockApplicationLoop() { - for blockResponse := range m.blocksReceived { + for blockResponse := range m.blocksResponsesReceived { block := blockResponse.Block maxPersistedHeight, err := m.maxPersistedBlockHeight() if err != nil { @@ -34,18 +34,22 @@ func (m *consensusModule) blockApplicationLoop() { return } - if block.BlockHeader.Height <= maxPersistedHeight || block.BlockHeader.Height > m.CurrentHeight() { - m.logger.Info().Msgf("Received block at height: %d, but node will not apply this block", block.BlockHeader.Height) + if block.BlockHeader.Height <= maxPersistedHeight { + m.logger.Info().Msgf("Received block at height %d, discarding as it has already been persisted", block.BlockHeader.Height) return } - if err = m.verifyBlock(block); err != nil - m.logger.Err(err).Msg("failed to verify block") + if block.BlockHeader.Height > m.CurrentHeight() { + m.logger.Info().Msgf("Received block at height %d, discarding as it is higher than the current height", block.BlockHeader.Height) return } - err = m.applyAndCommitBlock(block) - if err != nil { + if err = m.validateBlock(block); err != nil { + m.logger.Err(err).Msg("failed to validate block") + return + } + + if err = m.applyAndCommitBlock(block); err != nil { m.logger.Err(err).Msg("failed to apply and commit block") return } @@ -78,7 +82,7 @@ func (m *consensusModule) maxPersistedBlockHeight() (uint64, error) { return maxHeight, nil } -func (m *consensusModule) verifyBlock(block *coreTypes.Block) error { +func (m *consensusModule) validateBlock(block *coreTypes.Block) error { return nil } @@ -97,10 +101,7 @@ func (m *consensusModule) applyAndCommitBlock(block *coreTypes.Block) error { func (m *consensusModule) getAggregatedStateSyncMetadata() typesCons.StateSyncMetadataResponse { minHeight, maxHeight := uint64(1), uint64(1) - chanLen := len(m.metadataReceived) - - for i := 0; i < chanLen; i++ { - metadata := <-m.metadataReceived + for metadata := range m.metadataReceived { if metadata.MaxHeight > maxHeight { maxHeight = metadata.MaxHeight } diff --git a/consensus/state_sync/module.go b/consensus/state_sync/module.go index fe6c9c20d..fcab26c84 100644 --- a/consensus/state_sync/module.go +++ b/consensus/state_sync/module.go @@ -17,8 +17,8 @@ type StateSyncModule interface { modules.Module StateSyncServerModule - Set(aggregatedMetaData *typesCons.StateSyncMetadataResponse) - CommittedBlock(uint64) + SetAggregatedMetadata(aggregatedMetaData *typesCons.StateSyncMetadataResponse) + CommittedBlock(height uint64) StartSyncing() } @@ -60,7 +60,7 @@ func (*stateSync) Create(bus modules.Bus, options ...modules.ModuleOption) (modu return m, nil } -func (m *stateSync) Set(aggregatedMetaData *typesCons.StateSyncMetadataResponse) { +func (m *stateSync) SetAggregatedMetadata(aggregatedMetaData *typesCons.StateSyncMetadataResponse) { m.aggregatedMetaData = aggregatedMetaData } diff --git a/consensus/state_sync_handler.go b/consensus/state_sync_handler.go index d67e2832a..afbf39044 100644 --- a/consensus/state_sync_handler.go +++ b/consensus/state_sync_handler.go @@ -55,7 +55,7 @@ func (m *consensusModule) handleStateSyncMessage(stateSyncMessage *typesCons.Sta return m.stateSync.HandleGetBlockRequest(stateSyncMessage.GetGetBlockReq()) case *typesCons.StateSyncMessage_GetBlockRes: m.logger.Info().Str("proto_type", "GetBlockResponse").Msg("Handling StateSyncMessage GetBlockResponse") - m.blocksReceived <- stateSyncMessage.GetGetBlockRes() + m.blocksResponsesReceived <- stateSyncMessage.GetGetBlockRes() return nil default: return fmt.Errorf("unspecified state sync message type") From c08146423adf216fdab3b7c53d5657b4980e4e31 Mon Sep 17 00:00:00 2001 From: gokutheengineer Date: Tue, 25 Apr 2023 15:05:54 +0300 Subject: [PATCH 025/100] move isValidator to persistence --- consensus/e2e_tests/utils_test.go | 2 +- consensus/fsm_handler.go | 3 +++ consensus/helpers.go | 16 ---------------- consensus/module_consensus_state_sync.go | 4 +++- consensus/state_sync/module.go | 12 +++++++----- persistence/module.go | 21 +++++++++++++++++++++ shared/modules/consensus_module.go | 1 - shared/modules/persistence_module.go | 3 +++ 8 files changed, 38 insertions(+), 24 deletions(-) diff --git a/consensus/e2e_tests/utils_test.go b/consensus/e2e_tests/utils_test.go index 6cbff2df5..554a46b2c 100644 --- a/consensus/e2e_tests/utils_test.go +++ b/consensus/e2e_tests/utils_test.go @@ -394,8 +394,8 @@ func basePersistenceMock(t *testing.T, _ modules.EventsChannel, bus modules.Bus) persistenceMock.EXPECT().Start().Return(nil).AnyTimes() persistenceMock.EXPECT().SetBus(gomock.Any()).Return().AnyTimes() persistenceMock.EXPECT().NewReadContext(gomock.Any()).Return(persistenceReadContextMock, nil).AnyTimes() - persistenceMock.EXPECT().ReleaseWriteContext().Return(nil).AnyTimes() + persistenceMock.EXPECT().IsValidator(gomock.Any(), gomock.Any()).Return(true, nil).AnyTimes() blockStoreMock := mocksPer.NewMockKVStore(ctrl) diff --git a/consensus/fsm_handler.go b/consensus/fsm_handler.go index a58295648..60ad5eb1f 100644 --- a/consensus/fsm_handler.go +++ b/consensus/fsm_handler.go @@ -84,8 +84,11 @@ func (m *consensusModule) HandleSyncMode(msg *messaging.StateMachineTransitionEv m.logger.Debug().Msg("Node is in Sync Mode, starting to sync...") aggregatedMetadata := m.getAggregatedStateSyncMetadata() + m.logger.Debug().Msg("Setting metadata! is ") m.stateSync.SetAggregatedMetadata(&aggregatedMetadata) + m.logger.Debug().Msg("CALLING NOW is in Sync Mode, starting to sync...") + go m.stateSync.StartSyncing() return nil diff --git a/consensus/helpers.go b/consensus/helpers.go index f3ef915f3..a3eb459ad 100644 --- a/consensus/helpers.go +++ b/consensus/helpers.go @@ -271,22 +271,6 @@ func (m *consensusModule) getValidatorsAtHeight(height uint64) ([]*coreTypes.Act return readCtx.GetAllValidators(int64(height)) } -// TODO: This is a temporary solution, cache this in Consensus module. This field will be populated once with a single query to the persistence module. -func (m *consensusModule) IsValidator() (bool, error) { - validators, err := m.getValidatorsAtHeight(m.CurrentHeight()) - if err != nil { - return false, err - } - - for _, actor := range validators { - if actor.Address == m.nodeAddress { - return true, nil - } - } - - return false, nil -} - func hotstuffMsgToLoggingFields(msg *typesCons.HotstuffMessage) map[string]any { return map[string]any{ "height": msg.GetHeight(), diff --git a/consensus/module_consensus_state_sync.go b/consensus/module_consensus_state_sync.go index 3e18e26e0..657e9ad07 100644 --- a/consensus/module_consensus_state_sync.go +++ b/consensus/module_consensus_state_sync.go @@ -100,8 +100,10 @@ func (m *consensusModule) applyAndCommitBlock(block *coreTypes.Block) error { func (m *consensusModule) getAggregatedStateSyncMetadata() typesCons.StateSyncMetadataResponse { minHeight, maxHeight := uint64(1), uint64(1) + chanLen := len(m.metadataReceived) - for metadata := range m.metadataReceived { + for i := 0; i < chanLen; i++ { + metadata := <-m.metadataReceived if metadata.MaxHeight > maxHeight { maxHeight = metadata.MaxHeight } diff --git a/consensus/state_sync/module.go b/consensus/state_sync/module.go index fcab26c84..ca38ad3c9 100644 --- a/consensus/state_sync/module.go +++ b/consensus/state_sync/module.go @@ -31,7 +31,6 @@ var ( type stateSync struct { bus modules.Bus logger *modules.Logger - validators []*coreTypes.Actor aggregatedMetaData *typesCons.StateSyncMetadataResponse committedBlocksChannel chan uint64 } @@ -82,8 +81,8 @@ func (m *stateSync) Start() error { } defer readCtx.Release() - // get the current validators - m.validators, err = readCtx.GetAllValidators(int64(currentHeight)) + //get the current validators + validators, err := readCtx.GetAllValidators(int64(currentHeight)) if err != nil { return err } @@ -103,7 +102,7 @@ func (m *stateSync) Start() error { } // broadcast the get block request message to all validators - for _, val := range m.validators { + for _, val := range validators { if err := m.sendStateSyncMessage(stateSyncGetBlockMessage, cryptoPocket.AddressFromString(val.GetAddress())); err != nil { return err } @@ -122,7 +121,10 @@ func (m *stateSync) Start() error { // Stop stops the state sync process, and sends `Consensus_IsSyncedValidator` FSM event func (m *stateSync) Stop() error { // check if the node is a validator - isValidator, err := m.bus.GetConsensusModule().IsValidator() + currentHeight := m.bus.GetConsensusModule().CurrentHeight() + nodeAddress := m.bus.GetConsensusModule().GetNodeAddress() + isValidator, err := m.bus.GetPersistenceModule().IsValidator(int64(currentHeight), nodeAddress) + if err != nil { return err } diff --git a/persistence/module.go b/persistence/module.go index 7f8280144..524f25f36 100644 --- a/persistence/module.go +++ b/persistence/module.go @@ -231,6 +231,27 @@ func (m *persistenceModule) NewWriteContext() modules.PersistenceRWContext { return m.writeContext } +func (m *persistenceModule) IsValidator(height int64, address string) (bool, error) { + readCtx, err := m.GetBus().GetPersistenceModule().NewReadContext(height) + if err != nil { + return false, err + } + defer readCtx.Release() + + validators, err := readCtx.GetAllValidators(int64(height)) + if err != nil { + return false, err + } + + for _, actor := range validators { + if actor.Address == address { + return true, nil + } + } + + return false, nil +} + func initializeBlockStore(blockStorePath string) (kvstore.KVStore, error) { if blockStorePath == "" { return kvstore.NewMemKVStore(), nil diff --git a/shared/modules/consensus_module.go b/shared/modules/consensus_module.go index 82673f435..591e006dd 100644 --- a/shared/modules/consensus_module.go +++ b/shared/modules/consensus_module.go @@ -73,7 +73,6 @@ type ConsensusPacemaker interface { type ConsensusStateSync interface { GetNodeIdFromNodeAddress(string) (uint64, error) GetNodeAddress() string - IsValidator() (bool, error) } // ConsensusDebugModule exposes functionality used for testing & development purposes. diff --git a/shared/modules/persistence_module.go b/shared/modules/persistence_module.go index a64fdddf5..2b0ab19b9 100644 --- a/shared/modules/persistence_module.go +++ b/shared/modules/persistence_module.go @@ -29,6 +29,9 @@ type PersistenceModule interface { // Debugging / development only HandleDebugMessage(*messaging.DebugMessage) error + + // Checks whether given node is validator in the given height + IsValidator(height int64, address string) (bool, error) } // Interface defining the context within which the node can operate with the persistence layer. From 64bbfb1cce65373ea5575007ca9bd2b70185a6d0 Mon Sep 17 00:00:00 2001 From: gokutheengineer Date: Tue, 25 Apr 2023 16:41:24 +0300 Subject: [PATCH 026/100] address comments --- consensus/e2e_tests/utils_test.go | 44 +++++++++++------------- consensus/module_consensus_state_sync.go | 2 ++ state_machine/module.go | 3 +- 3 files changed, 24 insertions(+), 25 deletions(-) diff --git a/consensus/e2e_tests/utils_test.go b/consensus/e2e_tests/utils_test.go index 554a46b2c..bf8804913 100644 --- a/consensus/e2e_tests/utils_test.go +++ b/consensus/e2e_tests/utils_test.go @@ -41,9 +41,8 @@ func TestMain(m *testing.M) { // TODO(integration): These are temporary variables used in the prototype integration phase that // will need to be parameterized later once the test framework design matures. const ( - numValidators = 4 - stateHash = "42" - stateSyncUtilCalls = 100 + numValidators = 4 + stateHash = "42" ) var maxTxBytes = defaults.DefaultConsensusMaxMempoolBytes @@ -481,9 +480,8 @@ func baseUtilityMock(t *testing.T, _ modules.EventsChannel, genesisState *genesi } return baseReplicaUtilityUnitOfWorkMock(t, genesisState), nil }). - // For state sync tests we call NewUnitOfWork is called more than once per node. stateSyncUtilCalls is set to relatively bigger number to avoid flakiness - MaxTimes(stateSyncUtilCalls) - // AnyTimes() + AnyTimes() + utilityMock.EXPECT().GetModuleName().Return(modules.UtilityModuleName).AnyTimes() return utilityMock @@ -707,23 +705,23 @@ func WaitForNodeToSync( for currentHeight < targetHeight { // waiting for unsynced node to request missing block - blockRequest, err := waitForNodeToRequestMissingBlock(t, clck, eventsChannel) + blockRequests, err := waitForNodeToRequestMissingBlock(t, clck, eventsChannel) require.NoError(t, err) - // broadcast requests to all nodes - P2PBroadcast(t, allNodes, blockRequest) + // broadcast one of the requests to all nodes + P2PBroadcast(t, allNodes, blockRequests[0]) advanceTime(t, clck, 10*time.Millisecond) // wait to receive replies from all nodes - blockResponse, err := waitForNodesToReplyToBlockRequest(t, clck, eventsChannel) + blockResponses, err := waitForNodesToReplyToBlockRequest(t, clck, eventsChannel) require.NoError(t, err) - // send block response to the unsynced node - P2PSend(t, unsyncedNode, blockResponse) + // send one of the block responses to the unsynced node + P2PSend(t, unsyncedNode, blockResponses[0]) advanceTime(t, clck, 10*time.Millisecond) - // waiting for node to catch the global height - err = waitForNodeToCatchUp(t, clck, eventsChannel, allNodes, currentHeight+1) + // waiting for node to reach to the next height (currentHeight + 1) + err = waitForNodeToCatchUp(t, clck, eventsChannel, unsyncedNode, currentHeight+1) require.NoError(t, err) currentHeight = unsyncedNode.GetBus().GetConsensusModule().CurrentHeight() @@ -735,13 +733,13 @@ func waitForNodeToRequestMissingBlock( t *testing.T, clck *clock.Mock, eventsChannel modules.EventsChannel, -) (*anypb.Any, error) { +) ([]*anypb.Any, error) { errMsg := "Error waiting for StateSync Block Request Messages" msgs, err := WaitForNetworkStateSyncEvents(t, clck, eventsChannel, errMsg, numValidators, 250, true) require.NoError(t, err) - return msgs[0], err + return msgs, err } // waitForNodesToReplyToBlockRequest waits for nodes to send back requested block @@ -749,12 +747,12 @@ func waitForNodesToReplyToBlockRequest( t *testing.T, clck *clock.Mock, eventsChannel modules.EventsChannel, -) (*anypb.Any, error) { +) ([]*anypb.Any, error) { errMsg := "StateSync Block Response Messages" msgs, err := WaitForNetworkStateSyncEvents(t, clck, eventsChannel, errMsg, numValidators-1, 250, true) require.NoError(t, err) - return msgs[0], err + return msgs, err } // waitForNodeToCatchUp waits for unsynced node to catch up to the target height @@ -762,18 +760,16 @@ func waitForNodeToCatchUp( t *testing.T, clck *clock.Mock, eventsChannel modules.EventsChannel, - allNodes IdToNodeMapping, + unsyncedNode *shared.Node, targetHeight uint64, ) error { // wait for unsynced node to send StateMachineEvent_Consensus_IsSyncedValidator event _, err := WaitForNetworkFSMEvents(t, clck, eventsChannel, coreTypes.StateMachineEvent_Consensus_IsSyncedValidator, "synced event", 1, 500, false) require.NoError(t, err) - // ensure all nodes are at same height - for nodeId, pocketNode := range allNodes { - nodeState := GetConsensusNodeState(pocketNode) - assertHeight(t, nodeId, targetHeight, nodeState.Height) - } + // ensure unsynced node caught up to the target height + nodeState := GetConsensusNodeState(unsyncedNode) + assertHeight(t, typesCons.NodeId(unsyncedNode.GetBus().GetConsensusModule().GetNodeId()), targetHeight, nodeState.Height) return err } diff --git a/consensus/module_consensus_state_sync.go b/consensus/module_consensus_state_sync.go index 657e9ad07..5c8422a76 100644 --- a/consensus/module_consensus_state_sync.go +++ b/consensus/module_consensus_state_sync.go @@ -34,6 +34,7 @@ func (m *consensusModule) blockApplicationLoop() { return } + // TODO: rather than discarding these blocks, push them into a channel to process them later if block.BlockHeader.Height <= maxPersistedHeight { m.logger.Info().Msgf("Received block at height %d, discarding as it has already been persisted", block.BlockHeader.Height) return @@ -82,6 +83,7 @@ func (m *consensusModule) maxPersistedBlockHeight() (uint64, error) { return maxHeight, nil } +// TODO(#352): add quorum certificate validation for the block func (m *consensusModule) validateBlock(block *coreTypes.Block) error { return nil } diff --git a/state_machine/module.go b/state_machine/module.go index fab73bafe..d1448ed00 100644 --- a/state_machine/module.go +++ b/state_machine/module.go @@ -18,7 +18,8 @@ type stateMachineModule struct { base_modules.InterruptableModule *fsm.FSM - logger *modules.Logger + logger *modules.Logger + // debugChannels is only used for testing purposes, events pushed to it are emitted in testing debugChannels []modules.EventsChannel } From 0a4140a93cae467834e8671ce45e00b053b5c6a2 Mon Sep 17 00:00:00 2001 From: gokutheengineer Date: Wed, 26 Apr 2023 14:15:07 +0300 Subject: [PATCH 027/100] add block committed event --- consensus/events.go | 10 ++++++++ consensus/module_consensus_state_sync.go | 4 ++-- consensus/state_sync/module.go | 29 ++++++++++++++++++++---- consensus/state_sync_handler.go | 5 +++- consensus/types/proto/state_sync.proto | 4 ++++ shared/messaging/events.go | 5 ++-- shared/node.go | 2 +- 7 files changed, 49 insertions(+), 10 deletions(-) diff --git a/consensus/events.go b/consensus/events.go index 0e31f8d72..3e335c4c3 100644 --- a/consensus/events.go +++ b/consensus/events.go @@ -1,6 +1,7 @@ package consensus import ( + "github.com/pokt-network/pocket/consensus/types" "github.com/pokt-network/pocket/shared/messaging" ) @@ -12,3 +13,12 @@ func (m *consensusModule) publishNewHeightEvent(height uint64) { } m.GetBus().PublishEventToBus(newHeightEvent) } + +// publishStateSyncBlockCommittedEvent +func (m *consensusModule) publishStateSyncBlockCommittedEvent(height uint64) { + stateSyncBlockCommittedEvent, err := messaging.PackMessage(&types.StateSyncBlockCommittedEvent{Height: height}) + if err != nil { + m.logger.Fatal().Err(err).Msg("Failed to pack state sync committed block event") + } + m.GetBus().PublishEventToBus(stateSyncBlockCommittedEvent) +} diff --git a/consensus/module_consensus_state_sync.go b/consensus/module_consensus_state_sync.go index 5c8422a76..668f5479b 100644 --- a/consensus/module_consensus_state_sync.go +++ b/consensus/module_consensus_state_sync.go @@ -54,9 +54,9 @@ func (m *consensusModule) blockApplicationLoop() { m.logger.Err(err).Msg("failed to apply and commit block") return } - m.stateSync.CommittedBlock(m.CurrentHeight()) - } + m.publishStateSyncBlockCommittedEvent(block.BlockHeader.Height) + } } // TODO(#352): Implement this function, currently a placeholder. diff --git a/consensus/state_sync/module.go b/consensus/state_sync/module.go index ca38ad3c9..42c0ae4d2 100644 --- a/consensus/state_sync/module.go +++ b/consensus/state_sync/module.go @@ -1,16 +1,23 @@ package state_sync import ( + "fmt" + "time" + typesCons "github.com/pokt-network/pocket/consensus/types" "github.com/pokt-network/pocket/logger" + "github.com/pokt-network/pocket/shared/codec" coreTypes "github.com/pokt-network/pocket/shared/core/types" cryptoPocket "github.com/pokt-network/pocket/shared/crypto" + "github.com/pokt-network/pocket/shared/messaging" "github.com/pokt-network/pocket/shared/modules" + "google.golang.org/protobuf/types/known/anypb" ) const ( stateSyncModuleName = "stateSyncModule" committedBlocsChannelSize = 100 + blockWaitingPeriod = 30 * time.Second ) type StateSyncModule interface { @@ -18,8 +25,8 @@ type StateSyncModule interface { StateSyncServerModule SetAggregatedMetadata(aggregatedMetaData *typesCons.StateSyncMetadataResponse) - CommittedBlock(height uint64) StartSyncing() + HandleStateSyncBlockCommittedEvent(message *anypb.Any) error } var ( @@ -39,9 +46,23 @@ func CreateStateSync(bus modules.Bus, options ...modules.ModuleOption) (modules. return new(stateSync).Create(bus, options...) } -// CommittedBlock is called by the consensus module when a block received by the network is committed by blockApplicationLoop() function -func (m *stateSync) CommittedBlock(height uint64) { - m.committedBlocksChannel <- height +func (m *stateSync) HandleStateSyncBlockCommittedEvent(event *anypb.Any) error { + evt, err := codec.GetCodec().FromAny(event) + if err != nil { + return err + } + + switch event.MessageName() { + + case messaging.StateSyncBlockCommittedEventType: + newCommitBlockEvent, ok := evt.(*typesCons.StateSyncBlockCommittedEvent) + if !ok { + return fmt.Errorf("failed to cast event to StateSyncBlockCommittedEvent") + } + + m.committedBlocksChannel <- newCommitBlockEvent.Height + } + return nil } func (*stateSync) Create(bus modules.Bus, options ...modules.ModuleOption) (modules.Module, error) { diff --git a/consensus/state_sync_handler.go b/consensus/state_sync_handler.go index afbf39044..e33cbb71f 100644 --- a/consensus/state_sync_handler.go +++ b/consensus/state_sync_handler.go @@ -26,8 +26,11 @@ func (m *consensusModule) HandleStateSyncMessage(stateSyncMessageAny *anypb.Any) if !ok { return fmt.Errorf("failed to cast message to StateSyncMessage") } - return m.handleStateSyncMessage(stateSyncMessage) + + case messaging.StateSyncBlockCommittedEventType: + return m.stateSync.HandleStateSyncBlockCommittedEvent(stateSyncMessageAny) + default: return typesCons.ErrUnknownStateSyncMessageType(stateSyncMessageAny.MessageName()) } diff --git a/consensus/types/proto/state_sync.proto b/consensus/types/proto/state_sync.proto index 4487a8da7..bf1c795eb 100644 --- a/consensus/types/proto/state_sync.proto +++ b/consensus/types/proto/state_sync.proto @@ -43,6 +43,10 @@ message StateSyncMessage { } } +message StateSyncBlockCommittedEvent { + uint64 height = 1; +} + // NOT USED: This gRPC interface is **not being used at the moment**. It is in place simply as a // guideline of what how the types in this file could be used if a direct synchronous communication // between nodes were implemented. Furthermore, since the message types are used for asynchronous diff --git a/shared/messaging/events.go b/shared/messaging/events.go index a726a55a0..eaade42c9 100644 --- a/shared/messaging/events.go +++ b/shared/messaging/events.go @@ -7,8 +7,9 @@ const ( StateMachineTransitionEventType = "pocket.StateMachineTransitionEvent" // Consensus - HotstuffMessageContentType = "consensus.HotstuffMessage" - StateSyncMessageContentType = "consensus.StateSyncMessage" + HotstuffMessageContentType = "consensus.HotstuffMessage" + StateSyncMessageContentType = "consensus.StateSyncMessage" + StateSyncBlockCommittedEventType = "consensus.StateSyncBlockCommittedEvent" // Utility TxGossipMessageContentType = "utility.TxGossipMessage" diff --git a/shared/node.go b/shared/node.go index aa4292bfb..ed6c760d2 100644 --- a/shared/node.go +++ b/shared/node.go @@ -170,7 +170,7 @@ func (node *Node) handleEvent(message *messaging.PocketEnvelope) error { } case messaging.HotstuffMessageContentType: return node.GetBus().GetConsensusModule().HandleMessage(message.Content) - case messaging.StateSyncMessageContentType: + case messaging.StateSyncMessageContentType, messaging.StateSyncBlockCommittedEventType: return node.GetBus().GetConsensusModule().HandleStateSyncMessage(message.Content) case messaging.TxGossipMessageContentType: return node.GetBus().GetUtilityModule().HandleUtilityMessage(message.Content) From 386de6d84f5f94e42c22d5864e45ab4d17497453 Mon Sep 17 00:00:00 2001 From: gokutheengineer Date: Wed, 3 May 2023 20:08:37 +0300 Subject: [PATCH 028/100] wip fixes --- consensus/events.go | 3 +-- consensus/module_consensus_state_sync.go | 1 + consensus/state_sync/module.go | 20 +++++++++++++++----- consensus/state_sync_handler.go | 1 + consensus/types/proto/state_sync.proto | 4 ---- shared/messaging/proto/events.proto | 4 ++++ 6 files changed, 22 insertions(+), 11 deletions(-) diff --git a/consensus/events.go b/consensus/events.go index 3e335c4c3..f762362c1 100644 --- a/consensus/events.go +++ b/consensus/events.go @@ -1,7 +1,6 @@ package consensus import ( - "github.com/pokt-network/pocket/consensus/types" "github.com/pokt-network/pocket/shared/messaging" ) @@ -16,7 +15,7 @@ func (m *consensusModule) publishNewHeightEvent(height uint64) { // publishStateSyncBlockCommittedEvent func (m *consensusModule) publishStateSyncBlockCommittedEvent(height uint64) { - stateSyncBlockCommittedEvent, err := messaging.PackMessage(&types.StateSyncBlockCommittedEvent{Height: height}) + stateSyncBlockCommittedEvent, err := messaging.PackMessage(&messaging.StateSyncBlockCommittedEvent{Height: height}) if err != nil { m.logger.Fatal().Err(err).Msg("Failed to pack state sync committed block event") } diff --git a/consensus/module_consensus_state_sync.go b/consensus/module_consensus_state_sync.go index 26871cf08..98b28f379 100644 --- a/consensus/module_consensus_state_sync.go +++ b/consensus/module_consensus_state_sync.go @@ -64,6 +64,7 @@ func (m *consensusModule) blockApplicationLoop() { m.logger.Err(err).Msg("failed to apply and commit block") continue } + m.logger.Info().Msgf("Block, at height %d is committed!", block.BlockHeader.Height) m.publishStateSyncBlockCommittedEvent(block.BlockHeader.Height) } } diff --git a/consensus/state_sync/module.go b/consensus/state_sync/module.go index f4a855dd8..fbddaafcd 100644 --- a/consensus/state_sync/module.go +++ b/consensus/state_sync/module.go @@ -54,7 +54,7 @@ func (m *stateSync) HandleStateSyncBlockCommittedEvent(event *anypb.Any) error { switch event.MessageName() { case messaging.StateSyncBlockCommittedEventType: - newCommitBlockEvent, ok := evt.(*typesCons.StateSyncBlockCommittedEvent) + newCommitBlockEvent, ok := evt.(*messaging.StateSyncBlockCommittedEvent) if !ok { return fmt.Errorf("failed to cast event to StateSyncBlockCommittedEvent") } @@ -98,6 +98,7 @@ func (m *stateSync) SetAggregatedMetadata(aggregatedMetaData *typesCons.StateSyn func (m *stateSync) Start() error { consensusMod := m.bus.GetConsensusModule() currentHeight := consensusMod.CurrentHeight() + fmt.Println("Consensus current height: ", currentHeight) nodeAddress := consensusMod.GetNodeAddress() readCtx, err := m.GetBus().GetPersistenceModule().NewReadContext(int64(currentHeight)) if err != nil { @@ -111,16 +112,25 @@ func (m *stateSync) Start() error { return err } + // TODO: maybe remove this + requestHeight := currentHeight + + // if node is starting to sync from the beginning, set the request height to 1 + // if currentHeight == 0 { + // fmt.Println("setting request height: ", 1) + // requestHeight = 1 + // } + // requests blocks from the current height to the aggregated metadata height - for currentHeight <= m.aggregatedMetaData.MaxHeight { - m.logger.Info().Msgf("Sync is requesting block: %d, ending height: %d", currentHeight, m.aggregatedMetaData.MaxHeight) + for requestHeight <= m.aggregatedMetaData.MaxHeight { + m.logger.Info().Msgf("Sync is requesting block: %d, ending height: %d", requestHeight, m.aggregatedMetaData.MaxHeight) // form the get block request message stateSyncGetBlockMessage := &typesCons.StateSyncMessage{ Message: &typesCons.StateSyncMessage_GetBlockReq{ GetBlockReq: &typesCons.GetBlockRequest{ PeerAddress: nodeAddress, - Height: currentHeight, + Height: requestHeight, }, }, } @@ -136,7 +146,7 @@ func (m *stateSync) Start() error { <-m.committedBlocksChannel // requested block is received and committed, continue to the next block from the current height - currentHeight = consensusMod.CurrentHeight() + requestHeight = consensusMod.CurrentHeight() } // syncing is complete and all requested blocks are committed, stop the state sync module return m.Stop() diff --git a/consensus/state_sync_handler.go b/consensus/state_sync_handler.go index 040c55111..2abf62584 100644 --- a/consensus/state_sync_handler.go +++ b/consensus/state_sync_handler.go @@ -48,6 +48,7 @@ func (m *consensusModule) handleStateSyncMessage(stateSyncMessage *typesCons.Sta return nil case *typesCons.StateSyncMessage_MetadataRes: m.logger.Info().Str("proto_type", "MetadataResponse").Msg("Handling StateSyncMessage MetadataRes") + fmt.Println("MetadataResponse Received MaxHeight: ", stateSyncMessage.GetMetadataRes().MaxHeight) m.metadataReceived <- stateSyncMessage.GetMetadataRes() return nil case *typesCons.StateSyncMessage_GetBlockReq: diff --git a/consensus/types/proto/state_sync.proto b/consensus/types/proto/state_sync.proto index bf1c795eb..4487a8da7 100644 --- a/consensus/types/proto/state_sync.proto +++ b/consensus/types/proto/state_sync.proto @@ -43,10 +43,6 @@ message StateSyncMessage { } } -message StateSyncBlockCommittedEvent { - uint64 height = 1; -} - // NOT USED: This gRPC interface is **not being used at the moment**. It is in place simply as a // guideline of what how the types in this file could be used if a direct synchronous communication // between nodes were implemented. Furthermore, since the message types are used for asynchronous diff --git a/shared/messaging/proto/events.proto b/shared/messaging/proto/events.proto index 13931be93..6aa3d5b10 100644 --- a/shared/messaging/proto/events.proto +++ b/shared/messaging/proto/events.proto @@ -15,3 +15,7 @@ message StateMachineTransitionEvent { string previous_state = 2; string new_state = 3; } + +message StateSyncBlockCommittedEvent { + uint64 height = 1; +} \ No newline at end of file From 92328c5c9023e51cb1a3e69b17553adc9abb970d Mon Sep 17 00:00:00 2001 From: gokutheengineer Date: Wed, 3 May 2023 22:12:36 +0300 Subject: [PATCH 029/100] tests pass --- consensus/e2e_tests/state_sync_test.go | 2 +- consensus/e2e_tests/utils_test.go | 162 ++++++++++++++++++++++--- consensus/fsm_handler.go | 2 +- consensus/helpers.go | 2 +- consensus/state_sync_handler.go | 1 - shared/messaging/events.go | 12 +- 6 files changed, 153 insertions(+), 28 deletions(-) diff --git a/consensus/e2e_tests/state_sync_test.go b/consensus/e2e_tests/state_sync_test.go index 267bee10b..0066432d7 100644 --- a/consensus/e2e_tests/state_sync_test.go +++ b/consensus/e2e_tests/state_sync_test.go @@ -270,7 +270,7 @@ func TestStateSync_UnsyncedPeerSyncs_Success(t *testing.T) { require.NoError(t, err) } -// TODO(#352): Implement these tests +// TODO: Implement these tests func TestStateSync_UnsyncedPeerSyncsABlock_Success(t *testing.T) { t.Skip() diff --git a/consensus/e2e_tests/utils_test.go b/consensus/e2e_tests/utils_test.go index bf8804913..9e3d40af4 100644 --- a/consensus/e2e_tests/utils_test.go +++ b/consensus/e2e_tests/utils_test.go @@ -13,6 +13,7 @@ import ( "github.com/golang/mock/gomock" "github.com/pokt-network/pocket/consensus" typesCons "github.com/pokt-network/pocket/consensus/types" + "github.com/pokt-network/pocket/logger" mocksPer "github.com/pokt-network/pocket/persistence/types/mocks" "github.com/pokt-network/pocket/runtime" "github.com/pokt-network/pocket/runtime/configs" @@ -41,13 +42,63 @@ func TestMain(m *testing.M) { // TODO(integration): These are temporary variables used in the prototype integration phase that // will need to be parameterized later once the test framework design matures. const ( - numValidators = 4 - stateHash = "42" + numValidators = 4 + stateHash = "42" + numberOfPersistedDummyBlocks = 200 ) var maxTxBytes = defaults.DefaultConsensusMaxMempoolBytes type IdToNodeMapping map[typesCons.NodeId]*shared.Node +type IdToPKMapping map[typesCons.NodeId]cryptoPocket.PrivateKey + +type placeholderBlocks struct { + pKs IdToPKMapping + blocks []*coreTypes.Block +} + +func (p *placeholderBlocks) setPKs(nodeId typesCons.NodeId, pk cryptoPocket.PrivateKey) { + p.pKs[nodeId] = pk +} + +func (p *placeholderBlocks) prepareBlocks(t *testing.T, bus modules.Bus, nodePKs IdToPKMapping) { + i := uint64(1) + for i <= numberOfPersistedDummyBlocks { + + leaderId := bus.GetConsensusModule().GetLeaderForView(i, uint64(0), uint8(consensus.NewRound)) + leaderPK := nodePKs[typesCons.NodeId(leaderId)] + + // Construct the block + blockHeader := &coreTypes.BlockHeader{ + Height: i, + StateHash: stateHash, + PrevStateHash: stateHash, + ProposerAddress: leaderPK.Address(), + QuorumCertificate: nil, + } + block := &coreTypes.Block{ + BlockHeader: blockHeader, + Transactions: make([][]byte, 0), + } + + // TODO_IN_THIS_COMMIT: Need to redo how this is done. + qc, err := generateValidQuorumCertificate(nodePKs, block) + require.NoError(t, err) + + qcBytes, err := codec.GetCodec().Marshal(qc) + require.NoError(t, err) + + block.BlockHeader.QuorumCertificate = qcBytes + + p.blocks = append(p.blocks, block) + i++ + } +} + +func (p *placeholderBlocks) getBlock(index uint64) *coreTypes.Block { + // get block at index -1, because block 1 is stored at index 0 of the blocks array + return p.blocks[index-1] +} /*** Node Generation Helpers ***/ @@ -88,11 +139,19 @@ func CreateTestConsensusPocketNodes( return pk.Address().String() < pk2.Address().String() }) + blocks := &placeholderBlocks{ + pKs: make(IdToPKMapping, len(buses)), + } + for i := range buses { - pocketNode := CreateTestConsensusPocketNode(t, buses[i], eventsChannel) + pocketNode := CreateTestConsensusPocketNode(t, buses[i], eventsChannel, blocks) // TODO(olshansky): Figure this part out. pocketNodes[typesCons.NodeId(i+1)] = pocketNode + nodePK, err := cryptoPocket.NewPrivateKey(pocketNode.GetBus().GetRuntimeMgr().GetConfig().PrivateKey) + require.NoError(t, err) + blocks.setPKs(typesCons.NodeId(i+1), nodePK) } + blocks.prepareBlocks(t, buses[0], blocks.pKs) return } @@ -101,8 +160,9 @@ func CreateTestConsensusPocketNode( t *testing.T, bus modules.Bus, eventsChannel modules.EventsChannel, + nodesAndBlocks *placeholderBlocks, ) *shared.Node { - persistenceMock := basePersistenceMock(t, eventsChannel, bus) + persistenceMock := basePersistenceMock(t, eventsChannel, bus, nodesAndBlocks) bus.RegisterModule(persistenceMock) consensusMod, err := consensus.Create(bus) @@ -384,10 +444,11 @@ loop: /*** Module Mocking Helpers ***/ // Creates a persistence module mock with mock implementations of some basic functionality -func basePersistenceMock(t *testing.T, _ modules.EventsChannel, bus modules.Bus) *mockModules.MockPersistenceModule { +func basePersistenceMock(t *testing.T, _ modules.EventsChannel, bus modules.Bus, placeholderBlocks *placeholderBlocks) *mockModules.MockPersistenceModule { ctrl := gomock.NewController(t) persistenceMock := mockModules.NewMockPersistenceModule(ctrl) persistenceReadContextMock := mockModules.NewMockPersistenceReadContext(ctrl) + blockStoreMock := mocksPer.NewMockKVStore(ctrl) persistenceMock.EXPECT().GetModuleName().Return(modules.PersistenceModuleName).AnyTimes() persistenceMock.EXPECT().Start().Return(nil).AnyTimes() @@ -396,29 +457,22 @@ func basePersistenceMock(t *testing.T, _ modules.EventsChannel, bus modules.Bus) persistenceMock.EXPECT().ReleaseWriteContext().Return(nil).AnyTimes() persistenceMock.EXPECT().IsValidator(gomock.Any(), gomock.Any()).Return(true, nil).AnyTimes() - blockStoreMock := mocksPer.NewMockKVStore(ctrl) - blockStoreMock.EXPECT().Get(gomock.Any()).DoAndReturn(func(height []byte) ([]byte, error) { heightInt := utils.HeightFromBytes(height) if bus.GetConsensusModule().CurrentHeight() < heightInt { return nil, fmt.Errorf("requested height is higher than current height of the node's consensus module") } - blockWithHeight := &coreTypes.Block{ - BlockHeader: &coreTypes.BlockHeader{ - Height: utils.HeightFromBytes(height), - }, - } - return codec.GetCodec().Marshal(blockWithHeight) + return codec.GetCodec().Marshal(placeholderBlocks.getBlock(heightInt)) }).AnyTimes() persistenceMock.EXPECT().GetBlockStore().Return(blockStoreMock).AnyTimes() persistenceReadContextMock.EXPECT().GetMaximumBlockHeight().DoAndReturn(func() (uint64, error) { - currentHeight := bus.GetConsensusModule().CurrentHeight() - if currentHeight == 0 { - return 0, nil + // if it is checked for an unsynched node, return the current height - 1 + if int(bus.GetConsensusModule().CurrentHeight()) <= numberOfPersistedDummyBlocks { + return bus.GetConsensusModule().CurrentHeight() - 1, nil } - return currentHeight - 1, nil + return uint64(numberOfPersistedDummyBlocks), nil }).AnyTimes() persistenceReadContextMock.EXPECT().GetMinimumBlockHeight().DoAndReturn(func() (uint64, error) { @@ -764,7 +818,7 @@ func waitForNodeToCatchUp( targetHeight uint64, ) error { // wait for unsynced node to send StateMachineEvent_Consensus_IsSyncedValidator event - _, err := WaitForNetworkFSMEvents(t, clck, eventsChannel, coreTypes.StateMachineEvent_Consensus_IsSyncedValidator, "synced event", 1, 500, false) + _, err := WaitForNetworkFSMEvents(t, clck, eventsChannel, coreTypes.StateMachineEvent_Consensus_IsSyncedValidator, "didn't receive synced event", 1, 500, false) require.NoError(t, err) // ensure unsynced node caught up to the target height @@ -877,3 +931,75 @@ func startNode(t *testing.T, pocketNode *shared.Node) { err := pocketNode.Start() require.NoError(t, err) } + +func generateValidQuorumCertificate(nodePKs IdToPKMapping, block *coreTypes.Block) (*typesCons.QuorumCertificate, error) { + var pss []*typesCons.PartialSignature + + for _, nodePK := range nodePKs { + ps, err := generatePartialSignature(block, nodePK) + if err != nil { + return nil, err + } + pss = append(pss, ps) + } + + thresholdSig := getThresholdSignature(pss) + + return &typesCons.QuorumCertificate{ + Height: block.BlockHeader.Height, + Step: 1, + Round: 1, + Block: block, + ThresholdSignature: thresholdSig, + }, nil +} + +// generate partial signature for the validator +func generatePartialSignature(block *coreTypes.Block, nodePK cryptoPocket.PrivateKey) (*typesCons.PartialSignature, error) { + // privKey, err := node.GetBus().GetConsensusModule().GetPrivateKey() + // if err != nil { + // return nil, err + // } + return &typesCons.PartialSignature{ + Signature: getMessageSignature(block, nodePK), + Address: nodePK.PublicKey().Address().String(), + }, nil +} + +func getThresholdSignature(partialSigs []*typesCons.PartialSignature) *typesCons.ThresholdSignature { + thresholdSig := new(typesCons.ThresholdSignature) + thresholdSig.Signatures = make([]*typesCons.PartialSignature, len(partialSigs)) + copy(thresholdSig.Signatures, partialSigs) + return thresholdSig +} + +// Generates partial signature with given private key +// If there is an error signing the bytes, nil is returned instead. +func getMessageSignature(block *coreTypes.Block, privKey cryptoPocket.PrivateKey) []byte { + bytesToSign, err := getSignableBytes(block) + if err != nil { + logger.Global.Warn().Err(err).Msgf("error getting bytes to sign") + return nil + } + + signature, err := privKey.Sign(bytesToSign) + if err != nil { + logger.Global.Warn().Err(err).Msgf("error signing message") + return nil + } + + return signature +} + +// Signature only over subset of fields in HotstuffMessage +// For reference, see section 4.3 of the the hotstuff whitepaper, partial signatures are +// computed over `tsignr(hm.type, m.viewNumber , m.nodei)`. https://arxiv.org/pdf/1803.05069.pdf +func getSignableBytes(block *coreTypes.Block) ([]byte, error) { + msgToSign := &typesCons.HotstuffMessage{ + Height: block.BlockHeader.Height, + Step: 1, + Round: 1, + Block: block, + } + return codec.GetCodec().Marshal(msgToSign) +} diff --git a/consensus/fsm_handler.go b/consensus/fsm_handler.go index 6aa3ea668..60c97c25a 100644 --- a/consensus/fsm_handler.go +++ b/consensus/fsm_handler.go @@ -84,7 +84,7 @@ func (m *consensusModule) HandleSyncMode(msg *messaging.StateMachineTransitionEv m.logger.Debug().Msg("Node is in Sync Mode, starting to sync...") aggregatedMetadata := m.getAggregatedStateSyncMetadata() - m.logger.Debug().Msgf("Setting metadata! is: ", aggregatedMetadata) + //m.logger.Debug().Msgf("Setting metadata! is: ", aggregatedMetadata) m.stateSync.SetAggregatedMetadata(&aggregatedMetadata) m.logger.Debug().Msg("CALLING NOW is in Sync Mode, starting to sync...") diff --git a/consensus/helpers.go b/consensus/helpers.go index f9eb7a3f8..e6950c47e 100644 --- a/consensus/helpers.go +++ b/consensus/helpers.go @@ -234,7 +234,7 @@ func (m *consensusModule) electNextLeader(msg *typesCons.HotstuffMessage) error return err } - m.logger.Info().Msgf("elected leader id: ", leaderId, ", for msg: ", msg) + //m.logger.Info().Msgf("elected leader id: ", leaderId, ", for msg: ", msg) idToValAddrMap := typesCons.NewActorMapper(validators).GetIdToValAddrMap() leader, ok := idToValAddrMap[leaderId] if !ok { diff --git a/consensus/state_sync_handler.go b/consensus/state_sync_handler.go index 2abf62584..040c55111 100644 --- a/consensus/state_sync_handler.go +++ b/consensus/state_sync_handler.go @@ -48,7 +48,6 @@ func (m *consensusModule) handleStateSyncMessage(stateSyncMessage *typesCons.Sta return nil case *typesCons.StateSyncMessage_MetadataRes: m.logger.Info().Str("proto_type", "MetadataResponse").Msg("Handling StateSyncMessage MetadataRes") - fmt.Println("MetadataResponse Received MaxHeight: ", stateSyncMessage.GetMetadataRes().MaxHeight) m.metadataReceived <- stateSyncMessage.GetMetadataRes() return nil case *typesCons.StateSyncMessage_GetBlockReq: diff --git a/shared/messaging/events.go b/shared/messaging/events.go index eaade42c9..8f2373c71 100644 --- a/shared/messaging/events.go +++ b/shared/messaging/events.go @@ -2,14 +2,14 @@ package messaging const ( // Node - NodeStartedEventType = "pocket.NodeStartedEvent" - ConsensusNewHeightEventType = "pocket.ConsensusNewHeightEvent" - StateMachineTransitionEventType = "pocket.StateMachineTransitionEvent" + NodeStartedEventType = "pocket.NodeStartedEvent" + ConsensusNewHeightEventType = "pocket.ConsensusNewHeightEvent" + StateMachineTransitionEventType = "pocket.StateMachineTransitionEvent" + StateSyncBlockCommittedEventType = "pocket.StateSyncBlockCommittedEvent" // Consensus - HotstuffMessageContentType = "consensus.HotstuffMessage" - StateSyncMessageContentType = "consensus.StateSyncMessage" - StateSyncBlockCommittedEventType = "consensus.StateSyncBlockCommittedEvent" + HotstuffMessageContentType = "consensus.HotstuffMessage" + StateSyncMessageContentType = "consensus.StateSyncMessage" // Utility TxGossipMessageContentType = "utility.TxGossipMessage" From 74675d1ce2bd1a5445118094e1911e6c630adf97 Mon Sep 17 00:00:00 2001 From: gokutheengineer Date: Wed, 3 May 2023 22:30:29 +0300 Subject: [PATCH 030/100] clean commented parts --- consensus/e2e_tests/utils_test.go | 15 ++++----------- consensus/module_consensus_state_sync.go | 5 ++++- consensus/state_sync/helpers.go | 16 ++++++++-------- consensus/state_sync/module.go | 7 +++---- consensus/state_sync_handler.go | 5 ----- persistence/block.go | 2 -- state_machine/fsm.go | 1 - 7 files changed, 19 insertions(+), 32 deletions(-) diff --git a/consensus/e2e_tests/utils_test.go b/consensus/e2e_tests/utils_test.go index 9e3d40af4..60e38adc8 100644 --- a/consensus/e2e_tests/utils_test.go +++ b/consensus/e2e_tests/utils_test.go @@ -936,11 +936,8 @@ func generateValidQuorumCertificate(nodePKs IdToPKMapping, block *coreTypes.Bloc var pss []*typesCons.PartialSignature for _, nodePK := range nodePKs { - ps, err := generatePartialSignature(block, nodePK) - if err != nil { - return nil, err - } - pss = append(pss, ps) + //ps := + pss = append(pss, generatePartialSignature(block, nodePK)) } thresholdSig := getThresholdSignature(pss) @@ -955,15 +952,11 @@ func generateValidQuorumCertificate(nodePKs IdToPKMapping, block *coreTypes.Bloc } // generate partial signature for the validator -func generatePartialSignature(block *coreTypes.Block, nodePK cryptoPocket.PrivateKey) (*typesCons.PartialSignature, error) { - // privKey, err := node.GetBus().GetConsensusModule().GetPrivateKey() - // if err != nil { - // return nil, err - // } +func generatePartialSignature(block *coreTypes.Block, nodePK cryptoPocket.PrivateKey) *typesCons.PartialSignature { return &typesCons.PartialSignature{ Signature: getMessageSignature(block, nodePK), Address: nodePK.PublicKey().Address().String(), - }, nil + } } func getThresholdSignature(partialSigs []*typesCons.PartialSignature) *typesCons.ThresholdSignature { diff --git a/consensus/module_consensus_state_sync.go b/consensus/module_consensus_state_sync.go index 98b28f379..3166b2a2f 100644 --- a/consensus/module_consensus_state_sync.go +++ b/consensus/module_consensus_state_sync.go @@ -79,7 +79,10 @@ func (m *consensusModule) metadataSyncLoop() error { select { case <-ticker.C: m.logger.Info().Msg("Background metadata sync check triggered") - m.sendMetadataRequests() + if err := m.sendMetadataRequests(); err != nil { + m.logger.Error().Err(err).Msg("Failed to send metadata requests") + return err + } case <-ctx.Done(): ticker.Stop() diff --git a/consensus/state_sync/helpers.go b/consensus/state_sync/helpers.go index 3a7bb1afc..19e7062b8 100644 --- a/consensus/state_sync/helpers.go +++ b/consensus/state_sync/helpers.go @@ -19,12 +19,12 @@ func (m *stateSync) sendStateSyncMessage(msg *typesCons.StateSyncMessage, dst cr return nil } -func (m *stateSync) stateSyncLogHelper(receiverPeerAddress string) map[string]any { - consensusMod := m.GetBus().GetConsensusModule() +// func (m *stateSync) stateSyncLogHelper(receiverPeerAddress string) map[string]any { +// consensusMod := m.GetBus().GetConsensusModule() - return map[string]any{ - "height": consensusMod.CurrentHeight(), - "senderPeerAddress": consensusMod.GetNodeAddress(), - "receiverPeerAddress": receiverPeerAddress, - } -} +// return map[string]any{ +// "height": consensusMod.CurrentHeight(), +// "senderPeerAddress": consensusMod.GetNodeAddress(), +// "receiverPeerAddress": receiverPeerAddress, +// } +// } diff --git a/consensus/state_sync/module.go b/consensus/state_sync/module.go index fbddaafcd..3c274e963 100644 --- a/consensus/state_sync/module.go +++ b/consensus/state_sync/module.go @@ -24,7 +24,7 @@ type StateSyncModule interface { StateSyncServerModule SetAggregatedMetadata(aggregatedMetaData *typesCons.StateSyncMetadataResponse) - //StartSyncing() + // StartSyncing() HandleStateSyncBlockCommittedEvent(message *anypb.Any) error } @@ -51,9 +51,7 @@ func (m *stateSync) HandleStateSyncBlockCommittedEvent(event *anypb.Any) error { return err } - switch event.MessageName() { - - case messaging.StateSyncBlockCommittedEventType: + if event.MessageName() == messaging.StateSyncBlockCommittedEventType { newCommitBlockEvent, ok := evt.(*messaging.StateSyncBlockCommittedEvent) if !ok { return fmt.Errorf("failed to cast event to StateSyncBlockCommittedEvent") @@ -61,6 +59,7 @@ func (m *stateSync) HandleStateSyncBlockCommittedEvent(event *anypb.Any) error { m.committedBlocksChannel <- newCommitBlockEvent.Height } + return nil } diff --git a/consensus/state_sync_handler.go b/consensus/state_sync_handler.go index 040c55111..ccd9a6c2c 100644 --- a/consensus/state_sync_handler.go +++ b/consensus/state_sync_handler.go @@ -10,11 +10,6 @@ import ( ) func (m *consensusModule) HandleStateSyncMessage(stateSyncMessageAny *anypb.Any) error { - // m.m.Lock() - // defer m.m.Unlock() - - //m.logger.Info().Msg("Handling StateSyncMessage") - switch stateSyncMessageAny.MessageName() { case messaging.StateSyncMessageContentType: msg, err := codec.GetCodec().FromAny(stateSyncMessageAny) diff --git a/persistence/block.go b/persistence/block.go index c388941c0..6516d2a01 100644 --- a/persistence/block.go +++ b/persistence/block.go @@ -93,8 +93,6 @@ func (p *PostgresContext) prepareBlock(proposerAddr, quorumCert []byte) (*coreTy Transactions: txs, } - //p.logger.Info().Uint64("height", block.BlockHeader.Height).Msg("Storing block in block store.") - return block, nil } diff --git a/state_machine/fsm.go b/state_machine/fsm.go index 92abfc995..508ff9db5 100644 --- a/state_machine/fsm.go +++ b/state_machine/fsm.go @@ -60,7 +60,6 @@ func NewNodeFSM(callbacks *fsm.Callbacks, options ...func(*fsm.FSM)) *fsm.FSM { string(coreTypes.StateMachineState_Consensus_Pacemaker), string(coreTypes.StateMachineState_Consensus_Synced), string(coreTypes.StateMachineState_P2P_Bootstrapped), - //string(coreTypes.StateMachineState_Consensus_SyncMode), }, Dst: string(coreTypes.StateMachineState_Consensus_Unsynced), }, From 8224e9a32853804b50111c0fbe519a7597c53eb4 Mon Sep 17 00:00:00 2001 From: gokutheengineer Date: Thu, 4 May 2023 20:36:55 +0300 Subject: [PATCH 031/100] address comments --- consensus/e2e_tests/utils_test.go | 3 ++- consensus/helpers.go | 21 +++++++++++++++++++++ consensus/module.go | 27 ++++++++++++++++++++++++++- consensus/state_sync/module.go | 15 +++++++++++++-- persistence/module.go | 21 --------------------- persistence/validator.go | 22 ++++++++++++++++++++++ shared/messaging/events.go | 16 +++++++++------- shared/modules/persistence_module.go | 6 +++--- shared/node.go | 8 ++++---- 9 files changed, 100 insertions(+), 39 deletions(-) diff --git a/consensus/e2e_tests/utils_test.go b/consensus/e2e_tests/utils_test.go index 60e38adc8..0921378ff 100644 --- a/consensus/e2e_tests/utils_test.go +++ b/consensus/e2e_tests/utils_test.go @@ -455,7 +455,6 @@ func basePersistenceMock(t *testing.T, _ modules.EventsChannel, bus modules.Bus, persistenceMock.EXPECT().SetBus(gomock.Any()).Return().AnyTimes() persistenceMock.EXPECT().NewReadContext(gomock.Any()).Return(persistenceReadContextMock, nil).AnyTimes() persistenceMock.EXPECT().ReleaseWriteContext().Return(nil).AnyTimes() - persistenceMock.EXPECT().IsValidator(gomock.Any(), gomock.Any()).Return(true, nil).AnyTimes() blockStoreMock.EXPECT().Get(gomock.Any()).DoAndReturn(func(height []byte) ([]byte, error) { heightInt := utils.HeightFromBytes(height) @@ -486,6 +485,7 @@ func basePersistenceMock(t *testing.T, _ modules.EventsChannel, bus modules.Bus, persistenceReadContextMock.EXPECT().GetAllValidators(gomock.Any()).Return(bus.GetRuntimeMgr().GetGenesis().Validators, nil).AnyTimes() persistenceReadContextMock.EXPECT().GetBlockHash(gomock.Any()).Return("", nil).AnyTimes() persistenceReadContextMock.EXPECT().Release().AnyTimes() + persistenceReadContextMock.EXPECT().IsValidator(gomock.Any(), gomock.Any()).Return(true, nil).AnyTimes() return persistenceMock } @@ -755,6 +755,7 @@ func WaitForNodeToSync( allNodes IdToNodeMapping, targetHeight uint64, ) { + t.Helper() currentHeight := unsyncedNode.GetBus().GetConsensusModule().CurrentHeight() for currentHeight < targetHeight { diff --git a/consensus/helpers.go b/consensus/helpers.go index e6950c47e..421cb52f4 100644 --- a/consensus/helpers.go +++ b/consensus/helpers.go @@ -295,3 +295,24 @@ func (m *consensusModule) maxPersistedBlockHeight() (uint64, error) { return maxHeight, nil } + +// func (m *consensusModule) drainAndCloseChannel(ch chan int, channelName string) { +// m.logger.Log().Msgf("Draining and closing channel ", channelName) +// for { +// select { +// case msg, ok := <-ch: +// if ok { +// //fmt.Println("Logging element before closing channel:", elem) +// m.logger.Info().Msgf("Drained message: ", msg) +// } else { +// close(ch) +// return +// } +// default: + +// close(ch) +// fmt.Println("Closed the channel") +// return +// } +// } +// } diff --git a/consensus/module.go b/consensus/module.go index 2f769fb9f..d2ad5d080 100644 --- a/consensus/module.go +++ b/consensus/module.go @@ -201,7 +201,32 @@ func (m *consensusModule) Start() error { } func (m *consensusModule) Stop() error { - return nil + m.logger.Info().Msg("Stopping consensus module") + + m.logger.Log().Msg("Draining and closing metadataReceived and blockResponse channels") + for { + select { + case metaData, ok := <-m.metadataReceived: + if ok { + m.logger.Info().Msgf("Drained metadata message: ", metaData) + } else { + close(m.metadataReceived) + return nil + } + case blockResponse, ok := <-m.blocksResponsesReceived: + if ok { + m.logger.Info().Msgf("Drained blockResponse message: ", blockResponse) + } else { + close(m.blocksResponsesReceived) + return nil + } + default: + close(m.metadataReceived) + close(m.blocksResponsesReceived) + fmt.Println("closed all chanells") + return nil + } + } } func (m *consensusModule) GetModuleName() string { diff --git a/consensus/state_sync/module.go b/consensus/state_sync/module.go index 3c274e963..9a417c404 100644 --- a/consensus/state_sync/module.go +++ b/consensus/state_sync/module.go @@ -156,12 +156,23 @@ func (m *stateSync) Stop() error { // check if the node is a validator currentHeight := m.bus.GetConsensusModule().CurrentHeight() nodeAddress := m.bus.GetConsensusModule().GetNodeAddress() - isValidator, err := m.bus.GetPersistenceModule().IsValidator(int64(currentHeight), nodeAddress) + m.logger.Info().Msg("Syncing is complete!") + + readCtx, err := m.bus.GetPersistenceModule().NewReadContext(int64(currentHeight)) if err != nil { return err } - m.logger.Info().Msg("Syncing is complete!") + defer readCtx.Release() + + fmt.Println("checking if validator...") + + isValidator, err := readCtx.IsValidator(int64(currentHeight), nodeAddress) + if err != nil { + return err + } + + fmt.Println("is validator: ", isValidator) if isValidator { return m.GetBus().GetStateMachineModule().SendEvent(coreTypes.StateMachineEvent_Consensus_IsSyncedValidator) diff --git a/persistence/module.go b/persistence/module.go index 524f25f36..7f8280144 100644 --- a/persistence/module.go +++ b/persistence/module.go @@ -231,27 +231,6 @@ func (m *persistenceModule) NewWriteContext() modules.PersistenceRWContext { return m.writeContext } -func (m *persistenceModule) IsValidator(height int64, address string) (bool, error) { - readCtx, err := m.GetBus().GetPersistenceModule().NewReadContext(height) - if err != nil { - return false, err - } - defer readCtx.Release() - - validators, err := readCtx.GetAllValidators(int64(height)) - if err != nil { - return false, err - } - - for _, actor := range validators { - if actor.Address == address { - return true, nil - } - } - - return false, nil -} - func initializeBlockStore(blockStorePath string) (kvstore.KVStore, error) { if blockStorePath == "" { return kvstore.NewMemKVStore(), nil diff --git a/persistence/validator.go b/persistence/validator.go index 3f9672b15..32e7e484d 100644 --- a/persistence/validator.go +++ b/persistence/validator.go @@ -2,6 +2,7 @@ package persistence import ( "encoding/hex" + "fmt" "github.com/pokt-network/pocket/persistence/types" coreTypes "github.com/pokt-network/pocket/shared/core/types" @@ -83,6 +84,27 @@ func (p *PostgresContext) GetValidatorOutputAddress(operator []byte, height int6 return p.GetActorOutputAddress(types.ValidatorActor, operator, height) } +func (m *PostgresContext) IsValidator(height int64, address string) (bool, error) { + validators, err := m.GetAllValidators(int64(height)) + if err != nil { + return false, err + } + + for _, actor := range validators { + if actor.Address == address { + fmt.Println(" returning true") + return true, nil + } + } + + // val, err := m.GetActor(coreTypes.ActorType_ACTOR_TYPE_VAL, []byte(address), height) + // if err != nil { + // return false, err + // } + + return false, nil +} + // TODO: implement missed blocks func (p *PostgresContext) SetValidatorMissedBlocks(address []byte, missedBlocks int) error { return nil diff --git a/shared/messaging/events.go b/shared/messaging/events.go index 8f2373c71..28692f27b 100644 --- a/shared/messaging/events.go +++ b/shared/messaging/events.go @@ -2,14 +2,16 @@ package messaging const ( // Node - NodeStartedEventType = "pocket.NodeStartedEvent" - ConsensusNewHeightEventType = "pocket.ConsensusNewHeightEvent" - StateMachineTransitionEventType = "pocket.StateMachineTransitionEvent" - StateSyncBlockCommittedEventType = "pocket.StateSyncBlockCommittedEvent" + NodeStartedEventType = "pocket.NodeStartedEvent" + ConsensusNewHeightEventType = "pocket.ConsensusNewHeightEvent" + StateMachineTransitionEventType = "pocket.StateMachineTransitionEvent" + + // Consensus - HotPOKT + HotstuffMessageContentType = "consensus.HotstuffMessage" - // Consensus - HotstuffMessageContentType = "consensus.HotstuffMessage" - StateSyncMessageContentType = "consensus.StateSyncMessage" + // Consensus - State Sync + StateSyncBlockCommittedEventType = "pocket.StateSyncBlockCommittedEvent" + StateSyncMessageContentType = "consensus.StateSyncMessage" // Utility TxGossipMessageContentType = "utility.TxGossipMessage" diff --git a/shared/modules/persistence_module.go b/shared/modules/persistence_module.go index 37aee4b81..618e0e6c9 100644 --- a/shared/modules/persistence_module.go +++ b/shared/modules/persistence_module.go @@ -29,9 +29,6 @@ type PersistenceModule interface { // Debugging / development only HandleDebugMessage(*messaging.DebugMessage) error - - // Checks whether given node is validator in the given height - IsValidator(height int64, address string) (bool, error) } // Interface defining the context within which the node can operate with the persistence layer. @@ -197,6 +194,9 @@ type PersistenceReadContext interface { GetValidatorOutputAddress(operator []byte, height int64) (output []byte, err error) GetValidatorMissedBlocks(address []byte, height int64) (int, error) + // Checks whether given node is validator in the given height + IsValidator(height int64, address string) (bool, error) + // Actors Queries GetAllStakedActors(height int64) ([]*coreTypes.Actor, error) diff --git a/shared/node.go b/shared/node.go index ed6c760d2..01179f984 100644 --- a/shared/node.go +++ b/shared/node.go @@ -157,10 +157,10 @@ func (m *Node) GetBus() modules.Bus { // TODO: Move all message types this is dependant on to the `messaging` package func (node *Node) handleEvent(message *messaging.PocketEnvelope) error { contentType := message.GetContentType() - logger.Global.Debug().Fields(map[string]any{ - "message": message, - "contentType": contentType, - }).Msg("node handling event") + // logger.Global.Debug().Fields(map[string]any{ + // "message": message, + // "contentType": contentType, + // }).Msg("node handling event") switch contentType { case messaging.NodeStartedEventType: From da8bdd83cba74cb65d3c64ca0c37a296e40d1b82 Mon Sep 17 00:00:00 2001 From: gokutheengineer Date: Thu, 4 May 2023 22:43:34 +0300 Subject: [PATCH 032/100] updates --- consensus/e2e_tests/utils_test.go | 67 +++++++++++++----------- consensus/fsm_handler.go | 15 +++--- consensus/module.go | 1 - consensus/module_consensus_state_sync.go | 19 +++---- consensus/state_sync/module.go | 40 ++++++-------- 5 files changed, 68 insertions(+), 74 deletions(-) diff --git a/consensus/e2e_tests/utils_test.go b/consensus/e2e_tests/utils_test.go index 0921378ff..0c0c85e45 100644 --- a/consensus/e2e_tests/utils_test.go +++ b/consensus/e2e_tests/utils_test.go @@ -31,6 +31,7 @@ import ( "github.com/pokt-network/pocket/shared/utils" "github.com/pokt-network/pocket/state_machine" "github.com/stretchr/testify/require" + "google.golang.org/protobuf/proto" "google.golang.org/protobuf/types/known/anypb" ) @@ -759,19 +760,31 @@ func WaitForNodeToSync( currentHeight := unsyncedNode.GetBus().GetConsensusModule().CurrentHeight() for currentHeight < targetHeight { - // waiting for unsynced node to request missing block - blockRequests, err := waitForNodeToRequestMissingBlock(t, clck, eventsChannel) + // waiting for unsynced node to request the same missing block from all peers. + blockRequests, err := WaitForNetworkStateSyncEvents(t, clck, eventsChannel, "Error while waiting for block response messages.", numValidators, 250, true) require.NoError(t, err) + require.True(t, checkIdentical(blockRequests), "All block requests sent by node should be identical") - // broadcast one of the requests to all nodes + // broadcast one of the requests to all nodes, as all requests are identical P2PBroadcast(t, allNodes, blockRequests[0]) advanceTime(t, clck, 10*time.Millisecond) // wait to receive replies from all nodes - blockResponses, err := waitForNodesToReplyToBlockRequest(t, clck, eventsChannel) + blockResponses, err := WaitForNetworkStateSyncEvents(t, clck, eventsChannel, "Error while waiting for block request messages.", numValidators-1, 250, true) require.NoError(t, err) - // send one of the block responses to the unsynced node + // check that all nodes replied with the same block response + for _, msg := range blockResponses { + msgAny, err := codec.GetCodec().FromAny(msg) + require.NoError(t, err) + + stateSyncMessage, ok := msgAny.(*typesCons.StateSyncMessage) + require.True(t, ok) + + require.Equal(t, currentHeight, stateSyncMessage.GetGetBlockRes().Block.BlockHeader.Height) + } + + // since all block responses are identical, send one of the block responses to the unsynced node P2PSend(t, unsyncedNode, blockResponses[0]) advanceTime(t, clck, 10*time.Millisecond) @@ -783,33 +796,6 @@ func WaitForNodeToSync( } } -// waitForNodeToRequestMissingBlock waits for unsynced node to request missing block form the network -func waitForNodeToRequestMissingBlock( - t *testing.T, - clck *clock.Mock, - eventsChannel modules.EventsChannel, -) ([]*anypb.Any, error) { - - errMsg := "Error waiting for StateSync Block Request Messages" - msgs, err := WaitForNetworkStateSyncEvents(t, clck, eventsChannel, errMsg, numValidators, 250, true) - require.NoError(t, err) - - return msgs, err -} - -// waitForNodesToReplyToBlockRequest waits for nodes to send back requested block -func waitForNodesToReplyToBlockRequest( - t *testing.T, - clck *clock.Mock, - eventsChannel modules.EventsChannel, -) ([]*anypb.Any, error) { - errMsg := "StateSync Block Response Messages" - msgs, err := WaitForNetworkStateSyncEvents(t, clck, eventsChannel, errMsg, numValidators-1, 250, true) - require.NoError(t, err) - - return msgs, err -} - // waitForNodeToCatchUp waits for unsynced node to catch up to the target height func waitForNodeToCatchUp( t *testing.T, @@ -997,3 +983,20 @@ func getSignableBytes(block *coreTypes.Block) ([]byte, error) { } return codec.GetCodec().Marshal(msgToSign) } + +func checkIdentical(arr []*anypb.Any) bool { + if len(arr) == 0 { + return true + } + + first := arr[0] + fmt.Println("checking first: ", first) + for _, msg := range arr { + fmt.Println("checking identical: ", msg) + if !proto.Equal(first, msg) { + return false + } + } + + return true +} diff --git a/consensus/fsm_handler.go b/consensus/fsm_handler.go index 60c97c25a..43834bcb3 100644 --- a/consensus/fsm_handler.go +++ b/consensus/fsm_handler.go @@ -33,7 +33,7 @@ func (m *consensusModule) HandleEvent(transitionMessageAny *anypb.Any) error { } func (m *consensusModule) handleStateTransitionEvent(msg *messaging.StateMachineTransitionEvent) error { - m.logger.Debug().Fields(messaging.TransitionEventToMap(msg)).Msg("Received state machine transition msg") + m.logger.Info().Fields(messaging.TransitionEventToMap(msg)).Msg("Received state machine transition msg") fsm_state := msg.NewState switch coreTypes.StateMachineState(fsm_state) { @@ -64,7 +64,7 @@ func (m *consensusModule) handleStateTransitionEvent(msg *messaging.StateMachine // Bootrstapped mode is when the node (validator or non-validator) is first coming online. // This is a transition mode from node bootstrapping to a node being out-of-sync. func (m *consensusModule) HandleBootstrapped(msg *messaging.StateMachineTransitionEvent) error { - m.logger.Debug().Msg("Node is in bootstrapped state") + m.logger.Info().Msg("Node is in bootstrapped state") return nil } @@ -73,7 +73,7 @@ func (m *consensusModule) HandleBootstrapped(msg *messaging.StateMachineTransiti // This mode is a transition mode from the node being up-to-date (i.e. Pacemaker mode, Synced mode) with the latest network height to being out-of-sync. // As soon as node transitions to this mode, it will transition to the sync mode. func (m *consensusModule) HandleUnsynced(msg *messaging.StateMachineTransitionEvent) error { - m.logger.Debug().Msg("Node is in Unsyched state, as node is out of sync sending syncmode event to start syncing") + m.logger.Info().Msg("Node is in Unsyched state, as node is out of sync sending syncmode event to start syncing") return m.GetBus().GetStateMachineModule().SendEvent(coreTypes.StateMachineEvent_Consensus_IsSyncing) } @@ -81,14 +81,11 @@ func (m *consensusModule) HandleUnsynced(msg *messaging.StateMachineTransitionEv // HandleSyncMode handles FSM event Consensus_IsSyncing, and SyncMode is the destination state. // In Sync mode node (validator or non-validator) starts syncing with the rest of the network. func (m *consensusModule) HandleSyncMode(msg *messaging.StateMachineTransitionEvent) error { - m.logger.Debug().Msg("Node is in Sync Mode, starting to sync...") + m.logger.Info().Msg("Node is in Sync Mode, starting to sync...") aggregatedMetadata := m.getAggregatedStateSyncMetadata() - //m.logger.Debug().Msgf("Setting metadata! is: ", aggregatedMetadata) m.stateSync.SetAggregatedMetadata(&aggregatedMetadata) - m.logger.Debug().Msg("CALLING NOW is in Sync Mode, starting to sync...") - //go m.stateSync.StartSyncing() go m.stateSync.Start() @@ -99,14 +96,14 @@ func (m *consensusModule) HandleSyncMode(msg *messaging.StateMachineTransitionEv // Currently, FSM never transition to this state and a non-validator node always stays in syncmode. // CONSIDER: when a non-validator sync is implemented, maybe there is a case that requires transitioning to this state. func (m *consensusModule) HandleSynced(msg *messaging.StateMachineTransitionEvent) error { - m.logger.Debug().Msg("Non-validator node is in Synced mode") + m.logger.Info().Msg("Non-validator node is in Synced mode") return nil } // HandlePacemaker handles FSM event IsSyncedValidator, and Pacemaker is the destination state. // Execution of this state means the validator node is synced. func (m *consensusModule) HandlePacemaker(msg *messaging.StateMachineTransitionEvent) error { - m.logger.Debug().Msg("Validator node is Synced and in Pacemaker mode. It will stay in this mode until it receives a new block proposal that has a higher height than the current block height") + m.logger.Info().Msg("Validator node is Synced and in Pacemaker mode. It will stay in this mode until it receives a new block proposal that has a higher height than the current block height") // validator receives a new block proposal, and it understands that it doesn't have block and it transitions to unsycnhed state // transitioning out of this state happens when a new block proposal is received by the hotstuff_replica diff --git a/consensus/module.go b/consensus/module.go index d2ad5d080..5fd476ff5 100644 --- a/consensus/module.go +++ b/consensus/module.go @@ -223,7 +223,6 @@ func (m *consensusModule) Stop() error { default: close(m.metadataReceived) close(m.blocksResponsesReceived) - fmt.Println("closed all chanells") return nil } } diff --git a/consensus/module_consensus_state_sync.go b/consensus/module_consensus_state_sync.go index 3166b2a2f..9cec7b77c 100644 --- a/consensus/module_consensus_state_sync.go +++ b/consensus/module_consensus_state_sync.go @@ -34,37 +34,39 @@ func (m *consensusModule) GetNodeAddress() string { // blockApplicationLoop commits the blocks received from the blocksResponsesReceived channel // it is intended to be run as a background process func (m *consensusModule) blockApplicationLoop() { + logger := m.logger.With().Str("source", "blockApplicationLoop").Logger() + for blockResponse := range m.blocksResponsesReceived { block := blockResponse.Block - m.logger.Info().Msgf("New block, at height %d is received!", block.BlockHeader.Height) + logger.Info().Msgf("New block, at height %d is received!", block.BlockHeader.Height) maxPersistedHeight, err := m.maxPersistedBlockHeight() if err != nil { - m.logger.Err(err).Msg("couldn't query max persisted height") + logger.Err(err).Msg("couldn't query max persisted height") continue } - // TODO: rather than discarding these blocks, push them into a channel to process them later + // CONSIDERATION: rather than discarding these blocks, push them into a channel to process them later if block.BlockHeader.Height <= maxPersistedHeight { - m.logger.Info().Msgf("Received block at height %d, discarding as it has already been persisted", block.BlockHeader.Height) + logger.Info().Msgf("Received block at height %d, discarding as it has already been persisted", block.BlockHeader.Height) continue } if block.BlockHeader.Height > m.CurrentHeight() { - m.logger.Info().Msgf("Received block at height %d, discarding as it is higher than the current height", block.BlockHeader.Height) + logger.Info().Msgf("Received block at height %d, discarding as it is higher than the current height", block.BlockHeader.Height) continue } if err = m.validateBlock(block); err != nil { - m.logger.Err(err).Msg("failed to validate block") + logger.Err(err).Msg("failed to validate block") continue } if err = m.applyAndCommitBlock(block); err != nil { - m.logger.Err(err).Msg("failed to apply and commit block") + logger.Err(err).Msg("failed to apply and commit block") continue } - m.logger.Info().Msgf("Block, at height %d is committed!", block.BlockHeader.Height) + logger.Info().Msgf("Block, at height %d is committed!", block.BlockHeader.Height) m.publishStateSyncBlockCommittedEvent(block.BlockHeader.Height) } } @@ -120,7 +122,6 @@ func (m *consensusModule) sendMetadataRequests() error { return nil } -// TODO! If verify block tries to verify, state sync tests will fail as state sync blocks are empty. func (m *consensusModule) validateBlock(block *coreTypes.Block) error { blockHeader := block.BlockHeader qcBytes := blockHeader.GetQuorumCertificate() diff --git a/consensus/state_sync/module.go b/consensus/state_sync/module.go index 9a417c404..e98480056 100644 --- a/consensus/state_sync/module.go +++ b/consensus/state_sync/module.go @@ -2,6 +2,7 @@ package state_sync import ( "fmt" + "time" typesCons "github.com/pokt-network/pocket/consensus/types" "github.com/pokt-network/pocket/logger" @@ -16,7 +17,7 @@ import ( const ( stateSyncModuleName = "stateSyncModule" committedBlocsChannelSize = 100 - //blockWaitingPeriod = 30 * time.Second + blockWaitingPeriod = 30 * time.Second ) type StateSyncModule interface { @@ -97,8 +98,8 @@ func (m *stateSync) SetAggregatedMetadata(aggregatedMetaData *typesCons.StateSyn func (m *stateSync) Start() error { consensusMod := m.bus.GetConsensusModule() currentHeight := consensusMod.CurrentHeight() - fmt.Println("Consensus current height: ", currentHeight) nodeAddress := consensusMod.GetNodeAddress() + readCtx, err := m.GetBus().GetPersistenceModule().NewReadContext(int64(currentHeight)) if err != nil { return err @@ -111,41 +112,40 @@ func (m *stateSync) Start() error { return err } - // TODO: maybe remove this - requestHeight := currentHeight - - // if node is starting to sync from the beginning, set the request height to 1 - // if currentHeight == 0 { - // fmt.Println("setting request height: ", 1) - // requestHeight = 1 - // } - // requests blocks from the current height to the aggregated metadata height - for requestHeight <= m.aggregatedMetaData.MaxHeight { - m.logger.Info().Msgf("Sync is requesting block: %d, ending height: %d", requestHeight, m.aggregatedMetaData.MaxHeight) + for currentHeight <= m.aggregatedMetaData.MaxHeight { + m.logger.Info().Msgf("Sync is requesting block: %d, ending height: %d", currentHeight, m.aggregatedMetaData.MaxHeight) // form the get block request message stateSyncGetBlockMessage := &typesCons.StateSyncMessage{ Message: &typesCons.StateSyncMessage_GetBlockReq{ GetBlockReq: &typesCons.GetBlockRequest{ PeerAddress: nodeAddress, - Height: requestHeight, + Height: currentHeight, }, }, } // broadcast the get block request message to all validators + // TODO: use raintree broadcast for _, val := range validators { if err := m.sendStateSyncMessage(stateSyncGetBlockMessage, cryptoPocket.AddressFromString(val.GetAddress())); err != nil { return err } } + // wait to receive requested block for blockWaitingPeriod. If the block is received next block will be requested, otherwise the current block will be requested again + select { + case blockHeight := <-m.committedBlocksChannel: + // requested block is received and committed, continue to request the next block from the current height + m.logger.Info().Msgf("Block %d is committed!", blockHeight) + case <-time.After(blockWaitingPeriod): + } + // wait for the requested block to be received and committed by consensus module - <-m.committedBlocksChannel + //<-m.committedBlocksChannel - // requested block is received and committed, continue to the next block from the current height - requestHeight = consensusMod.CurrentHeight() + currentHeight = consensusMod.CurrentHeight() } // syncing is complete and all requested blocks are committed, stop the state sync module return m.Stop() @@ -157,23 +157,17 @@ func (m *stateSync) Stop() error { currentHeight := m.bus.GetConsensusModule().CurrentHeight() nodeAddress := m.bus.GetConsensusModule().GetNodeAddress() - m.logger.Info().Msg("Syncing is complete!") - readCtx, err := m.bus.GetPersistenceModule().NewReadContext(int64(currentHeight)) if err != nil { return err } defer readCtx.Release() - fmt.Println("checking if validator...") - isValidator, err := readCtx.IsValidator(int64(currentHeight), nodeAddress) if err != nil { return err } - fmt.Println("is validator: ", isValidator) - if isValidator { return m.GetBus().GetStateMachineModule().SendEvent(coreTypes.StateMachineEvent_Consensus_IsSyncedValidator) } From 0671fd1cf5bdfa451340cc498209e1be1cd44272 Mon Sep 17 00:00:00 2001 From: github-actions Date: Thu, 4 May 2023 20:17:06 +0000 Subject: [PATCH 033/100] add generated helm docs --- charts/pocket/README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/charts/pocket/README.md b/charts/pocket/README.md index 5261fd67a..0539076c0 100644 --- a/charts/pocket/README.md +++ b/charts/pocket/README.md @@ -43,6 +43,7 @@ privateKeySecretKeyRef: | config.consensus.pacemaker_config.manual | bool | `true` | | | config.consensus.pacemaker_config.timeout_msec | int | `10000` | | | config.consensus.private_key | string | `""` | | +| config.consensus.server_mode_enabled | bool | `true` | | | config.logger.format | string | `"json"` | | | config.logger.level | string | `"debug"` | | | config.p2p.hostname | string | `""` | | From 959f60ab85ee45f8825e68b1623bb48d83c4d873 Mon Sep 17 00:00:00 2001 From: gokutheengineer Date: Thu, 4 May 2023 23:34:59 +0300 Subject: [PATCH 034/100] address comments, fix errors --- consensus/e2e_tests/utils_test.go | 19 ++++++++----------- consensus/fsm_handler.go | 4 ++-- consensus/helpers.go | 1 - consensus/pacemaker/module.go | 1 + consensus/state_sync/module.go | 2 +- shared/CHANGELOG.md | 2 +- 6 files changed, 13 insertions(+), 16 deletions(-) diff --git a/consensus/e2e_tests/utils_test.go b/consensus/e2e_tests/utils_test.go index 0c0c85e45..93f15cdb6 100644 --- a/consensus/e2e_tests/utils_test.go +++ b/consensus/e2e_tests/utils_test.go @@ -53,6 +53,7 @@ var maxTxBytes = defaults.DefaultConsensusMaxMempoolBytes type IdToNodeMapping map[typesCons.NodeId]*shared.Node type IdToPKMapping map[typesCons.NodeId]cryptoPocket.PrivateKey +/*** Placeholder Block Generation Helpers ***/ type placeholderBlocks struct { pKs IdToPKMapping blocks []*coreTypes.Block @@ -62,7 +63,7 @@ func (p *placeholderBlocks) setPKs(nodeId typesCons.NodeId, pk cryptoPocket.Priv p.pKs[nodeId] = pk } -func (p *placeholderBlocks) prepareBlocks(t *testing.T, bus modules.Bus, nodePKs IdToPKMapping) { +func (p *placeholderBlocks) preparePlaceholderBlocks(t *testing.T, bus modules.Bus, nodePKs IdToPKMapping) { i := uint64(1) for i <= numberOfPersistedDummyBlocks { @@ -82,9 +83,7 @@ func (p *placeholderBlocks) prepareBlocks(t *testing.T, bus modules.Bus, nodePKs Transactions: make([][]byte, 0), } - // TODO_IN_THIS_COMMIT: Need to redo how this is done. - qc, err := generateValidQuorumCertificate(nodePKs, block) - require.NoError(t, err) + qc := generateValidQuorumCertificate(nodePKs, block) qcBytes, err := codec.GetCodec().Marshal(qc) require.NoError(t, err) @@ -152,7 +151,7 @@ func CreateTestConsensusPocketNodes( require.NoError(t, err) blocks.setPKs(typesCons.NodeId(i+1), nodePK) } - blocks.prepareBlocks(t, buses[0], blocks.pKs) + blocks.preparePlaceholderBlocks(t, buses[0], blocks.pKs) return } @@ -161,9 +160,9 @@ func CreateTestConsensusPocketNode( t *testing.T, bus modules.Bus, eventsChannel modules.EventsChannel, - nodesAndBlocks *placeholderBlocks, + placeholderBlocks *placeholderBlocks, ) *shared.Node { - persistenceMock := basePersistenceMock(t, eventsChannel, bus, nodesAndBlocks) + persistenceMock := basePersistenceMock(t, eventsChannel, bus, placeholderBlocks) bus.RegisterModule(persistenceMock) consensusMod, err := consensus.Create(bus) @@ -919,7 +918,7 @@ func startNode(t *testing.T, pocketNode *shared.Node) { require.NoError(t, err) } -func generateValidQuorumCertificate(nodePKs IdToPKMapping, block *coreTypes.Block) (*typesCons.QuorumCertificate, error) { +func generateValidQuorumCertificate(nodePKs IdToPKMapping, block *coreTypes.Block) *typesCons.QuorumCertificate { var pss []*typesCons.PartialSignature for _, nodePK := range nodePKs { @@ -935,7 +934,7 @@ func generateValidQuorumCertificate(nodePKs IdToPKMapping, block *coreTypes.Bloc Round: 1, Block: block, ThresholdSignature: thresholdSig, - }, nil + } } // generate partial signature for the validator @@ -990,9 +989,7 @@ func checkIdentical(arr []*anypb.Any) bool { } first := arr[0] - fmt.Println("checking first: ", first) for _, msg := range arr { - fmt.Println("checking identical: ", msg) if !proto.Equal(first, msg) { return false } diff --git a/consensus/fsm_handler.go b/consensus/fsm_handler.go index abcd0978f..681295a0d 100644 --- a/consensus/fsm_handler.go +++ b/consensus/fsm_handler.go @@ -86,7 +86,6 @@ func (m *consensusModule) HandleSyncMode(msg *messaging.StateMachineTransitionEv m.stateSync.SetAggregatedMetadata(&aggregatedMetadata) go m.stateSync.StartSyncing() - //go m.stateSync.Start() return nil } @@ -106,7 +105,8 @@ func (m *consensusModule) HandlePacemaker(msg *messaging.StateMachineTransitionE // validator receives a new block proposal, and it understands that it doesn't have block and it transitions to unsycnhed state // transitioning out of this state happens when a new block proposal is received by the hotstuff_replica - // if a validator who just bootstrapped and finished state sync, it will not have a nodeId yet, which is 0. Set correct nodeId here. + // TODO: move this to a more appropriate place + // if a validator is just bootstrapped and finished state sync, it will not have a nodeId yet, which is 0. Set correct nodeId here. if m.nodeId == 0 { // valdiator node receives nodeID after reaching pacemaker. validators, err := m.getValidatorsAtHeight(m.CurrentHeight()) diff --git a/consensus/helpers.go b/consensus/helpers.go index 7e74737a0..3d6ddd743 100644 --- a/consensus/helpers.go +++ b/consensus/helpers.go @@ -233,7 +233,6 @@ func (m *consensusModule) electNextLeader(msg *typesCons.HotstuffMessage) error if err != nil { return err } - idToValAddrMap := typesCons.NewActorMapper(validators).GetIdToValAddrMap() leader, ok := idToValAddrMap[leaderId] if !ok { diff --git a/consensus/pacemaker/module.go b/consensus/pacemaker/module.go index ed3aa2074..2219d82aa 100644 --- a/consensus/pacemaker/module.go +++ b/consensus/pacemaker/module.go @@ -150,6 +150,7 @@ func (m *pacemaker) ShouldHandleMessage(msg *typesCons.HotstuffMessage) (bool, e consensusMod.SetRound(msg.Round) // TODO: Add tests for this. When we catch up to a later step, the leader is still the same. + // However, when we catch up to a later round, the leader at the same height will be different. if currentRound != msg.Round || !consensusMod.IsLeaderSet() { anyProto, err := anypb.New(msg) if err != nil { diff --git a/consensus/state_sync/module.go b/consensus/state_sync/module.go index 17bab87bb..4e4a2f49e 100644 --- a/consensus/state_sync/module.go +++ b/consensus/state_sync/module.go @@ -83,7 +83,7 @@ func (m *stateSync) SetAggregatedMetadata(aggregatedMetaData *typesCons.StateSyn m.aggregatedMetaData = aggregatedMetaData } -// TODO: Remove this. This function is added to check return value of m.Start(). +// TODO: Remove this. This function added as a hack to be able to check return value of m.Start(). func (m *stateSync) StartSyncing() { err := m.Start() if err != nil { diff --git a/shared/CHANGELOG.md b/shared/CHANGELOG.md index d58dbdd3f..ed6af4d6a 100644 --- a/shared/CHANGELOG.md +++ b/shared/CHANGELOG.md @@ -7,7 +7,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] -## [0.0.0.58] - 2023-05-03 +## [0.0.0.58] - 2023-05-04 - Exported `IsValidator()` function for Persistence module - Added `pocket.StateSyncBlockCommittedEvent` to the shared messaging events From 992a7d9abc52418c4e148621ff0f70354e8d8a65 Mon Sep 17 00:00:00 2001 From: gokutheengineer Date: Thu, 4 May 2023 23:49:42 +0300 Subject: [PATCH 035/100] nits --- consensus/e2e_tests/utils_test.go | 2 +- consensus/state_sync/module.go | 3 --- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/consensus/e2e_tests/utils_test.go b/consensus/e2e_tests/utils_test.go index 93f15cdb6..638d4cd94 100644 --- a/consensus/e2e_tests/utils_test.go +++ b/consensus/e2e_tests/utils_test.go @@ -54,6 +54,7 @@ type IdToNodeMapping map[typesCons.NodeId]*shared.Node type IdToPKMapping map[typesCons.NodeId]cryptoPocket.PrivateKey /*** Placeholder Block Generation Helpers ***/ + type placeholderBlocks struct { pKs IdToPKMapping blocks []*coreTypes.Block @@ -922,7 +923,6 @@ func generateValidQuorumCertificate(nodePKs IdToPKMapping, block *coreTypes.Bloc var pss []*typesCons.PartialSignature for _, nodePK := range nodePKs { - //ps := pss = append(pss, generatePartialSignature(block, nodePK)) } diff --git a/consensus/state_sync/module.go b/consensus/state_sync/module.go index 4e4a2f49e..7c9e12cce 100644 --- a/consensus/state_sync/module.go +++ b/consensus/state_sync/module.go @@ -107,7 +107,6 @@ func (m *stateSync) Start() error { } defer readCtx.Release() - //get the current validators validators, err := readCtx.GetAllValidators(int64(currentHeight)) if err != nil { return err @@ -144,7 +143,6 @@ func (m *stateSync) Start() error { } // wait for the requested block to be received and committed by consensus module - //<-m.committedBlocksChannel currentHeight = consensusMod.CurrentHeight() } @@ -154,7 +152,6 @@ func (m *stateSync) Start() error { // Stop stops the state sync process, and sends `Consensus_IsSyncedValidator` FSM event func (m *stateSync) Stop() error { - // check if the node is a validator currentHeight := m.bus.GetConsensusModule().CurrentHeight() nodeAddress := m.bus.GetConsensusModule().GetNodeAddress() From 582d6f0c85cc0c85f423d37066e4c716fae687e4 Mon Sep 17 00:00:00 2001 From: gokutheengineer Date: Fri, 5 May 2023 13:17:30 +0300 Subject: [PATCH 036/100] add helper --- consensus/doc/CHANGELOG.md | 2 +- consensus/e2e_tests/utils_test.go | 220 +++++++++++------------ consensus/module_consensus_state_sync.go | 2 + 3 files changed, 111 insertions(+), 113 deletions(-) diff --git a/consensus/doc/CHANGELOG.md b/consensus/doc/CHANGELOG.md index af4f9116f..cc3298df6 100644 --- a/consensus/doc/CHANGELOG.md +++ b/consensus/doc/CHANGELOG.md @@ -7,7 +7,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] -## [0.0.0.51] - 2023-05-04 +## [0.0.0.51] - 2023-05-05 - Added state sync channels `blocksReceived` and `metadataReceived`, implemented `blockApplicationLoop()`, state sync functions `Start()` and `Stop()` - Implemented `WaitForNetworkFSMEvents()` function in test utils diff --git a/consensus/e2e_tests/utils_test.go b/consensus/e2e_tests/utils_test.go index 638d4cd94..9070aaacc 100644 --- a/consensus/e2e_tests/utils_test.go +++ b/consensus/e2e_tests/utils_test.go @@ -53,54 +53,6 @@ var maxTxBytes = defaults.DefaultConsensusMaxMempoolBytes type IdToNodeMapping map[typesCons.NodeId]*shared.Node type IdToPKMapping map[typesCons.NodeId]cryptoPocket.PrivateKey -/*** Placeholder Block Generation Helpers ***/ - -type placeholderBlocks struct { - pKs IdToPKMapping - blocks []*coreTypes.Block -} - -func (p *placeholderBlocks) setPKs(nodeId typesCons.NodeId, pk cryptoPocket.PrivateKey) { - p.pKs[nodeId] = pk -} - -func (p *placeholderBlocks) preparePlaceholderBlocks(t *testing.T, bus modules.Bus, nodePKs IdToPKMapping) { - i := uint64(1) - for i <= numberOfPersistedDummyBlocks { - - leaderId := bus.GetConsensusModule().GetLeaderForView(i, uint64(0), uint8(consensus.NewRound)) - leaderPK := nodePKs[typesCons.NodeId(leaderId)] - - // Construct the block - blockHeader := &coreTypes.BlockHeader{ - Height: i, - StateHash: stateHash, - PrevStateHash: stateHash, - ProposerAddress: leaderPK.Address(), - QuorumCertificate: nil, - } - block := &coreTypes.Block{ - BlockHeader: blockHeader, - Transactions: make([][]byte, 0), - } - - qc := generateValidQuorumCertificate(nodePKs, block) - - qcBytes, err := codec.GetCodec().Marshal(qc) - require.NoError(t, err) - - block.BlockHeader.QuorumCertificate = qcBytes - - p.blocks = append(p.blocks, block) - i++ - } -} - -func (p *placeholderBlocks) getBlock(index uint64) *coreTypes.Block { - // get block at index -1, because block 1 is stored at index 0 of the blocks array - return p.blocks[index-1] -} - /*** Node Generation Helpers ***/ func GenerateNodeRuntimeMgrs(_ *testing.T, validatorCount int, clockMgr clock.Clock) []*runtime.Manager { @@ -804,6 +756,7 @@ func waitForNodeToCatchUp( unsyncedNode *shared.Node, targetHeight uint64, ) error { + t.Helper() // wait for unsynced node to send StateMachineEvent_Consensus_IsSyncedValidator event _, err := WaitForNetworkFSMEvents(t, clck, eventsChannel, coreTypes.StateMachineEvent_Consensus_IsSyncedValidator, "didn't receive synced event", 1, 500, false) require.NoError(t, err) @@ -854,6 +807,113 @@ func baseLoggerMock(t *testing.T, _ modules.EventsChannel) *mockModules.MockLogg return loggerMock } +/*** Placeholder Block Generation Helpers ***/ + +type placeholderBlocks struct { + pKs IdToPKMapping + blocks []*coreTypes.Block +} + +func (p *placeholderBlocks) setPKs(nodeId typesCons.NodeId, pk cryptoPocket.PrivateKey) { + p.pKs[nodeId] = pk +} + +func (p *placeholderBlocks) getBlock(index uint64) *coreTypes.Block { + // get block at index -1, because block 1 is stored at index 0 of the blocks array + return p.blocks[index-1] +} + +func (p *placeholderBlocks) preparePlaceholderBlocks(t *testing.T, bus modules.Bus, nodePKs IdToPKMapping) { + i := uint64(1) + for i <= numberOfPersistedDummyBlocks { + + leaderId := bus.GetConsensusModule().GetLeaderForView(i, uint64(0), uint8(consensus.NewRound)) + leaderPK := nodePKs[typesCons.NodeId(leaderId)] + + // Construct the block + blockHeader := &coreTypes.BlockHeader{ + Height: i, + StateHash: stateHash, + PrevStateHash: stateHash, + ProposerAddress: leaderPK.Address(), + QuorumCertificate: nil, + } + block := &coreTypes.Block{ + BlockHeader: blockHeader, + Transactions: make([][]byte, 0), + } + + qc := generateValidQuorumCertificate(nodePKs, block) + + qcBytes, err := codec.GetCodec().Marshal(qc) + require.NoError(t, err) + + block.BlockHeader.QuorumCertificate = qcBytes + + p.blocks = append(p.blocks, block) + i++ + } +} + +/*** Quorum certificate Generation Helpers ***/ + +func generateValidQuorumCertificate(nodePKs IdToPKMapping, block *coreTypes.Block) *typesCons.QuorumCertificate { + var pss []*typesCons.PartialSignature + + for _, nodePK := range nodePKs { + pss = append(pss, generatePartialSignature(block, nodePK)) + } + + // Generate threshold signature + thresholdSig := new(typesCons.ThresholdSignature) + thresholdSig.Signatures = make([]*typesCons.PartialSignature, len(pss)) + copy(thresholdSig.Signatures, pss) + + return &typesCons.QuorumCertificate{ + Height: block.BlockHeader.Height, + Step: 1, + Round: 1, + Block: block, + ThresholdSignature: thresholdSig, + } +} + +// generate partial signature for the validator +func generatePartialSignature(block *coreTypes.Block, nodePK cryptoPocket.PrivateKey) *typesCons.PartialSignature { + return &typesCons.PartialSignature{ + Signature: getMessageSignature(block, nodePK), + Address: nodePK.PublicKey().Address().String(), + } +} + +// Generates partial signature with given private key +// If there is an error signing the bytes, nil is returned instead. +func getMessageSignature(block *coreTypes.Block, privKey cryptoPocket.PrivateKey) []byte { + // Signature only over subset of fields in HotstuffMessage + // For reference, see section 4.3 of the the hotstuff whitepaper, partial signatures are + // computed over `tsignr(hm.type, m.viewNumber , m.nodei)`. https://arxiv.org/pdf/1803.05069.pdf + msgToSign := &typesCons.HotstuffMessage{ + Height: block.BlockHeader.Height, + Step: 1, + Round: 1, + Block: block, + } + + bytesToSign, err := codec.GetCodec().Marshal(msgToSign) + if err != nil { + logger.Global.Warn().Err(err).Msgf("error getting bytes to sign") + return nil + } + + signature, err := privKey.Sign(bytesToSign) + if err != nil { + logger.Global.Warn().Err(err).Msgf("error signing message") + return nil + } + + return signature +} + func logTime(t *testing.T, clck *clock.Mock) { t.Helper() defer func() { @@ -919,70 +979,6 @@ func startNode(t *testing.T, pocketNode *shared.Node) { require.NoError(t, err) } -func generateValidQuorumCertificate(nodePKs IdToPKMapping, block *coreTypes.Block) *typesCons.QuorumCertificate { - var pss []*typesCons.PartialSignature - - for _, nodePK := range nodePKs { - pss = append(pss, generatePartialSignature(block, nodePK)) - } - - thresholdSig := getThresholdSignature(pss) - - return &typesCons.QuorumCertificate{ - Height: block.BlockHeader.Height, - Step: 1, - Round: 1, - Block: block, - ThresholdSignature: thresholdSig, - } -} - -// generate partial signature for the validator -func generatePartialSignature(block *coreTypes.Block, nodePK cryptoPocket.PrivateKey) *typesCons.PartialSignature { - return &typesCons.PartialSignature{ - Signature: getMessageSignature(block, nodePK), - Address: nodePK.PublicKey().Address().String(), - } -} - -func getThresholdSignature(partialSigs []*typesCons.PartialSignature) *typesCons.ThresholdSignature { - thresholdSig := new(typesCons.ThresholdSignature) - thresholdSig.Signatures = make([]*typesCons.PartialSignature, len(partialSigs)) - copy(thresholdSig.Signatures, partialSigs) - return thresholdSig -} - -// Generates partial signature with given private key -// If there is an error signing the bytes, nil is returned instead. -func getMessageSignature(block *coreTypes.Block, privKey cryptoPocket.PrivateKey) []byte { - bytesToSign, err := getSignableBytes(block) - if err != nil { - logger.Global.Warn().Err(err).Msgf("error getting bytes to sign") - return nil - } - - signature, err := privKey.Sign(bytesToSign) - if err != nil { - logger.Global.Warn().Err(err).Msgf("error signing message") - return nil - } - - return signature -} - -// Signature only over subset of fields in HotstuffMessage -// For reference, see section 4.3 of the the hotstuff whitepaper, partial signatures are -// computed over `tsignr(hm.type, m.viewNumber , m.nodei)`. https://arxiv.org/pdf/1803.05069.pdf -func getSignableBytes(block *coreTypes.Block) ([]byte, error) { - msgToSign := &typesCons.HotstuffMessage{ - Height: block.BlockHeader.Height, - Step: 1, - Round: 1, - Block: block, - } - return codec.GetCodec().Marshal(msgToSign) -} - func checkIdentical(arr []*anypb.Any) bool { if len(arr) == 0 { return true diff --git a/consensus/module_consensus_state_sync.go b/consensus/module_consensus_state_sync.go index 9cec7b77c..74b1946f2 100644 --- a/consensus/module_consensus_state_sync.go +++ b/consensus/module_consensus_state_sync.go @@ -36,6 +36,8 @@ func (m *consensusModule) GetNodeAddress() string { func (m *consensusModule) blockApplicationLoop() { logger := m.logger.With().Str("source", "blockApplicationLoop").Logger() + //switch + for blockResponse := range m.blocksResponsesReceived { block := blockResponse.Block logger.Info().Msgf("New block, at height %d is received!", block.BlockHeader.Height) From 97fb9ef676ac5fca51db35686797e858060cec3c Mon Sep 17 00:00:00 2001 From: Daniel Olshansky Date: Wed, 24 May 2023 12:54:35 -0700 Subject: [PATCH 037/100] Lowercase some local testing types for consensus --- consensus/doc/CHANGELOG.md | 6 ++++-- consensus/e2e_tests/utils_test.go | 28 ++++++++++++++-------------- 2 files changed, 18 insertions(+), 16 deletions(-) diff --git a/consensus/doc/CHANGELOG.md b/consensus/doc/CHANGELOG.md index cc3298df6..7005aeade 100644 --- a/consensus/doc/CHANGELOG.md +++ b/consensus/doc/CHANGELOG.md @@ -9,7 +9,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [0.0.0.51] - 2023-05-05 -- Added state sync channels `blocksReceived` and `metadataReceived`, implemented `blockApplicationLoop()`, state sync functions `Start()` and `Stop()` +- Added state sync channels `blocksReceived` and `metadataReceived` +- Implemented `blockApplicationLoop()` +- Implemented state sync module functions `Start()` and `Stop()` - Implemented `WaitForNetworkFSMEvents()` function in test utils ## [0.0.0.50] - 2023-05-03 @@ -26,7 +28,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [0.0.0.47] - 2023-04-17 -- Log warnings in `handleStateSyncMessage()` +- Log warnings in `handleStateSyncMessage()` ## [0.0.0.46] - 2023-04-13 diff --git a/consensus/e2e_tests/utils_test.go b/consensus/e2e_tests/utils_test.go index 9070aaacc..34d606af9 100644 --- a/consensus/e2e_tests/utils_test.go +++ b/consensus/e2e_tests/utils_test.go @@ -50,8 +50,8 @@ const ( var maxTxBytes = defaults.DefaultConsensusMaxMempoolBytes -type IdToNodeMapping map[typesCons.NodeId]*shared.Node -type IdToPKMapping map[typesCons.NodeId]cryptoPocket.PrivateKey +type idToNodeMapping map[typesCons.NodeId]*shared.Node +type idToPKMapping map[typesCons.NodeId]cryptoPocket.PrivateKey /*** Node Generation Helpers ***/ @@ -80,8 +80,8 @@ func CreateTestConsensusPocketNodes( t *testing.T, buses []modules.Bus, eventsChannel modules.EventsChannel, -) (pocketNodes IdToNodeMapping) { - pocketNodes = make(IdToNodeMapping, len(buses)) +) (pocketNodes idToNodeMapping) { + pocketNodes = make(idToNodeMapping, len(buses)) // TODO(design): The order here is important in order for NodeId to be set correctly below. // This logic will need to change once proper leader election is implemented. sort.Slice(buses, func(i, j int) bool { @@ -93,7 +93,7 @@ func CreateTestConsensusPocketNodes( }) blocks := &placeholderBlocks{ - pKs: make(IdToPKMapping, len(buses)), + pKs: make(idToPKMapping, len(buses)), } for i := range buses { @@ -168,7 +168,7 @@ func GenerateBuses(t *testing.T, runtimeMgrs []*runtime.Manager) (buses []module } // CLEANUP: Reduce package scope visibility in the consensus test module -func StartAllTestPocketNodes(t *testing.T, pocketNodes IdToNodeMapping) error { +func StartAllTestPocketNodes(t *testing.T, pocketNodes idToNodeMapping) error { for _, pocketNode := range pocketNodes { go startNode(t, pocketNode) startEvent := pocketNode.GetBus().GetBusEvent() @@ -221,7 +221,7 @@ func triggerDebugMessage(t *testing.T, node *shared.Node, action messaging.Debug /*** P2P Helpers ***/ -func P2PBroadcast(_ *testing.T, nodes IdToNodeMapping, any *anypb.Any) { +func P2PBroadcast(_ *testing.T, nodes idToNodeMapping, any *anypb.Any) { e := &messaging.PocketEnvelope{Content: any} for _, node := range nodes { node.GetBus().PublishEventToBus(e) @@ -575,7 +575,7 @@ func WaitForNextBlock( t *testing.T, clck *clock.Mock, eventsChannel modules.EventsChannel, - pocketNodes IdToNodeMapping, + pocketNodes idToNodeMapping, height uint64, round uint8, maxWaitTime time.Duration, @@ -648,7 +648,7 @@ func waitForProposalMsgs( t *testing.T, clck *clock.Mock, eventsChannel modules.EventsChannel, - pocketNodes IdToNodeMapping, + pocketNodes idToNodeMapping, height uint64, step uint8, round uint8, @@ -688,7 +688,7 @@ func waitForProposalMsgs( return proposalMsgs, nil } -func broadcastMessages(t *testing.T, msgs []*anypb.Any, pocketNodes IdToNodeMapping) { +func broadcastMessages(t *testing.T, msgs []*anypb.Any, pocketNodes idToNodeMapping) { for _, message := range msgs { P2PBroadcast(t, pocketNodes, message) } @@ -705,7 +705,7 @@ func WaitForNodeToSync( clck *clock.Mock, eventsChannel modules.EventsChannel, unsyncedNode *shared.Node, - allNodes IdToNodeMapping, + allNodes idToNodeMapping, targetHeight uint64, ) { t.Helper() @@ -810,7 +810,7 @@ func baseLoggerMock(t *testing.T, _ modules.EventsChannel) *mockModules.MockLogg /*** Placeholder Block Generation Helpers ***/ type placeholderBlocks struct { - pKs IdToPKMapping + pKs idToPKMapping blocks []*coreTypes.Block } @@ -823,7 +823,7 @@ func (p *placeholderBlocks) getBlock(index uint64) *coreTypes.Block { return p.blocks[index-1] } -func (p *placeholderBlocks) preparePlaceholderBlocks(t *testing.T, bus modules.Bus, nodePKs IdToPKMapping) { +func (p *placeholderBlocks) preparePlaceholderBlocks(t *testing.T, bus modules.Bus, nodePKs idToPKMapping) { i := uint64(1) for i <= numberOfPersistedDummyBlocks { @@ -857,7 +857,7 @@ func (p *placeholderBlocks) preparePlaceholderBlocks(t *testing.T, bus modules.B /*** Quorum certificate Generation Helpers ***/ -func generateValidQuorumCertificate(nodePKs IdToPKMapping, block *coreTypes.Block) *typesCons.QuorumCertificate { +func generateValidQuorumCertificate(nodePKs idToPKMapping, block *coreTypes.Block) *typesCons.QuorumCertificate { var pss []*typesCons.PartialSignature for _, nodePK := range nodePKs { From 19a3a2e934d50258ffdb58c3861e67dff231769a Mon Sep 17 00:00:00 2001 From: Daniel Olshansky Date: Wed, 24 May 2023 12:57:07 -0700 Subject: [PATCH 038/100] Rename PKs to privKeys --- consensus/e2e_tests/utils_test.go | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/consensus/e2e_tests/utils_test.go b/consensus/e2e_tests/utils_test.go index 34d606af9..f4155990b 100644 --- a/consensus/e2e_tests/utils_test.go +++ b/consensus/e2e_tests/utils_test.go @@ -51,7 +51,7 @@ const ( var maxTxBytes = defaults.DefaultConsensusMaxMempoolBytes type idToNodeMapping map[typesCons.NodeId]*shared.Node -type idToPKMapping map[typesCons.NodeId]cryptoPocket.PrivateKey +type idToPrivKeyMapping map[typesCons.NodeId]cryptoPocket.PrivateKey /*** Node Generation Helpers ***/ @@ -93,7 +93,7 @@ func CreateTestConsensusPocketNodes( }) blocks := &placeholderBlocks{ - pKs: make(idToPKMapping, len(buses)), + privKeys: make(idToPrivKeyMapping, len(buses)), } for i := range buses { @@ -102,9 +102,9 @@ func CreateTestConsensusPocketNodes( pocketNodes[typesCons.NodeId(i+1)] = pocketNode nodePK, err := cryptoPocket.NewPrivateKey(pocketNode.GetBus().GetRuntimeMgr().GetConfig().PrivateKey) require.NoError(t, err) - blocks.setPKs(typesCons.NodeId(i+1), nodePK) + blocks.setPrivKeys(typesCons.NodeId(i+1), nodePK) } - blocks.preparePlaceholderBlocks(t, buses[0], blocks.pKs) + blocks.preparePlaceholderBlocks(t, buses[0], blocks.privKeys) return } @@ -810,12 +810,12 @@ func baseLoggerMock(t *testing.T, _ modules.EventsChannel) *mockModules.MockLogg /*** Placeholder Block Generation Helpers ***/ type placeholderBlocks struct { - pKs idToPKMapping - blocks []*coreTypes.Block + privKeys idToPrivKeyMapping + blocks []*coreTypes.Block } -func (p *placeholderBlocks) setPKs(nodeId typesCons.NodeId, pk cryptoPocket.PrivateKey) { - p.pKs[nodeId] = pk +func (p *placeholderBlocks) setPrivKeys(nodeId typesCons.NodeId, privKey cryptoPocket.PrivateKey) { + p.privKeys[nodeId] = privKey } func (p *placeholderBlocks) getBlock(index uint64) *coreTypes.Block { @@ -823,19 +823,19 @@ func (p *placeholderBlocks) getBlock(index uint64) *coreTypes.Block { return p.blocks[index-1] } -func (p *placeholderBlocks) preparePlaceholderBlocks(t *testing.T, bus modules.Bus, nodePKs idToPKMapping) { +func (p *placeholderBlocks) preparePlaceholderBlocks(t *testing.T, bus modules.Bus, nodePrivKeys idToPrivKeyMapping) { i := uint64(1) for i <= numberOfPersistedDummyBlocks { leaderId := bus.GetConsensusModule().GetLeaderForView(i, uint64(0), uint8(consensus.NewRound)) - leaderPK := nodePKs[typesCons.NodeId(leaderId)] + leaderPivKey := nodePrivKeys[typesCons.NodeId(leaderId)] // Construct the block blockHeader := &coreTypes.BlockHeader{ Height: i, StateHash: stateHash, PrevStateHash: stateHash, - ProposerAddress: leaderPK.Address(), + ProposerAddress: leaderPivKey.Address(), QuorumCertificate: nil, } block := &coreTypes.Block{ @@ -843,7 +843,7 @@ func (p *placeholderBlocks) preparePlaceholderBlocks(t *testing.T, bus modules.B Transactions: make([][]byte, 0), } - qc := generateValidQuorumCertificate(nodePKs, block) + qc := generateValidQuorumCertificate(nodePrivKeys, block) qcBytes, err := codec.GetCodec().Marshal(qc) require.NoError(t, err) @@ -857,7 +857,7 @@ func (p *placeholderBlocks) preparePlaceholderBlocks(t *testing.T, bus modules.B /*** Quorum certificate Generation Helpers ***/ -func generateValidQuorumCertificate(nodePKs idToPKMapping, block *coreTypes.Block) *typesCons.QuorumCertificate { +func generateValidQuorumCertificate(nodePKs idToPrivKeyMapping, block *coreTypes.Block) *typesCons.QuorumCertificate { var pss []*typesCons.PartialSignature for _, nodePK := range nodePKs { From 81af631edbc2e772f7a2937c18dd021e03837883 Mon Sep 17 00:00:00 2001 From: Daniel Olshansky Date: Wed, 24 May 2023 13:15:27 -0700 Subject: [PATCH 039/100] Remove privKeys from placeholderBlocks --- consensus/e2e_tests/hotstuff_test.go | 2 +- consensus/e2e_tests/pacemaker_test.go | 4 +-- consensus/e2e_tests/state_sync_test.go | 8 +++--- consensus/e2e_tests/utils_test.go | 35 ++++++++++++-------------- 4 files changed, 23 insertions(+), 26 deletions(-) diff --git a/consensus/e2e_tests/hotstuff_test.go b/consensus/e2e_tests/hotstuff_test.go index f2f314853..773b9028a 100644 --- a/consensus/e2e_tests/hotstuff_test.go +++ b/consensus/e2e_tests/hotstuff_test.go @@ -24,7 +24,7 @@ func TestHotstuff4Nodes1BlockHappyPath(t *testing.T) { // Create & start test pocket nodes eventsChannel := make(modules.EventsChannel, 100) - pocketNodes := CreateTestConsensusPocketNodes(t, buses, eventsChannel) + pocketNodes := createTestConsensusPocketNodes(t, buses, eventsChannel) err := StartAllTestPocketNodes(t, pocketNodes) require.NoError(t, err) diff --git a/consensus/e2e_tests/pacemaker_test.go b/consensus/e2e_tests/pacemaker_test.go index 6255b1fb0..997f3ae4c 100644 --- a/consensus/e2e_tests/pacemaker_test.go +++ b/consensus/e2e_tests/pacemaker_test.go @@ -32,7 +32,7 @@ func TestPacemakerTimeoutIncreasesRound(t *testing.T) { // Create & start test pocket nodes eventsChannel := make(modules.EventsChannel, 100) - pocketNodes := CreateTestConsensusPocketNodes(t, buses, eventsChannel) + pocketNodes := createTestConsensusPocketNodes(t, buses, eventsChannel) err := StartAllTestPocketNodes(t, pocketNodes) require.NoError(t, err) @@ -82,7 +82,7 @@ func TestPacemakerCatchupSameStepDifferentRounds(t *testing.T) { // Create & start test pocket nodes eventsChannel := make(modules.EventsChannel, 100) - pocketNodes := CreateTestConsensusPocketNodes(t, buses, eventsChannel) + pocketNodes := createTestConsensusPocketNodes(t, buses, eventsChannel) err := StartAllTestPocketNodes(t, pocketNodes) require.NoError(t, err) diff --git a/consensus/e2e_tests/state_sync_test.go b/consensus/e2e_tests/state_sync_test.go index 0066432d7..871dcb1c7 100644 --- a/consensus/e2e_tests/state_sync_test.go +++ b/consensus/e2e_tests/state_sync_test.go @@ -24,7 +24,7 @@ func TestStateSync_ServerGetMetaDataReq_Success(t *testing.T) { // Create & start test pocket nodes eventsChannel := make(modules.EventsChannel, 100) - pocketNodes := CreateTestConsensusPocketNodes(t, buses, eventsChannel) + pocketNodes := createTestConsensusPocketNodes(t, buses, eventsChannel) err := StartAllTestPocketNodes(t, pocketNodes) require.NoError(t, err) @@ -82,7 +82,7 @@ func TestStateSync_ServerGetBlock_Success(t *testing.T) { // Create & start test pocket nodes eventsChannel := make(modules.EventsChannel, 100) - pocketNodes := CreateTestConsensusPocketNodes(t, buses, eventsChannel) + pocketNodes := createTestConsensusPocketNodes(t, buses, eventsChannel) err := StartAllTestPocketNodes(t, pocketNodes) require.NoError(t, err) @@ -138,7 +138,7 @@ func TestStateSync_ServerGetBlock_FailNonExistingBlock(t *testing.T) { // Create & start test pocket nodes eventsChannel := make(modules.EventsChannel, 100) - pocketNodes := CreateTestConsensusPocketNodes(t, buses, eventsChannel) + pocketNodes := createTestConsensusPocketNodes(t, buses, eventsChannel) err := StartAllTestPocketNodes(t, pocketNodes) require.NoError(t, err) @@ -180,7 +180,7 @@ func TestStateSync_UnsyncedPeerSyncs_Success(t *testing.T) { // Create & start test pocket nodes eventsChannel := make(modules.EventsChannel, 100) - pocketNodes := CreateTestConsensusPocketNodes(t, buses, eventsChannel) + pocketNodes := createTestConsensusPocketNodes(t, buses, eventsChannel) err := StartAllTestPocketNodes(t, pocketNodes) require.NoError(t, err) diff --git a/consensus/e2e_tests/utils_test.go b/consensus/e2e_tests/utils_test.go index f4155990b..2e4b0be15 100644 --- a/consensus/e2e_tests/utils_test.go +++ b/consensus/e2e_tests/utils_test.go @@ -76,13 +76,13 @@ func GenerateNodeRuntimeMgrs(_ *testing.T, validatorCount int, clockMgr clock.Cl return runtimeMgrs } -func CreateTestConsensusPocketNodes( +func createTestConsensusPocketNodes( t *testing.T, buses []modules.Bus, eventsChannel modules.EventsChannel, ) (pocketNodes idToNodeMapping) { pocketNodes = make(idToNodeMapping, len(buses)) - // TODO(design): The order here is important in order for NodeId to be set correctly below. + // TECHDEBT: The order here is important in order for NodeIds to be set correctly below. // This logic will need to change once proper leader election is implemented. sort.Slice(buses, func(i, j int) bool { pk, err := cryptoPocket.NewPrivateKey(buses[i].GetRuntimeMgr().GetConfig().PrivateKey) @@ -92,24 +92,26 @@ func CreateTestConsensusPocketNodes( return pk.Address().String() < pk2.Address().String() }) - blocks := &placeholderBlocks{ - privKeys: make(idToPrivKeyMapping, len(buses)), - } + blocks := &placeholderBlocks{} + + privKeys := make(idToPrivKeyMapping, len(buses)) + for i, bus := range buses { + nodeId := typesCons.NodeId(i + 1) + + pocketNode := createTestConsensusPocketNode(t, bus, eventsChannel, blocks) + pocketNodes[nodeId] = pocketNode - for i := range buses { - pocketNode := CreateTestConsensusPocketNode(t, buses[i], eventsChannel, blocks) - // TODO(olshansky): Figure this part out. - pocketNodes[typesCons.NodeId(i+1)] = pocketNode nodePK, err := cryptoPocket.NewPrivateKey(pocketNode.GetBus().GetRuntimeMgr().GetConfig().PrivateKey) require.NoError(t, err) - blocks.setPrivKeys(typesCons.NodeId(i+1), nodePK) + + privKeys[nodeId] = nodePK } - blocks.preparePlaceholderBlocks(t, buses[0], blocks.privKeys) + blocks.preparePlaceholderBlocks(t, buses[0], privKeys) return } // Creates a pocket node where all the primary modules, exception for consensus, are mocked -func CreateTestConsensusPocketNode( +func createTestConsensusPocketNode( t *testing.T, bus modules.Bus, eventsChannel modules.EventsChannel, @@ -810,16 +812,11 @@ func baseLoggerMock(t *testing.T, _ modules.EventsChannel) *mockModules.MockLogg /*** Placeholder Block Generation Helpers ***/ type placeholderBlocks struct { - privKeys idToPrivKeyMapping - blocks []*coreTypes.Block -} - -func (p *placeholderBlocks) setPrivKeys(nodeId typesCons.NodeId, privKey cryptoPocket.PrivateKey) { - p.privKeys[nodeId] = privKey + blocks []*coreTypes.Block } func (p *placeholderBlocks) getBlock(index uint64) *coreTypes.Block { - // get block at index -1, because block 1 is stored at index 0 of the blocks array + // get block at index-1, because block 1 is stored at index 0 of the blocks array return p.blocks[index-1] } From f74fb4ca61aab23794795dc7d6401746b2fd26a6 Mon Sep 17 00:00:00 2001 From: Daniel Olshansky Date: Wed, 24 May 2023 14:22:42 -0700 Subject: [PATCH 040/100] Cleaning up some test code --- consensus/e2e_tests/state_sync_test.go | 2 +- consensus/e2e_tests/utils_test.go | 109 +++++++++++-------------- consensus/types/proto/hotstuff.proto | 1 + shared/core/types/proto/block.proto | 16 ++-- 4 files changed, 61 insertions(+), 67 deletions(-) diff --git a/consensus/e2e_tests/state_sync_test.go b/consensus/e2e_tests/state_sync_test.go index 871dcb1c7..8fe40000d 100644 --- a/consensus/e2e_tests/state_sync_test.go +++ b/consensus/e2e_tests/state_sync_test.go @@ -266,7 +266,7 @@ func TestStateSync_UnsyncedPeerSyncs_Success(t *testing.T) { _, err = WaitForNetworkConsensusEvents(t, clockMock, eventsChannel, consensus.Prepare, consensus.Propose, numValidators, 500, true) require.NoError(t, err) - WaitForNodeToSync(t, clockMock, eventsChannel, unsyncedNode, pocketNodes, 3) + waitForNodeToSync(t, clockMock, eventsChannel, unsyncedNode, pocketNodes, 3) require.NoError(t, err) } diff --git a/consensus/e2e_tests/utils_test.go b/consensus/e2e_tests/utils_test.go index 2e4b0be15..1db16b611 100644 --- a/consensus/e2e_tests/utils_test.go +++ b/consensus/e2e_tests/utils_test.go @@ -13,7 +13,6 @@ import ( "github.com/golang/mock/gomock" "github.com/pokt-network/pocket/consensus" typesCons "github.com/pokt-network/pocket/consensus/types" - "github.com/pokt-network/pocket/logger" mocksPer "github.com/pokt-network/pocket/persistence/types/mocks" "github.com/pokt-network/pocket/runtime" "github.com/pokt-network/pocket/runtime/configs" @@ -43,9 +42,9 @@ func TestMain(m *testing.M) { // TODO(integration): These are temporary variables used in the prototype integration phase that // will need to be parameterized later once the test framework design matures. const ( - numValidators = 4 - stateHash = "42" - numberOfPersistedDummyBlocks = 200 + numValidators = 4 + dummyStateHash = "42" + numMockedBlocks = 200 ) var maxTxBytes = defaults.DefaultConsensusMaxMempoolBytes @@ -94,19 +93,19 @@ func createTestConsensusPocketNodes( blocks := &placeholderBlocks{} - privKeys := make(idToPrivKeyMapping, len(buses)) + validatorPrivKeys := make(idToPrivKeyMapping, len(buses)) for i, bus := range buses { nodeId := typesCons.NodeId(i + 1) pocketNode := createTestConsensusPocketNode(t, bus, eventsChannel, blocks) pocketNodes[nodeId] = pocketNode - nodePK, err := cryptoPocket.NewPrivateKey(pocketNode.GetBus().GetRuntimeMgr().GetConfig().PrivateKey) + validatorPrivKey, err := cryptoPocket.NewPrivateKey(pocketNode.GetBus().GetRuntimeMgr().GetConfig().PrivateKey) require.NoError(t, err) - privKeys[nodeId] = nodePK + validatorPrivKeys[nodeId] = validatorPrivKey } - blocks.preparePlaceholderBlocks(t, buses[0], privKeys) + blocks.preparePlaceholderBlocks(t, buses[0], validatorPrivKeys, numMockedBlocks) return } @@ -422,11 +421,12 @@ func basePersistenceMock(t *testing.T, _ modules.EventsChannel, bus modules.Bus, persistenceMock.EXPECT().GetBlockStore().Return(blockStoreMock).AnyTimes() persistenceReadContextMock.EXPECT().GetMaximumBlockHeight().DoAndReturn(func() (uint64, error) { - // if it is checked for an unsynched node, return the current height - 1 - if int(bus.GetConsensusModule().CurrentHeight()) <= numberOfPersistedDummyBlocks { + // Check that we are retrieving a block at a height that was mocked by our test suite + if int(bus.GetConsensusModule().CurrentHeight()) <= len(placeholderBlocks.blocks) { return bus.GetConsensusModule().CurrentHeight() - 1, nil } - return uint64(numberOfPersistedDummyBlocks), nil + t.Error("Trying to retrieve a block at a height that was not mocked.") + return 0, nil }).AnyTimes() persistenceReadContextMock.EXPECT().GetMinimumBlockHeight().DoAndReturn(func() (uint64, error) { @@ -507,7 +507,7 @@ func baseLeaderUtilityUnitOfWorkMock(t *testing.T, genesisState *genesis.Genesis utilityLeaderUnitOfWorkMock.EXPECT(). CreateProposalBlock(gomock.Any(), maxTxBytes). - Return(stateHash, make([][]byte, 0), nil). + Return(dummyStateHash, make([][]byte, 0), nil). AnyTimes() utilityLeaderUnitOfWorkMock.EXPECT(). ApplyBlock(). @@ -515,7 +515,7 @@ func baseLeaderUtilityUnitOfWorkMock(t *testing.T, genesisState *genesis.Genesis AnyTimes() utilityLeaderUnitOfWorkMock.EXPECT(). GetStateHash(). - Return(stateHash). + Return(dummyStateHash). AnyTimes() utilityLeaderUnitOfWorkMock.EXPECT().SetProposalBlock(gomock.Any(), gomock.Any(), gomock.Any()).Return(nil).AnyTimes() utilityLeaderUnitOfWorkMock.EXPECT().Commit(gomock.Any()).Return(nil).AnyTimes() @@ -539,7 +539,7 @@ func baseReplicaUtilityUnitOfWorkMock(t *testing.T, genesisState *genesis.Genesi AnyTimes() utilityReplicaUnitOfWorkMock.EXPECT(). GetStateHash(). - Return(stateHash). + Return(dummyStateHash). AnyTimes() utilityReplicaUnitOfWorkMock.EXPECT().SetProposalBlock(gomock.Any(), gomock.Any(), gomock.Any()).Return(nil).AnyTimes() utilityReplicaUnitOfWorkMock.EXPECT().Commit(gomock.Any()).Return(nil).AnyTimes() @@ -696,13 +696,13 @@ func broadcastMessages(t *testing.T, msgs []*anypb.Any, pocketNodes idToNodeMapp } } -// WaitForNodeToSync waits for a node to sync to a target height -// For every missing block for the unsynced node: +// waitForNodeToSync waits for a node to sync to a target height. // -// first, waits for the unsynced node to request a missing block via `waitForNodeToRequestMissingBlock()` function, -// then, waits for other nodes to send the requested block via `waitForNodesToReplyToBlockRequest()` function, -// finally, wait for the node to catch up to the target height via `waitForNodeToCatchUp()` function. -func WaitForNodeToSync( +// For every block the unsynched node is missing: +// 1. Wait for the unsynched node to request a missing block via `waitForNodeToRequestMissingBlock()` +// 2. Wait for other nodes to send the requested block via `waitForNodesToReplyToBlockRequest()` +// 3. Wait for the node to catch up to the target height via `waitForNodeToCatchUp()` +func waitForNodeToSync( t *testing.T, clck *clock.Mock, eventsChannel modules.EventsChannel, @@ -711,8 +711,8 @@ func WaitForNodeToSync( targetHeight uint64, ) { t.Helper() - currentHeight := unsyncedNode.GetBus().GetConsensusModule().CurrentHeight() + currentHeight := unsyncedNode.GetBus().GetConsensusModule().CurrentHeight() for currentHeight < targetHeight { // waiting for unsynced node to request the same missing block from all peers. blockRequests, err := WaitForNetworkStateSyncEvents(t, clck, eventsChannel, "Error while waiting for block response messages.", numValidators, 250, true) @@ -743,8 +743,7 @@ func WaitForNodeToSync( advanceTime(t, clck, 10*time.Millisecond) // waiting for node to reach to the next height (currentHeight + 1) - err = waitForNodeToCatchUp(t, clck, eventsChannel, unsyncedNode, currentHeight+1) - require.NoError(t, err) + waitForNodeToCatchUp(t, clck, eventsChannel, unsyncedNode, currentHeight+1) currentHeight = unsyncedNode.GetBus().GetConsensusModule().CurrentHeight() } @@ -757,23 +756,23 @@ func waitForNodeToCatchUp( eventsChannel modules.EventsChannel, unsyncedNode *shared.Node, targetHeight uint64, -) error { +) { t.Helper() + // wait for unsynced node to send StateMachineEvent_Consensus_IsSyncedValidator event _, err := WaitForNetworkFSMEvents(t, clck, eventsChannel, coreTypes.StateMachineEvent_Consensus_IsSyncedValidator, "didn't receive synced event", 1, 500, false) require.NoError(t, err) // ensure unsynced node caught up to the target height nodeState := GetConsensusNodeState(unsyncedNode) - assertHeight(t, typesCons.NodeId(unsyncedNode.GetBus().GetConsensusModule().GetNodeId()), targetHeight, nodeState.Height) - - return err + nodeId := typesCons.NodeId(unsyncedNode.GetBus().GetConsensusModule().GetNodeId()) + assertHeight(t, nodeId, targetHeight, nodeState.Height) } func generatePlaceholderBlock(height uint64, leaderAddrr crypto.Address) *coreTypes.Block { blockHeader := &coreTypes.BlockHeader{ Height: height, - StateHash: stateHash, + StateHash: dummyStateHash, PrevStateHash: "", ProposerAddress: leaderAddrr, QuorumCertificate: nil, @@ -816,49 +815,46 @@ type placeholderBlocks struct { } func (p *placeholderBlocks) getBlock(index uint64) *coreTypes.Block { - // get block at index-1, because block 1 is stored at index 0 of the blocks array + // returning block at index-1, because block 1 is stored at index 0 of the blocks array return p.blocks[index-1] } -func (p *placeholderBlocks) preparePlaceholderBlocks(t *testing.T, bus modules.Bus, nodePrivKeys idToPrivKeyMapping) { - i := uint64(1) - for i <= numberOfPersistedDummyBlocks { - +func (p *placeholderBlocks) preparePlaceholderBlocks(t *testing.T, bus modules.Bus, validatorPrivKeys idToPrivKeyMapping, numMockedBlocks uint64) { + t.Helper() + for i := uint64(1); i <= numMockedBlocks; i++ { leaderId := bus.GetConsensusModule().GetLeaderForView(i, uint64(0), uint8(consensus.NewRound)) - leaderPivKey := nodePrivKeys[typesCons.NodeId(leaderId)] + leaderPivKey := validatorPrivKeys[typesCons.NodeId(leaderId)] // Construct the block blockHeader := &coreTypes.BlockHeader{ Height: i, - StateHash: stateHash, - PrevStateHash: stateHash, + StateHash: dummyStateHash, + PrevStateHash: dummyStateHash, ProposerAddress: leaderPivKey.Address(), - QuorumCertificate: nil, + QuorumCertificate: nil, // inserted below } block := &coreTypes.Block{ BlockHeader: blockHeader, - Transactions: make([][]byte, 0), + Transactions: make([][]byte, 0), // we don't care about the transactions in this context } - qc := generateValidQuorumCertificate(nodePrivKeys, block) - + qc := generateQuorumCertificate(t, validatorPrivKeys, block) qcBytes, err := codec.GetCodec().Marshal(qc) require.NoError(t, err) block.BlockHeader.QuorumCertificate = qcBytes p.blocks = append(p.blocks, block) - i++ } } /*** Quorum certificate Generation Helpers ***/ -func generateValidQuorumCertificate(nodePKs idToPrivKeyMapping, block *coreTypes.Block) *typesCons.QuorumCertificate { +func generateQuorumCertificate(t *testing.T, validatorPrivKeys idToPrivKeyMapping, block *coreTypes.Block) *typesCons.QuorumCertificate { + // Aggregate partial signatures var pss []*typesCons.PartialSignature - - for _, nodePK := range nodePKs { - pss = append(pss, generatePartialSignature(block, nodePK)) + for _, validatorPrivKey := range validatorPrivKeys { + pss = append(pss, generatePartialSignature(t, block, validatorPrivKey)) } // Generate threshold signature @@ -868,24 +864,23 @@ func generateValidQuorumCertificate(nodePKs idToPrivKeyMapping, block *coreTypes return &typesCons.QuorumCertificate{ Height: block.BlockHeader.Height, - Step: 1, - Round: 1, + Round: 1, // assume everything succeeds on the first round for now + Step: consensus.NewRound, // TODO_IN_THIS_COMMIT: Figure out if this shold be Prepare/NewRound or something else Block: block, ThresholdSignature: thresholdSig, } } // generate partial signature for the validator -func generatePartialSignature(block *coreTypes.Block, nodePK cryptoPocket.PrivateKey) *typesCons.PartialSignature { +func generatePartialSignature(t *testing.T, block *coreTypes.Block, validatorPrivKey cryptoPocket.PrivateKey) *typesCons.PartialSignature { return &typesCons.PartialSignature{ - Signature: getMessageSignature(block, nodePK), - Address: nodePK.PublicKey().Address().String(), + Signature: getMessageSignature(t, block, validatorPrivKey), + Address: validatorPrivKey.PublicKey().Address().String(), } } // Generates partial signature with given private key -// If there is an error signing the bytes, nil is returned instead. -func getMessageSignature(block *coreTypes.Block, privKey cryptoPocket.PrivateKey) []byte { +func getMessageSignature(t *testing.T, block *coreTypes.Block, privKey cryptoPocket.PrivateKey) []byte { // Signature only over subset of fields in HotstuffMessage // For reference, see section 4.3 of the the hotstuff whitepaper, partial signatures are // computed over `tsignr(hm.type, m.viewNumber , m.nodei)`. https://arxiv.org/pdf/1803.05069.pdf @@ -897,16 +892,10 @@ func getMessageSignature(block *coreTypes.Block, privKey cryptoPocket.PrivateKey } bytesToSign, err := codec.GetCodec().Marshal(msgToSign) - if err != nil { - logger.Global.Warn().Err(err).Msgf("error getting bytes to sign") - return nil - } + require.NoError(t, err) signature, err := privKey.Sign(bytesToSign) - if err != nil { - logger.Global.Warn().Err(err).Msgf("error signing message") - return nil - } + require.NoError(t, err) return signature } diff --git a/consensus/types/proto/hotstuff.proto b/consensus/types/proto/hotstuff.proto index 8cc9a8334..8bee5d73a 100644 --- a/consensus/types/proto/hotstuff.proto +++ b/consensus/types/proto/hotstuff.proto @@ -40,6 +40,7 @@ message QuorumCertificate { uint64 height = 1; uint64 round = 2; HotstuffStep step = 3; + // TECHDEBT: Note that there is a circular dependency between the `Block` and `QuorumCertificate` types which we have to think about and resolve. core.Block block = 4; ThresholdSignature threshold_signature = 5; } diff --git a/shared/core/types/proto/block.proto b/shared/core/types/proto/block.proto index b519ffcbe..83b0f8a16 100644 --- a/shared/core/types/proto/block.proto +++ b/shared/core/types/proto/block.proto @@ -7,13 +7,17 @@ option go_package = "github.com/pokt-network/pocket/shared/core/types"; import "google/protobuf/timestamp.proto"; message BlockHeader { - uint64 height = 1; - string networkId = 2; // used to differentiate what network the chain is on (Tendermint legacy) - string stateHash = 3; // the state committment at this blocks height - string prevStateHash = 4; // the state committment at this block height-1 - bytes proposerAddress = 5; // the address of the proposer of this block; TECHDEBT: Change this to an string + uint64 height = 1; // the block height + // TECHDEBT: This is Tendermint legacy and we have to decide if we want to keep it + string networkId = 2; // used to differentiate what network the chain is on such as MainNet/TestNet + string stateHash = 3; // the state committment (i.e. root hash) at this block height + string prevStateHash = 4; // the state committment (i.e. root hash) for the previous block (height-1) + // TECHDEBT: Change the proposer address to a string + bytes proposerAddress = 5; // the address of the proposer/leader of this block + // CONSIDERATION: Should we use `QuorumCertificate` directly here? bytes quorumCertificate = 6; // the quorum certificate containing signature from 2/3+ validators at this height - google.protobuf.Timestamp timestamp = 7; // CONSIDERATION: Is this needed? + // CONSIDERATION: Decide if this is needed + google.protobuf.Timestamp timestamp = 7; } message Block { From 361937782f5388b6c03578063c103b40b9cfb281 Mon Sep 17 00:00:00 2001 From: Daniel Olshansky Date: Wed, 24 May 2023 14:44:58 -0700 Subject: [PATCH 041/100] Finished reviewing consensus/e2e_tests/utils_test.go --- consensus/e2e_tests/utils_test.go | 50 ++++++++++++++++++++----------- 1 file changed, 32 insertions(+), 18 deletions(-) diff --git a/consensus/e2e_tests/utils_test.go b/consensus/e2e_tests/utils_test.go index 1db16b611..01cbabe2d 100644 --- a/consensus/e2e_tests/utils_test.go +++ b/consensus/e2e_tests/utils_test.go @@ -34,13 +34,14 @@ import ( "google.golang.org/protobuf/types/known/anypb" ) +// CLEANUP: Some functions in the test suite are exposed even though they do not need to be. + func TestMain(m *testing.M) { exitCode := m.Run() os.Exit(exitCode) } -// TODO(integration): These are temporary variables used in the prototype integration phase that -// will need to be parameterized later once the test framework design matures. +// TECHDEBT: Constants in the `e2e_tests` test suite that should be parameterized const ( numValidators = 4 dummyStateHash = "42" @@ -91,7 +92,7 @@ func createTestConsensusPocketNodes( return pk.Address().String() < pk2.Address().String() }) - blocks := &placeholderBlocks{} + blocks := &testingBlocks{} validatorPrivKeys := make(idToPrivKeyMapping, len(buses)) for i, bus := range buses { @@ -114,7 +115,7 @@ func createTestConsensusPocketNode( t *testing.T, bus modules.Bus, eventsChannel modules.EventsChannel, - placeholderBlocks *placeholderBlocks, + placeholderBlocks *testingBlocks, ) *shared.Node { persistenceMock := basePersistenceMock(t, eventsChannel, bus, placeholderBlocks) bus.RegisterModule(persistenceMock) @@ -398,7 +399,7 @@ loop: /*** Module Mocking Helpers ***/ // Creates a persistence module mock with mock implementations of some basic functionality -func basePersistenceMock(t *testing.T, _ modules.EventsChannel, bus modules.Bus, placeholderBlocks *placeholderBlocks) *mockModules.MockPersistenceModule { +func basePersistenceMock(t *testing.T, _ modules.EventsChannel, bus modules.Bus, testBlocks *testingBlocks) *mockModules.MockPersistenceModule { ctrl := gomock.NewController(t) persistenceMock := mockModules.NewMockPersistenceModule(ctrl) persistenceReadContextMock := mockModules.NewMockPersistenceReadContext(ctrl) @@ -415,14 +416,14 @@ func basePersistenceMock(t *testing.T, _ modules.EventsChannel, bus modules.Bus, if bus.GetConsensusModule().CurrentHeight() < heightInt { return nil, fmt.Errorf("requested height is higher than current height of the node's consensus module") } - return codec.GetCodec().Marshal(placeholderBlocks.getBlock(heightInt)) + return codec.GetCodec().Marshal(testBlocks.getBlock(heightInt)) }).AnyTimes() persistenceMock.EXPECT().GetBlockStore().Return(blockStoreMock).AnyTimes() persistenceReadContextMock.EXPECT().GetMaximumBlockHeight().DoAndReturn(func() (uint64, error) { // Check that we are retrieving a block at a height that was mocked by our test suite - if int(bus.GetConsensusModule().CurrentHeight()) <= len(placeholderBlocks.blocks) { + if int(bus.GetConsensusModule().CurrentHeight()) <= len(testBlocks.blocks) { return bus.GetConsensusModule().CurrentHeight() - 1, nil } t.Error("Trying to retrieve a block at a height that was not mocked.") @@ -717,17 +718,21 @@ func waitForNodeToSync( // waiting for unsynced node to request the same missing block from all peers. blockRequests, err := WaitForNetworkStateSyncEvents(t, clck, eventsChannel, "Error while waiting for block response messages.", numValidators, 250, true) require.NoError(t, err) - require.True(t, checkIdentical(blockRequests), "All block requests sent by node should be identical") - // broadcast one of the requests to all nodes, as all requests are identical - P2PBroadcast(t, allNodes, blockRequests[0]) + // verify that all requests are identical and take the first one + require.True(t, checkIdentical(blockRequests), "All block requests sent by node must be identical") + blockRequest := blockRequests[0] + + // broadcast one of the requests to all nodes + P2PBroadcast(t, allNodes, blockRequest) advanceTime(t, clck, 10*time.Millisecond) // wait to receive replies from all nodes - blockResponses, err := WaitForNetworkStateSyncEvents(t, clck, eventsChannel, "Error while waiting for block request messages.", numValidators-1, 250, true) + blockResponses, err := WaitForNetworkStateSyncEvents(t, clck, eventsChannel, "Error while waiting for block response messages.", numValidators-1, 250, true) require.NoError(t, err) - // check that all nodes replied with the same block response + // verify that all nodes replied with the same block response + var blockResponse *typesCons.GetBlockResponse for _, msg := range blockResponses { msgAny, err := codec.GetCodec().FromAny(msg) require.NoError(t, err) @@ -735,7 +740,16 @@ func waitForNodeToSync( stateSyncMessage, ok := msgAny.(*typesCons.StateSyncMessage) require.True(t, ok) - require.Equal(t, currentHeight, stateSyncMessage.GetGetBlockRes().Block.BlockHeader.Height) + // verify that all nodes replied with the same block response + if blockResponse == nil { + // On the first block received, we just verify the height is correct + blockResponse = stateSyncMessage.GetGetBlockRes() + require.Equal(t, currentHeight, blockResponse.Block.BlockHeader.Height) + } else { + // On subsequent blocks, we verify all the blocks are identical + require.Equal(t, blockResponse.Block, stateSyncMessage.GetGetBlockRes().Block) + + } } // since all block responses are identical, send one of the block responses to the unsynced node @@ -744,7 +758,6 @@ func waitForNodeToSync( // waiting for node to reach to the next height (currentHeight + 1) waitForNodeToCatchUp(t, clck, eventsChannel, unsyncedNode, currentHeight+1) - currentHeight = unsyncedNode.GetBus().GetConsensusModule().CurrentHeight() } } @@ -810,16 +823,16 @@ func baseLoggerMock(t *testing.T, _ modules.EventsChannel) *mockModules.MockLogg /*** Placeholder Block Generation Helpers ***/ -type placeholderBlocks struct { +type testingBlocks struct { blocks []*coreTypes.Block } -func (p *placeholderBlocks) getBlock(index uint64) *coreTypes.Block { +func (p *testingBlocks) getBlock(index uint64) *coreTypes.Block { // returning block at index-1, because block 1 is stored at index 0 of the blocks array return p.blocks[index-1] } -func (p *placeholderBlocks) preparePlaceholderBlocks(t *testing.T, bus modules.Bus, validatorPrivKeys idToPrivKeyMapping, numMockedBlocks uint64) { +func (p *testingBlocks) preparePlaceholderBlocks(t *testing.T, bus modules.Bus, validatorPrivKeys idToPrivKeyMapping, numMockedBlocks uint64) { t.Helper() for i := uint64(1); i <= numMockedBlocks; i++ { leaderId := bus.GetConsensusModule().GetLeaderForView(i, uint64(0), uint8(consensus.NewRound)) @@ -965,6 +978,8 @@ func startNode(t *testing.T, pocketNode *shared.Node) { require.NoError(t, err) } +// checkIdentical verifies that all items in the array are equal. +// Returns true if all items are equal or array is empty, false otherwise. func checkIdentical(arr []*anypb.Any) bool { if len(arr) == 0 { return true @@ -976,6 +991,5 @@ func checkIdentical(arr []*anypb.Any) bool { return false } } - return true } From 8150b76de791fe8dc2c06ff982c252bcb1df59cb Mon Sep 17 00:00:00 2001 From: Daniel Olshansky Date: Wed, 24 May 2023 15:29:41 -0700 Subject: [PATCH 042/100] Simplified state sync module interface --- consensus/events.go | 5 ++++- consensus/fsm_handler.go | 25 ++++++++++++------------ consensus/module_consensus_state_sync.go | 12 ++++++------ shared/messaging/proto/events.proto | 3 +++ shared/modules/consensus_module.go | 10 ++++++---- shared/node.go | 3 ++- 6 files changed, 33 insertions(+), 25 deletions(-) diff --git a/consensus/events.go b/consensus/events.go index 5fcfc322d..3292b5456 100644 --- a/consensus/events.go +++ b/consensus/events.go @@ -15,7 +15,10 @@ func (m *consensusModule) publishNewHeightEvent(height uint64) { // publishStateSyncBlockCommittedEvent publishes a new state sync block committed event, so that state sync module can react to it func (m *consensusModule) publishStateSyncBlockCommittedEvent(height uint64) { - stateSyncBlockCommittedEvent, err := messaging.PackMessage(&messaging.StateSyncBlockCommittedEvent{Height: height}) + blockCommittedEvent := &messaging.StateSyncBlockCommittedEvent{ + Height: height, + } + stateSyncBlockCommittedEvent, err := messaging.PackMessage(blockCommittedEvent) if err != nil { m.logger.Fatal().Err(err).Msg("Failed to pack state sync committed block event") } diff --git a/consensus/fsm_handler.go b/consensus/fsm_handler.go index 681295a0d..e401a6870 100644 --- a/consensus/fsm_handler.go +++ b/consensus/fsm_handler.go @@ -10,7 +10,7 @@ import ( "google.golang.org/protobuf/types/known/anypb" ) -// HandleEvent handles FSM state transition events. +// Implements the `HandleEvent` function in the `ConsensusModule` interface func (m *consensusModule) HandleEvent(transitionMessageAny *anypb.Any) error { m.m.Lock() defer m.m.Unlock() @@ -33,9 +33,8 @@ func (m *consensusModule) HandleEvent(transitionMessageAny *anypb.Any) error { func (m *consensusModule) handleStateTransitionEvent(msg *messaging.StateMachineTransitionEvent) error { m.logger.Info().Fields(messaging.TransitionEventToMap(msg)).Msg("Received state machine transition msg") - fsm_state := msg.NewState - switch coreTypes.StateMachineState(fsm_state) { + switch coreTypes.StateMachineState(msg.NewState) { case coreTypes.StateMachineState_P2P_Bootstrapped: return m.HandleBootstrapped(msg) @@ -59,28 +58,28 @@ func (m *consensusModule) handleStateTransitionEvent(msg *messaging.StateMachine return nil } -// HandleBootstrapped handles FSM event P2P_IsBootstrapped, and P2P_Bootstrapped is the destination state. -// Bootrstapped mode is when the node (validator or non-validator) is first coming online. +// HandleBootstrapped handles the FSM event P2P_IsBootstrapped, and when P2P_Bootstrapped is the destination state. +// Bootstrapped mode is when the node (validator or non) is first coming online. // This is a transition mode from node bootstrapping to a node being out-of-sync. func (m *consensusModule) HandleBootstrapped(msg *messaging.StateMachineTransitionEvent) error { - m.logger.Info().Msg("Node is in bootstrapped state") + m.logger.Info().Msg("Node is in the bootstrapped state. Consensus module NOOP.") return nil } -// HandleUnsynced handles FSM event Consensus_IsUnsynced, and Unsynced is the destination state. -// In Unsynced mode node (validator or non-validator) is out of sync with the rest of the network. +// HandleUnsynced handles the FSM event Consensus_IsUnsynced, and when Unsynced is the destination state. +// In Unsynced mode, the node (validator or not) is out of sync with the rest of the network. // This mode is a transition mode from the node being up-to-date (i.e. Pacemaker mode, Synced mode) with the latest network height to being out-of-sync. -// As soon as node transitions to this mode, it will transition to the sync mode. +// As soon as a node transitions to this mode, it will transition to the synching mode. func (m *consensusModule) HandleUnsynced(msg *messaging.StateMachineTransitionEvent) error { - m.logger.Info().Msg("Node is in Unsyched state, as node is out of sync sending syncmode event to start syncing") + m.logger.Info().Msg("Node is in an Unsynced state. Consensus module is sending an even to transition to SYNCHING mode.") return m.GetBus().GetStateMachineModule().SendEvent(coreTypes.StateMachineEvent_Consensus_IsSyncing) } -// HandleSyncMode handles FSM event Consensus_IsSyncing, and SyncMode is the destination state. -// In Sync mode node (validator or non-validator) starts syncing with the rest of the network. +// HandleSyncMode handles the FSM event Consensus_IsSyncing, and when SyncMode is the destination state. +// In Sync mode, the node (validator or not starts syncing with the rest of the network. func (m *consensusModule) HandleSyncMode(msg *messaging.StateMachineTransitionEvent) error { - m.logger.Info().Msg("Node is in Sync Mode, starting to sync...") + m.logger.Info().Msg("Node is in Sync Mode. Consensus Module is about to start synching...") aggregatedMetadata := m.getAggregatedStateSyncMetadata() m.stateSync.SetAggregatedMetadata(&aggregatedMetadata) diff --git a/consensus/module_consensus_state_sync.go b/consensus/module_consensus_state_sync.go index 74b1946f2..d1f84721f 100644 --- a/consensus/module_consensus_state_sync.go +++ b/consensus/module_consensus_state_sync.go @@ -16,7 +16,11 @@ const metadataSyncPeriod = 45 * time.Second // TODO: Make this configurable var _ modules.ConsensusStateSync = &consensusModule{} -func (m *consensusModule) GetNodeIdFromNodeAddress(peerId string) (uint64, error) { +func (m *consensusModule) GetNodeAddress() string { + return m.nodeAddress +} + +func (m *consensusModule) getNodeIdFromNodeAddress(peerId string) (uint64, error) { validators, err := m.getValidatorsAtHeight(m.CurrentHeight()) if err != nil { // REFACTOR(#434): As per issue #434, once the new id is sorted out, this return statement must be changed @@ -27,10 +31,6 @@ func (m *consensusModule) GetNodeIdFromNodeAddress(peerId string) (uint64, error return uint64(valAddrToIdMap[peerId]), nil } -func (m *consensusModule) GetNodeAddress() string { - return m.nodeAddress -} - // blockApplicationLoop commits the blocks received from the blocksResponsesReceived channel // it is intended to be run as a background process func (m *consensusModule) blockApplicationLoop() { @@ -148,7 +148,7 @@ func (m *consensusModule) validateBlock(block *coreTypes.Block) error { return err } - leaderIdInt, err := m.GetNodeIdFromNodeAddress(string(block.BlockHeader.ProposerAddress)) + leaderIdInt, err := m.getNodeIdFromNodeAddress(string(block.BlockHeader.ProposerAddress)) if err != nil { m.logger.Error().Err(err).Msg("Could not get leader id from leader address") return err diff --git a/shared/messaging/proto/events.proto b/shared/messaging/proto/events.proto index 6aa3d5b10..d509ba5b5 100644 --- a/shared/messaging/proto/events.proto +++ b/shared/messaging/proto/events.proto @@ -6,16 +6,19 @@ option go_package = "github.com/pokt-network/pocket/shared/messaging"; message NodeStartedEvent {} +// Notifies the node that the consensus module has started a new height message ConsensusNewHeightEvent { uint64 height = 1; } +// Notifies the node that the state of the node has transitioned through an event trigger message StateMachineTransitionEvent { string event = 1; string previous_state = 2; string new_state = 3; } +// Notifies the node that the consensus module has committed a block (either through consensus or state sync) message StateSyncBlockCommittedEvent { uint64 height = 1; } \ No newline at end of file diff --git a/shared/modules/consensus_module.go b/shared/modules/consensus_module.go index 591e006dd..9715b694b 100644 --- a/shared/modules/consensus_module.go +++ b/shared/modules/consensus_module.go @@ -25,12 +25,14 @@ type ConsensusModule interface { ConsensusPacemaker ConsensusDebugModule - // Consensus Engine Handlers // TODO: Rename `HandleMessage` to a more specific name that is consistent with its business logic. + // Consensus message handlers HandleMessage(*anypb.Any) error + // State Sync message handlers HandleStateSyncMessage(*anypb.Any) error - // FSM transition event handler + + // Internal event handler such as FSM transition events HandleEvent(transitionMessageAny *anypb.Any) error // Consensus State Accessors @@ -71,13 +73,13 @@ type ConsensusPacemaker interface { // These functions are intended to only be called by the StateSync module. // INVESTIGATE: This interface enable a fast implementation of state sync but look into a way of removing it in the future type ConsensusStateSync interface { - GetNodeIdFromNodeAddress(string) (uint64, error) GetNodeAddress() string } // ConsensusDebugModule exposes functionality used for testing & development purposes. // Not to be used in production. -// TODO: Add a flag so this is not compiled in the prod binary. +// TODO: Move this to a separate file and add a flag so this is not compiled in production +// for safety purposes. type ConsensusDebugModule interface { HandleDebugMessage(*messaging.DebugMessage) error diff --git a/shared/node.go b/shared/node.go index ed6c760d2..98b8ca16e 100644 --- a/shared/node.go +++ b/shared/node.go @@ -170,7 +170,8 @@ func (node *Node) handleEvent(message *messaging.PocketEnvelope) error { } case messaging.HotstuffMessageContentType: return node.GetBus().GetConsensusModule().HandleMessage(message.Content) - case messaging.StateSyncMessageContentType, messaging.StateSyncBlockCommittedEventType: + case messaging.StateSyncMessageContentType, + messaging.StateSyncBlockCommittedEventType: return node.GetBus().GetConsensusModule().HandleStateSyncMessage(message.Content) case messaging.TxGossipMessageContentType: return node.GetBus().GetUtilityModule().HandleUtilityMessage(message.Content) From 373dc485c822c2aecc59b9d9573a70c1b3a7db08 Mon Sep 17 00:00:00 2001 From: Daniel Olshansky Date: Wed, 24 May 2023 15:50:58 -0700 Subject: [PATCH 043/100] All tests still passing --- consensus/module.go | 4 ++++ consensus/module_consensus_state_sync.go | 8 +------- shared/modules/consensus_module.go | 15 ++++++--------- 3 files changed, 11 insertions(+), 16 deletions(-) diff --git a/consensus/module.go b/consensus/module.go index d799ee16d..5630b185a 100644 --- a/consensus/module.go +++ b/consensus/module.go @@ -305,6 +305,10 @@ func (m *consensusModule) CurrentStep() uint64 { return uint64(m.step) } +func (m *consensusModule) GetNodeAddress() string { + return m.nodeAddress +} + // TODO: Populate the entire state from the persistence module: validator set, quorum cert, last block hash, etc... func (m *consensusModule) loadPersistedState() error { readCtx, err := m.GetBus().GetPersistenceModule().NewReadContext(-1) // Unknown height diff --git a/consensus/module_consensus_state_sync.go b/consensus/module_consensus_state_sync.go index d1f84721f..8c4ee1d98 100644 --- a/consensus/module_consensus_state_sync.go +++ b/consensus/module_consensus_state_sync.go @@ -7,19 +7,12 @@ import ( typesCons "github.com/pokt-network/pocket/consensus/types" coreTypes "github.com/pokt-network/pocket/shared/core/types" cryptoPocket "github.com/pokt-network/pocket/shared/crypto" - "github.com/pokt-network/pocket/shared/modules" "google.golang.org/protobuf/proto" "google.golang.org/protobuf/types/known/anypb" ) const metadataSyncPeriod = 45 * time.Second // TODO: Make this configurable -var _ modules.ConsensusStateSync = &consensusModule{} - -func (m *consensusModule) GetNodeAddress() string { - return m.nodeAddress -} - func (m *consensusModule) getNodeIdFromNodeAddress(peerId string) (uint64, error) { validators, err := m.getValidatorsAtHeight(m.CurrentHeight()) if err != nil { @@ -178,6 +171,7 @@ func (m *consensusModule) applyAndCommitBlock(block *coreTypes.Block) error { func (m *consensusModule) getAggregatedStateSyncMetadata() typesCons.StateSyncMetadataResponse { minHeight, maxHeight := uint64(1), uint64(1) + chanLen := len(m.metadataReceived) for i := 0; i < chanLen; i++ { diff --git a/shared/modules/consensus_module.go b/shared/modules/consensus_module.go index 9715b694b..b1c9aa79e 100644 --- a/shared/modules/consensus_module.go +++ b/shared/modules/consensus_module.go @@ -1,6 +1,6 @@ package modules -//go:generate mockgen -destination=./mocks/consensus_module_mock.go github.com/pokt-network/pocket/shared/modules ConsensusModule,ConsensusPacemaker,ConsensusStateSync,ConsensusDebugModule +//go:generate mockgen -destination=./mocks/consensus_module_mock.go github.com/pokt-network/pocket/shared/modules ConsensusModule,ConsensusPacemaker,ConsensusDebugModule import ( "github.com/pokt-network/pocket/shared/core/types" @@ -21,7 +21,6 @@ type ConsensusModule interface { Module KeyholderModule - ConsensusStateSync ConsensusPacemaker ConsensusDebugModule @@ -36,9 +35,14 @@ type ConsensusModule interface { HandleEvent(transitionMessageAny *anypb.Any) error // Consensus State Accessors + // CLEANUP: Add `Get` prefixes to these functions CurrentHeight() uint64 CurrentRound() uint64 CurrentStep() uint64 + + // Returns The cryptographic address associated with the node's private key. + // TECHDEBT: Consider removing this function altogether when we consolidate node identities + GetNodeAddress() string } // ConsensusPacemaker represents functions exposed by the Consensus module for Pacemaker specific business logic. @@ -69,13 +73,6 @@ type ConsensusPacemaker interface { GetNodeId() uint64 } -// ConsensusStateSync exposes functionality of the Consensus module for StateSync specific business logic. -// These functions are intended to only be called by the StateSync module. -// INVESTIGATE: This interface enable a fast implementation of state sync but look into a way of removing it in the future -type ConsensusStateSync interface { - GetNodeAddress() string -} - // ConsensusDebugModule exposes functionality used for testing & development purposes. // Not to be used in production. // TODO: Move this to a separate file and add a flag so this is not compiled in production From a0ddcfc222f60cc5bffb95ce31f0a83379425cda Mon Sep 17 00:00:00 2001 From: Daniel Olshansky Date: Wed, 24 May 2023 17:09:50 -0700 Subject: [PATCH 044/100] review consensus/fsm_handler.go --- consensus/fsm_handler.go | 24 ++++++++---------------- consensus/module.go | 21 ++++++++++++--------- consensus/module_consensus_state_sync.go | 8 ++++---- 3 files changed, 24 insertions(+), 29 deletions(-) diff --git a/consensus/fsm_handler.go b/consensus/fsm_handler.go index e401a6870..58e4df486 100644 --- a/consensus/fsm_handler.go +++ b/consensus/fsm_handler.go @@ -89,31 +89,23 @@ func (m *consensusModule) HandleSyncMode(msg *messaging.StateMachineTransitionEv return nil } -// HandleSynced handles FSM event IsSyncedNonValidator for Non-Validators, and Synced is the destination state. -// Currently, FSM never transition to this state and a non-validator node always stays in syncmode. +// HandleSynced handles the FSM event IsSyncedNonValidator for Non-Validators, and Synced is the destination state. +// Currently, FSM never transition to this state and a non-validator node always stays in SyncMode. // CONSIDER: when a non-validator sync is implemented, maybe there is a case that requires transitioning to this state. func (m *consensusModule) HandleSynced(msg *messaging.StateMachineTransitionEvent) error { - m.logger.Info().Msg("Non-validator node is in Synced mode") + m.logger.Info().Msg("Non-validator node is in Synced mode. Consensus module NOOP.") return nil } -// HandlePacemaker handles FSM event IsSyncedValidator, and Pacemaker is the destination state. -// Execution of this state means the validator node is synced. +// HandlePacemaker handles the FSM event IsSyncedValidator, and Pacemaker is the destination state. +// Execution of this state means the validator node is synced and it will stay in this mode until +// it receives a new block proposal that has a higher height than the current consensus height. func (m *consensusModule) HandlePacemaker(msg *messaging.StateMachineTransitionEvent) error { - m.logger.Info().Msg("Validator node is Synced and in Pacemaker mode. It will stay in this mode until it receives a new block proposal that has a higher height than the current block height") - // validator receives a new block proposal, and it understands that it doesn't have block and it transitions to unsycnhed state - // transitioning out of this state happens when a new block proposal is received by the hotstuff_replica + m.logger.Info().Msg("Validator node is Synced and in Pacemaker mode. Validator can now participate in voting on consensus.") - // TODO: move this to a more appropriate place // if a validator is just bootstrapped and finished state sync, it will not have a nodeId yet, which is 0. Set correct nodeId here. if m.nodeId == 0 { - // valdiator node receives nodeID after reaching pacemaker. - validators, err := m.getValidatorsAtHeight(m.CurrentHeight()) - if err != nil { - return err - } - valAddrToIdMap := typesCons.NewActorMapper(validators).GetValAddrToIdMap() - m.nodeId = valAddrToIdMap[m.nodeAddress] + return m.updateNodeId() } return nil diff --git a/consensus/module.go b/consensus/module.go index 5630b185a..00bcafe9b 100644 --- a/consensus/module.go +++ b/consensus/module.go @@ -147,22 +147,15 @@ func (*consensusModule) Create(bus modules.Bus, options ...modules.ModuleOption) if err != nil { return nil, err } - address := privateKey.Address().String() - - validators, err := m.getValidatorsAtHeight(m.CurrentHeight()) - if err != nil { + m.nodeAddress = privateKey.Address().String() + if m.updateNodeId() != nil { return nil, err } - valAddrToIdMap := typesCons.NewActorMapper(validators).GetValAddrToIdMap() - m.privateKey = privateKey.(cryptoPocket.Ed25519PrivateKey) m.consCfg = consensusCfg m.genesisState = genesisState - m.nodeId = valAddrToIdMap[address] - m.nodeAddress = address - m.metadataReceived = make(chan *typesCons.StateSyncMetadataResponse, metadataChannelSize) m.blocksResponsesReceived = make(chan *typesCons.GetBlockResponse, blocksChannelSize) @@ -309,6 +302,16 @@ func (m *consensusModule) GetNodeAddress() string { return m.nodeAddress } +func (m *consensusModule) updateNodeId() error { + validators, err := m.getValidatorsAtHeight(m.CurrentHeight()) + if err != nil { + return err + } + valAddrToIdMap := typesCons.NewActorMapper(validators).GetValAddrToIdMap() + m.nodeId = valAddrToIdMap[m.nodeAddress] + return nil +} + // TODO: Populate the entire state from the persistence module: validator set, quorum cert, last block hash, etc... func (m *consensusModule) loadPersistedState() error { readCtx, err := m.GetBus().GetPersistenceModule().NewReadContext(-1) // Unknown height diff --git a/consensus/module_consensus_state_sync.go b/consensus/module_consensus_state_sync.go index 8c4ee1d98..e28805997 100644 --- a/consensus/module_consensus_state_sync.go +++ b/consensus/module_consensus_state_sync.go @@ -29,8 +29,6 @@ func (m *consensusModule) getNodeIdFromNodeAddress(peerId string) (uint64, error func (m *consensusModule) blockApplicationLoop() { logger := m.logger.With().Str("source", "blockApplicationLoop").Logger() - //switch - for blockResponse := range m.blocksResponsesReceived { block := blockResponse.Block logger.Info().Msgf("New block, at height %d is received!", block.BlockHeader.Height) @@ -170,10 +168,12 @@ func (m *consensusModule) applyAndCommitBlock(block *coreTypes.Block) error { } func (m *consensusModule) getAggregatedStateSyncMetadata() typesCons.StateSyncMetadataResponse { - minHeight, maxHeight := uint64(1), uint64(1) - + // TECHDEBT(#686): This should be an ongoing background passive state sync process but just + // capturing the available messages at the time that this function was called is good enough for now. chanLen := len(m.metadataReceived) + m.logger.Info().Msgf("Looping over %d state sync metadata responses", chanLen) + minHeight, maxHeight := uint64(1), uint64(1) for i := 0; i < chanLen; i++ { metadata := <-m.metadataReceived if metadata.MaxHeight > maxHeight { From f01d704f71ce17cd6ead5dd044d858efe5b36807 Mon Sep 17 00:00:00 2001 From: Daniel Olshansky Date: Wed, 24 May 2023 17:18:43 -0700 Subject: [PATCH 045/100] Remove isValidator --- consensus/e2e_tests/utils_test.go | 2 +- consensus/state_sync/module.go | 7 ++++++- persistence/validator.go | 15 --------------- shared/CHANGELOG.md | 1 - shared/modules/persistence_module.go | 3 --- 5 files changed, 7 insertions(+), 21 deletions(-) diff --git a/consensus/e2e_tests/utils_test.go b/consensus/e2e_tests/utils_test.go index 01cbabe2d..cd80e4c6a 100644 --- a/consensus/e2e_tests/utils_test.go +++ b/consensus/e2e_tests/utils_test.go @@ -441,7 +441,7 @@ func basePersistenceMock(t *testing.T, _ modules.EventsChannel, bus modules.Bus, persistenceReadContextMock.EXPECT().GetAllValidators(gomock.Any()).Return(bus.GetRuntimeMgr().GetGenesis().Validators, nil).AnyTimes() persistenceReadContextMock.EXPECT().GetBlockHash(gomock.Any()).Return("", nil).AnyTimes() persistenceReadContextMock.EXPECT().Release().AnyTimes() - persistenceReadContextMock.EXPECT().IsValidator(gomock.Any(), gomock.Any()).Return(true, nil).AnyTimes() + persistenceReadContextMock.EXPECT().GetValidatorExists(gomock.Any(), gomock.Any()).Return(true, nil).AnyTimes() return persistenceMock } diff --git a/consensus/state_sync/module.go b/consensus/state_sync/module.go index 7c9e12cce..4ef815a95 100644 --- a/consensus/state_sync/module.go +++ b/consensus/state_sync/module.go @@ -1,6 +1,7 @@ package state_sync import ( + "encoding/hex" "fmt" "time" @@ -161,7 +162,11 @@ func (m *stateSync) Stop() error { } defer readCtx.Release() - isValidator, err := readCtx.IsValidator(int64(currentHeight), nodeAddress) + nodeAddressBz, err := hex.DecodeString(nodeAddress) + if err != nil { + return err + } + isValidator, err := readCtx.GetValidatorExists(nodeAddressBz, int64(currentHeight)) if err != nil { return err } diff --git a/persistence/validator.go b/persistence/validator.go index db9a0b497..edc9ca7a8 100644 --- a/persistence/validator.go +++ b/persistence/validator.go @@ -74,21 +74,6 @@ func (p *PostgresContext) GetValidatorOutputAddress(operator []byte, height int6 return p.GetActorOutputAddress(types.ValidatorActor, operator, height) } -func (m *PostgresContext) IsValidator(height int64, address string) (bool, error) { - validators, err := m.GetAllValidators(int64(height)) - if err != nil { - return false, err - } - - for _, actor := range validators { - if actor.Address == address { - return true, nil - } - } - - return false, nil -} - // TODO: implement missed blocks func (p *PostgresContext) SetValidatorMissedBlocks(address []byte, missedBlocks int) error { return nil diff --git a/shared/CHANGELOG.md b/shared/CHANGELOG.md index ed6af4d6a..2b5da10d6 100644 --- a/shared/CHANGELOG.md +++ b/shared/CHANGELOG.md @@ -9,7 +9,6 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [0.0.0.58] - 2023-05-04 -- Exported `IsValidator()` function for Persistence module - Added `pocket.StateSyncBlockCommittedEvent` to the shared messaging events ## [0.0.0.57] - 2023-05-04 diff --git a/shared/modules/persistence_module.go b/shared/modules/persistence_module.go index 90c90babc..49be8e62e 100644 --- a/shared/modules/persistence_module.go +++ b/shared/modules/persistence_module.go @@ -206,9 +206,6 @@ type PersistenceReadContext interface { GetValidatorOutputAddress(operator []byte, height int64) (output []byte, err error) GetValidatorMissedBlocks(address []byte, height int64) (int, error) - // Checks whether given node is validator in the given height - IsValidator(height int64, address string) (bool, error) - // Actors Queries GetAllStakedActors(height int64) ([]*coreTypes.Actor, error) From 84e2bc3b646171331d9d6fa4949a87792439f816 Mon Sep 17 00:00:00 2001 From: Daniel Olshansky Date: Thu, 25 May 2023 16:53:22 -0700 Subject: [PATCH 046/100] Tests pass after merging to main --- consensus/block.go | 6 ++++-- consensus/e2e_tests/utils_test.go | 14 +++++--------- consensus/helpers.go | 1 + consensus/hotstuff_replica.go | 5 ++--- consensus/module_consensus_state_sync.go | 11 +++++++---- consensus/state_sync/server.go | 2 +- 6 files changed, 20 insertions(+), 19 deletions(-) diff --git a/consensus/block.go b/consensus/block.go index cf0038fad..40961311a 100644 --- a/consensus/block.go +++ b/consensus/block.go @@ -73,12 +73,14 @@ func (m *consensusModule) isValidMessageBlock(msg *typesCons.HotstuffMessage) (b return true, nil } -// Creates a new Utility Unit Of Work and clears/nullifies any previous UOW if they exist +// Creates a new Utility Unit Of Work and clears/nullifies any previous UOW if one exists func (m *consensusModule) refreshUtilityUnitOfWork() error { // Catch-all structure to release the previous utility UOW if it wasn't properly cleaned up. utilityUnitOfWork := m.utilityUnitOfWork + + // TECHDEBT: This should, theoretically, never happen. Need to identify all + // code paths where it does and fix it. if utilityUnitOfWork != nil { - // TODO: This should, ideally, never be called m.logger.Warn().Bool("TODO", true).Msg(typesCons.NilUtilityUOWWarning) if err := utilityUnitOfWork.Release(); err != nil { m.logger.Warn().Err(err).Msg("failed to release utility unit of work") diff --git a/consensus/e2e_tests/utils_test.go b/consensus/e2e_tests/utils_test.go index 0f7d075a0..d9c4bc093 100644 --- a/consensus/e2e_tests/utils_test.go +++ b/consensus/e2e_tests/utils_test.go @@ -27,6 +27,7 @@ import ( "github.com/pokt-network/pocket/shared/messaging" "github.com/pokt-network/pocket/shared/modules" mockModules "github.com/pokt-network/pocket/shared/modules/mocks" + "github.com/pokt-network/pocket/shared/utils" "github.com/pokt-network/pocket/state_machine" "github.com/stretchr/testify/require" "google.golang.org/protobuf/proto" @@ -409,7 +410,7 @@ func basePersistenceMock(t *testing.T, _ modules.EventsChannel, bus modules.Bus, persistenceMock.EXPECT().NewReadContext(gomock.Any()).Return(persistenceReadContextMock, nil).AnyTimes() persistenceMock.EXPECT().ReleaseWriteContext().Return(nil).AnyTimes() - blockStoreMock := persistenceMocks.NewMockKVStore(ctrl) + blockStoreMock := persistenceMocks.NewMockBlockStore(ctrl) blockStoreMock.EXPECT().Get(gomock.Any()).DoAndReturn(func(height []byte) ([]byte, error) { heightInt := utils.HeightFromBytes(height) if bus.GetConsensusModule().CurrentHeight() < heightInt { @@ -425,12 +426,7 @@ func basePersistenceMock(t *testing.T, _ modules.EventsChannel, bus modules.Bus, if bus.GetConsensusModule().CurrentHeight() < height { return nil, fmt.Errorf("requested height is higher than current height of the node's consensus module") } - blockWithHeight := &coreTypes.Block{ - BlockHeader: &coreTypes.BlockHeader{ - Height: height, - }, - } - return blockWithHeight, nil + return testBlocks.getBlock(height), nil }). AnyTimes() @@ -756,7 +752,7 @@ func waitForNodeToSync( currentHeight := unsyncedNode.GetBus().GetConsensusModule().CurrentHeight() for currentHeight < targetHeight { // waiting for unsynced node to request the same missing block from all peers. - blockRequests, err := WaitForNetworkStateSyncEvents(t, clck, eventsChannel, "Error while waiting for block response messages.", numValidators, 250, true) + blockRequests, err := WaitForNetworkStateSyncEvents(t, clck, eventsChannel, "Error while waiting for block response messages.", numValidators, 500, true) require.NoError(t, err) // verify that all requests are identical and take the first one @@ -768,7 +764,7 @@ func waitForNodeToSync( advanceTime(t, clck, 10*time.Millisecond) // wait to receive replies from all nodes - blockResponses, err := WaitForNetworkStateSyncEvents(t, clck, eventsChannel, "Error while waiting for block response messages.", numValidators-1, 250, true) + blockResponses, err := WaitForNetworkStateSyncEvents(t, clck, eventsChannel, "Error while waiting for block response messages.", numValidators-1, 500, true) require.NoError(t, err) // verify that all nodes replied with the same block response diff --git a/consensus/helpers.go b/consensus/helpers.go index 3d6ddd743..282e87bfd 100644 --- a/consensus/helpers.go +++ b/consensus/helpers.go @@ -280,6 +280,7 @@ func hotstuffMsgToLoggingFields(msg *typesCons.HotstuffMessage) map[string]any { } func (m *consensusModule) maxPersistedBlockHeight() (uint64, error) { + // TECHDEBT: We don't need to pass the height here to retrieve the maximum block height. readCtx, err := m.GetBus().GetPersistenceModule().NewReadContext(int64(m.CurrentHeight())) if err != nil { return 0, err diff --git a/consensus/hotstuff_replica.go b/consensus/hotstuff_replica.go index e8a1c608e..e51c2ee03 100644 --- a/consensus/hotstuff_replica.go +++ b/consensus/hotstuff_replica.go @@ -59,15 +59,14 @@ func (handler *HotstuffReplicaMessageHandler) HandlePrepareMessage(m *consensusM return } - // if replica received a proposal in statesync before receiving the NEWROUND proposals, + // TODO_IN_THIS_COMMIT: Figure out how to remove this. + // if the replica received a proposal in statesync before receiving the NEWROUND proposals, // in which case utilityUnitOfWork will be nil, and we refresh utility context if m.utilityUnitOfWork == nil { - // Clear the previous utility unitOfWork, if it exists, and create a new one if err := m.refreshUtilityUnitOfWork(); err != nil { m.logger.Error().Err(err).Msg("Could not refresh utility unitOfWork") return } - } block := msg.GetBlock() diff --git a/consensus/module_consensus_state_sync.go b/consensus/module_consensus_state_sync.go index e28805997..aadcb89a9 100644 --- a/consensus/module_consensus_state_sync.go +++ b/consensus/module_consensus_state_sync.go @@ -2,6 +2,7 @@ package consensus import ( "context" + "fmt" "time" typesCons "github.com/pokt-network/pocket/consensus/types" @@ -11,20 +12,22 @@ import ( "google.golang.org/protobuf/types/known/anypb" ) -const metadataSyncPeriod = 45 * time.Second // TODO: Make this configurable +// TODO: Make this configurable in StateSyncConfig +const metadataSyncPeriod = 45 * time.Second +// REFACTOR(#434): Once we consolidated NodeIds/PeerIds, this could potentially be removed func (m *consensusModule) getNodeIdFromNodeAddress(peerId string) (uint64, error) { validators, err := m.getValidatorsAtHeight(m.CurrentHeight()) if err != nil { - // REFACTOR(#434): As per issue #434, once the new id is sorted out, this return statement must be changed - return 0, err + m.logger.Warn().Err(err).Msgf("Could not get validators at height %d when checking if peer %s is a validator", m.CurrentHeight(), peerId) + return 0, fmt.Errorf("Could determine if peer %s is a validator or not: %w", peerId, err) } valAddrToIdMap := typesCons.NewActorMapper(validators).GetValAddrToIdMap() return uint64(valAddrToIdMap[peerId]), nil } -// blockApplicationLoop commits the blocks received from the blocksResponsesReceived channel +// blockApplicationLoop commits the blocks received from the `blocksResponsesReceived“ channel // it is intended to be run as a background process func (m *consensusModule) blockApplicationLoop() { logger := m.logger.With().Str("source", "blockApplicationLoop").Logger() diff --git a/consensus/state_sync/server.go b/consensus/state_sync/server.go index 21a4986aa..e9ee96690 100644 --- a/consensus/state_sync/server.go +++ b/consensus/state_sync/server.go @@ -77,7 +77,7 @@ func (m *stateSync) HandleGetBlockRequest(blockReq *typesCons.GetBlockRequest) { block, err := blockStore.GetBlock(blockReq.Height) if err != nil { m.logger.Error().Err(err).Msgf("failed to get block at height %d", blockReq.Height) - return err + return } stateSyncMessage := typesCons.StateSyncMessage{ From a0ec8d697dd5b2d1ceb61c36a86434eae1404644 Mon Sep 17 00:00:00 2001 From: Daniel Olshansky Date: Thu, 25 May 2023 17:14:32 -0700 Subject: [PATCH 047/100] Rename background processes --- consensus/module_consensus_state_sync.go | 39 ++++++++++++++---------- 1 file changed, 23 insertions(+), 16 deletions(-) diff --git a/consensus/module_consensus_state_sync.go b/consensus/module_consensus_state_sync.go index aadcb89a9..dc3f3dda6 100644 --- a/consensus/module_consensus_state_sync.go +++ b/consensus/module_consensus_state_sync.go @@ -27,14 +27,15 @@ func (m *consensusModule) getNodeIdFromNodeAddress(peerId string) (uint64, error return uint64(valAddrToIdMap[peerId]), nil } -// blockApplicationLoop commits the blocks received from the `blocksResponsesReceived“ channel -// it is intended to be run as a background process +// blockApplicationLoop commits the blocks received from the `blocksResponsesReceived“ channel. +// It is intended to be run as a background process via `go blockApplicationLoop()` func (m *consensusModule) blockApplicationLoop() { logger := m.logger.With().Str("source", "blockApplicationLoop").Logger() + // Blocks until m.blocksResponsesReceived is closed for blockResponse := range m.blocksResponsesReceived { block := blockResponse.Block - logger.Info().Msgf("New block, at height %d is received!", block.BlockHeader.Height) + logger.Info().Msgf("Received new block at height %d.", block.BlockHeader.Height) maxPersistedHeight, err := m.maxPersistedBlockHeight() if err != nil { @@ -42,14 +43,15 @@ func (m *consensusModule) blockApplicationLoop() { continue } - // CONSIDERATION: rather than discarding these blocks, push them into a channel to process them later if block.BlockHeader.Height <= maxPersistedHeight { - logger.Info().Msgf("Received block at height %d, discarding as it has already been persisted", block.BlockHeader.Height) + logger.Debug().Msgf("Discarding block height %d, since node is ahead at height %d", block.BlockHeader.Height, maxPersistedHeight) continue } if block.BlockHeader.Height > m.CurrentHeight() { - logger.Info().Msgf("Received block at height %d, discarding as it is higher than the current height", block.BlockHeader.Height) + logger.Info().Bool("TODO", true).Msgf("Received block at height %d, discarding as it is higher than the current height", block.BlockHeader.Height) + // TECHDEBT: we need to store block responses that we are not yet ready to validate so we can validate them on a subsequent iteration of this loop + // m.blocksResponsesReceived <- blockResponse continue } @@ -62,23 +64,25 @@ func (m *consensusModule) blockApplicationLoop() { logger.Err(err).Msg("failed to apply and commit block") continue } - logger.Info().Msgf("Block, at height %d is committed!", block.BlockHeader.Height) + + logger.Info().Int64("height", int64(block.BlockHeader.Height)).Msgf("Block, at height %d is committed!", block.BlockHeader.Height) m.publishStateSyncBlockCommittedEvent(block.BlockHeader.Height) } } -// metadataSyncLoop periodically sends metadata requests to its peers -// it is intended to be run as a background process +// metadataSyncLoop periodically sends metadata requests to its peers to aggregate metadata related to synching the state. +// It is intended to be run as a background process via `go metadataSyncLoop` func (m *consensusModule) metadataSyncLoop() error { + logger := m.logger.With().Str("source", "metadataSyncLoop").Logger() ctx := context.TODO() ticker := time.NewTicker(metadataSyncPeriod) for { select { case <-ticker.C: - m.logger.Info().Msg("Background metadata sync check triggered") - if err := m.sendMetadataRequests(); err != nil { - m.logger.Error().Err(err).Msg("Failed to send metadata requests") + logger.Info().Msg("Background metadata sync check triggered") + if err := m.broadcastMetadataRequests(); err != nil { + logger.Error().Err(err).Msg("Failed to send metadata requests") return err } @@ -89,8 +93,10 @@ func (m *consensusModule) metadataSyncLoop() error { } } -func (m *consensusModule) sendMetadataRequests() error { - stateSyncMetaDataReqMessage := &typesCons.StateSyncMessage{ +// broadcastMetadataRequests sends a metadata request to all peers in the network to understand +// the state of the network and determine if the node is behind. +func (m *consensusModule) broadcastMetadataRequests() error { + stateSyncMetadataReqMessage := &typesCons.StateSyncMessage{ Message: &typesCons.StateSyncMessage_MetadataReq{ MetadataReq: &typesCons.StateSyncMetadataRequest{ PeerAddress: m.GetBus().GetConsensusModule().GetNodeAddress(), @@ -98,17 +104,18 @@ func (m *consensusModule) sendMetadataRequests() error { }, } + // TECHDEBT: This should be sent to all peers (full nodes, servicesr, etc...), not just validators validators, err := m.getValidatorsAtHeight(m.CurrentHeight()) if err != nil { m.logger.Error().Err(err).Msg(typesCons.ErrPersistenceGetAllValidators.Error()) } for _, val := range validators { - - anyMsg, err := anypb.New(stateSyncMetaDataReqMessage) + anyMsg, err := anypb.New(stateSyncMetadataReqMessage) if err != nil { return err } + // TECHDEBT: Revisit why we're not using `Broadcast` here instead of `Send`. if err := m.GetBus().GetP2PModule().Send(cryptoPocket.AddressFromString(val.GetAddress()), anyMsg); err != nil { m.logger.Error().Err(err).Msg(typesCons.ErrSendMessage.Error()) return err From 85f2156e459fe973faa527733498f2fb35ced847 Mon Sep 17 00:00:00 2001 From: Daniel Olshansky Date: Thu, 25 May 2023 19:32:11 -0700 Subject: [PATCH 048/100] Reviewed the block application loop --- consensus/block.go | 9 ++-- consensus/hotstuff_leader.go | 16 +++--- consensus/hotstuff_replica.go | 16 +++--- consensus/module_consensus_state_sync.go | 62 +++++++++++------------- consensus/types/types.go | 5 ++ 5 files changed, 56 insertions(+), 52 deletions(-) diff --git a/consensus/block.go b/consensus/block.go index 40961311a..7c9e64fa8 100644 --- a/consensus/block.go +++ b/consensus/block.go @@ -35,8 +35,11 @@ func (m *consensusModule) commitBlock(block *coreTypes.Block) error { } // ADDTEST: Add unit tests specific to block validation -// IMPROVE: Rename to provide clarity of operation. ValidateBasic() is typically a stateless check not stateful -func (m *consensusModule) isValidMessageBlock(msg *typesCons.HotstuffMessage) (bool, error) { +// isBlockMessageInMessageValid does basic validation of the block in the hotstuff message for the step provided, such as: +// - validating if the block could/should be nil +// - the state hash of the block +// - the size of the block +func (m *consensusModule) isBlockMessageInMessageValid(msg *typesCons.HotstuffMessage) (bool, error) { block := msg.GetBlock() step := msg.GetStep() @@ -73,7 +76,7 @@ func (m *consensusModule) isValidMessageBlock(msg *typesCons.HotstuffMessage) (b return true, nil } -// Creates a new Utility Unit Of Work and clears/nullifies any previous UOW if one exists +// refreshUtilityUnitOfWork is a helper that creates a new Utility Unit Of Work and clears/nullifies a previous one if it exists func (m *consensusModule) refreshUtilityUnitOfWork() error { // Catch-all structure to release the previous utility UOW if it wasn't properly cleaned up. utilityUnitOfWork := m.utilityUnitOfWork diff --git a/consensus/hotstuff_leader.go b/consensus/hotstuff_leader.go index 1a3520870..1dcccc0f2 100644 --- a/consensus/hotstuff_leader.go +++ b/consensus/hotstuff_leader.go @@ -30,7 +30,7 @@ func (handler *HotstuffLeaderMessageHandler) HandleNewRoundMessage(m *consensusM defer m.paceMaker.RestartTimer() handler.emitTelemetryEvent(m, msg) - if err := handler.anteHandle(m, msg); err != nil { + if err := handler.isMessageValidBasic(m, msg); err != nil { m.logger.Error().Msg(typesCons.ErrHotstuffValidation.Error()) return } @@ -108,7 +108,7 @@ func (handler *HotstuffLeaderMessageHandler) HandlePrepareMessage(m *consensusMo defer m.paceMaker.RestartTimer() handler.emitTelemetryEvent(m, msg) - if err := handler.anteHandle(m, msg); err != nil { + if err := handler.isMessageValidBasic(m, msg); err != nil { m.logger.Error().Msg(typesCons.ErrHotstuffValidation.Error()) return } @@ -159,7 +159,7 @@ func (handler *HotstuffLeaderMessageHandler) HandlePrecommitMessage(m *consensus defer m.paceMaker.RestartTimer() handler.emitTelemetryEvent(m, msg) - if err := handler.anteHandle(m, msg); err != nil { + if err := handler.isMessageValidBasic(m, msg); err != nil { m.logger.Error().Err(err).Msg(typesCons.ErrHotstuffValidation.Error()) return } @@ -210,7 +210,7 @@ func (handler *HotstuffLeaderMessageHandler) HandleCommitMessage(m *consensusMod defer m.paceMaker.RestartTimer() handler.emitTelemetryEvent(m, msg) - if err := handler.anteHandle(m, msg); err != nil { + if err := handler.isMessageValidBasic(m, msg); err != nil { m.logger.Error().Err(err).Msg(typesCons.ErrHotstuffValidation.Error()) return } @@ -273,17 +273,17 @@ func (handler *HotstuffLeaderMessageHandler) HandleDecideMessage(m *consensusMod defer m.paceMaker.RestartTimer() handler.emitTelemetryEvent(m, msg) - if err := handler.anteHandle(m, msg); err != nil { + if err := handler.isMessageValidBasic(m, msg); err != nil { m.logger.Error().Err(err).Msg(typesCons.ErrHotstuffValidation.Error()) return } } -// anteHandle is the general handler called for every before every specific HotstuffLeaderMessageHandler handler -func (handler *HotstuffLeaderMessageHandler) anteHandle(m *consensusModule, msg *typesCons.HotstuffMessage) error { +// isMessageValidBasic is the general handler called for every before every specific HotstuffLeaderMessageHandler handler +func (handler *HotstuffLeaderMessageHandler) isMessageValidBasic(m *consensusModule, msg *typesCons.HotstuffMessage) error { // Basic block metadata validation - if valid, err := m.isValidMessageBlock(msg); !valid { + if valid, err := m.isBlockMessageInMessageValid(msg); !valid { return err } diff --git a/consensus/hotstuff_replica.go b/consensus/hotstuff_replica.go index e51c2ee03..0521f5e21 100644 --- a/consensus/hotstuff_replica.go +++ b/consensus/hotstuff_replica.go @@ -28,7 +28,7 @@ func (handler *HotstuffReplicaMessageHandler) HandleNewRoundMessage(m *consensus defer m.paceMaker.RestartTimer() handler.emitTelemetryEvent(m, msg) - if err := handler.anteHandle(m, msg); err != nil { + if err := handler.isMessageValidBasic(m, msg); err != nil { m.logger.Error().Err(err).Msg(typesCons.ErrHotstuffValidation.Error()) return } @@ -48,7 +48,7 @@ func (handler *HotstuffReplicaMessageHandler) HandlePrepareMessage(m *consensusM defer m.paceMaker.RestartTimer() handler.emitTelemetryEvent(m, msg) - if err := handler.anteHandle(m, msg); err != nil { + if err := handler.isMessageValidBasic(m, msg); err != nil { m.logger.Error().Err(err).Msg(typesCons.ErrHotstuffValidation.Error()) return } @@ -92,7 +92,7 @@ func (handler *HotstuffReplicaMessageHandler) HandlePrecommitMessage(m *consensu defer m.paceMaker.RestartTimer() handler.emitTelemetryEvent(m, msg) - if err := handler.anteHandle(m, msg); err != nil { + if err := handler.isMessageValidBasic(m, msg); err != nil { m.logger.Error().Err(err).Msg(typesCons.ErrHotstuffValidation.Error()) return } @@ -130,7 +130,7 @@ func (handler *HotstuffReplicaMessageHandler) HandleCommitMessage(m *consensusMo defer m.paceMaker.RestartTimer() handler.emitTelemetryEvent(m, msg) - if err := handler.anteHandle(m, msg); err != nil { + if err := handler.isMessageValidBasic(m, msg); err != nil { m.logger.Error().Err(err).Msg(typesCons.ErrHotstuffValidation.Error()) return } @@ -168,7 +168,7 @@ func (handler *HotstuffReplicaMessageHandler) HandleDecideMessage(m *consensusMo defer m.paceMaker.RestartTimer() handler.emitTelemetryEvent(m, msg) - if err := handler.anteHandle(m, msg); err != nil { + if err := handler.isMessageValidBasic(m, msg); err != nil { m.logger.Error().Err(err).Msg(typesCons.ErrHotstuffValidation.Error()) return } @@ -206,10 +206,10 @@ func (handler *HotstuffReplicaMessageHandler) HandleDecideMessage(m *consensusMo m.paceMaker.NewHeight() } -// anteHandle is the handler called on every replica message before specific handler -func (handler *HotstuffReplicaMessageHandler) anteHandle(m *consensusModule, msg *typesCons.HotstuffMessage) error { +// isMessageValidBasic is the handler called on every replica message before specific handler +func (handler *HotstuffReplicaMessageHandler) isMessageValidBasic(m *consensusModule, msg *typesCons.HotstuffMessage) error { // Basic block metadata validation - if valid, err := m.isValidMessageBlock(msg); !valid { + if valid, err := m.isBlockMessageInMessageValid(msg); !valid { return err } diff --git a/consensus/module_consensus_state_sync.go b/consensus/module_consensus_state_sync.go index dc3f3dda6..782f62319 100644 --- a/consensus/module_consensus_state_sync.go +++ b/consensus/module_consensus_state_sync.go @@ -37,17 +37,20 @@ func (m *consensusModule) blockApplicationLoop() { block := blockResponse.Block logger.Info().Msgf("Received new block at height %d.", block.BlockHeader.Height) + // Check what the current latest committed block height is maxPersistedHeight, err := m.maxPersistedBlockHeight() if err != nil { logger.Err(err).Msg("couldn't query max persisted height") continue } + // Check if the block being synched is behind the current height if block.BlockHeader.Height <= maxPersistedHeight { logger.Debug().Msgf("Discarding block height %d, since node is ahead at height %d", block.BlockHeader.Height, maxPersistedHeight) continue } + // Check if the block being synched is ahead of the current height if block.BlockHeader.Height > m.CurrentHeight() { logger.Info().Bool("TODO", true).Msgf("Received block at height %d, discarding as it is higher than the current height", block.BlockHeader.Height) // TECHDEBT: we need to store block responses that we are not yet ready to validate so we can validate them on a subsequent iteration of this loop @@ -55,17 +58,40 @@ func (m *consensusModule) blockApplicationLoop() { continue } + // Do basic block validation if err = m.validateBlock(block); err != nil { logger.Err(err).Msg("failed to validate block") continue } - if err = m.applyAndCommitBlock(block); err != nil { - logger.Err(err).Msg("failed to apply and commit block") + // Prepare the utility UOW of work to apply a new block + if err := m.refreshUtilityUnitOfWork(); err != nil { + m.logger.Error().Err(err).Msg("Could not refresh utility context") continue } + // Update the leader proposing the block + leaderIdInt, err := m.getNodeIdFromNodeAddress(string(block.BlockHeader.ProposerAddress)) + if err != nil { + m.logger.Error().Err(err).Msg("Could not get leader id from leader address") + continue + } + m.leaderId = typesCons.NewNodeId(leaderIdInt) + + // Try to apply the block by validating the transactions in the block + if err := m.applyBlock(block); err != nil { + m.logger.Error().Err(err).Msg("Could not apply block") + continue + } + + // Try to commit the block to persistence + if err := m.commitBlock(block); err != nil { + m.logger.Error().Err(err).Msg("Could not commit block") + continue + } logger.Info().Int64("height", int64(block.BlockHeader.Height)).Msgf("Block, at height %d is committed!", block.BlockHeader.Height) + + m.paceMaker.NewHeight() m.publishStateSyncBlockCommittedEvent(block.BlockHeader.Height) } } @@ -104,7 +130,7 @@ func (m *consensusModule) broadcastMetadataRequests() error { }, } - // TECHDEBT: This should be sent to all peers (full nodes, servicesr, etc...), not just validators + // TECHDEBT: This should be sent to all peers (full nodes, servicers, etc...), not just validators validators, err := m.getValidatorsAtHeight(m.CurrentHeight()) if err != nil { m.logger.Error().Err(err).Msg(typesCons.ErrPersistenceGetAllValidators.Error()) @@ -144,36 +170,6 @@ func (m *consensusModule) validateBlock(block *coreTypes.Block) error { return err } - if err := m.refreshUtilityUnitOfWork(); err != nil { - m.logger.Error().Err(err).Msg("Could not refresh utility context") - return err - } - - leaderIdInt, err := m.getNodeIdFromNodeAddress(string(block.BlockHeader.ProposerAddress)) - if err != nil { - m.logger.Error().Err(err).Msg("Could not get leader id from leader address") - return err - } - - leaderId := typesCons.NodeId(leaderIdInt) - m.leaderId = &leaderId - - return nil -} - -func (m *consensusModule) applyAndCommitBlock(block *coreTypes.Block) error { - if err := m.applyBlock(block); err != nil { - m.logger.Error().Err(err).Msg("Could not apply block, invalid QC") - return err - } - - if err := m.commitBlock(block); err != nil { - m.logger.Error().Err(err).Msg("Could not commit block, invalid QC") - return err - } - m.paceMaker.NewHeight() - - m.logger.Info().Msgf("New block is committed, current height is :%d", m.height) return nil } diff --git a/consensus/types/types.go b/consensus/types/types.go index 19517d3cd..cbb9f0e98 100644 --- a/consensus/types/types.go +++ b/consensus/types/types.go @@ -30,3 +30,8 @@ func ActorListToValidatorMap(actors []*coreTypes.Actor) (m ValidatorMap) { } return } + +func NewNodeId(id uint64) *NodeId { + n := NodeId(id) + return &n +} From 476ade9cbc57662af6a86422bd7ab1b4ebb61297 Mon Sep 17 00:00:00 2001 From: Daniel Olshansky Date: Thu, 25 May 2023 19:58:29 -0700 Subject: [PATCH 049/100] Reviewed server.go --- consensus/doc/PROTOCOL_STATE_SYNC.md | 10 ++--- consensus/state_sync/module.go | 2 +- consensus/state_sync/server.go | 62 +++++++++++++++++++--------- state_machine/module.go | 9 +++- 4 files changed, 55 insertions(+), 28 deletions(-) diff --git a/consensus/doc/PROTOCOL_STATE_SYNC.md b/consensus/doc/PROTOCOL_STATE_SYNC.md index 955600cf8..2810a1142 100644 --- a/consensus/doc/PROTOCOL_STATE_SYNC.md +++ b/consensus/doc/PROTOCOL_STATE_SYNC.md @@ -55,9 +55,9 @@ Node gathers peer metadata from its peers in `StateSyncMetadataResponse` type, d ```golang type StateSyncMetadataResponse struct { - PeerAddress string - MinHeight uint64 - MaxHeight uint64 + PeerAddress string + MinHeight uint64 + MaxHeight uint64 } ``` @@ -114,12 +114,12 @@ flowchart TD A[Node] --> B[Periodic
Sync] A[Node] --> |New Block| C{IsSynced} - %% periodic snyc + %% periodic sync B --> |Request
metadata| D[Peers] D[Peers] --> |Collect metadata| B[Periodic
Sync] - %% is node sycnhed + %% is node synched C --> |No| E[StartSyncing] C --> |Yes| F[Apply Block] diff --git a/consensus/state_sync/module.go b/consensus/state_sync/module.go index 4ef815a95..f8b5428db 100644 --- a/consensus/state_sync/module.go +++ b/consensus/state_sync/module.go @@ -17,7 +17,7 @@ import ( const ( stateSyncModuleName = "stateSyncModule" - committedBlocsChannelSize = 100 + committedBlocsChannelSize = 100 // blockWaitingPeriod = 30 * time.Second ) diff --git a/consensus/state_sync/server.go b/consensus/state_sync/server.go index e9ee96690..40900e157 100644 --- a/consensus/state_sync/server.go +++ b/consensus/state_sync/server.go @@ -1,14 +1,12 @@ package state_sync import ( - "fmt" - typesCons "github.com/pokt-network/pocket/consensus/types" cryptoPocket "github.com/pokt-network/pocket/shared/crypto" ) -// This module is responsible for handling requests and business logic that advertises and shares -// local state metadata with other peers syncing to the latest block. +// StateSyncServerModule is responsible for handling requests and business logic that +// advertise and share local state metadata with other peers syncing to the latest block. type StateSyncServerModule interface { // Advertise (send) the local state sync metadata to the requesting peer HandleStateSyncMetadataRequest(*typesCons.StateSyncMetadataRequest) @@ -17,7 +15,11 @@ type StateSyncServerModule interface { HandleGetBlockRequest(*typesCons.GetBlockRequest) } +// HandleStateSyncMetadataRequest processes a request from another peer to get a view into the +// state stored in this node func (m *stateSync) HandleStateSyncMetadataRequest(metadataReq *typesCons.StateSyncMetadataRequest) { + logger := m.logger.With().Str("source", "HandleStateSyncMetadataRequest").Logger() + consensusMod := m.GetBus().GetConsensusModule() serverNodePeerAddress := consensusMod.GetNodeAddress() clientPeerAddress := metadataReq.PeerAddress @@ -27,59 +29,74 @@ func (m *stateSync) HandleStateSyncMetadataRequest(metadataReq *typesCons.StateS readCtx, err := m.GetBus().GetPersistenceModule().NewReadContext(int64(prevPersistedBlockHeight)) if err != nil { - m.logger.Err(err).Msg("Error creating read context") + logger.Err(err).Msg("Error creating read context") return } defer readCtx.Release() + // What is the maximum block height this node can share with others? maxHeight, err := readCtx.GetMaximumBlockHeight() if err != nil { - m.logger.Err(err).Msg("Error getting max height") + logger.Err(err).Msg("Error getting max height") return } + // What is the minimum block height this node can share with others? minHeight, err := readCtx.GetMinimumBlockHeight() if err != nil { - m.logger.Err(err).Msg("Error getting min height") + logger.Err(err).Msg("Error getting min height") return } + // Prepare state sync message to send to peer stateSyncMessage := typesCons.StateSyncMessage{ Message: &typesCons.StateSyncMessage_MetadataRes{ MetadataRes: &typesCons.StateSyncMetadataResponse{ PeerAddress: serverNodePeerAddress, MinHeight: minHeight, - MaxHeight: uint64(maxHeight), + MaxHeight: maxHeight, }, }, } - err = m.sendStateSyncMessage(&stateSyncMessage, cryptoPocket.AddressFromString(clientPeerAddress)) - if err != nil { - m.logger.Err(err).Msg("Error sending state sync message") - return + fields := map[string]interface{}{ + "max_height": maxHeight, + "min_height": minHeight, + "self": serverNodePeerAddress, + "peer": clientPeerAddress, } + + if err = m.sendStateSyncMessage(&stateSyncMessage, cryptoPocket.AddressFromString(clientPeerAddress)); err != nil { + logger.Err(err).Fields(fields).Msg("Error responding to state sync metadata request") + } + logger.Debug().Fields(fields).Msg("Successfully responded to state sync metadata request") } +// HandleGetBlockRequest processes a request from another to share a specific block at a specific node +// that this node likely has available. func (m *stateSync) HandleGetBlockRequest(blockReq *typesCons.GetBlockRequest) { + logger := m.logger.With().Str("source", "HandleGetBlockRequest").Logger() + consensusMod := m.GetBus().GetConsensusModule() serverNodePeerAddress := consensusMod.GetNodeAddress() clientPeerAddress := blockReq.PeerAddress - prevPersistedBlockHeight := consensusMod.CurrentHeight() - 1 + // Check if the block should be retrievable based on the node's consensus height + prevPersistedBlockHeight := consensusMod.CurrentHeight() - 1 if prevPersistedBlockHeight < blockReq.Height { - m.logger.Err(fmt.Errorf("requested block height: %d is higher than current persisted block height: %d", blockReq.Height, prevPersistedBlockHeight)) + logger.Error().Msgf("The requested block height (%d) is higher than current persisted block height (%d)", blockReq.Height, prevPersistedBlockHeight) return } - // get block from the persistence module + // Try to get block from the block store blockStore := m.GetBus().GetPersistenceModule().GetBlockStore() block, err := blockStore.GetBlock(blockReq.Height) if err != nil { - m.logger.Error().Err(err).Msgf("failed to get block at height %d", blockReq.Height) + logger.Error().Err(err).Msgf("failed to get block at height %d", blockReq.Height) return } + // Prepare state sync message to send to peer stateSyncMessage := typesCons.StateSyncMessage{ Message: &typesCons.StateSyncMessage_GetBlockRes{ GetBlockRes: &typesCons.GetBlockResponse{ @@ -89,9 +106,14 @@ func (m *stateSync) HandleGetBlockRequest(blockReq *typesCons.GetBlockRequest) { }, } - err = m.sendStateSyncMessage(&stateSyncMessage, cryptoPocket.AddressFromString(clientPeerAddress)) - if err != nil { - m.logger.Err(err).Msg("Error sending state sync message") - return + fields := map[string]interface{}{ + "height": blockReq.Height, + "self": serverNodePeerAddress, + "peer": clientPeerAddress, + } + + if err = m.sendStateSyncMessage(&stateSyncMessage, cryptoPocket.AddressFromString(clientPeerAddress)); err != nil { + logger.Err(err).Fields(fields).Msg("Error responding to state sync block request") } + logger.Debug().Fields(fields).Msg("Successfully responded to state sync block request") } diff --git a/state_machine/module.go b/state_machine/module.go index 3c86755fa..235154e8e 100644 --- a/state_machine/module.go +++ b/state_machine/module.go @@ -19,7 +19,10 @@ type stateMachineModule struct { *fsm.FSM logger *modules.Logger - // DEBUG_ONLY: debugChannels is only used for testing purposes, events pushed to it are emitted in testing + + // TEST_ONLY: debugChannels is only used for testing purposes. + // It is used to enable to aggregate and emit events during testing + // TODO: Find a way to avoid the need for this altogether or move it into an _test.go file debugChannels []modules.EventsChannel } @@ -49,6 +52,8 @@ func (*stateMachineModule) Create(bus modules.Bus, options ...modules.ModuleOpti m.logger.Fatal().Err(err).Msg("failed to pack state machine transition event") } bus.PublishEventToBus(newStateMachineTransitionEvent) + + // TEST_ONLY: Broadcast the events to additional channels used for testing purposes for _, channel := range m.debugChannels { channel <- newStateMachineTransitionEvent } @@ -82,7 +87,7 @@ func WithCustomStateMachine(stateMachine *fsm.FSM) modules.ModuleOption { } } -// WithDebugEventsChannel is used for testing purposes. It allows us to capture the events +// WithDebugEventsChannel is used for testing purposes only. It allows us to capture the events // from the FSM and publish them to debug channel for testing. func WithDebugEventsChannel(eventsChannel modules.EventsChannel) modules.ModuleOption { return func(m modules.InitializableModule) { From 7dc5870b6af7d0cdc53ecda3eb21ebd6b82a0229 Mon Sep 17 00:00:00 2001 From: Daniel Olshansky Date: Thu, 25 May 2023 20:26:20 -0700 Subject: [PATCH 050/100] Cleaning up some code --- consensus/module.go | 26 ++++---- consensus/state_sync/helpers.go | 6 +- consensus/state_sync/interfaces.go | 104 ----------------------------- consensus/state_sync_handler.go | 13 ++-- 4 files changed, 23 insertions(+), 126 deletions(-) delete mode 100644 consensus/state_sync/interfaces.go diff --git a/consensus/module.go b/consensus/module.go index 00bcafe9b..31100ed3b 100644 --- a/consensus/module.go +++ b/consensus/module.go @@ -24,7 +24,7 @@ import ( var _ modules.ConsensusModule = &consensusModule{} -// TODO: This should be configurable +// TODO: Make these configurable const ( metadataChannelSize = 1000 blocksChannelSize = 1000 @@ -33,18 +33,20 @@ const ( type consensusModule struct { base_modules.IntegratableModule - privateKey cryptoPocket.Ed25519PrivateKey + logger *modules.Logger + // General configs consCfg *configs.ConsensusConfig genesisState *genesis.GenesisState - logger *modules.Logger + // The key used for participating in consensus + privateKey cryptoPocket.Ed25519PrivateKey + nodeAddress string // m is a mutex used to control synchronization when multiple goroutines are accessing the struct and its fields / properties. - // - // The idea is that you want to acquire a Lock when you are writing values and a RLock when you want to make sure that no other goroutine is changing the values you are trying to read concurrently. - // - // Locking context should be the smallest possible but not smaller than a single "unit of work". + // The idea is that you want to acquire a Lock when you are writing values and a RLock when you want to make sure that no other + // goroutine is changing the values you are trying to read concurrently. Locking context should be the smallest possible but not + // smaller than a single "unit of work". m sync.RWMutex // Hotstuff @@ -52,9 +54,11 @@ type consensusModule struct { round uint64 step typesCons.HotstuffStep block *coreTypes.Block // The current block being proposed / voted on; it has not been committed to finality - // TODO(#315): Move the statefulness of `IndexedTransaction` to the persistence module - IndexedTransactions []coreTypes.IndexedTransaction // The current block applied transaction results / voted on; it has not been committed to finality + // Stores messages aggregated during a single consensus round from other validators + hotstuffMempool map[typesCons.HotstuffStep]*hotstuffFIFOMempool + + // Hotstuff safety prepareQC *typesCons.QuorumCertificate // Highest QC for which replica voted PRECOMMIT lockedQC *typesCons.QuorumCertificate // Highest QC for which replica voted COMMIT @@ -62,8 +66,6 @@ type consensusModule struct { leaderId *typesCons.NodeId nodeId typesCons.NodeId - nodeAddress string - // Module Dependencies // IMPROVE(#283): Investigate whether the current approach to how the `utilityUnitOfWork` should be // managed or changed. Also consider exposing a function that exposes the context @@ -74,8 +76,6 @@ type consensusModule struct { stateSync state_sync.StateSyncModule - hotstuffMempool map[typesCons.HotstuffStep]*hotstuffFIFOMempool - // block responses received from peers are collected in this channel blocksResponsesReceived chan *typesCons.GetBlockResponse diff --git a/consensus/state_sync/helpers.go b/consensus/state_sync/helpers.go index 08fd101a1..05df5c9d7 100644 --- a/consensus/state_sync/helpers.go +++ b/consensus/state_sync/helpers.go @@ -8,11 +8,9 @@ import ( // SendStateSyncMessage sends a state sync message after converting to any proto, to the given peer func (m *stateSync) sendStateSyncMessage(msg *typesCons.StateSyncMessage, dst cryptoPocket.Address) error { - anyMsg, err := anypb.New(msg) - if err != nil { + if anyMsg, err := anypb.New(msg); err != nil { return err - } - if err := m.GetBus().GetP2PModule().Send(dst, anyMsg); err != nil { + } else if err := m.GetBus().GetP2PModule().Send(dst, anyMsg); err != nil { m.logger.Error().Err(err).Msg(typesCons.ErrSendMessage.Error()) return err } diff --git a/consensus/state_sync/interfaces.go b/consensus/state_sync/interfaces.go deleted file mode 100644 index f6772ac45..000000000 --- a/consensus/state_sync/interfaces.go +++ /dev/null @@ -1,104 +0,0 @@ -package state_sync - -import ( - coreTypes "github.com/pokt-network/pocket/shared/core/types" - cryptoPocket "github.com/pokt-network/pocket/shared/crypto" -) - -// REFACTOR: Remove interface definitions from this file to their respective source code files, -// keep interface definitions in the same file with the implementation as in server.go - -type SyncState interface { - // latest local height - LatestHeight() int64 - // latest network height (from the aggregation of Peer Sync Meta) - LatestNetworkHeight() int64 - // retrieve peer meta (actively updated through churn management) - GetPeers() []PeerSyncMeta - // returns ordered array of missing block heights - GetMissingBlockHeights() []int64 -} - -type BlockRequestMessage interface { - // the height the peer wants from the block store - GetHeight() int64 -} - -type BlockResponseMessage interface { - // the bytes of the requested block from the block store - GetBlockBytes() []byte -} - -// TODO: needs to be shared between P2P as the Churn Management Process updates this information -type PeerSyncMeta interface { - // the unique identifier associated with the peer - GetPeerID() string - // the maximum height the peer has in the block store - GetMaxHeight() int64 - // the minimum height the peer has in the block store - GetMinHeight() int64 -} - -// LEGACY interface definition -// TODO(#352): delete this once state sync module is ready. -type StateSyncModuleLEGACY interface { - // -- Constructor Setter Functions -- - - // `HandleStateSync` function: - // - Create a Utility Unit Of Work - // - Block.ValidateBasic() - // - Consensus Module Replica Path - // - Prepare Block: utilityUnitOfWork.SetProposalBlock(block) - // - Apply Block: utilityUnitOfWork.ApplyBlock(block) - // - Validate Block: utilityUnitOfWork.StateHash == Block.StateHash - // - Store Block: consensusModule.CommitBlock() - HandleStateSyncMessage(msg BlockResponseMessage) - - // `GetPeerSyncMeta` function: - // - Retrieve a list of active peers with their metadata (identified and retrieved through P2P's `Churn Management`) - GetPeerMetadata(GetPeerSyncMeta func() (peers []PeerSyncMeta, err error)) - - // `typesP2P.Router#Send()` function contract: - // - sends data to an address via P2P network - NetworkSend(NetworkSend func(data []byte, address cryptoPocket.Address) error) - - // -- Sync modes -- - - // In the StateSync protocol, the Node fields valid BlockRequests from its peers to help them CatchUp to be Synced. - // This sub-protocol is continuous throughout the lifecycle of StateSync. - RunServerMode() - - // In SyncedMode, the Node is caught up to the latest block and is listening & waiting for the latest block to be passed - // to maintain a synchronous state with the global SyncState. - // - UpdatePeerMetadata from P2P module - // - UpdateSyncState - // - Rely on new blocks to be propagated via the P2P network after Validators reach consensus - // - If `localSyncState.Height < globalNetworkSyncState.Height` -> RunSyncMode() // careful about race-conditions - RunSyncedMode() - - // Runs sync mode 'service' that continuously runs while `localSyncState.Height < globalNetworkSyncState.Height` - // - UpdatePeerMetadata from P2P module - // - Retrieve missing blocks from peers - // - Process retrieved blocks - // - UpdateSyncState - // - If `localSyncState.Height == globalNetworkSyncState.Height` -> RunSyncedMode() - RunSyncMode() - - // Returns the `highest priority aka lowest height` missing block heights up to `max` heights - GetMissingBlockHeights(state SyncState, max int) (blockHeights []int64, err error) - - // Random selection of eligilbe peers enables a fair distribution of blockRequests over time via law of large numbers - // An eligible peer is when `PeerMeta.MinHeight <= blockHeight <= PeerMeta.MaxHeight` - GetRandomEligiblePeersForHeight(blockHeight int64) (eligiblePeer PeerSyncMeta, err error) - - // Uses `typesP2P.Router#Send()` to send a `BlockRequestMessage` to a specific peer - SendBlockRequest(peerId string) error - - // Uses 'typesP2P.Router#Send()' to send a `BlockResponseMessage` to a specific peer - // This function is used in 'ServerMode()' - HandleBlockRequest(message BlockRequestMessage) error - - // Uses `HandleBlock` to process retrieved blocks from peers - // Must update sync state using `SetMissingBlockHeight` - ProcessBlock(block *coreTypes.Block) error -} diff --git a/consensus/state_sync_handler.go b/consensus/state_sync_handler.go index a073b7575..12ba67771 100644 --- a/consensus/state_sync_handler.go +++ b/consensus/state_sync_handler.go @@ -16,7 +16,6 @@ func (m *consensusModule) HandleStateSyncMessage(stateSyncMessageAny *anypb.Any) if err != nil { return err } - stateSyncMessage, ok := msg.(*typesCons.StateSyncMessage) if !ok { return fmt.Errorf("failed to cast message to StateSyncMessage") @@ -41,10 +40,7 @@ func (m *consensusModule) handleStateSyncMessage(stateSyncMessage *typesCons.Sta } go m.stateSync.HandleStateSyncMetadataRequest(stateSyncMessage.GetMetadataReq()) return nil - case *typesCons.StateSyncMessage_MetadataRes: - m.logger.Info().Str("proto_type", "MetadataResponse").Msg("Handling StateSyncMessage MetadataRes") - m.metadataReceived <- stateSyncMessage.GetMetadataRes() - return nil + case *typesCons.StateSyncMessage_GetBlockReq: m.logger.Info().Str("proto_type", "GetBlockRequest").Msg("Handling StateSyncMessage GetBlockRequest") if !m.serverModeEnabled { @@ -53,10 +49,17 @@ func (m *consensusModule) handleStateSyncMessage(stateSyncMessage *typesCons.Sta } go m.stateSync.HandleGetBlockRequest(stateSyncMessage.GetGetBlockReq()) return nil + + case *typesCons.StateSyncMessage_MetadataRes: + m.logger.Info().Str("proto_type", "MetadataResponse").Msg("Handling StateSyncMessage MetadataRes") + m.metadataReceived <- stateSyncMessage.GetMetadataRes() + return nil + case *typesCons.StateSyncMessage_GetBlockRes: m.logger.Info().Str("proto_type", "GetBlockResponse").Msg("Handling StateSyncMessage GetBlockResponse") m.blocksResponsesReceived <- stateSyncMessage.GetGetBlockRes() return nil + default: return fmt.Errorf("unspecified state sync message type") } From 99455642aa7ee938b5a91cdeaf38c736c35843e8 Mon Sep 17 00:00:00 2001 From: Daniel Olshansky Date: Thu, 25 May 2023 20:29:52 -0700 Subject: [PATCH 051/100] Cleanup consensus module a bit --- consensus/module.go | 21 ++++++--------------- consensus/state_sync_handler.go | 4 ++-- 2 files changed, 8 insertions(+), 17 deletions(-) diff --git a/consensus/module.go b/consensus/module.go index 31100ed3b..b330f8b2d 100644 --- a/consensus/module.go +++ b/consensus/module.go @@ -67,13 +67,11 @@ type consensusModule struct { nodeId typesCons.NodeId // Module Dependencies - // IMPROVE(#283): Investigate whether the current approach to how the `utilityUnitOfWork` should be - // managed or changed. Also consider exposing a function that exposes the context - // to streamline how its accessed in the module (see the ticket). utilityUnitOfWork modules.UtilityUnitOfWork paceMaker pacemaker.Pacemaker leaderElectionMod leader_election.LeaderElectionModule + // State Sync stateSync state_sync.StateSyncModule // block responses received from peers are collected in this channel @@ -81,8 +79,6 @@ type consensusModule struct { // metadata responses received from peers are collected in this channel metadataReceived chan *typesCons.StateSyncMetadataResponse - - serverModeEnabled bool } func Create(bus modules.Bus, options ...modules.ModuleOption) (modules.Module, error) { @@ -129,33 +125,28 @@ func (*consensusModule) Create(bus modules.Bus, options ...modules.ModuleOption) for _, option := range options { option(m) } - bus.RegisterModule(m) runtimeMgr := bus.GetRuntimeMgr() - - consensusCfg := runtimeMgr.GetConfig().Consensus - - m.serverModeEnabled = consensusCfg.ServerModeEnabled + m.consCfg = runtimeMgr.GetConfig().Consensus genesisState := runtimeMgr.GetGenesis() if err := m.ValidateGenesis(genesisState); err != nil { return nil, fmt.Errorf("genesis validation failed: %w", err) } + m.genesisState = genesisState - privateKey, err := cryptoPocket.NewPrivateKey(consensusCfg.GetPrivateKey()) + privateKey, err := cryptoPocket.NewPrivateKey(m.consCfg.GetPrivateKey()) if err != nil { return nil, err } + m.privateKey = privateKey.(cryptoPocket.Ed25519PrivateKey) + m.nodeAddress = privateKey.Address().String() if m.updateNodeId() != nil { return nil, err } - m.privateKey = privateKey.(cryptoPocket.Ed25519PrivateKey) - m.consCfg = consensusCfg - m.genesisState = genesisState - m.metadataReceived = make(chan *typesCons.StateSyncMetadataResponse, metadataChannelSize) m.blocksResponsesReceived = make(chan *typesCons.GetBlockResponse, blocksChannelSize) diff --git a/consensus/state_sync_handler.go b/consensus/state_sync_handler.go index 12ba67771..d424018cf 100644 --- a/consensus/state_sync_handler.go +++ b/consensus/state_sync_handler.go @@ -34,7 +34,7 @@ func (m *consensusModule) handleStateSyncMessage(stateSyncMessage *typesCons.Sta switch stateSyncMessage.Message.(type) { case *typesCons.StateSyncMessage_MetadataReq: m.logger.Info().Str("proto_type", "MetadataRequest").Msg("Handling StateSyncMessage MetadataReq") - if !m.serverModeEnabled { + if !m.consCfg.ServerModeEnabled { m.logger.Warn().Msg("Node's server module is not enabled") return nil } @@ -43,7 +43,7 @@ func (m *consensusModule) handleStateSyncMessage(stateSyncMessage *typesCons.Sta case *typesCons.StateSyncMessage_GetBlockReq: m.logger.Info().Str("proto_type", "GetBlockRequest").Msg("Handling StateSyncMessage GetBlockRequest") - if !m.serverModeEnabled { + if !m.consCfg.ServerModeEnabled { m.logger.Warn().Msg("Node's server module is not enabled") return nil } From 8d9f8242fbafbcac80d615fd28d7a19cd1e5c1b8 Mon Sep 17 00:00:00 2001 From: Daniel Olshansky Date: Thu, 25 May 2023 21:28:10 -0700 Subject: [PATCH 052/100] Completely removed the state sync from the consensus module --- consensus/doc/PROTOCOL_STATE_SYNC.md | 8 +- consensus/fsm_handler.go | 8 +- consensus/module.go | 43 +---- consensus/module_consensus_debugging.go | 4 +- consensus/module_consensus_state_sync.go | 193 ++++++----------------- consensus/state_sync/helpers.go | 34 ++++ consensus/state_sync/module.go | 175 ++++++++++++++------ consensus/state_sync_handler.go | 4 +- state_machine/docs/README.md | 2 +- 9 files changed, 227 insertions(+), 244 deletions(-) diff --git a/consensus/doc/PROTOCOL_STATE_SYNC.md b/consensus/doc/PROTOCOL_STATE_SYNC.md index 2810a1142..a23923555 100644 --- a/consensus/doc/PROTOCOL_STATE_SYNC.md +++ b/consensus/doc/PROTOCOL_STATE_SYNC.md @@ -87,7 +87,7 @@ type StateSyncModule interface { // ... GetAggregatedStateSyncMetadata() *StateSyncMetadataResponse // Aggregated metadata received from peers. IsSynced() (bool, error) - StartSyncing() error + Start() error // ... } ``` @@ -105,7 +105,7 @@ For every new block and block proposal `Validator`s receive: According to the result of the `IsSynced()` function: -- If the node is out of sync, it runs `StartSyncing()` function. Node requests blocks one by one using the minimum and maximum height in aggregated state sync metadata. +- If the node is out of sync, it runs `Start()` function. Node requests blocks one by one using the minimum and maximum height in aggregated state sync metadata. - If the node is in sync with its peers it rejects the block and/or block proposal. ```mermaid @@ -120,7 +120,7 @@ flowchart TD %% is node synched - C --> |No| E[StartSyncing] + C --> |No| E[Start] C --> |Yes| F[Apply Block] %% syncing @@ -154,7 +154,7 @@ In `Unsynced` Mode, node transitions to `Sync Mode` by sending `Consensus_IsSync ### Sync Mode -In `Sync` Mode, the Node is catching up to the latest block by making `GetBlock` requests, via `StartSyncing()` function to eligible peers in its address book. A peer can handle a `GetBlock` request if `PeerSyncMetadata.MinHeight` <= `localSyncState.MaxHeight` <= `PeerSyncMetadata.MaxHeight`. +In `Sync` Mode, the Node is catching up to the latest block by making `GetBlock` requests, via `Start()` function to eligible peers in its address book. A peer can handle a `GetBlock` request if `PeerSyncMetadata.MinHeight` <= `localSyncState.MaxHeight` <= `PeerSyncMetadata.MaxHeight`. Though it is unspecified whether or not a Node may make `GetBlock` requests in order or in parallel, the cryptographic restraints of block processing require the Node to call `CommitBlock` sequentially until it is `Synced`. diff --git a/consensus/fsm_handler.go b/consensus/fsm_handler.go index 58e4df486..f6e20d515 100644 --- a/consensus/fsm_handler.go +++ b/consensus/fsm_handler.go @@ -72,7 +72,6 @@ func (m *consensusModule) HandleBootstrapped(msg *messaging.StateMachineTransiti // As soon as a node transitions to this mode, it will transition to the synching mode. func (m *consensusModule) HandleUnsynced(msg *messaging.StateMachineTransitionEvent) error { m.logger.Info().Msg("Node is in an Unsynced state. Consensus module is sending an even to transition to SYNCHING mode.") - return m.GetBus().GetStateMachineModule().SendEvent(coreTypes.StateMachineEvent_Consensus_IsSyncing) } @@ -80,12 +79,7 @@ func (m *consensusModule) HandleUnsynced(msg *messaging.StateMachineTransitionEv // In Sync mode, the node (validator or not starts syncing with the rest of the network. func (m *consensusModule) HandleSyncMode(msg *messaging.StateMachineTransitionEvent) error { m.logger.Info().Msg("Node is in Sync Mode. Consensus Module is about to start synching...") - - aggregatedMetadata := m.getAggregatedStateSyncMetadata() - m.stateSync.SetAggregatedMetadata(&aggregatedMetadata) - - go m.stateSync.StartSyncing() - + go m.stateSync.SyncStateSync() return nil } diff --git a/consensus/module.go b/consensus/module.go index b330f8b2d..154044ca5 100644 --- a/consensus/module.go +++ b/consensus/module.go @@ -24,12 +24,6 @@ import ( var _ modules.ConsensusModule = &consensusModule{} -// TODO: Make these configurable -const ( - metadataChannelSize = 1000 - blocksChannelSize = 1000 -) - type consensusModule struct { base_modules.IntegratableModule @@ -73,12 +67,6 @@ type consensusModule struct { // State Sync stateSync state_sync.StateSyncModule - - // block responses received from peers are collected in this channel - blocksResponsesReceived chan *typesCons.GetBlockResponse - - // metadata responses received from peers are collected in this channel - metadataReceived chan *typesCons.StateSyncMetadataResponse } func Create(bus modules.Bus, options ...modules.ModuleOption) (modules.Module, error) { @@ -147,9 +135,6 @@ func (*consensusModule) Create(bus modules.Bus, options ...modules.ModuleOption) return nil, err } - m.metadataReceived = make(chan *typesCons.StateSyncMetadataResponse, metadataChannelSize) - m.blocksResponsesReceived = make(chan *typesCons.GetBlockResponse, blocksChannelSize) - m.initMessagesPool() return m, nil @@ -178,38 +163,12 @@ func (m *consensusModule) Start() error { return err } - go m.metadataSyncLoop() - go m.blockApplicationLoop() - return nil } func (m *consensusModule) Stop() error { m.logger.Info().Msg("Stopping consensus module") - - m.logger.Log().Msg("Draining and closing metadataReceived and blockResponse channels") - for { - select { - case metaData, ok := <-m.metadataReceived: - if ok { - m.logger.Info().Msgf("Drained metadata message: %s", metaData) - } else { - close(m.metadataReceived) - return nil - } - case blockResponse, ok := <-m.blocksResponsesReceived: - if ok { - m.logger.Info().Msgf("Drained blockResponse message: %s", blockResponse) - } else { - close(m.blocksResponsesReceived) - return nil - } - default: - close(m.metadataReceived) - close(m.blocksResponsesReceived) - return nil - } - } + return nil } func (m *consensusModule) GetModuleName() string { diff --git a/consensus/module_consensus_debugging.go b/consensus/module_consensus_debugging.go index a302f1444..a4a9c79aa 100644 --- a/consensus/module_consensus_debugging.go +++ b/consensus/module_consensus_debugging.go @@ -70,5 +70,7 @@ func (m *consensusModule) GetLeaderForView(height, round uint64, step uint8) uin // TODO(#609): Refactor to use the test-only package and remove reflection func (m *consensusModule) PushStateSyncMetadataResponse(metadataRes *typesCons.StateSyncMetadataResponse) { - m.metadataReceived <- metadataRes + if err := m.stateSync.HandleStateSyncMetadataResponse(metadataRes); err != nil { + m.logger.Error().Err(err).Msg("failed to handle state sync metadata response") + } } diff --git a/consensus/module_consensus_state_sync.go b/consensus/module_consensus_state_sync.go index 782f62319..86751cb8e 100644 --- a/consensus/module_consensus_state_sync.go +++ b/consensus/module_consensus_state_sync.go @@ -1,20 +1,13 @@ package consensus import ( - "context" "fmt" - "time" typesCons "github.com/pokt-network/pocket/consensus/types" coreTypes "github.com/pokt-network/pocket/shared/core/types" - cryptoPocket "github.com/pokt-network/pocket/shared/crypto" "google.golang.org/protobuf/proto" - "google.golang.org/protobuf/types/known/anypb" ) -// TODO: Make this configurable in StateSyncConfig -const metadataSyncPeriod = 45 * time.Second - // REFACTOR(#434): Once we consolidated NodeIds/PeerIds, this could potentially be removed func (m *consensusModule) getNodeIdFromNodeAddress(peerId string) (uint64, error) { validators, err := m.getValidatorsAtHeight(m.CurrentHeight()) @@ -27,128 +20,68 @@ func (m *consensusModule) getNodeIdFromNodeAddress(peerId string) (uint64, error return uint64(valAddrToIdMap[peerId]), nil } -// blockApplicationLoop commits the blocks received from the `blocksResponsesReceived“ channel. -// It is intended to be run as a background process via `go blockApplicationLoop()` -func (m *consensusModule) blockApplicationLoop() { - logger := m.logger.With().Str("source", "blockApplicationLoop").Logger() - - // Blocks until m.blocksResponsesReceived is closed - for blockResponse := range m.blocksResponsesReceived { - block := blockResponse.Block - logger.Info().Msgf("Received new block at height %d.", block.BlockHeader.Height) - - // Check what the current latest committed block height is - maxPersistedHeight, err := m.maxPersistedBlockHeight() - if err != nil { - logger.Err(err).Msg("couldn't query max persisted height") - continue - } - - // Check if the block being synched is behind the current height - if block.BlockHeader.Height <= maxPersistedHeight { - logger.Debug().Msgf("Discarding block height %d, since node is ahead at height %d", block.BlockHeader.Height, maxPersistedHeight) - continue - } - - // Check if the block being synched is ahead of the current height - if block.BlockHeader.Height > m.CurrentHeight() { - logger.Info().Bool("TODO", true).Msgf("Received block at height %d, discarding as it is higher than the current height", block.BlockHeader.Height) - // TECHDEBT: we need to store block responses that we are not yet ready to validate so we can validate them on a subsequent iteration of this loop - // m.blocksResponsesReceived <- blockResponse - continue - } - - // Do basic block validation - if err = m.validateBlock(block); err != nil { - logger.Err(err).Msg("failed to validate block") - continue - } - - // Prepare the utility UOW of work to apply a new block - if err := m.refreshUtilityUnitOfWork(); err != nil { - m.logger.Error().Err(err).Msg("Could not refresh utility context") - continue - } - - // Update the leader proposing the block - leaderIdInt, err := m.getNodeIdFromNodeAddress(string(block.BlockHeader.ProposerAddress)) - if err != nil { - m.logger.Error().Err(err).Msg("Could not get leader id from leader address") - continue - } - m.leaderId = typesCons.NewNodeId(leaderIdInt) - - // Try to apply the block by validating the transactions in the block - if err := m.applyBlock(block); err != nil { - m.logger.Error().Err(err).Msg("Could not apply block") - continue - } - - // Try to commit the block to persistence - if err := m.commitBlock(block); err != nil { - m.logger.Error().Err(err).Msg("Could not commit block") - continue - } - logger.Info().Int64("height", int64(block.BlockHeader.Height)).Msgf("Block, at height %d is committed!", block.BlockHeader.Height) - - m.paceMaker.NewHeight() - m.publishStateSyncBlockCommittedEvent(block.BlockHeader.Height) +// tryToApplyRequestedBlock tries to commit the requested Block received from a peer +func (m *consensusModule) tryToApplyRequestedBlock(blockResponse *typesCons.GetBlockResponse) { + logger := m.logger.With().Str("source", "tryToApplyRequestedBlock").Logger() + + block := blockResponse.Block + logger.Info().Msgf("Received new block at height %d.", block.BlockHeader.Height) + + // Check what the current latest committed block height is + maxPersistedHeight, err := m.maxPersistedBlockHeight() + if err != nil { + logger.Err(err).Msg("couldn't query max persisted height") + return } -} -// metadataSyncLoop periodically sends metadata requests to its peers to aggregate metadata related to synching the state. -// It is intended to be run as a background process via `go metadataSyncLoop` -func (m *consensusModule) metadataSyncLoop() error { - logger := m.logger.With().Str("source", "metadataSyncLoop").Logger() - ctx := context.TODO() - - ticker := time.NewTicker(metadataSyncPeriod) - for { - select { - case <-ticker.C: - logger.Info().Msg("Background metadata sync check triggered") - if err := m.broadcastMetadataRequests(); err != nil { - logger.Error().Err(err).Msg("Failed to send metadata requests") - return err - } - - case <-ctx.Done(): - ticker.Stop() - return nil - } + // Check if the block being synched is behind the current height + if block.BlockHeader.Height <= maxPersistedHeight { + logger.Debug().Msgf("Discarding block height %d, since node is ahead at height %d", block.BlockHeader.Height, maxPersistedHeight) + return } -} -// broadcastMetadataRequests sends a metadata request to all peers in the network to understand -// the state of the network and determine if the node is behind. -func (m *consensusModule) broadcastMetadataRequests() error { - stateSyncMetadataReqMessage := &typesCons.StateSyncMessage{ - Message: &typesCons.StateSyncMessage_MetadataReq{ - MetadataReq: &typesCons.StateSyncMetadataRequest{ - PeerAddress: m.GetBus().GetConsensusModule().GetNodeAddress(), - }, - }, + // Check if the block being synched is ahead of the current height + if block.BlockHeader.Height > m.CurrentHeight() { + // TECHDEBT: we need to store block responses that we are not yet ready to validate so we can validate them on a subsequent iteration of this loop + logger.Info().Bool("TODO", true).Msgf("Received block at height %d, discarding as it is higher than the current height", block.BlockHeader.Height) + return } - // TECHDEBT: This should be sent to all peers (full nodes, servicers, etc...), not just validators - validators, err := m.getValidatorsAtHeight(m.CurrentHeight()) + // Do basic block validation + if err = m.validateBlock(block); err != nil { + logger.Err(err).Msg("failed to validate block") + return + } + + // Prepare the utility UOW of work to apply a new block + if err := m.refreshUtilityUnitOfWork(); err != nil { + m.logger.Error().Err(err).Msg("Could not refresh utility context") + return + } + + // Update the leader proposing the block + leaderIdInt, err := m.getNodeIdFromNodeAddress(string(block.BlockHeader.ProposerAddress)) if err != nil { - m.logger.Error().Err(err).Msg(typesCons.ErrPersistenceGetAllValidators.Error()) + m.logger.Error().Err(err).Msg("Could not get leader id from leader address") + return } + m.leaderId = typesCons.NewNodeId(leaderIdInt) - for _, val := range validators { - anyMsg, err := anypb.New(stateSyncMetadataReqMessage) - if err != nil { - return err - } - // TECHDEBT: Revisit why we're not using `Broadcast` here instead of `Send`. - if err := m.GetBus().GetP2PModule().Send(cryptoPocket.AddressFromString(val.GetAddress()), anyMsg); err != nil { - m.logger.Error().Err(err).Msg(typesCons.ErrSendMessage.Error()) - return err - } + // Try to apply the block by validating the transactions in the block + if err := m.applyBlock(block); err != nil { + m.logger.Error().Err(err).Msg("Could not apply block") + return } - return nil + // Try to commit the block to persistence + if err := m.commitBlock(block); err != nil { + m.logger.Error().Err(err).Msg("Could not commit block") + return + } + logger.Info().Int64("height", int64(block.BlockHeader.Height)).Msgf("Block, at height %d is committed!", block.BlockHeader.Height) + + m.paceMaker.NewHeight() + m.publishStateSyncBlockCommittedEvent(block.BlockHeader.Height) } func (m *consensusModule) validateBlock(block *coreTypes.Block) error { @@ -172,27 +105,3 @@ func (m *consensusModule) validateBlock(block *coreTypes.Block) error { return nil } - -func (m *consensusModule) getAggregatedStateSyncMetadata() typesCons.StateSyncMetadataResponse { - // TECHDEBT(#686): This should be an ongoing background passive state sync process but just - // capturing the available messages at the time that this function was called is good enough for now. - chanLen := len(m.metadataReceived) - m.logger.Info().Msgf("Looping over %d state sync metadata responses", chanLen) - - minHeight, maxHeight := uint64(1), uint64(1) - for i := 0; i < chanLen; i++ { - metadata := <-m.metadataReceived - if metadata.MaxHeight > maxHeight { - maxHeight = metadata.MaxHeight - } - if metadata.MinHeight < minHeight { - minHeight = metadata.MinHeight - } - } - - return typesCons.StateSyncMetadataResponse{ - PeerAddress: "unused_aggregated_metadata_address", - MinHeight: minHeight, - MaxHeight: maxHeight, - } -} diff --git a/consensus/state_sync/helpers.go b/consensus/state_sync/helpers.go index 05df5c9d7..dc9b0a1c1 100644 --- a/consensus/state_sync/helpers.go +++ b/consensus/state_sync/helpers.go @@ -2,6 +2,7 @@ package state_sync import ( typesCons "github.com/pokt-network/pocket/consensus/types" + coreTypes "github.com/pokt-network/pocket/shared/core/types" cryptoPocket "github.com/pokt-network/pocket/shared/crypto" "google.golang.org/protobuf/types/known/anypb" ) @@ -16,3 +17,36 @@ func (m *stateSync) sendStateSyncMessage(msg *typesCons.StateSyncMessage, dst cr } return nil } + +func (m *stateSync) getValidatorsAtHeight(height uint64) ([]*coreTypes.Actor, error) { + readCtx, err := m.GetBus().GetPersistenceModule().NewReadContext(int64(height)) + if err != nil { + return nil, err + } + defer readCtx.Release() + return readCtx.GetAllValidators(int64(height)) +} + +// TECHDEBT(#686): This should be an ongoing background passive state sync process but just +// capturing the available messages at the time that this function was called is good enough for now. +func (m *stateSync) getAggregatedStateSyncMetadata() typesCons.StateSyncMetadataResponse { + chanLen := len(m.metadataReceived) + m.logger.Info().Msgf("Looping over %d state sync metadata responses", chanLen) + + minHeight, maxHeight := uint64(1), uint64(1) + for i := 0; i < chanLen; i++ { + metadata := <-m.metadataReceived + if metadata.MaxHeight > maxHeight { + maxHeight = metadata.MaxHeight + } + if metadata.MinHeight < minHeight { + minHeight = metadata.MinHeight + } + } + + return typesCons.StateSyncMetadataResponse{ + PeerAddress: "unused_aggregated_metadata_address", + MinHeight: minHeight, + MaxHeight: maxHeight, + } +} diff --git a/consensus/state_sync/module.go b/consensus/state_sync/module.go index f8b5428db..e144d1857 100644 --- a/consensus/state_sync/module.go +++ b/consensus/state_sync/module.go @@ -1,6 +1,7 @@ package state_sync import ( + "context" "encoding/hex" "fmt" "time" @@ -16,18 +17,22 @@ import ( ) const ( - stateSyncModuleName = "stateSyncModule" - committedBlocsChannelSize = 100 // + stateSyncModuleName = "stateSyncModule" + // TODO: Make these configurable blockWaitingPeriod = 30 * time.Second + committedBlocsChannelSize = 100 + metadataChannelSize = 1000 + blocksChannelSize = 1000 + metadataSyncPeriod = 45 * time.Second ) type StateSyncModule interface { modules.Module StateSyncServerModule - SetAggregatedMetadata(aggregatedMetaData *typesCons.StateSyncMetadataResponse) - StartSyncing() + SyncStateSync() error HandleStateSyncBlockCommittedEvent(message *anypb.Any) error + HandleStateSyncMetadataResponse(*typesCons.StateSyncMetadataResponse) error } var ( @@ -37,9 +42,12 @@ var ( ) type stateSync struct { - bus modules.Bus - logger *modules.Logger - aggregatedMetaData *typesCons.StateSyncMetadataResponse + bus modules.Bus + logger *modules.Logger + + // metadata responses received from peers are collected in this channel + metadataReceived chan *typesCons.StateSyncMetadataResponse + committedBlocksChannel chan uint64 } @@ -47,24 +55,6 @@ func CreateStateSync(bus modules.Bus, options ...modules.ModuleOption) (modules. return new(stateSync).Create(bus, options...) } -func (m *stateSync) HandleStateSyncBlockCommittedEvent(event *anypb.Any) error { - evt, err := codec.GetCodec().FromAny(event) - if err != nil { - return err - } - - if event.MessageName() == messaging.StateSyncBlockCommittedEventType { - newCommitBlockEvent, ok := evt.(*messaging.StateSyncBlockCommittedEvent) - if !ok { - return fmt.Errorf("failed to cast event to StateSyncBlockCommittedEvent") - } - - m.committedBlocksChannel <- newCommitBlockEvent.Height - } - - return nil -} - func (*stateSync) Create(bus modules.Bus, options ...modules.ModuleOption) (modules.Module, error) { m := &stateSync{} @@ -75,21 +65,15 @@ func (*stateSync) Create(bus modules.Bus, options ...modules.ModuleOption) (modu bus.RegisterModule(m) m.logger = logger.Global.CreateLoggerForModule(m.GetModuleName()) + m.metadataReceived = make(chan *typesCons.StateSyncMetadataResponse, metadataChannelSize) m.committedBlocksChannel = make(chan uint64, committedBlocsChannelSize) return m, nil } -func (m *stateSync) SetAggregatedMetadata(aggregatedMetaData *typesCons.StateSyncMetadataResponse) { - m.aggregatedMetaData = aggregatedMetaData -} - -// TODO: Remove this. This function added as a hack to be able to check return value of m.Start(). -func (m *stateSync) StartSyncing() { - err := m.Start() - if err != nil { - m.logger.Error().Err(err).Msg("couldn't start state sync") - } +func (m *stateSync) Start() error { + go m.metadataSyncLoop() + return nil } // Start performs state sync @@ -97,7 +81,7 @@ func (m *stateSync) StartSyncing() { // requests missing blocks starting from its current height to the aggregated metadata's maxHeight, // once the requested block is received and committed by consensus module, sends the next request for the next block, // when all blocks are received and committed, stops the state sync process by calling its `Stop()` function. -func (m *stateSync) Start() error { +func (m *stateSync) SyncStateSync() error { consensusMod := m.bus.GetConsensusModule() currentHeight := consensusMod.CurrentHeight() nodeAddress := consensusMod.GetNodeAddress() @@ -108,14 +92,19 @@ func (m *stateSync) Start() error { } defer readCtx.Release() + // TECHDEBT: We want to request blocks from all peers (staked or not) as opposed to just validators validators, err := readCtx.GetAllValidators(int64(currentHeight)) if err != nil { return err } + // Understand the view of the network + aggregatedMetaData := m.getAggregatedStateSyncMetadata() + maxHeight := aggregatedMetaData.MaxHeight + // requests blocks from the current height to the aggregated metadata height - for currentHeight <= m.aggregatedMetaData.MaxHeight { - m.logger.Info().Msgf("Sync is requesting block: %d, ending height: %d", currentHeight, m.aggregatedMetaData.MaxHeight) + for currentHeight <= maxHeight { + m.logger.Info().Msgf("Sync is requesting block: %d, ending height: %d", currentHeight, maxHeight) // form the get block request message stateSyncGetBlockMessage := &typesCons.StateSyncMessage{ @@ -127,32 +116,54 @@ func (m *stateSync) Start() error { }, } - // broadcast the get block request message to all validators - // TODO: use raintree broadcast + // Broadcast the get block request message from all the available peers on the network + // TODO: Use P2P.broadcast instead of looping over the validators and sending the message to each one for _, val := range validators { if err := m.sendStateSyncMessage(stateSyncGetBlockMessage, cryptoPocket.AddressFromString(val.GetAddress())); err != nil { return err } } - // wait to receive requested block for blockWaitingPeriod. If the block is received next block will be requested, otherwise the current block will be requested again + // Wait for the consensus module to commit the requested block + // If the block is not committed within some time, try re-requesting the block select { case blockHeight := <-m.committedBlocksChannel: // requested block is received and committed, continue to request the next block from the current height m.logger.Info().Msgf("Block %d is committed!", blockHeight) case <-time.After(blockWaitingPeriod): + m.logger.Warn().Msgf("Timed out waiting for block %d to be committed...", currentHeight) } - // wait for the requested block to be received and committed by consensus module - + // Update the height and continue catching up to the latest known state currentHeight = consensusMod.CurrentHeight() } // syncing is complete and all requested blocks are committed, stop the state sync module - return m.Stop() + return m.pauseSynching() +} + +func (m *stateSync) HandleStateSyncMetadataResponse(res *typesCons.StateSyncMetadataResponse) error { + m.metadataReceived <- res + return nil +} + +func (m *stateSync) HandleStateSyncBlockCommittedEvent(event *anypb.Any) error { + evt, err := codec.GetCodec().FromAny(event) + if err != nil { + return err + } + + if event.MessageName() == messaging.StateSyncBlockCommittedEventType { + newCommitBlockEvent, ok := evt.(*messaging.StateSyncBlockCommittedEvent) + if !ok { + return fmt.Errorf("failed to cast event to StateSyncBlockCommittedEvent") + } + m.committedBlocksChannel <- newCommitBlockEvent.Height + } + return nil } // Stop stops the state sync process, and sends `Consensus_IsSyncedValidator` FSM event -func (m *stateSync) Stop() error { +func (m *stateSync) pauseSynching() error { currentHeight := m.bus.GetConsensusModule().CurrentHeight() nodeAddress := m.bus.GetConsensusModule().GetNodeAddress() @@ -177,6 +188,24 @@ func (m *stateSync) Stop() error { return m.GetBus().GetStateMachineModule().SendEvent(coreTypes.StateMachineEvent_Consensus_IsSyncedNonValidator) } +func (m *stateSync) Stop() error { + m.logger.Log().Msg("Draining and closing metadataReceived and blockResponse channels") + for { + select { + case metaData, ok := <-m.metadataReceived: + if ok { + m.logger.Info().Msgf("Drained metadata message: %s", metaData) + } else { + close(m.metadataReceived) + return nil + } + default: + close(m.metadataReceived) + return nil + } + } +} + func (m *stateSync) SetBus(pocketBus modules.Bus) { m.bus = pocketBus } @@ -191,3 +220,59 @@ func (m *stateSync) GetBus() modules.Bus { func (m *stateSync) GetModuleName() string { return stateSyncModuleName } + +// metadataSyncLoop periodically sends metadata requests to its peers to aggregate metadata related to synching the state. +// It is intended to be run as a background process via `go metadataSyncLoop` +func (m *stateSync) metadataSyncLoop() error { + logger := m.logger.With().Str("source", "metadataSyncLoop").Logger() + ctx := context.TODO() + + ticker := time.NewTicker(metadataSyncPeriod) + for { + select { + case <-ticker.C: + logger.Info().Msg("Background metadata sync check triggered") + if err := m.broadcastMetadataRequests(); err != nil { + logger.Error().Err(err).Msg("Failed to send metadata requests") + return err + } + + case <-ctx.Done(): + ticker.Stop() + return nil + } + } +} + +// broadcastMetadataRequests sends a metadata request to all peers in the network to understand +// the state of the network and determine if the node is behind. +func (m *stateSync) broadcastMetadataRequests() error { + stateSyncMetadataReqMessage := &typesCons.StateSyncMessage{ + Message: &typesCons.StateSyncMessage_MetadataReq{ + MetadataReq: &typesCons.StateSyncMetadataRequest{ + PeerAddress: m.GetBus().GetConsensusModule().GetNodeAddress(), + }, + }, + } + + currentHeight := m.bus.GetConsensusModule().CurrentHeight() + // TECHDEBT: This should be sent to all peers (full nodes, servicers, etc...), not just validators + validators, err := m.getValidatorsAtHeight(currentHeight) + if err != nil { + m.logger.Error().Err(err).Msg(typesCons.ErrPersistenceGetAllValidators.Error()) + } + + for _, val := range validators { + anyMsg, err := anypb.New(stateSyncMetadataReqMessage) + if err != nil { + return err + } + // TECHDEBT: Revisit why we're not using `Broadcast` here instead of `Send`. + if err := m.GetBus().GetP2PModule().Send(cryptoPocket.AddressFromString(val.GetAddress()), anyMsg); err != nil { + m.logger.Error().Err(err).Msg(typesCons.ErrSendMessage.Error()) + return err + } + } + + return nil +} diff --git a/consensus/state_sync_handler.go b/consensus/state_sync_handler.go index d424018cf..66cb69eea 100644 --- a/consensus/state_sync_handler.go +++ b/consensus/state_sync_handler.go @@ -52,12 +52,12 @@ func (m *consensusModule) handleStateSyncMessage(stateSyncMessage *typesCons.Sta case *typesCons.StateSyncMessage_MetadataRes: m.logger.Info().Str("proto_type", "MetadataResponse").Msg("Handling StateSyncMessage MetadataRes") - m.metadataReceived <- stateSyncMessage.GetMetadataRes() + go m.stateSync.HandleStateSyncMetadataResponse(stateSyncMessage.GetMetadataRes()) return nil case *typesCons.StateSyncMessage_GetBlockRes: m.logger.Info().Str("proto_type", "GetBlockResponse").Msg("Handling StateSyncMessage GetBlockResponse") - m.blocksResponsesReceived <- stateSyncMessage.GetGetBlockRes() + go m.tryToApplyRequestedBlock(stateSyncMessage.GetGetBlockRes()) return nil default: diff --git a/state_machine/docs/README.md b/state_machine/docs/README.md index d0e3a03ec..b816e89c0 100644 --- a/state_machine/docs/README.md +++ b/state_machine/docs/README.md @@ -66,7 +66,7 @@ These are the main building blocks: - **P2P_Bootstrapped**: The Consensus module handles `P2P_Bootstrapped` -> triggers a `Consensus_IsUnsynced` event -> transitions to `Consensus_Unsynced`. - **Consensus_Unsynced**: Node is out of sync, the Consensus module sends `Consensus_IsSyncing` event -> transitions to `Consensus_SyncMode` to start syncing with the rest of the network. -- **Consensus_SyncMode**: The Consensus module runs `StartSyncing()` and requests blocks one by one from peers in its address book. +- **Consensus_SyncMode**: The Consensus module runs `Start()` and requests blocks one by one from peers in its address book. - **Node finishes syncing**: When node completes syncing: - if the node is a validator, the Consensus module sends `Consensus_IsSyncedValidator` event -> transitions to `Consensus_Pacemaker`. - if the node is not a validator, the Consensus module sends `Consensus_IsSyncedNonValidator` event -> transitions to `Consensus_Synced`. From 60966cbf4b1bf95bde29239f1224b1f88df83d2b Mon Sep 17 00:00:00 2001 From: Daniel Olshansky Date: Tue, 30 May 2023 12:31:43 -0700 Subject: [PATCH 053/100] Interim commit to help me pick up --- consensus/fsm_handler.go | 2 ++ consensus/module.go | 5 +++++ 2 files changed, 7 insertions(+) diff --git a/consensus/fsm_handler.go b/consensus/fsm_handler.go index f6e20d515..f265594b4 100644 --- a/consensus/fsm_handler.go +++ b/consensus/fsm_handler.go @@ -63,6 +63,8 @@ func (m *consensusModule) handleStateTransitionEvent(msg *messaging.StateMachine // This is a transition mode from node bootstrapping to a node being out-of-sync. func (m *consensusModule) HandleBootstrapped(msg *messaging.StateMachineTransitionEvent) error { m.logger.Info().Msg("Node is in the bootstrapped state. Consensus module NOOP.") + // TODO_IN_THIS_COMMIT: Pick up here + // return m.GetBus().GetStateMachineModule().SendEvent(coreTypes.StateMachineEvent_Consensus_IsUnsynced) return nil } diff --git a/consensus/module.go b/consensus/module.go index 154044ca5..2a1279c44 100644 --- a/consensus/module.go +++ b/consensus/module.go @@ -163,6 +163,11 @@ func (m *consensusModule) Start() error { return err } + // TODO_IN_THIS_COMMIT: Pick up here + // if err := m.stateSync.Start(); err != nil { + // return err + // } + return nil } From f86b49178d347fecc05c3502038f83ebd95f7f8d Mon Sep 17 00:00:00 2001 From: github-actions Date: Thu, 8 Jun 2023 19:17:07 +0000 Subject: [PATCH 054/100] add generated helm docs --- charts/pocket/README.md | 198 ++++++++++++++++++++-------------------- 1 file changed, 99 insertions(+), 99 deletions(-) diff --git a/charts/pocket/README.md b/charts/pocket/README.md index f27e47dab..405cecb3e 100644 --- a/charts/pocket/README.md +++ b/charts/pocket/README.md @@ -20,9 +20,9 @@ Here is an example of the private key stored in a Kubernetes Secret: apiVersion: v1 kind: Secret metadata: - name: validator-private-key + name: validator-private-key stringData: - "1919605e50c0a60177d0554b528c9810313523b3": "4d6d24690137b0c43dee3490cafa4ca49cc1c4facdd1a73be1255a5b752223dc2b7672ea2493dcdd0efc6c6caf1073c4f3ff8508c686031e2d1244c02f0b900d" + "1919605e50c0a60177d0554b528c9810313523b3": "4d6d24690137b0c43dee3490cafa4ca49cc1c4facdd1a73be1255a5b752223dc2b7672ea2493dcdd0efc6c6caf1073c4f3ff8508c686031e2d1244c02f0b900d" ``` This secret can then be utilized with this helm chart using the following variables: @@ -35,100 +35,100 @@ privateKeySecretKeyRef: ## Values -| Key | Type | Default | Description | -| --------------------------------------------------------------- | ------ | ------------------------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------- | -| affinity | object | `{}` | | -| config.consensus.max_mempool_bytes | int | `500000000` | | -| config.consensus.pacemaker_config.debug_time_between_steps_msec | int | `1000` | | -| config.consensus.pacemaker_config.manual | bool | `true` | | -| config.consensus.pacemaker_config.timeout_msec | int | `10000` | | -| config.consensus.private_key | string | `""` | | -| config.consensus.server_mode_enabled | bool | `true` | | -| config.fisherman.enabled | bool | `false` | | -| config.logger.format | string | `"json"` | | -| config.logger.level | string | `"debug"` | | -| config.p2p.hostname | string | `""` | | -| config.p2p.is_empty_connection_type | bool | `false` | | -| config.p2p.max_mempool_count | int | `100000` | | -| config.p2p.port | int | `42069` | | -| config.p2p.private_key | string | `""` | | -| config.p2p.use_rain_tree | bool | `true` | | -| config.persistence.block_store_path | string | `"/pocket/data/block-store"` | | -| config.persistence.health_check_period | string | `"30s"` | | -| config.persistence.max_conn_idle_time | string | `"1m"` | | -| config.persistence.max_conn_lifetime | string | `"5m"` | | -| config.persistence.max_conns_count | int | `50` | | -| config.persistence.min_conns_count | int | `1` | | -| config.persistence.node_schema | string | `"pocket"` | | -| config.persistence.postgres_url | string | `""` | | -| config.persistence.trees_store_dir | string | `"/pocket/data/trees"` | | -| config.persistence.tx_indexer_path | string | `"/pocket/data/tx-indexer"` | | -| config.private_key | string | `""` | | -| config.root_directory | string | `"/go/src/github.com/pocket-network"` | | -| config.rpc.enabled | bool | `true` | | -| config.rpc.port | string | `"50832"` | | -| config.rpc.timeout | int | `30000` | | -| config.rpc.use_cors | bool | `false` | | -| config.servicer.enabled | bool | `false` | | -| config.telemetry.address | string | `"0.0.0.0:9000"` | | -| config.telemetry.enabled | bool | `true` | | -| config.telemetry.endpoint | string | `"/metrics"` | | -| config.utility.max_mempool_transaction_bytes | int | `1073741824` | | -| config.utility.max_mempool_transactions | int | `9000` | | -| externalPostgresql.database | string | `""` | name of the external database | -| externalPostgresql.enabled | bool | `false` | use external postgres database | -| externalPostgresql.host | string | `""` | host of the external database | -| externalPostgresql.passwordSecretKeyRef.key | string | `""` | key in the Secret that contains the database password | -| externalPostgresql.passwordSecretKeyRef.name | string | `""` | name of the Secret in the same namespace that contains the database password | -| externalPostgresql.port | int | `5432` | port of the external database | -| externalPostgresql.userSecretKeyRef.key | string | `""` | key in the Secret that contains the database user | -| externalPostgresql.userSecretKeyRef.name | string | `""` | name of the Secret in the same namespace that contains the database user | -| fullnameOverride | string | `""` | | -| genesis.externalConfigMap.key | string | `""` | Key in the ConfigMap that contains the genesis file, only used if `genesis.preProvisionedGenesis.enabled` is false | -| genesis.externalConfigMap.name | string | `""` | Name of the ConfigMap that contains the genesis file, only used if `genesis.preProvisionedGenesis.enabled` is false | -| genesis.preProvisionedGenesis.enabled | bool | `true` | Use genesis file supplied by the Helm chart, of false refer to `genesis.externalConfigMap` | -| genesis.preProvisionedGenesis.type | string | `"devnet"` | Type of the genesis file to use, can be `devnet`, `testnet`, `mainnet` | -| global.postgresql.service.ports.postgresql | string | `"5432"` | | -| image.pullPolicy | string | `"IfNotPresent"` | image pull policy | -| image.repository | string | `"ghcr.io/pokt-network/pocket-v1"` | image repository | -| image.tag | string | `"latest"` | image tag | -| imagePullSecrets | list | `[]` | image pull secrets | -| ingress.annotations | object | `{}` | | -| ingress.className | string | `""` | | -| ingress.enabled | bool | `false` | enable ingress for RPC port | -| ingress.hosts[0].host | string | `"chart-example.local"` | | -| ingress.hosts[0].paths[0].path | string | `"/"` | | -| ingress.hosts[0].paths[0].pathType | string | `"ImplementationSpecific"` | | -| ingress.tls | list | `[]` | | -| nameOverride | string | `""` | | -| nodeSelector | object | `{}` | | -| nodeType | string | `"full"` | type of the blockchain node to run. Can be either `full`, `validator`, `servicer`, `fishermen` | -| persistence.accessModes | list | `["ReadWriteOnce"]` | persistent Volume Access Modes | -| persistence.annotations | object | `{}` | annotations of the persistent volume claim | -| persistence.dataSource | object | `{}` | custom data source of the persistent volume claim | -| persistence.enabled | bool | `true` | enable persistent volume claim | -| persistence.existingClaim | string | `""` | name of an existing PVC to use for persistence | -| persistence.reclaimPolicy | string | `"Delete"` | persistent volume reclaim policy | -| persistence.selector | object | `{}` | selector to match an existing Persistent Volume | -| persistence.size | string | `"8Gi"` | size of the persistent volume claim | -| persistence.storageClass | string | `""` | storage class of the persistent volume claim | -| podAnnotations | object | `{}` | pod annotations | -| podSecurityContext | object | `{}` | | -| postgresql.enabled | bool | `true` | deploy postgresql database automatically. Refer to https://github.com/bitnami/charts/blob/main/bitnami/postgresql/values.yaml for additional options. | -| postgresql.primary.persistence.enabled | bool | `false` | enable persistent volume claim for PostgreSQL | -| postgresql.primary.persistence.size | string | `"8Gi"` | size of the persistent volume claim for PostgreSQL | -| privateKeySecretKeyRef.key | string | `""` | REQUIRED. Key in the Secret that contains the private key of the node | -| privateKeySecretKeyRef.name | string | `""` | REQUIRED. Name of the Secret in the same namespace that contains the private key of the node | -| resources | object | `{}` | resources limits and requests | -| securityContext | object | `{}` | | -| service.annotations | object | `{}` | service annotations | -| service.nameOverride | string | `""` | | -| service.ports.consensus | int | `42069` | consensus port of the node | -| service.ports.metrics | int | `9000` | OpenTelemetry metrics port of the node | -| service.ports.rpc | int | `50832` | rpc port of the node | -| service.type | string | `"ClusterIP"` | service type | -| serviceAccount.annotations | object | `{}` | Annotations to add to the service account | -| serviceAccount.create | bool | `true` | Specifies whether a service account should be created | -| serviceAccount.name | string | `""` | The name of the service account to use. If not set and create is true, a name is generated using the fullname template | -| serviceMonitor.enabled | bool | `false` | enable service monitor | -| tolerations | list | `[]` | | +| Key | Type | Default | Description | +|-----|------|---------|-------------| +| affinity | object | `{}` | | +| config.consensus.max_mempool_bytes | int | `500000000` | | +| config.consensus.pacemaker_config.debug_time_between_steps_msec | int | `1000` | | +| config.consensus.pacemaker_config.manual | bool | `true` | | +| config.consensus.pacemaker_config.timeout_msec | int | `10000` | | +| config.consensus.private_key | string | `""` | | +| config.consensus.server_mode_enabled | bool | `true` | | +| config.fisherman.enabled | bool | `false` | | +| config.logger.format | string | `"json"` | | +| config.logger.level | string | `"debug"` | | +| config.p2p.hostname | string | `""` | | +| config.p2p.is_empty_connection_type | bool | `false` | | +| config.p2p.max_mempool_count | int | `100000` | | +| config.p2p.port | int | `42069` | | +| config.p2p.private_key | string | `""` | | +| config.p2p.use_rain_tree | bool | `true` | | +| config.persistence.block_store_path | string | `"/pocket/data/block-store"` | | +| config.persistence.health_check_period | string | `"30s"` | | +| config.persistence.max_conn_idle_time | string | `"1m"` | | +| config.persistence.max_conn_lifetime | string | `"5m"` | | +| config.persistence.max_conns_count | int | `50` | | +| config.persistence.min_conns_count | int | `1` | | +| config.persistence.node_schema | string | `"pocket"` | | +| config.persistence.postgres_url | string | `""` | | +| config.persistence.trees_store_dir | string | `"/pocket/data/trees"` | | +| config.persistence.tx_indexer_path | string | `"/pocket/data/tx-indexer"` | | +| config.private_key | string | `""` | | +| config.root_directory | string | `"/go/src/github.com/pocket-network"` | | +| config.rpc.enabled | bool | `true` | | +| config.rpc.port | string | `"50832"` | | +| config.rpc.timeout | int | `30000` | | +| config.rpc.use_cors | bool | `false` | | +| config.servicer.enabled | bool | `false` | | +| config.telemetry.address | string | `"0.0.0.0:9000"` | | +| config.telemetry.enabled | bool | `true` | | +| config.telemetry.endpoint | string | `"/metrics"` | | +| config.utility.max_mempool_transaction_bytes | int | `1073741824` | | +| config.utility.max_mempool_transactions | int | `9000` | | +| externalPostgresql.database | string | `""` | name of the external database | +| externalPostgresql.enabled | bool | `false` | use external postgres database | +| externalPostgresql.host | string | `""` | host of the external database | +| externalPostgresql.passwordSecretKeyRef.key | string | `""` | key in the Secret that contains the database password | +| externalPostgresql.passwordSecretKeyRef.name | string | `""` | name of the Secret in the same namespace that contains the database password | +| externalPostgresql.port | int | `5432` | port of the external database | +| externalPostgresql.userSecretKeyRef.key | string | `""` | key in the Secret that contains the database user | +| externalPostgresql.userSecretKeyRef.name | string | `""` | name of the Secret in the same namespace that contains the database user | +| fullnameOverride | string | `""` | | +| genesis.externalConfigMap.key | string | `""` | Key in the ConfigMap that contains the genesis file, only used if `genesis.preProvisionedGenesis.enabled` is false | +| genesis.externalConfigMap.name | string | `""` | Name of the ConfigMap that contains the genesis file, only used if `genesis.preProvisionedGenesis.enabled` is false | +| genesis.preProvisionedGenesis.enabled | bool | `true` | Use genesis file supplied by the Helm chart, of false refer to `genesis.externalConfigMap` | +| genesis.preProvisionedGenesis.type | string | `"devnet"` | Type of the genesis file to use, can be `devnet`, `testnet`, `mainnet` | +| global.postgresql.service.ports.postgresql | string | `"5432"` | | +| image.pullPolicy | string | `"IfNotPresent"` | image pull policy | +| image.repository | string | `"ghcr.io/pokt-network/pocket-v1"` | image repository | +| image.tag | string | `"latest"` | image tag | +| imagePullSecrets | list | `[]` | image pull secrets | +| ingress.annotations | object | `{}` | | +| ingress.className | string | `""` | | +| ingress.enabled | bool | `false` | enable ingress for RPC port | +| ingress.hosts[0].host | string | `"chart-example.local"` | | +| ingress.hosts[0].paths[0].path | string | `"/"` | | +| ingress.hosts[0].paths[0].pathType | string | `"ImplementationSpecific"` | | +| ingress.tls | list | `[]` | | +| nameOverride | string | `""` | | +| nodeSelector | object | `{}` | | +| nodeType | string | `"full"` | type of the blockchain node to run. Can be either `full`, `validator`, `servicer`, `fishermen` | +| persistence.accessModes | list | `["ReadWriteOnce"]` | persistent Volume Access Modes | +| persistence.annotations | object | `{}` | annotations of the persistent volume claim | +| persistence.dataSource | object | `{}` | custom data source of the persistent volume claim | +| persistence.enabled | bool | `true` | enable persistent volume claim | +| persistence.existingClaim | string | `""` | name of an existing PVC to use for persistence | +| persistence.reclaimPolicy | string | `"Delete"` | persistent volume reclaim policy | +| persistence.selector | object | `{}` | selector to match an existing Persistent Volume | +| persistence.size | string | `"8Gi"` | size of the persistent volume claim | +| persistence.storageClass | string | `""` | storage class of the persistent volume claim | +| podAnnotations | object | `{}` | pod annotations | +| podSecurityContext | object | `{}` | | +| postgresql.enabled | bool | `true` | deploy postgresql database automatically. Refer to https://github.com/bitnami/charts/blob/main/bitnami/postgresql/values.yaml for additional options. | +| postgresql.primary.persistence.enabled | bool | `false` | enable persistent volume claim for PostgreSQL | +| postgresql.primary.persistence.size | string | `"8Gi"` | size of the persistent volume claim for PostgreSQL | +| privateKeySecretKeyRef.key | string | `""` | REQUIRED. Key in the Secret that contains the private key of the node | +| privateKeySecretKeyRef.name | string | `""` | REQUIRED. Name of the Secret in the same namespace that contains the private key of the node | +| resources | object | `{}` | resources limits and requests | +| securityContext | object | `{}` | | +| service.annotations | object | `{}` | service annotations | +| service.nameOverride | string | `""` | | +| service.ports.consensus | int | `42069` | consensus port of the node | +| service.ports.metrics | int | `9000` | OpenTelemetry metrics port of the node | +| service.ports.rpc | int | `50832` | rpc port of the node | +| service.type | string | `"ClusterIP"` | service type | +| serviceAccount.annotations | object | `{}` | Annotations to add to the service account | +| serviceAccount.create | bool | `true` | Specifies whether a service account should be created | +| serviceAccount.name | string | `""` | The name of the service account to use. If not set and create is true, a name is generated using the fullname template | +| serviceMonitor.enabled | bool | `false` | enable service monitor | +| tolerations | list | `[]` | | From f7e0fbe0cbbdfa8f1c8828251b4b80293523bc57 Mon Sep 17 00:00:00 2001 From: Daniel Olshansky Date: Thu, 8 Jun 2023 12:30:50 -0700 Subject: [PATCH 055/100] Add some notes & qualifiers to the protocol state sync document --- consensus/doc/PROTOCOL_STATE_SYNC.md | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/consensus/doc/PROTOCOL_STATE_SYNC.md b/consensus/doc/PROTOCOL_STATE_SYNC.md index a23923555..4c28a11b3 100644 --- a/consensus/doc/PROTOCOL_STATE_SYNC.md +++ b/consensus/doc/PROTOCOL_STATE_SYNC.md @@ -1,6 +1,12 @@ # State Sync Protocol Design -_NOTE: This document makes some assumption of P2P implementation details, so please see [p2p](../../p2p/README.md) for the latest source of truth._ +⚠️ IMPORTANT NOTES TO THE (last updated on 06/08/2023): + +- State Sync implementation is a WIP and has taken several different shapes. +- This document is out of date and needs to be updated to reflect the latest implementation. This will be done once a functional implementation is in place. +- This document makes some assumption of P2P implementation details, so please see [p2p](../../p2p/README.md) for the latest source of truth. + +## Table of Contents - [Background](#background) - [State Sync - Peer Metadata](#state-sync---peer-metadata) From a4b7a7ba15fd8f40ee69e3272dbf323d8f0a65a8 Mon Sep 17 00:00:00 2001 From: Daniel Olshansky Date: Thu, 8 Jun 2023 12:49:54 -0700 Subject: [PATCH 056/100] Moved WithDebugEventsChannel into a file with the test tag --- consensus/e2e_tests/utils_test.go | 7 ++++--- consensus/events.go | 2 +- state_machine/module.go | 12 +----------- 3 files changed, 6 insertions(+), 15 deletions(-) diff --git a/consensus/e2e_tests/utils_test.go b/consensus/e2e_tests/utils_test.go index d9c4bc093..a758609a8 100644 --- a/consensus/e2e_tests/utils_test.go +++ b/consensus/e2e_tests/utils_test.go @@ -43,9 +43,9 @@ func TestMain(m *testing.M) { // TECHDEBT: Constants in the `e2e_tests` test suite that should be parameterized const ( - numValidators = 4 - dummyStateHash = "42" - numMockedBlocks = 200 + numValidators = 4 // The number of validators in the testing network created + dummyStateHash = "42" // The state hash returned for all committed blocks + numMockedBlocks = 200 // The number of mocked blocks in in memory for testing purposes ) var maxTxBytes = defaults.DefaultConsensusMaxMempoolBytes @@ -292,6 +292,7 @@ func WaitForNetworkStateSyncEvents( return waitForEventsInternal(clck, eventsChannel, messaging.StateSyncMessageContentType, numExpectedMsgs, maxWaitTime, includeFilter, errMsg, failOnExtraMessages) } +// WaitForNetworkFSMEvents waits for the number of expected state machine events to be published on the events channel. func WaitForNetworkFSMEvents( t *testing.T, clck *clock.Mock, diff --git a/consensus/events.go b/consensus/events.go index 3292b5456..47cf5591c 100644 --- a/consensus/events.go +++ b/consensus/events.go @@ -13,7 +13,7 @@ func (m *consensusModule) publishNewHeightEvent(height uint64) { m.GetBus().PublishEventToBus(newHeightEvent) } -// publishStateSyncBlockCommittedEvent publishes a new state sync block committed event, so that state sync module can react to it +// publishStateSyncBlockCommittedEvent publishes a nstate_machine/module.goew state sync block committed event, so that state sync module can react to it func (m *consensusModule) publishStateSyncBlockCommittedEvent(height uint64) { blockCommittedEvent := &messaging.StateSyncBlockCommittedEvent{ Height: height, diff --git a/state_machine/module.go b/state_machine/module.go index 235154e8e..6dbe9606a 100644 --- a/state_machine/module.go +++ b/state_machine/module.go @@ -22,7 +22,7 @@ type stateMachineModule struct { // TEST_ONLY: debugChannels is only used for testing purposes. // It is used to enable to aggregate and emit events during testing - // TODO: Find a way to avoid the need for this altogether or move it into an _test.go file + // TECHDEBT: Find a way to avoid the need for this altogether or move it into an _test.go file debugChannels []modules.EventsChannel } @@ -86,13 +86,3 @@ func WithCustomStateMachine(stateMachine *fsm.FSM) modules.ModuleOption { } } } - -// WithDebugEventsChannel is used for testing purposes only. It allows us to capture the events -// from the FSM and publish them to debug channel for testing. -func WithDebugEventsChannel(eventsChannel modules.EventsChannel) modules.ModuleOption { - return func(m modules.InitializableModule) { - if m, ok := m.(*stateMachineModule); ok { - m.debugChannels = append(m.debugChannels, eventsChannel) - } - } -} From 239661fa8a9a70211bbdef63f841a6a3266150b1 Mon Sep 17 00:00:00 2001 From: Daniel Olshansky Date: Thu, 8 Jun 2023 12:59:23 -0700 Subject: [PATCH 057/100] Moved some helpers into a debug helper file --- consensus/module.go | 1 + shared/modules/consensus_module.go | 20 -------------------- shared/modules/debug_helpers.go | 24 ++++++++++++++++++++++++ state_machine/debug_helpers.go | 15 +++++++++++++++ 4 files changed, 40 insertions(+), 20 deletions(-) create mode 100644 shared/modules/debug_helpers.go create mode 100644 state_machine/debug_helpers.go diff --git a/consensus/module.go b/consensus/module.go index 2a1279c44..8f1a26af2 100644 --- a/consensus/module.go +++ b/consensus/module.go @@ -124,6 +124,7 @@ func (*consensusModule) Create(bus modules.Bus, options ...modules.ModuleOption) } m.genesisState = genesisState + // TECHDEBT: Should we use the same private key everywhere (top level config, consensus config, etc...) or should we consolidate them? privateKey, err := cryptoPocket.NewPrivateKey(m.consCfg.GetPrivateKey()) if err != nil { return nil, err diff --git a/shared/modules/consensus_module.go b/shared/modules/consensus_module.go index b1c9aa79e..a1444d754 100644 --- a/shared/modules/consensus_module.go +++ b/shared/modules/consensus_module.go @@ -3,8 +3,6 @@ package modules //go:generate mockgen -destination=./mocks/consensus_module_mock.go github.com/pokt-network/pocket/shared/modules ConsensusModule,ConsensusPacemaker,ConsensusDebugModule import ( - "github.com/pokt-network/pocket/shared/core/types" - "github.com/pokt-network/pocket/shared/messaging" "google.golang.org/protobuf/types/known/anypb" ) @@ -72,21 +70,3 @@ type ConsensusPacemaker interface { GetPrepareQC() (*anypb.Any, error) GetNodeId() uint64 } - -// ConsensusDebugModule exposes functionality used for testing & development purposes. -// Not to be used in production. -// TODO: Move this to a separate file and add a flag so this is not compiled in production -// for safety purposes. -type ConsensusDebugModule interface { - HandleDebugMessage(*messaging.DebugMessage) error - - SetHeight(uint64) - SetRound(uint64) - SetStep(uint8) // REFACTOR: This should accept typesCons.HotstuffStep - SetBlock(*types.Block) - - SetUtilityUnitOfWork(UtilityUnitOfWork) - - // REFACTOR: This should accept typesCons.HotstuffStep and return typesCons.NodeId. - GetLeaderForView(height, round uint64, step uint8) (leaderId uint64) -} diff --git a/shared/modules/debug_helpers.go b/shared/modules/debug_helpers.go new file mode 100644 index 000000000..b56a3effe --- /dev/null +++ b/shared/modules/debug_helpers.go @@ -0,0 +1,24 @@ +//go:build test + +package modules + +import ( + "github.com/pokt-network/pocket/shared/core/types" + "github.com/pokt-network/pocket/shared/messaging" +) + +// ConsensusDebugModule exposes functionality used for testing & development purposes. +// Not to be used in production. +type ConsensusDebugModule interface { + HandleDebugMessage(*messaging.DebugMessage) error + + SetHeight(uint64) + SetRound(uint64) + SetStep(uint8) // REFACTOR: This should accept typesCons.HotstuffStep + SetBlock(*types.Block) + + SetUtilityUnitOfWork(UtilityUnitOfWork) + + // REFACTOR: This should accept typesCons.HotstuffStep and return typesCons.NodeId. + GetLeaderForView(height, round uint64, step uint8) (leaderId uint64) +} diff --git a/state_machine/debug_helpers.go b/state_machine/debug_helpers.go new file mode 100644 index 000000000..65081166e --- /dev/null +++ b/state_machine/debug_helpers.go @@ -0,0 +1,15 @@ +//go:build test + +package state_machine + +import "github.com/pokt-network/pocket/shared/modules" + +// WithDebugEventsChannel is used for testing purposes only. It allows us to capture the events +// from the FSM and publish them to debug channel for testing. +func WithDebugEventsChannel(eventsChannel modules.EventsChannel) modules.ModuleOption { + return func(m modules.InitializableModule) { + if m, ok := m.(*stateMachineModule); ok { + m.debugChannels = append(m.debugChannels, eventsChannel) + } + } +} From da3684edc85ac2934d1af5dcd9626d3323465660 Mon Sep 17 00:00:00 2001 From: Daniel Olshansky Date: Thu, 8 Jun 2023 14:57:54 -0700 Subject: [PATCH 058/100] Code cleanup figuring things out before diving into the business logic again --- build/Dockerfile.debian.dev | 1 - build/localnet/Tiltfile | 198 +++++++++++++++++------------ consensus/fsm_handler.go | 5 +- shared/modules/consensus_module.go | 19 +++ shared/modules/debug_helpers.go | 24 ---- shared/node.go | 4 +- state_machine/debug_helpers.go | 2 +- 7 files changed, 139 insertions(+), 114 deletions(-) delete mode 100644 shared/modules/debug_helpers.go diff --git a/build/Dockerfile.debian.dev b/build/Dockerfile.debian.dev index e4f3d9625..2d69562b9 100644 --- a/build/Dockerfile.debian.dev +++ b/build/Dockerfile.debian.dev @@ -54,7 +54,6 @@ RUN make protogen_local && \ RUN go get -d -v ./app/pocket RUN go build -o /usr/local/bin/pocket ./app/pocket RUN go build -tags=debug -o /usr/local/bin/p1 ./app/client - RUN go build -o /usr/local/bin/cluster-manager ./build/localnet/cluster-manager CMD ["/usr/local/bin/pocket"] diff --git a/build/localnet/Tiltfile b/build/localnet/Tiltfile index bf5c62300..8b315c5e6 100644 --- a/build/localnet/Tiltfile +++ b/build/localnet/Tiltfile @@ -2,7 +2,7 @@ load("ext://helm_resource", "helm_resource", "helm_repo") load("ext://namespace", "namespace_create") load("ext://restart_process", "docker_build_with_restart") -load('ext://tests/golang', 'test_go') +load("ext://tests/golang", "test_go") tiltfile_dir = os.path.dirname(config.main_dir) root_dir = os.path.dirname(tiltfile_dir + "/../..") @@ -13,7 +13,7 @@ localnet_config_defaults = { "validators": {"count": 4}, "servicers": {"count": 1}, "fishermen": {"count": 1}, - "full_nodes": {"count": 1} + "full_nodes": {"count": 1}, } localnet_config_file = read_yaml(localnet_config_path, default=localnet_config_defaults) @@ -49,6 +49,7 @@ deps = [ deps_full_path = [root_dir + "/" + depdir for depdir in deps] + # Avoid downloading dependencies if no missing/outdated charts are found def check_helm_dependencies_for_chart(path): check_helm_dependencies = local( @@ -58,6 +59,7 @@ def check_helm_dependencies_for_chart(path): if helm_dependencies_not_ok_count > 1: local("helm dependency update " + path) + check_helm_dependencies_for_chart("dependencies") k8s_yaml(helm("dependencies", name="dependencies")) @@ -78,7 +80,7 @@ local_resource( root_dir=root_dir ), deps=deps_full_path, - labels=['watchers'] + labels=["watchers"], ) local_resource( "debug client: Watch & Compile", @@ -86,16 +88,16 @@ local_resource( root_dir=root_dir ), deps=deps_full_path, - labels=['watchers'] + labels=["watchers"], ) # Builds the cluster manager binary local_resource( - 'cluster manager: Watch & Compile', - 'GOOS=linux go build -o {root_dir}/bin/cluster-manager {root_dir}/build/localnet/cluster-manager/*.go'.format( + "cluster manager: Watch & Compile", + "GOOS=linux go build -o {root_dir}/bin/cluster-manager {root_dir}/build/localnet/cluster-manager/*.go".format( root_dir=root_dir ), deps=deps_full_path, - labels=['watchers'] + labels=["watchers"], ) # Builds and maintains the pocket container image after the binary is built on local machine, restarts a process on code change @@ -119,7 +121,7 @@ WORKDIR / docker_build_with_restart( "client-image", root_dir, - dockerfile_contents="""FROM debian:bullseye + dockerfile_contents= """FROM debian:bullseye RUN apt-get update && apt-get install -y procps WORKDIR / COPY bin/p1-linux /usr/local/bin/p1 @@ -138,12 +140,12 @@ WORKDIR / COPY bin/cluster-manager /usr/local/bin/cluster-manager COPY bin/p1-linux /usr/local/bin/p1 """, - only=['bin/cluster-manager', 'bin/p1-linux'], + only=["bin/cluster-manager", "bin/p1-linux"], entrypoint=["/usr/local/bin/cluster-manager"], live_update=[ sync("bin/cluster-manager", "/usr/local/bin/cluster-manager"), sync("bin/p1-linux", "/usr/local/bin/p1"), - ] + ], ) # Pushes localnet manifests to the cluster. @@ -159,9 +161,9 @@ k8s_yaml( ) k8s_yaml(["manifests/cli-client.yaml"]) -k8s_resource('dev-cli-client', labels=['client']) -k8s_yaml(['manifests/cluster-manager.yaml']) -k8s_resource('pocket-v1-cluster-manager', labels=['cluster-manager']) +k8s_resource("dev-cli-client", labels=["client"]) +k8s_yaml(["manifests/cluster-manager.yaml"]) +k8s_resource("pocket-v1-cluster-manager", labels=["cluster-manager"]) chart_dir = root_dir + "/charts/pocket" check_helm_dependencies_for_chart(chart_dir) @@ -170,74 +172,93 @@ check_helm_dependencies_for_chart(chart_dir) def formatted_actor_number(n): return local('printf "%03d" ' + str(n)) + # Provisions validators actor_number = 0 for x in range(localnet_config["validators"]["count"]): actor_number = actor_number + 1 formatted_number = formatted_actor_number(actor_number) - k8s_yaml(helm(chart_dir, - name="validator-%s-pocket" % formatted_number, - set=[ - "global.postgresql.auth.postgresPassword=LocalNetPassword", - "image.repository=pocket-image", - "privateKeySecretKeyRef.name=validators-private-keys", - "privateKeySecretKeyRef.key=%s" % formatted_number, - "genesis.preProvisionedGenesis.enabled=false", - "genesis.externalConfigMap.name=v1-localnet-genesis", - "genesis.externalConfigMap.key=genesis.json", - "postgresql.primary.persistence.enabled=false", - "nodeType=validator", - ], - values=[chart_dir + "/pocket-validator-overrides.yaml"] if os.path.exists(chart_dir + "/pocket-validator-overrides.yaml") else [],)) - - k8s_resource("validator-%s-pocket" % formatted_number, labels=['pocket-validators']) + k8s_yaml( + helm( + chart_dir, + name="validator-%s-pocket" % formatted_number, + set=[ + "global.postgresql.auth.postgresPassword=LocalNetPassword", + "image.repository=pocket-image", + "privateKeySecretKeyRef.name=validators-private-keys", + "privateKeySecretKeyRef.key=%s" % formatted_number, + "genesis.preProvisionedGenesis.enabled=false", + "genesis.externalConfigMap.name=v1-localnet-genesis", + "genesis.externalConfigMap.key=genesis.json", + "postgresql.primary.persistence.enabled=false", + "nodeType=validator", + ], + values=[chart_dir + "/pocket-validator-overrides.yaml"] + if os.path.exists(chart_dir + "/pocket-validator-overrides.yaml") + else [], + ) + ) + + k8s_resource("validator-%s-pocket" % formatted_number, labels=["pocket-validators"]) actor_number = 0 for x in range(localnet_config["servicers"]["count"]): actor_number = actor_number + 1 formatted_number = formatted_actor_number(actor_number) - k8s_yaml(helm(chart_dir, - name="servicer-%s-pocket" % formatted_number, - set=[ - "global.postgresql.auth.postgresPassword=LocalNetPassword", - "image.repository=pocket-image", - "privateKeySecretKeyRef.name=servicers-private-keys", - "privateKeySecretKeyRef.key=%s" % formatted_number, - "genesis.preProvisionedGenesis.enabled=false", - "genesis.externalConfigMap.name=v1-localnet-genesis", - "genesis.externalConfigMap.key=genesis.json", - "postgresql.primary.persistence.enabled=false", - "config.servicer.enabled=true", - "nodeType=servicer", - ], - values=[chart_dir + "/pocket-servicer-overrides.yaml"] if os.path.exists(chart_dir + "/pocket-servicer-overrides.yaml") else [],)) - - k8s_resource("servicer-%s-pocket" % formatted_number, labels=['pocket-servicers']) + k8s_yaml( + helm( + chart_dir, + name="servicer-%s-pocket" % formatted_number, + set=[ + "global.postgresql.auth.postgresPassword=LocalNetPassword", + "image.repository=pocket-image", + "privateKeySecretKeyRef.name=servicers-private-keys", + "privateKeySecretKeyRef.key=%s" % formatted_number, + "genesis.preProvisionedGenesis.enabled=false", + "genesis.externalConfigMap.name=v1-localnet-genesis", + "genesis.externalConfigMap.key=genesis.json", + "postgresql.primary.persistence.enabled=false", + "config.servicer.enabled=true", + "nodeType=servicer", + ], + values=[chart_dir + "/pocket-servicer-overrides.yaml"] + if os.path.exists(chart_dir + "/pocket-servicer-overrides.yaml") + else [], + ) + ) + + k8s_resource("servicer-%s-pocket" % formatted_number, labels=["pocket-servicers"]) actor_number = 0 for x in range(localnet_config["fishermen"]["count"]): actor_number = actor_number + 1 formatted_number = formatted_actor_number(actor_number) - k8s_yaml(helm(chart_dir, - name="fisherman-%s-pocket" % formatted_number, - set=[ - "global.postgresql.auth.postgresPassword=LocalNetPassword", - "image.repository=pocket-image", - "privateKeySecretKeyRef.name=fishermen-private-keys", - "privateKeySecretKeyRef.key=%s" % formatted_number, - "genesis.preProvisionedGenesis.enabled=false", - "genesis.externalConfigMap.name=v1-localnet-genesis", - "genesis.externalConfigMap.key=genesis.json", - "postgresql.primary.persistence.enabled=false", - "config.fisherman.enabled=true", - "nodeType=fisherman", - ], - values=[chart_dir + "/pocket-fisherman-overrides.yaml"] if os.path.exists(chart_dir + "/pocket-fisherman-overrides.yaml") else [],)) - - k8s_resource("fisherman-%s-pocket" % formatted_number, labels=['pocket-fishermen']) + k8s_yaml( + helm( + chart_dir, + name="fisherman-%s-pocket" % formatted_number, + set=[ + "global.postgresql.auth.postgresPassword=LocalNetPassword", + "image.repository=pocket-image", + "privateKeySecretKeyRef.name=fishermen-private-keys", + "privateKeySecretKeyRef.key=%s" % formatted_number, + "genesis.preProvisionedGenesis.enabled=false", + "genesis.externalConfigMap.name=v1-localnet-genesis", + "genesis.externalConfigMap.key=genesis.json", + "postgresql.primary.persistence.enabled=false", + "config.fisherman.enabled=true", + "nodeType=fisherman", + ], + values=[chart_dir + "/pocket-fisherman-overrides.yaml"] + if os.path.exists(chart_dir + "/pocket-fisherman-overrides.yaml") + else [], + ) + ) + + k8s_resource("fisherman-%s-pocket" % formatted_number, labels=["pocket-fishermen"]) # Provisions full nodes actor_number = 0 @@ -245,22 +266,28 @@ for x in range(localnet_config["full_nodes"]["count"]): actor_number = actor_number + 1 formatted_number = formatted_actor_number(actor_number) - k8s_yaml(helm(root_dir + "/charts/pocket", - name="full-node-%s-pocket" % formatted_number, - set=[ - "global.postgresql.auth.postgresPassword=LocalNetPassword", - "image.repository=pocket-image", - "privateKeySecretKeyRef.name=misc-private-keys", - "privateKeySecretKeyRef.key=%s" % formatted_number, - "genesis.preProvisionedGenesis.enabled=false", - "genesis.externalConfigMap.name=v1-localnet-genesis", - "genesis.externalConfigMap.key=genesis.json", - "postgresql.primary.persistence.enabled=false", - "nodeType=full", - ], - values=[chart_dir + "/pocket-full-node-overrides.yaml"] if os.path.exists(chart_dir + "/pocket-full-node-overrides.yaml") else [],)) - - k8s_resource("full-node-%s-pocket" % formatted_number, labels=['pocket-full-nodes']) + k8s_yaml( + helm( + root_dir + "/charts/pocket", + name="full-node-%s-pocket" % formatted_number, + set=[ + "global.postgresql.auth.postgresPassword=LocalNetPassword", + "image.repository=pocket-image", + "privateKeySecretKeyRef.name=misc-private-keys", + "privateKeySecretKeyRef.key=%s" % formatted_number, + "genesis.preProvisionedGenesis.enabled=false", + "genesis.externalConfigMap.name=v1-localnet-genesis", + "genesis.externalConfigMap.key=genesis.json", + "postgresql.primary.persistence.enabled=false", + "nodeType=full", + ], + values=[chart_dir + "/pocket-full-node-overrides.yaml"] + if os.path.exists(chart_dir + "/pocket-full-node-overrides.yaml") + else [], + ) + ) + + k8s_resource("full-node-%s-pocket" % formatted_number, labels=["pocket-full-nodes"]) # Exposes grafana k8s_resource( @@ -268,12 +295,15 @@ k8s_resource( workload="dependencies-grafana", extra_pod_selectors=[{"app.kubernetes.io/name": "grafana"}], port_forwards=["42000:3000"], - labels=["monitoring"] + labels=["monitoring"], ) # E2E test button -test_go('e2e-tests', '{root_dir}/e2e/tests'.format(root_dir=root_dir), '.', - extra_args=["-v", "-tags=e2e"], - labels=['e2e-tests'], - trigger_mode=TRIGGER_MODE_MANUAL, +test_go( + "e2e-tests", + "{root_dir}/e2e/tests".format(root_dir=root_dir), + ".", + extra_args=["-v", "-tags=e2e"], + labels=["e2e-tests"], + trigger_mode=TRIGGER_MODE_MANUAL, ) diff --git a/consensus/fsm_handler.go b/consensus/fsm_handler.go index f265594b4..3ce4020d9 100644 --- a/consensus/fsm_handler.go +++ b/consensus/fsm_handler.go @@ -31,6 +31,7 @@ func (m *consensusModule) HandleEvent(transitionMessageAny *anypb.Any) error { } } +// handleStateTransitionEvent handles the state transition event from the state machine module func (m *consensusModule) handleStateTransitionEvent(msg *messaging.StateMachineTransitionEvent) error { m.logger.Info().Fields(messaging.TransitionEventToMap(msg)).Msg("Received state machine transition msg") @@ -64,8 +65,8 @@ func (m *consensusModule) handleStateTransitionEvent(msg *messaging.StateMachine func (m *consensusModule) HandleBootstrapped(msg *messaging.StateMachineTransitionEvent) error { m.logger.Info().Msg("Node is in the bootstrapped state. Consensus module NOOP.") // TODO_IN_THIS_COMMIT: Pick up here - // return m.GetBus().GetStateMachineModule().SendEvent(coreTypes.StateMachineEvent_Consensus_IsUnsynced) - return nil + return m.GetBus().GetStateMachineModule().SendEvent(coreTypes.StateMachineEvent_Consensus_IsUnsynced) + // return nil } // HandleUnsynced handles the FSM event Consensus_IsUnsynced, and when Unsynced is the destination state. diff --git a/shared/modules/consensus_module.go b/shared/modules/consensus_module.go index a1444d754..30f08e5b3 100644 --- a/shared/modules/consensus_module.go +++ b/shared/modules/consensus_module.go @@ -3,6 +3,8 @@ package modules //go:generate mockgen -destination=./mocks/consensus_module_mock.go github.com/pokt-network/pocket/shared/modules ConsensusModule,ConsensusPacemaker,ConsensusDebugModule import ( + "github.com/pokt-network/pocket/shared/core/types" + "github.com/pokt-network/pocket/shared/messaging" "google.golang.org/protobuf/types/known/anypb" ) @@ -70,3 +72,20 @@ type ConsensusPacemaker interface { GetPrepareQC() (*anypb.Any, error) GetNodeId() uint64 } + +// ConsensusDebugModule exposes functionality used for testing & development purposes. +// Not to be used in production. +// TECHDEBT: Move this into a separate file with the `//go:build debug test` tags +type ConsensusDebugModule interface { + HandleDebugMessage(*messaging.DebugMessage) error + + SetHeight(uint64) + SetRound(uint64) + SetStep(uint8) // REFACTOR: This should accept typesCons.HotstuffStep + SetBlock(*types.Block) + + SetUtilityUnitOfWork(UtilityUnitOfWork) + + // REFACTOR: This should accept typesCons.HotstuffStep and return typesCons.NodeId. + GetLeaderForView(height, round uint64, step uint8) (leaderId uint64) +} diff --git a/shared/modules/debug_helpers.go b/shared/modules/debug_helpers.go deleted file mode 100644 index b56a3effe..000000000 --- a/shared/modules/debug_helpers.go +++ /dev/null @@ -1,24 +0,0 @@ -//go:build test - -package modules - -import ( - "github.com/pokt-network/pocket/shared/core/types" - "github.com/pokt-network/pocket/shared/messaging" -) - -// ConsensusDebugModule exposes functionality used for testing & development purposes. -// Not to be used in production. -type ConsensusDebugModule interface { - HandleDebugMessage(*messaging.DebugMessage) error - - SetHeight(uint64) - SetRound(uint64) - SetStep(uint8) // REFACTOR: This should accept typesCons.HotstuffStep - SetBlock(*types.Block) - - SetUtilityUnitOfWork(UtilityUnitOfWork) - - // REFACTOR: This should accept typesCons.HotstuffStep and return typesCons.NodeId. - GetLeaderForView(height, round uint64, step uint8) (leaderId uint64) -} diff --git a/shared/node.go b/shared/node.go index 98b8ca16e..7d2eb54b3 100644 --- a/shared/node.go +++ b/shared/node.go @@ -175,8 +175,6 @@ func (node *Node) handleEvent(message *messaging.PocketEnvelope) error { return node.GetBus().GetConsensusModule().HandleStateSyncMessage(message.Content) case messaging.TxGossipMessageContentType: return node.GetBus().GetUtilityModule().HandleUtilityMessage(message.Content) - case messaging.DebugMessageEventType: - return node.handleDebugMessage(message) case messaging.ConsensusNewHeightEventType: return node.GetBus().GetP2PModule().HandleEvent(message.Content) case messaging.StateMachineTransitionEventType: @@ -184,6 +182,8 @@ func (node *Node) handleEvent(message *messaging.PocketEnvelope) error { err_p2p := node.GetBus().GetP2PModule().HandleEvent(message.Content) // TODO: Remove this lib once we move to Go 1.2 return multierr.Combine(err_consensus, err_p2p) + case messaging.DebugMessageEventType: + return node.handleDebugMessage(message) default: logger.Global.Warn().Msgf("Unsupported message content type: %s", contentType) } diff --git a/state_machine/debug_helpers.go b/state_machine/debug_helpers.go index 65081166e..e8882a612 100644 --- a/state_machine/debug_helpers.go +++ b/state_machine/debug_helpers.go @@ -1,4 +1,4 @@ -//go:build test +// +built test debug package state_machine From 0bcc33aaf23cacf55ab1d0653ec418b64ad91941 Mon Sep 17 00:00:00 2001 From: Daniel Olshansky Date: Thu, 8 Jun 2023 16:57:41 -0700 Subject: [PATCH 059/100] Add some comments --- consensus/fsm_handler.go | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/consensus/fsm_handler.go b/consensus/fsm_handler.go index 3ce4020d9..8363a5de1 100644 --- a/consensus/fsm_handler.go +++ b/consensus/fsm_handler.go @@ -10,6 +10,14 @@ import ( "google.golang.org/protobuf/types/known/anypb" ) +// RESEARCH(#816): Research whether the E2E state sync business logic can be simplified by not using the FSM module at all. +// We were originally intending to make heavier use of the FSM module to handle state sync, but we ended up not using it much as +// seen below. + +const ( + consensusFSMHandlerSource = "ConsensusFSMHandler" +) + // Implements the `HandleEvent` function in the `ConsensusModule` interface func (m *consensusModule) HandleEvent(transitionMessageAny *anypb.Any) error { m.m.Lock() @@ -63,10 +71,9 @@ func (m *consensusModule) handleStateTransitionEvent(msg *messaging.StateMachine // Bootstrapped mode is when the node (validator or non) is first coming online. // This is a transition mode from node bootstrapping to a node being out-of-sync. func (m *consensusModule) HandleBootstrapped(msg *messaging.StateMachineTransitionEvent) error { - m.logger.Info().Msg("Node is in the bootstrapped state. Consensus module NOOP.") - // TODO_IN_THIS_COMMIT: Pick up here - return m.GetBus().GetStateMachineModule().SendEvent(coreTypes.StateMachineEvent_Consensus_IsUnsynced) - // return nil + m.logger.Info().Str("source", consensusFSMHandlerSource).Msg("Node is in the bootstrapped state. Consensus module NOOP.") + // INVESTIGATE(#816): Why are we not calling fsm.SendEvent(coreTypes.StateMachineEvent_Consensus_IsUnsynced) here? + return nil } // HandleUnsynced handles the FSM event Consensus_IsUnsynced, and when Unsynced is the destination state. @@ -74,14 +81,14 @@ func (m *consensusModule) HandleBootstrapped(msg *messaging.StateMachineTransiti // This mode is a transition mode from the node being up-to-date (i.e. Pacemaker mode, Synced mode) with the latest network height to being out-of-sync. // As soon as a node transitions to this mode, it will transition to the synching mode. func (m *consensusModule) HandleUnsynced(msg *messaging.StateMachineTransitionEvent) error { - m.logger.Info().Msg("Node is in an Unsynced state. Consensus module is sending an even to transition to SYNCHING mode.") + m.logger.Info().Str("source", consensusFSMHandlerSource).Msg("Node is in an Unsynced state. Consensus module is sending an even to transition to SYNCHING mode.") return m.GetBus().GetStateMachineModule().SendEvent(coreTypes.StateMachineEvent_Consensus_IsSyncing) } // HandleSyncMode handles the FSM event Consensus_IsSyncing, and when SyncMode is the destination state. // In Sync mode, the node (validator or not starts syncing with the rest of the network. func (m *consensusModule) HandleSyncMode(msg *messaging.StateMachineTransitionEvent) error { - m.logger.Info().Msg("Node is in Sync Mode. Consensus Module is about to start synching...") + m.logger.Info().Str("source", consensusFSMHandlerSource).Msg("Node is in Sync Mode. Consensus Module is about to start synching...") go m.stateSync.SyncStateSync() return nil } @@ -90,7 +97,7 @@ func (m *consensusModule) HandleSyncMode(msg *messaging.StateMachineTransitionEv // Currently, FSM never transition to this state and a non-validator node always stays in SyncMode. // CONSIDER: when a non-validator sync is implemented, maybe there is a case that requires transitioning to this state. func (m *consensusModule) HandleSynced(msg *messaging.StateMachineTransitionEvent) error { - m.logger.Info().Msg("Non-validator node is in Synced mode. Consensus module NOOP.") + m.logger.Info().Str("source", consensusFSMHandlerSource).Msg("Non-validator node is in Synced mode. Consensus module NOOP.") return nil } @@ -98,7 +105,7 @@ func (m *consensusModule) HandleSynced(msg *messaging.StateMachineTransitionEven // Execution of this state means the validator node is synced and it will stay in this mode until // it receives a new block proposal that has a higher height than the current consensus height. func (m *consensusModule) HandlePacemaker(msg *messaging.StateMachineTransitionEvent) error { - m.logger.Info().Msg("Validator node is Synced and in Pacemaker mode. Validator can now participate in voting on consensus.") + m.logger.Info().Str("source", consensusFSMHandlerSource).Msg("Validator node is Synced and in Pacemaker mode. Validator can now participate in voting on consensus.") // if a validator is just bootstrapped and finished state sync, it will not have a nodeId yet, which is 0. Set correct nodeId here. if m.nodeId == 0 { From ab8db90a3b12deacd747fda8d290f3bcc7cb57d0 Mon Sep 17 00:00:00 2001 From: Daniel Olshansky Date: Thu, 8 Jun 2023 17:26:57 -0700 Subject: [PATCH 060/100] Removed SetBlock --- consensus/e2e_tests/pacemaker_test.go | 17 +-- consensus/e2e_tests/state_sync_test.go | 151 +++++++++--------------- consensus/e2e_tests/utils_test.go | 4 +- consensus/module_consensus_debugging.go | 8 +- shared/modules/consensus_module.go | 4 +- utility/main_test.go | 12 +- 6 files changed, 83 insertions(+), 113 deletions(-) diff --git a/consensus/e2e_tests/pacemaker_test.go b/consensus/e2e_tests/pacemaker_test.go index 997f3ae4c..7a2741615 100644 --- a/consensus/e2e_tests/pacemaker_test.go +++ b/consensus/e2e_tests/pacemaker_test.go @@ -112,22 +112,23 @@ func TestPacemakerCatchupSameStepDifferentRounds(t *testing.T) { // Prepare leader info leaderRound := uint64(6) + // Any node in pocketNodes mapping can be used to get this function + leaderFn := pocketNodes[1].GetBus().GetConsensusModule().GetLeaderForView + // Get leaderId for the given height, round and step, by using the Consensus Modules' GetLeaderForView() function. - // Any node in pocketNodes mapping can be used to call GetLeaderForView() function. - leaderId := typesCons.NodeId(pocketNodes[1].GetBus().GetConsensusModule().GetLeaderForView(testHeight, leaderRound, uint8(consensus.Prepare))) + leaderId := typesCons.NodeId(leaderFn(testHeight, leaderRound, uint8(consensus.Prepare))) leader := pocketNodes[leaderId] leaderPK, err := leader.GetBus().GetConsensusModule().GetPrivateKey() require.NoError(t, err) - block := generatePlaceholderBlock(testHeight, leaderPK.Address()) - leader.GetBus().GetConsensusModule().SetBlock(block) - // Set the leader to be in the highest round. - pocketNodes[1].GetBus().GetConsensusModule().SetRound(leaderRound - 2) - pocketNodes[2].GetBus().GetConsensusModule().SetRound(leaderRound - 3) + require.Equal(t, typesCons.NodeId(1), leaderId) pocketNodes[leaderId].GetBus().GetConsensusModule().SetRound(leaderRound) - pocketNodes[4].GetBus().GetConsensusModule().SetRound(leaderRound - 4) + pocketNodes[2].GetBus().GetConsensusModule().SetRound(leaderRound - 1) + pocketNodes[3].GetBus().GetConsensusModule().SetRound(leaderRound - 2) + pocketNodes[4].GetBus().GetConsensusModule().SetRound(leaderRound - 3) + block := generatePlaceholderBlock(testHeight, leaderPK.Address()) prepareProposal := &typesCons.HotstuffMessage{ Type: consensus.Propose, Height: testHeight, diff --git a/consensus/e2e_tests/state_sync_test.go b/consensus/e2e_tests/state_sync_test.go index 8fe40000d..039d1bbe5 100644 --- a/consensus/e2e_tests/state_sync_test.go +++ b/consensus/e2e_tests/state_sync_test.go @@ -14,31 +14,20 @@ import ( "google.golang.org/protobuf/types/known/anypb" ) -func TestStateSync_ServerGetMetaDataReq_Success(t *testing.T) { - // Test preparation - clockMock := clock.NewMock() - timeReminder(t, clockMock, time.Second) - - runtimeMgrs := GenerateNodeRuntimeMgrs(t, numValidators, clockMock) - buses := GenerateBuses(t, runtimeMgrs) - - // Create & start test pocket nodes - eventsChannel := make(modules.EventsChannel, 100) - pocketNodes := createTestConsensusPocketNodes(t, buses, eventsChannel) - err := StartAllTestPocketNodes(t, pocketNodes) - require.NoError(t, err) +func TestStateSync_MetadataRequestResponse_Success(t *testing.T) { + clockMock, eventsChannel, pocketNodes := prepareStateSyncTestEnvironment(t) // Choose node 1 as the server node - // Set server node's height to test height. serverNode := pocketNodes[1] serverNodePeerId := serverNode.GetBus().GetConsensusModule().GetNodeAddress() + // Set server node's height to test height. serverNode.GetBus().GetConsensusModule().SetHeight(uint64(4)) // Choose node 2 as the requester node. requesterNode := pocketNodes[2] requesterNodePeerAddress := requesterNode.GetBus().GetConsensusModule().GetNodeAddress() - // Test MetaData Req + // Prepare StateSyncMetadataRequest stateSyncMetaDataReqMessage := &typesCons.StateSyncMessage{ Message: &typesCons.StateSyncMessage_MetadataReq{ MetadataReq: &typesCons.StateSyncMetadataRequest{ @@ -52,49 +41,39 @@ func TestStateSync_ServerGetMetaDataReq_Success(t *testing.T) { // Send metadata request to the server node P2PSend(t, serverNode, anyProto) - // Start waiting for the metadata request on server node, - errMsg := "StateSync Metadata Request" - receivedMsg, err := WaitForNetworkStateSyncEvents(t, clockMock, eventsChannel, errMsg, 1, 500, false) + // Wait for response from the server node + receivedMsgs, err := WaitForNetworkStateSyncEvents(t, clockMock, eventsChannel, "did not receive response to state sync metadata request", 1, 500, false) require.NoError(t, err) + require.Len(t, receivedMsgs, 1) - msg, err := codec.GetCodec().FromAny(receivedMsg[0]) + // Validate the response + msg, err := codec.GetCodec().FromAny(receivedMsgs[0]) require.NoError(t, err) - stateSyncMetaDataResMessage, ok := msg.(*typesCons.StateSyncMessage) + stateSyncMetaDataResMsg, ok := msg.(*typesCons.StateSyncMessage) require.True(t, ok) - metaDataRes := stateSyncMetaDataResMessage.GetMetadataRes() - require.NotEmpty(t, metaDataRes) + stateSyncMetaDataRes := stateSyncMetaDataResMsg.GetMetadataRes() + require.NotEmpty(t, stateSyncMetaDataRes) - require.Equal(t, uint64(3), metaDataRes.MaxHeight) // 3 because node sends the last persisted height - require.Equal(t, uint64(1), metaDataRes.MinHeight) - require.Equal(t, serverNodePeerId, metaDataRes.PeerAddress) + require.Equal(t, uint64(3), stateSyncMetaDataRes.MaxHeight) // 3 because node sends the last persisted height + require.Equal(t, uint64(1), stateSyncMetaDataRes.MinHeight) + require.Equal(t, serverNodePeerId, stateSyncMetaDataRes.PeerAddress) } -func TestStateSync_ServerGetBlock_Success(t *testing.T) { - // Test preparation - clockMock := clock.NewMock() - timeReminder(t, clockMock, time.Second) - - // Test configs - runtimeMgrs := GenerateNodeRuntimeMgrs(t, numValidators, clockMock) - buses := GenerateBuses(t, runtimeMgrs) - - // Create & start test pocket nodes - eventsChannel := make(modules.EventsChannel, 100) - pocketNodes := createTestConsensusPocketNodes(t, buses, eventsChannel) - err := StartAllTestPocketNodes(t, pocketNodes) - require.NoError(t, err) +func TestStateSync_BlockRequestResponse_Success(t *testing.T) { + clockMock, eventsChannel, pocketNodes := prepareStateSyncTestEnvironment(t) + // Choose node 1 as the server node serverNode := pocketNodes[1] + // Set server node's height to test height. serverNode.GetBus().GetConsensusModule().SetHeight(uint64(5)) // Choose node 2 as the requester node requesterNode := pocketNodes[2] requesterNodePeerAddress := requesterNode.GetBus().GetConsensusModule().GetNodeAddress() - // Passing Test - // Test GetBlock Req + // Prepare GetBlockRequest stateSyncGetBlockMessage := &typesCons.StateSyncMessage{ Message: &typesCons.StateSyncMessage_GetBlockReq{ GetBlockReq: &typesCons.GetBlockRequest{ @@ -111,10 +90,10 @@ func TestStateSync_ServerGetBlock_Success(t *testing.T) { P2PSend(t, serverNode, anyProto) // Start waiting for the get block request on server node, expect to return error - errMsg := "StateSync Get Block Request Message" - receivedMsg, err := WaitForNetworkStateSyncEvents(t, clockMock, eventsChannel, errMsg, 1, 500, false) + receivedMsg, err := WaitForNetworkStateSyncEvents(t, clockMock, eventsChannel, "error waiting on response to a get block request", 1, 500, false) require.NoError(t, err) + // validate the response msg, err := codec.GetCodec().FromAny(receivedMsg[0]) require.NoError(t, err) @@ -125,40 +104,30 @@ func TestStateSync_ServerGetBlock_Success(t *testing.T) { require.NotEmpty(t, getBlockRes) require.Equal(t, uint64(1), getBlockRes.Block.GetBlockHeader().Height) + // IMPROVE: What other data should we validate from the response? } -func TestStateSync_ServerGetBlock_FailNonExistingBlock(t *testing.T) { - // Test preparation - clockMock := clock.NewMock() - timeReminder(t, clockMock, time.Second) - - // Test configs - runtimeMgrs := GenerateNodeRuntimeMgrs(t, numValidators, clockMock) - buses := GenerateBuses(t, runtimeMgrs) - - // Create & start test pocket nodes - eventsChannel := make(modules.EventsChannel, 100) - pocketNodes := createTestConsensusPocketNodes(t, buses, eventsChannel) - err := StartAllTestPocketNodes(t, pocketNodes) - require.NoError(t, err) +func TestStateSync_BlockRequestResponse_FailNonExistingBlock(t *testing.T) { + clockMock, eventsChannel, pocketNodes := prepareStateSyncTestEnvironment(t) + // Choose node 1 as the server node serverNode := pocketNodes[1] + // Set server node's height to test height. serverNode.GetBus().GetConsensusModule().SetHeight(uint64(5)) // Choose node 2 as the requester node requesterNode := pocketNodes[2] requesterNodePeerAddress := requesterNode.GetBus().GetConsensusModule().GetNodeAddress() - // Failing Test + // Prepare a get block request for a non existing block (server is only at height 5) stateSyncGetBlockMessage := &typesCons.StateSyncMessage{ Message: &typesCons.StateSyncMessage_GetBlockReq{ GetBlockReq: &typesCons.GetBlockRequest{ PeerAddress: requesterNodePeerAddress, - Height: uint64(6), // 6 because node ask for the next block + Height: uint64(6), }, }, } - anyProto, err := anypb.New(stateSyncGetBlockMessage) require.NoError(t, err) @@ -166,43 +135,29 @@ func TestStateSync_ServerGetBlock_FailNonExistingBlock(t *testing.T) { P2PSend(t, serverNode, anyProto) // Start waiting for the get block request on server node, expect to return error - errMsg := "StateSync Get Block Request Message" + errMsg := "expecting to time out waiting on a response from a non existent" _, err = WaitForNetworkStateSyncEvents(t, clockMock, eventsChannel, errMsg, 1, 500, false) require.Error(t, err) } func TestStateSync_UnsyncedPeerSyncs_Success(t *testing.T) { - // Test preparation - clockMock := clock.NewMock() - timeReminder(t, clockMock, time.Second) - runtimeMgrs := GenerateNodeRuntimeMgrs(t, numValidators, clockMock) - buses := GenerateBuses(t, runtimeMgrs) - - // Create & start test pocket nodes - eventsChannel := make(modules.EventsChannel, 100) - pocketNodes := createTestConsensusPocketNodes(t, buses, eventsChannel) + clockMock, eventsChannel, pocketNodes := prepareStateSyncTestEnvironment(t) - err := StartAllTestPocketNodes(t, pocketNodes) - require.NoError(t, err) - - // Prepare unsynced node info - unsyncedNode := pocketNodes[2] - unsyncedNodeId := typesCons.NodeId(2) - unsyncedNodeHeight := uint64(2) + // Select node 2 as the unsynched node that will catch up + unsyncedNodeId := typesCons.NodeId(pocketNodes[2].GetBus().GetConsensusModule().GetNodeId()) + unsyncedNode := pocketNodes[unsyncedNodeId] // Set the unsynced node to height (2) and rest of the nodes to height (3) for id, pocketNode := range pocketNodes { + var height uint64 if id == unsyncedNodeId { - pocketNode.GetBus().GetConsensusModule().SetHeight(unsyncedNodeHeight) + height = uint64(2) } else { - pocketNode.GetBus().GetConsensusModule().SetHeight(uint64(3)) + height = uint64(3) } + pocketNode.GetBus().GetConsensusModule().SetHeight(height) pocketNode.GetBus().GetConsensusModule().SetStep(uint8(consensus.NewRound)) pocketNode.GetBus().GetConsensusModule().SetRound(uint64(0)) - - utilityUnitOfWork, err := pocketNode.GetBus().GetUtilityModule().NewUnitOfWork(int64(3)) - require.NoError(t, err) - pocketNode.GetBus().GetConsensusModule().SetUtilityUnitOfWork(utilityUnitOfWork) } // Debug message to start consensus by triggering first view change @@ -210,16 +165,6 @@ func TestStateSync_UnsyncedPeerSyncs_Success(t *testing.T) { TriggerNextView(t, pocketNode) } - // Get leaderId for the given height, round and step, by using the Consensus Modules' GetLeaderForView() function. - // Any node in pocketNodes mapping can be used to call GetLeaderForView() function. - leaderId := typesCons.NodeId(pocketNodes[1].GetBus().GetConsensusModule().GetLeaderForView(uint64(3), uint64(1), uint8(consensus.NewRound))) - leader := pocketNodes[leaderId] - leaderPK, err := leader.GetBus().GetConsensusModule().GetPrivateKey() - require.NoError(t, err) - - block := generatePlaceholderBlock(3, leaderPK.Address()) - leader.GetBus().GetConsensusModule().SetBlock(block) - // Assert that unsynced node has a different view of the network than the rest of the nodes newRoundMessages, err := WaitForNetworkConsensusEvents(t, clockMock, eventsChannel, consensus.NewRound, consensus.Propose, numValidators*numValidators, 500, true) require.NoError(t, err) @@ -229,7 +174,7 @@ func TestStateSync_UnsyncedPeerSyncs_Success(t *testing.T) { if nodeId == unsyncedNodeId { assertNodeConsensusView(t, nodeId, typesCons.ConsensusNodeState{ - Height: unsyncedNodeHeight, + Height: uint64(2), Step: uint8(consensus.NewRound), Round: uint8(1), }, @@ -295,3 +240,21 @@ func TestStateSync_4of10UnsyncedPeersCatchUp(t *testing.T) { func TestStateSync_9of10UnsyncedPeersCatchUp(t *testing.T) { t.Skip() } + +func prepareStateSyncTestEnvironment(t *testing.T) (*clock.Mock, modules.EventsChannel, idToNodeMapping) { + // Test preparation + clockMock := clock.NewMock() + timeReminder(t, clockMock, time.Second) + + // Test configs + runtimeMgrs := GenerateNodeRuntimeMgrs(t, numValidators, clockMock) + buses := GenerateBuses(t, runtimeMgrs) + + // Create & start test pocket nodes + eventsChannel := make(modules.EventsChannel, 100) + pocketNodes := createTestConsensusPocketNodes(t, buses, eventsChannel) + err := StartAllTestPocketNodes(t, pocketNodes) + require.NoError(t, err) + + return clockMock, eventsChannel, pocketNodes +} diff --git a/consensus/e2e_tests/utils_test.go b/consensus/e2e_tests/utils_test.go index a758609a8..96b562ab0 100644 --- a/consensus/e2e_tests/utils_test.go +++ b/consensus/e2e_tests/utils_test.go @@ -230,7 +230,9 @@ func P2PBroadcast(_ *testing.T, nodes idToNodeMapping, any *anypb.Any) { } } -func P2PSend(_ *testing.T, node *shared.Node, any *anypb.Any) { +func P2PSend(t *testing.T, node *shared.Node, any *anypb.Any) { + t.Helper() + e := &messaging.PocketEnvelope{Content: any} node.GetBus().PublishEventToBus(e) } diff --git a/consensus/module_consensus_debugging.go b/consensus/module_consensus_debugging.go index a4a9c79aa..401cc0fe6 100644 --- a/consensus/module_consensus_debugging.go +++ b/consensus/module_consensus_debugging.go @@ -1,8 +1,10 @@ package consensus +// All the code below is used for debugging & testing purposes only and should not be used in prod. +// TODO: Add debug/test tags to avoid accidental production usage. + import ( typesCons "github.com/pokt-network/pocket/consensus/types" - coreTypes "github.com/pokt-network/pocket/shared/core/types" "github.com/pokt-network/pocket/shared/messaging" "github.com/pokt-network/pocket/shared/modules" ) @@ -47,10 +49,6 @@ func (m *consensusModule) SetStep(step uint8) { m.step = typesCons.HotstuffStep(step) } -func (m *consensusModule) SetBlock(block *coreTypes.Block) { - m.block = block -} - func (m *consensusModule) SetUtilityUnitOfWork(utilityUnitOfWork modules.UtilityUnitOfWork) { m.utilityUnitOfWork = utilityUnitOfWork } diff --git a/shared/modules/consensus_module.go b/shared/modules/consensus_module.go index 30f08e5b3..636721671 100644 --- a/shared/modules/consensus_module.go +++ b/shared/modules/consensus_module.go @@ -3,7 +3,6 @@ package modules //go:generate mockgen -destination=./mocks/consensus_module_mock.go github.com/pokt-network/pocket/shared/modules ConsensusModule,ConsensusPacemaker,ConsensusDebugModule import ( - "github.com/pokt-network/pocket/shared/core/types" "github.com/pokt-network/pocket/shared/messaging" "google.golang.org/protobuf/types/known/anypb" ) @@ -81,8 +80,7 @@ type ConsensusDebugModule interface { SetHeight(uint64) SetRound(uint64) - SetStep(uint8) // REFACTOR: This should accept typesCons.HotstuffStep - SetBlock(*types.Block) + SetStep(uint8) SetUtilityUnitOfWork(UtilityUnitOfWork) diff --git a/utility/main_test.go b/utility/main_test.go index 7f3450147..7e9c36a50 100644 --- a/utility/main_test.go +++ b/utility/main_test.go @@ -16,13 +16,20 @@ import ( ) var ( - dbURL string + // dbURL string + + // Initialized in TestMain + testPersistenceMod modules.PersistenceModule ) // NB: `TestMain` serves all tests in the immediate `utility` package and not its children func TestMain(m *testing.M) { pool, resource, url := test_artifacts.SetupPostgresDocker() - dbURL = url + // dbURL = url + testPersistenceMod = newTestPersistenceModule(url) + if testPersistenceMod == nil { + log.Fatal("[ERROR] Unable to create new test persistence module") + } exitCode := m.Run() test_artifacts.CleanupPostgresDocker(m, pool, resource) @@ -31,6 +38,7 @@ func TestMain(m *testing.M) { func newTestUtilityModule(bus modules.Bus) modules.UtilityModule { utilityMod, err := Create(bus) + if err != nil { log.Fatalf("Error creating utility module: %s", err) } From 759788ed820ed859327d6584c8c65c6a575204c3 Mon Sep 17 00:00:00 2001 From: Daniel Olshansky Date: Fri, 9 Jun 2023 13:06:18 -0700 Subject: [PATCH 061/100] Cleanup to some of the consensus testing utilities --- consensus/debugging.go | 2 +- consensus/e2e_tests/hotstuff_test.go | 34 ++---- consensus/e2e_tests/pacemaker_test.go | 22 ++-- consensus/e2e_tests/state_sync_test.go | 88 +++++--------- consensus/e2e_tests/utils_test.go | 162 +++++++++++++++---------- 5 files changed, 140 insertions(+), 168 deletions(-) diff --git a/consensus/debugging.go b/consensus/debugging.go index d7ae18134..1b8b6fc74 100644 --- a/consensus/debugging.go +++ b/consensus/debugging.go @@ -83,7 +83,7 @@ func (m *consensusModule) togglePacemakerManualMode(_ *messaging.DebugMessage) { m.paceMaker.SetManualMode(newMode) } -// requests current block from all validators +// sendGetBlockStateSyncMessage requests nodes with the state sync server to send the current block func (m *consensusModule) sendGetBlockStateSyncMessage(_ *messaging.DebugMessage) { currentHeight := m.CurrentHeight() requestHeight := currentHeight - 1 diff --git a/consensus/e2e_tests/hotstuff_test.go b/consensus/e2e_tests/hotstuff_test.go index 773b9028a..f15abbfee 100644 --- a/consensus/e2e_tests/hotstuff_test.go +++ b/consensus/e2e_tests/hotstuff_test.go @@ -10,7 +10,6 @@ import ( "github.com/pokt-network/pocket/shared/codec" "github.com/pokt-network/pocket/shared/modules" "github.com/stretchr/testify/require" - "google.golang.org/protobuf/types/known/anypb" ) func TestHotstuff4Nodes1BlockHappyPath(t *testing.T) { @@ -19,19 +18,17 @@ func TestHotstuff4Nodes1BlockHappyPath(t *testing.T) { timeReminder(t, clockMock, time.Second) // Test configs - runtimeMgrs := GenerateNodeRuntimeMgrs(t, numValidators, clockMock) - buses := GenerateBuses(t, runtimeMgrs) + runtimeMgrs := generateNodeRuntimeMgrs(t, numValidators, clockMock) + buses := generateBuses(t, runtimeMgrs) // Create & start test pocket nodes eventsChannel := make(modules.EventsChannel, 100) pocketNodes := createTestConsensusPocketNodes(t, buses, eventsChannel) - err := StartAllTestPocketNodes(t, pocketNodes) + err := startAllTestPocketNodes(t, pocketNodes) require.NoError(t, err) // Debug message to start consensus by triggering first view change - for _, pocketNode := range pocketNodes { - TriggerNextView(t, pocketNode) - } + triggerNextView(t, pocketNodes) advanceTime(t, clockMock, 10*time.Millisecond) // Wait for nodes to reach height=1 by generating a block @@ -51,35 +48,18 @@ func TestHotstuff4Nodes1BlockHappyPath(t *testing.T) { requesterNode := pocketNodes[2] requesterNodePeerAddress := requesterNode.GetBus().GetConsensusModule().GetNodeAddress() - stateSyncGetBlockReq := typesCons.GetBlockRequest{ - PeerAddress: requesterNodePeerAddress, - Height: 1, - } - - stateSyncGetBlockMessage := &typesCons.StateSyncMessage{ - Message: &typesCons.StateSyncMessage_GetBlockReq{ - GetBlockReq: &stateSyncGetBlockReq, - }, - } - - anyProto, err := anypb.New(stateSyncGetBlockMessage) - require.NoError(t, err) - // Send get block request to the server node - P2PSend(t, serverNode, anyProto) + stateSyncGetBlockMsg := prepareStateSyncGetBlockMessage(t, requesterNodePeerAddress, 1) + send(t, serverNode, stateSyncGetBlockMsg) // Server node is waiting for the get block request message - numExpectedMsgs := 1 - errMsg := "StateSync Get Block Request Message" - receivedMsg, err := WaitForNetworkStateSyncEvents(t, clockMock, eventsChannel, errMsg, numExpectedMsgs, 500, false) + receivedMsg, err := waitForNetworkStateSyncEvents(t, clockMock, eventsChannel, "error waiting for StateSync.GetBlockRequest message", 1, 500, false) require.NoError(t, err) msg, err := codec.GetCodec().FromAny(receivedMsg[0]) require.NoError(t, err) - stateSyncGetBlockResMessage, ok := msg.(*typesCons.StateSyncMessage) require.True(t, ok) - getBlockRes := stateSyncGetBlockResMessage.GetGetBlockRes() require.NotEmpty(t, getBlockRes) diff --git a/consensus/e2e_tests/pacemaker_test.go b/consensus/e2e_tests/pacemaker_test.go index 7a2741615..1cfa0fdf7 100644 --- a/consensus/e2e_tests/pacemaker_test.go +++ b/consensus/e2e_tests/pacemaker_test.go @@ -23,23 +23,21 @@ func TestPacemakerTimeoutIncreasesRound(t *testing.T) { paceMakerTimeoutMsec := uint64(10000) // Set a small pacemaker timeout paceMakerTimeout := time.Duration(paceMakerTimeoutMsec) * time.Millisecond consensusMessageTimeout := time.Duration(paceMakerTimeoutMsec / 5) // Must be smaller than pacemaker timeout because we expect a deterministic number of consensus messages. - runtimeMgrs := GenerateNodeRuntimeMgrs(t, numValidators, clockMock) + runtimeMgrs := generateNodeRuntimeMgrs(t, numValidators, clockMock) for _, runtimeConfig := range runtimeMgrs { consCfg := runtimeConfig.GetConfig().Consensus.PacemakerConfig consCfg.TimeoutMsec = paceMakerTimeoutMsec } - buses := GenerateBuses(t, runtimeMgrs) + buses := generateBuses(t, runtimeMgrs) // Create & start test pocket nodes eventsChannel := make(modules.EventsChannel, 100) pocketNodes := createTestConsensusPocketNodes(t, buses, eventsChannel) - err := StartAllTestPocketNodes(t, pocketNodes) + err := startAllTestPocketNodes(t, pocketNodes) require.NoError(t, err) // Debug message to start consensus by triggering next view - for _, pocketNode := range pocketNodes { - TriggerNextView(t, pocketNode) - } + triggerNextView(t, pocketNodes) // Advance time by an amount shorter than the pacemaker timeout advanceTime(t, clockMock, 10*time.Millisecond) @@ -77,13 +75,13 @@ func TestPacemakerCatchupSameStepDifferentRounds(t *testing.T) { clockMock := clock.NewMock() timeReminder(t, clockMock, time.Second) - runtimeConfigs := GenerateNodeRuntimeMgrs(t, numValidators, clockMock) - buses := GenerateBuses(t, runtimeConfigs) + runtimeConfigs := generateNodeRuntimeMgrs(t, numValidators, clockMock) + buses := generateBuses(t, runtimeConfigs) // Create & start test pocket nodes eventsChannel := make(modules.EventsChannel, 100) pocketNodes := createTestConsensusPocketNodes(t, buses, eventsChannel) - err := StartAllTestPocketNodes(t, pocketNodes) + err := startAllTestPocketNodes(t, pocketNodes) require.NoError(t, err) // Starting point @@ -92,7 +90,7 @@ func TestPacemakerCatchupSameStepDifferentRounds(t *testing.T) { // UnitTestNet configs paceMakerTimeoutMsec := uint64(500) // Set a small pacemaker timeout - runtimeMgrs := GenerateNodeRuntimeMgrs(t, numValidators, clockMock) + runtimeMgrs := generateNodeRuntimeMgrs(t, numValidators, clockMock) for _, runtimeConfig := range runtimeMgrs { runtimeConfig.GetConfig().Consensus.PacemakerConfig.TimeoutMsec = paceMakerTimeoutMsec } @@ -145,13 +143,13 @@ func TestPacemakerCatchupSameStepDifferentRounds(t *testing.T) { broadcastMessages(t, []*anypb.Any{anyMsg}, pocketNodes) advanceTime(t, clockMock, 10*time.Millisecond) - _, err = WaitForNetworkConsensusEvents(t, clockMock, eventsChannel, 2, consensus.Vote, numExpectedMsgs, time.Duration(msgTimeout), true) + _, err = waitForNetworkConsensusEvents(t, clockMock, eventsChannel, 2, consensus.Vote, numExpectedMsgs, time.Duration(msgTimeout), true) require.NoError(t, err) // Check that all the nodes caught up to the leader's (i.e. the latest) round for nodeId, pocketNode := range pocketNodes { - nodeState := GetConsensusNodeState(pocketNode) + nodeState := getConsensusNodeState(pocketNode) if nodeId == leaderId { require.Equal(t, consensus.Prepare.String(), typesCons.HotstuffStep(nodeState.Step).String()) } else { diff --git a/consensus/e2e_tests/state_sync_test.go b/consensus/e2e_tests/state_sync_test.go index 039d1bbe5..58dbcb75a 100644 --- a/consensus/e2e_tests/state_sync_test.go +++ b/consensus/e2e_tests/state_sync_test.go @@ -1,7 +1,6 @@ package e2e_tests import ( - "reflect" "testing" "time" @@ -39,10 +38,10 @@ func TestStateSync_MetadataRequestResponse_Success(t *testing.T) { require.NoError(t, err) // Send metadata request to the server node - P2PSend(t, serverNode, anyProto) + send(t, serverNode, anyProto) // Wait for response from the server node - receivedMsgs, err := WaitForNetworkStateSyncEvents(t, clockMock, eventsChannel, "did not receive response to state sync metadata request", 1, 500, false) + receivedMsgs, err := waitForNetworkStateSyncEvents(t, clockMock, eventsChannel, "did not receive response to state sync metadata request", 1, 500, false) require.NoError(t, err) require.Len(t, receivedMsgs, 1) @@ -74,23 +73,13 @@ func TestStateSync_BlockRequestResponse_Success(t *testing.T) { requesterNodePeerAddress := requesterNode.GetBus().GetConsensusModule().GetNodeAddress() // Prepare GetBlockRequest - stateSyncGetBlockMessage := &typesCons.StateSyncMessage{ - Message: &typesCons.StateSyncMessage_GetBlockReq{ - GetBlockReq: &typesCons.GetBlockRequest{ - PeerAddress: requesterNodePeerAddress, - Height: 1, - }, - }, - } - - anyProto, err := anypb.New(stateSyncGetBlockMessage) - require.NoError(t, err) + stateSyncGetBlockMsg := prepareStateSyncGetBlockMessage(t, requesterNodePeerAddress, 1) // Send get block request to the server node - P2PSend(t, serverNode, anyProto) + send(t, serverNode, stateSyncGetBlockMsg) // Start waiting for the get block request on server node, expect to return error - receivedMsg, err := WaitForNetworkStateSyncEvents(t, clockMock, eventsChannel, "error waiting on response to a get block request", 1, 500, false) + receivedMsg, err := waitForNetworkStateSyncEvents(t, clockMock, eventsChannel, "error waiting on response to a get block request", 1, 500, false) require.NoError(t, err) // validate the response @@ -120,23 +109,14 @@ func TestStateSync_BlockRequestResponse_FailNonExistingBlock(t *testing.T) { requesterNodePeerAddress := requesterNode.GetBus().GetConsensusModule().GetNodeAddress() // Prepare a get block request for a non existing block (server is only at height 5) - stateSyncGetBlockMessage := &typesCons.StateSyncMessage{ - Message: &typesCons.StateSyncMessage_GetBlockReq{ - GetBlockReq: &typesCons.GetBlockRequest{ - PeerAddress: requesterNodePeerAddress, - Height: uint64(6), - }, - }, - } - anyProto, err := anypb.New(stateSyncGetBlockMessage) - require.NoError(t, err) + stateSyncGetBlockMsg := prepareStateSyncGetBlockMessage(t, requesterNodePeerAddress, 6) // Send get block request to the server node - P2PSend(t, serverNode, anyProto) + send(t, serverNode, stateSyncGetBlockMsg) // Start waiting for the get block request on server node, expect to return error errMsg := "expecting to time out waiting on a response from a non existent" - _, err = WaitForNetworkStateSyncEvents(t, clockMock, eventsChannel, errMsg, 1, 500, false) + _, err := waitForNetworkStateSyncEvents(t, clockMock, eventsChannel, errMsg, 1, 500, false) require.Error(t, err) } @@ -147,30 +127,27 @@ func TestStateSync_UnsyncedPeerSyncs_Success(t *testing.T) { unsyncedNodeId := typesCons.NodeId(pocketNodes[2].GetBus().GetConsensusModule().GetNodeId()) unsyncedNode := pocketNodes[unsyncedNodeId] - // Set the unsynced node to height (2) and rest of the nodes to height (3) + // Set the unsynced node to height (2) and rest of the nodes to height (4) for id, pocketNode := range pocketNodes { var height uint64 if id == unsyncedNodeId { height = uint64(2) } else { - height = uint64(3) + height = uint64(4) } pocketNode.GetBus().GetConsensusModule().SetHeight(height) pocketNode.GetBus().GetConsensusModule().SetStep(uint8(consensus.NewRound)) pocketNode.GetBus().GetConsensusModule().SetRound(uint64(0)) } - // Debug message to start consensus by triggering first view change - for _, pocketNode := range pocketNodes { - TriggerNextView(t, pocketNode) - } - - // Assert that unsynced node has a different view of the network than the rest of the nodes - newRoundMessages, err := WaitForNetworkConsensusEvents(t, clockMock, eventsChannel, consensus.NewRound, consensus.Propose, numValidators*numValidators, 500, true) + // Trigger all the nodes to the next step + triggerNextView(t, pocketNodes) + _, err := waitForNetworkConsensusEvents(t, clockMock, eventsChannel, consensus.NewRound, consensus.Propose, numValidators*numValidators, 500, true) require.NoError(t, err) + // Verify the unsynched node is still behind after NewRound starts for nodeId, pocketNode := range pocketNodes { - nodeState := GetConsensusNodeState(pocketNode) + nodeState := getConsensusNodeState(pocketNode) if nodeId == unsyncedNodeId { assertNodeConsensusView(t, nodeId, typesCons.ConsensusNodeState{ @@ -182,7 +159,7 @@ func TestStateSync_UnsyncedPeerSyncs_Success(t *testing.T) { } else { assertNodeConsensusView(t, nodeId, typesCons.ConsensusNodeState{ - Height: uint64(3), + Height: uint64(4), Step: uint8(consensus.NewRound), Round: uint8(1), }, @@ -192,27 +169,16 @@ func TestStateSync_UnsyncedPeerSyncs_Success(t *testing.T) { require.Equal(t, typesCons.NodeId(0), nodeState.LeaderId) } - metadataReceived := &typesCons.StateSyncMetadataResponse{ - PeerAddress: "unused_peer_addr_in_tests", - MinHeight: uint64(1), - MaxHeight: uint64(2), // 2 because unsynced node last persisted height 2 - } - - // Simulate state sync metadata response by pushing metadata to the unsynced node's consensus module - consensusModImpl := GetConsensusModImpl(unsyncedNode) - consensusModImpl.MethodByName("PushStateSyncMetadataResponse").Call([]reflect.Value{reflect.ValueOf(metadataReceived)}) - - for _, message := range newRoundMessages { - P2PBroadcast(t, pocketNodes, message) - } - advanceTime(t, clockMock, 10*time.Millisecond) - - // 2. Propose - _, err = WaitForNetworkConsensusEvents(t, clockMock, eventsChannel, consensus.Prepare, consensus.Propose, numValidators, 500, true) - require.NoError(t, err) + // Broadcast the new round messages so nodes enter the prepare stage + // broadcastMessages(t, newRoundMessages, pocketNodes) + // advanceTime(t, clockMock, 10*time.Millisecond) + // _, err = WaitForNetworkConsensusEvents(t, clockMock, eventsChannel, consensus.Prepare, consensus.Propose, numValidators, 500, true) + // require.NoError(t, err) + // Wait for unsyncedNode to go from height 2 to height 4 + assertHeight(t, unsyncedNodeId, uint64(2), getConsensusNodeState(unsyncedNode).Height) waitForNodeToSync(t, clockMock, eventsChannel, unsyncedNode, pocketNodes, 3) - require.NoError(t, err) + assertHeight(t, unsyncedNodeId, uint64(3), getConsensusNodeState(unsyncedNode).Height) } // TODO: Implement these tests @@ -247,13 +213,13 @@ func prepareStateSyncTestEnvironment(t *testing.T) (*clock.Mock, modules.EventsC timeReminder(t, clockMock, time.Second) // Test configs - runtimeMgrs := GenerateNodeRuntimeMgrs(t, numValidators, clockMock) - buses := GenerateBuses(t, runtimeMgrs) + runtimeMgrs := generateNodeRuntimeMgrs(t, numValidators, clockMock) + buses := generateBuses(t, runtimeMgrs) // Create & start test pocket nodes eventsChannel := make(modules.EventsChannel, 100) pocketNodes := createTestConsensusPocketNodes(t, buses, eventsChannel) - err := StartAllTestPocketNodes(t, pocketNodes) + err := startAllTestPocketNodes(t, pocketNodes) require.NoError(t, err) return clockMock, eventsChannel, pocketNodes diff --git a/consensus/e2e_tests/utils_test.go b/consensus/e2e_tests/utils_test.go index 96b562ab0..64532e90a 100644 --- a/consensus/e2e_tests/utils_test.go +++ b/consensus/e2e_tests/utils_test.go @@ -34,8 +34,6 @@ import ( "google.golang.org/protobuf/types/known/anypb" ) -// CLEANUP: Some functions in the test suite are exposed even though they do not need to be. - func TestMain(m *testing.M) { exitCode := m.Run() os.Exit(exitCode) @@ -55,9 +53,10 @@ type idToPrivKeyMapping map[typesCons.NodeId]cryptoPocket.PrivateKey /*** Node Generation Helpers ***/ -func GenerateNodeRuntimeMgrs(_ *testing.T, validatorCount int, clockMgr clock.Clock) []*runtime.Manager { +func generateNodeRuntimeMgrs(t *testing.T, validatorCount int, clockMgr clock.Clock) []*runtime.Manager { + t.Helper() + runtimeMgrs := make([]*runtime.Manager, validatorCount) - var validatorKeys []string genesisState, validatorKeys := test_artifacts.NewGenesisState(validatorCount, 1, 1, 1) cfgs := test_artifacts.NewDefaultConfigs(validatorKeys) for i, config := range cfgs { @@ -159,7 +158,7 @@ func createTestConsensusPocketNode( return pocketNode } -func GenerateBuses(t *testing.T, runtimeMgrs []*runtime.Manager) (buses []modules.Bus) { +func generateBuses(t *testing.T, runtimeMgrs []*runtime.Manager) (buses []modules.Bus) { buses = make([]modules.Bus, len(runtimeMgrs)) for i := range runtimeMgrs { bus, err := runtime.CreateBus(runtimeMgrs[i]) @@ -169,8 +168,7 @@ func GenerateBuses(t *testing.T, runtimeMgrs []*runtime.Manager) (buses []module return } -// CLEANUP: Reduce package scope visibility in the consensus test module -func StartAllTestPocketNodes(t *testing.T, pocketNodes idToNodeMapping) error { +func startAllTestPocketNodes(t *testing.T, pocketNodes idToNodeMapping) error { for _, pocketNode := range pocketNodes { go startNode(t, pocketNode) startEvent := pocketNode.GetBus().GetBusEvent() @@ -188,27 +186,21 @@ func StartAllTestPocketNodes(t *testing.T, pocketNodes idToNodeMapping) error { /*** Node Visibility/Reflection Helpers ***/ -// TODO(discuss): Should we use reflections inside the testing module as being done here or explicitly -// define the interfaces used for debug/development. The latter will probably scale more but will -// require more effort and pollute the source code with debugging information. -func GetConsensusNodeState(node *shared.Node) typesCons.ConsensusNodeState { - return GetConsensusModImpl(node).MethodByName("GetNodeState").Call([]reflect.Value{})[0].Interface().(typesCons.ConsensusNodeState) +// HACK: Look for ways to avoid using reflections in the testing package. It was a quick & dirty way to keep going. +func getConsensusNodeState(node *shared.Node) typesCons.ConsensusNodeState { + return getConsensusModImpl(node).MethodByName("GetNodeState").Call([]reflect.Value{})[0].Interface().(typesCons.ConsensusNodeState) } -func GetConsensusModElem(node *shared.Node) reflect.Value { +func getConsensusModElem(node *shared.Node) reflect.Value { return reflect.ValueOf(node.GetBus().GetConsensusModule()).Elem() } -func GetConsensusModImpl(node *shared.Node) reflect.Value { +func getConsensusModImpl(node *shared.Node) reflect.Value { return reflect.ValueOf(node.GetBus().GetConsensusModule()) } /*** Debug/Development Message Helpers ***/ -func TriggerNextView(t *testing.T, node *shared.Node) { - triggerDebugMessage(t, node, messaging.DebugMessageAction_DEBUG_CONSENSUS_TRIGGER_NEXT_VIEW) -} - func triggerDebugMessage(t *testing.T, node *shared.Node, action messaging.DebugMessageAction) { debugMessage := &messaging.DebugMessage{ Action: action, @@ -223,14 +215,16 @@ func triggerDebugMessage(t *testing.T, node *shared.Node, action messaging.Debug /*** P2P Helpers ***/ -func P2PBroadcast(_ *testing.T, nodes idToNodeMapping, any *anypb.Any) { +func broadcast(t *testing.T, nodes idToNodeMapping, any *anypb.Any) { + t.Helper() + e := &messaging.PocketEnvelope{Content: any} for _, node := range nodes { node.GetBus().PublishEventToBus(e) } } -func P2PSend(t *testing.T, node *shared.Node, any *anypb.Any) { +func send(t *testing.T, node *shared.Node, any *anypb.Any) { t.Helper() e := &messaging.PocketEnvelope{Content: any} @@ -246,7 +240,7 @@ func P2PSend(t *testing.T, node *shared.Node, any *anypb.Any) { // For example, if the test expects to receive 5 messages within 2 seconds: // false: continue if 5 messages are received in 0.5 seconds // true: wait for another 1.5 seconds after 5 messages are received in 0.5 seconds, and fail if any additional messages are received. -func WaitForNetworkConsensusEvents( +func waitForNetworkConsensusEvents( t *testing.T, clck *clock.Mock, eventsChannel modules.EventsChannel, @@ -272,7 +266,7 @@ func WaitForNetworkConsensusEvents( // IMPROVE: Consider unifying this function with WaitForNetworkConsensusEvents // This is a helper for 'waitForEventsInternal' that creates the `includeFilter` function based on state sync message specific parameters. -func WaitForNetworkStateSyncEvents( +func waitForNetworkStateSyncEvents( t *testing.T, clck *clock.Mock, eventsChannel modules.EventsChannel, @@ -294,8 +288,8 @@ func WaitForNetworkStateSyncEvents( return waitForEventsInternal(clck, eventsChannel, messaging.StateSyncMessageContentType, numExpectedMsgs, maxWaitTime, includeFilter, errMsg, failOnExtraMessages) } -// WaitForNetworkFSMEvents waits for the number of expected state machine events to be published on the events channel. -func WaitForNetworkFSMEvents( +// waitForNetworkFSMEvents waits for the number of expected state machine events to be published on the events channel. +func waitForNetworkFSMEvents( t *testing.T, clck *clock.Mock, eventsChannel modules.EventsChannel, @@ -644,7 +638,7 @@ func WaitForNextBlock( advanceTime(t, clck, 10*time.Millisecond) // wait for prepare votes - prepareVotes, err := WaitForNetworkConsensusEvents(t, clck, eventsChannel, 2, consensus.Vote, numValidators, maxWaitTime, failOnExtraMessages) + prepareVotes, err := waitForNetworkConsensusEvents(t, clck, eventsChannel, 2, consensus.Vote, numValidators, maxWaitTime, failOnExtraMessages) require.NoError(t, err) broadcastMessages(t, prepareVotes, pocketNodes) advanceTime(t, clck, 10*time.Millisecond) @@ -656,7 +650,7 @@ func WaitForNextBlock( advanceTime(t, clck, 10*time.Millisecond) // wait for preCommit votes - preCommitVotes, err := WaitForNetworkConsensusEvents(t, clck, eventsChannel, 3, consensus.Vote, numValidators, maxWaitTime, failOnExtraMessages) + preCommitVotes, err := waitForNetworkConsensusEvents(t, clck, eventsChannel, 3, consensus.Vote, numValidators, maxWaitTime, failOnExtraMessages) require.NoError(t, err) broadcastMessages(t, preCommitVotes, pocketNodes) advanceTime(t, clck, 10*time.Millisecond) @@ -668,7 +662,7 @@ func WaitForNextBlock( advanceTime(t, clck, 10*time.Millisecond) // wait for commit votes - commitVotes, err := WaitForNetworkConsensusEvents(t, clck, eventsChannel, 4, consensus.Vote, numValidators, maxWaitTime, failOnExtraMessages) + commitVotes, err := waitForNetworkConsensusEvents(t, clck, eventsChannel, 4, consensus.Vote, numValidators, maxWaitTime, failOnExtraMessages) require.NoError(t, err) broadcastMessages(t, commitVotes, pocketNodes) advanceTime(t, clck, 10*time.Millisecond) @@ -699,14 +693,13 @@ func waitForProposalMsgs( maxWaitTime time.Duration, failOnExtraMessages bool, ) ([]*anypb.Any, error) { - - proposalMsgs, err := WaitForNetworkConsensusEvents(t, clck, eventsChannel, typesCons.HotstuffStep(step), consensus.Propose, numExpectedMsgs, maxWaitTime, failOnExtraMessages) + proposalMsgs, err := waitForNetworkConsensusEvents(t, clck, eventsChannel, typesCons.HotstuffStep(step), consensus.Propose, numExpectedMsgs, maxWaitTime, failOnExtraMessages) if err != nil { return nil, err } for nodeId, pocketNode := range pocketNodes { - nodeState := GetConsensusNodeState(pocketNode) + nodeState := getConsensusNodeState(pocketNode) if (typesCons.HotstuffStep(step) == consensus.Decide) && (nodeId == leaderId) { assertNodeConsensusView(t, nodeId, typesCons.ConsensusNodeState{ @@ -732,12 +725,17 @@ func waitForProposalMsgs( func broadcastMessages(t *testing.T, msgs []*anypb.Any, pocketNodes idToNodeMapping) { for _, message := range msgs { - P2PBroadcast(t, pocketNodes, message) + broadcast(t, pocketNodes, message) + } +} + +func triggerNextView(t *testing.T, pocketNodes idToNodeMapping) { + for _, node := range pocketNodes { + triggerDebugMessage(t, node, messaging.DebugMessageAction_DEBUG_CONSENSUS_TRIGGER_NEXT_VIEW) } } // waitForNodeToSync waits for a node to sync to a target height. -// // For every block the unsynched node is missing: // 1. Wait for the unsynched node to request a missing block via `waitForNodeToRequestMissingBlock()` // 2. Wait for other nodes to send the requested block via `waitForNodesToReplyToBlockRequest()` @@ -752,22 +750,52 @@ func waitForNodeToSync( ) { t.Helper() + metadataReceived := &typesCons.StateSyncMetadataResponse{ + PeerAddress: "unused_peer_addr_in_tests", + MinHeight: uint64(1), + MaxHeight: uint64(2), // 2 because unsynced node last persisted height 2 + } + + // Simulate state sync metadata response by pushing metadata to the unsynced node's consensus module + consensusModImpl := getConsensusModImpl(unsyncedNode) + consensusModImpl.MethodByName("PushStateSyncMetadataResponse").Call([]reflect.Value{reflect.ValueOf(metadataReceived)}) + + // Get unsynched node info + unsyncedNodeId := typesCons.NodeId(unsyncedNode.GetBus().GetConsensusModule().GetNodeId()) currentHeight := unsyncedNode.GetBus().GetConsensusModule().CurrentHeight() + require.Less(t, currentHeight, targetHeight, "target height must be greater than current height") + for currentHeight < targetHeight { + + receivedMsg, err := waitForNetworkStateSyncEvents(t, clck, eventsChannel, "error waiting on response to a get block request", 1, 500, false) + require.NoError(t, err) + fmt.Println("receivedMsg", receivedMsg) + + // Wait for block request messages + // Broadcast them + // Wait for block response messages + // Broadcast them + + // anyProto, err := anypb.New(stateSyncGetBlockMessage) + // require.NoError(t, err) + + // // Send get block request to the server node + // P2PSend(t, serverNode, anyProto) + // waiting for unsynced node to request the same missing block from all peers. - blockRequests, err := WaitForNetworkStateSyncEvents(t, clck, eventsChannel, "Error while waiting for block response messages.", numValidators, 500, true) + blockRequests, err := waitForNetworkStateSyncEvents(t, clck, eventsChannel, "error while waiting for block response messages.", numValidators, 500, true) require.NoError(t, err) // verify that all requests are identical and take the first one require.True(t, checkIdentical(blockRequests), "All block requests sent by node must be identical") blockRequest := blockRequests[0] - // broadcast one of the requests to all nodes - P2PBroadcast(t, allNodes, blockRequest) + // broadcast one of the block requests to all nodes + broadcast(t, allNodes, blockRequest) advanceTime(t, clck, 10*time.Millisecond) - // wait to receive replies from all nodes - blockResponses, err := WaitForNetworkStateSyncEvents(t, clck, eventsChannel, "Error while waiting for block response messages.", numValidators-1, 500, true) + // wait to receive block replies from all nodes (except for self) + blockResponses, err := waitForNetworkStateSyncEvents(t, clck, eventsChannel, "error while waiting for block response messages.", numValidators-1, 500, true) require.NoError(t, err) // verify that all nodes replied with the same block response @@ -779,48 +807,28 @@ func waitForNodeToSync( stateSyncMessage, ok := msgAny.(*typesCons.StateSyncMessage) require.True(t, ok) - // verify that all nodes replied with the same block response if blockResponse == nil { - // On the first block received, we just verify the height is correct blockResponse = stateSyncMessage.GetGetBlockRes() - require.Equal(t, currentHeight, blockResponse.Block.BlockHeader.Height) - } else { - // On subsequent blocks, we verify all the blocks are identical - require.Equal(t, blockResponse.Block, stateSyncMessage.GetGetBlockRes().Block) - + continue } + require.Equal(t, blockResponse.Block, stateSyncMessage.GetGetBlockRes().Block) } // since all block responses are identical, send one of the block responses to the unsynced node - P2PSend(t, unsyncedNode, blockResponses[0]) + send(t, unsyncedNode, blockResponses[0]) advanceTime(t, clck, 10*time.Millisecond) - // waiting for node to reach to the next height (currentHeight + 1) - waitForNodeToCatchUp(t, clck, eventsChannel, unsyncedNode, currentHeight+1) + // waiting for node to reach to the next height (currentHeight+1) + _, err = waitForNetworkFSMEvents(t, clck, eventsChannel, coreTypes.StateMachineEvent_Consensus_IsSyncedValidator, "error while waiting for validator to sync", 1, 500, false) + require.NoError(t, err) + + // ensure unsynced node caught up to the target height + nodeState := getConsensusNodeState(unsyncedNode) + assertHeight(t, unsyncedNodeId, currentHeight+1, nodeState.Height) currentHeight = unsyncedNode.GetBus().GetConsensusModule().CurrentHeight() } } -// waitForNodeToCatchUp waits for unsynced node to catch up to the target height -func waitForNodeToCatchUp( - t *testing.T, - clck *clock.Mock, - eventsChannel modules.EventsChannel, - unsyncedNode *shared.Node, - targetHeight uint64, -) { - t.Helper() - - // wait for unsynced node to send StateMachineEvent_Consensus_IsSyncedValidator event - _, err := WaitForNetworkFSMEvents(t, clck, eventsChannel, coreTypes.StateMachineEvent_Consensus_IsSyncedValidator, "didn't receive synced event", 1, 500, false) - require.NoError(t, err) - - // ensure unsynced node caught up to the target height - nodeState := GetConsensusNodeState(unsyncedNode) - nodeId := typesCons.NodeId(unsyncedNode.GetBus().GetConsensusModule().GetNodeId()) - assertHeight(t, nodeId, targetHeight, nodeState.Height) -} - func generatePlaceholderBlock(height uint64, leaderAddrr crypto.Address) *coreTypes.Block { blockHeader := &coreTypes.BlockHeader{ Height: height, @@ -1032,3 +1040,23 @@ func checkIdentical(arr []*anypb.Any) bool { } return true } + +func prepareStateSyncGetBlockMessage(t *testing.T, peerAddress string, height uint64) *anypb.Any { + t.Helper() + + stateSyncGetBlockReq := typesCons.GetBlockRequest{ + PeerAddress: peerAddress, + Height: height, + } + + stateSyncGetBlockMessage := &typesCons.StateSyncMessage{ + Message: &typesCons.StateSyncMessage_GetBlockReq{ + GetBlockReq: &stateSyncGetBlockReq, + }, + } + + anyProto, err := anypb.New(stateSyncGetBlockMessage) + require.NoError(t, err) + + return anyProto +} From 5205d9abe5e8b206c0b54150aca38def3005b068 Mon Sep 17 00:00:00 2001 From: Daniel Olshansky Date: Fri, 9 Jun 2023 13:34:35 -0700 Subject: [PATCH 062/100] Update factory functions for consensus submodules --- consensus/module.go | 4 ++-- consensus/pacemaker/module.go | 25 ++++++++++++------------- consensus/state_sync/module.go | 13 ++++++------- consensus/types/messages.go | 1 - 4 files changed, 20 insertions(+), 23 deletions(-) diff --git a/consensus/module.go b/consensus/module.go index 8f1a26af2..2db31282c 100644 --- a/consensus/module.go +++ b/consensus/module.go @@ -79,13 +79,13 @@ func (*consensusModule) Create(bus modules.Bus, options ...modules.ModuleOption) return nil, err } - paceMakerMod, err := pacemaker.CreatePacemaker(bus) + paceMakerMod, err := pacemaker.Create(bus) if err != nil { return nil, err } pm := paceMakerMod.(pacemaker.Pacemaker) - stateSyncMod, err := state_sync.CreateStateSync(bus) + stateSyncMod, err := state_sync.Create(bus) if err != nil { return nil, err } diff --git a/consensus/pacemaker/module.go b/consensus/pacemaker/module.go index 2219d82aa..2a2d63d7b 100644 --- a/consensus/pacemaker/module.go +++ b/consensus/pacemaker/module.go @@ -60,32 +60,31 @@ type pacemaker struct { logPrefix string } -func CreatePacemaker(bus modules.Bus, options ...modules.ModuleOption) (modules.Module, error) { +func Create(bus modules.Bus, options ...modules.ModuleOption) (modules.Module, error) { return new(pacemaker).Create(bus, options...) } func (*pacemaker) Create(bus modules.Bus, options ...modules.ModuleOption) (modules.Module, error) { + runtimeMgr := bus.GetRuntimeMgr() + cfg := runtimeMgr.GetConfig() + pacemakerCfg := cfg.Consensus.PacemakerConfig + m := &pacemaker{ logPrefix: defaultLogPrefix, + debug: pacemakerDebug{ + manualMode: pacemakerCfg.GetManual(), + debugTimeBetweenStepsMsec: pacemakerCfg.GetDebugTimeBetweenStepsMsec(), + quorumCertificate: nil, + }, + pacemakerCfg: pacemakerCfg, } + m.roundTimeout = m.getRoundTimeout() for _, option := range options { option(m) } - bus.RegisterModule(m) - runtimeMgr := bus.GetRuntimeMgr() - cfg := runtimeMgr.GetConfig() - - m.pacemakerCfg = cfg.Consensus.PacemakerConfig - m.roundTimeout = m.getRoundTimeout() - m.debug = pacemakerDebug{ - manualMode: m.pacemakerCfg.GetManual(), - debugTimeBetweenStepsMsec: m.pacemakerCfg.GetDebugTimeBetweenStepsMsec(), - quorumCertificate: nil, - } - return m, nil } diff --git a/consensus/state_sync/module.go b/consensus/state_sync/module.go index e144d1857..c8a90827e 100644 --- a/consensus/state_sync/module.go +++ b/consensus/state_sync/module.go @@ -51,23 +51,22 @@ type stateSync struct { committedBlocksChannel chan uint64 } -func CreateStateSync(bus modules.Bus, options ...modules.ModuleOption) (modules.Module, error) { +func Create(bus modules.Bus, options ...modules.ModuleOption) (modules.Module, error) { return new(stateSync).Create(bus, options...) } func (*stateSync) Create(bus modules.Bus, options ...modules.ModuleOption) (modules.Module, error) { - m := &stateSync{} + m := &stateSync{ + metadataReceived: make(chan *typesCons.StateSyncMetadataResponse, metadataChannelSize), + committedBlocksChannel: make(chan uint64, committedBlocsChannelSize), + } + m.logger = logger.Global.CreateLoggerForModule(m.GetModuleName()) for _, option := range options { option(m) } - bus.RegisterModule(m) - m.logger = logger.Global.CreateLoggerForModule(m.GetModuleName()) - m.metadataReceived = make(chan *typesCons.StateSyncMetadataResponse, metadataChannelSize) - m.committedBlocksChannel = make(chan uint64, committedBlocsChannelSize) - return m, nil } diff --git a/consensus/types/messages.go b/consensus/types/messages.go index 8faceb2ba..8f30259db 100644 --- a/consensus/types/messages.go +++ b/consensus/types/messages.go @@ -116,7 +116,6 @@ var ( ErrSendMessage = errors.New(sendMessageError) ErrBroadcastMessage = errors.New(broadcastMessageError) ErrCreateConsensusMessage = errors.New(createConsensusMessageError) - ErrCreateStateSyncMessage = errors.New(createStateSyncMessageError) ErrNoQcInReceivedBlock = errors.New(noQcInReceivedBlockError) ErrBlockRetrievalMessage = errors.New(blockRetrievalError) ErrHotstuffValidation = errors.New(anteValidationError) From b24871fcff2f307db95f5469705c30a81f82e8d9 Mon Sep 17 00:00:00 2001 From: Daniel Olshansky Date: Fri, 9 Jun 2023 13:46:26 -0700 Subject: [PATCH 063/100] Updated StartSynchronousStateSync --- consensus/fsm_handler.go | 2 +- consensus/module.go | 7 ++-- consensus/pacemaker/module.go | 2 +- consensus/state_sync/module.go | 58 +++++++++++++++------------------- 4 files changed, 30 insertions(+), 39 deletions(-) diff --git a/consensus/fsm_handler.go b/consensus/fsm_handler.go index 8363a5de1..e423ffb78 100644 --- a/consensus/fsm_handler.go +++ b/consensus/fsm_handler.go @@ -89,7 +89,7 @@ func (m *consensusModule) HandleUnsynced(msg *messaging.StateMachineTransitionEv // In Sync mode, the node (validator or not starts syncing with the rest of the network. func (m *consensusModule) HandleSyncMode(msg *messaging.StateMachineTransitionEvent) error { m.logger.Info().Str("source", consensusFSMHandlerSource).Msg("Node is in Sync Mode. Consensus Module is about to start synching...") - go m.stateSync.SyncStateSync() + go m.stateSync.StartSynchronousStateSync() return nil } diff --git a/consensus/module.go b/consensus/module.go index 2db31282c..043a3985b 100644 --- a/consensus/module.go +++ b/consensus/module.go @@ -164,10 +164,9 @@ func (m *consensusModule) Start() error { return err } - // TODO_IN_THIS_COMMIT: Pick up here - // if err := m.stateSync.Start(); err != nil { - // return err - // } + if err := m.stateSync.Start(); err != nil { + return err + } return nil } diff --git a/consensus/pacemaker/module.go b/consensus/pacemaker/module.go index 2a2d63d7b..bc70cb8a6 100644 --- a/consensus/pacemaker/module.go +++ b/consensus/pacemaker/module.go @@ -79,6 +79,7 @@ func (*pacemaker) Create(bus modules.Bus, options ...modules.ModuleOption) (modu pacemakerCfg: pacemakerCfg, } m.roundTimeout = m.getRoundTimeout() + m.logger = logger.Global.CreateLoggerForModule(m.GetModuleName()) for _, option := range options { option(m) @@ -89,7 +90,6 @@ func (*pacemaker) Create(bus modules.Bus, options ...modules.ModuleOption) (modu } func (m *pacemaker) Start() error { - m.logger = logger.Global.CreateLoggerForModule(m.GetModuleName()) m.RestartTimer() return nil } diff --git a/consensus/state_sync/module.go b/consensus/state_sync/module.go index c8a90827e..bcc6cabe9 100644 --- a/consensus/state_sync/module.go +++ b/consensus/state_sync/module.go @@ -30,9 +30,11 @@ type StateSyncModule interface { modules.Module StateSyncServerModule - SyncStateSync() error HandleStateSyncBlockCommittedEvent(message *anypb.Any) error HandleStateSyncMetadataResponse(*typesCons.StateSyncMetadataResponse) error + + // TECHDEBT: This function can be removed once the dependency of state sync on the FSM module is removed. + StartSynchronousStateSync() error } var ( @@ -80,10 +82,14 @@ func (m *stateSync) Start() error { // requests missing blocks starting from its current height to the aggregated metadata's maxHeight, // once the requested block is received and committed by consensus module, sends the next request for the next block, // when all blocks are received and committed, stops the state sync process by calling its `Stop()` function. -func (m *stateSync) SyncStateSync() error { +func (m *stateSync) StartSynchronousStateSync() error { consensusMod := m.bus.GetConsensusModule() currentHeight := consensusMod.CurrentHeight() nodeAddress := consensusMod.GetNodeAddress() + nodeAddressBz, err := hex.DecodeString(nodeAddress) + if err != nil { + return err + } readCtx, err := m.GetBus().GetPersistenceModule().NewReadContext(int64(currentHeight)) if err != nil { @@ -91,7 +97,8 @@ func (m *stateSync) SyncStateSync() error { } defer readCtx.Release() - // TECHDEBT: We want to request blocks from all peers (staked or not) as opposed to just validators + // TODO: Replace `GetAllValidators` with `GetAllStakedActors` to retrieve blocks from all staked actors and add tests + // TODO: Extend `GetAllStakedActors` to use all nodes for block requests validators, err := readCtx.GetAllValidators(int64(currentHeight)) if err != nil { return err @@ -136,8 +143,18 @@ func (m *stateSync) SyncStateSync() error { // Update the height and continue catching up to the latest known state currentHeight = consensusMod.CurrentHeight() } - // syncing is complete and all requested blocks are committed, stop the state sync module - return m.pauseSynching() + + // Checked if the synched node is a validator or not + isValidator, err := readCtx.GetValidatorExists(nodeAddressBz, int64(currentHeight)) + if err != nil { + return err + } + + // Send out the appropriate FSM event now that the node is caught up + if isValidator { + return m.GetBus().GetStateMachineModule().SendEvent(coreTypes.StateMachineEvent_Consensus_IsSyncedValidator) + } + return m.GetBus().GetStateMachineModule().SendEvent(coreTypes.StateMachineEvent_Consensus_IsSyncedNonValidator) } func (m *stateSync) HandleStateSyncMetadataResponse(res *typesCons.StateSyncMetadataResponse) error { @@ -161,32 +178,6 @@ func (m *stateSync) HandleStateSyncBlockCommittedEvent(event *anypb.Any) error { return nil } -// Stop stops the state sync process, and sends `Consensus_IsSyncedValidator` FSM event -func (m *stateSync) pauseSynching() error { - currentHeight := m.bus.GetConsensusModule().CurrentHeight() - nodeAddress := m.bus.GetConsensusModule().GetNodeAddress() - - readCtx, err := m.bus.GetPersistenceModule().NewReadContext(int64(currentHeight)) - if err != nil { - return err - } - defer readCtx.Release() - - nodeAddressBz, err := hex.DecodeString(nodeAddress) - if err != nil { - return err - } - isValidator, err := readCtx.GetValidatorExists(nodeAddressBz, int64(currentHeight)) - if err != nil { - return err - } - - if isValidator { - return m.GetBus().GetStateMachineModule().SendEvent(coreTypes.StateMachineEvent_Consensus_IsSyncedValidator) - } - return m.GetBus().GetStateMachineModule().SendEvent(coreTypes.StateMachineEvent_Consensus_IsSyncedNonValidator) -} - func (m *stateSync) Stop() error { m.logger.Log().Msg("Draining and closing metadataReceived and blockResponse channels") for { @@ -220,8 +211,9 @@ func (m *stateSync) GetModuleName() string { return stateSyncModuleName } -// metadataSyncLoop periodically sends metadata requests to its peers to aggregate metadata related to synching the state. -// It is intended to be run as a background process via `go metadataSyncLoop` +// metadataSyncLoop periodically sends metadata requests to its peers to collect & +// aggregate metadata related to synching the state. +// It is intended to be run as a background process via a goroutine. func (m *stateSync) metadataSyncLoop() error { logger := m.logger.With().Str("source", "metadataSyncLoop").Logger() ctx := context.TODO() From e82e41d10dc009fc950ea67fbf35207ee2642ade Mon Sep 17 00:00:00 2001 From: Daniel Olshansky Date: Fri, 9 Jun 2023 14:04:00 -0700 Subject: [PATCH 064/100] WIP - simplifying the test --- consensus/e2e_tests/utils_test.go | 11 ------- consensus/module_consensus_debugging.go | 7 ---- consensus/state_sync/helpers.go | 12 +++---- consensus/state_sync/module.go | 43 ++++++++++--------------- 4 files changed, 21 insertions(+), 52 deletions(-) diff --git a/consensus/e2e_tests/utils_test.go b/consensus/e2e_tests/utils_test.go index 64532e90a..8f62596bc 100644 --- a/consensus/e2e_tests/utils_test.go +++ b/consensus/e2e_tests/utils_test.go @@ -750,23 +750,12 @@ func waitForNodeToSync( ) { t.Helper() - metadataReceived := &typesCons.StateSyncMetadataResponse{ - PeerAddress: "unused_peer_addr_in_tests", - MinHeight: uint64(1), - MaxHeight: uint64(2), // 2 because unsynced node last persisted height 2 - } - - // Simulate state sync metadata response by pushing metadata to the unsynced node's consensus module - consensusModImpl := getConsensusModImpl(unsyncedNode) - consensusModImpl.MethodByName("PushStateSyncMetadataResponse").Call([]reflect.Value{reflect.ValueOf(metadataReceived)}) - // Get unsynched node info unsyncedNodeId := typesCons.NodeId(unsyncedNode.GetBus().GetConsensusModule().GetNodeId()) currentHeight := unsyncedNode.GetBus().GetConsensusModule().CurrentHeight() require.Less(t, currentHeight, targetHeight, "target height must be greater than current height") for currentHeight < targetHeight { - receivedMsg, err := waitForNetworkStateSyncEvents(t, clck, eventsChannel, "error waiting on response to a get block request", 1, 500, false) require.NoError(t, err) fmt.Println("receivedMsg", receivedMsg) diff --git a/consensus/module_consensus_debugging.go b/consensus/module_consensus_debugging.go index 401cc0fe6..9c066f8bf 100644 --- a/consensus/module_consensus_debugging.go +++ b/consensus/module_consensus_debugging.go @@ -65,10 +65,3 @@ func (m *consensusModule) GetLeaderForView(height, round uint64, step uint8) uin } return uint64(leaderId) } - -// TODO(#609): Refactor to use the test-only package and remove reflection -func (m *consensusModule) PushStateSyncMetadataResponse(metadataRes *typesCons.StateSyncMetadataResponse) { - if err := m.stateSync.HandleStateSyncMetadataResponse(metadataRes); err != nil { - m.logger.Error().Err(err).Msg("failed to handle state sync metadata response") - } -} diff --git a/consensus/state_sync/helpers.go b/consensus/state_sync/helpers.go index dc9b0a1c1..035b0a598 100644 --- a/consensus/state_sync/helpers.go +++ b/consensus/state_sync/helpers.go @@ -27,9 +27,9 @@ func (m *stateSync) getValidatorsAtHeight(height uint64) ([]*coreTypes.Actor, er return readCtx.GetAllValidators(int64(height)) } -// TECHDEBT(#686): This should be an ongoing background passive state sync process but just -// capturing the available messages at the time that this function was called is good enough for now. -func (m *stateSync) getAggregatedStateSyncMetadata() typesCons.StateSyncMetadataResponse { +// TECHDEBT(#686): This should be an ongoing background passive state sync process. +// For now, aggregating the messages when requests is good enough. +func (m *stateSync) getAggregatedStateSyncMetadata() (uint64, uint64) { chanLen := len(m.metadataReceived) m.logger.Info().Msgf("Looping over %d state sync metadata responses", chanLen) @@ -44,9 +44,5 @@ func (m *stateSync) getAggregatedStateSyncMetadata() typesCons.StateSyncMetadata } } - return typesCons.StateSyncMetadataResponse{ - PeerAddress: "unused_aggregated_metadata_address", - MinHeight: minHeight, - MaxHeight: maxHeight, - } + return minHeight, maxHeight } diff --git a/consensus/state_sync/module.go b/consensus/state_sync/module.go index bcc6cabe9..e2da6aac9 100644 --- a/consensus/state_sync/module.go +++ b/consensus/state_sync/module.go @@ -77,11 +77,11 @@ func (m *stateSync) Start() error { return nil } -// Start performs state sync -// processes and aggregates all metadata collected in metadataReceived channel, -// requests missing blocks starting from its current height to the aggregated metadata's maxHeight, -// once the requested block is received and committed by consensus module, sends the next request for the next block, -// when all blocks are received and committed, stops the state sync process by calling its `Stop()` function. +// Start a synchronous state sync process to catch up to the network +// 1. Processes and aggregates all metadata collected in metadataReceived channel +// 2. Requests missing blocks until the maximum seen block is retrieved +// 3. Perform (2) one-by-one, applying and validating each block while doing so +// 4. Once all blocks are received and committed, stop the synchronous state sync process func (m *stateSync) StartSynchronousStateSync() error { consensusMod := m.bus.GetConsensusModule() currentHeight := consensusMod.CurrentHeight() @@ -97,23 +97,15 @@ func (m *stateSync) StartSynchronousStateSync() error { } defer readCtx.Release() - // TODO: Replace `GetAllValidators` with `GetAllStakedActors` to retrieve blocks from all staked actors and add tests - // TODO: Extend `GetAllStakedActors` to use all nodes for block requests - validators, err := readCtx.GetAllValidators(int64(currentHeight)) - if err != nil { - return err - } + // Get a view into the state of the network + _, maxHeight := m.getAggregatedStateSyncMetadata() - // Understand the view of the network - aggregatedMetaData := m.getAggregatedStateSyncMetadata() - maxHeight := aggregatedMetaData.MaxHeight - - // requests blocks from the current height to the aggregated metadata height + // Synchronously request block requests from the current height to the aggregated metadata height for currentHeight <= maxHeight { m.logger.Info().Msgf("Sync is requesting block: %d, ending height: %d", currentHeight, maxHeight) // form the get block request message - stateSyncGetBlockMessage := &typesCons.StateSyncMessage{ + stateSyncGetBlockMsg := &typesCons.StateSyncMessage{ Message: &typesCons.StateSyncMessage_GetBlockReq{ GetBlockReq: &typesCons.GetBlockRequest{ PeerAddress: nodeAddress, @@ -121,20 +113,19 @@ func (m *stateSync) StartSynchronousStateSync() error { }, }, } + anyProtoStateSyncMsg, err := anypb.New(stateSyncGetBlockMsg) + if err != nil { + return err + } - // Broadcast the get block request message from all the available peers on the network - // TODO: Use P2P.broadcast instead of looping over the validators and sending the message to each one - for _, val := range validators { - if err := m.sendStateSyncMessage(stateSyncGetBlockMessage, cryptoPocket.AddressFromString(val.GetAddress())); err != nil { - return err - } + // Broadcast the block request + if err := m.GetBus().GetP2PModule().Broadcast(anyProtoStateSyncMsg); err != nil { + return err } - // Wait for the consensus module to commit the requested block - // If the block is not committed within some time, try re-requesting the block + // Wait for the consensus module to commit the requested block and re-try on timeout select { case blockHeight := <-m.committedBlocksChannel: - // requested block is received and committed, continue to request the next block from the current height m.logger.Info().Msgf("Block %d is committed!", blockHeight) case <-time.After(blockWaitingPeriod): m.logger.Warn().Msgf("Timed out waiting for block %d to be committed...", currentHeight) From 5696d48d086e643fd27d02ad351f09379a5d6610 Mon Sep 17 00:00:00 2001 From: Daniel Olshansky Date: Fri, 9 Jun 2023 15:01:05 -0700 Subject: [PATCH 065/100] Using broadcast instead of send with a loop where appropriate --- consensus/debugging.go | 66 ++++++------------------ consensus/e2e_tests/state_sync_test.go | 6 --- consensus/e2e_tests/utils_test.go | 24 +++++---- consensus/fsm_handler.go | 3 +- consensus/helpers.go | 3 +- consensus/module_consensus_state_sync.go | 61 +++++++++++----------- consensus/pacemaker/module.go | 4 +- consensus/state_sync/module.go | 22 ++------ consensus/state_sync_handler.go | 1 + 9 files changed, 72 insertions(+), 118 deletions(-) diff --git a/consensus/debugging.go b/consensus/debugging.go index 1b8b6fc74..5379b17d9 100644 --- a/consensus/debugging.go +++ b/consensus/debugging.go @@ -2,7 +2,6 @@ package consensus import ( typesCons "github.com/pokt-network/pocket/consensus/types" - cryptoPocket "github.com/pokt-network/pocket/shared/crypto" "github.com/pokt-network/pocket/shared/messaging" "google.golang.org/protobuf/types/known/anypb" ) @@ -83,74 +82,43 @@ func (m *consensusModule) togglePacemakerManualMode(_ *messaging.DebugMessage) { m.paceMaker.SetManualMode(newMode) } -// sendGetBlockStateSyncMessage requests nodes with the state sync server to send the current block +// sendGetBlockStateSyncMessage sends a messages to request specific blocks from peers func (m *consensusModule) sendGetBlockStateSyncMessage(_ *messaging.DebugMessage) { - currentHeight := m.CurrentHeight() - requestHeight := currentHeight - 1 - peerAddress := m.GetNodeAddress() - stateSyncGetBlockMessage := &typesCons.StateSyncMessage{ Message: &typesCons.StateSyncMessage_GetBlockReq{ GetBlockReq: &typesCons.GetBlockRequest{ - PeerAddress: peerAddress, - Height: requestHeight, + PeerAddress: m.GetNodeAddress(), + Height: m.CurrentHeight() - 1, }, }, } - - validators, err := m.getValidatorsAtHeight(currentHeight) + anyMsg, err := anypb.New(stateSyncGetBlockMessage) if err != nil { - m.logger.Debug().Msgf(typesCons.ErrPersistenceGetAllValidators.Error(), err) + m.logger.Error().Err(err).Str("proto_type", "GetBlockRequest").Msg("failed to create StateSyncGetBlockMessage") + return } - - for _, val := range validators { - if m.GetNodeAddress() == val.GetAddress() { - continue - } - valAddress := cryptoPocket.AddressFromString(val.GetAddress()) - - anyMsg, err := anypb.New(stateSyncGetBlockMessage) - if err != nil { - m.logger.Error().Err(err).Str("proto_type", "GetBlockRequest").Msg("failed to send StateSyncMessage") - } - - if err := m.GetBus().GetP2PModule().Send(valAddress, anyMsg); err != nil { - m.logger.Error().Err(err).Msg(typesCons.ErrSendMessage.Error()) - } + if err := m.GetBus().GetP2PModule().Broadcast(anyMsg); err != nil { + m.logger.Error().Err(err).Msg(typesCons.ErrBroadcastMessage.Error()) + return } } -// requests metadata from all validators +// sendGetMetadataStateSyncMessage sends a message to request metadata from their peers func (m *consensusModule) sendGetMetadataStateSyncMessage(_ *messaging.DebugMessage) { - currentHeight := m.CurrentHeight() - peerAddress := m.GetNodeAddress() - stateSyncMetaDataReqMessage := &typesCons.StateSyncMessage{ Message: &typesCons.StateSyncMessage_MetadataReq{ MetadataReq: &typesCons.StateSyncMetadataRequest{ - PeerAddress: peerAddress, + PeerAddress: m.GetNodeAddress(), }, }, } - - validators, err := m.getValidatorsAtHeight(currentHeight) + anyMsg, err := anypb.New(stateSyncMetaDataReqMessage) if err != nil { - m.logger.Debug().Msgf(typesCons.ErrPersistenceGetAllValidators.Error(), err) + m.logger.Error().Err(err).Str("proto_type", "GetBlockRequest").Msg("failed to create StateSyncGetBlockMessage") + return } - - for _, val := range validators { - if m.GetNodeAddress() == val.GetAddress() { - continue - } - valAddress := cryptoPocket.AddressFromString(val.GetAddress()) - - anyMsg, err := anypb.New(stateSyncMetaDataReqMessage) - if err != nil { - m.logger.Error().Err(err).Str("proto_type", "GetMetadataRequest").Msg("failed to send StateSyncMessage") - } - - if err := m.GetBus().GetP2PModule().Send(valAddress, anyMsg); err != nil { - m.logger.Error().Err(err).Msg(typesCons.ErrSendMessage.Error()) - } + if m.GetBus().GetP2PModule().Broadcast(anyMsg) != nil { + m.logger.Error().Err(err).Msg(typesCons.ErrBroadcastMessage.Error()) + return } } diff --git a/consensus/e2e_tests/state_sync_test.go b/consensus/e2e_tests/state_sync_test.go index 58dbcb75a..72cfc2aab 100644 --- a/consensus/e2e_tests/state_sync_test.go +++ b/consensus/e2e_tests/state_sync_test.go @@ -169,12 +169,6 @@ func TestStateSync_UnsyncedPeerSyncs_Success(t *testing.T) { require.Equal(t, typesCons.NodeId(0), nodeState.LeaderId) } - // Broadcast the new round messages so nodes enter the prepare stage - // broadcastMessages(t, newRoundMessages, pocketNodes) - // advanceTime(t, clockMock, 10*time.Millisecond) - // _, err = WaitForNetworkConsensusEvents(t, clockMock, eventsChannel, consensus.Prepare, consensus.Propose, numValidators, 500, true) - // require.NoError(t, err) - // Wait for unsyncedNode to go from height 2 to height 4 assertHeight(t, unsyncedNodeId, uint64(2), getConsensusNodeState(unsyncedNode).Height) waitForNodeToSync(t, clockMock, eventsChannel, unsyncedNode, pocketNodes, 3) diff --git a/consensus/e2e_tests/utils_test.go b/consensus/e2e_tests/utils_test.go index 8f62596bc..fb7e14d40 100644 --- a/consensus/e2e_tests/utils_test.go +++ b/consensus/e2e_tests/utils_test.go @@ -380,7 +380,8 @@ loop: if numRemainingMsgs == 0 { break loop } else if numRemainingMsgs > 0 { - return expectedMsgs, fmt.Errorf("Missing '%s' messages; %d expected but %d received. (%s) \n\t DO_NOT_SKIP_ME(#462): Consider increasing `maxWaitTime` as a workaround", eventContentType, numExpectedMsgs, len(expectedMsgs), errMsg) + fmt.Println("OLSH", expectedMsgs) + return expectedMsgs, fmt.Errorf("Missing '%s' messages; %d expected but %d received. (%s) \n\t !!!IMPORTANT(#462)!!!: Consider increasing `maxWaitTime` as a workaround", eventContentType, numExpectedMsgs, len(expectedMsgs), errMsg) } else { return expectedMsgs, fmt.Errorf("Too many '%s' messages; %d expected but %d received. (%s)", eventContentType, numExpectedMsgs, len(expectedMsgs), errMsg) } @@ -756,9 +757,16 @@ func waitForNodeToSync( require.Less(t, currentHeight, targetHeight, "target height must be greater than current height") for currentHeight < targetHeight { - receivedMsg, err := waitForNetworkStateSyncEvents(t, clck, eventsChannel, "error waiting on response to a get block request", 1, 500, false) + receivedMsg, err := waitForNetworkStateSyncEvents(t, clck, eventsChannel, "error waiting on response to a get block request", 5, 5000, false) require.NoError(t, err) - fmt.Println("receivedMsg", receivedMsg) + + msg, err := codec.GetCodec().FromAny(receivedMsg[0]) + require.NoError(t, err) + stateSyncMessage, ok := msg.(*typesCons.StateSyncMessage) + require.True(t, ok) + fmt.Println("receivedMsg", stateSyncMessage.GetGetBlockReq().Height, "~~~~~", stateSyncMessage.GetGetBlockReq().PeerAddress, "~~~~~") + + broadcastMessages(t, receivedMsg, allNodes) // Wait for block request messages // Broadcast them @@ -1033,14 +1041,12 @@ func checkIdentical(arr []*anypb.Any) bool { func prepareStateSyncGetBlockMessage(t *testing.T, peerAddress string, height uint64) *anypb.Any { t.Helper() - stateSyncGetBlockReq := typesCons.GetBlockRequest{ - PeerAddress: peerAddress, - Height: height, - } - stateSyncGetBlockMessage := &typesCons.StateSyncMessage{ Message: &typesCons.StateSyncMessage_GetBlockReq{ - GetBlockReq: &stateSyncGetBlockReq, + GetBlockReq: &typesCons.GetBlockRequest{ + PeerAddress: peerAddress, + Height: height, + }, }, } diff --git a/consensus/fsm_handler.go b/consensus/fsm_handler.go index e423ffb78..10e44fda5 100644 --- a/consensus/fsm_handler.go +++ b/consensus/fsm_handler.go @@ -72,8 +72,7 @@ func (m *consensusModule) handleStateTransitionEvent(msg *messaging.StateMachine // This is a transition mode from node bootstrapping to a node being out-of-sync. func (m *consensusModule) HandleBootstrapped(msg *messaging.StateMachineTransitionEvent) error { m.logger.Info().Str("source", consensusFSMHandlerSource).Msg("Node is in the bootstrapped state. Consensus module NOOP.") - // INVESTIGATE(#816): Why are we not calling fsm.SendEvent(coreTypes.StateMachineEvent_Consensus_IsUnsynced) here? - return nil + return m.GetBus().GetStateMachineModule().SendEvent(coreTypes.StateMachineEvent_Consensus_IsUnsynced) } // HandleUnsynced handles the FSM event Consensus_IsUnsynced, and when Unsynced is the destination state. diff --git a/consensus/helpers.go b/consensus/helpers.go index 282e87bfd..05c4bf82b 100644 --- a/consensus/helpers.go +++ b/consensus/helpers.go @@ -177,7 +177,6 @@ func (m *consensusModule) sendToLeader(msg *typesCons.HotstuffMessage) { } // Star-like (O(n)) broadcast - send to all nodes directly -// INVESTIGATE: Re-evaluate if we should be using our structured broadcast (RainTree O(log3(n))) algorithm instead func (m *consensusModule) broadcastToValidators(msg *typesCons.HotstuffMessage) { m.logger.Info().Fields(hotstuffMsgToLoggingFields(msg)).Msg("📣 Broadcasting message 📣") @@ -187,11 +186,11 @@ func (m *consensusModule) broadcastToValidators(msg *typesCons.HotstuffMessage) return } + // Not using Broadcast because this is a direct message to all validators only validators, err := m.getValidatorsAtHeight(m.CurrentHeight()) if err != nil { m.logger.Error().Err(err).Msg(typesCons.ErrPersistenceGetAllValidators.Error()) } - for _, val := range validators { if err := m.GetBus().GetP2PModule().Send(cryptoPocket.AddressFromString(val.GetAddress()), anyConsensusMessage); err != nil { m.logger.Error().Err(err).Msg(typesCons.ErrBroadcastMessage.Error()) diff --git a/consensus/module_consensus_state_sync.go b/consensus/module_consensus_state_sync.go index 6d8c18e57..f7203152b 100644 --- a/consensus/module_consensus_state_sync.go +++ b/consensus/module_consensus_state_sync.go @@ -8,23 +8,18 @@ import ( "google.golang.org/protobuf/proto" ) -// REFACTOR(#434): Once we consolidated NodeIds/PeerIds, this could potentially be removed -func (m *consensusModule) getNodeIdFromNodeAddress(peerId string) (uint64, error) { - validators, err := m.getValidatorsAtHeight(m.CurrentHeight()) - if err != nil { - m.logger.Warn().Err(err).Msgf("Could not get validators at height %d when checking if peer %s is a validator", m.CurrentHeight(), peerId) - return 0, fmt.Errorf("Could determine if peer %s is a validator or not: %w", peerId, err) - } - - valAddrToIdMap := typesCons.NewActorMapper(validators).GetValAddrToIdMap() - return uint64(valAddrToIdMap[peerId]), nil -} - -// tryToApplyRequestedBlock tries to commit the requested Block received from a peer +// tryToApplyRequestedBlock tries to commit the requested Block received from a peer. +// Intended to be called via a background goroutine. +// CLEANUP: Investigate whether this should be part of `Consensus` or part of `StateSync` func (m *consensusModule) tryToApplyRequestedBlock(blockResponse *typesCons.GetBlockResponse) { logger := m.logger.With().Str("source", "tryToApplyRequestedBlock").Logger() + // Retrieve the block we're about to try and apply block := blockResponse.Block + if block == nil { + logger.Error().Msg("Received nil block in GetBlockResponse") + return + } logger.Info().Msgf("Received new block at height %d.", block.BlockHeader.Height) // Check what the current latest committed block height is @@ -42,24 +37,19 @@ func (m *consensusModule) tryToApplyRequestedBlock(blockResponse *typesCons.GetB // Check if the block being synched is ahead of the current height if block.BlockHeader.Height > m.CurrentHeight() { - // TECHDEBT: we need to store block responses that we are not yet ready to validate so we can validate them on a subsequent iteration of this loop + // IMPROVE: we need to store block responses that we are not yet ready to validate so we can validate them on a subsequent iteration of this loop logger.Info().Bool("TODO", true).Msgf("Received block at height %d, discarding as it is higher than the current height", block.BlockHeader.Height) return } - // Do basic block validation - if err = m.validateBlock(block); err != nil { + // Perform basic validation on the block + if err = m.basicValidateBlock(block); err != nil { logger.Err(err).Msg("failed to validate block") return } - // Prepare the utility UOW of work to apply a new block - if err := m.refreshUtilityUnitOfWork(); err != nil { - m.logger.Error().Err(err).Msg("Could not refresh utility context") - return - } - // Update the leader proposing the block + // TECHDEBT: This ID logic could potentially be simplified in the future but needs a SPIKE leaderIdInt, err := m.getNodeIdFromNodeAddress(string(block.BlockHeader.ProposerAddress)) if err != nil { m.logger.Error().Err(err).Msg("Could not get leader id from leader address") @@ -67,6 +57,12 @@ func (m *consensusModule) tryToApplyRequestedBlock(blockResponse *typesCons.GetB } m.leaderId = typesCons.NewNodeId(leaderIdInt) + // Prepare the utility UOW of work to apply a new block + if err := m.refreshUtilityUnitOfWork(); err != nil { + m.logger.Error().Err(err).Msg("Could not refresh utility context") + return + } + // Try to apply the block by validating the transactions in the block if err := m.applyBlock(block); err != nil { m.logger.Error().Err(err).Msg("Could not apply block") @@ -84,16 +80,21 @@ func (m *consensusModule) tryToApplyRequestedBlock(blockResponse *typesCons.GetB m.publishStateSyncBlockCommittedEvent(block.BlockHeader.Height) } -// TODO(#352): Implement this function, currently a placeholder. -// blockApplicationLoop commits the blocks received from the blocksReceived channel -// it is intended to be run as a background process -func (m *consensusModule) blockApplicationLoop() { - // runs as a background process in consensus module - // listens on the blocksReceived channel - // commits the received block +// REFACTOR(#434): Once we consolidated NodeIds/PeerIds, this could potentially be removed +func (m *consensusModule) getNodeIdFromNodeAddress(peerId string) (uint64, error) { + validators, err := m.getValidatorsAtHeight(m.CurrentHeight()) + if err != nil { + m.logger.Warn().Err(err).Msgf("Could not get validators at height %d when checking if peer %s is a validator", m.CurrentHeight(), peerId) + return 0, fmt.Errorf("Could determine if peer %s is a validator or not: %w", peerId, err) + } + + valAddrToIdMap := typesCons.NewActorMapper(validators).GetValAddrToIdMap() + return uint64(valAddrToIdMap[peerId]), nil } -func (m *consensusModule) validateBlock(block *coreTypes.Block) error { +// basicValidateBlock performs basic validation of the block, its metadata, signatures, +// but not the transactions themselves +func (m *consensusModule) basicValidateBlock(block *coreTypes.Block) error { blockHeader := block.BlockHeader qcBytes := blockHeader.GetQuorumCertificate() diff --git a/consensus/pacemaker/module.go b/consensus/pacemaker/module.go index bc70cb8a6..639571195 100644 --- a/consensus/pacemaker/module.go +++ b/consensus/pacemaker/module.go @@ -112,8 +112,8 @@ func (m *pacemaker) ShouldHandleMessage(msg *typesCons.HotstuffMessage) (bool, e } // If this case happens, there are two possibilities: - // 1. The node is behind and needs to catch up, node must start syncing, - // 2. The leader is sending a malicious proposal. + // 1. The node is behind and needs to catch up, node must start syncing, + // 2. The leader is sending a malicious proposal. // There, for both cases, node rejects the proposal, because: // 1. If node is out of sync, node can't verify the block proposal, so rejects it. But node will eventually sync with the rest of the network and add the block. // 2. If node is synced, node must reject the proposal because proposal is not valid. diff --git a/consensus/state_sync/module.go b/consensus/state_sync/module.go index e2da6aac9..8d93af981 100644 --- a/consensus/state_sync/module.go +++ b/consensus/state_sync/module.go @@ -10,7 +10,6 @@ import ( "github.com/pokt-network/pocket/logger" "github.com/pokt-network/pocket/shared/codec" coreTypes "github.com/pokt-network/pocket/shared/core/types" - cryptoPocket "github.com/pokt-network/pocket/shared/crypto" "github.com/pokt-network/pocket/shared/messaging" "github.com/pokt-network/pocket/shared/modules" "google.golang.org/protobuf/types/known/anypb" @@ -236,25 +235,12 @@ func (m *stateSync) broadcastMetadataRequests() error { }, }, } - - currentHeight := m.bus.GetConsensusModule().CurrentHeight() - // TECHDEBT: This should be sent to all peers (full nodes, servicers, etc...), not just validators - validators, err := m.getValidatorsAtHeight(currentHeight) + anyMsg, err := anypb.New(stateSyncMetadataReqMessage) if err != nil { - m.logger.Error().Err(err).Msg(typesCons.ErrPersistenceGetAllValidators.Error()) + return err } - - for _, val := range validators { - anyMsg, err := anypb.New(stateSyncMetadataReqMessage) - if err != nil { - return err - } - // TECHDEBT: Revisit why we're not using `Broadcast` here instead of `Send`. - if err := m.GetBus().GetP2PModule().Send(cryptoPocket.AddressFromString(val.GetAddress()), anyMsg); err != nil { - m.logger.Error().Err(err).Msg(typesCons.ErrSendMessage.Error()) - return err - } + if err := m.GetBus().GetP2PModule().Broadcast(anyMsg); err != nil { + return err } - return nil } diff --git a/consensus/state_sync_handler.go b/consensus/state_sync_handler.go index 66cb69eea..9f701606e 100644 --- a/consensus/state_sync_handler.go +++ b/consensus/state_sync_handler.go @@ -32,6 +32,7 @@ func (m *consensusModule) HandleStateSyncMessage(stateSyncMessageAny *anypb.Any) func (m *consensusModule) handleStateSyncMessage(stateSyncMessage *typesCons.StateSyncMessage) error { switch stateSyncMessage.Message.(type) { + case *typesCons.StateSyncMessage_MetadataReq: m.logger.Info().Str("proto_type", "MetadataRequest").Msg("Handling StateSyncMessage MetadataReq") if !m.consCfg.ServerModeEnabled { From 54fcd1b5bf2909f3f028f5745e5d6541c2fd0c9f Mon Sep 17 00:00:00 2001 From: Daniel Olshansky Date: Mon, 12 Jun 2023 10:34:39 -0700 Subject: [PATCH 066/100] A little bit of cleanup --- consensus/e2e_tests/state_sync_test.go | 2 +- consensus/fsm_handler.go | 10 +++++----- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/consensus/e2e_tests/state_sync_test.go b/consensus/e2e_tests/state_sync_test.go index 72cfc2aab..85d5fe911 100644 --- a/consensus/e2e_tests/state_sync_test.go +++ b/consensus/e2e_tests/state_sync_test.go @@ -145,7 +145,7 @@ func TestStateSync_UnsyncedPeerSyncs_Success(t *testing.T) { _, err := waitForNetworkConsensusEvents(t, clockMock, eventsChannel, consensus.NewRound, consensus.Propose, numValidators*numValidators, 500, true) require.NoError(t, err) - // Verify the unsynched node is still behind after NewRound starts + // Verify the unsynced node is still behind after NewRound starts for nodeId, pocketNode := range pocketNodes { nodeState := getConsensusNodeState(pocketNode) if nodeId == unsyncedNodeId { diff --git a/consensus/fsm_handler.go b/consensus/fsm_handler.go index 10e44fda5..51fd55585 100644 --- a/consensus/fsm_handler.go +++ b/consensus/fsm_handler.go @@ -71,7 +71,7 @@ func (m *consensusModule) handleStateTransitionEvent(msg *messaging.StateMachine // Bootstrapped mode is when the node (validator or non) is first coming online. // This is a transition mode from node bootstrapping to a node being out-of-sync. func (m *consensusModule) HandleBootstrapped(msg *messaging.StateMachineTransitionEvent) error { - m.logger.Info().Str("source", consensusFSMHandlerSource).Msg("Node is in the bootstrapped state. Consensus module NOOP.") + m.logger.Info().Str("source", consensusFSMHandlerSource).Msg("Node is in the bootstrapped state. Transitioning to IsUnsynched mode...") return m.GetBus().GetStateMachineModule().SendEvent(coreTypes.StateMachineEvent_Consensus_IsUnsynced) } @@ -80,14 +80,14 @@ func (m *consensusModule) HandleBootstrapped(msg *messaging.StateMachineTransiti // This mode is a transition mode from the node being up-to-date (i.e. Pacemaker mode, Synced mode) with the latest network height to being out-of-sync. // As soon as a node transitions to this mode, it will transition to the synching mode. func (m *consensusModule) HandleUnsynced(msg *messaging.StateMachineTransitionEvent) error { - m.logger.Info().Str("source", consensusFSMHandlerSource).Msg("Node is in an Unsynced state. Consensus module is sending an even to transition to SYNCHING mode.") + m.logger.Info().Str("source", consensusFSMHandlerSource).Msg("Node is in an Unsynced state. Transitioning to IsSyncing mode...") return m.GetBus().GetStateMachineModule().SendEvent(coreTypes.StateMachineEvent_Consensus_IsSyncing) } // HandleSyncMode handles the FSM event Consensus_IsSyncing, and when SyncMode is the destination state. // In Sync mode, the node (validator or not starts syncing with the rest of the network. func (m *consensusModule) HandleSyncMode(msg *messaging.StateMachineTransitionEvent) error { - m.logger.Info().Str("source", consensusFSMHandlerSource).Msg("Node is in Sync Mode. Consensus Module is about to start synching...") + m.logger.Info().Str("source", consensusFSMHandlerSource).Msg("Node is in Sync Mode. About to start synchronous sync loop...") go m.stateSync.StartSynchronousStateSync() return nil } @@ -96,7 +96,7 @@ func (m *consensusModule) HandleSyncMode(msg *messaging.StateMachineTransitionEv // Currently, FSM never transition to this state and a non-validator node always stays in SyncMode. // CONSIDER: when a non-validator sync is implemented, maybe there is a case that requires transitioning to this state. func (m *consensusModule) HandleSynced(msg *messaging.StateMachineTransitionEvent) error { - m.logger.Info().Str("source", consensusFSMHandlerSource).Msg("Non-validator node is in Synced mode. Consensus module NOOP.") + m.logger.Info().Str("source", consensusFSMHandlerSource).Msg("Node (non-validator) is Synced. NOOP") return nil } @@ -104,7 +104,7 @@ func (m *consensusModule) HandleSynced(msg *messaging.StateMachineTransitionEven // Execution of this state means the validator node is synced and it will stay in this mode until // it receives a new block proposal that has a higher height than the current consensus height. func (m *consensusModule) HandlePacemaker(msg *messaging.StateMachineTransitionEvent) error { - m.logger.Info().Str("source", consensusFSMHandlerSource).Msg("Validator node is Synced and in Pacemaker mode. Validator can now participate in voting on consensus.") + m.logger.Info().Str("source", consensusFSMHandlerSource).Msg("Node (validator) node is Synced and entering Pacemaker mode. About to starting participating in consensus...") // if a validator is just bootstrapped and finished state sync, it will not have a nodeId yet, which is 0. Set correct nodeId here. if m.nodeId == 0 { From f0570dac82e478042e59a26c8e21c11417b429e3 Mon Sep 17 00:00:00 2001 From: Daniel Olshansky Date: Mon, 12 Jun 2023 11:04:04 -0700 Subject: [PATCH 067/100] Reverted utility module changes --- utility/main_test.go | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/utility/main_test.go b/utility/main_test.go index 7e9c36a50..7f3450147 100644 --- a/utility/main_test.go +++ b/utility/main_test.go @@ -16,20 +16,13 @@ import ( ) var ( - // dbURL string - - // Initialized in TestMain - testPersistenceMod modules.PersistenceModule + dbURL string ) // NB: `TestMain` serves all tests in the immediate `utility` package and not its children func TestMain(m *testing.M) { pool, resource, url := test_artifacts.SetupPostgresDocker() - // dbURL = url - testPersistenceMod = newTestPersistenceModule(url) - if testPersistenceMod == nil { - log.Fatal("[ERROR] Unable to create new test persistence module") - } + dbURL = url exitCode := m.Run() test_artifacts.CleanupPostgresDocker(m, pool, resource) @@ -38,7 +31,6 @@ func TestMain(m *testing.M) { func newTestUtilityModule(bus modules.Bus) modules.UtilityModule { utilityMod, err := Create(bus) - if err != nil { log.Fatalf("Error creating utility module: %s", err) } From 51864e23554955472c5598b31ea7a06751ee99c2 Mon Sep 17 00:00:00 2001 From: Daniel Olshansky Date: Mon, 12 Jun 2023 16:44:50 -0700 Subject: [PATCH 068/100] Interim commit while working on 'TestStateSync_UnsyncedPeerSyncs_Success' --- consensus/e2e_tests/hotstuff_test.go | 13 ++- consensus/e2e_tests/state_sync_test.go | 133 +++++++++++++++-------- consensus/e2e_tests/utils_test.go | 120 +++++--------------- consensus/events.go | 2 +- consensus/fsm_handler.go | 1 + consensus/module_consensus_state_sync.go | 2 +- consensus/pacemaker/module.go | 1 + consensus/state_sync/module.go | 3 + runtime/bus.go | 2 + 9 files changed, 132 insertions(+), 145 deletions(-) diff --git a/consensus/e2e_tests/hotstuff_test.go b/consensus/e2e_tests/hotstuff_test.go index f15abbfee..be0149481 100644 --- a/consensus/e2e_tests/hotstuff_test.go +++ b/consensus/e2e_tests/hotstuff_test.go @@ -27,10 +27,6 @@ func TestHotstuff4Nodes1BlockHappyPath(t *testing.T) { err := startAllTestPocketNodes(t, pocketNodes) require.NoError(t, err) - // Debug message to start consensus by triggering first view change - triggerNextView(t, pocketNodes) - advanceTime(t, clockMock, 10*time.Millisecond) - // Wait for nodes to reach height=1 by generating a block block := WaitForNextBlock(t, clockMock, eventsChannel, pocketNodes, 1, 0, 500, true) require.Equal(t, uint64(1), block.BlockHeader.Height) @@ -52,10 +48,14 @@ func TestHotstuff4Nodes1BlockHappyPath(t *testing.T) { stateSyncGetBlockMsg := prepareStateSyncGetBlockMessage(t, requesterNodePeerAddress, 1) send(t, serverNode, stateSyncGetBlockMsg) - // Server node is waiting for the get block request message - receivedMsg, err := waitForNetworkStateSyncEvents(t, clockMock, eventsChannel, "error waiting for StateSync.GetBlockRequest message", 1, 500, false) + isGetBlockResponse := func(msg *typesCons.StateSyncMessage) bool { + return msg.GetGetBlockRes() != nil + } + // Server node is waiting for the get block response message + receivedMsg, err := waitForNetworkStateSyncEvents(t, clockMock, eventsChannel, "error waiting for StateSync.GetBlockRequest message", 1, 500, false, &isGetBlockResponse) require.NoError(t, err) + // Verify that it was a get block request of the right height msg, err := codec.GetCodec().FromAny(receivedMsg[0]) require.NoError(t, err) stateSyncGetBlockResMessage, ok := msg.(*typesCons.StateSyncMessage) @@ -63,6 +63,7 @@ func TestHotstuff4Nodes1BlockHappyPath(t *testing.T) { getBlockRes := stateSyncGetBlockResMessage.GetGetBlockRes() require.NotEmpty(t, getBlockRes) + // Validate the data in the block received require.Equal(t, uint64(1), getBlockRes.Block.GetBlockHeader().Height) } diff --git a/consensus/e2e_tests/state_sync_test.go b/consensus/e2e_tests/state_sync_test.go index 85d5fe911..7043de457 100644 --- a/consensus/e2e_tests/state_sync_test.go +++ b/consensus/e2e_tests/state_sync_test.go @@ -1,6 +1,7 @@ package e2e_tests import ( + "fmt" "testing" "time" @@ -8,9 +9,9 @@ import ( "github.com/pokt-network/pocket/consensus" typesCons "github.com/pokt-network/pocket/consensus/types" "github.com/pokt-network/pocket/shared/codec" + "github.com/pokt-network/pocket/shared/messaging" "github.com/pokt-network/pocket/shared/modules" "github.com/stretchr/testify/require" - "google.golang.org/protobuf/types/known/anypb" ) func TestStateSync_MetadataRequestResponse_Success(t *testing.T) { @@ -26,24 +27,13 @@ func TestStateSync_MetadataRequestResponse_Success(t *testing.T) { requesterNode := pocketNodes[2] requesterNodePeerAddress := requesterNode.GetBus().GetConsensusModule().GetNodeAddress() - // Prepare StateSyncMetadataRequest - stateSyncMetaDataReqMessage := &typesCons.StateSyncMessage{ - Message: &typesCons.StateSyncMessage_MetadataReq{ - MetadataReq: &typesCons.StateSyncMetadataRequest{ - PeerAddress: requesterNodePeerAddress, - }, - }, - } - anyProto, err := anypb.New(stateSyncMetaDataReqMessage) - require.NoError(t, err) - // Send metadata request to the server node + anyProto := prepareStateSyncGetMetadataMessage(t, requesterNodePeerAddress) send(t, serverNode, anyProto) // Wait for response from the server node - receivedMsgs, err := waitForNetworkStateSyncEvents(t, clockMock, eventsChannel, "did not receive response to state sync metadata request", 1, 500, false) + receivedMsgs, err := waitForNetworkStateSyncEvents(t, clockMock, eventsChannel, "did not receive response to state sync metadata request", 1, 500, false, nil) require.NoError(t, err) - require.Len(t, receivedMsgs, 1) // Validate the response msg, err := codec.GetCodec().FromAny(receivedMsgs[0]) @@ -79,7 +69,7 @@ func TestStateSync_BlockRequestResponse_Success(t *testing.T) { send(t, serverNode, stateSyncGetBlockMsg) // Start waiting for the get block request on server node, expect to return error - receivedMsg, err := waitForNetworkStateSyncEvents(t, clockMock, eventsChannel, "error waiting on response to a get block request", 1, 500, false) + receivedMsg, err := waitForNetworkStateSyncEvents(t, clockMock, eventsChannel, "error waiting on response to a get block request", 1, 500, false, nil) require.NoError(t, err) // validate the response @@ -93,6 +83,7 @@ func TestStateSync_BlockRequestResponse_Success(t *testing.T) { require.NotEmpty(t, getBlockRes) require.Equal(t, uint64(1), getBlockRes.Block.GetBlockHeader().Height) + // IMPROVE: What other data should we validate from the response? } @@ -116,7 +107,7 @@ func TestStateSync_BlockRequestResponse_FailNonExistingBlock(t *testing.T) { // Start waiting for the get block request on server node, expect to return error errMsg := "expecting to time out waiting on a response from a non existent" - _, err := waitForNetworkStateSyncEvents(t, clockMock, eventsChannel, errMsg, 1, 500, false) + _, err := waitForNetworkStateSyncEvents(t, clockMock, eventsChannel, errMsg, 1, 500, false, nil) require.Error(t, err) } @@ -126,53 +117,105 @@ func TestStateSync_UnsyncedPeerSyncs_Success(t *testing.T) { // Select node 2 as the unsynched node that will catch up unsyncedNodeId := typesCons.NodeId(pocketNodes[2].GetBus().GetConsensusModule().GetNodeId()) unsyncedNode := pocketNodes[unsyncedNodeId] + unsyncedNodeHeight := uint64(2) + targetHeight := uint64(5) // Set the unsynced node to height (2) and rest of the nodes to height (4) for id, pocketNode := range pocketNodes { var height uint64 if id == unsyncedNodeId { - height = uint64(2) + height = unsyncedNodeHeight } else { - height = uint64(4) + height = targetHeight } pocketNode.GetBus().GetConsensusModule().SetHeight(height) pocketNode.GetBus().GetConsensusModule().SetStep(uint8(consensus.NewRound)) pocketNode.GetBus().GetConsensusModule().SetRound(uint64(0)) } - // Trigger all the nodes to the next step + // Sanity check unsynched node is at height 2 + assertHeight(t, unsyncedNodeId, uint64(2), getConsensusNodeState(unsyncedNode).Height) + + // Broadcast metadata to all the others nodes so the node that's behind has a view of the network + anyProto := prepareStateSyncGetMetadataMessage(t, unsyncedNode.GetBus().GetConsensusModule().GetNodeAddress()) + broadcast(t, pocketNodes, anyProto) + + // Make sure the unsynched node has a view of the network + receivedMsgs, err := waitForNetworkStateSyncEvents(t, clockMock, eventsChannel, "did not receive response to state sync metadata request", len(pocketNodes), 500, false, nil) + require.NoError(t, err) + for _, msg := range receivedMsgs { + send(t, unsyncedNode, msg) + } + // IMPROVE: Look into ways to assert on unsynched.MinHeightViewOfNetwork and unsynched.MaxHeightViewOfNetwork + + // Trigger the next round of consensus so the unsynched nodes is prompted to start synching triggerNextView(t, pocketNodes) - _, err := waitForNetworkConsensusEvents(t, clockMock, eventsChannel, consensus.NewRound, consensus.Propose, numValidators*numValidators, 500, true) + advanceTime(t, clockMock, 10*time.Millisecond) + proposalMsgs, err := waitForNetworkConsensusEvents(t, clockMock, eventsChannel, typesCons.HotstuffStep(consensus.NewRound), consensus.Propose, numValidators*numValidators, 500, false) require.NoError(t, err) + broadcastMessages(t, proposalMsgs, pocketNodes) + advanceTime(t, clockMock, 10*time.Millisecond) - // Verify the unsynced node is still behind after NewRound starts - for nodeId, pocketNode := range pocketNodes { - nodeState := getConsensusNodeState(pocketNode) - if nodeId == unsyncedNodeId { - assertNodeConsensusView(t, nodeId, - typesCons.ConsensusNodeState{ - Height: uint64(2), - Step: uint8(consensus.NewRound), - Round: uint8(1), - }, - nodeState) - } else { - assertNodeConsensusView(t, nodeId, - typesCons.ConsensusNodeState{ - Height: uint64(4), - Step: uint8(consensus.NewRound), - Round: uint8(1), - }, - nodeState) + isGetBlockRequest := func(msg *typesCons.StateSyncMessage) bool { + return msg.GetGetBlockReq() != nil + } + isGetBlockResponse := func(msg *typesCons.StateSyncMessage) bool { + return msg.GetGetBlockRes() != nil + } + + for unsyncedNodeHeight < targetHeight { + // Wait for the unsynched node to request the block at the current height + blockRequests, err := waitForNetworkStateSyncEvents(t, clockMock, eventsChannel, "error waiting on response to a get block request", 1, 5000, false, &isGetBlockRequest) + require.NoError(t, err) + + // Validate the height being requested is correct + msg, err := codec.GetCodec().FromAny(blockRequests[0]) + require.NoError(t, err) + heightRequested := msg.(*typesCons.StateSyncMessage).GetGetBlockReq().Height + require.Equal(t, unsyncedNodeHeight, heightRequested) + + // Broadcast the block request to all nodes + broadcast(t, pocketNodes, blockRequests[0]) + advanceTime(t, clockMock, 10*time.Millisecond) + + // Wait for the unsynched node to receive the block responses + blockResponses, err := waitForNetworkStateSyncEvents(t, clockMock, eventsChannel, "error waiting on response to a get block response", numValidators-1, 5000, false, &isGetBlockResponse) + require.NoError(t, err) + + // Validate that the block is the same from all the validators who send it + var blockResponse *typesCons.GetBlockResponse + for _, msg := range blockResponses { + msgAny, err := codec.GetCodec().FromAny(msg) + require.NoError(t, err) + + stateSyncMessage, ok := msgAny.(*typesCons.StateSyncMessage) + require.True(t, ok) + + if blockResponse == nil { + blockResponse = stateSyncMessage.GetGetBlockRes() + continue + } + require.Equal(t, blockResponse.Block, stateSyncMessage.GetGetBlockRes().Block) } - require.Equal(t, false, nodeState.IsLeader) - require.Equal(t, typesCons.NodeId(0), nodeState.LeaderId) + + // Send one of the responses (since they are equal) to the unsynched node to apply it + send(t, unsyncedNode, blockResponses[0]) + advanceTime(t, clockMock, 10*time.Millisecond) + + fmt.Println("OLSH events channel", eventsChannel) + // Wait for the unsynched node to commit the block + _, err = waitForEventsInternal(clockMock, eventsChannel, messaging.StateSyncBlockCommittedEventType, 1, 5000, nil, "error waiting on response to a get block response", false) + require.NoError(t, err) + + // ensure unsynced node height increased + nodeState := getConsensusNodeState(unsyncedNode) + assertHeight(t, unsyncedNodeId, unsyncedNodeHeight+1, nodeState.Height) + + // Same as `unsyncedNodeHeight+=1` + unsyncedNodeHeight = unsyncedNode.GetBus().GetConsensusModule().CurrentHeight() } - // Wait for unsyncedNode to go from height 2 to height 4 - assertHeight(t, unsyncedNodeId, uint64(2), getConsensusNodeState(unsyncedNode).Height) - waitForNodeToSync(t, clockMock, eventsChannel, unsyncedNode, pocketNodes, 3) - assertHeight(t, unsyncedNodeId, uint64(3), getConsensusNodeState(unsyncedNode).Height) + assertHeight(t, unsyncedNodeId, uint64(4), getConsensusNodeState(unsyncedNode).Height) } // TODO: Implement these tests diff --git a/consensus/e2e_tests/utils_test.go b/consensus/e2e_tests/utils_test.go index fb7e14d40..13d6432f7 100644 --- a/consensus/e2e_tests/utils_test.go +++ b/consensus/e2e_tests/utils_test.go @@ -274,14 +274,18 @@ func waitForNetworkStateSyncEvents( numExpectedMsgs int, maxWaitTime time.Duration, failOnExtraMessages bool, + include *func(*typesCons.StateSyncMessage) bool, ) (messages []*anypb.Any, err error) { includeFilter := func(anyMsg *anypb.Any) bool { msg, err := codec.GetCodec().FromAny(anyMsg) require.NoError(t, err) - _, ok := msg.(*typesCons.StateSyncMessage) + stateSyncMsg, ok := msg.(*typesCons.StateSyncMessage) require.True(t, ok) + if include != nil { + return (*include)(stateSyncMsg) + } return true } @@ -354,16 +358,19 @@ loop: for { select { case nodeEvent := <-eventsChannel: + fmt.Println("OLSH eventContentType0", eventContentType, nodeEvent.GetContentType()) if nodeEvent.GetContentType() != eventContentType { unusedEvents = append(unusedEvents, nodeEvent) continue } + fmt.Println("OLSH eventContentType1", eventContentType) message := nodeEvent.Content if message == nil || !msgIncludeFilter(message) { unusedEvents = append(unusedEvents, nodeEvent) continue } + fmt.Println("OLSH eventContentType2", eventContentType) expectedMsgs = append(expectedMsgs, message) numRemainingMsgs-- @@ -380,7 +387,6 @@ loop: if numRemainingMsgs == 0 { break loop } else if numRemainingMsgs > 0 { - fmt.Println("OLSH", expectedMsgs) return expectedMsgs, fmt.Errorf("Missing '%s' messages; %d expected but %d received. (%s) \n\t !!!IMPORTANT(#462)!!!: Consider increasing `maxWaitTime` as a workaround", eventContentType, numExpectedMsgs, len(expectedMsgs), errMsg) } else { return expectedMsgs, fmt.Errorf("Too many '%s' messages; %d expected but %d received. (%s)", eventContentType, numExpectedMsgs, len(expectedMsgs), errMsg) @@ -626,6 +632,10 @@ func WaitForNextBlock( ) *coreTypes.Block { leaderId := typesCons.NodeId(pocketNodes[1].GetBus().GetConsensusModule().GetLeaderForView(height, uint64(round), uint8(consensus.NewRound))) + // Debug message to start consensus by triggering first view change + triggerNextView(t, pocketNodes) + advanceTime(t, clck, 10*time.Millisecond) + // 1. NewRound newRoundMessages, err := waitForProposalMsgs(t, clck, eventsChannel, pocketNodes, height, uint8(consensus.NewRound), round, 0, numValidators*numValidators, maxWaitTime, failOnExtraMessages) require.NoError(t, err) @@ -736,96 +746,6 @@ func triggerNextView(t *testing.T, pocketNodes idToNodeMapping) { } } -// waitForNodeToSync waits for a node to sync to a target height. -// For every block the unsynched node is missing: -// 1. Wait for the unsynched node to request a missing block via `waitForNodeToRequestMissingBlock()` -// 2. Wait for other nodes to send the requested block via `waitForNodesToReplyToBlockRequest()` -// 3. Wait for the node to catch up to the target height via `waitForNodeToCatchUp()` -func waitForNodeToSync( - t *testing.T, - clck *clock.Mock, - eventsChannel modules.EventsChannel, - unsyncedNode *shared.Node, - allNodes idToNodeMapping, - targetHeight uint64, -) { - t.Helper() - - // Get unsynched node info - unsyncedNodeId := typesCons.NodeId(unsyncedNode.GetBus().GetConsensusModule().GetNodeId()) - currentHeight := unsyncedNode.GetBus().GetConsensusModule().CurrentHeight() - require.Less(t, currentHeight, targetHeight, "target height must be greater than current height") - - for currentHeight < targetHeight { - receivedMsg, err := waitForNetworkStateSyncEvents(t, clck, eventsChannel, "error waiting on response to a get block request", 5, 5000, false) - require.NoError(t, err) - - msg, err := codec.GetCodec().FromAny(receivedMsg[0]) - require.NoError(t, err) - stateSyncMessage, ok := msg.(*typesCons.StateSyncMessage) - require.True(t, ok) - fmt.Println("receivedMsg", stateSyncMessage.GetGetBlockReq().Height, "~~~~~", stateSyncMessage.GetGetBlockReq().PeerAddress, "~~~~~") - - broadcastMessages(t, receivedMsg, allNodes) - - // Wait for block request messages - // Broadcast them - // Wait for block response messages - // Broadcast them - - // anyProto, err := anypb.New(stateSyncGetBlockMessage) - // require.NoError(t, err) - - // // Send get block request to the server node - // P2PSend(t, serverNode, anyProto) - - // waiting for unsynced node to request the same missing block from all peers. - blockRequests, err := waitForNetworkStateSyncEvents(t, clck, eventsChannel, "error while waiting for block response messages.", numValidators, 500, true) - require.NoError(t, err) - - // verify that all requests are identical and take the first one - require.True(t, checkIdentical(blockRequests), "All block requests sent by node must be identical") - blockRequest := blockRequests[0] - - // broadcast one of the block requests to all nodes - broadcast(t, allNodes, blockRequest) - advanceTime(t, clck, 10*time.Millisecond) - - // wait to receive block replies from all nodes (except for self) - blockResponses, err := waitForNetworkStateSyncEvents(t, clck, eventsChannel, "error while waiting for block response messages.", numValidators-1, 500, true) - require.NoError(t, err) - - // verify that all nodes replied with the same block response - var blockResponse *typesCons.GetBlockResponse - for _, msg := range blockResponses { - msgAny, err := codec.GetCodec().FromAny(msg) - require.NoError(t, err) - - stateSyncMessage, ok := msgAny.(*typesCons.StateSyncMessage) - require.True(t, ok) - - if blockResponse == nil { - blockResponse = stateSyncMessage.GetGetBlockRes() - continue - } - require.Equal(t, blockResponse.Block, stateSyncMessage.GetGetBlockRes().Block) - } - - // since all block responses are identical, send one of the block responses to the unsynced node - send(t, unsyncedNode, blockResponses[0]) - advanceTime(t, clck, 10*time.Millisecond) - - // waiting for node to reach to the next height (currentHeight+1) - _, err = waitForNetworkFSMEvents(t, clck, eventsChannel, coreTypes.StateMachineEvent_Consensus_IsSyncedValidator, "error while waiting for validator to sync", 1, 500, false) - require.NoError(t, err) - - // ensure unsynced node caught up to the target height - nodeState := getConsensusNodeState(unsyncedNode) - assertHeight(t, unsyncedNodeId, currentHeight+1, nodeState.Height) - currentHeight = unsyncedNode.GetBus().GetConsensusModule().CurrentHeight() - } -} - func generatePlaceholderBlock(height uint64, leaderAddrr crypto.Address) *coreTypes.Block { blockHeader := &coreTypes.BlockHeader{ Height: height, @@ -1055,3 +975,19 @@ func prepareStateSyncGetBlockMessage(t *testing.T, peerAddress string, height ui return anyProto } + +func prepareStateSyncGetMetadataMessage(t *testing.T, selfAddress string) *anypb.Any { + t.Helper() + + stateSyncMetaDataReqMessage := &typesCons.StateSyncMessage{ + Message: &typesCons.StateSyncMessage_MetadataReq{ + MetadataReq: &typesCons.StateSyncMetadataRequest{ + PeerAddress: selfAddress, + }, + }, + } + anyProto, err := anypb.New(stateSyncMetaDataReqMessage) + require.NoError(t, err) + + return anyProto +} diff --git a/consensus/events.go b/consensus/events.go index 47cf5591c..4a27a0050 100644 --- a/consensus/events.go +++ b/consensus/events.go @@ -13,7 +13,7 @@ func (m *consensusModule) publishNewHeightEvent(height uint64) { m.GetBus().PublishEventToBus(newHeightEvent) } -// publishStateSyncBlockCommittedEvent publishes a nstate_machine/module.goew state sync block committed event, so that state sync module can react to it +// publishStateSyncBlockCommittedEvent publishes a state_machine/module.goew state sync block committed event, so that state sync module can react to it func (m *consensusModule) publishStateSyncBlockCommittedEvent(height uint64) { blockCommittedEvent := &messaging.StateSyncBlockCommittedEvent{ Height: height, diff --git a/consensus/fsm_handler.go b/consensus/fsm_handler.go index 51fd55585..7cdca1b1e 100644 --- a/consensus/fsm_handler.go +++ b/consensus/fsm_handler.go @@ -87,6 +87,7 @@ func (m *consensusModule) HandleUnsynced(msg *messaging.StateMachineTransitionEv // HandleSyncMode handles the FSM event Consensus_IsSyncing, and when SyncMode is the destination state. // In Sync mode, the node (validator or not starts syncing with the rest of the network. func (m *consensusModule) HandleSyncMode(msg *messaging.StateMachineTransitionEvent) error { + fmt.Println("OLSH") m.logger.Info().Str("source", consensusFSMHandlerSource).Msg("Node is in Sync Mode. About to start synchronous sync loop...") go m.stateSync.StartSynchronousStateSync() return nil diff --git a/consensus/module_consensus_state_sync.go b/consensus/module_consensus_state_sync.go index f7203152b..6eaaa5116 100644 --- a/consensus/module_consensus_state_sync.go +++ b/consensus/module_consensus_state_sync.go @@ -76,8 +76,8 @@ func (m *consensusModule) tryToApplyRequestedBlock(blockResponse *typesCons.GetB } logger.Info().Int64("height", int64(block.BlockHeader.Height)).Msgf("Block, at height %d is committed!", block.BlockHeader.Height) - m.paceMaker.NewHeight() m.publishStateSyncBlockCommittedEvent(block.BlockHeader.Height) + m.paceMaker.NewHeight() } // REFACTOR(#434): Once we consolidated NodeIds/PeerIds, this could potentially be removed diff --git a/consensus/pacemaker/module.go b/consensus/pacemaker/module.go index 639571195..8d6fedc26 100644 --- a/consensus/pacemaker/module.go +++ b/consensus/pacemaker/module.go @@ -99,6 +99,7 @@ func (*pacemaker) GetModuleName() string { } func (m *pacemaker) ShouldHandleMessage(msg *typesCons.HotstuffMessage) (bool, error) { + fmt.Println("OLSH ShouldHandleMessage") consensusMod := m.GetBus().GetConsensusModule() currentHeight := consensusMod.CurrentHeight() diff --git a/consensus/state_sync/module.go b/consensus/state_sync/module.go index 8d93af981..cec35bdc0 100644 --- a/consensus/state_sync/module.go +++ b/consensus/state_sync/module.go @@ -100,6 +100,9 @@ func (m *stateSync) StartSynchronousStateSync() error { _, maxHeight := m.getAggregatedStateSyncMetadata() // Synchronously request block requests from the current height to the aggregated metadata height + // Note that we are using `<=` because: + // - maxHeight is the max * committed * height of the network + // - currentHeight is the latest * committing * height of the node for currentHeight <= maxHeight { m.logger.Info().Msgf("Sync is requesting block: %d, ending height: %d", currentHeight, maxHeight) diff --git a/runtime/bus.go b/runtime/bus.go index cea228733..faae4a9bf 100644 --- a/runtime/bus.go +++ b/runtime/bus.go @@ -1,6 +1,7 @@ package runtime import ( + "fmt" "sync" "github.com/pokt-network/pocket/logger" @@ -51,6 +52,7 @@ func (m *bus) RegisterModule(module modules.Module) { } func (m *bus) PublishEventToBus(e *messaging.PocketEnvelope) { + fmt.Println("OLSH eventsChannel", m.channel) m.channel <- e } From 3a6cd23bcb9ada5858393f979ade22ba8f6158c6 Mon Sep 17 00:00:00 2001 From: Daniel Olshansky Date: Mon, 12 Jun 2023 17:04:20 -0700 Subject: [PATCH 069/100] State sync test passes with time.Sleep hack --- consensus/e2e_tests/state_sync_test.go | 24 +++++++++++++++++++----- consensus/e2e_tests/utils_test.go | 4 ++-- runtime/bus.go | 19 ++++++++++++++++--- shared/modules/bus_module.go | 2 ++ 4 files changed, 39 insertions(+), 10 deletions(-) diff --git a/consensus/e2e_tests/state_sync_test.go b/consensus/e2e_tests/state_sync_test.go index 7043de457..da4a8f59b 100644 --- a/consensus/e2e_tests/state_sync_test.go +++ b/consensus/e2e_tests/state_sync_test.go @@ -9,7 +9,6 @@ import ( "github.com/pokt-network/pocket/consensus" typesCons "github.com/pokt-network/pocket/consensus/types" "github.com/pokt-network/pocket/shared/codec" - "github.com/pokt-network/pocket/shared/messaging" "github.com/pokt-network/pocket/shared/modules" "github.com/stretchr/testify/require" ) @@ -118,7 +117,7 @@ func TestStateSync_UnsyncedPeerSyncs_Success(t *testing.T) { unsyncedNodeId := typesCons.NodeId(pocketNodes[2].GetBus().GetConsensusModule().GetNodeId()) unsyncedNode := pocketNodes[unsyncedNodeId] unsyncedNodeHeight := uint64(2) - targetHeight := uint64(5) + targetHeight := uint64(6) // Set the unsynced node to height (2) and rest of the nodes to height (4) for id, pocketNode := range pocketNodes { @@ -203,9 +202,12 @@ func TestStateSync_UnsyncedPeerSyncs_Success(t *testing.T) { advanceTime(t, clockMock, 10*time.Millisecond) fmt.Println("OLSH events channel", eventsChannel) + + // TODO_IN_THIS_COMMIT: Remove this hack // Wait for the unsynched node to commit the block - _, err = waitForEventsInternal(clockMock, eventsChannel, messaging.StateSyncBlockCommittedEventType, 1, 5000, nil, "error waiting on response to a get block response", false) - require.NoError(t, err) + // _, err = waitForEventsInternal(clockMock, eventsChannel, messaging.StateSyncBlockCommittedEventType, 1, 5000, nil, "error waiting on response to a get block response", false) + // require.NoError(t, err) + time.Sleep(10 * time.Millisecond) // ensure unsynced node height increased nodeState := getConsensusNodeState(unsyncedNode) @@ -215,7 +217,7 @@ func TestStateSync_UnsyncedPeerSyncs_Success(t *testing.T) { unsyncedNodeHeight = unsyncedNode.GetBus().GetConsensusModule().CurrentHeight() } - assertHeight(t, unsyncedNodeId, uint64(4), getConsensusNodeState(unsyncedNode).Height) + assertHeight(t, unsyncedNodeId, targetHeight, getConsensusNodeState(unsyncedNode).Height) } // TODO: Implement these tests @@ -252,12 +254,24 @@ func prepareStateSyncTestEnvironment(t *testing.T) (*clock.Mock, modules.EventsC // Test configs runtimeMgrs := generateNodeRuntimeMgrs(t, numValidators, clockMock) buses := generateBuses(t, runtimeMgrs) + // buses := generateBusesTemp(t, runtimeMgrs, eventsChannel) // Create & start test pocket nodes eventsChannel := make(modules.EventsChannel, 100) + // buses := generateBusesTemp(t, runtimeMgrs, eventsChannel) pocketNodes := createTestConsensusPocketNodes(t, buses, eventsChannel) err := startAllTestPocketNodes(t, pocketNodes) require.NoError(t, err) return clockMock, eventsChannel, pocketNodes } + +// func generateBusesTemp(t *testing.T, runtimeMgrs []*runtime.Manager, channel modules.EventsChannel) (buses []modules.Bus) { +// buses = make([]modules.Bus, len(runtimeMgrs)) +// for i := range runtimeMgrs { +// bus, err := runtime.CreateBus(runtimeMgrs[i], runtime.WithEventsChannel(channel)) +// require.NoError(t, err) +// buses[i] = bus +// } +// return +// } diff --git a/consensus/e2e_tests/utils_test.go b/consensus/e2e_tests/utils_test.go index 13d6432f7..7d36127c2 100644 --- a/consensus/e2e_tests/utils_test.go +++ b/consensus/e2e_tests/utils_test.go @@ -171,8 +171,8 @@ func generateBuses(t *testing.T, runtimeMgrs []*runtime.Manager) (buses []module func startAllTestPocketNodes(t *testing.T, pocketNodes idToNodeMapping) error { for _, pocketNode := range pocketNodes { go startNode(t, pocketNode) - startEvent := pocketNode.GetBus().GetBusEvent() - require.Equal(t, messaging.NodeStartedEventType, startEvent.GetContentType()) + // startEvent := pocketNode.GetBus().GetBusEvent() + // require.Equal(t, messaging.NodeStartedEventType, startEvent.GetContentType()) stateMachine := pocketNode.GetBus().GetStateMachineModule() if err := stateMachine.SendEvent(coreTypes.StateMachineEvent_Start); err != nil { return err diff --git a/runtime/bus.go b/runtime/bus.go index faae4a9bf..7a9679b2b 100644 --- a/runtime/bus.go +++ b/runtime/bus.go @@ -27,11 +27,11 @@ type bus struct { runtimeMgr modules.RuntimeMgr } -func CreateBus(runtimeMgr modules.RuntimeMgr) (modules.Bus, error) { - return new(bus).Create(runtimeMgr) +func CreateBus(runtimeMgr modules.RuntimeMgr, opts ...modules.BusOption) (modules.Bus, error) { + return new(bus).Create(runtimeMgr, opts...) } -func (b *bus) Create(runtimeMgr modules.RuntimeMgr) (modules.Bus, error) { +func (b *bus) Create(runtimeMgr modules.RuntimeMgr, opts ...modules.BusOption) (modules.Bus, error) { bus := &bus{ channel: make(modules.EventsChannel, defaults.DefaultBusBufferSize), @@ -39,6 +39,10 @@ func (b *bus) Create(runtimeMgr modules.RuntimeMgr) (modules.Bus, error) { modulesRegistry: NewModulesRegistry(), } + for _, o := range opts { + o(bus) + } + return bus, nil } @@ -121,6 +125,15 @@ func (m *bus) GetStateMachineModule() modules.StateMachineModule { return getModuleFromRegistry[modules.StateMachineModule](m, modules.StateMachineModuleName) } +// WithEventsChannel is used initialize the bus with a specific events channel +func WithEventsChannel(eventsChannel modules.EventsChannel) modules.BusOption { + return func(m modules.Bus) { + if m, ok := m.(*bus); ok { + m.channel = eventsChannel + } + } +} + // getModuleFromRegistry is a helper function to get a module from the registry that handles errors and casting via generics func getModuleFromRegistry[T modules.Module](m *bus, moduleName string) T { mod, err := m.modulesRegistry.GetModule(moduleName) diff --git a/shared/modules/bus_module.go b/shared/modules/bus_module.go index 8699b756f..95a3a95e2 100644 --- a/shared/modules/bus_module.go +++ b/shared/modules/bus_module.go @@ -13,6 +13,8 @@ const BusModuleName = "bus" // it, which could potentially be a feature rather than a bug. type EventsChannel chan *messaging.PocketEnvelope +type BusOption func(Bus) + type Bus interface { // Bus Events PublishEventToBus(e *messaging.PocketEnvelope) From ed16e8005a3c9e57724ca3c4fe0279f1ba9b1eb3 Mon Sep 17 00:00:00 2001 From: Daniel Olshansky Date: Wed, 14 Jun 2023 15:44:04 -0700 Subject: [PATCH 070/100] Some code cleanup while validating that unit tests all pass and doing a self review --- consensus/block.go | 6 ++--- consensus/debugging.go | 2 +- consensus/e2e_tests/hotstuff_test.go | 34 ++++++++++++------------ consensus/e2e_tests/state_sync_test.go | 36 +++++++++++--------------- consensus/e2e_tests/utils_test.go | 9 ++++--- consensus/hotstuff_leader.go | 2 +- consensus/hotstuff_replica.go | 2 +- 7 files changed, 42 insertions(+), 49 deletions(-) diff --git a/consensus/block.go b/consensus/block.go index 7c9e64fa8..70d40fe1f 100644 --- a/consensus/block.go +++ b/consensus/block.go @@ -34,12 +34,12 @@ func (m *consensusModule) commitBlock(block *coreTypes.Block) error { return nil } -// ADDTEST: Add unit tests specific to block validation -// isBlockMessageInMessageValid does basic validation of the block in the hotstuff message for the step provided, such as: +// isBlockInMessageValidBasic does basic validation of the block in the hotstuff message such as: // - validating if the block could/should be nil // - the state hash of the block // - the size of the block -func (m *consensusModule) isBlockMessageInMessageValid(msg *typesCons.HotstuffMessage) (bool, error) { +// ADDTEST: Add unit tests specific to block validation +func (m *consensusModule) isBlockInMessageValidBasic(msg *typesCons.HotstuffMessage) (bool, error) { block := msg.GetBlock() step := msg.GetStep() diff --git a/consensus/debugging.go b/consensus/debugging.go index 5379b17d9..39b34ec92 100644 --- a/consensus/debugging.go +++ b/consensus/debugging.go @@ -114,7 +114,7 @@ func (m *consensusModule) sendGetMetadataStateSyncMessage(_ *messaging.DebugMess } anyMsg, err := anypb.New(stateSyncMetaDataReqMessage) if err != nil { - m.logger.Error().Err(err).Str("proto_type", "GetBlockRequest").Msg("failed to create StateSyncGetBlockMessage") + m.logger.Error().Err(err).Str("proto_type", "StateSyncMessage").Msg("failed to create StateSyncMetadataRequest") return } if m.GetBus().GetP2PModule().Broadcast(anyMsg) != nil { diff --git a/consensus/e2e_tests/hotstuff_test.go b/consensus/e2e_tests/hotstuff_test.go index be0149481..acf82fd20 100644 --- a/consensus/e2e_tests/hotstuff_test.go +++ b/consensus/e2e_tests/hotstuff_test.go @@ -1,6 +1,7 @@ package e2e_tests import ( + "reflect" "testing" "time" @@ -12,7 +13,7 @@ import ( "github.com/stretchr/testify/require" ) -func TestHotstuff4Nodes1BlockHappyPath(t *testing.T) { +func TestHotstuff_4Nodes1BlockHappyPath(t *testing.T) { // Test preparation clockMock := clock.NewMock() timeReminder(t, clockMock, time.Second) @@ -48,11 +49,8 @@ func TestHotstuff4Nodes1BlockHappyPath(t *testing.T) { stateSyncGetBlockMsg := prepareStateSyncGetBlockMessage(t, requesterNodePeerAddress, 1) send(t, serverNode, stateSyncGetBlockMsg) - isGetBlockResponse := func(msg *typesCons.StateSyncMessage) bool { - return msg.GetGetBlockRes() != nil - } // Server node is waiting for the get block response message - receivedMsg, err := waitForNetworkStateSyncEvents(t, clockMock, eventsChannel, "error waiting for StateSync.GetBlockRequest message", 1, 500, false, &isGetBlockResponse) + receivedMsg, err := waitForNetworkStateSyncEvents(t, clockMock, eventsChannel, "error waiting for StateSync.GetBlockRequest message", 1, 500, false, reflect.TypeOf(&typesCons.StateSyncMessage_GetBlockRes{})) require.NoError(t, err) // Verify that it was a get block request of the right height @@ -116,53 +114,53 @@ func TestQuorumCertificate_ResistenceToSignatureMalleability(t *testing.T) { t.Skip() } -func TestHotstuff4Nodes1Byzantine1Block(t *testing.T) { +func TestHotstuff_4Nodes1Byzantine1Block(t *testing.T) { t.Skip() } -func TestHotstuff4Nodes2Byzantine1Block(t *testing.T) { +func TestHotstuff_4Nodes2Byzantine1Block(t *testing.T) { t.Skip() } -func TestHotstuff4Nodes1BlockNetworkPartition(t *testing.T) { +func TestHotstuff_4Nodes1BlockNetworkPartition(t *testing.T) { t.Skip() } -func TestHotstuff4Nodes1Block4Rounds(t *testing.T) { +func TestHotstuff_4Nodes1Block4Rounds(t *testing.T) { t.Skip() } -func TestHotstuff4Nodes2Blocks(t *testing.T) { +func TestHotstuff_4Nodes2Blocks(t *testing.T) { t.Skip() } -func TestHotstuff4Nodes2NewNodes1Block(t *testing.T) { +func TestHotstuff_4Nodes2NewNodes1Block(t *testing.T) { t.Skip() } -func TestHotstuff4Nodes2DroppedNodes1Block(t *testing.T) { +func TestHotstuff_4Nodes2DroppedNodes1Block(t *testing.T) { t.Skip() } -func TestHotstuff4NodesFailOnPrepare(t *testing.T) { +func TestHotstuff_4NodesFailOnPrepare(t *testing.T) { t.Skip() } -func TestHotstuff4NodesFailOnPrecommit(t *testing.T) { +func TestHotstuff_4NodesFailOnPrecommit(t *testing.T) { t.Skip() } -func TestHotstuff4NodesFailOnCommit(t *testing.T) { +func TestHotstuff_4NodesFailOnCommit(t *testing.T) { t.Skip() } -func TestHotstuff4NodesFailOnDecide(t *testing.T) { +func TestHotstuff_4NodesFailOnDecide(t *testing.T) { t.Skip() } -func TestHotstuffValidatorWithLockedQC(t *testing.T) { +func TestHotstuff_ValidatorWithLockedQC(t *testing.T) { t.Skip() } -func TestHotstuffValidatorWithLockedQCMissingNewRoundMsg(t *testing.T) { +func TestHotstuff_ValidatorWithLockedQCMissingNewRoundMsg(t *testing.T) { t.Skip() } diff --git a/consensus/e2e_tests/state_sync_test.go b/consensus/e2e_tests/state_sync_test.go index da4a8f59b..8222c9fc0 100644 --- a/consensus/e2e_tests/state_sync_test.go +++ b/consensus/e2e_tests/state_sync_test.go @@ -2,6 +2,7 @@ package e2e_tests import ( "fmt" + "reflect" "testing" "time" @@ -31,19 +32,18 @@ func TestStateSync_MetadataRequestResponse_Success(t *testing.T) { send(t, serverNode, anyProto) // Wait for response from the server node - receivedMsgs, err := waitForNetworkStateSyncEvents(t, clockMock, eventsChannel, "did not receive response to state sync metadata request", 1, 500, false, nil) + receivedMsgs, err := waitForNetworkStateSyncEvents(t, clockMock, eventsChannel, "did not receive response to state sync metadata request", 1, 500, false, reflect.TypeOf(&typesCons.StateSyncMessage_MetadataRes{})) require.NoError(t, err) - // Validate the response + // Extract the response msg, err := codec.GetCodec().FromAny(receivedMsgs[0]) require.NoError(t, err) - stateSyncMetaDataResMsg, ok := msg.(*typesCons.StateSyncMessage) require.True(t, ok) - stateSyncMetaDataRes := stateSyncMetaDataResMsg.GetMetadataRes() require.NotEmpty(t, stateSyncMetaDataRes) + // Validate the response require.Equal(t, uint64(3), stateSyncMetaDataRes.MaxHeight) // 3 because node sends the last persisted height require.Equal(t, uint64(1), stateSyncMetaDataRes.MinHeight) require.Equal(t, serverNodePeerId, stateSyncMetaDataRes.PeerAddress) @@ -67,8 +67,8 @@ func TestStateSync_BlockRequestResponse_Success(t *testing.T) { // Send get block request to the server node send(t, serverNode, stateSyncGetBlockMsg) - // Start waiting for the get block request on server node, expect to return error - receivedMsg, err := waitForNetworkStateSyncEvents(t, clockMock, eventsChannel, "error waiting on response to a get block request", 1, 500, false, nil) + // Start waiting for the get block response on server node + receivedMsg, err := waitForNetworkStateSyncEvents(t, clockMock, eventsChannel, "error waiting on response to a get block request", 1, 500, false, reflect.TypeOf(&typesCons.StateSyncMessage_GetBlockRes{})) require.NoError(t, err) // validate the response @@ -89,24 +89,25 @@ func TestStateSync_BlockRequestResponse_Success(t *testing.T) { func TestStateSync_BlockRequestResponse_FailNonExistingBlock(t *testing.T) { clockMock, eventsChannel, pocketNodes := prepareStateSyncTestEnvironment(t) - // Choose node 1 as the server node + testHeight := uint64(5) + + // Choose node 1 as the server node and set its height serverNode := pocketNodes[1] - // Set server node's height to test height. - serverNode.GetBus().GetConsensusModule().SetHeight(uint64(5)) + serverNode.GetBus().GetConsensusModule().SetHeight(testHeight) // Choose node 2 as the requester node requesterNode := pocketNodes[2] requesterNodePeerAddress := requesterNode.GetBus().GetConsensusModule().GetNodeAddress() // Prepare a get block request for a non existing block (server is only at height 5) - stateSyncGetBlockMsg := prepareStateSyncGetBlockMessage(t, requesterNodePeerAddress, 6) + stateSyncGetBlockMsg := prepareStateSyncGetBlockMessage(t, requesterNodePeerAddress, testHeight+2) // Send get block request to the server node send(t, serverNode, stateSyncGetBlockMsg) // Start waiting for the get block request on server node, expect to return error errMsg := "expecting to time out waiting on a response from a non existent" - _, err := waitForNetworkStateSyncEvents(t, clockMock, eventsChannel, errMsg, 1, 500, false, nil) + _, err := waitForNetworkStateSyncEvents(t, clockMock, eventsChannel, errMsg, 1, 500, false, reflect.TypeOf(&typesCons.StateSyncMessage_GetBlockRes{})) require.Error(t, err) } @@ -155,16 +156,9 @@ func TestStateSync_UnsyncedPeerSyncs_Success(t *testing.T) { broadcastMessages(t, proposalMsgs, pocketNodes) advanceTime(t, clockMock, 10*time.Millisecond) - isGetBlockRequest := func(msg *typesCons.StateSyncMessage) bool { - return msg.GetGetBlockReq() != nil - } - isGetBlockResponse := func(msg *typesCons.StateSyncMessage) bool { - return msg.GetGetBlockRes() != nil - } - for unsyncedNodeHeight < targetHeight { // Wait for the unsynched node to request the block at the current height - blockRequests, err := waitForNetworkStateSyncEvents(t, clockMock, eventsChannel, "error waiting on response to a get block request", 1, 5000, false, &isGetBlockRequest) + blockRequests, err := waitForNetworkStateSyncEvents(t, clockMock, eventsChannel, "error waiting on response to a get block request", 1, 5000, false, reflect.TypeOf(&typesCons.StateSyncMessage_GetBlockReq{})) require.NoError(t, err) // Validate the height being requested is correct @@ -178,10 +172,10 @@ func TestStateSync_UnsyncedPeerSyncs_Success(t *testing.T) { advanceTime(t, clockMock, 10*time.Millisecond) // Wait for the unsynched node to receive the block responses - blockResponses, err := waitForNetworkStateSyncEvents(t, clockMock, eventsChannel, "error waiting on response to a get block response", numValidators-1, 5000, false, &isGetBlockResponse) + blockResponses, err := waitForNetworkStateSyncEvents(t, clockMock, eventsChannel, "error waiting on response to a get block response", numValidators-1, 5000, false, reflect.TypeOf(&typesCons.StateSyncMessage_GetBlockRes{})) require.NoError(t, err) - // Validate that the block is the same from all the validators who send it + // Validate that the block is the same from all the validators who send it (non byzantine scenario) var blockResponse *typesCons.GetBlockResponse for _, msg := range blockResponses { msgAny, err := codec.GetCodec().FromAny(msg) diff --git a/consensus/e2e_tests/utils_test.go b/consensus/e2e_tests/utils_test.go index 3c301bf09..2e2b1a665 100644 --- a/consensus/e2e_tests/utils_test.go +++ b/consensus/e2e_tests/utils_test.go @@ -276,7 +276,7 @@ func waitForNetworkStateSyncEvents( numExpectedMsgs int, maxWaitTime time.Duration, failOnExtraMessages bool, - include *func(*typesCons.StateSyncMessage) bool, + stateSyncMsgType any, ) (messages []*anypb.Any, err error) { includeFilter := func(anyMsg *anypb.Any) bool { msg, err := codec.GetCodec().FromAny(anyMsg) @@ -285,8 +285,9 @@ func waitForNetworkStateSyncEvents( stateSyncMsg, ok := msg.(*typesCons.StateSyncMessage) require.True(t, ok) - if include != nil { - return (*include)(stateSyncMsg) + if stateSyncMsgType != nil { + fmt.Println("OLSH HERE", reflect.TypeOf(stateSyncMsg.Message), stateSyncMsgType) + return reflect.TypeOf(stateSyncMsg.Message) == stateSyncMsgType } return true } @@ -366,7 +367,7 @@ loop: continue } - fmt.Println("OLSH eventContentType1", eventContentType) + fmt.Println("OLSH eventContentType1", eventContentType, nodeEvent) message := nodeEvent.Content if message == nil || !msgIncludeFilter(message) { unusedEvents = append(unusedEvents, nodeEvent) diff --git a/consensus/hotstuff_leader.go b/consensus/hotstuff_leader.go index 1dcccc0f2..aa478e944 100644 --- a/consensus/hotstuff_leader.go +++ b/consensus/hotstuff_leader.go @@ -283,7 +283,7 @@ func (handler *HotstuffLeaderMessageHandler) HandleDecideMessage(m *consensusMod func (handler *HotstuffLeaderMessageHandler) isMessageValidBasic(m *consensusModule, msg *typesCons.HotstuffMessage) error { // Basic block metadata validation - if valid, err := m.isBlockMessageInMessageValid(msg); !valid { + if valid, err := m.isBlockInMessageValidBasic(msg); !valid { return err } diff --git a/consensus/hotstuff_replica.go b/consensus/hotstuff_replica.go index 0521f5e21..7fd729f4e 100644 --- a/consensus/hotstuff_replica.go +++ b/consensus/hotstuff_replica.go @@ -209,7 +209,7 @@ func (handler *HotstuffReplicaMessageHandler) HandleDecideMessage(m *consensusMo // isMessageValidBasic is the handler called on every replica message before specific handler func (handler *HotstuffReplicaMessageHandler) isMessageValidBasic(m *consensusModule, msg *typesCons.HotstuffMessage) error { // Basic block metadata validation - if valid, err := m.isBlockMessageInMessageValid(msg); !valid { + if valid, err := m.isBlockInMessageValidBasic(msg); !valid { return err } From 918e65bc96dc271c033d397cb869dc14d3d668a3 Mon Sep 17 00:00:00 2001 From: Daniel Olshansky Date: Wed, 14 Jun 2023 16:01:12 -0700 Subject: [PATCH 071/100] Renamed eventsChannel to sharedNetworkEvents in consensus --- consensus/e2e_tests/hotstuff_test.go | 10 ++-- consensus/e2e_tests/pacemaker_test.go | 20 ++++---- consensus/e2e_tests/state_sync_test.go | 42 +++++++--------- consensus/e2e_tests/utils_test.go | 66 +++++++++++++------------- p2p/utils_test.go | 3 +- runtime/bus.go | 11 +++-- 6 files changed, 75 insertions(+), 77 deletions(-) diff --git a/consensus/e2e_tests/hotstuff_test.go b/consensus/e2e_tests/hotstuff_test.go index acf82fd20..10e35939b 100644 --- a/consensus/e2e_tests/hotstuff_test.go +++ b/consensus/e2e_tests/hotstuff_test.go @@ -23,17 +23,17 @@ func TestHotstuff_4Nodes1BlockHappyPath(t *testing.T) { buses := generateBuses(t, runtimeMgrs) // Create & start test pocket nodes - eventsChannel := make(modules.EventsChannel, 100) - pocketNodes := createTestConsensusPocketNodes(t, buses, eventsChannel) + sharedNetworkChannel := make(modules.EventsChannel, 100) + pocketNodes := createTestConsensusPocketNodes(t, buses, sharedNetworkChannel) err := startAllTestPocketNodes(t, pocketNodes) require.NoError(t, err) // Wait for nodes to reach height=1 by generating a block - block := WaitForNextBlock(t, clockMock, eventsChannel, pocketNodes, 1, 0, 500, true) + block := WaitForNextBlock(t, clockMock, sharedNetworkChannel, pocketNodes, 1, 0, 500, true) require.Equal(t, uint64(1), block.BlockHeader.Height) // Expecting NewRound messages for height=2 to be sent after a block is committed - _, err = waitForProposalMsgs(t, clockMock, eventsChannel, pocketNodes, 2, uint8(consensus.NewRound), 0, 0, numValidators*numValidators, 500, true) + _, err = waitForProposalMsgs(t, clockMock, sharedNetworkChannel, pocketNodes, 2, uint8(consensus.NewRound), 0, 0, numValidators*numValidators, 500, true) require.NoError(t, err) // TODO(#615): Add QC verification here after valid block mocking is implemented with issue #352. @@ -50,7 +50,7 @@ func TestHotstuff_4Nodes1BlockHappyPath(t *testing.T) { send(t, serverNode, stateSyncGetBlockMsg) // Server node is waiting for the get block response message - receivedMsg, err := waitForNetworkStateSyncEvents(t, clockMock, eventsChannel, "error waiting for StateSync.GetBlockRequest message", 1, 500, false, reflect.TypeOf(&typesCons.StateSyncMessage_GetBlockRes{})) + receivedMsg, err := waitForNetworkStateSyncEvents(t, clockMock, sharedNetworkChannel, "error waiting for StateSync.GetBlockRequest message", 1, 500, false, reflect.TypeOf(&typesCons.StateSyncMessage_GetBlockRes{})) require.NoError(t, err) // Verify that it was a get block request of the right height diff --git a/consensus/e2e_tests/pacemaker_test.go b/consensus/e2e_tests/pacemaker_test.go index 1cfa0fdf7..595ae3b8a 100644 --- a/consensus/e2e_tests/pacemaker_test.go +++ b/consensus/e2e_tests/pacemaker_test.go @@ -31,8 +31,8 @@ func TestPacemakerTimeoutIncreasesRound(t *testing.T) { buses := generateBuses(t, runtimeMgrs) // Create & start test pocket nodes - eventsChannel := make(modules.EventsChannel, 100) - pocketNodes := createTestConsensusPocketNodes(t, buses, eventsChannel) + sharedNetworkChannel := make(modules.EventsChannel, 100) + pocketNodes := createTestConsensusPocketNodes(t, buses, sharedNetworkChannel) err := startAllTestPocketNodes(t, pocketNodes) require.NoError(t, err) @@ -42,23 +42,23 @@ func TestPacemakerTimeoutIncreasesRound(t *testing.T) { // Advance time by an amount shorter than the pacemaker timeout advanceTime(t, clockMock, 10*time.Millisecond) - _, err = waitForProposalMsgs(t, clockMock, eventsChannel, pocketNodes, 1, uint8(consensus.NewRound), 0, 0, numValidators*numValidators, consensusMessageTimeout, true) + _, err = waitForProposalMsgs(t, clockMock, sharedNetworkChannel, pocketNodes, 1, uint8(consensus.NewRound), 0, 0, numValidators*numValidators, consensusMessageTimeout, true) require.NoError(t, err) // Force the pacemaker to time out forcePacemakerTimeout(t, clockMock, paceMakerTimeout) // Wait for the round=1 to fail - _, err = waitForProposalMsgs(t, clockMock, eventsChannel, pocketNodes, 1, uint8(consensus.NewRound), 1, 0, numValidators*numValidators, consensusMessageTimeout, true) + _, err = waitForProposalMsgs(t, clockMock, sharedNetworkChannel, pocketNodes, 1, uint8(consensus.NewRound), 1, 0, numValidators*numValidators, consensusMessageTimeout, true) require.NoError(t, err) forcePacemakerTimeout(t, clockMock, paceMakerTimeout) // Wait for the round=2 to fail - _, err = waitForProposalMsgs(t, clockMock, eventsChannel, pocketNodes, 1, uint8(consensus.NewRound), 2, 0, numValidators*numValidators, consensusMessageTimeout, true) + _, err = waitForProposalMsgs(t, clockMock, sharedNetworkChannel, pocketNodes, 1, uint8(consensus.NewRound), 2, 0, numValidators*numValidators, consensusMessageTimeout, true) require.NoError(t, err) forcePacemakerTimeout(t, clockMock, paceMakerTimeout) // Wait for the round=3 to succeed - newRoundMessages, err := waitForProposalMsgs(t, clockMock, eventsChannel, pocketNodes, 1, uint8(consensus.NewRound), 3, 0, numValidators*numValidators, consensusMessageTimeout, true) + newRoundMessages, err := waitForProposalMsgs(t, clockMock, sharedNetworkChannel, pocketNodes, 1, uint8(consensus.NewRound), 3, 0, numValidators*numValidators, consensusMessageTimeout, true) require.NoError(t, err) broadcastMessages(t, newRoundMessages, pocketNodes) advanceTime(t, clockMock, 10*time.Millisecond) @@ -66,7 +66,7 @@ func TestPacemakerTimeoutIncreasesRound(t *testing.T) { // Get the expected leader id for round=3 leaderId := typesCons.NodeId(pocketNodes[1].GetBus().GetConsensusModule().GetLeaderForView(1, 3, uint8(consensus.NewRound))) // Wait for nodes to proceed to Propose step in round=3 - _, err = waitForProposalMsgs(t, clockMock, eventsChannel, pocketNodes, 1, uint8(consensus.Prepare), 3, leaderId, numValidators, consensusMessageTimeout, true) + _, err = waitForProposalMsgs(t, clockMock, sharedNetworkChannel, pocketNodes, 1, uint8(consensus.Prepare), 3, leaderId, numValidators, consensusMessageTimeout, true) require.NoError(t, err) } @@ -79,8 +79,8 @@ func TestPacemakerCatchupSameStepDifferentRounds(t *testing.T) { buses := generateBuses(t, runtimeConfigs) // Create & start test pocket nodes - eventsChannel := make(modules.EventsChannel, 100) - pocketNodes := createTestConsensusPocketNodes(t, buses, eventsChannel) + sharedNetworkChannel := make(modules.EventsChannel, 100) + pocketNodes := createTestConsensusPocketNodes(t, buses, sharedNetworkChannel) err := startAllTestPocketNodes(t, pocketNodes) require.NoError(t, err) @@ -143,7 +143,7 @@ func TestPacemakerCatchupSameStepDifferentRounds(t *testing.T) { broadcastMessages(t, []*anypb.Any{anyMsg}, pocketNodes) advanceTime(t, clockMock, 10*time.Millisecond) - _, err = waitForNetworkConsensusEvents(t, clockMock, eventsChannel, 2, consensus.Vote, numExpectedMsgs, time.Duration(msgTimeout), true) + _, err = waitForNetworkConsensusEvents(t, clockMock, sharedNetworkChannel, 2, consensus.Vote, numExpectedMsgs, time.Duration(msgTimeout), true) require.NoError(t, err) diff --git a/consensus/e2e_tests/state_sync_test.go b/consensus/e2e_tests/state_sync_test.go index 8222c9fc0..57422d09d 100644 --- a/consensus/e2e_tests/state_sync_test.go +++ b/consensus/e2e_tests/state_sync_test.go @@ -1,7 +1,6 @@ package e2e_tests import ( - "fmt" "reflect" "testing" "time" @@ -15,7 +14,7 @@ import ( ) func TestStateSync_MetadataRequestResponse_Success(t *testing.T) { - clockMock, eventsChannel, pocketNodes := prepareStateSyncTestEnvironment(t) + clockMock, sharedNetworkChannel, pocketNodes := prepareStateSyncTestEnvironment(t) // Choose node 1 as the server node serverNode := pocketNodes[1] @@ -32,7 +31,7 @@ func TestStateSync_MetadataRequestResponse_Success(t *testing.T) { send(t, serverNode, anyProto) // Wait for response from the server node - receivedMsgs, err := waitForNetworkStateSyncEvents(t, clockMock, eventsChannel, "did not receive response to state sync metadata request", 1, 500, false, reflect.TypeOf(&typesCons.StateSyncMessage_MetadataRes{})) + receivedMsgs, err := waitForNetworkStateSyncEvents(t, clockMock, sharedNetworkChannel, "did not receive response to state sync metadata request", 1, 500, false, reflect.TypeOf(&typesCons.StateSyncMessage_MetadataRes{})) require.NoError(t, err) // Extract the response @@ -50,7 +49,7 @@ func TestStateSync_MetadataRequestResponse_Success(t *testing.T) { } func TestStateSync_BlockRequestResponse_Success(t *testing.T) { - clockMock, eventsChannel, pocketNodes := prepareStateSyncTestEnvironment(t) + clockMock, sharedNetworkChannel, pocketNodes := prepareStateSyncTestEnvironment(t) // Choose node 1 as the server node serverNode := pocketNodes[1] @@ -68,7 +67,7 @@ func TestStateSync_BlockRequestResponse_Success(t *testing.T) { send(t, serverNode, stateSyncGetBlockMsg) // Start waiting for the get block response on server node - receivedMsg, err := waitForNetworkStateSyncEvents(t, clockMock, eventsChannel, "error waiting on response to a get block request", 1, 500, false, reflect.TypeOf(&typesCons.StateSyncMessage_GetBlockRes{})) + receivedMsg, err := waitForNetworkStateSyncEvents(t, clockMock, sharedNetworkChannel, "error waiting on response to a get block request", 1, 500, false, reflect.TypeOf(&typesCons.StateSyncMessage_GetBlockRes{})) require.NoError(t, err) // validate the response @@ -87,7 +86,7 @@ func TestStateSync_BlockRequestResponse_Success(t *testing.T) { } func TestStateSync_BlockRequestResponse_FailNonExistingBlock(t *testing.T) { - clockMock, eventsChannel, pocketNodes := prepareStateSyncTestEnvironment(t) + clockMock, sharedNetworkChannel, pocketNodes := prepareStateSyncTestEnvironment(t) testHeight := uint64(5) @@ -107,12 +106,12 @@ func TestStateSync_BlockRequestResponse_FailNonExistingBlock(t *testing.T) { // Start waiting for the get block request on server node, expect to return error errMsg := "expecting to time out waiting on a response from a non existent" - _, err := waitForNetworkStateSyncEvents(t, clockMock, eventsChannel, errMsg, 1, 500, false, reflect.TypeOf(&typesCons.StateSyncMessage_GetBlockRes{})) + _, err := waitForNetworkStateSyncEvents(t, clockMock, sharedNetworkChannel, errMsg, 1, 500, false, reflect.TypeOf(&typesCons.StateSyncMessage_GetBlockRes{})) require.Error(t, err) } func TestStateSync_UnsyncedPeerSyncs_Success(t *testing.T) { - clockMock, eventsChannel, pocketNodes := prepareStateSyncTestEnvironment(t) + clockMock, sharedNetworkChannel, pocketNodes := prepareStateSyncTestEnvironment(t) // Select node 2 as the unsynched node that will catch up unsyncedNodeId := typesCons.NodeId(pocketNodes[2].GetBus().GetConsensusModule().GetNodeId()) @@ -141,7 +140,7 @@ func TestStateSync_UnsyncedPeerSyncs_Success(t *testing.T) { broadcast(t, pocketNodes, anyProto) // Make sure the unsynched node has a view of the network - receivedMsgs, err := waitForNetworkStateSyncEvents(t, clockMock, eventsChannel, "did not receive response to state sync metadata request", len(pocketNodes), 500, false, nil) + receivedMsgs, err := waitForNetworkStateSyncEvents(t, clockMock, sharedNetworkChannel, "did not receive response to state sync metadata request", len(pocketNodes), 500, false, nil) require.NoError(t, err) for _, msg := range receivedMsgs { send(t, unsyncedNode, msg) @@ -151,14 +150,14 @@ func TestStateSync_UnsyncedPeerSyncs_Success(t *testing.T) { // Trigger the next round of consensus so the unsynched nodes is prompted to start synching triggerNextView(t, pocketNodes) advanceTime(t, clockMock, 10*time.Millisecond) - proposalMsgs, err := waitForNetworkConsensusEvents(t, clockMock, eventsChannel, typesCons.HotstuffStep(consensus.NewRound), consensus.Propose, numValidators*numValidators, 500, false) + proposalMsgs, err := waitForNetworkConsensusEvents(t, clockMock, sharedNetworkChannel, typesCons.HotstuffStep(consensus.NewRound), consensus.Propose, numValidators*numValidators, 500, false) require.NoError(t, err) broadcastMessages(t, proposalMsgs, pocketNodes) advanceTime(t, clockMock, 10*time.Millisecond) for unsyncedNodeHeight < targetHeight { // Wait for the unsynched node to request the block at the current height - blockRequests, err := waitForNetworkStateSyncEvents(t, clockMock, eventsChannel, "error waiting on response to a get block request", 1, 5000, false, reflect.TypeOf(&typesCons.StateSyncMessage_GetBlockReq{})) + blockRequests, err := waitForNetworkStateSyncEvents(t, clockMock, sharedNetworkChannel, "error waiting on response to a get block request", 1, 5000, false, reflect.TypeOf(&typesCons.StateSyncMessage_GetBlockReq{})) require.NoError(t, err) // Validate the height being requested is correct @@ -172,7 +171,7 @@ func TestStateSync_UnsyncedPeerSyncs_Success(t *testing.T) { advanceTime(t, clockMock, 10*time.Millisecond) // Wait for the unsynched node to receive the block responses - blockResponses, err := waitForNetworkStateSyncEvents(t, clockMock, eventsChannel, "error waiting on response to a get block response", numValidators-1, 5000, false, reflect.TypeOf(&typesCons.StateSyncMessage_GetBlockRes{})) + blockResponses, err := waitForNetworkStateSyncEvents(t, clockMock, sharedNetworkChannel, "error waiting on response to a get block response", numValidators-1, 5000, false, reflect.TypeOf(&typesCons.StateSyncMessage_GetBlockRes{})) require.NoError(t, err) // Validate that the block is the same from all the validators who send it (non byzantine scenario) @@ -193,14 +192,9 @@ func TestStateSync_UnsyncedPeerSyncs_Success(t *testing.T) { // Send one of the responses (since they are equal) to the unsynched node to apply it send(t, unsyncedNode, blockResponses[0]) - advanceTime(t, clockMock, 10*time.Millisecond) - - fmt.Println("OLSH events channel", eventsChannel) - // TODO_IN_THIS_COMMIT: Remove this hack - // Wait for the unsynched node to commit the block - // _, err = waitForEventsInternal(clockMock, eventsChannel, messaging.StateSyncBlockCommittedEventType, 1, 5000, nil, "error waiting on response to a get block response", false) - // require.NoError(t, err) + // CONSIDERATION: Do we need to sleep or block before checking if the block was committed? + advanceTime(t, clockMock, 10*time.Millisecond) time.Sleep(10 * time.Millisecond) // ensure unsynced node height increased @@ -248,16 +242,16 @@ func prepareStateSyncTestEnvironment(t *testing.T) (*clock.Mock, modules.EventsC // Test configs runtimeMgrs := generateNodeRuntimeMgrs(t, numValidators, clockMock) buses := generateBuses(t, runtimeMgrs) - // buses := generateBusesTemp(t, runtimeMgrs, eventsChannel) + // buses := generateBusesTemp(t, runtimeMgrs, sharedNetworkChannel) // Create & start test pocket nodes - eventsChannel := make(modules.EventsChannel, 100) - // buses := generateBusesTemp(t, runtimeMgrs, eventsChannel) - pocketNodes := createTestConsensusPocketNodes(t, buses, eventsChannel) + sharedNetworkChannel := make(modules.EventsChannel, 100) + // buses := generateBusesTemp(t, runtimeMgrs, sharedNetworkChannel) + pocketNodes := createTestConsensusPocketNodes(t, buses, sharedNetworkChannel) err := startAllTestPocketNodes(t, pocketNodes) require.NoError(t, err) - return clockMock, eventsChannel, pocketNodes + return clockMock, sharedNetworkChannel, pocketNodes } // func generateBusesTemp(t *testing.T, runtimeMgrs []*runtime.Manager, channel modules.EventsChannel) (buses []modules.Bus) { diff --git a/consensus/e2e_tests/utils_test.go b/consensus/e2e_tests/utils_test.go index 2e2b1a665..4f8bdb1c7 100644 --- a/consensus/e2e_tests/utils_test.go +++ b/consensus/e2e_tests/utils_test.go @@ -80,7 +80,7 @@ func generateNodeRuntimeMgrs(t *testing.T, validatorCount int, clockMgr clock.Cl func createTestConsensusPocketNodes( t *testing.T, buses []modules.Bus, - eventsChannel modules.EventsChannel, + sharedNetworkChannel modules.EventsChannel, ) (pocketNodes idToNodeMapping) { pocketNodes = make(idToNodeMapping, len(buses)) // TECHDEBT: The order here is important in order for NodeIds to be set correctly below. @@ -99,7 +99,7 @@ func createTestConsensusPocketNodes( for i, bus := range buses { nodeId := typesCons.NodeId(i + 1) - pocketNode := createTestConsensusPocketNode(t, bus, eventsChannel, blocks) + pocketNode := createTestConsensusPocketNode(t, bus, sharedNetworkChannel, blocks) pocketNodes[nodeId] = pocketNode validatorPrivKey, err := cryptoPocket.NewPrivateKey(pocketNode.GetBus().GetRuntimeMgr().GetConfig().PrivateKey) @@ -115,10 +115,10 @@ func createTestConsensusPocketNodes( func createTestConsensusPocketNode( t *testing.T, bus modules.Bus, - eventsChannel modules.EventsChannel, + sharedNetworkChannel modules.EventsChannel, placeholderBlocks *testingBlocks, ) *shared.Node { - persistenceMock := basePersistenceMock(t, eventsChannel, bus, placeholderBlocks) + persistenceMock := basePersistenceMock(t, sharedNetworkChannel, bus, placeholderBlocks) bus.RegisterModule(persistenceMock) consensusMod, err := consensus.Create(bus) @@ -126,17 +126,17 @@ func createTestConsensusPocketNode( consensusModule, ok := consensusMod.(modules.ConsensusModule) require.True(t, ok) - _, err = state_machine.Create(bus, state_machine.WithDebugEventsChannel(eventsChannel)) + _, err = state_machine.Create(bus, state_machine.WithDebugEventsChannel(sharedNetworkChannel)) require.NoError(t, err) runtimeMgr := (bus).GetRuntimeMgr() // TODO(olshansky): At the moment we are using the same base mocks for all the tests, // but note that they will need to be customized on a per test basis. - p2pMock := baseP2PMock(t, eventsChannel) - utilityMock := baseUtilityMock(t, eventsChannel, runtimeMgr.GetGenesis(), consensusModule) - telemetryMock := baseTelemetryMock(t, eventsChannel) - loggerMock := baseLoggerMock(t, eventsChannel) - rpcMock := baseRpcMock(t, eventsChannel) + p2pMock := baseP2PMock(t, sharedNetworkChannel) + utilityMock := baseUtilityMock(t, sharedNetworkChannel, runtimeMgr.GetGenesis(), consensusModule) + telemetryMock := baseTelemetryMock(t, sharedNetworkChannel) + loggerMock := baseLoggerMock(t, sharedNetworkChannel) + rpcMock := baseRpcMock(t, sharedNetworkChannel) for _, module := range []modules.Module{ p2pMock, @@ -245,7 +245,7 @@ func send(t *testing.T, node *shared.Node, any *anypb.Any) { func waitForNetworkConsensusEvents( t *testing.T, clck *clock.Mock, - eventsChannel modules.EventsChannel, + sharedNetworkChannel modules.EventsChannel, step typesCons.HotstuffStep, msgType typesCons.HotstuffMessageType, numExpectedMsgs int, @@ -263,7 +263,7 @@ func waitForNetworkConsensusEvents( } errMsg := fmt.Sprintf("HotStuff step: %s, type: %s", typesCons.HotstuffStep_name[int32(step)], typesCons.HotstuffMessageType_name[int32(msgType)]) - return waitForEventsInternal(clck, eventsChannel, messaging.HotstuffMessageContentType, numExpectedMsgs, millis, includeFilter, errMsg, failOnExtraMessages) + return waitForEventsInternal(clck, sharedNetworkChannel, messaging.HotstuffMessageContentType, numExpectedMsgs, millis, includeFilter, errMsg, failOnExtraMessages) } // IMPROVE: Consider unifying this function with WaitForNetworkConsensusEvents @@ -271,7 +271,7 @@ func waitForNetworkConsensusEvents( func waitForNetworkStateSyncEvents( t *testing.T, clck *clock.Mock, - eventsChannel modules.EventsChannel, + sharedNetworkChannel modules.EventsChannel, errMsg string, numExpectedMsgs int, maxWaitTime time.Duration, @@ -292,14 +292,14 @@ func waitForNetworkStateSyncEvents( return true } - return waitForEventsInternal(clck, eventsChannel, messaging.StateSyncMessageContentType, numExpectedMsgs, maxWaitTime, includeFilter, errMsg, failOnExtraMessages) + return waitForEventsInternal(clck, sharedNetworkChannel, messaging.StateSyncMessageContentType, numExpectedMsgs, maxWaitTime, includeFilter, errMsg, failOnExtraMessages) } // waitForNetworkFSMEvents waits for the number of expected state machine events to be published on the events channel. func waitForNetworkFSMEvents( t *testing.T, clck *clock.Mock, - eventsChannel modules.EventsChannel, + sharedNetworkChannel modules.EventsChannel, eventType coreTypes.StateMachineEvent, errMsg string, numExpectedMsgs int, @@ -316,14 +316,14 @@ func waitForNetworkFSMEvents( return stateTransitionMessage.Event == string(eventType) } - return waitForEventsInternal(clck, eventsChannel, messaging.StateMachineTransitionEventType, numExpectedMsgs, maxWaitTime, includeFilter, errMsg, failOnExtraMessages) + return waitForEventsInternal(clck, sharedNetworkChannel, messaging.StateMachineTransitionEventType, numExpectedMsgs, maxWaitTime, includeFilter, errMsg, failOnExtraMessages) } // RESEARCH(#462): Research ways to eliminate time-based non-determinism from the test framework // IMPROVE: This function can be extended to testing events outside of just the consensus module. func waitForEventsInternal( clck *clock.Mock, - eventsChannel modules.EventsChannel, + sharedNetworkChannel modules.EventsChannel, eventContentType string, numExpectedMsgs int, maxWaitTime time.Duration, @@ -360,7 +360,7 @@ func waitForEventsInternal( loop: for { select { - case nodeEvent := <-eventsChannel: + case nodeEvent := <-sharedNetworkChannel: fmt.Println("OLSH eventContentType0", eventContentType, nodeEvent.GetContentType()) if nodeEvent.GetContentType() != eventContentType { unusedEvents = append(unusedEvents, nodeEvent) @@ -398,7 +398,7 @@ loop: } for _, u := range unusedEvents { - eventsChannel <- u + sharedNetworkChannel <- u } return } @@ -510,7 +510,7 @@ func basePersistenceMock(t *testing.T, _ modules.EventsChannel, bus modules.Bus, } // Creates a p2p module mock with mock implementations of some basic functionality -func baseP2PMock(t *testing.T, eventsChannel modules.EventsChannel) *mockModules.MockP2PModule { +func baseP2PMock(t *testing.T, sharedNetworkChannel modules.EventsChannel) *mockModules.MockP2PModule { ctrl := gomock.NewController(t) p2pMock := mockModules.NewMockP2PModule(ctrl) @@ -520,7 +520,7 @@ func baseP2PMock(t *testing.T, eventsChannel modules.EventsChannel) *mockModules Broadcast(gomock.Any()). Do(func(msg *anypb.Any) { e := &messaging.PocketEnvelope{Content: msg} - eventsChannel <- e + sharedNetworkChannel <- e }). AnyTimes() // CONSIDERATION: Adding a check to not to send message to itself @@ -528,7 +528,7 @@ func baseP2PMock(t *testing.T, eventsChannel modules.EventsChannel) *mockModules Send(gomock.Any(), gomock.Any()). Do(func(addr cryptoPocket.Address, msg *anypb.Any) { e := &messaging.PocketEnvelope{Content: msg} - eventsChannel <- e + sharedNetworkChannel <- e }). AnyTimes() p2pMock.EXPECT().GetModuleName().Return(modules.P2PModuleName).AnyTimes() @@ -640,7 +640,7 @@ func baseRpcMock(t *testing.T, _ modules.EventsChannel) *mockModules.MockRPCModu func WaitForNextBlock( t *testing.T, clck *clock.Mock, - eventsChannel modules.EventsChannel, + sharedNetworkChannel modules.EventsChannel, pocketNodes idToNodeMapping, height uint64, round uint8, @@ -654,49 +654,49 @@ func WaitForNextBlock( advanceTime(t, clck, 10*time.Millisecond) // 1. NewRound - newRoundMessages, err := waitForProposalMsgs(t, clck, eventsChannel, pocketNodes, height, uint8(consensus.NewRound), round, 0, numValidators*numValidators, maxWaitTime, failOnExtraMessages) + newRoundMessages, err := waitForProposalMsgs(t, clck, sharedNetworkChannel, pocketNodes, height, uint8(consensus.NewRound), round, 0, numValidators*numValidators, maxWaitTime, failOnExtraMessages) require.NoError(t, err) broadcastMessages(t, newRoundMessages, pocketNodes) advanceTime(t, clck, 10*time.Millisecond) // 2. Prepare - prepareProposals, err := waitForProposalMsgs(t, clck, eventsChannel, pocketNodes, height, uint8(consensus.Prepare), round, leaderId, numValidators, maxWaitTime, failOnExtraMessages) + prepareProposals, err := waitForProposalMsgs(t, clck, sharedNetworkChannel, pocketNodes, height, uint8(consensus.Prepare), round, leaderId, numValidators, maxWaitTime, failOnExtraMessages) require.NoError(t, err) broadcastMessages(t, prepareProposals, pocketNodes) advanceTime(t, clck, 10*time.Millisecond) // wait for prepare votes - prepareVotes, err := waitForNetworkConsensusEvents(t, clck, eventsChannel, 2, consensus.Vote, numValidators, maxWaitTime, failOnExtraMessages) + prepareVotes, err := waitForNetworkConsensusEvents(t, clck, sharedNetworkChannel, 2, consensus.Vote, numValidators, maxWaitTime, failOnExtraMessages) require.NoError(t, err) broadcastMessages(t, prepareVotes, pocketNodes) advanceTime(t, clck, 10*time.Millisecond) // 3. PreCommit - preCommitProposals, err := waitForProposalMsgs(t, clck, eventsChannel, pocketNodes, height, uint8(consensus.PreCommit), round, leaderId, numValidators, maxWaitTime, failOnExtraMessages) + preCommitProposals, err := waitForProposalMsgs(t, clck, sharedNetworkChannel, pocketNodes, height, uint8(consensus.PreCommit), round, leaderId, numValidators, maxWaitTime, failOnExtraMessages) require.NoError(t, err) broadcastMessages(t, preCommitProposals, pocketNodes) advanceTime(t, clck, 10*time.Millisecond) // wait for preCommit votes - preCommitVotes, err := waitForNetworkConsensusEvents(t, clck, eventsChannel, 3, consensus.Vote, numValidators, maxWaitTime, failOnExtraMessages) + preCommitVotes, err := waitForNetworkConsensusEvents(t, clck, sharedNetworkChannel, 3, consensus.Vote, numValidators, maxWaitTime, failOnExtraMessages) require.NoError(t, err) broadcastMessages(t, preCommitVotes, pocketNodes) advanceTime(t, clck, 10*time.Millisecond) // 4. Commit - commitProposals, err := waitForProposalMsgs(t, clck, eventsChannel, pocketNodes, height, uint8(consensus.Commit), round, leaderId, numValidators, maxWaitTime, failOnExtraMessages) + commitProposals, err := waitForProposalMsgs(t, clck, sharedNetworkChannel, pocketNodes, height, uint8(consensus.Commit), round, leaderId, numValidators, maxWaitTime, failOnExtraMessages) require.NoError(t, err) broadcastMessages(t, commitProposals, pocketNodes) advanceTime(t, clck, 10*time.Millisecond) // wait for commit votes - commitVotes, err := waitForNetworkConsensusEvents(t, clck, eventsChannel, 4, consensus.Vote, numValidators, maxWaitTime, failOnExtraMessages) + commitVotes, err := waitForNetworkConsensusEvents(t, clck, sharedNetworkChannel, 4, consensus.Vote, numValidators, maxWaitTime, failOnExtraMessages) require.NoError(t, err) broadcastMessages(t, commitVotes, pocketNodes) advanceTime(t, clck, 10*time.Millisecond) // 5. Decide - decideProposals, err := waitForProposalMsgs(t, clck, eventsChannel, pocketNodes, height, uint8(consensus.Decide), round, leaderId, numValidators, maxWaitTime, failOnExtraMessages) + decideProposals, err := waitForProposalMsgs(t, clck, sharedNetworkChannel, pocketNodes, height, uint8(consensus.Decide), round, leaderId, numValidators, maxWaitTime, failOnExtraMessages) require.NoError(t, err) broadcastMessages(t, decideProposals, pocketNodes) advanceTime(t, clck, 10*time.Millisecond) @@ -711,7 +711,7 @@ func WaitForNextBlock( func waitForProposalMsgs( t *testing.T, clck *clock.Mock, - eventsChannel modules.EventsChannel, + sharedNetworkChannel modules.EventsChannel, pocketNodes idToNodeMapping, height uint64, step uint8, @@ -721,7 +721,7 @@ func waitForProposalMsgs( maxWaitTime time.Duration, failOnExtraMessages bool, ) ([]*anypb.Any, error) { - proposalMsgs, err := waitForNetworkConsensusEvents(t, clck, eventsChannel, typesCons.HotstuffStep(step), consensus.Propose, numExpectedMsgs, maxWaitTime, failOnExtraMessages) + proposalMsgs, err := waitForNetworkConsensusEvents(t, clck, sharedNetworkChannel, typesCons.HotstuffStep(step), consensus.Propose, numExpectedMsgs, maxWaitTime, failOnExtraMessages) if err != nil { return nil, err } diff --git a/p2p/utils_test.go b/p2p/utils_test.go index 633ea1425..41f7fa2ca 100644 --- a/p2p/utils_test.go +++ b/p2p/utils_test.go @@ -37,8 +37,7 @@ import ( // ~~~~~~ RainTree Unit Test Configurations ~~~~~~ const ( - serviceURLFormat = "node%d.consensus:42069" - eventsChannelSize = 10000 + serviceURLFormat = "node%d.consensus:42069" // Since we simulate up to a 27 node network, we will pre-generate a n >= 27 number of keys to avoid generation // every time. The genesis config seed start is set for deterministic key generation and 42 was chosen arbitrarily. genesisConfigSeedStart = 42 diff --git a/runtime/bus.go b/runtime/bus.go index 7a9679b2b..73a52d68b 100644 --- a/runtime/bus.go +++ b/runtime/bus.go @@ -22,6 +22,10 @@ type bus struct { // Node events channel modules.EventsChannel + // A secondary channel that receives all the same events as the main bus, + // but does not pull events when `GetBusEvent` is called + debugChannel modules.EventsChannel + modulesRegistry modules.ModulesRegistry runtimeMgr modules.RuntimeMgr @@ -125,11 +129,12 @@ func (m *bus) GetStateMachineModule() modules.StateMachineModule { return getModuleFromRegistry[modules.StateMachineModule](m, modules.StateMachineModuleName) } -// WithEventsChannel is used initialize the bus with a specific events channel -func WithEventsChannel(eventsChannel modules.EventsChannel) modules.BusOption { +// WithDebugEventsChannel is used initialize a secondary (debug) bus that receives all the same events +// as the main bus, but does pull events when `GetBusEvent` is called +func WithDebugEventsChannel(eventsChannel modules.EventsChannel) modules.BusOption { return func(m modules.Bus) { if m, ok := m.(*bus); ok { - m.channel = eventsChannel + m.debugChannel = eventsChannel } } } From d2ed2bb91d12f0cbb93375283ebc464c7c9166f2 Mon Sep 17 00:00:00 2001 From: Daniel Olshansky Date: Wed, 14 Jun 2023 16:03:27 -0700 Subject: [PATCH 072/100] Added runtime/debug_helpers.go --- runtime/bus.go | 10 ---------- runtime/debug_helpers.go | 15 +++++++++++++++ state_machine/debug_helpers.go | 15 --------------- 3 files changed, 15 insertions(+), 25 deletions(-) create mode 100644 runtime/debug_helpers.go delete mode 100644 state_machine/debug_helpers.go diff --git a/runtime/bus.go b/runtime/bus.go index 73a52d68b..55a89b0f8 100644 --- a/runtime/bus.go +++ b/runtime/bus.go @@ -129,16 +129,6 @@ func (m *bus) GetStateMachineModule() modules.StateMachineModule { return getModuleFromRegistry[modules.StateMachineModule](m, modules.StateMachineModuleName) } -// WithDebugEventsChannel is used initialize a secondary (debug) bus that receives all the same events -// as the main bus, but does pull events when `GetBusEvent` is called -func WithDebugEventsChannel(eventsChannel modules.EventsChannel) modules.BusOption { - return func(m modules.Bus) { - if m, ok := m.(*bus); ok { - m.debugChannel = eventsChannel - } - } -} - // getModuleFromRegistry is a helper function to get a module from the registry that handles errors and casting via generics func getModuleFromRegistry[T modules.Module](m *bus, moduleName string) T { mod, err := m.modulesRegistry.GetModule(moduleName) diff --git a/runtime/debug_helpers.go b/runtime/debug_helpers.go new file mode 100644 index 000000000..6257b14a6 --- /dev/null +++ b/runtime/debug_helpers.go @@ -0,0 +1,15 @@ +// +built test debug + +package runtime + +import "github.com/pokt-network/pocket/shared/modules" + +// WithDebugEventsChannel is used initialize a secondary (debug) bus that receives all the same events +// as the main bus, but does pull events when `GetBusEvent` is called +func WithDebugEventsChannel(eventsChannel modules.EventsChannel) modules.BusOption { + return func(m modules.Bus) { + if m, ok := m.(*bus); ok { + m.debugChannel = eventsChannel + } + } +} diff --git a/state_machine/debug_helpers.go b/state_machine/debug_helpers.go deleted file mode 100644 index e8882a612..000000000 --- a/state_machine/debug_helpers.go +++ /dev/null @@ -1,15 +0,0 @@ -// +built test debug - -package state_machine - -import "github.com/pokt-network/pocket/shared/modules" - -// WithDebugEventsChannel is used for testing purposes only. It allows us to capture the events -// from the FSM and publish them to debug channel for testing. -func WithDebugEventsChannel(eventsChannel modules.EventsChannel) modules.ModuleOption { - return func(m modules.InitializableModule) { - if m, ok := m.(*stateMachineModule); ok { - m.debugChannels = append(m.debugChannels, eventsChannel) - } - } -} From d2d7d010b42dc269f581df5427dba8a68b2d795a Mon Sep 17 00:00:00 2001 From: Daniel Olshansky Date: Wed, 14 Jun 2023 16:21:30 -0700 Subject: [PATCH 073/100] Removed waitForNetworkFSMEvents --- consensus/e2e_tests/state_sync_test.go | 57 ++++++++++++-------------- consensus/e2e_tests/utils_test.go | 31 ++------------ runtime/bus.go | 13 ++++-- runtime/debug_helpers.go | 14 ++++++- shared/modules/bus_module.go | 1 + 5 files changed, 54 insertions(+), 62 deletions(-) diff --git a/consensus/e2e_tests/state_sync_test.go b/consensus/e2e_tests/state_sync_test.go index 57422d09d..e1196bd6e 100644 --- a/consensus/e2e_tests/state_sync_test.go +++ b/consensus/e2e_tests/state_sync_test.go @@ -1,6 +1,7 @@ package e2e_tests import ( + "fmt" "reflect" "testing" "time" @@ -8,6 +9,7 @@ import ( "github.com/benbjohnson/clock" "github.com/pokt-network/pocket/consensus" typesCons "github.com/pokt-network/pocket/consensus/types" + "github.com/pokt-network/pocket/runtime" "github.com/pokt-network/pocket/shared/codec" "github.com/pokt-network/pocket/shared/modules" "github.com/stretchr/testify/require" @@ -192,6 +194,11 @@ func TestStateSync_UnsyncedPeerSyncs_Success(t *testing.T) { // Send one of the responses (since they are equal) to the unsynched node to apply it send(t, unsyncedNode, blockResponses[0]) + debugChannel := unsyncedNode.GetBus().GetDebugEventBus() + for { + e := <-debugChannel + fmt.Println(e) + } // CONSIDERATION: Do we need to sleep or block before checking if the block was committed? advanceTime(t, clockMock, 10*time.Millisecond) @@ -208,8 +215,26 @@ func TestStateSync_UnsyncedPeerSyncs_Success(t *testing.T) { assertHeight(t, unsyncedNodeId, targetHeight, getConsensusNodeState(unsyncedNode).Height) } -// TODO: Implement these tests +func prepareStateSyncTestEnvironment(t *testing.T) (*clock.Mock, modules.EventsChannel, idToNodeMapping) { + // Test preparation + clockMock := clock.NewMock() + timeReminder(t, clockMock, time.Second) + + // Test configs + runtimeMgrs := generateNodeRuntimeMgrs(t, numValidators, clockMock) + buses := generateBuses(t, runtimeMgrs, runtime.WithNewDebugEventsChannel()) + + // Create & start test pocket nodes + // This channel captures all the messages that consensus nodes would send to each other over the network + sharedNetworkChannel := make(modules.EventsChannel, 100) + pocketNodes := createTestConsensusPocketNodes(t, buses, sharedNetworkChannel) + err := startAllTestPocketNodes(t, pocketNodes) + require.NoError(t, err) + return clockMock, sharedNetworkChannel, pocketNodes +} + +// INCOMPLETE: Implement the following tests func TestStateSync_UnsyncedPeerSyncsABlock_Success(t *testing.T) { t.Skip() } @@ -233,33 +258,3 @@ func TestStateSync_4of10UnsyncedPeersCatchUp(t *testing.T) { func TestStateSync_9of10UnsyncedPeersCatchUp(t *testing.T) { t.Skip() } - -func prepareStateSyncTestEnvironment(t *testing.T) (*clock.Mock, modules.EventsChannel, idToNodeMapping) { - // Test preparation - clockMock := clock.NewMock() - timeReminder(t, clockMock, time.Second) - - // Test configs - runtimeMgrs := generateNodeRuntimeMgrs(t, numValidators, clockMock) - buses := generateBuses(t, runtimeMgrs) - // buses := generateBusesTemp(t, runtimeMgrs, sharedNetworkChannel) - - // Create & start test pocket nodes - sharedNetworkChannel := make(modules.EventsChannel, 100) - // buses := generateBusesTemp(t, runtimeMgrs, sharedNetworkChannel) - pocketNodes := createTestConsensusPocketNodes(t, buses, sharedNetworkChannel) - err := startAllTestPocketNodes(t, pocketNodes) - require.NoError(t, err) - - return clockMock, sharedNetworkChannel, pocketNodes -} - -// func generateBusesTemp(t *testing.T, runtimeMgrs []*runtime.Manager, channel modules.EventsChannel) (buses []modules.Bus) { -// buses = make([]modules.Bus, len(runtimeMgrs)) -// for i := range runtimeMgrs { -// bus, err := runtime.CreateBus(runtimeMgrs[i], runtime.WithEventsChannel(channel)) -// require.NoError(t, err) -// buses[i] = bus -// } -// return -// } diff --git a/consensus/e2e_tests/utils_test.go b/consensus/e2e_tests/utils_test.go index 4f8bdb1c7..b08ecd311 100644 --- a/consensus/e2e_tests/utils_test.go +++ b/consensus/e2e_tests/utils_test.go @@ -126,7 +126,8 @@ func createTestConsensusPocketNode( consensusModule, ok := consensusMod.(modules.ConsensusModule) require.True(t, ok) - _, err = state_machine.Create(bus, state_machine.WithDebugEventsChannel(sharedNetworkChannel)) + // _, err = state_machine.Create(bus, state_machine.WithDebugEventsChannel(sharedNetworkChannel)) + _, err = state_machine.Create(bus) require.NoError(t, err) runtimeMgr := (bus).GetRuntimeMgr() @@ -160,10 +161,10 @@ func createTestConsensusPocketNode( return pocketNode } -func generateBuses(t *testing.T, runtimeMgrs []*runtime.Manager) (buses []modules.Bus) { +func generateBuses(t *testing.T, runtimeMgrs []*runtime.Manager, opts ...modules.BusOption) (buses []modules.Bus) { buses = make([]modules.Bus, len(runtimeMgrs)) for i := range runtimeMgrs { - bus, err := runtime.CreateBus(runtimeMgrs[i]) + bus, err := runtime.CreateBus(runtimeMgrs[i], opts...) require.NoError(t, err) buses[i] = bus } @@ -295,30 +296,6 @@ func waitForNetworkStateSyncEvents( return waitForEventsInternal(clck, sharedNetworkChannel, messaging.StateSyncMessageContentType, numExpectedMsgs, maxWaitTime, includeFilter, errMsg, failOnExtraMessages) } -// waitForNetworkFSMEvents waits for the number of expected state machine events to be published on the events channel. -func waitForNetworkFSMEvents( - t *testing.T, - clck *clock.Mock, - sharedNetworkChannel modules.EventsChannel, - eventType coreTypes.StateMachineEvent, - errMsg string, - numExpectedMsgs int, - maxWaitTime time.Duration, - failOnExtraMessages bool, -) (messages []*anypb.Any, err error) { - includeFilter := func(anyMsg *anypb.Any) bool { - msg, err := codec.GetCodec().FromAny(anyMsg) - require.NoError(t, err) - - stateTransitionMessage, ok := msg.(*messaging.StateMachineTransitionEvent) - require.True(t, ok) - - return stateTransitionMessage.Event == string(eventType) - } - - return waitForEventsInternal(clck, sharedNetworkChannel, messaging.StateMachineTransitionEventType, numExpectedMsgs, maxWaitTime, includeFilter, errMsg, failOnExtraMessages) -} - // RESEARCH(#462): Research ways to eliminate time-based non-determinism from the test framework // IMPROVE: This function can be extended to testing events outside of just the consensus module. func waitForEventsInternal( diff --git a/runtime/bus.go b/runtime/bus.go index 55a89b0f8..a388c3947 100644 --- a/runtime/bus.go +++ b/runtime/bus.go @@ -1,7 +1,6 @@ package runtime import ( - "fmt" "sync" "github.com/pokt-network/pocket/logger" @@ -37,7 +36,8 @@ func CreateBus(runtimeMgr modules.RuntimeMgr, opts ...modules.BusOption) (module func (b *bus) Create(runtimeMgr modules.RuntimeMgr, opts ...modules.BusOption) (modules.Bus, error) { bus := &bus{ - channel: make(modules.EventsChannel, defaults.DefaultBusBufferSize), + channel: make(modules.EventsChannel, defaults.DefaultBusBufferSize), + debugChannel: nil, runtimeMgr: runtimeMgr, modulesRegistry: NewModulesRegistry(), @@ -60,8 +60,10 @@ func (m *bus) RegisterModule(module modules.Module) { } func (m *bus) PublishEventToBus(e *messaging.PocketEnvelope) { - fmt.Println("OLSH eventsChannel", m.channel) m.channel <- e + if m.debugChannel != nil { + m.debugChannel <- e + } } func (m *bus) GetBusEvent() *messaging.PocketEnvelope { @@ -73,6 +75,11 @@ func (m *bus) GetEventBus() modules.EventsChannel { return m.channel } +// GetDebugEventBus returns the debug event bus +func (m *bus) GetDebugEventBus() modules.EventsChannel { + return m.channel +} + func (m *bus) GetRuntimeMgr() modules.RuntimeMgr { return m.runtimeMgr } diff --git a/runtime/debug_helpers.go b/runtime/debug_helpers.go index 6257b14a6..62e354192 100644 --- a/runtime/debug_helpers.go +++ b/runtime/debug_helpers.go @@ -2,7 +2,10 @@ package runtime -import "github.com/pokt-network/pocket/shared/modules" +import ( + "github.com/pokt-network/pocket/runtime/defaults" + "github.com/pokt-network/pocket/shared/modules" +) // WithDebugEventsChannel is used initialize a secondary (debug) bus that receives all the same events // as the main bus, but does pull events when `GetBusEvent` is called @@ -13,3 +16,12 @@ func WithDebugEventsChannel(eventsChannel modules.EventsChannel) modules.BusOpti } } } + +// WithNewDebugEventsChannel is used initialize a secondary (debug) bus that receives all the same events +func WithNewDebugEventsChannel() modules.BusOption { + return func(m modules.Bus) { + if m, ok := m.(*bus); ok { + m.debugChannel = make(modules.EventsChannel, defaults.DefaultBusBufferSize) + } + } +} diff --git a/shared/modules/bus_module.go b/shared/modules/bus_module.go index 95a3a95e2..947cd89fb 100644 --- a/shared/modules/bus_module.go +++ b/shared/modules/bus_module.go @@ -20,6 +20,7 @@ type Bus interface { PublishEventToBus(e *messaging.PocketEnvelope) GetBusEvent() *messaging.PocketEnvelope GetEventBus() EventsChannel + GetDebugEventBus() EventsChannel // Dependency Injection / Service Discovery GetModulesRegistry() ModulesRegistry From 8f540f277d37e4e7edadd0d66f63544b589cacd0 Mon Sep 17 00:00:00 2001 From: Daniel Olshansky Date: Wed, 14 Jun 2023 16:21:46 -0700 Subject: [PATCH 074/100] Removed waitForNetworkFSMEvents --- consensus/e2e_tests/state_sync_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/consensus/e2e_tests/state_sync_test.go b/consensus/e2e_tests/state_sync_test.go index e1196bd6e..6b57ed9c4 100644 --- a/consensus/e2e_tests/state_sync_test.go +++ b/consensus/e2e_tests/state_sync_test.go @@ -197,7 +197,7 @@ func TestStateSync_UnsyncedPeerSyncs_Success(t *testing.T) { debugChannel := unsyncedNode.GetBus().GetDebugEventBus() for { e := <-debugChannel - fmt.Println(e) + fmt.Println("OLSH", e) } // CONSIDERATION: Do we need to sleep or block before checking if the block was committed? From 7a1e14674da756bcd8e0a4e6751511cf2af4eb05 Mon Sep 17 00:00:00 2001 From: Daniel Olshansky Date: Wed, 14 Jun 2023 16:39:15 -0700 Subject: [PATCH 075/100] Removed all the 'OLSH' comments --- consensus/e2e_tests/state_sync_test.go | 23 +++++++++++++++++------ consensus/e2e_tests/utils_test.go | 4 ---- consensus/fsm_handler.go | 1 - consensus/pacemaker/module.go | 2 -- runtime/bus.go | 2 +- shared/node.go | 3 +-- 6 files changed, 19 insertions(+), 16 deletions(-) diff --git a/consensus/e2e_tests/state_sync_test.go b/consensus/e2e_tests/state_sync_test.go index 6b57ed9c4..ad0efe0dd 100644 --- a/consensus/e2e_tests/state_sync_test.go +++ b/consensus/e2e_tests/state_sync_test.go @@ -1,7 +1,6 @@ package e2e_tests import ( - "fmt" "reflect" "testing" "time" @@ -11,7 +10,9 @@ import ( typesCons "github.com/pokt-network/pocket/consensus/types" "github.com/pokt-network/pocket/runtime" "github.com/pokt-network/pocket/shared/codec" + "github.com/pokt-network/pocket/shared/messaging" "github.com/pokt-network/pocket/shared/modules" + "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" ) @@ -194,15 +195,25 @@ func TestStateSync_UnsyncedPeerSyncs_Success(t *testing.T) { // Send one of the responses (since they are equal) to the unsynched node to apply it send(t, unsyncedNode, blockResponses[0]) + advanceTime(t, clockMock, 10*time.Millisecond) debugChannel := unsyncedNode.GetBus().GetDebugEventBus() - for { - e := <-debugChannel - fmt.Println("OLSH", e) + select { + case e := <-debugChannel: + if e.GetContentType() == messaging.StateSyncBlockCommittedEventType { + msg, err := codec.GetCodec().FromAny(e.Content) + require.NoError(t, err) + blockCommittedEvent, ok := msg.(*messaging.StateSyncBlockCommittedEvent) + require.True(t, ok) + if unsyncedNodeHeight == blockCommittedEvent.Height { + break + } + } + case <-time.After(time.Second): + assert.Fail(t, "Timed out waiting for block %d to be committed...", unsyncedNodeHeight) } // CONSIDERATION: Do we need to sleep or block before checking if the block was committed? - advanceTime(t, clockMock, 10*time.Millisecond) - time.Sleep(10 * time.Millisecond) + // time.Sleep(10 * time.Millisecond) // ensure unsynced node height increased nodeState := getConsensusNodeState(unsyncedNode) diff --git a/consensus/e2e_tests/utils_test.go b/consensus/e2e_tests/utils_test.go index b08ecd311..2d29e00e3 100644 --- a/consensus/e2e_tests/utils_test.go +++ b/consensus/e2e_tests/utils_test.go @@ -287,7 +287,6 @@ func waitForNetworkStateSyncEvents( require.True(t, ok) if stateSyncMsgType != nil { - fmt.Println("OLSH HERE", reflect.TypeOf(stateSyncMsg.Message), stateSyncMsgType) return reflect.TypeOf(stateSyncMsg.Message) == stateSyncMsgType } return true @@ -338,19 +337,16 @@ loop: for { select { case nodeEvent := <-sharedNetworkChannel: - fmt.Println("OLSH eventContentType0", eventContentType, nodeEvent.GetContentType()) if nodeEvent.GetContentType() != eventContentType { unusedEvents = append(unusedEvents, nodeEvent) continue } - fmt.Println("OLSH eventContentType1", eventContentType, nodeEvent) message := nodeEvent.Content if message == nil || !msgIncludeFilter(message) { unusedEvents = append(unusedEvents, nodeEvent) continue } - fmt.Println("OLSH eventContentType2", eventContentType) expectedMsgs = append(expectedMsgs, message) numRemainingMsgs-- diff --git a/consensus/fsm_handler.go b/consensus/fsm_handler.go index 7cdca1b1e..51fd55585 100644 --- a/consensus/fsm_handler.go +++ b/consensus/fsm_handler.go @@ -87,7 +87,6 @@ func (m *consensusModule) HandleUnsynced(msg *messaging.StateMachineTransitionEv // HandleSyncMode handles the FSM event Consensus_IsSyncing, and when SyncMode is the destination state. // In Sync mode, the node (validator or not starts syncing with the rest of the network. func (m *consensusModule) HandleSyncMode(msg *messaging.StateMachineTransitionEvent) error { - fmt.Println("OLSH") m.logger.Info().Str("source", consensusFSMHandlerSource).Msg("Node is in Sync Mode. About to start synchronous sync loop...") go m.stateSync.StartSynchronousStateSync() return nil diff --git a/consensus/pacemaker/module.go b/consensus/pacemaker/module.go index 8d6fedc26..b857e2938 100644 --- a/consensus/pacemaker/module.go +++ b/consensus/pacemaker/module.go @@ -99,9 +99,7 @@ func (*pacemaker) GetModuleName() string { } func (m *pacemaker) ShouldHandleMessage(msg *typesCons.HotstuffMessage) (bool, error) { - fmt.Println("OLSH ShouldHandleMessage") consensusMod := m.GetBus().GetConsensusModule() - currentHeight := consensusMod.CurrentHeight() currentRound := consensusMod.CurrentRound() currentStep := typesCons.HotstuffStep(consensusMod.CurrentStep()) diff --git a/runtime/bus.go b/runtime/bus.go index a388c3947..630905e04 100644 --- a/runtime/bus.go +++ b/runtime/bus.go @@ -77,7 +77,7 @@ func (m *bus) GetEventBus() modules.EventsChannel { // GetDebugEventBus returns the debug event bus func (m *bus) GetDebugEventBus() modules.EventsChannel { - return m.channel + return m.debugChannel } func (m *bus) GetRuntimeMgr() modules.RuntimeMgr { diff --git a/shared/node.go b/shared/node.go index 7d2eb54b3..c770ce973 100644 --- a/shared/node.go +++ b/shared/node.go @@ -180,8 +180,7 @@ func (node *Node) handleEvent(message *messaging.PocketEnvelope) error { case messaging.StateMachineTransitionEventType: err_consensus := node.GetBus().GetConsensusModule().HandleEvent(message.Content) err_p2p := node.GetBus().GetP2PModule().HandleEvent(message.Content) - // TODO: Remove this lib once we move to Go 1.2 - return multierr.Combine(err_consensus, err_p2p) + return multierr.Combine(err_consensus, err_p2p) // TECHDEBT: Remove this lib once we move to Go 1.2 case messaging.DebugMessageEventType: return node.handleDebugMessage(message) default: From c9bdebfab34b140a2a4d01faae64221280f00e23 Mon Sep 17 00:00:00 2001 From: Daniel Olshansky Date: Wed, 14 Jun 2023 16:56:02 -0700 Subject: [PATCH 076/100] Removed an unnecessary check for m.utilityUnitOfWork --- consensus/e2e_tests/state_sync_test.go | 6 ++---- consensus/e2e_tests/utils_test.go | 2 -- consensus/hotstuff_replica.go | 10 ---------- consensus/module_consensus_state_sync.go | 2 +- consensus/pacemaker/module.go | 4 ++-- 5 files changed, 5 insertions(+), 19 deletions(-) diff --git a/consensus/e2e_tests/state_sync_test.go b/consensus/e2e_tests/state_sync_test.go index ad0efe0dd..ea58bc70a 100644 --- a/consensus/e2e_tests/state_sync_test.go +++ b/consensus/e2e_tests/state_sync_test.go @@ -212,9 +212,6 @@ func TestStateSync_UnsyncedPeerSyncs_Success(t *testing.T) { assert.Fail(t, "Timed out waiting for block %d to be committed...", unsyncedNodeHeight) } - // CONSIDERATION: Do we need to sleep or block before checking if the block was committed? - // time.Sleep(10 * time.Millisecond) - // ensure unsynced node height increased nodeState := getConsensusNodeState(unsyncedNode) assertHeight(t, unsyncedNodeId, unsyncedNodeHeight+1, nodeState.Height) @@ -235,9 +232,10 @@ func prepareStateSyncTestEnvironment(t *testing.T) (*clock.Mock, modules.EventsC runtimeMgrs := generateNodeRuntimeMgrs(t, numValidators, clockMock) buses := generateBuses(t, runtimeMgrs, runtime.WithNewDebugEventsChannel()) - // Create & start test pocket nodes // This channel captures all the messages that consensus nodes would send to each other over the network sharedNetworkChannel := make(modules.EventsChannel, 100) + + // Create & start test pocket nodes pocketNodes := createTestConsensusPocketNodes(t, buses, sharedNetworkChannel) err := startAllTestPocketNodes(t, pocketNodes) require.NoError(t, err) diff --git a/consensus/e2e_tests/utils_test.go b/consensus/e2e_tests/utils_test.go index 2d29e00e3..9593290db 100644 --- a/consensus/e2e_tests/utils_test.go +++ b/consensus/e2e_tests/utils_test.go @@ -174,8 +174,6 @@ func generateBuses(t *testing.T, runtimeMgrs []*runtime.Manager, opts ...modules func startAllTestPocketNodes(t *testing.T, pocketNodes idToNodeMapping) error { for _, pocketNode := range pocketNodes { go startNode(t, pocketNode) - // startEvent := pocketNode.GetBus().GetBusEvent() - // require.Equal(t, messaging.NodeStartedEventType, startEvent.GetContentType()) stateMachine := pocketNode.GetBus().GetStateMachineModule() if err := stateMachine.SendEvent(coreTypes.StateMachineEvent_Start); err != nil { return err diff --git a/consensus/hotstuff_replica.go b/consensus/hotstuff_replica.go index 7fd729f4e..263d9042c 100644 --- a/consensus/hotstuff_replica.go +++ b/consensus/hotstuff_replica.go @@ -59,16 +59,6 @@ func (handler *HotstuffReplicaMessageHandler) HandlePrepareMessage(m *consensusM return } - // TODO_IN_THIS_COMMIT: Figure out how to remove this. - // if the replica received a proposal in statesync before receiving the NEWROUND proposals, - // in which case utilityUnitOfWork will be nil, and we refresh utility context - if m.utilityUnitOfWork == nil { - if err := m.refreshUtilityUnitOfWork(); err != nil { - m.logger.Error().Err(err).Msg("Could not refresh utility unitOfWork") - return - } - } - block := msg.GetBlock() if err := m.applyBlock(block); err != nil { m.logger.Error().Err(err).Msg(typesCons.ErrApplyBlock.Error()) diff --git a/consensus/module_consensus_state_sync.go b/consensus/module_consensus_state_sync.go index 6eaaa5116..f7203152b 100644 --- a/consensus/module_consensus_state_sync.go +++ b/consensus/module_consensus_state_sync.go @@ -76,8 +76,8 @@ func (m *consensusModule) tryToApplyRequestedBlock(blockResponse *typesCons.GetB } logger.Info().Int64("height", int64(block.BlockHeader.Height)).Msgf("Block, at height %d is committed!", block.BlockHeader.Height) - m.publishStateSyncBlockCommittedEvent(block.BlockHeader.Height) m.paceMaker.NewHeight() + m.publishStateSyncBlockCommittedEvent(block.BlockHeader.Height) } // REFACTOR(#434): Once we consolidated NodeIds/PeerIds, this could potentially be removed diff --git a/consensus/pacemaker/module.go b/consensus/pacemaker/module.go index b857e2938..c11f03cf3 100644 --- a/consensus/pacemaker/module.go +++ b/consensus/pacemaker/module.go @@ -114,8 +114,8 @@ func (m *pacemaker) ShouldHandleMessage(msg *typesCons.HotstuffMessage) (bool, e // 1. The node is behind and needs to catch up, node must start syncing, // 2. The leader is sending a malicious proposal. // There, for both cases, node rejects the proposal, because: - // 1. If node is out of sync, node can't verify the block proposal, so rejects it. But node will eventually sync with the rest of the network and add the block. - // 2. If node is synced, node must reject the proposal because proposal is not valid. + // 1. If node is out of sync, node can't verify the block proposal, so rejects it. But node will eventually sync with the rest of the network and add the block. + // 2. If node is synced, node must reject the proposal because proposal is not valid. if msg.Height > currentHeight { m.logger.Info().Msgf("⚠️ [WARN] ⚠️ Node at height %d < message height %d", currentHeight, msg.Height) err := m.GetBus().GetStateMachineModule().SendEvent(coreTypes.StateMachineEvent_Consensus_IsUnsynced) From 1fa2a69d34b01f49dde23020d99f0d4dc3dfcbe5 Mon Sep 17 00:00:00 2001 From: Daniel Olshansky Date: Wed, 14 Jun 2023 16:57:37 -0700 Subject: [PATCH 077/100] Removed unnecessary checks in hotstuff_replica --- consensus/hotstuff_replica.go | 27 --------------------------- 1 file changed, 27 deletions(-) diff --git a/consensus/hotstuff_replica.go b/consensus/hotstuff_replica.go index 263d9042c..7eea2037e 100644 --- a/consensus/hotstuff_replica.go +++ b/consensus/hotstuff_replica.go @@ -94,15 +94,6 @@ func (handler *HotstuffReplicaMessageHandler) HandlePrecommitMessage(m *consensu return } - // replica might have receive PRECOMMIT proposal without receiving the NEWROUND and/or PREPARE proposals while performing state sync. - // in this case m.block will be nil, and we set it via the proposal, since we already performed QC verification block in the proposal is valid. - if m.block == nil { - m.block = msg.GetBlock() - if err := m.refreshUtilityUnitOfWork(); err != nil { - m.logger.Error().Err(err).Msg("Could not refresh utility context") - } - } - m.step = Commit m.prepareQC = quorumCert // INVESTIGATE: Why are we never using this for validation? @@ -132,15 +123,6 @@ func (handler *HotstuffReplicaMessageHandler) HandleCommitMessage(m *consensusMo return } - // replica might have receive COMMIT proposal without receiving the NEWROUND and/or PREPARE, PRECOMMIT proposals while performing state sync. - // in this case m.block will be nil, and we set it via the proposal, since we already performed QC verification block in the proposal is valid. - if m.block == nil { - m.block = msg.GetBlock() - if err := m.refreshUtilityUnitOfWork(); err != nil { - m.logger.Error().Err(err).Msg("Could not refresh utility context") - } - } - m.step = Decide m.lockedQC = quorumCert // DISCUSS: How does the replica recover if it's locked? Replica `formally` agrees on the QC while the rest of the network `verbally` agrees on the QC. @@ -176,15 +158,6 @@ func (handler *HotstuffReplicaMessageHandler) HandleDecideMessage(m *consensusMo return } - // replica might have receive DECIDE proposal without receiving the NEWROUND and/or PREPARE, PRECOMMIT, COMMIT proposals while performing state sync. - // in this case m.block will be nil, and we set it via the proposal, since we already performed QC verification block in the proposal is valid. - if m.block == nil { - m.block = msg.GetBlock() - if err := m.refreshUtilityUnitOfWork(); err != nil { - m.logger.Error().Err(err).Msg("Could not refresh utility context") - } - } - m.block.BlockHeader.QuorumCertificate = quorumCertBytes if err := m.commitBlock(m.block); err != nil { From 730a1bf18a53a27e8577216f558e5b357230212a Mon Sep 17 00:00:00 2001 From: Daniel Olshansky Date: Wed, 14 Jun 2023 18:17:21 -0700 Subject: [PATCH 078/100] Reverted changelogs --- consensus/doc/CHANGELOG.md | 7 ------- shared/CHANGELOG.md | 4 ---- state_machine/docs/CHANGELOG.md | 4 ---- 3 files changed, 15 deletions(-) diff --git a/consensus/doc/CHANGELOG.md b/consensus/doc/CHANGELOG.md index 28334e268..4d5b89930 100644 --- a/consensus/doc/CHANGELOG.md +++ b/consensus/doc/CHANGELOG.md @@ -7,13 +7,6 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] -## [0.0.0.55] - 2023-06-08 - -- Added state sync channels `blocksReceived` and `metadataReceived` -- Implemented `blockApplicationLoop()` -- Implemented state sync module functions `Start()` and `Stop()` -- Implemented `WaitForNetworkFSMEvents()` function in test utils - ## [0.0.0.54] - 2023-06-13 - Fix tests diff --git a/shared/CHANGELOG.md b/shared/CHANGELOG.md index 67ec21a80..b78e17e09 100644 --- a/shared/CHANGELOG.md +++ b/shared/CHANGELOG.md @@ -7,10 +7,6 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] -## [0.0.0.62] - 2023-06-14 - -- Added `pocket.StateSyncBlockCommittedEvent` to the shared messaging events - ## [0.0.0.61] - 2023-06-14 - Replace AES-GCM in key encryption with Secretbox (XSalsa20+Poly1305) diff --git a/state_machine/docs/CHANGELOG.md b/state_machine/docs/CHANGELOG.md index eda6ae944..9ac27e72c 100644 --- a/state_machine/docs/CHANGELOG.md +++ b/state_machine/docs/CHANGELOG.md @@ -7,10 +7,6 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] -## [0.0.0.5] - 2023-04-19 - -- Add `WithDebugEventsChannel()` function to be used in testing - ## [0.0.0.4] - 2023-04-03 - Clarify state transitions in README From c1057b0a1b74e1fe1364c33ddc02230ef33019de Mon Sep 17 00:00:00 2001 From: Daniel Olshansky Date: Wed, 14 Jun 2023 18:17:52 -0700 Subject: [PATCH 079/100] Removed unused debug channels --- p2p/utils_test.go | 1 + shared/node.go | 9 +++++++++ state_machine/module.go | 13 +------------ 3 files changed, 11 insertions(+), 12 deletions(-) diff --git a/p2p/utils_test.go b/p2p/utils_test.go index 41f7fa2ca..d0d0cd654 100644 --- a/p2p/utils_test.go +++ b/p2p/utils_test.go @@ -37,6 +37,7 @@ import ( // ~~~~~~ RainTree Unit Test Configurations ~~~~~~ const ( + // TECHDEBT: Look into ways to remove `serviceURLFormat` from the test suite serviceURLFormat = "node%d.consensus:42069" // Since we simulate up to a 27 node network, we will pre-generate a n >= 27 number of keys to avoid generation // every time. The genesis config seed start is set for deterministic key generation and 42 was chosen arbitrarily. diff --git a/shared/node.go b/shared/node.go index c770ce973..17efb11a9 100644 --- a/shared/node.go +++ b/shared/node.go @@ -163,29 +163,38 @@ func (node *Node) handleEvent(message *messaging.PocketEnvelope) error { }).Msg("node handling event") switch contentType { + case messaging.NodeStartedEventType: logger.Global.Info().Msg("Received NodeStartedEvent") if err := node.GetBus().GetStateMachineModule().SendEvent(coreTypes.StateMachineEvent_Start); err != nil { return err } + case messaging.HotstuffMessageContentType: return node.GetBus().GetConsensusModule().HandleMessage(message.Content) + case messaging.StateSyncMessageContentType, messaging.StateSyncBlockCommittedEventType: return node.GetBus().GetConsensusModule().HandleStateSyncMessage(message.Content) + case messaging.TxGossipMessageContentType: return node.GetBus().GetUtilityModule().HandleUtilityMessage(message.Content) + case messaging.ConsensusNewHeightEventType: return node.GetBus().GetP2PModule().HandleEvent(message.Content) + case messaging.StateMachineTransitionEventType: err_consensus := node.GetBus().GetConsensusModule().HandleEvent(message.Content) err_p2p := node.GetBus().GetP2PModule().HandleEvent(message.Content) return multierr.Combine(err_consensus, err_p2p) // TECHDEBT: Remove this lib once we move to Go 1.2 + case messaging.DebugMessageEventType: return node.handleDebugMessage(message) + default: logger.Global.Warn().Msgf("Unsupported message content type: %s", contentType) } + return nil } diff --git a/state_machine/module.go b/state_machine/module.go index 6dbe9606a..a34659755 100644 --- a/state_machine/module.go +++ b/state_machine/module.go @@ -19,11 +19,6 @@ type stateMachineModule struct { *fsm.FSM logger *modules.Logger - - // TEST_ONLY: debugChannels is only used for testing purposes. - // It is used to enable to aggregate and emit events during testing - // TECHDEBT: Find a way to avoid the need for this altogether or move it into an _test.go file - debugChannels []modules.EventsChannel } func Create(bus modules.Bus, options ...modules.ModuleOption) (modules.Module, error) { @@ -32,8 +27,7 @@ func Create(bus modules.Bus, options ...modules.ModuleOption) (modules.Module, e func (*stateMachineModule) Create(bus modules.Bus, options ...modules.ModuleOption) (modules.Module, error) { m := &stateMachineModule{ - logger: logger.Global.CreateLoggerForModule(modules.StateMachineModuleName), - debugChannels: make([]modules.EventsChannel, 0), + logger: logger.Global.CreateLoggerForModule(modules.StateMachineModuleName), } m.FSM = NewNodeFSM(&fsm.Callbacks{ @@ -52,11 +46,6 @@ func (*stateMachineModule) Create(bus modules.Bus, options ...modules.ModuleOpti m.logger.Fatal().Err(err).Msg("failed to pack state machine transition event") } bus.PublishEventToBus(newStateMachineTransitionEvent) - - // TEST_ONLY: Broadcast the events to additional channels used for testing purposes - for _, channel := range m.debugChannels { - channel <- newStateMachineTransitionEvent - } }, }) From 17384e9db3a9bec82bb703548144c2173b0bd742 Mon Sep 17 00:00:00 2001 From: Daniel Olshansky Date: Wed, 14 Jun 2023 22:12:28 -0700 Subject: [PATCH 080/100] Removed publishStateSyncBlockCommittedEvent --- app/client/cli/debug.go | 2 +- consensus/e2e_tests/state_sync_test.go | 47 +++++++++++-------- .../{fsm_handler.go => event_handler.go} | 24 +++++++--- consensus/events.go | 12 ----- consensus/module_consensus_state_sync.go | 3 +- consensus/pacemaker/module.go | 2 + consensus/state_sync/module.go | 26 ++++------ consensus/state_sync_handler.go | 3 -- p2p/event_handler.go | 1 - shared/messaging/events.go | 3 +- shared/messaging/proto/events.proto | 5 -- shared/node.go | 7 +-- 12 files changed, 62 insertions(+), 73 deletions(-) rename consensus/{fsm_handler.go => event_handler.go} (90%) diff --git a/app/client/cli/debug.go b/app/client/cli/debug.go index 07ab0cfb2..5c5b41f05 100644 --- a/app/client/cli/debug.go +++ b/app/client/cli/debug.go @@ -252,7 +252,7 @@ func fetchPeerstore(cmd *cobra.Command) (typesP2P.Peerstore, error) { // sendConsensusNewHeightEventToP2PModule mimicks the consensus module sending a ConsensusNewHeightEvent to the p2p module // This is necessary because the debug client is not a validator and has no consensus module but it has to update the peerstore -// depending on the changes in the validator set. +// depending on the changes in the validator set, which is based on the on-chain state. // TODO(#613): Make the debug client mimic a full node. func sendConsensusNewHeightEventToP2PModule(height uint64, bus modules.Bus) error { newHeightEvent, err := messaging.PackMessage(&messaging.ConsensusNewHeightEvent{Height: height}) diff --git a/consensus/e2e_tests/state_sync_test.go b/consensus/e2e_tests/state_sync_test.go index ea58bc70a..1eff6ceb1 100644 --- a/consensus/e2e_tests/state_sync_test.go +++ b/consensus/e2e_tests/state_sync_test.go @@ -123,16 +123,14 @@ func TestStateSync_UnsyncedPeerSyncs_Success(t *testing.T) { targetHeight := uint64(6) // Set the unsynced node to height (2) and rest of the nodes to height (4) + unsyncedNode.GetBus().GetConsensusModule().SetHeight(unsyncedNodeHeight) for id, pocketNode := range pocketNodes { - var height uint64 - if id == unsyncedNodeId { - height = unsyncedNodeHeight - } else { - height = targetHeight + consensusMod := pocketNode.GetBus().GetConsensusModule() + if id != unsyncedNodeId { + consensusMod.SetHeight(targetHeight) } - pocketNode.GetBus().GetConsensusModule().SetHeight(height) - pocketNode.GetBus().GetConsensusModule().SetStep(uint8(consensus.NewRound)) - pocketNode.GetBus().GetConsensusModule().SetRound(uint64(0)) + consensusMod.SetStep(uint8(consensus.NewRound)) + consensusMod.SetRound(uint64(0)) } // Sanity check unsynched node is at height 2 @@ -153,11 +151,19 @@ func TestStateSync_UnsyncedPeerSyncs_Success(t *testing.T) { // Trigger the next round of consensus so the unsynched nodes is prompted to start synching triggerNextView(t, pocketNodes) advanceTime(t, clockMock, 10*time.Millisecond) + + // Wait for proposal messages proposalMsgs, err := waitForNetworkConsensusEvents(t, clockMock, sharedNetworkChannel, typesCons.HotstuffStep(consensus.NewRound), consensus.Propose, numValidators*numValidators, 500, false) require.NoError(t, err) + + // Broadcast the proposal messages to all nodes broadcastMessages(t, proposalMsgs, pocketNodes) advanceTime(t, clockMock, 10*time.Millisecond) + // TODO: Figure out why we have one extra (non harmful) request at the very beginning + _, err = waitForNetworkStateSyncEvents(t, clockMock, sharedNetworkChannel, "error waiting on response to a get block request", 1, 5000, false, reflect.TypeOf(&typesCons.StateSyncMessage_GetBlockReq{})) + require.NoError(t, err) + for unsyncedNodeHeight < targetHeight { // Wait for the unsynched node to request the block at the current height blockRequests, err := waitForNetworkStateSyncEvents(t, clockMock, sharedNetworkChannel, "error waiting on response to a get block request", 1, 5000, false, reflect.TypeOf(&typesCons.StateSyncMessage_GetBlockReq{})) @@ -197,19 +203,22 @@ func TestStateSync_UnsyncedPeerSyncs_Success(t *testing.T) { send(t, unsyncedNode, blockResponses[0]) advanceTime(t, clockMock, 10*time.Millisecond) debugChannel := unsyncedNode.GetBus().GetDebugEventBus() - select { - case e := <-debugChannel: - if e.GetContentType() == messaging.StateSyncBlockCommittedEventType { - msg, err := codec.GetCodec().FromAny(e.Content) - require.NoError(t, err) - blockCommittedEvent, ok := msg.(*messaging.StateSyncBlockCommittedEvent) - require.True(t, ok) - if unsyncedNodeHeight == blockCommittedEvent.Height { - break + loop: + for { + select { + case e := <-debugChannel: + if e.GetContentType() == messaging.ConsensusNewHeightEventType { + msg, err := codec.GetCodec().FromAny(e.Content) + require.NoError(t, err) + blockCommittedEvent, ok := msg.(*messaging.ConsensusNewHeightEvent) + require.True(t, ok) + if unsyncedNodeHeight == blockCommittedEvent.Height { + break loop + } } + case <-time.After(time.Second): + assert.Fail(t, "Timed out waiting for block %d to be committed...", unsyncedNodeHeight) } - case <-time.After(time.Second): - assert.Fail(t, "Timed out waiting for block %d to be committed...", unsyncedNodeHeight) } // ensure unsynced node height increased diff --git a/consensus/fsm_handler.go b/consensus/event_handler.go similarity index 90% rename from consensus/fsm_handler.go rename to consensus/event_handler.go index 51fd55585..3918797fa 100644 --- a/consensus/fsm_handler.go +++ b/consensus/event_handler.go @@ -19,23 +19,33 @@ const ( ) // Implements the `HandleEvent` function in the `ConsensusModule` interface -func (m *consensusModule) HandleEvent(transitionMessageAny *anypb.Any) error { +func (m *consensusModule) HandleEvent(event *anypb.Any) error { m.m.Lock() defer m.m.Unlock() - switch transitionMessageAny.MessageName() { + msg, err := codec.GetCodec().FromAny(event) + if err != nil { + return err + } + + switch event.MessageName() { + case messaging.StateMachineTransitionEventType: - msg, err := codec.GetCodec().FromAny(transitionMessageAny) - if err != nil { - return err - } stateTransitionMessage, ok := msg.(*messaging.StateMachineTransitionEvent) if !ok { return fmt.Errorf("failed to cast message to StateSyncMessage") } return m.handleStateTransitionEvent(stateTransitionMessage) + + case messaging.ConsensusNewHeightEventType: + blockCommittedEvent, ok := msg.(*messaging.ConsensusNewHeightEvent) + if !ok { + return fmt.Errorf("failed to cast event to ConsensusNewHeightEvent") + } + return m.stateSync.HandleBlockCommittedEvent(blockCommittedEvent) + default: - return typesCons.ErrUnknownStateSyncMessageType(transitionMessageAny.MessageName()) + return typesCons.ErrUnknownStateSyncMessageType(event.MessageName()) } } diff --git a/consensus/events.go b/consensus/events.go index 4a27a0050..0e31f8d72 100644 --- a/consensus/events.go +++ b/consensus/events.go @@ -12,15 +12,3 @@ func (m *consensusModule) publishNewHeightEvent(height uint64) { } m.GetBus().PublishEventToBus(newHeightEvent) } - -// publishStateSyncBlockCommittedEvent publishes a state_machine/module.goew state sync block committed event, so that state sync module can react to it -func (m *consensusModule) publishStateSyncBlockCommittedEvent(height uint64) { - blockCommittedEvent := &messaging.StateSyncBlockCommittedEvent{ - Height: height, - } - stateSyncBlockCommittedEvent, err := messaging.PackMessage(blockCommittedEvent) - if err != nil { - m.logger.Fatal().Err(err).Msg("Failed to pack state sync committed block event") - } - m.GetBus().PublishEventToBus(stateSyncBlockCommittedEvent) -} diff --git a/consensus/module_consensus_state_sync.go b/consensus/module_consensus_state_sync.go index f7203152b..1434ef9f2 100644 --- a/consensus/module_consensus_state_sync.go +++ b/consensus/module_consensus_state_sync.go @@ -74,10 +74,9 @@ func (m *consensusModule) tryToApplyRequestedBlock(blockResponse *typesCons.GetB m.logger.Error().Err(err).Msg("Could not commit block") return } - logger.Info().Int64("height", int64(block.BlockHeader.Height)).Msgf("Block, at height %d is committed!", block.BlockHeader.Height) + logger.Info().Int64("height", int64(block.BlockHeader.Height)).Msgf("State sync committed block at height %d!", block.BlockHeader.Height) m.paceMaker.NewHeight() - m.publishStateSyncBlockCommittedEvent(block.BlockHeader.Height) } // REFACTOR(#434): Once we consolidated NodeIds/PeerIds, this could potentially be removed diff --git a/consensus/pacemaker/module.go b/consensus/pacemaker/module.go index c11f03cf3..1d5e8113f 100644 --- a/consensus/pacemaker/module.go +++ b/consensus/pacemaker/module.go @@ -226,8 +226,10 @@ func (m *pacemaker) NewHeight() { consensusMod := m.GetBus().GetConsensusModule() consensusMod.ResetRound(true) + newHeight := consensusMod.CurrentHeight() + 1 consensusMod.SetHeight(newHeight) + m.logger.Info().Uint64("height", newHeight).Msg("🏁 Starting 1st round at new height 🏁") // CONSIDERATION: We are omitting CommitQC and TimeoutQC here for simplicity, but should we add them? diff --git a/consensus/state_sync/module.go b/consensus/state_sync/module.go index cec35bdc0..0a5f754d2 100644 --- a/consensus/state_sync/module.go +++ b/consensus/state_sync/module.go @@ -3,12 +3,10 @@ package state_sync import ( "context" "encoding/hex" - "fmt" "time" typesCons "github.com/pokt-network/pocket/consensus/types" "github.com/pokt-network/pocket/logger" - "github.com/pokt-network/pocket/shared/codec" coreTypes "github.com/pokt-network/pocket/shared/core/types" "github.com/pokt-network/pocket/shared/messaging" "github.com/pokt-network/pocket/shared/modules" @@ -29,7 +27,7 @@ type StateSyncModule interface { modules.Module StateSyncServerModule - HandleStateSyncBlockCommittedEvent(message *anypb.Any) error + HandleBlockCommittedEvent(*messaging.ConsensusNewHeightEvent) error HandleStateSyncMetadataResponse(*typesCons.StateSyncMetadataResponse) error // TECHDEBT: This function can be removed once the dependency of state sync on the FSM module is removed. @@ -104,7 +102,7 @@ func (m *stateSync) StartSynchronousStateSync() error { // - maxHeight is the max * committed * height of the network // - currentHeight is the latest * committing * height of the node for currentHeight <= maxHeight { - m.logger.Info().Msgf("Sync is requesting block: %d, ending height: %d", currentHeight, maxHeight) + m.logger.Info().Msgf("Synchronous state sync is requesting block: %d, ending height: %d", currentHeight, maxHeight) // form the get block request message stateSyncGetBlockMsg := &typesCons.StateSyncMessage{ @@ -128,7 +126,7 @@ func (m *stateSync) StartSynchronousStateSync() error { // Wait for the consensus module to commit the requested block and re-try on timeout select { case blockHeight := <-m.committedBlocksChannel: - m.logger.Info().Msgf("Block %d is committed!", blockHeight) + m.logger.Info().Msgf("State sync received event that block %d is committed!", blockHeight) case <-time.After(blockWaitingPeriod): m.logger.Warn().Msgf("Timed out waiting for block %d to be committed...", currentHeight) } @@ -151,23 +149,15 @@ func (m *stateSync) StartSynchronousStateSync() error { } func (m *stateSync) HandleStateSyncMetadataResponse(res *typesCons.StateSyncMetadataResponse) error { + m.logger.Info().Msg("Handling state sync metadata response") + m.metadataReceived <- res return nil } -func (m *stateSync) HandleStateSyncBlockCommittedEvent(event *anypb.Any) error { - evt, err := codec.GetCodec().FromAny(event) - if err != nil { - return err - } - - if event.MessageName() == messaging.StateSyncBlockCommittedEventType { - newCommitBlockEvent, ok := evt.(*messaging.StateSyncBlockCommittedEvent) - if !ok { - return fmt.Errorf("failed to cast event to StateSyncBlockCommittedEvent") - } - m.committedBlocksChannel <- newCommitBlockEvent.Height - } +func (m *stateSync) HandleBlockCommittedEvent(msg *messaging.ConsensusNewHeightEvent) error { + m.logger.Info().Msg("Handling state sync block committed event") + m.committedBlocksChannel <- msg.Height return nil } diff --git a/consensus/state_sync_handler.go b/consensus/state_sync_handler.go index 9f701606e..9f3c06c21 100644 --- a/consensus/state_sync_handler.go +++ b/consensus/state_sync_handler.go @@ -22,9 +22,6 @@ func (m *consensusModule) HandleStateSyncMessage(stateSyncMessageAny *anypb.Any) } return m.handleStateSyncMessage(stateSyncMessage) - case messaging.StateSyncBlockCommittedEventType: - return m.stateSync.HandleStateSyncBlockCommittedEvent(stateSyncMessageAny) - default: return typesCons.ErrUnknownStateSyncMessageType(stateSyncMessageAny.MessageName()) } diff --git a/p2p/event_handler.go b/p2p/event_handler.go index 48e1a7d73..8fe2a2947 100644 --- a/p2p/event_handler.go +++ b/p2p/event_handler.go @@ -9,7 +9,6 @@ import ( "google.golang.org/protobuf/types/known/anypb" ) -// CONSIDERATION(#576): making this part of some new `ConnManager`. func (m *p2pModule) HandleEvent(event *anypb.Any) error { evt, err := codec.GetCodec().FromAny(event) if err != nil { diff --git a/shared/messaging/events.go b/shared/messaging/events.go index 28692f27b..6b19785be 100644 --- a/shared/messaging/events.go +++ b/shared/messaging/events.go @@ -10,8 +10,7 @@ const ( HotstuffMessageContentType = "consensus.HotstuffMessage" // Consensus - State Sync - StateSyncBlockCommittedEventType = "pocket.StateSyncBlockCommittedEvent" - StateSyncMessageContentType = "consensus.StateSyncMessage" + StateSyncMessageContentType = "consensus.StateSyncMessage" // Utility TxGossipMessageContentType = "utility.TxGossipMessage" diff --git a/shared/messaging/proto/events.proto b/shared/messaging/proto/events.proto index d509ba5b5..8fc6ff3e5 100644 --- a/shared/messaging/proto/events.proto +++ b/shared/messaging/proto/events.proto @@ -16,9 +16,4 @@ message StateMachineTransitionEvent { string event = 1; string previous_state = 2; string new_state = 3; -} - -// Notifies the node that the consensus module has committed a block (either through consensus or state sync) -message StateSyncBlockCommittedEvent { - uint64 height = 1; } \ No newline at end of file diff --git a/shared/node.go b/shared/node.go index 17efb11a9..d0b8e385e 100644 --- a/shared/node.go +++ b/shared/node.go @@ -173,15 +173,16 @@ func (node *Node) handleEvent(message *messaging.PocketEnvelope) error { case messaging.HotstuffMessageContentType: return node.GetBus().GetConsensusModule().HandleMessage(message.Content) - case messaging.StateSyncMessageContentType, - messaging.StateSyncBlockCommittedEventType: + case messaging.StateSyncMessageContentType: return node.GetBus().GetConsensusModule().HandleStateSyncMessage(message.Content) case messaging.TxGossipMessageContentType: return node.GetBus().GetUtilityModule().HandleUtilityMessage(message.Content) case messaging.ConsensusNewHeightEventType: - return node.GetBus().GetP2PModule().HandleEvent(message.Content) + err_consensus := node.GetBus().GetConsensusModule().HandleEvent(message.Content) + err_p2p := node.GetBus().GetP2PModule().HandleEvent(message.Content) + return multierr.Combine(err_consensus, err_p2p) // TECHDEBT: Remove this lib once we move to Go 1.2 case messaging.StateMachineTransitionEventType: err_consensus := node.GetBus().GetConsensusModule().HandleEvent(message.Content) From 2771a84a9d71946fb70eb994e9b78a0ca73761b0 Mon Sep 17 00:00:00 2001 From: Daniel Olshansky Date: Wed, 14 Jun 2023 22:13:38 -0700 Subject: [PATCH 081/100] Renamed module_state_sync to state_sync_helpers --- .../{module_consensus_state_sync.go => state_sync_helpers.go} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename consensus/{module_consensus_state_sync.go => state_sync_helpers.go} (100%) diff --git a/consensus/module_consensus_state_sync.go b/consensus/state_sync_helpers.go similarity index 100% rename from consensus/module_consensus_state_sync.go rename to consensus/state_sync_helpers.go From 54f6bce89f4893057bb25948be30195de5ad4d02 Mon Sep 17 00:00:00 2001 From: Daniel Olshansky Date: Wed, 14 Jun 2023 22:14:39 -0700 Subject: [PATCH 082/100] Consolidated handlers and helpers for state sync --- consensus/state_sync_handler.go | 109 ++++++++++++++++++++++++++++++ consensus/state_sync_helpers.go | 116 -------------------------------- 2 files changed, 109 insertions(+), 116 deletions(-) delete mode 100644 consensus/state_sync_helpers.go diff --git a/consensus/state_sync_handler.go b/consensus/state_sync_handler.go index 9f3c06c21..8f7758b4c 100644 --- a/consensus/state_sync_handler.go +++ b/consensus/state_sync_handler.go @@ -5,7 +5,9 @@ import ( typesCons "github.com/pokt-network/pocket/consensus/types" "github.com/pokt-network/pocket/shared/codec" + coreTypes "github.com/pokt-network/pocket/shared/core/types" "github.com/pokt-network/pocket/shared/messaging" + "google.golang.org/protobuf/proto" "google.golang.org/protobuf/types/known/anypb" ) @@ -62,3 +64,110 @@ func (m *consensusModule) handleStateSyncMessage(stateSyncMessage *typesCons.Sta return fmt.Errorf("unspecified state sync message type") } } + +// tryToApplyRequestedBlock tries to commit the requested Block received from a peer. +// Intended to be called via a background goroutine. +// CLEANUP: Investigate whether this should be part of `Consensus` or part of `StateSync` +func (m *consensusModule) tryToApplyRequestedBlock(blockResponse *typesCons.GetBlockResponse) { + logger := m.logger.With().Str("source", "tryToApplyRequestedBlock").Logger() + + // Retrieve the block we're about to try and apply + block := blockResponse.Block + if block == nil { + logger.Error().Msg("Received nil block in GetBlockResponse") + return + } + logger.Info().Msgf("Received new block at height %d.", block.BlockHeader.Height) + + // Check what the current latest committed block height is + maxPersistedHeight, err := m.maxPersistedBlockHeight() + if err != nil { + logger.Err(err).Msg("couldn't query max persisted height") + return + } + + // Check if the block being synched is behind the current height + if block.BlockHeader.Height <= maxPersistedHeight { + logger.Debug().Msgf("Discarding block height %d, since node is ahead at height %d", block.BlockHeader.Height, maxPersistedHeight) + return + } + + // Check if the block being synched is ahead of the current height + if block.BlockHeader.Height > m.CurrentHeight() { + // IMPROVE: we need to store block responses that we are not yet ready to validate so we can validate them on a subsequent iteration of this loop + logger.Info().Bool("TODO", true).Msgf("Received block at height %d, discarding as it is higher than the current height", block.BlockHeader.Height) + return + } + + // Perform basic validation on the block + if err = m.basicValidateBlock(block); err != nil { + logger.Err(err).Msg("failed to validate block") + return + } + + // Update the leader proposing the block + // TECHDEBT: This ID logic could potentially be simplified in the future but needs a SPIKE + leaderIdInt, err := m.getNodeIdFromNodeAddress(string(block.BlockHeader.ProposerAddress)) + if err != nil { + m.logger.Error().Err(err).Msg("Could not get leader id from leader address") + return + } + m.leaderId = typesCons.NewNodeId(leaderIdInt) + + // Prepare the utility UOW of work to apply a new block + if err := m.refreshUtilityUnitOfWork(); err != nil { + m.logger.Error().Err(err).Msg("Could not refresh utility context") + return + } + + // Try to apply the block by validating the transactions in the block + if err := m.applyBlock(block); err != nil { + m.logger.Error().Err(err).Msg("Could not apply block") + return + } + + // Try to commit the block to persistence + if err := m.commitBlock(block); err != nil { + m.logger.Error().Err(err).Msg("Could not commit block") + return + } + logger.Info().Int64("height", int64(block.BlockHeader.Height)).Msgf("State sync committed block at height %d!", block.BlockHeader.Height) + + m.paceMaker.NewHeight() +} + +// REFACTOR(#434): Once we consolidated NodeIds/PeerIds, this could potentially be removed +func (m *consensusModule) getNodeIdFromNodeAddress(peerId string) (uint64, error) { + validators, err := m.getValidatorsAtHeight(m.CurrentHeight()) + if err != nil { + m.logger.Warn().Err(err).Msgf("Could not get validators at height %d when checking if peer %s is a validator", m.CurrentHeight(), peerId) + return 0, fmt.Errorf("Could determine if peer %s is a validator or not: %w", peerId, err) + } + + valAddrToIdMap := typesCons.NewActorMapper(validators).GetValAddrToIdMap() + return uint64(valAddrToIdMap[peerId]), nil +} + +// basicValidateBlock performs basic validation of the block, its metadata, signatures, +// but not the transactions themselves +func (m *consensusModule) basicValidateBlock(block *coreTypes.Block) error { + blockHeader := block.BlockHeader + qcBytes := blockHeader.GetQuorumCertificate() + + if qcBytes == nil { + m.logger.Error().Err(typesCons.ErrNoQcInReceivedBlock).Msg(typesCons.DisregardBlock) + return typesCons.ErrNoQcInReceivedBlock + } + + qc := typesCons.QuorumCertificate{} + if err := proto.Unmarshal(qcBytes, &qc); err != nil { + return err + } + + if err := m.validateQuorumCertificate(&qc); err != nil { + m.logger.Error().Err(err).Msg("Couldn't apply block, invalid QC") + return err + } + + return nil +} diff --git a/consensus/state_sync_helpers.go b/consensus/state_sync_helpers.go deleted file mode 100644 index 1434ef9f2..000000000 --- a/consensus/state_sync_helpers.go +++ /dev/null @@ -1,116 +0,0 @@ -package consensus - -import ( - "fmt" - - typesCons "github.com/pokt-network/pocket/consensus/types" - coreTypes "github.com/pokt-network/pocket/shared/core/types" - "google.golang.org/protobuf/proto" -) - -// tryToApplyRequestedBlock tries to commit the requested Block received from a peer. -// Intended to be called via a background goroutine. -// CLEANUP: Investigate whether this should be part of `Consensus` or part of `StateSync` -func (m *consensusModule) tryToApplyRequestedBlock(blockResponse *typesCons.GetBlockResponse) { - logger := m.logger.With().Str("source", "tryToApplyRequestedBlock").Logger() - - // Retrieve the block we're about to try and apply - block := blockResponse.Block - if block == nil { - logger.Error().Msg("Received nil block in GetBlockResponse") - return - } - logger.Info().Msgf("Received new block at height %d.", block.BlockHeader.Height) - - // Check what the current latest committed block height is - maxPersistedHeight, err := m.maxPersistedBlockHeight() - if err != nil { - logger.Err(err).Msg("couldn't query max persisted height") - return - } - - // Check if the block being synched is behind the current height - if block.BlockHeader.Height <= maxPersistedHeight { - logger.Debug().Msgf("Discarding block height %d, since node is ahead at height %d", block.BlockHeader.Height, maxPersistedHeight) - return - } - - // Check if the block being synched is ahead of the current height - if block.BlockHeader.Height > m.CurrentHeight() { - // IMPROVE: we need to store block responses that we are not yet ready to validate so we can validate them on a subsequent iteration of this loop - logger.Info().Bool("TODO", true).Msgf("Received block at height %d, discarding as it is higher than the current height", block.BlockHeader.Height) - return - } - - // Perform basic validation on the block - if err = m.basicValidateBlock(block); err != nil { - logger.Err(err).Msg("failed to validate block") - return - } - - // Update the leader proposing the block - // TECHDEBT: This ID logic could potentially be simplified in the future but needs a SPIKE - leaderIdInt, err := m.getNodeIdFromNodeAddress(string(block.BlockHeader.ProposerAddress)) - if err != nil { - m.logger.Error().Err(err).Msg("Could not get leader id from leader address") - return - } - m.leaderId = typesCons.NewNodeId(leaderIdInt) - - // Prepare the utility UOW of work to apply a new block - if err := m.refreshUtilityUnitOfWork(); err != nil { - m.logger.Error().Err(err).Msg("Could not refresh utility context") - return - } - - // Try to apply the block by validating the transactions in the block - if err := m.applyBlock(block); err != nil { - m.logger.Error().Err(err).Msg("Could not apply block") - return - } - - // Try to commit the block to persistence - if err := m.commitBlock(block); err != nil { - m.logger.Error().Err(err).Msg("Could not commit block") - return - } - logger.Info().Int64("height", int64(block.BlockHeader.Height)).Msgf("State sync committed block at height %d!", block.BlockHeader.Height) - - m.paceMaker.NewHeight() -} - -// REFACTOR(#434): Once we consolidated NodeIds/PeerIds, this could potentially be removed -func (m *consensusModule) getNodeIdFromNodeAddress(peerId string) (uint64, error) { - validators, err := m.getValidatorsAtHeight(m.CurrentHeight()) - if err != nil { - m.logger.Warn().Err(err).Msgf("Could not get validators at height %d when checking if peer %s is a validator", m.CurrentHeight(), peerId) - return 0, fmt.Errorf("Could determine if peer %s is a validator or not: %w", peerId, err) - } - - valAddrToIdMap := typesCons.NewActorMapper(validators).GetValAddrToIdMap() - return uint64(valAddrToIdMap[peerId]), nil -} - -// basicValidateBlock performs basic validation of the block, its metadata, signatures, -// but not the transactions themselves -func (m *consensusModule) basicValidateBlock(block *coreTypes.Block) error { - blockHeader := block.BlockHeader - qcBytes := blockHeader.GetQuorumCertificate() - - if qcBytes == nil { - m.logger.Error().Err(typesCons.ErrNoQcInReceivedBlock).Msg(typesCons.DisregardBlock) - return typesCons.ErrNoQcInReceivedBlock - } - - qc := typesCons.QuorumCertificate{} - if err := proto.Unmarshal(qcBytes, &qc); err != nil { - return err - } - - if err := m.validateQuorumCertificate(&qc); err != nil { - m.logger.Error().Err(err).Msg("Couldn't apply block, invalid QC") - return err - } - - return nil -} From e9fd550e89513c9356505c7fd43e53ce963f508a Mon Sep 17 00:00:00 2001 From: Daniel Olshansky Date: Wed, 14 Jun 2023 22:27:58 -0700 Subject: [PATCH 083/100] Minor formatting issues in the .feature files --- consensus/state_sync_handler.go | 3 +++ e2e/tests/query.feature | 4 ++-- e2e/tests/valdator.feature | 6 +++--- 3 files changed, 8 insertions(+), 5 deletions(-) diff --git a/consensus/state_sync_handler.go b/consensus/state_sync_handler.go index 8f7758b4c..99ee7822a 100644 --- a/consensus/state_sync_handler.go +++ b/consensus/state_sync_handler.go @@ -55,6 +55,9 @@ func (m *consensusModule) handleStateSyncMessage(stateSyncMessage *typesCons.Sta go m.stateSync.HandleStateSyncMetadataResponse(stateSyncMessage.GetMetadataRes()) return nil + // NB: Note that this is the only case that calls a function in the consensus module (not the state sync submodule) since + // consensus is the one responsible for calling business logic to apply and commit the blocks. State sync listens for events + // that are a result of it. case *typesCons.StateSyncMessage_GetBlockRes: m.logger.Info().Str("proto_type", "GetBlockResponse").Msg("Handling StateSyncMessage GetBlockResponse") go m.tryToApplyRequestedBlock(stateSyncMessage.GetGetBlockRes()) diff --git a/e2e/tests/query.feature b/e2e/tests/query.feature index 91e3e4eb9..e7993e1f2 100644 --- a/e2e/tests/query.feature +++ b/e2e/tests/query.feature @@ -1,13 +1,13 @@ Feature: Query Namespace - Scenario: User Wants Help Using The Query Command + Scenario: User Wants Help Using The Query Command Given the user has a validator When the user runs the command "Query help" Then the user should be able to see standard output containing "Available Commands" And the validator should have exited without error - Scenario: User Wants To See The Block At Current Height + Scenario: User Wants To See The Block At Current Height Given the user has a validator When the user runs the command "Query Block" Then the user should be able to see standard output containing "state_hash" diff --git a/e2e/tests/valdator.feature b/e2e/tests/valdator.feature index ec8a2ca47..1cea8e3cc 100644 --- a/e2e/tests/valdator.feature +++ b/e2e/tests/valdator.feature @@ -1,13 +1,13 @@ # TECHDEBT: Validator should eventually be changed to full node or just node. Feature: Validator Namespace - Scenario: User Wants Help Using The Validator Command + Scenario: User Wants Help Using The Validator Command Given the user has a validator When the user runs the command "Validator help" Then the user should be able to see standard output containing "Available Commands" And the validator should have exited without error - Scenario: User Can Stake An Address + Scenario: User Can Stake An Address Given the user has a validator When the user stakes their validator with amount 150000000001 uPOKT Then the user should be able to see standard output containing "" @@ -17,7 +17,7 @@ Feature: Validator Namespace Given the user has a validator When the user stakes their validator with amount 150000000001 uPOKT Then the user should be able to see standard output containing "" - Then the user should be able to unstake their validator + Then the user should be able to unstake their validator Then the user should be able to see standard output containing "" And the validator should have exited without error From c2cc4ae54b366d93fbbb13fadfd2c5a81cf028c4 Mon Sep 17 00:00:00 2001 From: Daniel Olshansky Date: Wed, 26 Jul 2023 16:53:04 -0700 Subject: [PATCH 084/100] Remove code from merge conflict --- app/client/cli/debug.go | 43 ----------------------------------------- 1 file changed, 43 deletions(-) diff --git a/app/client/cli/debug.go b/app/client/cli/debug.go index d9841f8ef..99d5b83de 100644 --- a/app/client/cli/debug.go +++ b/app/client/cli/debug.go @@ -1,8 +1,6 @@ package cli import ( - "errors" - "fmt" "os" "github.com/manifoldco/promptui" @@ -11,9 +9,7 @@ import ( "github.com/pokt-network/pocket/app/client/cli/helpers" "github.com/pokt-network/pocket/logger" - "github.com/pokt-network/pocket/p2p/providers/peerstore_provider" "github.com/pokt-network/pocket/shared/messaging" - "github.com/pokt-network/pocket/shared/modules" ) // TECHDEBT: Lowercase variables / constants that do not need to be exported. @@ -211,42 +207,3 @@ func sendDebugMessage(cmd *cobra.Command, debugMsg *messaging.DebugMessage) { logger.Global.Error().Err(err).Msg("Failed to send debug message") } } - -// fetchPeerstore retrieves the providers from the CLI context and uses them to retrieve the address book for the current height -func fetchPeerstore(cmd *cobra.Command) (typesP2P.Peerstore, error) { - bus, ok := helpers.GetValueFromCLIContext[modules.Bus](cmd, helpers.BusCLICtxKey) - if !ok || bus == nil { - return nil, errors.New("retrieving bus from CLI context") - } - // TECHDEBT(#810, #811): use `bus.GetPeerstoreProvider()` after peerstore provider - // is retrievable as a proper submodule - pstoreProvider, err := bus.GetModulesRegistry().GetModule(peerstore_provider.PeerstoreProviderSubmoduleName) - if err != nil { - return nil, errors.New("retrieving peerstore provider") - } - currentHeightProvider := bus.GetCurrentHeightProvider() - - height := currentHeightProvider.CurrentHeight() - pstore, err := pstoreProvider.(peerstore_provider.PeerstoreProvider).GetStakedPeerstoreAtHeight(height) - if err != nil { - return nil, fmt.Errorf("retrieving peerstore at height %d", height) - } - // Inform the client's main P2P that a the blockchain is at a new height so it can, if needed, update its view of the validator set - err = sendConsensusNewHeightEventToP2PModule(height, bus) - if err != nil { - return nil, errors.New("sending consensus new height event") - } - return pstore, nil -} - -// sendConsensusNewHeightEventToP2PModule mimicks the consensus module sending a ConsensusNewHeightEvent to the p2p module -// This is necessary because the debug client is not a validator and has no consensus module but it has to update the peerstore -// depending on the changes in the validator set, which is based on the on-chain state. -// TODO(#613): Make the debug client mimic a full node. -func sendConsensusNewHeightEventToP2PModule(height uint64, bus modules.Bus) error { - newHeightEvent, err := messaging.PackMessage(&messaging.ConsensusNewHeightEvent{Height: height}) - if err != nil { - logger.Global.Fatal().Err(err).Msg("Failed to pack consensus new height event") - } - return bus.GetP2PModule().HandleEvent(newHeightEvent.Content) -} From 209d8cc884c7b0d849d66c73379c54796c7c4b72 Mon Sep 17 00:00:00 2001 From: Daniel Olshansky Date: Wed, 26 Jul 2023 17:28:02 -0700 Subject: [PATCH 085/100] Tend to go lint errors --- consensus/e2e_tests/utils_test.go | 79 ++++--------------------------- consensus/event_handler.go | 4 +- consensus/state_sync/helpers.go | 16 +------ consensus/state_sync/module.go | 49 ++++++++++--------- 4 files changed, 41 insertions(+), 107 deletions(-) diff --git a/consensus/e2e_tests/utils_test.go b/consensus/e2e_tests/utils_test.go index 61e9142cb..e915c94ee 100644 --- a/consensus/e2e_tests/utils_test.go +++ b/consensus/e2e_tests/utils_test.go @@ -32,7 +32,6 @@ import ( "github.com/pokt-network/pocket/shared/utils" "github.com/pokt-network/pocket/state_machine" "github.com/stretchr/testify/require" - "google.golang.org/protobuf/proto" "google.golang.org/protobuf/types/known/anypb" ) @@ -55,6 +54,7 @@ type idToPrivKeyMapping map[typesCons.NodeId]cryptoPocket.PrivateKey /*** Node Generation Helpers ***/ +//nolint:unparam // validatorCount will be varied in the future func generateNodeRuntimeMgrs(t *testing.T, validatorCount int, clockMgr clock.Clock) []*runtime.Manager { t.Helper() @@ -126,18 +126,17 @@ func createTestConsensusPocketNode( consensusModule, ok := consensusMod.(modules.ConsensusModule) require.True(t, ok) - // _, err = state_machine.Create(bus, state_machine.WithDebugEventsChannel(sharedNetworkChannel)) _, err = state_machine.Create(bus) require.NoError(t, err) runtimeMgr := (bus).GetRuntimeMgr() // TODO(olshansky): At the moment we are using the same base mocks for all the tests, // but note that they will need to be customized on a per test basis. - p2pMock := baseP2PMock(t, eventsChannel) - utilityMock := baseUtilityMock(t, eventsChannel, runtimeMgr.GetGenesis(), consensusModule) - telemetryMock := baseTelemetryMock(t, eventsChannel) - loggerMock := baseLoggerMock(t, eventsChannel) - rpcMock := baseRpcMock(t, eventsChannel) + p2pMock := baseP2PMock(t, sharedNetworkChannel) + utilityMock := baseUtilityMock(t, sharedNetworkChannel, runtimeMgr.GetGenesis(), consensusModule) + telemetryMock := baseTelemetryMock(t, sharedNetworkChannel) + loggerMock := baseLoggerMock(t, sharedNetworkChannel) + rpcMock := baseRpcMock(t, sharedNetworkChannel) ibcMock, hostMock := ibcUtils.IBCMockWithHost(t, bus) bus.RegisterModule(hostMock) @@ -195,10 +194,6 @@ func getConsensusNodeState(node *shared.Node) typesCons.ConsensusNodeState { return getConsensusModImpl(node).MethodByName("GetNodeState").Call([]reflect.Value{})[0].Interface().(typesCons.ConsensusNodeState) } -func getConsensusModElem(node *shared.Node) reflect.Value { - return reflect.ValueOf(node.GetBus().GetConsensusModule()).Elem() -} - func getConsensusModImpl(node *shared.Node) reflect.Value { return reflect.ValueOf(node.GetBus().GetConsensusModule()) } @@ -270,6 +265,8 @@ func waitForNetworkConsensusEvents( // IMPROVE: Consider unifying this function with WaitForNetworkConsensusEvents // This is a helper for 'waitForEventsInternal' that creates the `includeFilter` function based on state sync message specific parameters. +// +//nolint:unparam // failOnExtraMessages will be varied in the future func waitForNetworkStateSyncEvents( t *testing.T, clck *clock.Mock, @@ -735,46 +732,6 @@ func triggerNextView(t *testing.T, pocketNodes idToNodeMapping) { for _, node := range pocketNodes { triggerDebugMessage(t, node, messaging.DebugMessageAction_DEBUG_CONSENSUS_TRIGGER_NEXT_VIEW) } - return nil -} - -// TODO(#352): implement this function. -// waitForNodeToRequestMissingBlock waits for unsynced node to request missing block form the network -func waitForNodeToRequestMissingBlock( - t *testing.T, - clck *clock.Mock, - eventsChannel modules.EventsChannel, - allNodes IdToNodeMapping, - startingHeight uint64, - targetHeight uint64, -) (*anypb.Any, error) { - return &anypb.Any{}, nil -} - -// TODO(#352): implement this function. -// waitForNodeToReceiveMissingBlock requests block request of the unsynced node -// for given node to node to catch up to the target height by sending the requested block. -func waitForNodeToReceiveMissingBlock( - t *testing.T, - clck *clock.Mock, - eventsChannel modules.EventsChannel, - allNodes IdToNodeMapping, - blockReq *anypb.Any, -) (*anypb.Any, error) { - return &anypb.Any{}, nil -} - -// TODO(#352): implement this function. -// waitForNodeToCatchUp waits for given node to node to catch up to the target height by sending the requested block. -func waitForNodeToCatchUp( - t *testing.T, - clck *clock.Mock, - eventsChannel modules.EventsChannel, - unsyncedNode *shared.Node, - blockResponse *anypb.Any, - targetHeight uint64, -) error { - return nil } func generatePlaceholderBlock(height uint64, leaderAddrr cryptoPocket.Address) *coreTypes.Block { @@ -872,8 +829,8 @@ func generateQuorumCertificate(t *testing.T, validatorPrivKeys idToPrivKeyMappin return &typesCons.QuorumCertificate{ Height: block.BlockHeader.Height, - Round: 1, // assume everything succeeds on the first round for now - Step: consensus.NewRound, // TODO_IN_THIS_COMMIT: Figure out if this shold be Prepare/NewRound or something else + Round: 1, // assume everything succeeds on the first round for now + Step: consensus.NewRound, Block: block, ThresholdSignature: thresholdSig, } @@ -973,22 +930,6 @@ func startNode(t *testing.T, pocketNode *shared.Node) { require.NoError(t, err) } -// checkIdentical verifies that all items in the array are equal. -// Returns true if all items are equal or array is empty, false otherwise. -func checkIdentical(arr []*anypb.Any) bool { - if len(arr) == 0 { - return true - } - - first := arr[0] - for _, msg := range arr { - if !proto.Equal(first, msg) { - return false - } - } - return true -} - func prepareStateSyncGetBlockMessage(t *testing.T, peerAddress string, height uint64) *anypb.Any { t.Helper() diff --git a/consensus/event_handler.go b/consensus/event_handler.go index 3918797fa..f428e6a88 100644 --- a/consensus/event_handler.go +++ b/consensus/event_handler.go @@ -42,8 +42,8 @@ func (m *consensusModule) HandleEvent(event *anypb.Any) error { if !ok { return fmt.Errorf("failed to cast event to ConsensusNewHeightEvent") } - return m.stateSync.HandleBlockCommittedEvent(blockCommittedEvent) - + m.stateSync.HandleBlockCommittedEvent(blockCommittedEvent) + return nil default: return typesCons.ErrUnknownStateSyncMessageType(event.MessageName()) } diff --git a/consensus/state_sync/helpers.go b/consensus/state_sync/helpers.go index 035b0a598..a930eb5af 100644 --- a/consensus/state_sync/helpers.go +++ b/consensus/state_sync/helpers.go @@ -2,7 +2,6 @@ package state_sync import ( typesCons "github.com/pokt-network/pocket/consensus/types" - coreTypes "github.com/pokt-network/pocket/shared/core/types" cryptoPocket "github.com/pokt-network/pocket/shared/crypto" "google.golang.org/protobuf/types/known/anypb" ) @@ -18,22 +17,12 @@ func (m *stateSync) sendStateSyncMessage(msg *typesCons.StateSyncMessage, dst cr return nil } -func (m *stateSync) getValidatorsAtHeight(height uint64) ([]*coreTypes.Actor, error) { - readCtx, err := m.GetBus().GetPersistenceModule().NewReadContext(int64(height)) - if err != nil { - return nil, err - } - defer readCtx.Release() - return readCtx.GetAllValidators(int64(height)) -} - // TECHDEBT(#686): This should be an ongoing background passive state sync process. // For now, aggregating the messages when requests is good enough. -func (m *stateSync) getAggregatedStateSyncMetadata() (uint64, uint64) { +func (m *stateSync) getAggregatedStateSyncMetadata() (minHeight, maxHeight uint64) { chanLen := len(m.metadataReceived) m.logger.Info().Msgf("Looping over %d state sync metadata responses", chanLen) - minHeight, maxHeight := uint64(1), uint64(1) for i := 0; i < chanLen; i++ { metadata := <-m.metadataReceived if metadata.MaxHeight > maxHeight { @@ -43,6 +32,5 @@ func (m *stateSync) getAggregatedStateSyncMetadata() (uint64, uint64) { minHeight = metadata.MinHeight } } - - return minHeight, maxHeight + return } diff --git a/consensus/state_sync/module.go b/consensus/state_sync/module.go index 0a5f754d2..f3bd7606c 100644 --- a/consensus/state_sync/module.go +++ b/consensus/state_sync/module.go @@ -27,11 +27,11 @@ type StateSyncModule interface { modules.Module StateSyncServerModule - HandleBlockCommittedEvent(*messaging.ConsensusNewHeightEvent) error - HandleStateSyncMetadataResponse(*typesCons.StateSyncMetadataResponse) error + HandleBlockCommittedEvent(*messaging.ConsensusNewHeightEvent) + HandleStateSyncMetadataResponse(*typesCons.StateSyncMetadataResponse) // TECHDEBT: This function can be removed once the dependency of state sync on the FSM module is removed. - StartSynchronousStateSync() error + StartSynchronousStateSync() } var ( @@ -79,18 +79,20 @@ func (m *stateSync) Start() error { // 2. Requests missing blocks until the maximum seen block is retrieved // 3. Perform (2) one-by-one, applying and validating each block while doing so // 4. Once all blocks are received and committed, stop the synchronous state sync process -func (m *stateSync) StartSynchronousStateSync() error { +func (m *stateSync) StartSynchronousStateSync() { consensusMod := m.bus.GetConsensusModule() currentHeight := consensusMod.CurrentHeight() nodeAddress := consensusMod.GetNodeAddress() nodeAddressBz, err := hex.DecodeString(nodeAddress) if err != nil { - return err + m.logger.Error().Err(err).Msg("Failed to decode node address") + return } readCtx, err := m.GetBus().GetPersistenceModule().NewReadContext(int64(currentHeight)) if err != nil { - return err + m.logger.Error().Err(err).Msg("Failed to create read context") + return } defer readCtx.Release() @@ -115,12 +117,14 @@ func (m *stateSync) StartSynchronousStateSync() error { } anyProtoStateSyncMsg, err := anypb.New(stateSyncGetBlockMsg) if err != nil { - return err + m.logger.Error().Err(err).Msg("Failed to create Any proto") + return } // Broadcast the block request if err := m.GetBus().GetP2PModule().Broadcast(anyProtoStateSyncMsg); err != nil { - return err + m.logger.Error().Err(err).Msg("Failed to broadcast state sync message") + return } // Wait for the consensus module to commit the requested block and re-try on timeout @@ -138,27 +142,30 @@ func (m *stateSync) StartSynchronousStateSync() error { // Checked if the synched node is a validator or not isValidator, err := readCtx.GetValidatorExists(nodeAddressBz, int64(currentHeight)) if err != nil { - return err + m.logger.Error().Err(err).Msg("Failed to check if validator exists") + return } // Send out the appropriate FSM event now that the node is caught up if isValidator { - return m.GetBus().GetStateMachineModule().SendEvent(coreTypes.StateMachineEvent_Consensus_IsSyncedValidator) + err = m.GetBus().GetStateMachineModule().SendEvent(coreTypes.StateMachineEvent_Consensus_IsSyncedValidator) + } else { + err = m.GetBus().GetStateMachineModule().SendEvent(coreTypes.StateMachineEvent_Consensus_IsSyncedNonValidator) + } + if err != nil { + m.logger.Error().Err(err).Msg("Failed to send state machine event") + return } - return m.GetBus().GetStateMachineModule().SendEvent(coreTypes.StateMachineEvent_Consensus_IsSyncedNonValidator) } -func (m *stateSync) HandleStateSyncMetadataResponse(res *typesCons.StateSyncMetadataResponse) error { +func (m *stateSync) HandleStateSyncMetadataResponse(res *typesCons.StateSyncMetadataResponse) { m.logger.Info().Msg("Handling state sync metadata response") - m.metadataReceived <- res - return nil } -func (m *stateSync) HandleBlockCommittedEvent(msg *messaging.ConsensusNewHeightEvent) error { +func (m *stateSync) HandleBlockCommittedEvent(msg *messaging.ConsensusNewHeightEvent) { m.logger.Info().Msg("Handling state sync block committed event") m.committedBlocksChannel <- msg.Height - return nil } func (m *stateSync) Stop() error { @@ -197,23 +204,21 @@ func (m *stateSync) GetModuleName() string { // metadataSyncLoop periodically sends metadata requests to its peers to collect & // aggregate metadata related to synching the state. // It is intended to be run as a background process via a goroutine. -func (m *stateSync) metadataSyncLoop() error { - logger := m.logger.With().Str("source", "metadataSyncLoop").Logger() +func (m *stateSync) metadataSyncLoop() { + metaSyncLoopLogger := m.logger.With().Str("source", "metadataSyncLoop").Logger() ctx := context.TODO() ticker := time.NewTicker(metadataSyncPeriod) for { select { case <-ticker.C: - logger.Info().Msg("Background metadata sync check triggered") + metaSyncLoopLogger.Info().Msg("Background metadata sync check triggered") if err := m.broadcastMetadataRequests(); err != nil { - logger.Error().Err(err).Msg("Failed to send metadata requests") - return err + metaSyncLoopLogger.Error().Err(err).Msg("Failed to send metadata requests") } case <-ctx.Done(): ticker.Stop() - return nil } } } From e74cc906769916af2626fbac2fdb65fd7dcd4fa7 Mon Sep 17 00:00:00 2001 From: Daniel Olshansky Date: Fri, 28 Jul 2023 16:53:00 -0700 Subject: [PATCH 086/100] E2E State Sync Test --- .gitignore | 6 +- .tiltignore | 3 - Makefile | 34 ++-- app/client/cli/debug.go | 137 ++++++++++++--- build/config/README.md | 2 +- build/docs/CHANGELOG.md | 2 +- build/localnet/Tiltfile | 215 ++++++++++++++---------- build/scripts/watch.sh | 9 +- build/scripts/watch_build.sh | 8 - consensus/module_consensus_debugging.go | 6 +- docs/demos/iteration_3_end_to_end_tx.md | 4 +- docs/development/FAQ.md | 4 +- docs/development/README.md | 7 +- e2e/README.md | 27 ++- e2e/docs/E2E_ADR.md | 8 +- e2e/tests/debug.feature | 16 ++ e2e/tests/node.feature | 27 +++ e2e/tests/node.go | 76 +++++++++ e2e/tests/state_sync.feature | 20 +++ e2e/tests/steps_init_test.go | 201 +++++++++++++++++++--- e2e/tests/tilt_helpers.go | 36 ++++ e2e/tests/valdator.feature | 28 --- e2e/tests/validator.go | 67 -------- go.mod | 2 +- persistence/docs/CHANGELOG.md | 2 +- persistence/docs/README.md | 2 +- shared/modules/doc/CHANGELOG.md | 4 +- telemetry/README.md | 2 +- 28 files changed, 663 insertions(+), 292 deletions(-) delete mode 100755 build/scripts/watch_build.sh create mode 100644 e2e/tests/debug.feature create mode 100644 e2e/tests/node.feature create mode 100644 e2e/tests/node.go create mode 100644 e2e/tests/state_sync.feature create mode 100644 e2e/tests/tilt_helpers.go delete mode 100644 e2e/tests/valdator.feature delete mode 100644 e2e/tests/validator.go diff --git a/.gitignore b/.gitignore index d049b2729..4996b6c24 100644 --- a/.gitignore +++ b/.gitignore @@ -55,9 +55,6 @@ temp_test.go test_results.json coverage.out -# Output of `make build_and_watch` -main - # generated RPC server and client from openapi.yaml rpc/server.gen.go rpc/client.gen.go @@ -90,3 +87,6 @@ tools/wiki # ggshield .cache_ggshield + +# mock temporary files +**/gomock_reflect_*/ diff --git a/.tiltignore b/.tiltignore index 63afc9698..9a59a3fde 100644 --- a/.tiltignore +++ b/.tiltignore @@ -41,9 +41,6 @@ temp_test.go test_results.json coverage.out -# Output of `make build_and_watch` -main - # generated RPC server and client from openapi.yaml rpc/server.gen.go rpc/client.gen.go diff --git a/Makefile b/Makefile index 9ec1c636a..e3c289138 100644 --- a/Makefile +++ b/Makefile @@ -163,33 +163,27 @@ develop_test: docker_check ## Run all of the make commands necessary to develop make develop_start && \ make test_all -.PHONY: client_start -client_start: docker_check ## Run a client daemon which is only used for debugging purposes +.PHONY: lightweight_localnet_client +lightweight_localnet_client: docker_check ## Run a client daemon which is only used for debugging purposes +# Add `--build` to rebuild the client ${docker-compose} up -d client -.PHONY: rebuild_client_start -rebuild_client_start: docker_check ## Rebuild and run a client daemon which is only used for debugging purposes - ${docker-compose} up -d --build client - -.PHONY: client_connect -client_connect: docker_check ## Connect to the running client debugging daemon +.PHONY: lightweight_localnet_client_debug +lightweight_localnet_client_debug: docker_check ## Connect to the running client debugging daemon docker exec -it client /bin/bash -c "go run -tags=debug app/client/*.go DebugUI" -.PHONY: build_and_watch -build_and_watch: ## Continous build Pocket's main entrypoint as files change - /bin/sh ${PWD}/build/scripts/watch_build.sh +# IMPROVE: Avoid building the binary on every shell execution and sync it from local instead +.PHONY: lightweight_localnet_shell +lightweight_localnet_shell: docker_check ## Connect to the running client debugging daemon + docker exec -it client /bin/bash -c "go build -tags=debug -o p1 ./app/client/*.go && chmod +x p1 && mv p1 /usr/bin && echo \"Finished building a new p1 binary\" && /bin/bash" -# TODO(olshansky): Need to think of a Pocket related name for `compose_and_watch`, maybe just `pocket_watch`? -.PHONY: compose_and_watch -compose_and_watch: docker_check db_start monitoring_start ## Run a localnet composed of 4 consensus validators w/ hot reload & debugging +.PHONY: lightweight_localnet +lightweight_localnet: docker_check db_start monitoring_start ## Run a lightweight localnet composed of 4 validators w/ hot reload & debugging +# Add `--build` to rebuild the client ${docker-compose} up --force-recreate validator1 validator2 validator3 validator4 servicer1 fisherman1 -.PHONY: rebuild_and_compose_and_watch -rebuild_and_compose_and_watch: docker_check db_start monitoring_start ## Rebuilds the container from scratch and launches compose_and_watch - ${docker-compose} up --build --force-recreate validator1 validator2 validator3 validator4 servicer1 fisherman1 - .PHONY: db_start -db_start: docker_check ## Start a detached local postgres and admin instance; compose_and_watch is responsible for instantiating the actual schemas +db_start: docker_check ## Start a detached local postgres and admin instance; lightweight_localnet is responsible for instantiating the actual schemas ${docker-compose} up --no-recreate -d db pgadmin .PHONY: db_cli @@ -245,7 +239,7 @@ docker_wipe_nodes: docker_check prompt_user db_drop ## [WARNING] Remove all the docker ps -a -q --filter="name=node*" | xargs -r -I {} docker rm {} .PHONY: monitoring_start -monitoring_start: docker_check ## Start grafana, metrics and logging system (this is auto-triggered by compose_and_watch) +monitoring_start: docker_check ## Start grafana, metrics and logging system (this is auto-triggered by lightweight_localnet) ${docker-compose} up --no-recreate -d grafana loki vm .PHONY: docker_loki_install diff --git a/app/client/cli/debug.go b/app/client/cli/debug.go index 99d5b83de..ca5a1b764 100644 --- a/app/client/cli/debug.go +++ b/app/client/cli/debug.go @@ -1,10 +1,15 @@ package cli import ( + "fmt" + "log" "os" + "os/exec" + "time" "github.com/manifoldco/promptui" "github.com/spf13/cobra" + "golang.org/x/exp/slices" "google.golang.org/protobuf/types/known/anypb" "github.com/pokt-network/pocket/app/client/cli/helpers" @@ -34,43 +39,123 @@ var items = []string{ } func init() { + dbg := newDebugCommand() + dbg.AddCommand(newDebugSubCommands()...) + rootCmd.AddCommand(dbg) + dbgUI := newDebugUICommand() - dbgUI.AddCommand(newDebugUISubCommands()...) rootCmd.AddCommand(dbgUI) } -// newDebugUISubCommands builds out the list of debug subcommands by matching the -// handleSelect dispatch to the appropriate command. -// * To add a debug subcommand, you must add it to the `items` array and then -// write a function handler to match for it in `handleSelect`. -func newDebugUISubCommands() []*cobra.Command { - commands := make([]*cobra.Command, len(items)) - for idx, promptItem := range items { - commands[idx] = &cobra.Command{ - Use: promptItem, +// newDebugCommand returns the cobra CLI for the Debug command. +func newDebugCommand() *cobra.Command { + return &cobra.Command{ + Use: "Debug", + Aliases: []string{"d"}, + Short: "Debug utility for rapid development", + Long: "Debug utility to send fire-and-forget messages to the network for development purposes", + Args: cobra.MaximumNArgs(1), + } +} + +// newDebugSubCommands is a list of commands that can be "fired & forgotten" (no selection necessary) +func newDebugSubCommands() []*cobra.Command { + cmds := []*cobra.Command{ + { + Use: "PrintNodeState", + Aliases: []string{"print", "state"}, + Short: "Prints the node state", + Long: "Sends a message to all visible nodes to log the current state of their consensus", + Args: cobra.ExactArgs(0), PersistentPreRunE: helpers.P2PDependenciesPreRunE, - Run: func(cmd *cobra.Command, _ []string) { - handleSelect(cmd, cmd.Use) + Run: func(cmd *cobra.Command, args []string) { + runWithSleep(func() { + handleSelect(cmd, PromptPrintNodeState) + }) }, - ValidArgs: items, - } + }, + { + Use: "ResetToGenesis", + Aliases: []string{"reset", "genesis"}, + Short: "Reset to genesis", + Long: "Broadcast a message to all visible nodes to reset the state to genesis", + Args: cobra.ExactArgs(0), + PersistentPreRunE: helpers.P2PDependenciesPreRunE, + Run: func(cmd *cobra.Command, args []string) { + runWithSleep(func() { + handleSelect(cmd, PromptResetToGenesis) + }) + }, + }, + { + Use: "TriggerView", + Aliases: []string{"next", "trigger", "view"}, + Short: "Trigger the next view in consensus", + Long: "Sends a message to all visible nodes on the network to start the next view (height/step/round) in consensus", + Args: cobra.ExactArgs(0), + PersistentPreRunE: helpers.P2PDependenciesPreRunE, + Run: func(cmd *cobra.Command, args []string) { + runWithSleep(func() { + handleSelect(cmd, PromptTriggerNextView) + }) + }, + }, + { + Use: "TogglePacemakerMode", + Aliases: []string{"toggle", "pcm"}, + Short: "Toggle the pacemaker", + Long: "Toggle the consensus pacemaker either on or off so the chain progresses on its own or loses liveness", + Args: cobra.ExactArgs(0), + PersistentPreRunE: helpers.P2PDependenciesPreRunE, + Run: func(cmd *cobra.Command, args []string) { + runWithSleep(func() { + handleSelect(cmd, PromptTogglePacemakerMode) + }) + }, + }, + { + Use: "ScaleActor", + Aliases: []string{"scale"}, + Short: "Scales the number of actors up or down", + Long: "Scales the type of actor specified to the number provided", + Args: cobra.ExactArgs(2), + PersistentPreRunE: helpers.P2PDependenciesPreRunE, + Run: func(cmd *cobra.Command, args []string) { + actor := args[0] + numActors := args[1] + validActors := []string{"fishermen", "full_nodes", "servicers", "validators"} + if !slices.Contains(validActors, actor) { + logger.Global.Fatal().Msg("Invalid actor type provided") + } + sedCmd := exec.Command("sed", "-i", fmt.Sprintf("/%s:/,/count:/ s/count: [0-9]*/count: %s/", actor, numActors), "/usr/local/localnet_config.yaml") + err := sedCmd.Run() + if err != nil { + log.Fatal(err) + } + }, + }, } - return commands + return cmds } // newDebugUICommand returns the cobra CLI for the Debug UI interface. func newDebugUICommand() *cobra.Command { return &cobra.Command{ - Aliases: []string{"dui"}, Use: "DebugUI", - Short: "Debug selection ui for rapid development", + Aliases: []string{"dui"}, + Short: "Debug utility with an interactive UI for development purposes", + Long: "Opens a shell-driven selection UI to view and select from a list of debug actions for development purposes", Args: cobra.MaximumNArgs(0), PersistentPreRunE: helpers.P2PDependenciesPreRunE, - RunE: runDebug, + RunE: selectDebugCommand, } } -func runDebug(cmd *cobra.Command, _ []string) (err error) { +// selectDebugCommand builds out the list of debug subcommands by matching the +// handleSelect dispatch to the appropriate command. +// - To add a debug subcommand, you must add it to the `items` array and then +// write a function handler to match for it in `handleSelect`. +func selectDebugCommand(cmd *cobra.Command, _ []string) error { for { if selection, err := promptGetInput(); err == nil { handleSelect(cmd, selection) @@ -158,7 +243,17 @@ func handleSelect(cmd *cobra.Command, selection string) { } } -// Broadcast to the entire network. +// HACK: Because of how the p2p module works, we need to surround it with sleep both BEFORE and AFTER the task. +// - Starting the task too early after the debug client initializes results in a lack of visibility of the nodes in the network +// - Ending the task too early before the debug client completes its task results in a lack of propagation of the message or retrieval of the result +// TECHDEBT: There is likely an event based solution to this but it would require a lot more refactoring of the p2p module. +func runWithSleep(task func()) { + time.Sleep(1000 * time.Millisecond) + task() + time.Sleep(1000 * time.Millisecond) +} + +// broadcastDebugMessage broadcasts the debug message to the entire visible network. func broadcastDebugMessage(cmd *cobra.Command, debugMsg *messaging.DebugMessage) { anyProto, err := anypb.New(debugMsg) if err != nil { @@ -174,7 +269,7 @@ func broadcastDebugMessage(cmd *cobra.Command, debugMsg *messaging.DebugMessage) } } -// Send to just a single (i.e. first) validator in the set +// sendDebugMessage sends the debug message to just a single (i.e. first) node visible func sendDebugMessage(cmd *cobra.Command, debugMsg *messaging.DebugMessage) { anyProto, err := anypb.New(debugMsg) if err != nil { diff --git a/build/config/README.md b/build/config/README.md index 24d8d110c..fffb1883b 100644 --- a/build/config/README.md +++ b/build/config/README.md @@ -12,7 +12,7 @@ It is not recommended at this time to build infrastructure components that rely ## Origin Document -Currently, the Genesis and Configuration generator is necessary to create development `localnet` environments for iterating on V1. A current example (as of 09/2022) of this is the `make compose_and_watch` debug utility that generates a `localnet` using `docker-compose` by injecting the appropriate `config.json` and `genesis.json` files. +Currently, the Genesis and Configuration generator is necessary to create development `localnet` environments for iterating on V1. A current example (as of 09/2022) of this is the `make lightweight_localnet` debug utility that generates a `localnet` using `docker-compose` by injecting the appropriate `config.json` and `genesis.json` files. ## Usage diff --git a/build/docs/CHANGELOG.md b/build/docs/CHANGELOG.md index a23ab1698..aec7e163e 100644 --- a/build/docs/CHANGELOG.md +++ b/build/docs/CHANGELOG.md @@ -233,7 +233,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [0.0.0.1] - 2022-12-29 - Updated all `config*.json` files with the missing `max_mempool_count` value -- Added `is_client_only` to `config1.json` so Viper knows it can be overridden. The config override is done in the Makefile's `client_connect` target. Setting this can be avoided if we merge the changes in https://github.com/pokt-network/pocket/compare/main...issue/cli-viper-environment-vars-fix +- Added `is_client_only` to `config1.json` so Viper knows it can be overridden. The config override is done in the Makefile's `lightweight_localnet_client_debug` target. Setting this can be avoided if we merge the changes in https://github.com/pokt-network/pocket/compare/main...issue/cli-viper-environment-vars-fix ## [0.0.0.0] - 2022-12-22 diff --git a/build/localnet/Tiltfile b/build/localnet/Tiltfile index 9f5a36b4c..9fc086763 100644 --- a/build/localnet/Tiltfile +++ b/build/localnet/Tiltfile @@ -2,7 +2,8 @@ load("ext://helm_resource", "helm_resource", "helm_repo") load("ext://namespace", "namespace_create") load("ext://restart_process", "docker_build_with_restart") -load('ext://tests/golang', 'test_go') +load("ext://tests/golang", "test_go") +load("ext://syncback", "syncback") tiltfile_dir = os.path.dirname(config.main_dir) root_dir = os.path.dirname(tiltfile_dir + "/../..") @@ -13,7 +14,7 @@ localnet_config_defaults = { "validators": {"count": 4}, "servicers": {"count": 1}, "fishermen": {"count": 1}, - "full_nodes": {"count": 1} + "full_nodes": {"count": 1}, } localnet_config_file = read_yaml(localnet_config_path, default=localnet_config_defaults) @@ -22,6 +23,7 @@ localnet_config = {} localnet_config.update(localnet_config_defaults) localnet_config.update(localnet_config_file) + # Create a default config file if it does not exist if (localnet_config_file != localnet_config) or ( not os.path.exists(localnet_config_path) @@ -29,6 +31,15 @@ if (localnet_config_file != localnet_config) or ( print("Updating " + localnet_config_path + " with defaults") local("cat - > " + localnet_config_path, stdin=encode_yaml(localnet_config)) +syncback( + name="syncback_localnet_config", + k8s_object="deploy/dev-cli-client", + src_dir="/usr/local/", + paths=["localnet_config.yaml"], + target_dir=root_dir, + labels=["watchers"], +) + # List of directories Tilt watches to trigger a hot-reload on changes. # CONSIDERATION: This can potentially can be replaced with a list of excluded directories. deps = [ @@ -49,6 +60,7 @@ deps = [ deps_full_path = [root_dir + "/" + depdir for depdir in deps] + # Avoid downloading dependencies if no missing/outdated charts are found def check_helm_dependencies_for_chart(path): check_helm_dependencies = local( @@ -58,6 +70,7 @@ def check_helm_dependencies_for_chart(path): if helm_dependencies_not_ok_count > 1: local("helm dependency update " + path) + check_helm_dependencies_for_chart("dependencies") k8s_yaml(helm("dependencies", name="dependencies")) @@ -78,7 +91,7 @@ local_resource( root_dir=root_dir ), deps=deps_full_path, - labels=['watchers'] + labels=["watchers"], ) local_resource( "debug client: Watch & Compile", @@ -86,16 +99,16 @@ local_resource( root_dir=root_dir ), deps=deps_full_path, - labels=['watchers'] + labels=["watchers"], ) # Builds the cluster manager binary local_resource( - 'cluster manager: Watch & Compile', - 'GOOS=linux go build -o {root_dir}/bin/cluster-manager {root_dir}/build/localnet/cluster-manager/*.go'.format( + "cluster manager: Watch & Compile", + "GOOS=linux go build -o {root_dir}/bin/cluster-manager {root_dir}/build/localnet/cluster-manager/*.go".format( root_dir=root_dir ), deps=deps_full_path, - labels=['watchers'] + labels=["watchers"], ) # Builds and maintains the pocket container image after the binary is built on local machine, restarts a process on code change @@ -126,10 +139,14 @@ RUN echo "source /etc/bash_completion" >> ~/.bashrc RUN echo "source <(p1 completion bash | tail -n +2)" >> ~/.bashrc WORKDIR /root COPY bin/p1-linux /usr/local/bin/p1 +COPY localnet_config.yaml /usr/local/localnet_config.yaml """, - only=["bin/p1-linux"], + only=["bin/p1-linux", localnet_config_path], entrypoint=["sleep", "infinity"], - live_update=[sync("bin/p1-linux", "/usr/local/bin/p1")], + live_update=[ + sync("bin/p1-linux", "/usr/local/bin/p1"), + sync(localnet_config_path, "/usr/local/localnet_config.yaml"), + ], ) # Builds and maintains the cluster-manager container image after the binary is built on local machine @@ -141,12 +158,12 @@ WORKDIR / COPY bin/cluster-manager /usr/local/bin/cluster-manager COPY bin/p1-linux /usr/local/bin/p1 """, - only=['bin/cluster-manager', 'bin/p1-linux'], + only=["bin/cluster-manager", "bin/p1-linux"], entrypoint=["/usr/local/bin/cluster-manager"], live_update=[ sync("bin/cluster-manager", "/usr/local/bin/cluster-manager"), sync("bin/p1-linux", "/usr/local/bin/p1"), - ] + ], ) # Pushes localnet manifests to the cluster. @@ -162,9 +179,9 @@ k8s_yaml( ) k8s_yaml(["manifests/cli-client.yaml"]) -k8s_resource('dev-cli-client', labels=['client']) -k8s_yaml(['manifests/cluster-manager.yaml']) -k8s_resource('pocket-v1-cluster-manager', labels=['cluster-manager']) +k8s_resource("dev-cli-client", labels=["client"]) +k8s_yaml(["manifests/cluster-manager.yaml"]) +k8s_resource("pocket-v1-cluster-manager", labels=["cluster-manager"]) chart_dir = root_dir + "/charts/pocket" check_helm_dependencies_for_chart(chart_dir) @@ -173,28 +190,35 @@ check_helm_dependencies_for_chart(chart_dir) def formatted_actor_number(n): return local('printf "%03d" ' + str(n)) + # Provisions validators actor_number = 0 for x in range(localnet_config["validators"]["count"]): actor_number = actor_number + 1 formatted_number = formatted_actor_number(actor_number) - k8s_yaml(helm(chart_dir, - name="validator-%s-pocket" % formatted_number, - set=[ - "global.postgresql.auth.postgresPassword=LocalNetPassword", - "image.repository=pocket-image", - "privateKeySecretKeyRef.name=validators-private-keys", - "privateKeySecretKeyRef.key=%s" % formatted_number, - "genesis.preProvisionedGenesis.enabled=false", - "genesis.externalConfigMap.name=v1-localnet-genesis", - "genesis.externalConfigMap.key=genesis.json", - "postgresql.primary.persistence.enabled=false", - "nodeType=validator", - ], - values=[chart_dir + "/pocket-validator-overrides.yaml"] if os.path.exists(chart_dir + "/pocket-validator-overrides.yaml") else [],)) - - k8s_resource("validator-%s-pocket" % formatted_number, labels=['pocket-validators']) + k8s_yaml( + helm( + chart_dir, + name="validator-%s-pocket" % formatted_number, + set=[ + "global.postgresql.auth.postgresPassword=LocalNetPassword", + "image.repository=pocket-image", + "privateKeySecretKeyRef.name=validators-private-keys", + "privateKeySecretKeyRef.key=%s" % formatted_number, + "genesis.preProvisionedGenesis.enabled=false", + "genesis.externalConfigMap.name=v1-localnet-genesis", + "genesis.externalConfigMap.key=genesis.json", + "postgresql.primary.persistence.enabled=false", + "nodeType=validator", + ], + values=[chart_dir + "/pocket-validator-overrides.yaml"] + if os.path.exists(chart_dir + "/pocket-validator-overrides.yaml") + else [], + ) + ) + + k8s_resource("validator-%s-pocket" % formatted_number, labels=["pocket-validators"]) # Provisions servicer nodes actor_number = 0 @@ -202,23 +226,29 @@ for x in range(localnet_config["servicers"]["count"]): actor_number = actor_number + 1 formatted_number = formatted_actor_number(actor_number) - k8s_yaml(helm(chart_dir, - name="servicer-%s-pocket" % formatted_number, - set=[ - "global.postgresql.auth.postgresPassword=LocalNetPassword", - "image.repository=pocket-image", - "privateKeySecretKeyRef.name=servicers-private-keys", - "privateKeySecretKeyRef.key=%s" % formatted_number, - "genesis.preProvisionedGenesis.enabled=false", - "genesis.externalConfigMap.name=v1-localnet-genesis", - "genesis.externalConfigMap.key=genesis.json", - "postgresql.primary.persistence.enabled=false", - "config.servicer.enabled=true", - "nodeType=servicer", - ], - values=[chart_dir + "/pocket-servicer-overrides.yaml"] if os.path.exists(chart_dir + "/pocket-servicer-overrides.yaml") else [],)) - - k8s_resource("servicer-%s-pocket" % formatted_number, labels=['pocket-servicers']) + k8s_yaml( + helm( + chart_dir, + name="servicer-%s-pocket" % formatted_number, + set=[ + "global.postgresql.auth.postgresPassword=LocalNetPassword", + "image.repository=pocket-image", + "privateKeySecretKeyRef.name=servicers-private-keys", + "privateKeySecretKeyRef.key=%s" % formatted_number, + "genesis.preProvisionedGenesis.enabled=false", + "genesis.externalConfigMap.name=v1-localnet-genesis", + "genesis.externalConfigMap.key=genesis.json", + "postgresql.primary.persistence.enabled=false", + "config.servicer.enabled=true", + "nodeType=servicer", + ], + values=[chart_dir + "/pocket-servicer-overrides.yaml"] + if os.path.exists(chart_dir + "/pocket-servicer-overrides.yaml") + else [], + ) + ) + + k8s_resource("servicer-%s-pocket" % formatted_number, labels=["pocket-servicers"]) # Provisions fishermen nodes actor_number = 0 @@ -226,23 +256,29 @@ for x in range(localnet_config["fishermen"]["count"]): actor_number = actor_number + 1 formatted_number = formatted_actor_number(actor_number) - k8s_yaml(helm(chart_dir, - name="fisherman-%s-pocket" % formatted_number, - set=[ - "global.postgresql.auth.postgresPassword=LocalNetPassword", - "image.repository=pocket-image", - "privateKeySecretKeyRef.name=fishermen-private-keys", - "privateKeySecretKeyRef.key=%s" % formatted_number, - "genesis.preProvisionedGenesis.enabled=false", - "genesis.externalConfigMap.name=v1-localnet-genesis", - "genesis.externalConfigMap.key=genesis.json", - "postgresql.primary.persistence.enabled=false", - "config.fisherman.enabled=true", - "nodeType=fisherman", - ], - values=[chart_dir + "/pocket-fisherman-overrides.yaml"] if os.path.exists(chart_dir + "/pocket-fisherman-overrides.yaml") else [],)) - - k8s_resource("fisherman-%s-pocket" % formatted_number, labels=['pocket-fishermen']) + k8s_yaml( + helm( + chart_dir, + name="fisherman-%s-pocket" % formatted_number, + set=[ + "global.postgresql.auth.postgresPassword=LocalNetPassword", + "image.repository=pocket-image", + "privateKeySecretKeyRef.name=fishermen-private-keys", + "privateKeySecretKeyRef.key=%s" % formatted_number, + "genesis.preProvisionedGenesis.enabled=false", + "genesis.externalConfigMap.name=v1-localnet-genesis", + "genesis.externalConfigMap.key=genesis.json", + "postgresql.primary.persistence.enabled=false", + "config.fisherman.enabled=true", + "nodeType=fisherman", + ], + values=[chart_dir + "/pocket-fisherman-overrides.yaml"] + if os.path.exists(chart_dir + "/pocket-fisherman-overrides.yaml") + else [], + ) + ) + + k8s_resource("fisherman-%s-pocket" % formatted_number, labels=["pocket-fishermen"]) # Provisions full nodes actor_number = 0 @@ -250,22 +286,28 @@ for x in range(localnet_config["full_nodes"]["count"]): actor_number = actor_number + 1 formatted_number = formatted_actor_number(actor_number) - k8s_yaml(helm(root_dir + "/charts/pocket", - name="full-node-%s-pocket" % formatted_number, - set=[ - "global.postgresql.auth.postgresPassword=LocalNetPassword", - "image.repository=pocket-image", - "privateKeySecretKeyRef.name=misc-private-keys", - "privateKeySecretKeyRef.key=%s" % formatted_number, - "genesis.preProvisionedGenesis.enabled=false", - "genesis.externalConfigMap.name=v1-localnet-genesis", - "genesis.externalConfigMap.key=genesis.json", - "postgresql.primary.persistence.enabled=false", - "nodeType=full", - ], - values=[chart_dir + "/pocket-full-node-overrides.yaml"] if os.path.exists(chart_dir + "/pocket-full-node-overrides.yaml") else [],)) - - k8s_resource("full-node-%s-pocket" % formatted_number, labels=['pocket-full-nodes']) + k8s_yaml( + helm( + root_dir + "/charts/pocket", + name="full-node-%s-pocket" % formatted_number, + set=[ + "global.postgresql.auth.postgresPassword=LocalNetPassword", + "image.repository=pocket-image", + "privateKeySecretKeyRef.name=misc-private-keys", + "privateKeySecretKeyRef.key=%s" % formatted_number, + "genesis.preProvisionedGenesis.enabled=false", + "genesis.externalConfigMap.name=v1-localnet-genesis", + "genesis.externalConfigMap.key=genesis.json", + "postgresql.primary.persistence.enabled=false", + "nodeType=full", + ], + values=[chart_dir + "/pocket-full-node-overrides.yaml"] + if os.path.exists(chart_dir + "/pocket-full-node-overrides.yaml") + else [], + ) + ) + + k8s_resource("full-node-%s-pocket" % formatted_number, labels=["pocket-full-nodes"]) # Exposes grafana k8s_resource( @@ -273,12 +315,15 @@ k8s_resource( workload="dependencies-grafana", extra_pod_selectors=[{"app.kubernetes.io/name": "grafana"}], port_forwards=["42000:3000"], - labels=["monitoring"] + labels=["monitoring"], ) # E2E test button -test_go('e2e-tests', '{root_dir}/e2e/tests'.format(root_dir=root_dir), '.', - extra_args=["-v", "-count=1", "-tags=e2e"], - labels=['e2e-tests'], - trigger_mode=TRIGGER_MODE_MANUAL, +test_go( + "e2e-tests", + "{root_dir}/e2e/tests".format(root_dir=root_dir), + ".", + extra_args=["-v", "-count=1", "-tags=e2e"], + labels=["e2e-tests"], + trigger_mode=TRIGGER_MODE_MANUAL, ) diff --git a/build/scripts/watch.sh b/build/scripts/watch.sh index 01d55d544..b2fbdd892 100755 --- a/build/scripts/watch.sh +++ b/build/scripts/watch.sh @@ -19,7 +19,8 @@ else fi reflex \ - --start-service \ - -r '\.go' \ - --decoration="none" \ - -s -- sh -c "$command"; + --start-service \ + -R '^app/client' \ + -r '\.go' \ + --decoration="none" \ + -s -- sh -c "$command" diff --git a/build/scripts/watch_build.sh b/build/scripts/watch_build.sh deleted file mode 100755 index 5f5e5b920..000000000 --- a/build/scripts/watch_build.sh +++ /dev/null @@ -1,8 +0,0 @@ -#!/bin/bash - -if command -v reflex >/dev/null -then - reflex -r '\.go$' -s --decoration="none" -- sh -c "go build -v app/pocket/main.go" -else - echo "reflex not found. Install with `go install github.com/cespare/reflex@latest`" -fi diff --git a/consensus/module_consensus_debugging.go b/consensus/module_consensus_debugging.go index a302f1444..137643df6 100644 --- a/consensus/module_consensus_debugging.go +++ b/consensus/module_consensus_debugging.go @@ -13,11 +13,11 @@ func (m *consensusModule) HandleDebugMessage(debugMessage *messaging.DebugMessag m.m.Lock() defer m.m.Unlock() + m.logger.Debug().Msgf("Consensus module handling debug message: %s", debugMessage.Action) + switch debugMessage.Action { case messaging.DebugMessageAction_DEBUG_CONSENSUS_RESET_TO_GENESIS: - if err := m.resetToGenesis(debugMessage); err != nil { - return err - } + return m.resetToGenesis(debugMessage) case messaging.DebugMessageAction_DEBUG_CONSENSUS_PRINT_NODE_STATE: m.printNodeState(debugMessage) case messaging.DebugMessageAction_DEBUG_CONSENSUS_TRIGGER_NEXT_VIEW: diff --git a/docs/demos/iteration_3_end_to_end_tx.md b/docs/demos/iteration_3_end_to_end_tx.md index 1ace95335..86d2dab19 100644 --- a/docs/demos/iteration_3_end_to_end_tx.md +++ b/docs/demos/iteration_3_end_to_end_tx.md @@ -43,13 +43,13 @@ make protogen_local # generate the protobuf files make generate_rpc_openapi # generate the OpenAPI spec make docker_wipe_nodes # clear all the 4 validator nodes make db_drop # clear the existing database -make compose_and_watch # Start 4 validator node LocalNet +make lightweight_localnet # Start 4 validator node LocalNet ``` ## Shell #2: Setup Consensus debugger ```bash -make client_start && make client_connect # start the consensus debugger +make lightweight_localnet_client && make lightweight_localnet_client_debug # start the consensus debugger ``` Use `TriggerNextView` and `PrintNodeState` to increment and inspect each node's `height/round/step`. diff --git a/docs/development/FAQ.md b/docs/development/FAQ.md index 4d4eaf1ff..8b7de79a7 100644 --- a/docs/development/FAQ.md +++ b/docs/development/FAQ.md @@ -11,9 +11,9 @@ _NOTE: Consider turning off the `gofmt` in your IDE to prevent unexpected format ## Unable to start LocalNet - permission denied -- **Issue**: when trying to run `make compose_and_watch` on an operating system with SELinux, the command gives the error: +- **Issue**: when trying to run `make lightweight_localnet` on an operating system with SELinux, the command gives the error: -``` +```bash Recreating validator2 ... done Recreating validator4 ... done Recreating validator1 ... done diff --git a/docs/development/README.md b/docs/development/README.md index 3f62d8a6b..84dfad020 100644 --- a/docs/development/README.md +++ b/docs/development/README.md @@ -86,6 +86,7 @@ Optionally activate changelog pre-commit hook cp .githooks/pre-commit .git/hooks/pre-commit chmod +x .git/hooks/pre-commit ``` + _**NOTE**: The pre-commit changelog verification has been disabled during the developement of V1 as of 2023-05-16 to unblock development velocity; see more details [here](https://github.com/pokt-network/pocket/assets/1892194/394fdb09-e388-44aa-820d-e9d5a23578cf). This check is no longer done in the CI and is not recommended for local development either currently._ ### Pocket Network CLI @@ -167,7 +168,7 @@ Note that there are a few tests in the library that are prone to race conditions ### Running LocalNet -At the time of writing, we have two basic approaches to running a LocalNet. We suggest getting started with the `Docker Compose` approach outlined below before moving to the advanced Kubernetes configuration. +At the time of writing, we have two basic approaches to running a LocalNet. We suggest getting started with the `Docker Compose` (aka `lightweight LocalNet`) approach outlined below before moving to the advanced Kubernetes (aka LocalNet) configuration. #### [Advanced] Kubernetes @@ -186,13 +187,13 @@ make docker_wipe 2. In one shell, run the 4 nodes setup: ```bash -make compose_and_watch +make lightweight_localnet ``` 4. In another shell, run the development client: ```bash -make client_start && make client_connect +make lightweight_localnet_client && make lightweight_localnet_client_debug ``` 4. Check the state of each node: diff --git a/e2e/README.md b/e2e/README.md index a87c4fcf2..5e3ee41c9 100644 --- a/e2e/README.md +++ b/e2e/README.md @@ -7,6 +7,7 @@ - [Build Tags](#build-tags) - [Issue templates](#issue-templates) - [Implementation](#implementation) +- [Keywords](#keywords) > tl; dr - `make localnet_up` and then `make test_e2e` @@ -35,8 +36,8 @@ Issues can formally define features by attaching an erroring `feature` file to b ```gherkin Feature: Example Namespace - Scenario: User Needs Example - Given the user has a validator + Scenario: User Needs Example + Given the user has a node When the user runs the command "example" Then the user should be able to see standard output containing "Example Output" And the pocket client should have exited without error @@ -46,7 +47,7 @@ Feature: Example Namespace The test suite is located in `e2e/tests` and it contains a set of Cucumber feature files and the associated Go tests to run them. `make test_e2e` sees any files named with the pattern `*.feature` in `e2e/tests` and runs them with [godog](https://github.com/cucumber/godog), the Go test runner for Cucumber tests. The LocalNet must be up and running for the E2E test suite to run. -The Validator issues RPC commands on the container by calling `kubectl exec` and targeting the pod in the cluster by name. It records the results of the command including stdout and stderr, allowing for assertions about the results of the command. +The Node issues RPC commands on the container by calling `kubectl exec` and targeting the pod in the cluster by name. It records the results of the command including stdout and stderr, allowing for assertions about the results of the command. ```mermaid --- @@ -60,10 +61,26 @@ flowchart TD Kubeconfig --> Kubectl Kubeconfig --> DevNet subgraph E2E [E2E scenarios] - Kubectl -- commandResult --> Validator - Validator -- args --> Kubectl + Kubectl -- commandResult --> Node + Node -- args --> Kubectl end subgraph DevNet [DevNet] Runner[E2E Test Runner] end ``` + +## Keywords + +The keywords below are a summary of the source documentation available [here](https://cucumber.io/docs/gherkin/reference/#keywords). + +- **Feature**: This keyword, followed by the name and optional description, is used to describe a feature of the system that you're testing. It should provide a high-level description of a software feature, and to group related scenarios. +- **Scenario**: This keyword, followed by the name and optional description, is used to describe a particular behavior of the system that you're testing. A feature can have multiple scenarios, and each scenario should follow the 'Given-When-Then' structure. +- **Given**: This keyword is used to set up a situation or a context. It puts the system in a known state before the user interacts with the system. +- **When**: This keyword is used to describe an action or event. This is something the user does or the system does. +- **Then**: This keyword is used to describe an expected outcome or result. +- **And**, But: These keywords are used when you have more than one Given, When, or Then step. They help to make the specifications more readable. +- **Background**: This keyword provides the context for the following scenarios. It allows you to add some context to the scenarios in a single place. +- **Scenario Outline**: This keyword can be used when the same test is performed multiple times with a different combination of values. +- **Examples**: This keyword is used in conjunction with **Scenario Outline** to provide the values for the test. +- **Rule**: This keyword is used to represent one business rule that should be implemented. It provides additional information for a feature. +- **Tags**: This is not a Gherkin keyword but an integral part of organizing your Cucumber features. They are preceded by '@' symbol and can be used before Feature, Scenario, Scenario Outline, or Examples. diff --git a/e2e/docs/E2E_ADR.md b/e2e/docs/E2E_ADR.md index d3e7dee53..59a7546dd 100644 --- a/e2e/docs/E2E_ADR.md +++ b/e2e/docs/E2E_ADR.md @@ -124,16 +124,16 @@ type PocketClient interface { ``` - The `PocketClient` interface is included in the test suite and defines a single function interface with the `RunCommand` method. -- The `validatorPod` adapter fulfills the `PocketClient` interface and lets us call commands through Kubernetes. This is the main way that tests assemble the environment for later assertions. +- The `nodePod` adapter fulfills the `PocketClient` interface and lets us call commands through Kubernetes. This is the main way that tests assemble the environment for later assertions. ```go -// validatorPod holds the connection information to pod validator-001 for testing -type validatorPod struct { +// nodePod holds the connection information to pod validator-001 for testing +type nodePod struct { result *commandResult // stores the result of the last command that was run } // RunCommand runs a command on the pocket binary -func (v *validatorPod) RunCommand(args ...string) (*commandResult, error) { +func (v *nodePod) RunCommand(args ...string) (*commandResult, error) { base := []string{ "exec", "-i", "deploy/pocket-v1-cli-client", "--container", "pocket", diff --git a/e2e/tests/debug.feature b/e2e/tests/debug.feature new file mode 100644 index 000000000..e33bf33a7 --- /dev/null +++ b/e2e/tests/debug.feature @@ -0,0 +1,16 @@ +Feature: Debug Namespace + + # Since the configuration for consensus is optimistically responsive, we need to be in manual + # Pacemaker mode and call TriggerView to further the blockchain. + # 1 second was chosen arbitrarily for the time for block propagation. + Scenario: 4 Validator blockchain from genesis reaches block 2 when TriggerView is executed twice + Given the network is at genesis + And the network has "4" actors of type "Validator" + When the developer runs the command "TriggerView" + And the developer waits for "1000" milliseconds + Then "validator-001" should be at height "1" + And "validator-004" should be at height "1" + When the developer runs the command "TriggerView" + And the developer waits for "1000" milliseconds + Then "validator-001" should be at height "2" + And "validator-004" should be at height "2" \ No newline at end of file diff --git a/e2e/tests/node.feature b/e2e/tests/node.feature new file mode 100644 index 000000000..69e1cc26f --- /dev/null +++ b/e2e/tests/node.feature @@ -0,0 +1,27 @@ +Feature: Node Namespace + + Scenario: User Wants Help Using The Node Command + Given the user has a node + When the user runs the command "Node help" + Then the user should be able to see standard output containing "Available Commands" + And the node should have exited without error + + Scenario: User Can Stake An Address + Given the user has a node + When the user stakes their node with amount 150000000001 uPOKT + Then the user should be able to see standard output containing "" + And the node should have exited without error + + Scenario: User Can Unstake An Address + Given the user has a node + When the user stakes their node with amount 150000000001 uPOKT + Then the user should be able to see standard output containing "" + Then the user should be able to unstake their node + Then the user should be able to see standard output containing "" + And the node should have exited without error + + Scenario: User Can Send To An Address + Given the user has a node + When the user sends 150000000 uPOKT to another address + Then the user should be able to see standard output containing "" + And the node should have exited without error diff --git a/e2e/tests/node.go b/e2e/tests/node.go new file mode 100644 index 000000000..0d626756a --- /dev/null +++ b/e2e/tests/node.go @@ -0,0 +1,76 @@ +// //go:build e2e + +package e2e + +import ( + "fmt" + "os/exec" + + "github.com/pokt-network/pocket/runtime" + "github.com/pokt-network/pocket/runtime/defaults" +) + +// cliPath is the path of the binary installed and is set by the Tiltfile +const cliPath = "/usr/local/bin/p1" + +var ( + // defaultRPCURL used by targetPod to build commands + defaultRPCURL string + // targetDevClientPod is the kube pod that executes calls to the pocket binary under test + targetDevClientPod = "deploy/dev-cli-client" +) + +func init() { + defaultRPCHost := runtime.GetEnv("RPC_HOST", defaults.RandomValidatorEndpointK8SHostname) + defaultRPCURL = fmt.Sprintf("http://%s:%s", defaultRPCHost, defaults.DefaultRPCPort) +} + +// commandResult combines the stdout, stderr, and err of an operation +type commandResult struct { + Stdout string + Stderr string + Err error +} + +// PocketClient is a single function interface for interacting with a node +type PocketClient interface { + RunCommand(...string) (*commandResult, error) + RunCommandOnHost(string, ...string) (*commandResult, error) +} + +// Ensure that Validator fulfills PocketClient +var _ PocketClient = &nodePod{} + +// nodePod holds the connection information to a specific pod in between different instructions during testing +type nodePod struct { + targetPodName string + result *commandResult // stores the result of the last command that was run +} + +// RunCommand runs a command on a pre-configured kube pod with the given args +func (n *nodePod) RunCommand(args ...string) (*commandResult, error) { + return n.RunCommandOnHost(defaultRPCURL, args...) +} + +// RunCommandOnHost runs a command on specified kube pod with the given args +func (n *nodePod) RunCommandOnHost(rpcUrl string, args ...string) (*commandResult, error) { + base := []string{ + "exec", "-i", targetDevClientPod, + "--container", "pocket", + "--", cliPath, + "--non_interactive=true", + "--remote_cli_url=" + rpcUrl, + } + args = append(base, args...) + cmd := exec.Command("kubectl", args...) + r := &commandResult{} + out, err := cmd.Output() + r.Stdout = string(out) + n.result = r + // IMPROVE: make targetPodName configurable + n.targetPodName = targetDevClientPod + if err != nil { + return r, err + } + return r, nil +} diff --git a/e2e/tests/state_sync.feature b/e2e/tests/state_sync.feature new file mode 100644 index 000000000..837159722 --- /dev/null +++ b/e2e/tests/state_sync.feature @@ -0,0 +1,20 @@ +Feature: State Sync Namespace + + Scenario: New FullNode does not sync to Blockchain at height 2 + Given the network is at genesis + And the network has "4" actors of type "Validator" + When the developer runs the command "ScaleActor full_nodes 1" + And the developer waits for "3000" milliseconds + Then "full-node-002" should be unreachable + When the developer runs the command "TriggerView" + And the developer waits for "1000" milliseconds + And the developer runs the command "TriggerView" + And the developer waits for "1000" milliseconds + Then "validator-001" should be at height "2" + And "validator-004" should be at height "2" + # full_nodes is the key used in `localnet_config.yaml` + When the developer runs the command "ScaleActor full_nodes 2" + # IMPROVE: Figure out if there's something better to do then waiting for a node to spin up + And the developer waits for "20000" milliseconds + # TODO(#812): The full node should be at height "2" after state sync is implemented + Then "full-node-002" should be at height "1" \ No newline at end of file diff --git a/e2e/tests/steps_init_test.go b/e2e/tests/steps_init_test.go index ee680cd82..1bd8dea7d 100644 --- a/e2e/tests/steps_init_test.go +++ b/e2e/tests/steps_init_test.go @@ -1,13 +1,15 @@ -//go:build e2e +// // go:build e2e package e2e import ( + "encoding/json" "fmt" "os" "path/filepath" "strings" "testing" + "time" pocketLogger "github.com/pokt-network/pocket/logger" "github.com/pokt-network/pocket/runtime/defaults" @@ -15,6 +17,8 @@ import ( pocketk8s "github.com/pokt-network/pocket/shared/k8s" "github.com/regen-network/gocuke" "github.com/stretchr/testify/require" + "golang.org/x/text/cases" + "golang.org/x/text/language" "k8s.io/client-go/kubernetes" "k8s.io/client-go/rest" "k8s.io/client-go/tools/clientcmd" @@ -25,13 +29,12 @@ var e2eLogger = pocketLogger.Global.CreateLoggerForModule("e2e") const ( // Each actor is represented e.g. validator-001-pocket:42069 thru validator-999-pocket:42069. // Defines the host & port scheme that LocalNet uses for naming actors. - validatorServiceURLTmpl = "validator-%s-pocket:%d" - // validatorA maps to suffix ID 001 and is also used by the cluster-manager - // though it has no special permissions. + validatorServiceURLTemplate = "validator-%s-pocket:%d" + // Mapping from validators to suffix IDs as convienece for some of the tests validatorA = "001" - // validatorB maps to suffix ID 002 and receives in the Send test. validatorB = "002" - chainId = "0001" + // Placeholder chainID + chainId = "0001" ) type rootSuite struct { @@ -42,24 +45,22 @@ type rootSuite struct { validatorKeys map[string]string // clientset is the kubernetes API we acquire from the user's $HOME/.kube/config clientset *kubernetes.Clientset - // validator holds command results between runs and reports errors to the test suite - // TECHDEBT: Rename `validator` to something more appropriate - validator *validatorPod - // validatorA maps to suffix ID 001 of the kube pod that we use as our control agent + // node holds command results between runs and reports errors to the test suite + node *nodePod } func (s *rootSuite) Before() { clientSet, err := getClientset(s) require.NoErrorf(s, err, "failed to get clientset") - vkmap, err := pocketk8s.FetchValidatorPrivateKeys(clientSet) + validatorKeyMap, err := pocketk8s.FetchValidatorPrivateKeys(clientSet) if err != nil { e2eLogger.Fatal().Err(err).Msg("failed to get validator key map") } - s.validator = new(validatorPod) + s.node = new(nodePod) s.clientset = clientSet - s.validatorKeys = vkmap + s.validatorKeys = validatorKeyMap } // TestFeatures runs the e2e tests specified in any .features files in this directory @@ -71,24 +72,136 @@ func TestFeatures(t *testing.T) { // InitializeScenario registers step regexes to function handlers func (s *rootSuite) TheUserHasAValidator() { - res, err := s.validator.RunCommand("help") + res, err := s.node.RunCommand("help") require.NoErrorf(s, err, res.Stderr) - s.validator.result = res + s.node.result = res } func (s *rootSuite) TheValidatorShouldHaveExitedWithoutError() { - require.NoError(s, s.validator.result.Err) + require.NoError(s, s.node.result.Err) } func (s *rootSuite) TheUserRunsTheCommand(cmd string) { cmds := strings.Split(cmd, " ") - res, err := s.validator.RunCommand(cmds...) + res, err := s.node.RunCommand(cmds...) require.NoError(s, err) - s.validator.result = res + s.node.result = res +} + +// TheDeveloperRunsTheCommand is similar to TheUserRunsTheCommand but exclusive to `Debug` commands +func (s *rootSuite) TheDeveloperRunsTheCommand(cmd string) { + cmds := strings.Split(cmd, " ") + cmds = append([]string{"Debug"}, cmds...) + res, err := s.node.RunCommand(cmds...) + require.NoError(s, err, fmt.Sprintf("failed to run command: '%s' due to error: %s", cmd, err)) + s.node.result = res + e2eLogger.Debug().Msgf("TheDeveloperRunsTheCommand: '%s' with result: %s", cmd, res.Stdout) + + // Special case for managing LocalNet config when scaling actors + if cmds[1] == "ScaleActor" { + s.syncLocalNetConfigFromHostToLocalFS() + } +} + +func (s *rootSuite) TheNetworkIsAtGenesis() { + s.TheDeveloperRunsTheCommand("ResetToGenesis") +} + +func (s *rootSuite) TheDeveloperWaitsForMilliseconds(millis int64) { + time.Sleep(time.Duration(millis) * time.Millisecond) +} + +func (s *rootSuite) TheNetworkHasActorsOfType(num int64, actor string) { + // normalize actor to Title case and plural + caser := cases.Title(language.AmericanEnglish) + actor = caser.String(strings.ToLower(actor)) + if len(actor) > 0 && actor[len(actor)-1] != 's' { + actor += "s" + } + args := []string{ + "Query", + actor, + } + + // Depending ont he type of `actor` we're querying, we'll have a different expected responses + // so not all of these fields will be populated, but at least one will be. + type expectedResponse struct { + NumValidators *int64 `json:"total_validators"` + NumApps *int64 `json:"total_apps"` + NumFishermen *int64 `json:"total_fishermen"` + NumServicers *int64 `json:"total_servicers"` + NumAccounts *int64 `json:"total_accounts"` + } + validate := func(res *expectedResponse) bool { + return res != nil && ((res.NumValidators != nil && *res.NumValidators > 0) || + (res.NumApps != nil && *res.NumApps > 0) || + (res.NumFishermen != nil && *res.NumFishermen > 0) || + (res.NumServicers != nil && *res.NumServicers > 0) || + (res.NumAccounts != nil && *res.NumAccounts > 0)) + } + + resRaw, err := s.node.RunCommand(args...) + require.NoError(s, err) + + res := getResponseFromStdout[expectedResponse](s, resRaw.Stdout, validate) + require.NotNil(s, res) + + // Validate that at least one of the fields that is populated has the right number of actors + if res.NumValidators != nil { + require.Equal(s, num, *res.NumValidators) + } else if res.NumApps != nil { + require.Equal(s, num, *res.NumApps) + } else if res.NumFishermen != nil { + require.Equal(s, num, *res.NumFishermen) + } else if res.NumServicers != nil { + require.Equal(s, num, *res.NumServicers) + } else if res.NumAccounts != nil { + require.Equal(s, num, *res.NumAccounts) + } +} + +func (s *rootSuite) ShouldBeUnreachable(pod string) { + validate := func(res *string) bool { + return res != nil && strings.Contains(*res, "Unable to connect to the RPC") + } + args := []string{ + "Query", + "Height", + } + rpcURL := fmt.Sprintf("http://%s-pocket:%s", pod, defaults.DefaultRPCPort) + resRaw, err := s.node.RunCommandOnHost(rpcURL, args...) + require.NoError(s, err) + + res := getStrFromStdout(s, resRaw.Stdout, validate) + require.NotNil(s, res) + + require.Equal(s, fmt.Sprintf("❌ Unable to connect to the RPC @ \x1b[1mhttp://%s-pocket:%s\x1b[0m", pod, defaults.DefaultRPCPort), *res) +} + +func (s *rootSuite) ShouldBeAtHeight(pod string, height int64) { + args := []string{ + "Query", + "Height", + } + type expectedResponse struct { + Height *int64 `json:"Height"` + } + validate := func(res *expectedResponse) bool { + return res != nil && res.Height != nil + } + + rpcURL := fmt.Sprintf("http://%s-pocket:%s", pod, defaults.DefaultRPCPort) + resRaw, err := s.node.RunCommandOnHost(rpcURL, args...) + require.NoError(s, err) + + res := getResponseFromStdout[expectedResponse](s, resRaw.Stdout, validate) + require.NotNil(s, res) + + require.Equal(s, height, *res.Height) } func (s *rootSuite) TheUserShouldBeAbleToSeeStandardOutputContaining(arg1 string) { - require.Contains(s, s.validator.result.Stdout, arg1) + require.Contains(s, s.node.result.Stdout, arg1) } func (s *rootSuite) TheUserStakesTheirValidatorWithAmountUpokt(amount int64) { @@ -111,15 +224,15 @@ func (s *rootSuite) TheUserSendsUpoktToAnotherAddress(amount int64) { valB.Address().String(), fmt.Sprintf("%d", amount), } - res, err := s.validator.RunCommand(args...) + res, err := s.node.RunCommand(args...) require.NoError(s, err) - s.validator.result = res + s.node.result = res } // stakeValidator runs Validator stake command with the address, amount, chains..., and serviceURL provided func (s *rootSuite) stakeValidator(privKey cryptoPocket.PrivateKey, amount string) { - validatorServiceUrl := fmt.Sprintf(validatorServiceURLTmpl, validatorA, defaults.DefaultP2PPort) + validatorServiceUrl := fmt.Sprintf(validatorServiceURLTemplate, validatorA, defaults.DefaultP2PPort) args := []string{ "Validator", "Stake", @@ -128,10 +241,10 @@ func (s *rootSuite) stakeValidator(privKey cryptoPocket.PrivateKey, amount strin chainId, validatorServiceUrl, } - res, err := s.validator.RunCommand(args...) + res, err := s.node.RunCommand(args...) require.NoError(s, err) - s.validator.result = res + s.node.result = res } // unstakeValidator unstakes the Validator at the same address that stakeValidator uses @@ -142,10 +255,10 @@ func (s *rootSuite) unstakeValidator() { "Unstake", privKey.Address().String(), } - res, err := s.validator.RunCommand(args...) + res, err := s.node.RunCommand(args...) require.NoError(s, err) - s.validator.result = res + s.node.result = res } // getPrivateKey generates a new keypair from the private hex key that we get from the clientset @@ -190,3 +303,39 @@ func inClusterConfig(t gocuke.TestingT) *rest.Config { return config } + +// getResponseFromStdout returns the first output from stdout that passes the validate function provided. +// For example, when running `p1 Query Height`, the output is: +// +// {"level":"info","module":"e2e","time":"2023-07-11T15:46:07-07:00","message":"..."} +// {"height":3} +// +// And will return the following map so it can be used by the caller: +// +// map[height:3] +func getResponseFromStdout[T any](t gocuke.TestingT, stdout string, validate func(res *T) bool) *T { + t.Helper() + + for _, s := range strings.Split(stdout, "\n") { + var m T + if err := json.Unmarshal([]byte(s), &m); err != nil { + continue + } + if !validate(&m) { + continue + } + return &m + } + return nil +} + +func getStrFromStdout(t gocuke.TestingT, stdout string, validate func(res *string) bool) *string { + t.Helper() + for _, s := range strings.Split(stdout, "\n") { + if !validate(&s) { + continue + } + return &s + } + return nil +} diff --git a/e2e/tests/tilt_helpers.go b/e2e/tests/tilt_helpers.go new file mode 100644 index 000000000..635217fa2 --- /dev/null +++ b/e2e/tests/tilt_helpers.go @@ -0,0 +1,36 @@ +// // go:build e2e +package e2e + +import ( + "log" + "os/exec" +) + +// HACK: Dynamic scaling actors using `p1` and the `e2e test framework` is still a WIP so this is a +// functional interim solution until there's a need for a proper design. +func (s *rootSuite) syncLocalNetConfigFromHostToLocalFS() { + if !isPackageInstalled("tilt") { + e2eLogger.Debug().Msgf("syncLocalNetConfigFromHostToLocalFS: 'tilt' is not installed, skipping...") + return + } + sedCmd := exec.Command("tilt", "trigger", "syncback_localnet_config") + err := sedCmd.Run() + if err != nil { + e2eLogger.Err(err).Msgf("syncLocalNetConfigFromHostToLocalFS: failed to run command: '%s'", sedCmd.String()) + log.Fatal(err) + } +} + +func isPackageInstalled(pkg string) bool { + _, err := exec.LookPath(pkg) + // check error + if err != nil { + // the executable is not found, return false + if execErr, ok := err.(*exec.Error); ok && execErr.Err == exec.ErrNotFound { + return false + } + // another kind of error happened, let's log and exit + log.Fatal(err) + } + return true +} diff --git a/e2e/tests/valdator.feature b/e2e/tests/valdator.feature deleted file mode 100644 index ec8a2ca47..000000000 --- a/e2e/tests/valdator.feature +++ /dev/null @@ -1,28 +0,0 @@ -# TECHDEBT: Validator should eventually be changed to full node or just node. -Feature: Validator Namespace - - Scenario: User Wants Help Using The Validator Command - Given the user has a validator - When the user runs the command "Validator help" - Then the user should be able to see standard output containing "Available Commands" - And the validator should have exited without error - - Scenario: User Can Stake An Address - Given the user has a validator - When the user stakes their validator with amount 150000000001 uPOKT - Then the user should be able to see standard output containing "" - And the validator should have exited without error - - Scenario: User Can Unstake An Address - Given the user has a validator - When the user stakes their validator with amount 150000000001 uPOKT - Then the user should be able to see standard output containing "" - Then the user should be able to unstake their validator - Then the user should be able to see standard output containing "" - And the validator should have exited without error - - Scenario: User Can Send To An Address - Given the user has a validator - When the user sends 150000000 uPOKT to another address - Then the user should be able to see standard output containing "" - And the validator should have exited without error diff --git a/e2e/tests/validator.go b/e2e/tests/validator.go deleted file mode 100644 index 04b27bf7f..000000000 --- a/e2e/tests/validator.go +++ /dev/null @@ -1,67 +0,0 @@ -//go:build e2e - -package e2e - -import ( - "fmt" - "os/exec" - - "github.com/pokt-network/pocket/runtime" - "github.com/pokt-network/pocket/runtime/defaults" -) - -var ( - // rpcURL used by targetPod to build commands - rpcURL string - // targetPod is the kube pod that executes calls to the pocket binary under test - targetPod = "deploy/dev-cli-client" -) - -func init() { - rpcHost := runtime.GetEnv("RPC_HOST", defaults.RandomValidatorEndpointK8SHostname) - rpcURL = fmt.Sprintf("http://%s:%s", rpcHost, defaults.DefaultRPCPort) -} - -// cliPath is the path of the binary installed and is set by the Tiltfile -const cliPath = "/usr/local/bin/p1" - -// commandResult combines the stdout, stderr, and err of an operation -type commandResult struct { - Stdout string - Stderr string - Err error -} - -// PocketClient is a single function interface for interacting with a node -type PocketClient interface { - RunCommand(...string) (*commandResult, error) -} - -// Ensure that Validator fulfills PocketClient -var _ PocketClient = &validatorPod{} - -// validatorPod holds the connection information to pod validator-001 for testing -type validatorPod struct { - result *commandResult // stores the result of the last command that was run -} - -// RunCommand runs a command on a target kube pod -func (v *validatorPod) RunCommand(args ...string) (*commandResult, error) { - base := []string{ - "exec", "-i", targetPod, - "--container", "pocket", - "--", cliPath, - "--non_interactive=true", - "--remote_cli_url=" + rpcURL, - } - args = append(base, args...) - cmd := exec.Command("kubectl", args...) - r := &commandResult{} - out, err := cmd.Output() - r.Stdout = string(out) - v.result = r - if err != nil { - return r, err - } - return r, nil -} diff --git a/go.mod b/go.mod index e9772889b..1599c24de 100644 --- a/go.mod +++ b/go.mod @@ -251,7 +251,7 @@ require ( github.com/valyala/fasttemplate v1.2.2 // indirect golang.org/x/mod v0.7.0 // indirect golang.org/x/sys v0.6.0 // indirect - golang.org/x/text v0.7.0 // indirect + golang.org/x/text v0.7.0 golang.org/x/time v0.0.0-20220411224347-583f2d630306 // indirect golang.org/x/tools v0.3.0 // indirect gopkg.in/ini.v1 v1.67.0 // indirect diff --git a/persistence/docs/CHANGELOG.md b/persistence/docs/CHANGELOG.md index 1bdae0d55..53c60d026 100644 --- a/persistence/docs/CHANGELOG.md +++ b/persistence/docs/CHANGELOG.md @@ -422,7 +422,7 @@ Deprecate PrePersistence - Added PopulateGenesisState function to persistence module - Fixed the stake status iota issue - Discovered and documented (with TODO) double setting parameters issue -- Attached to the Utility Module and using in `make compose_and_watch` +- Attached to the Utility Module and using in `make lightweight_localnet` ## [0.0.0.1] - 2022-07-05 diff --git a/persistence/docs/README.md b/persistence/docs/README.md index d29b5fab0..08d8398e1 100644 --- a/persistence/docs/README.md +++ b/persistence/docs/README.md @@ -99,7 +99,7 @@ A subset of these are explained below. Any targets or helpers to configure and launch the database instances do not populate the actual database. -A LocalNet (see `make compose_and_watch`) must have been executed in order to trigger creation of schemas and hydration of the relevant tables. +A LocalNet (see `make lightweight_localnet`) must have been executed in order to trigger creation of schemas and hydration of the relevant tables. #### CLI Access - db_cli_node diff --git a/shared/modules/doc/CHANGELOG.md b/shared/modules/doc/CHANGELOG.md index d6d965cce..207e7a92a 100644 --- a/shared/modules/doc/CHANGELOG.md +++ b/shared/modules/doc/CHANGELOG.md @@ -125,7 +125,7 @@ UtilityModule - Opened followup issue #163 - Added config and genesis generator to build package - Deprecated old build files -- Use new config and genesis files for make compose_and_watch -- Use new config and genesis files for make client_start && make client_connect +- Use new config and genesis files for make lightweight_localnet +- Use new config and genesis files for make lightweight_localnet_client && make lightweight_localnet_client_debug diff --git a/telemetry/README.md b/telemetry/README.md index eed3bb8b9..fd61c911a 100644 --- a/telemetry/README.md +++ b/telemetry/README.md @@ -158,7 +158,7 @@ make docker_loki_install 1. Spin up the stack ```bash -make compose_and_watch +make lightweight_localnet ``` 2. Wait a few seconds and **Voila!** From 9d840ed87c9585d7dbfbe191440ed079c82cecda Mon Sep 17 00:00:00 2001 From: Daniel Olshansky Date: Fri, 28 Jul 2023 17:04:34 -0700 Subject: [PATCH 087/100] Linting improvements --- app/client/cli/debug.go | 6 +++--- e2e/tests/node.go | 2 +- e2e/tests/state_sync.feature | 4 ++-- e2e/tests/steps_init_test.go | 10 +++++----- e2e/tests/tilt_helpers.go | 10 ++++------ 5 files changed, 15 insertions(+), 17 deletions(-) diff --git a/app/client/cli/debug.go b/app/client/cli/debug.go index ca5a1b764..1d3784445 100644 --- a/app/client/cli/debug.go +++ b/app/client/cli/debug.go @@ -127,9 +127,9 @@ func newDebugSubCommands() []*cobra.Command { if !slices.Contains(validActors, actor) { logger.Global.Fatal().Msg("Invalid actor type provided") } - sedCmd := exec.Command("sed", "-i", fmt.Sprintf("/%s:/,/count:/ s/count: [0-9]*/count: %s/", actor, numActors), "/usr/local/localnet_config.yaml") - err := sedCmd.Run() - if err != nil { + sedReplaceCmd := fmt.Sprintf("/%s:/,/count:/ s/count: [0-9]*/count: %s/", actor, numActors) + sedCmd := exec.Command("sed", "-i", sedReplaceCmd, "/usr/local/localnet_config.yaml") + if err := sedCmd.Run(); err != nil { log.Fatal(err) } }, diff --git a/e2e/tests/node.go b/e2e/tests/node.go index 0d626756a..7e484d92d 100644 --- a/e2e/tests/node.go +++ b/e2e/tests/node.go @@ -1,4 +1,4 @@ -// //go:build e2e +//go:build e2e package e2e diff --git a/e2e/tests/state_sync.feature b/e2e/tests/state_sync.feature index 837159722..f296f88b0 100644 --- a/e2e/tests/state_sync.feature +++ b/e2e/tests/state_sync.feature @@ -15,6 +15,6 @@ Feature: State Sync Namespace # full_nodes is the key used in `localnet_config.yaml` When the developer runs the command "ScaleActor full_nodes 2" # IMPROVE: Figure out if there's something better to do then waiting for a node to spin up - And the developer waits for "20000" milliseconds + And the developer waits for "40000" milliseconds # TODO(#812): The full node should be at height "2" after state sync is implemented - Then "full-node-002" should be at height "1" \ No newline at end of file + Then "full-node-002" should be at height "0" \ No newline at end of file diff --git a/e2e/tests/steps_init_test.go b/e2e/tests/steps_init_test.go index 1bd8dea7d..f60dd2ae2 100644 --- a/e2e/tests/steps_init_test.go +++ b/e2e/tests/steps_init_test.go @@ -1,4 +1,4 @@ -// // go:build e2e +//go:build e2e package e2e @@ -161,8 +161,8 @@ func (s *rootSuite) TheNetworkHasActorsOfType(num int64, actor string) { } func (s *rootSuite) ShouldBeUnreachable(pod string) { - validate := func(res *string) bool { - return res != nil && strings.Contains(*res, "Unable to connect to the RPC") + validate := func(res string) bool { + return strings.Contains(res, "Unable to connect to the RPC") } args := []string{ "Query", @@ -329,10 +329,10 @@ func getResponseFromStdout[T any](t gocuke.TestingT, stdout string, validate fun return nil } -func getStrFromStdout(t gocuke.TestingT, stdout string, validate func(res *string) bool) *string { +func getStrFromStdout(t gocuke.TestingT, stdout string, validate func(res string) bool) *string { t.Helper() for _, s := range strings.Split(stdout, "\n") { - if !validate(&s) { + if !validate(s) { continue } return &s diff --git a/e2e/tests/tilt_helpers.go b/e2e/tests/tilt_helpers.go index 635217fa2..7cc10cb36 100644 --- a/e2e/tests/tilt_helpers.go +++ b/e2e/tests/tilt_helpers.go @@ -1,4 +1,5 @@ -// // go:build e2e +//go:build e2e + package e2e import ( @@ -14,17 +15,14 @@ func (s *rootSuite) syncLocalNetConfigFromHostToLocalFS() { return } sedCmd := exec.Command("tilt", "trigger", "syncback_localnet_config") - err := sedCmd.Run() - if err != nil { + if err := sedCmd.Run(); err != nil { e2eLogger.Err(err).Msgf("syncLocalNetConfigFromHostToLocalFS: failed to run command: '%s'", sedCmd.String()) log.Fatal(err) } } func isPackageInstalled(pkg string) bool { - _, err := exec.LookPath(pkg) - // check error - if err != nil { + if _, err := exec.LookPath(pkg); err != nil { // the executable is not found, return false if execErr, ok := err.(*exec.Error); ok && execErr.Err == exec.ErrNotFound { return false From 2395a6177ecc31e7b4e5fa0059acb610053590a7 Mon Sep 17 00:00:00 2001 From: Daniel Olshansky Date: Tue, 1 Aug 2023 15:28:29 -0700 Subject: [PATCH 088/100] Update e2e/tests/steps_init_test.go Co-authored-by: d7t --- e2e/tests/steps_init_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/e2e/tests/steps_init_test.go b/e2e/tests/steps_init_test.go index f60dd2ae2..aeaffaf53 100644 --- a/e2e/tests/steps_init_test.go +++ b/e2e/tests/steps_init_test.go @@ -123,7 +123,7 @@ func (s *rootSuite) TheNetworkHasActorsOfType(num int64, actor string) { actor, } - // Depending ont he type of `actor` we're querying, we'll have a different expected responses + // Depending on the type of `actor` we're querying, we'll have a different set of expected responses // so not all of these fields will be populated, but at least one will be. type expectedResponse struct { NumValidators *int64 `json:"total_validators"` From 489978f040cffab5b146014ecaf12222f5828e70 Mon Sep 17 00:00:00 2001 From: Daniel Olshansky Date: Tue, 1 Aug 2023 15:41:52 -0700 Subject: [PATCH 089/100] Update documentation related to installing rsync --- Makefile | 16 +++++++++++++--- build/localnet/README.md | 10 +++++++--- 2 files changed, 20 insertions(+), 6 deletions(-) diff --git a/Makefile b/Makefile index e3c289138..0bf862535 100644 --- a/Makefile +++ b/Makefile @@ -38,7 +38,7 @@ help: docker_check: { \ if ( ! ( command -v docker >/dev/null && (docker compose version >/dev/null || command -v docker-compose >/dev/null) )); then \ - echo "Seems like you don't have Docker or docker-compose installed. Make sure you review docs/development/README.md before continuing"; \ + echo "Seems like you don't have Docker or docker-compose installed. Make sure you review build/localnet/README.md and docs/development/README.md before continuing"; \ exit 1; \ fi; \ } @@ -47,11 +47,21 @@ docker_check: kubectl_check: { \ if ( ! ( command -v kubectl >/dev/null )); then \ - echo "Seems like you don't have Kubectl installed. Make sure you review docs/development/README.md before continuing"; \ + echo "Seems like you don't have Kubectl installed. Make sure you review build/localnet/README.md and docs/development/README.md before continuing"; \ exit 1; \ fi; \ } +# Internal helper target - check if rsync is installed. +rsync_check: + { \ + if ( ! ( command -v kubectl >/dev/null )); then \ + echo "Seems like you don't have rsync installed. Make sure you review build/localnet/README.md and docs/development/README.md before continuing"; \ + exit 1; \ + fi; \ + } + + .PHONY: trigger_ci trigger_ci: ## Trigger the CI pipeline by submitting an empty commit; See https://github.com/pokt-network/pocket/issues/900 for details git commit --allow-empty -m "Empty commit" @@ -134,7 +144,7 @@ go_fmt: ## Format all the .go files in the project in place. gofmt -w -s . .PHONY: install_cli_deps -install_cli_deps: ## Installs `helm`, `tilt` and the underlying `ci_deps` +install_cli_deps: rsync_check kubectl_check docker_check ## Installs `helm`, `tilt` and the underlying `ci_deps` make install_ci_deps curl -fsSL https://raw.githubusercontent.com/tilt-dev/tilt/master/scripts/install.sh | bash curl -fsSL https://raw.githubusercontent.com/helm/helm/main/scripts/get-helm-3 | bash diff --git a/build/localnet/README.md b/build/localnet/README.md index ae52c319a..02d9a1494 100644 --- a/build/localnet/README.md +++ b/build/localnet/README.md @@ -2,7 +2,7 @@ This guide shows how to deploy a LocalNet using [pocket-operator](https://github.com/pokt-network/pocket-operator). -- [TLDR](#tldr) +- [TL;DR](#tldr) - [Dependencies](#dependencies) - [Choosing Kubernetes Distribution](#choosing-kubernetes-distribution) - [How to create Kind Kubernetes cluster](#how-to-create-kind-kubernetes-cluster) @@ -16,6 +16,8 @@ This guide shows how to deploy a LocalNet using [pocket-operator](https://github - [Interacting w/ LocalNet](#interacting-w-localnet) - [Make Targets](#make-targets) - [Addresses and keys on LocalNet](#addresses-and-keys-on-localnet) + - [Applications staked on LocalNet](#applications-staked-on-localnet) + - [Servicers staked on LocalNet](#servicers-staked-on-localnet) - [How to change configuration files](#how-to-change-configuration-files) - [Overriding default values for localnet with Tilt](#overriding-default-values-for-localnet-with-tilt) - [How does it work?](#how-does-it-work) @@ -26,7 +28,7 @@ This guide shows how to deploy a LocalNet using [pocket-operator](https://github - [Full Cleanup](#full-cleanup) - [Code Structure](#code-structure) -## TLDR +## TL;DR If you feel adventurous, and you know what you're doing, here is a rapid guide to start LocalNet: @@ -46,6 +48,7 @@ All necessary dependencies, except Docker and Kubernetes cluster, are installed 3. `Kubernetes cluster`: refer to [Choosing Kubernetes Distribution](#choosing-kubernetes-distribution) section for more details. 4. `kubectl`: CLI is required and should be configured to access the cluster. This should happen automatically if using Docker Desktop, Rancher Desktop, k3s, k3d, minikube, etc. 5. [helm](https://helm.sh/docs/intro/install): required to template the YAML manifests for the dependencies (e.g., Postgres, Grafana). Installation instructions available. +6. [rsync](https://www.hostinger.com/tutorials/how-to-use-rsync): required to for some extensions used with `Tilt`; https://github.com/tilt-dev/tilt-extensions/tree/master/syncback#usage ### Choosing Kubernetes Distribution @@ -149,8 +152,8 @@ For example: - `0010297b55fc9278e4be4f1bcfe52bf9bd0443f8` is a servicer #001. - `314019dbb7faf8390c1f0cf4976ef1215c90b7e4` is an application #314. - #### Applications staked on LocalNet + Applications with the following addresses are staked on LocalNet, through the [applications field of the genesis.json in the LocalNet configuration](https://github.com/pokt-network/pocket/blob/main/build/localnet/manifests/configs.yaml#L4088) - `00001fff518b1cdddd74c197d76ba5b5dedc0301` @@ -159,6 +162,7 @@ Applications with the following addresses are staked on LocalNet, through the [a These addresses can be used for e.g. testing the CLI. #### Servicers staked on LocalNet + Servicers with the following addresses are staked on LocalNet, through the [servicers field of the genesis.json in the LocalNet configuration](https://github.com/pokt-network/pocket/blob/main/build/localnet/manifests/configs.yaml#L4120) - `00002b8cea1bcc3dadc72ebecf95564ceb9c2e2a` From 4cc8405cf51eb8a9e361c471d7e90ac8a03d6da2 Mon Sep 17 00:00:00 2001 From: Daniel Olshansky Date: Tue, 1 Aug 2023 17:13:32 -0700 Subject: [PATCH 090/100] Added # IMPROVE(#959): Remove time-based waits from tests --- e2e/tests/debug.feature | 2 ++ e2e/tests/state_sync.feature | 2 ++ 2 files changed, 4 insertions(+) diff --git a/e2e/tests/debug.feature b/e2e/tests/debug.feature index e33bf33a7..38026897c 100644 --- a/e2e/tests/debug.feature +++ b/e2e/tests/debug.feature @@ -1,5 +1,7 @@ Feature: Debug Namespace + # IMPROVE(#959): Remove time-based waits from tests + # Since the configuration for consensus is optimistically responsive, we need to be in manual # Pacemaker mode and call TriggerView to further the blockchain. # 1 second was chosen arbitrarily for the time for block propagation. diff --git a/e2e/tests/state_sync.feature b/e2e/tests/state_sync.feature index f296f88b0..dc2d96beb 100644 --- a/e2e/tests/state_sync.feature +++ b/e2e/tests/state_sync.feature @@ -1,5 +1,7 @@ Feature: State Sync Namespace + # IMPROVE(#959): Remove time-based waits from tests + Scenario: New FullNode does not sync to Blockchain at height 2 Given the network is at genesis And the network has "4" actors of type "Validator" From 27922107e800ed90f3c2245d7c429c8270dd0768 Mon Sep 17 00:00:00 2001 From: Daniel Olshansky Date: Wed, 2 Aug 2023 15:00:57 -0700 Subject: [PATCH 091/100] Follow up on minor comments --- build/localnet/Tiltfile | 71 +++++---------------------------------- e2e/tests/node.go | 6 ++-- e2e/tests/tilt_helpers.go | 6 ++-- 3 files changed, 14 insertions(+), 69 deletions(-) diff --git a/build/localnet/Tiltfile b/build/localnet/Tiltfile index 0225c1d79..d4534df35 100644 --- a/build/localnet/Tiltfile +++ b/build/localnet/Tiltfile @@ -197,7 +197,6 @@ for x in range(localnet_config["validators"]["count"]): actor_number = actor_number + 1 formatted_number = formatted_actor_number(actor_number) -<<<<<<< HEAD k8s_yaml( helm( chart_dir, @@ -211,6 +210,8 @@ for x in range(localnet_config["validators"]["count"]): "genesis.externalConfigMap.name=v1-localnet-genesis", "genesis.externalConfigMap.key=genesis.json", "postgresql.primary.persistence.enabled=false", + "podAnnotations.prometheus\\.io/scrape=true", + "podAnnotations.prometheus\\.io/port=9000", "nodeType=validator", ], values=[chart_dir + "/pocket-validator-overrides.yaml"] @@ -218,25 +219,6 @@ for x in range(localnet_config["validators"]["count"]): else [], ) ) -======= - k8s_yaml(helm(chart_dir, - name="validator-%s-pocket" % formatted_number, - set=[ - "global.postgresql.auth.postgresPassword=LocalNetPassword", - "image.repository=pocket-image", - "privateKeySecretKeyRef.name=validators-private-keys", - "privateKeySecretKeyRef.key=%s" % formatted_number, - "genesis.preProvisionedGenesis.enabled=false", - "genesis.externalConfigMap.name=v1-localnet-genesis", - "genesis.externalConfigMap.key=genesis.json", - "postgresql.primary.persistence.enabled=false", - "podAnnotations.prometheus\\.io/scrape=true", - "podAnnotations.prometheus\\.io/port=9000", - "nodeType=validator", - ], - values=[chart_dir + "/pocket-validator-overrides.yaml"] if os.path.exists(chart_dir + "/pocket-validator-overrides.yaml") else [],)) ->>>>>>> main - k8s_resource("validator-%s-pocket" % formatted_number, labels=["pocket-validators"]) # Provisions servicer nodes @@ -245,7 +227,6 @@ for x in range(localnet_config["servicers"]["count"]): actor_number = actor_number + 1 formatted_number = formatted_actor_number(actor_number) -<<<<<<< HEAD k8s_yaml( helm( chart_dir, @@ -259,6 +240,8 @@ for x in range(localnet_config["servicers"]["count"]): "genesis.externalConfigMap.name=v1-localnet-genesis", "genesis.externalConfigMap.key=genesis.json", "postgresql.primary.persistence.enabled=false", + "podAnnotations.prometheus\\.io/scrape=true", + "podAnnotations.prometheus\\.io/port=9000", "config.servicer.enabled=true", "nodeType=servicer", ], @@ -267,26 +250,6 @@ for x in range(localnet_config["servicers"]["count"]): else [], ) ) -======= - k8s_yaml(helm(chart_dir, - name="servicer-%s-pocket" % formatted_number, - set=[ - "global.postgresql.auth.postgresPassword=LocalNetPassword", - "image.repository=pocket-image", - "privateKeySecretKeyRef.name=servicers-private-keys", - "privateKeySecretKeyRef.key=%s" % formatted_number, - "genesis.preProvisionedGenesis.enabled=false", - "genesis.externalConfigMap.name=v1-localnet-genesis", - "genesis.externalConfigMap.key=genesis.json", - "postgresql.primary.persistence.enabled=false", - "podAnnotations.prometheus\\.io/scrape=true", - "podAnnotations.prometheus\\.io/port=9000", - "config.servicer.enabled=true", - "nodeType=servicer", - ], - values=[chart_dir + "/pocket-servicer-overrides.yaml"] if os.path.exists(chart_dir + "/pocket-servicer-overrides.yaml") else [],)) ->>>>>>> main - k8s_resource("servicer-%s-pocket" % formatted_number, labels=["pocket-servicers"]) # Provisions fishermen nodes @@ -295,7 +258,6 @@ for x in range(localnet_config["fishermen"]["count"]): actor_number = actor_number + 1 formatted_number = formatted_actor_number(actor_number) -<<<<<<< HEAD k8s_yaml( helm( chart_dir, @@ -309,6 +271,8 @@ for x in range(localnet_config["fishermen"]["count"]): "genesis.externalConfigMap.name=v1-localnet-genesis", "genesis.externalConfigMap.key=genesis.json", "postgresql.primary.persistence.enabled=false", + "podAnnotations.prometheus\\.io/scrape=true", + "podAnnotations.prometheus\\.io/port=9000", "config.fisherman.enabled=true", "nodeType=fisherman", ], @@ -317,25 +281,6 @@ for x in range(localnet_config["fishermen"]["count"]): else [], ) ) -======= - k8s_yaml(helm(chart_dir, - name="fisherman-%s-pocket" % formatted_number, - set=[ - "global.postgresql.auth.postgresPassword=LocalNetPassword", - "image.repository=pocket-image", - "privateKeySecretKeyRef.name=fishermen-private-keys", - "privateKeySecretKeyRef.key=%s" % formatted_number, - "genesis.preProvisionedGenesis.enabled=false", - "genesis.externalConfigMap.name=v1-localnet-genesis", - "genesis.externalConfigMap.key=genesis.json", - "postgresql.primary.persistence.enabled=false", - "podAnnotations.prometheus\\.io/scrape=true", - "podAnnotations.prometheus\\.io/port=9000", - "config.fisherman.enabled=true", - "nodeType=fisherman", - ], - values=[chart_dir + "/pocket-fisherman-overrides.yaml"] if os.path.exists(chart_dir + "/pocket-fisherman-overrides.yaml") else [],)) ->>>>>>> main k8s_resource("fisherman-%s-pocket" % formatted_number, labels=["pocket-fishermen"]) @@ -357,8 +302,8 @@ for x in range(localnet_config["full_nodes"]["count"]): "genesis.externalConfigMap.name=v1-localnet-genesis", "genesis.externalConfigMap.key=genesis.json", "postgresql.primary.persistence.enabled=false", - "podAnnotations.prometheus\\.io/scrape=true", - "podAnnotations.prometheus\\.io/port=9000", + "podAnnotations.prometheus\\.io/scrape=true", + "podAnnotations.prometheus\\.io/port=9000", "nodeType=full", ], values=[chart_dir + "/pocket-full-node-overrides.yaml"] diff --git a/e2e/tests/node.go b/e2e/tests/node.go index 7e484d92d..422e6e009 100644 --- a/e2e/tests/node.go +++ b/e2e/tests/node.go @@ -65,12 +65,12 @@ func (n *nodePod) RunCommandOnHost(rpcUrl string, args ...string) (*commandResul cmd := exec.Command("kubectl", args...) r := &commandResult{} out, err := cmd.Output() + if err != nil { + return nil, err + } r.Stdout = string(out) n.result = r // IMPROVE: make targetPodName configurable n.targetPodName = targetDevClientPod - if err != nil { - return r, err - } return r, nil } diff --git a/e2e/tests/tilt_helpers.go b/e2e/tests/tilt_helpers.go index 7cc10cb36..a605ee22e 100644 --- a/e2e/tests/tilt_helpers.go +++ b/e2e/tests/tilt_helpers.go @@ -14,9 +14,9 @@ func (s *rootSuite) syncLocalNetConfigFromHostToLocalFS() { e2eLogger.Debug().Msgf("syncLocalNetConfigFromHostToLocalFS: 'tilt' is not installed, skipping...") return } - sedCmd := exec.Command("tilt", "trigger", "syncback_localnet_config") - if err := sedCmd.Run(); err != nil { - e2eLogger.Err(err).Msgf("syncLocalNetConfigFromHostToLocalFS: failed to run command: '%s'", sedCmd.String()) + tiltLocalnetConfigSyncbackTrigger := exec.Command("tilt", "trigger", "syncback_localnet_config") + if err := tiltLocalnetConfigSyncbackTrigger.Run(); err != nil { + e2eLogger.Err(err).Msgf("syncLocalNetConfigFromHostToLocalFS: failed to run command: '%s'", tiltLocalnetConfigSyncbackTrigger.String()) log.Fatal(err) } } From 9ce788287f17518f3827dcd29003ffe609d8e38a Mon Sep 17 00:00:00 2001 From: Daniel Olshansky Date: Wed, 2 Aug 2023 15:25:35 -0700 Subject: [PATCH 092/100] s/validator/node in a few places --- e2e/docs/E2E_ADR.md | 2 +- e2e/tests/node.feature | 6 +++--- e2e/tests/query.feature | 10 +++++----- e2e/tests/root.feature | 4 ++-- e2e/tests/steps_init_test.go | 4 ++-- 5 files changed, 13 insertions(+), 13 deletions(-) diff --git a/e2e/docs/E2E_ADR.md b/e2e/docs/E2E_ADR.md index 59a7546dd..ecefeda62 100644 --- a/e2e/docs/E2E_ADR.md +++ b/e2e/docs/E2E_ADR.md @@ -79,7 +79,7 @@ Below is an example of testing the `help` command of the Pocket binary. Feature: Root Namespace Scenario: User Needs Help - Given the user has a validator + Given the user has a node When the user runs the command "help" Then the user should be able to see standard output containing "Available Commands" And the pocket client should have exited without error diff --git a/e2e/tests/node.feature b/e2e/tests/node.feature index 69e1cc26f..45c73ac41 100644 --- a/e2e/tests/node.feature +++ b/e2e/tests/node.feature @@ -8,15 +8,15 @@ Feature: Node Namespace Scenario: User Can Stake An Address Given the user has a node - When the user stakes their node with amount 150000000001 uPOKT + When the user stakes their validator with amount 150000000001 uPOKT Then the user should be able to see standard output containing "" And the node should have exited without error Scenario: User Can Unstake An Address Given the user has a node - When the user stakes their node with amount 150000000001 uPOKT + When the user stakes their validator with amount 150000000001 uPOKT Then the user should be able to see standard output containing "" - Then the user should be able to unstake their node + Then the user should be able to unstake their validator Then the user should be able to see standard output containing "" And the node should have exited without error diff --git a/e2e/tests/query.feature b/e2e/tests/query.feature index 91e3e4eb9..74cc60180 100644 --- a/e2e/tests/query.feature +++ b/e2e/tests/query.feature @@ -1,14 +1,14 @@ Feature: Query Namespace - Scenario: User Wants Help Using The Query Command - Given the user has a validator + Scenario: User Wants Help Using The Query Command + Given the user has a node When the user runs the command "Query help" Then the user should be able to see standard output containing "Available Commands" - And the validator should have exited without error + And the node should have exited without error Scenario: User Wants To See The Block At Current Height - Given the user has a validator + Given the user has a node When the user runs the command "Query Block" Then the user should be able to see standard output containing "state_hash" - And the validator should have exited without error \ No newline at end of file + And the node should have exited without error \ No newline at end of file diff --git a/e2e/tests/root.feature b/e2e/tests/root.feature index 754534f2e..b9d6225d4 100644 --- a/e2e/tests/root.feature +++ b/e2e/tests/root.feature @@ -1,7 +1,7 @@ Feature: Root Namespace Scenario: User Needs Help - Given the user has a validator + Given the user has a node When the user runs the command "help" Then the user should be able to see standard output containing "Available Commands" - And the validator should have exited without error \ No newline at end of file + And the node should have exited without error \ No newline at end of file diff --git a/e2e/tests/steps_init_test.go b/e2e/tests/steps_init_test.go index aeaffaf53..bf56d87dd 100644 --- a/e2e/tests/steps_init_test.go +++ b/e2e/tests/steps_init_test.go @@ -71,13 +71,13 @@ func TestFeatures(t *testing.T) { // InitializeScenario registers step regexes to function handlers -func (s *rootSuite) TheUserHasAValidator() { +func (s *rootSuite) TheUserHasANode() { res, err := s.node.RunCommand("help") require.NoErrorf(s, err, res.Stderr) s.node.result = res } -func (s *rootSuite) TheValidatorShouldHaveExitedWithoutError() { +func (s *rootSuite) TheNodeShouldHaveExitedWithoutError() { require.NoError(s, s.node.result.Err) } From 4df3e28621999a51767086ab8ff8e10daf0d40fd Mon Sep 17 00:00:00 2001 From: Daniel Olshansky Date: Wed, 2 Aug 2023 15:32:55 -0700 Subject: [PATCH 093/100] Add account.feature and validator.feature --- Makefile | 4 ++- e2e/tests/{node.feature => account.feature} | 6 ++--- e2e/tests/validator.feature | 27 +++++++++++++++++++++ 3 files changed, 33 insertions(+), 4 deletions(-) rename e2e/tests/{node.feature => account.feature} (89%) create mode 100644 e2e/tests/validator.feature diff --git a/Makefile b/Makefile index 0bf862535..f3799e0bf 100644 --- a/Makefile +++ b/Makefile @@ -143,8 +143,10 @@ go_imports: ## Group imports using rinchsan/gosimports go_fmt: ## Format all the .go files in the project in place. gofmt -w -s . +# TODO(kdas): add `rsync_check` as a validation in `install_cli_deps`; https://github.com/pokt-network/pocket/assets/1892194/a7a24a11-f54d-46e2-a73e-9e8ea7d06726 + .PHONY: install_cli_deps -install_cli_deps: rsync_check kubectl_check docker_check ## Installs `helm`, `tilt` and the underlying `ci_deps` +install_cli_deps: kubectl_check docker_check ## Installs `helm`, `tilt` and the underlying `ci_deps` make install_ci_deps curl -fsSL https://raw.githubusercontent.com/tilt-dev/tilt/master/scripts/install.sh | bash curl -fsSL https://raw.githubusercontent.com/helm/helm/main/scripts/get-helm-3 | bash diff --git a/e2e/tests/node.feature b/e2e/tests/account.feature similarity index 89% rename from e2e/tests/node.feature rename to e2e/tests/account.feature index 45c73ac41..8e793dcd9 100644 --- a/e2e/tests/node.feature +++ b/e2e/tests/account.feature @@ -2,17 +2,17 @@ Feature: Node Namespace Scenario: User Wants Help Using The Node Command Given the user has a node - When the user runs the command "Node help" + When the user runs the command "Validator help" Then the user should be able to see standard output containing "Available Commands" And the node should have exited without error - Scenario: User Can Stake An Address + Scenario: User Can Stake A Validator Given the user has a node When the user stakes their validator with amount 150000000001 uPOKT Then the user should be able to see standard output containing "" And the node should have exited without error - Scenario: User Can Unstake An Address + Scenario: User Can Unstake A Validator Given the user has a node When the user stakes their validator with amount 150000000001 uPOKT Then the user should be able to see standard output containing "" diff --git a/e2e/tests/validator.feature b/e2e/tests/validator.feature new file mode 100644 index 000000000..e1bd22c4f --- /dev/null +++ b/e2e/tests/validator.feature @@ -0,0 +1,27 @@ +Feature: Validator Namespace + + Scenario: User Wants Help Using The Validator Command + Given the user has a node + When the user runs the command "Validator help" + Then the user should be able to see standard output containing "Available Commands" + And the node should have exited without error + + Scenario: User Can Stake A Validator + Given the user has a node + When the user stakes their validator with amount 150000000001 uPOKT + Then the user should be able to see standard output containing "" + And the node should have exited without error + + Scenario: User Can Unstake A Validator + Given the user has a node + When the user stakes their validator with amount 150000000001 uPOKT + Then the user should be able to see standard output containing "" + Then the user should be able to unstake their validator + Then the user should be able to see standard output containing "" + And the node should have exited without error + + Scenario: User Can Send To An Address + Given the user has a node + When the user sends 150000000 uPOKT to another address + Then the user should be able to see standard output containing "" + And the node should have exited without error From 34f635a6857c1548579b06199fab48fcb7d7308e Mon Sep 17 00:00:00 2001 From: Daniel Olshansky Date: Wed, 2 Aug 2023 15:49:27 -0700 Subject: [PATCH 094/100] Remove kubectl_check from install_cli_deps --- Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index f3799e0bf..8e9b99a68 100644 --- a/Makefile +++ b/Makefile @@ -143,10 +143,10 @@ go_imports: ## Group imports using rinchsan/gosimports go_fmt: ## Format all the .go files in the project in place. gofmt -w -s . -# TODO(kdas): add `rsync_check` as a validation in `install_cli_deps`; https://github.com/pokt-network/pocket/assets/1892194/a7a24a11-f54d-46e2-a73e-9e8ea7d06726 +# TODO(kdas): add `rsync_check` and `kubectl_check` as a validation in `install_cli_deps`; https://github.com/pokt-network/pocket/assets/1892194/a7a24a11-f54d-46e2-a73e-9e8ea7d06726 .PHONY: install_cli_deps -install_cli_deps: kubectl_check docker_check ## Installs `helm`, `tilt` and the underlying `ci_deps` +install_cli_deps: docker_check ## Installs `helm`, `tilt` and the underlying `ci_deps` make install_ci_deps curl -fsSL https://raw.githubusercontent.com/tilt-dev/tilt/master/scripts/install.sh | bash curl -fsSL https://raw.githubusercontent.com/helm/helm/main/scripts/get-helm-3 | bash From 12890634688fecee3675b15d7ba93f2150ef3fe6 Mon Sep 17 00:00:00 2001 From: Daniel Olshansky Date: Wed, 2 Aug 2023 16:06:03 -0700 Subject: [PATCH 095/100] Remove kubectl_check from install_cli_deps --- Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index 8e9b99a68..5330a0ff2 100644 --- a/Makefile +++ b/Makefile @@ -143,10 +143,10 @@ go_imports: ## Group imports using rinchsan/gosimports go_fmt: ## Format all the .go files in the project in place. gofmt -w -s . -# TODO(kdas): add `rsync_check` and `kubectl_check` as a validation in `install_cli_deps`; https://github.com/pokt-network/pocket/assets/1892194/a7a24a11-f54d-46e2-a73e-9e8ea7d06726 +# TODO(kdas): add `rsync_check`, `kubectl_check`, `docker_check` as a validation in `install_cli_deps`; https://github.com/pokt-network/pocket/assets/1892194/a7a24a11-f54d-46e2-a73e-9e8ea7d06726 .PHONY: install_cli_deps -install_cli_deps: docker_check ## Installs `helm`, `tilt` and the underlying `ci_deps` +install_cli_deps: ## Installs `helm`, `tilt` and the underlying `ci_deps` make install_ci_deps curl -fsSL https://raw.githubusercontent.com/tilt-dev/tilt/master/scripts/install.sh | bash curl -fsSL https://raw.githubusercontent.com/helm/helm/main/scripts/get-helm-3 | bash From 031c10cd70b66101f6deb77f975fcda11844061f Mon Sep 17 00:00:00 2001 From: Daniel Olshansky Date: Wed, 2 Aug 2023 17:22:47 -0700 Subject: [PATCH 096/100] Skip E2E test --- .github/workflows/main.yml | 1 + e2e/tests/state_sync.feature | 2 ++ e2e/tests/steps_init_test.go | 3 ++- 3 files changed, 5 insertions(+), 1 deletion(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 24d809706..486ab2ba7 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -150,6 +150,7 @@ jobs: needs: build-images if: contains(github.event.pull_request.labels.*.name, 'e2e-devnet-test') env: + POCKET_E2E_TEST_TAGS: "~@skip_in_ci" ARGO_HTTP1: true ARGO_SECURE: true ARGO_SERVER: ${{ vars.ARGO_SERVER }} diff --git a/e2e/tests/state_sync.feature b/e2e/tests/state_sync.feature index dc2d96beb..a412f033a 100644 --- a/e2e/tests/state_sync.feature +++ b/e2e/tests/state_sync.feature @@ -2,6 +2,8 @@ Feature: State Sync Namespace # IMPROVE(#959): Remove time-based waits from tests + + @skip_in_ci Scenario: New FullNode does not sync to Blockchain at height 2 Given the network is at genesis And the network has "4" actors of type "Validator" diff --git a/e2e/tests/steps_init_test.go b/e2e/tests/steps_init_test.go index bf56d87dd..1f83171f1 100644 --- a/e2e/tests/steps_init_test.go +++ b/e2e/tests/steps_init_test.go @@ -66,7 +66,8 @@ func (s *rootSuite) Before() { // TestFeatures runs the e2e tests specified in any .features files in this directory // * This test suite assumes that a LocalNet is running that can be accessed by `kubectl` func TestFeatures(t *testing.T) { - gocuke.NewRunner(t, &rootSuite{}).Path("*.feature").Run() + e2eTestTags := os.Getenv("POCKET_E2E_TEST_TAGS") + gocuke.NewRunner(t, &rootSuite{}).Path("*.feature").Tags(e2eTestTags).Run() } // InitializeScenario registers step regexes to function handlers From f5c68ba089cb519fc5d4ddfe41a1874cad6dff9e Mon Sep 17 00:00:00 2001 From: Daniel Olshansky Date: Wed, 2 Aug 2023 17:31:07 -0700 Subject: [PATCH 097/100] Updated TODOs --- Makefile | 5 +++-- e2e/tests/state_sync.feature | 3 +-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/Makefile b/Makefile index 5330a0ff2..e268d49d8 100644 --- a/Makefile +++ b/Makefile @@ -143,8 +143,9 @@ go_imports: ## Group imports using rinchsan/gosimports go_fmt: ## Format all the .go files in the project in place. gofmt -w -s . -# TODO(kdas): add `rsync_check`, `kubectl_check`, `docker_check` as a validation in `install_cli_deps`; https://github.com/pokt-network/pocket/assets/1892194/a7a24a11-f54d-46e2-a73e-9e8ea7d06726 - +# TODO(#964): add `rsync_check`, `kubectl_check`, `docker_check` as a validation in `install_cli_deps`; https://github.com/pokt-network/pocket/assets/1892194/a7a24a11-f54d-46e2-a73e-9e8ea7d06726 +# .PHONY: install_cli_deps +# install_cli_deps: rsync_check kubectl_check docker_check ## Installs `helm`, `tilt` and the underlying `ci_deps` .PHONY: install_cli_deps install_cli_deps: ## Installs `helm`, `tilt` and the underlying `ci_deps` make install_ci_deps diff --git a/e2e/tests/state_sync.feature b/e2e/tests/state_sync.feature index a412f033a..1aa85fe0e 100644 --- a/e2e/tests/state_sync.feature +++ b/e2e/tests/state_sync.feature @@ -1,8 +1,7 @@ Feature: State Sync Namespace # IMPROVE(#959): Remove time-based waits from tests - - + # TODO(#964): Remove the `skip_in_ci` tag for these tests @skip_in_ci Scenario: New FullNode does not sync to Blockchain at height 2 Given the network is at genesis From fa5577c4d8352334b33a38327d8a62adebc75031 Mon Sep 17 00:00:00 2001 From: Daniel Olshansky Date: Thu, 3 Aug 2023 12:07:54 -0700 Subject: [PATCH 098/100] Added tags to the ./argo-linux-amd64 commands when running e2e-tests --- .github/workflows/main.yml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 486ab2ba7..c2c58e235 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -150,7 +150,6 @@ jobs: needs: build-images if: contains(github.event.pull_request.labels.*.name, 'e2e-devnet-test') env: - POCKET_E2E_TEST_TAGS: "~@skip_in_ci" ARGO_HTTP1: true ARGO_SECURE: true ARGO_SERVER: ${{ vars.ARGO_SERVER }} @@ -198,4 +197,4 @@ jobs: - id: "run-e2e-tests" run: | - ./argo-linux-amd64 submit --wait --log --namespace devnet-issue-${{ github.event.pull_request.number }} --from 'wftmpl/dev-e2e-tests' --parameter gitsha="${{ github.event.pull_request.head.sha }}" + ./argo-linux-amd64 submit --wait --log --namespace devnet-issue-${{ github.event.pull_request.number }} --from 'wftmpl/dev-e2e-tests' --paramter tags="~@skip_in_ci" --parameter gitsha="${{ github.event.pull_request.head.sha }}" From 819f990c836e6da1a46cdb5df2a922b4784d840e Mon Sep 17 00:00:00 2001 From: Daniel Olshansky Date: Thu, 3 Aug 2023 14:09:34 -0700 Subject: [PATCH 099/100] Updated a few comments --- consensus/doc/PROTOCOL_STATE_SYNC.md | 3 ++- consensus/e2e_tests/utils_test.go | 1 + consensus/module_consensus_debugging.go | 2 +- state_machine/fsm.go | 2 ++ state_machine/module.go | 2 ++ 5 files changed, 8 insertions(+), 2 deletions(-) diff --git a/consensus/doc/PROTOCOL_STATE_SYNC.md b/consensus/doc/PROTOCOL_STATE_SYNC.md index 4c28a11b3..af8a76e01 100644 --- a/consensus/doc/PROTOCOL_STATE_SYNC.md +++ b/consensus/doc/PROTOCOL_STATE_SYNC.md @@ -1,7 +1,8 @@ # State Sync Protocol Design -⚠️ IMPORTANT NOTES TO THE (last updated on 06/08/2023): +⚠️ IMPORTANT NOTES TO THE (last updated on 08/03/2023): +- TECHDEBT(#821): Once the FSM is remove, state sync will look completely different - State Sync implementation is a WIP and has taken several different shapes. - This document is out of date and needs to be updated to reflect the latest implementation. This will be done once a functional implementation is in place. - This document makes some assumption of P2P implementation details, so please see [p2p](../../p2p/README.md) for the latest source of truth. diff --git a/consensus/e2e_tests/utils_test.go b/consensus/e2e_tests/utils_test.go index e915c94ee..a812a5fbe 100644 --- a/consensus/e2e_tests/utils_test.go +++ b/consensus/e2e_tests/utils_test.go @@ -77,6 +77,7 @@ func generateNodeRuntimeMgrs(t *testing.T, validatorCount int, clockMgr clock.Cl return runtimeMgrs } +// TECHDEBT: Try to avoid exposing `modules.EventsChannel` outside the `shared` package and adding the appropriate mocks to the bus. func createTestConsensusPocketNodes( t *testing.T, buses []modules.Bus, diff --git a/consensus/module_consensus_debugging.go b/consensus/module_consensus_debugging.go index c1003670e..0af34ad83 100644 --- a/consensus/module_consensus_debugging.go +++ b/consensus/module_consensus_debugging.go @@ -1,7 +1,7 @@ package consensus // All the code below is used for debugging & testing purposes only and should not be used in prod. -// TODO: Add debug/test tags to avoid accidental production usage. +// TECHDEBT: Add debug/test tags to avoid accidental production usage. import ( typesCons "github.com/pokt-network/pocket/consensus/types" diff --git a/state_machine/fsm.go b/state_machine/fsm.go index 08ab7c6a2..7950efaa6 100644 --- a/state_machine/fsm.go +++ b/state_machine/fsm.go @@ -1,5 +1,7 @@ package state_machine +// TECHDEBT(#821): Remove the dependency of state sync on FSM, as well as the FSM in general. + import ( "github.com/looplab/fsm" coreTypes "github.com/pokt-network/pocket/shared/core/types" diff --git a/state_machine/module.go b/state_machine/module.go index 63c682677..9a8b6b505 100644 --- a/state_machine/module.go +++ b/state_machine/module.go @@ -1,5 +1,7 @@ package state_machine +// TECHDEBT(#821): Remove the dependency of state sync on FSM, as well as the FSM in general. + import ( "context" From 6ffe41d26653c8df13e4902e43d8907c14f743b0 Mon Sep 17 00:00:00 2001 From: Daniel Olshansky Date: Mon, 7 Aug 2023 11:21:50 -0700 Subject: [PATCH 100/100] Last week's state sync debugging changes --- build/config/config.validator1.json | 3 ++- build/localnet/Tiltfile | 7 ++++++- build/localnet/cluster-manager/main.go | 3 ++- consensus/block.go | 3 +++ consensus/event_handler.go | 1 + consensus/hotstuff_replica.go | 2 +- consensus/module.go | 2 +- consensus/module_consensus_pacemaker.go | 3 ++- consensus/state_sync/helpers.go | 8 +++++++- consensus/state_sync/module.go | 27 ++++++++++++++++++++++--- consensus/state_sync/server.go | 10 +++++++++ consensus/state_sync_handler.go | 4 ++-- e2e/tests/node.go | 3 ++- runtime/defaults/defaults.go | 1 + shared/node.go | 8 ++++---- state_machine/fsm.go | 3 +++ utility/unit_of_work/actor.go | 10 ++++----- utility/unit_of_work/module.go | 3 +++ 18 files changed, 79 insertions(+), 22 deletions(-) diff --git a/build/config/config.validator1.json b/build/config/config.validator1.json index 6031f1a37..216b2b512 100644 --- a/build/config/config.validator1.json +++ b/build/config/config.validator1.json @@ -56,13 +56,14 @@ }, "servicer": { "enabled": true, + "private_key": "0ca1a40ddecdab4f5b04fa0bfed1d235beaa2b8082e7554425607516f0862075dfe357de55649e6d2ce889acf15eb77e94ab3c5756fe46d3c7538d37f27f115e", "chains": ["0001"] }, "ibc": { "enabled": true, "stores_dir": "/var/ibc", "host": { - "private_key": "0ca1a40ddecdab4f5b04fa0bfed1d235beaa2b8082e7554425607516f0862075dfe357de55649e6d2ce889acf15eb77e94ab3c5756fe46d3c7538d37f27f115e" + "private_key": "0ca1a40ddecdab4f5b04fa0bfed1d235beaa2b8082e7554425607516f0862075dfe357de55649e6d2ce889acf15eb77e94ab3c5756fe46d3c7538d37f27f115e" } } } diff --git a/build/localnet/Tiltfile b/build/localnet/Tiltfile index d4534df35..abca365da 100644 --- a/build/localnet/Tiltfile +++ b/build/localnet/Tiltfile @@ -48,7 +48,7 @@ deps = [ "build/debug.go", "consensus", "p2p", - "persistance", + "persistence", "rpc", "runtime", "shared", @@ -56,6 +56,11 @@ deps = [ "utility", "vendor", "logger", + "e2e", + "ibc", + "internal", + "state_machine", + "tools", ] deps_full_path = [root_dir + "/" + depdir for depdir in deps] diff --git a/build/localnet/cluster-manager/main.go b/build/localnet/cluster-manager/main.go index 8fb1004fd..e443537df 100644 --- a/build/localnet/cluster-manager/main.go +++ b/build/localnet/cluster-manager/main.go @@ -57,7 +57,8 @@ func init() { clusterManagerCmd.PersistentFlags().StringVar( &flags.RemoteCLIURL, "remote_cli_url", - defaults.Validator1EndpointK8SHostname, + // defaults.Validator1EndpointK8SHostname, + defaults.FullNode1EndpointK8SHostname, "takes a remote endpoint in the form of ://: (uses RPC Port)", ) diff --git a/consensus/block.go b/consensus/block.go index 70d40fe1f..5a3750b4c 100644 --- a/consensus/block.go +++ b/consensus/block.go @@ -78,6 +78,9 @@ func (m *consensusModule) isBlockInMessageValidBasic(msg *typesCons.HotstuffMess // refreshUtilityUnitOfWork is a helper that creates a new Utility Unit Of Work and clears/nullifies a previous one if it exists func (m *consensusModule) refreshUtilityUnitOfWork() error { + // m.m.Lock() + // defer m.m.Unlock() + // Catch-all structure to release the previous utility UOW if it wasn't properly cleaned up. utilityUnitOfWork := m.utilityUnitOfWork diff --git a/consensus/event_handler.go b/consensus/event_handler.go index f428e6a88..51e7c89f0 100644 --- a/consensus/event_handler.go +++ b/consensus/event_handler.go @@ -99,6 +99,7 @@ func (m *consensusModule) HandleUnsynced(msg *messaging.StateMachineTransitionEv func (m *consensusModule) HandleSyncMode(msg *messaging.StateMachineTransitionEvent) error { m.logger.Info().Str("source", consensusFSMHandlerSource).Msg("Node is in Sync Mode. About to start synchronous sync loop...") go m.stateSync.StartSynchronousStateSync() + m.logger.Info().Str("source", consensusFSMHandlerSource).Msg("Node is in Sync Mode. Finished synchronous sync loop!!!") return nil } diff --git a/consensus/hotstuff_replica.go b/consensus/hotstuff_replica.go index 7eea2037e..15df2f262 100644 --- a/consensus/hotstuff_replica.go +++ b/consensus/hotstuff_replica.go @@ -163,7 +163,7 @@ func (handler *HotstuffReplicaMessageHandler) HandleDecideMessage(m *consensusMo if err := m.commitBlock(m.block); err != nil { m.logger.Error().Err(err).Msg("Could not commit block") m.paceMaker.InterruptRound("failed to commit block") - return + return } m.paceMaker.NewHeight() diff --git a/consensus/module.go b/consensus/module.go index 9c25df5a6..0ce5a8590 100644 --- a/consensus/module.go +++ b/consensus/module.go @@ -282,7 +282,7 @@ func (m *consensusModule) loadPersistedState() error { defer readCtx.Release() latestHeight, err := readCtx.GetMaximumBlockHeight() - if err != nil || latestHeight == 0 { + if err != nil { // TODO: Proper state sync not implemented yet return nil } diff --git a/consensus/module_consensus_pacemaker.go b/consensus/module_consensus_pacemaker.go index 6ac886b6d..e856f9ec7 100644 --- a/consensus/module_consensus_pacemaker.go +++ b/consensus/module_consensus_pacemaker.go @@ -54,7 +54,8 @@ func (m *consensusModule) BroadcastMessageToValidators(msg *anypb.Any) error { } func (m *consensusModule) IsLeader() bool { - return m.leaderId != nil && *m.leaderId == m.nodeId + valMod, err := m.GetBus().GetUtilityModule().GetValidatorModule() + return err == nil && valMod != nil && m.leaderId != nil && *m.leaderId == m.nodeId } func (m *consensusModule) IsLeaderSet() bool { diff --git a/consensus/state_sync/helpers.go b/consensus/state_sync/helpers.go index a930eb5af..6d2ba6078 100644 --- a/consensus/state_sync/helpers.go +++ b/consensus/state_sync/helpers.go @@ -21,7 +21,9 @@ func (m *stateSync) sendStateSyncMessage(msg *typesCons.StateSyncMessage, dst cr // For now, aggregating the messages when requests is good enough. func (m *stateSync) getAggregatedStateSyncMetadata() (minHeight, maxHeight uint64) { chanLen := len(m.metadataReceived) - m.logger.Info().Msgf("Looping over %d state sync metadata responses", chanLen) + m.logger.Info(). + Int16("num_state_sync_metadata_messages", int16(chanLen)). + Msgf("About to loop overstate sync metadata responses") for i := 0; i < chanLen; i++ { metadata := <-m.metadataReceived @@ -32,5 +34,9 @@ func (m *stateSync) getAggregatedStateSyncMetadata() (minHeight, maxHeight uint6 minHeight = metadata.MinHeight } } + m.logger.Info().Fields(map[string]any{ + "min_height": minHeight, + "max_height": maxHeight, + }).Msg("Finished aggregating state sync metadata") return } diff --git a/consensus/state_sync/module.go b/consensus/state_sync/module.go index f3bd7606c..968cf5af3 100644 --- a/consensus/state_sync/module.go +++ b/consensus/state_sync/module.go @@ -20,7 +20,7 @@ const ( committedBlocsChannelSize = 100 metadataChannelSize = 1000 blocksChannelSize = 1000 - metadataSyncPeriod = 45 * time.Second + metadataSyncPeriod = 10 * time.Second ) type StateSyncModule interface { @@ -99,10 +99,21 @@ func (m *stateSync) StartSynchronousStateSync() { // Get a view into the state of the network _, maxHeight := m.getAggregatedStateSyncMetadata() + m.logger.Info(). + Uint64("current_height", currentHeight). + Uint64("max_height", maxHeight). + Msg("Synchronous state sync is requesting blocks...") + // Synchronously request block requests from the current height to the aggregated metadata height // Note that we are using `<=` because: // - maxHeight is the max * committed * height of the network // - currentHeight is the latest * committing * height of the node + + // We do not need to request the genesis block from anyone + if currentHeight == 0 { + currentHeight += 1 + } + for currentHeight <= maxHeight { m.logger.Info().Msgf("Synchronous state sync is requesting block: %d, ending height: %d", currentHeight, maxHeight) @@ -132,7 +143,7 @@ func (m *stateSync) StartSynchronousStateSync() { case blockHeight := <-m.committedBlocksChannel: m.logger.Info().Msgf("State sync received event that block %d is committed!", blockHeight) case <-time.After(blockWaitingPeriod): - m.logger.Warn().Msgf("Timed out waiting for block %d to be committed...", currentHeight) + m.logger.Error().Msgf("Timed out waiting for block %d to be committed...", currentHeight) } // Update the height and continue catching up to the latest known state @@ -159,8 +170,18 @@ func (m *stateSync) StartSynchronousStateSync() { } func (m *stateSync) HandleStateSyncMetadataResponse(res *typesCons.StateSyncMetadataResponse) { - m.logger.Info().Msg("Handling state sync metadata response") + m.logger.Info().Fields(map[string]any{ + "peer_address": res.PeerAddress, + "min_height": res.MinHeight, + "max_height": res.MaxHeight, + }).Msg("Handling state sync metadata response") m.metadataReceived <- res + + if res.MaxHeight > 0 && m.GetBus().GetConsensusModule().CurrentHeight() <= res.MaxHeight { + if err := m.GetBus().GetStateMachineModule().SendEvent(coreTypes.StateMachineEvent_Consensus_IsUnsynced); err != nil { + m.logger.Error().Err(err).Msg("Failed to send state machine event") + } + } } func (m *stateSync) HandleBlockCommittedEvent(msg *messaging.ConsensusNewHeightEvent) { diff --git a/consensus/state_sync/server.go b/consensus/state_sync/server.go index 40900e157..7b2e6e4e3 100644 --- a/consensus/state_sync/server.go +++ b/consensus/state_sync/server.go @@ -24,6 +24,11 @@ func (m *stateSync) HandleStateSyncMetadataRequest(metadataReq *typesCons.StateS serverNodePeerAddress := consensusMod.GetNodeAddress() clientPeerAddress := metadataReq.PeerAddress + // No blocks or metadata to share at genesis + if consensusMod.CurrentHeight() == 0 { + return + } + // current height is the height of the block that is being processed, so we need to subtract 1 for the last finalized block prevPersistedBlockHeight := consensusMod.CurrentHeight() - 1 @@ -81,6 +86,11 @@ func (m *stateSync) HandleGetBlockRequest(blockReq *typesCons.GetBlockRequest) { serverNodePeerAddress := consensusMod.GetNodeAddress() clientPeerAddress := blockReq.PeerAddress + // No blocks or metadata to share at genesis + if consensusMod.CurrentHeight() == 0 { + return + } + // Check if the block should be retrievable based on the node's consensus height prevPersistedBlockHeight := consensusMod.CurrentHeight() - 1 if prevPersistedBlockHeight < blockReq.Height { diff --git a/consensus/state_sync_handler.go b/consensus/state_sync_handler.go index 99ee7822a..f33d809db 100644 --- a/consensus/state_sync_handler.go +++ b/consensus/state_sync_handler.go @@ -33,7 +33,7 @@ func (m *consensusModule) handleStateSyncMessage(stateSyncMessage *typesCons.Sta switch stateSyncMessage.Message.(type) { case *typesCons.StateSyncMessage_MetadataReq: - m.logger.Info().Str("proto_type", "MetadataRequest").Msg("Handling StateSyncMessage MetadataReq") + // m.logger.Info().Str("proto_type", "MetadataRequest").Msg("Handling StateSyncMessage MetadataReq") if !m.consCfg.ServerModeEnabled { m.logger.Warn().Msg("Node's server module is not enabled") return nil @@ -42,7 +42,7 @@ func (m *consensusModule) handleStateSyncMessage(stateSyncMessage *typesCons.Sta return nil case *typesCons.StateSyncMessage_GetBlockReq: - m.logger.Info().Str("proto_type", "GetBlockRequest").Msg("Handling StateSyncMessage GetBlockRequest") + // m.logger.Info().Str("proto_type", "GetBlockRequest").Msg("Handling StateSyncMessage GetBlockRequest") if !m.consCfg.ServerModeEnabled { m.logger.Warn().Msg("Node's server module is not enabled") return nil diff --git a/e2e/tests/node.go b/e2e/tests/node.go index 422e6e009..9e570a5d1 100644 --- a/e2e/tests/node.go +++ b/e2e/tests/node.go @@ -21,7 +21,8 @@ var ( ) func init() { - defaultRPCHost := runtime.GetEnv("RPC_HOST", defaults.RandomValidatorEndpointK8SHostname) + // defaultRPCHost := runtime.GetEnv("RPC_HOST", defaults.RandomValidatorEndpointK8SHostname) + defaultRPCHost := runtime.GetEnv("RPC_HOST", defaults.FullNode1EndpointK8SHostname) defaultRPCURL = fmt.Sprintf("http://%s:%s", defaultRPCHost, defaults.DefaultRPCPort) } diff --git a/runtime/defaults/defaults.go b/runtime/defaults/defaults.go index 6a66726b3..a01f0ff58 100644 --- a/runtime/defaults/defaults.go +++ b/runtime/defaults/defaults.go @@ -28,6 +28,7 @@ const ( DefaultRPCHost = "localhost" Validator1EndpointDockerComposeHostname = "validator1" Validator1EndpointK8SHostname = "validator-001-pocket" + FullNode1EndpointK8SHostname = "full-node-001-pocket" RandomValidatorEndpointK8SHostname = "pocket-validators" ) diff --git a/shared/node.go b/shared/node.go index 221942126..c1cda79a1 100644 --- a/shared/node.go +++ b/shared/node.go @@ -163,10 +163,10 @@ func (m *Node) GetBus() modules.Bus { // TODO: Move all message types this is dependant on to the `messaging` package func (node *Node) handleEvent(message *messaging.PocketEnvelope) error { contentType := message.GetContentType() - logger.Global.Debug().Fields(map[string]any{ - "message": message, - "contentType": contentType, - }).Msg("node handling event") + // logger.Global.Debug().Fields(map[string]any{ + // "message": message, + // "contentType": contentType, + // }).Msg("node handling event") switch contentType { diff --git a/state_machine/fsm.go b/state_machine/fsm.go index 7950efaa6..0c5227203 100644 --- a/state_machine/fsm.go +++ b/state_machine/fsm.go @@ -52,6 +52,7 @@ func NewNodeFSM(callbacks *fsm.Callbacks, options ...func(*fsm.FSM)) *fsm.FSM { Name: string(coreTypes.StateMachineEvent_Consensus_IsSyncedNonValidator), Src: []string{ string(coreTypes.StateMachineState_Consensus_SyncMode), + // string(coreTypes.StateMachineState_Consensus_Synced), }, Dst: string(coreTypes.StateMachineState_Consensus_Synced), }, @@ -60,6 +61,8 @@ func NewNodeFSM(callbacks *fsm.Callbacks, options ...func(*fsm.FSM)) *fsm.FSM { Src: []string{ string(coreTypes.StateMachineState_Consensus_Pacemaker), string(coreTypes.StateMachineState_Consensus_Synced), + // string(coreTypes.StateMachineState_Consensus_Unsynced), + string(coreTypes.StateMachineState_Consensus_SyncMode), string(coreTypes.StateMachineState_P2P_Bootstrapped), }, Dst: string(coreTypes.StateMachineState_Consensus_Unsynced), diff --git a/utility/unit_of_work/actor.go b/utility/unit_of_work/actor.go index 4f99a0e70..75a523c6c 100644 --- a/utility/unit_of_work/actor.go +++ b/utility/unit_of_work/actor.go @@ -307,19 +307,19 @@ func (u *baseUtilityUnitOfWork) getActorExists(actorType coreTypes.ActorType, ad // IMPROVE: Need to re-evaluate the design of `Output Address` to support things like "rev-share" // and multiple output addresses. -func (u *baseUtilityUnitOfWork) getActorOutputAddress(actorType coreTypes.ActorType, operator []byte) ([]byte, coreTypes.Error) { +func (uow *baseUtilityUnitOfWork) getActorOutputAddress(actorType coreTypes.ActorType, operator []byte) ([]byte, coreTypes.Error) { var outputAddr []byte var err error switch actorType { case coreTypes.ActorType_ACTOR_TYPE_APP: - outputAddr, err = u.persistenceReadContext.GetAppOutputAddress(operator, u.height) + outputAddr, err = uow.persistenceReadContext.GetAppOutputAddress(operator, uow.height) case coreTypes.ActorType_ACTOR_TYPE_FISH: - outputAddr, err = u.persistenceReadContext.GetFishermanOutputAddress(operator, u.height) + outputAddr, err = uow.persistenceReadContext.GetFishermanOutputAddress(operator, uow.height) case coreTypes.ActorType_ACTOR_TYPE_SERVICER: - outputAddr, err = u.persistenceReadContext.GetServicerOutputAddress(operator, u.height) + outputAddr, err = uow.persistenceReadContext.GetServicerOutputAddress(operator, uow.height) case coreTypes.ActorType_ACTOR_TYPE_VAL: - outputAddr, err = u.persistenceReadContext.GetValidatorOutputAddress(operator, u.height) + outputAddr, err = uow.persistenceReadContext.GetValidatorOutputAddress(operator, uow.height) default: err = coreTypes.ErrUnknownActorType(actorType.String()) } diff --git a/utility/unit_of_work/module.go b/utility/unit_of_work/module.go index a654218ac..60df5f5aa 100644 --- a/utility/unit_of_work/module.go +++ b/utility/unit_of_work/module.go @@ -15,6 +15,7 @@ const ( var _ modules.UtilityUnitOfWork = &baseUtilityUnitOfWork{} +// TODO: Rename all `u * baseUtilityUnitOfWork` to `uow * baseUtilityUnitOfWork` for consistency type baseUtilityUnitOfWork struct { base_modules.IntegrableModule @@ -117,6 +118,8 @@ func (uow *baseUtilityUnitOfWork) Commit(quorumCert []byte) error { } func (uow *baseUtilityUnitOfWork) Release() error { + uow.logger.Info().Msg("releasing the unit of work...") + rwCtx := uow.persistenceRWContext if rwCtx != nil { uow.persistenceRWContext = nil