github.com/onflow/flow-go@v0.33.17/engine/access/ping/engine.go (about) 1 package ping 2 3 import ( 4 "context" 5 "encoding/binary" 6 "time" 7 8 "github.com/rs/zerolog" 9 10 "github.com/onflow/flow-go/engine" 11 "github.com/onflow/flow-go/model/flow" 12 "github.com/onflow/flow-go/model/flow/filter" 13 "github.com/onflow/flow-go/module" 14 "github.com/onflow/flow-go/network" 15 "github.com/onflow/flow-go/network/p2p" 16 ) 17 18 // PingTimeout is maximum time to wait for a ping reply from a remote node 19 const PingTimeout = time.Second * 4 20 const PingInterval = time.Minute 21 22 type Engine struct { 23 unit *engine.Unit 24 log zerolog.Logger 25 idProvider module.IdentityProvider 26 idTranslator p2p.IDTranslator 27 me module.Local 28 metrics module.PingMetrics 29 30 pingEnabled bool 31 pingService network.PingService 32 nodeInfo map[flow.Identifier]string // additional details about a node such as operator name 33 } 34 35 func New( 36 log zerolog.Logger, 37 idProvider module.IdentityProvider, 38 idTranslator p2p.IDTranslator, 39 me module.Local, 40 metrics module.PingMetrics, 41 pingEnabled bool, 42 nodeInfoFile string, 43 pingService network.PingService, 44 ) (*Engine, error) { 45 eng := &Engine{ 46 unit: engine.NewUnit(), 47 log: log.With().Str("engine", "ping").Logger(), 48 idProvider: idProvider, 49 idTranslator: idTranslator, 50 me: me, 51 metrics: metrics, 52 pingEnabled: pingEnabled, 53 pingService: pingService, 54 } 55 56 // if a node info file is provided, it is read and the additional node information is reported as part of the ping metric 57 if nodeInfoFile != "" { 58 nodeInfo, err := readExtraNodeInfoJSON(nodeInfoFile) 59 if err != nil { 60 log.Error().Err(err).Str("node_info_file", nodeInfoFile).Msg("failed to read node info file") 61 } else { 62 eng.nodeInfo = nodeInfo 63 log.Debug().Str("node_info_file", nodeInfoFile).Msg("using node info file") 64 } 65 } else { 66 // initialize nodeInfo with an empty map 67 eng.nodeInfo = make(map[flow.Identifier]string) 68 // the node info file is not mandatory and should not stop the Ping engine from running 69 log.Trace().Msg("no node info file specified") 70 } 71 72 return eng, nil 73 } 74 75 // Ready returns a ready channel that is closed once the engine has fully 76 // started. For the ingestion engine, we consider the engine up and running 77 // upon initialization. 78 func (e *Engine) Ready() <-chan struct{} { 79 // only launch when ping is enabled 80 if e.pingEnabled { 81 e.unit.Launch(e.startPing) 82 } 83 e.log.Info().Bool("ping enabled", e.pingEnabled).Msg("ping enabled") 84 return e.unit.Ready() 85 } 86 87 // Done returns a done channel that is closed once the engine has fully stopped. 88 // For the ingestion engine, it only waits for all submit goroutines to end. 89 func (e *Engine) Done() <-chan struct{} { 90 return e.unit.Done() 91 } 92 93 func (e *Engine) startPing() { 94 95 e.unit.LaunchPeriodically(func() { 96 peers := e.idProvider.Identities(filter.Not(filter.HasNodeID(e.me.NodeID()))) 97 98 // for each peer, send a ping every ping interval 99 for _, peer := range peers { 100 peer := peer 101 pid := peer.ID() 102 delay := time.Duration(binary.BigEndian.Uint16(pid[:2])) % (PingInterval / time.Millisecond) 103 e.unit.LaunchAfter(delay, func() { 104 e.pingNode(peer) 105 }) 106 } 107 }, PingInterval, 0) 108 } 109 110 // pingNode pings the given peer and updates the metrics with the result and the additional node information 111 func (e *Engine) pingNode(peer *flow.Identity) { 112 pid, err := e.idTranslator.GetPeerID(peer.ID()) 113 114 if err != nil { 115 e.log.Error().Err(err).Str("peer", peer.String()).Msg("failed to get peer ID") 116 return 117 } 118 119 ctx, cancel := context.WithTimeout(context.Background(), PingTimeout) 120 defer cancel() 121 122 // ping the node 123 resp, rtt, pingErr := e.pingService.Ping(ctx, pid) // ping will timeout in libp2p.PingTimeout seconds 124 if pingErr != nil { 125 e.log.Debug().Err(pingErr).Str("target", peer.ID().String()).Msg("failed to ping") 126 // report the rtt duration as negative to make it easier to distinguish between pingable and non-pingable nodes 127 rtt = -1 128 } 129 130 // get the additional info about the node 131 info := e.nodeInfo[peer.ID()] 132 133 // update metric 134 e.metrics.NodeReachable(peer, info, rtt) 135 136 // if ping succeeded then update the node info metric 137 if pingErr == nil { 138 e.metrics.NodeInfo(peer, info, resp.Version, resp.BlockHeight, resp.HotstuffView) 139 } 140 }