github.com/onflow/flow-go@v0.33.17/engine/access/ping/engine.go (about)

     1  package ping
     2  
     3  import (
     4  	"context"
     5  	"encoding/binary"
     6  	"time"
     7  
     8  	"github.com/rs/zerolog"
     9  
    10  	"github.com/onflow/flow-go/engine"
    11  	"github.com/onflow/flow-go/model/flow"
    12  	"github.com/onflow/flow-go/model/flow/filter"
    13  	"github.com/onflow/flow-go/module"
    14  	"github.com/onflow/flow-go/network"
    15  	"github.com/onflow/flow-go/network/p2p"
    16  )
    17  
    18  // PingTimeout is maximum time to wait for a ping reply from a remote node
    19  const PingTimeout = time.Second * 4
    20  const PingInterval = time.Minute
    21  
// Engine periodically pings every identity returned by the identity provider
// (except this node itself) and reports the outcome through PingMetrics.
type Engine struct {
	// unit launches and schedules the engine's goroutines and supplies the
	// channels returned by Ready and Done.
	unit         *engine.Unit
	// log is the engine's logger, tagged with engine=ping in New.
	log          zerolog.Logger
	// idProvider supplies the identities to ping on each round.
	idProvider   module.IdentityProvider
	// idTranslator maps a flow identifier to the peer ID handed to pingService.
	idTranslator p2p.IDTranslator
	// me identifies the local node, which is excluded from the ping targets.
	me           module.Local
	// metrics receives the reachability and node-info results of each ping.
	metrics      module.PingMetrics

	// pingEnabled gates whether Ready launches the periodic ping loop.
	pingEnabled bool
	// pingService performs the actual ping against a remote peer.
	pingService network.PingService
	nodeInfo    map[flow.Identifier]string // additional details about a node such as operator name
}
    34  
    35  func New(
    36  	log zerolog.Logger,
    37  	idProvider module.IdentityProvider,
    38  	idTranslator p2p.IDTranslator,
    39  	me module.Local,
    40  	metrics module.PingMetrics,
    41  	pingEnabled bool,
    42  	nodeInfoFile string,
    43  	pingService network.PingService,
    44  ) (*Engine, error) {
    45  	eng := &Engine{
    46  		unit:         engine.NewUnit(),
    47  		log:          log.With().Str("engine", "ping").Logger(),
    48  		idProvider:   idProvider,
    49  		idTranslator: idTranslator,
    50  		me:           me,
    51  		metrics:      metrics,
    52  		pingEnabled:  pingEnabled,
    53  		pingService:  pingService,
    54  	}
    55  
    56  	// if a node info file is provided, it is read and the additional node information is reported as part of the ping metric
    57  	if nodeInfoFile != "" {
    58  		nodeInfo, err := readExtraNodeInfoJSON(nodeInfoFile)
    59  		if err != nil {
    60  			log.Error().Err(err).Str("node_info_file", nodeInfoFile).Msg("failed to read node info file")
    61  		} else {
    62  			eng.nodeInfo = nodeInfo
    63  			log.Debug().Str("node_info_file", nodeInfoFile).Msg("using node info file")
    64  		}
    65  	} else {
    66  		// initialize nodeInfo with an empty map
    67  		eng.nodeInfo = make(map[flow.Identifier]string)
    68  		// the node info file is not mandatory and should not stop the Ping engine from running
    69  		log.Trace().Msg("no node info file specified")
    70  	}
    71  
    72  	return eng, nil
    73  }
    74  
// Ready launches the periodic ping loop (startPing) on the engine's unit when
// ping is enabled, logs the ping setting, and returns the unit's ready channel.
//
// NOTE(review): every call to Ready launches startPing again when ping is
// enabled — confirm that callers invoke Ready only once.
func (e *Engine) Ready() <-chan struct{} {
	// only launch when ping is enabled
	if e.pingEnabled {
		e.unit.Launch(e.startPing)
	}
	// the message text is the same whether or not ping is enabled; the boolean
	// field carries the actual setting
	e.log.Info().Bool("ping enabled", e.pingEnabled).Msg("ping enabled")
	return e.unit.Ready()
}
    86  
// Done returns the done channel of the engine's unit.
// NOTE(review): presumably the channel closes once all goroutines launched
// through the unit have finished — confirm against engine.Unit.
func (e *Engine) Done() <-chan struct{} {
	return e.unit.Done()
}
    92  
    93  func (e *Engine) startPing() {
    94  
    95  	e.unit.LaunchPeriodically(func() {
    96  		peers := e.idProvider.Identities(filter.Not(filter.HasNodeID(e.me.NodeID())))
    97  
    98  		// for each peer, send a ping every ping interval
    99  		for _, peer := range peers {
   100  			peer := peer
   101  			pid := peer.ID()
   102  			delay := time.Duration(binary.BigEndian.Uint16(pid[:2])) % (PingInterval / time.Millisecond)
   103  			e.unit.LaunchAfter(delay, func() {
   104  				e.pingNode(peer)
   105  			})
   106  		}
   107  	}, PingInterval, 0)
   108  }
   109  
   110  // pingNode pings the given peer and updates the metrics with the result and the additional node information
   111  func (e *Engine) pingNode(peer *flow.Identity) {
   112  	pid, err := e.idTranslator.GetPeerID(peer.ID())
   113  
   114  	if err != nil {
   115  		e.log.Error().Err(err).Str("peer", peer.String()).Msg("failed to get peer ID")
   116  		return
   117  	}
   118  
   119  	ctx, cancel := context.WithTimeout(context.Background(), PingTimeout)
   120  	defer cancel()
   121  
   122  	// ping the node
   123  	resp, rtt, pingErr := e.pingService.Ping(ctx, pid) // ping will timeout in libp2p.PingTimeout seconds
   124  	if pingErr != nil {
   125  		e.log.Debug().Err(pingErr).Str("target", peer.ID().String()).Msg("failed to ping")
   126  		// report the rtt duration as negative to make it easier to distinguish between pingable and non-pingable nodes
   127  		rtt = -1
   128  	}
   129  
   130  	// get the additional info about the node
   131  	info := e.nodeInfo[peer.ID()]
   132  
   133  	// update metric
   134  	e.metrics.NodeReachable(peer, info, rtt)
   135  
   136  	// if ping succeeded then update the node info metric
   137  	if pingErr == nil {
   138  		e.metrics.NodeInfo(peer, info, resp.Version, resp.BlockHeight, resp.HotstuffView)
   139  	}
   140  }