github.com/onflow/flow-go@v0.35.7-crescendo-preview.23-atree-inlining/module/metrics/libp2p_resource_manager.go (about)

     1  package metrics
     2  
     3  import (
     4  	"strconv"
     5  
     6  	"github.com/libp2p/go-libp2p/core/network"
     7  	"github.com/libp2p/go-libp2p/core/peer"
     8  	"github.com/libp2p/go-libp2p/core/protocol"
     9  	rcmgr "github.com/libp2p/go-libp2p/p2p/host/resource-manager"
    10  	"github.com/prometheus/client_golang/prometheus"
    11  	"github.com/prometheus/client_golang/prometheus/promauto"
    12  	"github.com/rs/zerolog"
    13  
    14  	p2plogging "github.com/onflow/flow-go/network/p2p/logging"
    15  	"github.com/onflow/flow-go/utils/logging"
    16  )
    17  
    18  type LibP2PResourceManagerMetrics struct {
    19  	logger zerolog.Logger
    20  	// libp2p resource manager metrics
    21  	// connections
    22  	allowConnectionCount *prometheus.CounterVec
    23  	blockConnectionCount *prometheus.CounterVec
    24  	// streams
    25  	allowStreamCount *prometheus.CounterVec
    26  	blockStreamCount *prometheus.CounterVec
    27  	// peers
    28  	allowPeerCount prometheus.Counter
    29  	blockPeerCount prometheus.Counter
    30  	// protocol
    31  	allowProtocolCount     prometheus.Counter
    32  	blockProtocolCount     prometheus.Counter
    33  	blockProtocolPeerCount prometheus.Counter
    34  	// services
    35  	allowServiceCount     prometheus.Counter
    36  	blockServiceCount     prometheus.Counter
    37  	blockServicePeerCount prometheus.Counter
    38  	// memory
    39  	allowMemoryHistogram prometheus.Histogram
    40  	blockMemoryHistogram prometheus.Histogram
    41  
    42  	prefix string
    43  }
    44  
// Compile-time check that *LibP2PResourceManagerMetrics satisfies rcmgr.MetricsReporter.
var _ rcmgr.MetricsReporter = (*LibP2PResourceManagerMetrics)(nil)
    46  
    47  func NewLibP2PResourceManagerMetrics(logger zerolog.Logger, prefix string) *LibP2PResourceManagerMetrics {
    48  	l := &LibP2PResourceManagerMetrics{logger: logger, prefix: prefix}
    49  
    50  	l.allowConnectionCount = promauto.NewCounterVec(prometheus.CounterOpts{
    51  		Namespace: namespaceNetwork,
    52  		Subsystem: subsystemLibp2p,
    53  		Name:      l.prefix + "resource_manager_allow_connection_total",
    54  		Help:      "total number of connections allowed by the libp2p resource manager",
    55  
    56  		// labels are: "inbound", "outbound" and whether the connection is using a file descriptor
    57  	}, []string{LabelConnectionDirection, LabelConnectionUseFD})
    58  
    59  	l.blockConnectionCount = promauto.NewCounterVec(prometheus.CounterOpts{
    60  		Namespace: namespaceNetwork,
    61  		Subsystem: subsystemLibp2p,
    62  		Name:      l.prefix + "resource_manager_block_connection_total",
    63  		Help:      "total number of connections blocked by the libp2p resource manager",
    64  
    65  		// labels are: "inbound", "outbound" and whether the connection is using a file descriptor
    66  	}, []string{LabelConnectionDirection, LabelConnectionUseFD})
    67  
    68  	l.allowStreamCount = promauto.NewCounterVec(prometheus.CounterOpts{
    69  		Namespace: namespaceNetwork,
    70  		Subsystem: subsystemLibp2p,
    71  		Name:      l.prefix + "resource_manager_allow_stream_total",
    72  		Help:      "total number of streams allowed by the libp2p resource manager",
    73  	}, []string{LabelConnectionDirection})
    74  
    75  	l.blockStreamCount = promauto.NewCounterVec(prometheus.CounterOpts{
    76  		Namespace: namespaceNetwork,
    77  		Subsystem: subsystemLibp2p,
    78  		Name:      l.prefix + "resource_manager_block_stream_total",
    79  		Help:      "total number of streams blocked by the libp2p resource manager",
    80  	}, []string{LabelConnectionDirection})
    81  
    82  	l.allowPeerCount = promauto.NewCounter(prometheus.CounterOpts{
    83  		Namespace: namespaceNetwork,
    84  		Subsystem: subsystemLibp2p,
    85  		Name:      l.prefix + "resource_manager_allow_peer_total",
    86  		Help:      "total number of remote peers allowed by the libp2p resource manager to attach to their relevant incoming/outgoing streams",
    87  	})
    88  
    89  	// Note: this is a low level metric than blockProtocolPeerCount.
    90  	// This metric is incremented when a peer is blocked by the libp2p resource manager on attaching as one end of a stream (on any protocol).
    91  	l.blockPeerCount = promauto.NewCounter(prometheus.CounterOpts{
    92  		Namespace: namespaceNetwork,
    93  		Subsystem: subsystemLibp2p,
    94  		Name:      l.prefix + "resource_manager_block_peer_total",
    95  		Help:      "total number of remote peers blocked by the libp2p resource manager from attaching to their relevant incoming/outgoing streams",
    96  	})
    97  
    98  	l.allowProtocolCount = promauto.NewCounter(prometheus.CounterOpts{
    99  		Namespace: namespaceNetwork,
   100  		Subsystem: subsystemLibp2p,
   101  		Name:      l.prefix + "resource_manager_allow_protocol_total",
   102  		Help:      "total number of protocols allowed by the libp2p resource manager to attach to their relevant incoming/outgoing streams",
   103  	})
   104  
   105  	l.blockProtocolCount = promauto.NewCounter(prometheus.CounterOpts{
   106  		Namespace: namespaceNetwork,
   107  		Subsystem: subsystemLibp2p,
   108  		Name:      l.prefix + "resource_manager_block_protocol_total",
   109  		Help:      "total number of protocols blocked by the libp2p resource manager from attaching to their relevant incoming/outgoing streams",
   110  	})
   111  
   112  	// Note: this is a higher level metric than blockPeerCount and blockProtocolCount.
   113  	// This metric is incremented when a peer is already attached as one end of a stream but on a different reserved protocol.
   114  	l.blockProtocolPeerCount = promauto.NewCounter(prometheus.CounterOpts{
   115  		Namespace: namespaceNetwork,
   116  		Subsystem: subsystemLibp2p,
   117  		Name:      l.prefix + "resource_manager_block_protocol_peer_total",
   118  		Help:      "total number of remote peers blocked by the libp2p resource manager from attaching to their relevant incoming/outgoing streams on a specific protocol",
   119  	})
   120  
   121  	l.allowServiceCount = promauto.NewCounter(prometheus.CounterOpts{
   122  		Namespace: namespaceNetwork,
   123  		Subsystem: subsystemLibp2p,
   124  		Name:      l.prefix + "resource_manager_allow_service_total",
   125  		Help:      "total number of remote services (e.g., ping, relay) allowed by the libp2p resource manager to attach to their relevant incoming/outgoing streams",
   126  	})
   127  
   128  	l.blockServiceCount = promauto.NewCounter(prometheus.CounterOpts{
   129  		Namespace: namespaceNetwork,
   130  		Subsystem: subsystemLibp2p,
   131  		Name:      l.prefix + "resource_manager_block_service_total",
   132  		Help:      "total number of remote services (e.g., ping, relay) blocked by the libp2p resource manager from attaching to their relevant incoming/outgoing streams",
   133  	})
   134  
   135  	// Note: this is a higher level metric than blockServiceCount and blockPeerCount.
   136  	// This metric is incremented when a service is already attached as one end of a stream but on a different reserved protocol.
   137  	l.blockServicePeerCount = promauto.NewCounter(prometheus.CounterOpts{
   138  		Namespace: namespaceNetwork,
   139  		Subsystem: subsystemLibp2p,
   140  		Name:      l.prefix + "resource_manager_block_service_peer_total",
   141  		Help:      "total number of remote services (e.g., ping, relay) blocked by the libp2p resource manager from attaching to their relevant incoming/outgoing streams on a specific peer",
   142  	})
   143  
   144  	l.allowMemoryHistogram = promauto.NewHistogram(prometheus.HistogramOpts{
   145  		Namespace: namespaceNetwork,
   146  		Subsystem: subsystemLibp2p,
   147  		Name:      l.prefix + "resource_manager_allowed_memory_bytes",
   148  		Help:      "size of memory allocation requests allowed by the libp2p resource manager",
   149  		Buckets:   []float64{KiB, 10 * KiB, 100 * KiB, 500 * KiB, 1 * MiB, 10 * MiB, 100 * MiB, 500 * MiB, 1 * GiB},
   150  	})
   151  
   152  	l.blockMemoryHistogram = promauto.NewHistogram(prometheus.HistogramOpts{
   153  		Namespace: namespaceNetwork,
   154  		Subsystem: subsystemLibp2p,
   155  		Name:      l.prefix + "resource_manager_blocked_memory_bytes",
   156  		Help:      "size of memory allocation requests blocked by the libp2p resource manager",
   157  		Buckets:   []float64{KiB, 10 * KiB, 100 * KiB, 500 * KiB, 1 * MiB, 10 * MiB, 100 * MiB, 500 * MiB, 1 * GiB},
   158  	})
   159  
   160  	return l
   161  }
   162  
   163  func (l *LibP2PResourceManagerMetrics) AllowConn(dir network.Direction, usefd bool) {
   164  	l.allowConnectionCount.WithLabelValues(dir.String(), strconv.FormatBool(usefd)).Inc()
   165  	l.logger.Trace().Str("direction", dir.String()).Bool("use_file_descriptor", usefd).Msg("allowing connection")
   166  }
   167  
   168  func (l *LibP2PResourceManagerMetrics) BlockConn(dir network.Direction, usefd bool) {
   169  	l.blockConnectionCount.WithLabelValues(dir.String(), strconv.FormatBool(usefd)).Inc()
   170  	l.logger.Debug().Bool(logging.KeySuspicious, true).Str("direction", dir.String()).Bool("using_file_descriptor", usefd).Msg("blocking connection")
   171  }
   172  
   173  func (l *LibP2PResourceManagerMetrics) AllowStream(p peer.ID, dir network.Direction) {
   174  	l.allowStreamCount.WithLabelValues(dir.String()).Inc()
   175  	l.logger.Trace().Str("peer", p2plogging.PeerId(p)).Str("direction", dir.String()).Msg("allowing stream")
   176  }
   177  
   178  func (l *LibP2PResourceManagerMetrics) BlockStream(p peer.ID, dir network.Direction) {
   179  	l.blockStreamCount.WithLabelValues(dir.String()).Inc()
   180  	l.logger.Debug().Bool(logging.KeySuspicious, true).Str("peer", p2plogging.PeerId(p)).Str("direction", dir.String()).Msg("blocking stream")
   181  }
   182  
   183  func (l *LibP2PResourceManagerMetrics) AllowPeer(p peer.ID) {
   184  	l.allowPeerCount.Inc()
   185  	l.logger.Trace().Str("peer", p2plogging.PeerId(p)).Msg("allowing peer")
   186  }
   187  
   188  func (l *LibP2PResourceManagerMetrics) BlockPeer(p peer.ID) {
   189  	l.blockPeerCount.Inc()
   190  	l.logger.Debug().Bool(logging.KeySuspicious, true).Str("peer", p2plogging.PeerId(p)).Msg("blocking peer")
   191  }
   192  
   193  func (l *LibP2PResourceManagerMetrics) AllowProtocol(proto protocol.ID) {
   194  	l.allowProtocolCount.Inc()
   195  	l.logger.Trace().Str("protocol", string(proto)).Msg("allowing protocol")
   196  }
   197  
   198  func (l *LibP2PResourceManagerMetrics) BlockProtocol(proto protocol.ID) {
   199  	l.blockProtocolCount.Inc()
   200  	l.logger.Debug().Bool(logging.KeySuspicious, true).Str("protocol", string(proto)).Msg("blocking protocol")
   201  }
   202  
   203  func (l *LibP2PResourceManagerMetrics) BlockProtocolPeer(proto protocol.ID, p peer.ID) {
   204  	l.blockProtocolPeerCount.Inc()
   205  	l.logger.Debug().Bool(logging.KeySuspicious, true).Str("protocol", string(proto)).Str("peer", p2plogging.PeerId(p)).Msg("blocking protocol for peer")
   206  }
   207  
   208  func (l *LibP2PResourceManagerMetrics) AllowService(svc string) {
   209  	l.allowServiceCount.Inc()
   210  	l.logger.Trace().Str("service", svc).Msg("allowing service")
   211  }
   212  
   213  func (l *LibP2PResourceManagerMetrics) BlockService(svc string) {
   214  	l.blockServiceCount.Inc()
   215  	l.logger.Debug().Bool(logging.KeySuspicious, true).Str("service", svc).Msg("blocking service")
   216  }
   217  
   218  func (l *LibP2PResourceManagerMetrics) BlockServicePeer(svc string, p peer.ID) {
   219  	l.blockServicePeerCount.Inc()
   220  	l.logger.Debug().Bool(logging.KeySuspicious, true).Str("service", svc).Str("peer", p2plogging.PeerId(p)).Msg("blocking service for peer")
   221  }
   222  
   223  func (l *LibP2PResourceManagerMetrics) AllowMemory(size int) {
   224  	l.allowMemoryHistogram.Observe(float64(size))
   225  	l.logger.Trace().Int("size", size).Msg("allowing memory")
   226  }
   227  
   228  func (l *LibP2PResourceManagerMetrics) BlockMemory(size int) {
   229  	l.blockMemoryHistogram.Observe(float64(size))
   230  	l.logger.Debug().Bool(logging.KeySuspicious, true).Int("size", size).Msg("blocking memory")
   231  }