github.com/corverroos/quorum@v21.1.0+incompatible/dashboard/peers.go (about)

     1  // Copyright 2019 The go-ethereum Authors
     2  // This file is part of the go-ethereum library.
     3  //
     4  // The go-ethereum library is free software: you can redistribute it and/or modify
     5  // it under the terms of the GNU Lesser General Public License as published by
     6  // the Free Software Foundation, either version 3 of the License, or
     7  // (at your option) any later version.
     8  //
     9  // The go-ethereum library is distributed in the hope that it will be useful,
    10  // but WITHOUT ANY WARRANTY; without even the implied warranty of
    11  // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
    12  // GNU Lesser General Public License for more details.
    13  //
    14  // You should have received a copy of the GNU Lesser General Public License
    15  // along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
    16  
    17  package dashboard
    18  
    19  import (
    20  	"container/list"
    21  	"strings"
    22  	"time"
    23  
    24  	"github.com/ethereum/go-ethereum/metrics"
    25  
    26  	"github.com/ethereum/go-ethereum/log"
    27  	"github.com/ethereum/go-ethereum/p2p"
    28  )
    29  
const (
	eventBufferLimit = 128 // Capacity of the channel buffering the incoming metered peer events.
	knownPeerLimit   = 100 // Maximum number of stored peers, which successfully made the handshake.
	attemptLimit     = 200 // Maximum number of stored connection attempts, which failed before/during the handshake.

	// eventLimit is the maximum number of the dashboard's custom peer events
	// that are collected between two metering periods and sent to the clients
	// as one message. It equals four times knownPeerLimit. Within this file it
	// only serves as the initial capacity of the event buffer; the number of
	// collected events is not capped yet.
	// TODO (kurkomisi): Limit the number of events.
	eventLimit = knownPeerLimit << 2
)
    41  
// peerContainer contains information about the node's peers. This data structure
// maintains the metered peer data based on the different behaviours of the peers.
//
// Every peer has an IP address, and the peers that manage to make the handshake
// (known peers) have node IDs too. More than one peer can appear with the same
// IP, therefore the peer container data structure is a tree consisting of a map
// of maps, where the first key groups the peers by IP, while the second one
// groups them by the node ID. The known peers can be active if their connection
// is still open, or inactive otherwise. The peers failing before the handshake
// (unknown peers) only have IP addresses, so their connection attempts are
// stored as part of the value of the outer map.
//
// Another criterion is to limit the number of metered peers so that
// they don't fill the memory. The selection order is based on the
// peers' activity: the peers that are inactive for the longest time
// are thrown away first. For the selection a fifo list is used which
// is linked to the bottom of the peer tree in a way that every
// disconnection of a peer pushes it to the end of the list, so the
// longest inactive ones come to the front. When a peer reconnects, it
// is removed from the list. When the number of known peers exceeds the
// limit, the first element is removed from the list, as well as from
// the tree.
//
// The active peers have priority over the inactive ones, therefore
// only the inactive peers are kept in the list, while the active ones
// are merely counted. The separation ensures that the inactive peers
// are always removed before the active ones.
//
// The peers that don't manage to make handshake are not inserted into the list,
// only their connection attempts are appended to the array belonging to their IP.
// In order to keep the fifo principle, a super array contains the order of the
// attempts, and when the overall count exceeds the limit, the earliest attempt is
// removed from the beginning of its array.
//
// This data structure makes it possible to marshal the peer
// history simply by passing it to the JSON marshaler.
type peerContainer struct {
	// Bundles is the outer map using the peer's IP address as key.
	Bundles map[string]*peerBundle `json:"bundles,omitempty"`

	activeCount int // Number of the still connected peers.

	// inactivePeers contains the peers with closed connection in chronological
	// order of their disconnection. Its elements hold *knownPeer values.
	inactivePeers *list.List

	// attemptOrder is the super array containing the IP addresses, from which
	// the peers attempted to connect then failed before/during the handshake.
	// Its values are appended in chronological order, which means that the
	// oldest attempt is at the beginning of the array. When the first element
	// is removed, the first element of the related bundle's attempt array is
	// removed too, ensuring that always the latest attempts are stored.
	attemptOrder []string

	// geodb is the geoip database used to retrieve the peers' geographical location.
	geodb *geoDB
}
    97  
    98  // newPeerContainer returns a new instance of the peer container.
    99  func newPeerContainer(geodb *geoDB) *peerContainer {
   100  	return &peerContainer{
   101  		Bundles:       make(map[string]*peerBundle),
   102  		inactivePeers: list.New(),
   103  		attemptOrder:  make([]string, 0, attemptLimit),
   104  		geodb:         geodb,
   105  	}
   106  }
   107  
   108  // bundle inserts a new peer bundle into the map, if the peer belonging
   109  // to the given IP wasn't metered so far. In this case retrieves the location of
   110  // the IP address from the database and creates a corresponding peer event.
   111  // Returns the bundle belonging to the given IP and the events occurring during
   112  // the initialization.
   113  func (pc *peerContainer) bundle(ip string) (*peerBundle, []*peerEvent) {
   114  	var events []*peerEvent
   115  	if _, ok := pc.Bundles[ip]; !ok {
   116  		location := pc.geodb.location(ip)
   117  		events = append(events, &peerEvent{
   118  			IP:       ip,
   119  			Location: location,
   120  		})
   121  		pc.Bundles[ip] = &peerBundle{
   122  			Location:   location,
   123  			KnownPeers: make(map[string]*knownPeer),
   124  		}
   125  	}
   126  	return pc.Bundles[ip], events
   127  }
   128  
   129  // extendKnown handles the events of the successfully connected peers.
   130  // Returns the events occurring during the extension.
   131  func (pc *peerContainer) extendKnown(event *peerEvent) []*peerEvent {
   132  	bundle, events := pc.bundle(event.IP)
   133  	peer, peerEvents := bundle.knownPeer(event.IP, event.ID)
   134  	events = append(events, peerEvents...)
   135  	// Append the connect and the disconnect events to
   136  	// the corresponding arrays keeping the limit.
   137  	switch {
   138  	case event.Connected != nil:
   139  		peer.Connected = append(peer.Connected, event.Connected)
   140  		if first := len(peer.Connected) - sampleLimit; first > 0 {
   141  			peer.Connected = peer.Connected[first:]
   142  		}
   143  		peer.Active = true
   144  		events = append(events, &peerEvent{
   145  			Activity: Active,
   146  			IP:       peer.ip,
   147  			ID:       peer.id,
   148  		})
   149  		pc.activeCount++
   150  		if peer.listElement != nil {
   151  			_ = pc.inactivePeers.Remove(peer.listElement)
   152  			peer.listElement = nil
   153  		}
   154  	case event.Disconnected != nil:
   155  		peer.Disconnected = append(peer.Disconnected, event.Disconnected)
   156  		if first := len(peer.Disconnected) - sampleLimit; first > 0 {
   157  			peer.Disconnected = peer.Disconnected[first:]
   158  		}
   159  		peer.Active = false
   160  		events = append(events, &peerEvent{
   161  			Activity: Inactive,
   162  			IP:       peer.ip,
   163  			ID:       peer.id,
   164  		})
   165  		pc.activeCount--
   166  		if peer.listElement != nil {
   167  			// If the peer is already in the list, remove and reinsert it.
   168  			_ = pc.inactivePeers.Remove(peer.listElement)
   169  		}
   170  		// Insert the peer into the list.
   171  		peer.listElement = pc.inactivePeers.PushBack(peer)
   172  	}
   173  	for pc.inactivePeers.Len() > 0 && pc.activeCount+pc.inactivePeers.Len() > knownPeerLimit {
   174  		// While the count of the known peers is greater than the limit,
   175  		// remove the first element from the inactive peer list and from the map.
   176  		if removedPeer, ok := pc.inactivePeers.Remove(pc.inactivePeers.Front()).(*knownPeer); ok {
   177  			events = append(events, pc.removeKnown(removedPeer.ip, removedPeer.id)...)
   178  		} else {
   179  			log.Warn("Failed to parse the removed peer")
   180  		}
   181  	}
   182  	if pc.activeCount > knownPeerLimit {
   183  		log.Warn("Number of active peers is greater than the limit")
   184  	}
   185  	return events
   186  }
   187  
   188  // handleAttempt handles the events of the peers failing before/during the handshake.
   189  // Returns the events occurring during the extension.
   190  func (pc *peerContainer) handleAttempt(event *peerEvent) []*peerEvent {
   191  	bundle, events := pc.bundle(event.IP)
   192  	bundle.Attempts = append(bundle.Attempts, &peerAttempt{
   193  		Connected:    *event.Connected,
   194  		Disconnected: *event.Disconnected,
   195  	})
   196  	pc.attemptOrder = append(pc.attemptOrder, event.IP)
   197  	for len(pc.attemptOrder) > attemptLimit {
   198  		// While the length of the connection attempt order array is greater
   199  		// than the limit, remove the first element from the involved peer's
   200  		// array and also from the super array.
   201  		events = append(events, pc.removeAttempt(pc.attemptOrder[0])...)
   202  		pc.attemptOrder = pc.attemptOrder[1:]
   203  	}
   204  	return events
   205  }
   206  
// peerBundle contains the peers belonging to a given IP address: the known
// peers keyed by their node ID and the failed connection attempts made from
// that address.
type peerBundle struct {
	// Location contains the geographical location based on the bundle's IP address.
	Location *geoLocation `json:"location,omitempty"`

	// KnownPeers is the inner map of the metered peer
	// maintainer data structure using the node ID as key.
	KnownPeers map[string]*knownPeer `json:"knownPeers,omitempty"`

	// Attempts contains the failed connection attempts of the
	// peers belonging to a given IP address in chronological order,
	// i.e. the oldest attempt is the first element.
	Attempts []*peerAttempt `json:"attempts,omitempty"`
}
   220  
   221  // removeKnown removes the known peer belonging to the
   222  // given IP address and node ID from the peer tree.
   223  func (pc *peerContainer) removeKnown(ip, id string) (events []*peerEvent) {
   224  	// TODO (kurkomisi): Remove peers that don't have traffic samples anymore.
   225  	if bundle, ok := pc.Bundles[ip]; ok {
   226  		if _, ok := bundle.KnownPeers[id]; ok {
   227  			events = append(events, &peerEvent{
   228  				Remove: RemoveKnown,
   229  				IP:     ip,
   230  				ID:     id,
   231  			})
   232  			delete(bundle.KnownPeers, id)
   233  		} else {
   234  			log.Warn("No peer to remove", "ip", ip, "id", id)
   235  		}
   236  		if len(bundle.KnownPeers) < 1 && len(bundle.Attempts) < 1 {
   237  			events = append(events, &peerEvent{
   238  				Remove: RemoveBundle,
   239  				IP:     ip,
   240  			})
   241  			delete(pc.Bundles, ip)
   242  		}
   243  	} else {
   244  		log.Warn("No bundle to remove", "ip", ip)
   245  	}
   246  	return events
   247  }
   248  
   249  // removeAttempt removes the peer attempt belonging to the
   250  // given IP address and node ID from the peer tree.
   251  func (pc *peerContainer) removeAttempt(ip string) (events []*peerEvent) {
   252  	if bundle, ok := pc.Bundles[ip]; ok {
   253  		if len(bundle.Attempts) > 0 {
   254  			events = append(events, &peerEvent{
   255  				Remove: RemoveAttempt,
   256  				IP:     ip,
   257  			})
   258  			bundle.Attempts = bundle.Attempts[1:]
   259  		}
   260  		if len(bundle.Attempts) < 1 && len(bundle.KnownPeers) < 1 {
   261  			events = append(events, &peerEvent{
   262  				Remove: RemoveBundle,
   263  				IP:     ip,
   264  			})
   265  			delete(pc.Bundles, ip)
   266  		}
   267  	}
   268  	return events
   269  }
   270  
   271  // knownPeer inserts a new peer into the map, if the peer belonging
   272  // to the given IP address and node ID wasn't metered so far. Returns the peer
   273  // belonging to the given IP and ID as well as the events occurring during the
   274  // initialization.
   275  func (bundle *peerBundle) knownPeer(ip, id string) (*knownPeer, []*peerEvent) {
   276  	var events []*peerEvent
   277  	if _, ok := bundle.KnownPeers[id]; !ok {
   278  		now := time.Now()
   279  		ingress := emptyChartEntries(now, sampleLimit)
   280  		egress := emptyChartEntries(now, sampleLimit)
   281  		events = append(events, &peerEvent{
   282  			IP:      ip,
   283  			ID:      id,
   284  			Ingress: append([]*ChartEntry{}, ingress...),
   285  			Egress:  append([]*ChartEntry{}, egress...),
   286  		})
   287  		bundle.KnownPeers[id] = &knownPeer{
   288  			ip:      ip,
   289  			id:      id,
   290  			Ingress: ingress,
   291  			Egress:  egress,
   292  		}
   293  	}
   294  	return bundle.KnownPeers[id], events
   295  }
   296  
// knownPeer contains the metered data of a particular peer, which
// successfully made the handshake and therefore has a node ID.
type knownPeer struct {
	// Connected contains the timestamps of the peer's connection events.
	Connected []*time.Time `json:"connected,omitempty"`

	// Disconnected contains the timestamps of the peer's disconnection events.
	Disconnected []*time.Time `json:"disconnected,omitempty"`

	// Ingress and Egress contain the peer's traffic samples, which are collected
	// periodically from the metrics registry.
	//
	// A peer can connect multiple times, and we want to visualize the time
	// passed between two connections, so after the first connection a 0 value
	// is appended to the traffic arrays even if the peer is inactive until the
	// peer is removed.
	Ingress ChartEntries `json:"ingress,omitempty"`
	Egress  ChartEntries `json:"egress,omitempty"`

	Active bool `json:"active"` // Denotes if the peer is still connected.

	listElement *list.Element // Pointer to the peer element in the inactive peer list, nil if the peer isn't listed.
	ip, id      string        // The IP and the ID by which the peer can be accessed in the tree.

	// prevIngress and prevEgress hold the traffic counts read at the previous
	// metering; the next sample is calculated relative to them.
	prevIngress float64
	prevEgress  float64
}
   322  
// peerAttempt contains the attributes of a failed peer connection
// attempt, i.e. of a connection that didn't get through the handshake.
type peerAttempt struct {
	// Connected contains the timestamp of the connection attempt's moment.
	Connected time.Time `json:"connected"`

	// Disconnected contains the timestamp of the
	// moment when the connection attempt failed.
	Disconnected time.Time `json:"disconnected"`
}
   332  
// RemovedPeerType denotes which kind of element
// of the peer tree is removed by a peer event.
type RemovedPeerType string

// ActivityType denotes the connection status change reported by a peer event.
type ActivityType string

const (
	RemoveKnown   RemovedPeerType = "known"   // A known peer is removed from its bundle.
	RemoveAttempt RemovedPeerType = "attempt" // The oldest attempt is removed from a bundle.
	RemoveBundle  RemovedPeerType = "bundle"  // A whole bundle is removed from the tree.

	Active   ActivityType = "active"   // The peer has connected.
	Inactive ActivityType = "inactive" // The peer has disconnected.
)
   344  
// peerEvent contains the attributes of a peer event. The same structure is
// used for the events fed into the peer tree and for the changes sent to the
// clients; the fields that are not set are omitted from the JSON encoding.
type peerEvent struct {
	IP           string          `json:"ip,omitempty"`           // IP address of the peer.
	ID           string          `json:"id,omitempty"`           // Node ID of the peer.
	Remove       RemovedPeerType `json:"remove,omitempty"`       // Type of the peer that is to be removed.
	Location     *geoLocation    `json:"location,omitempty"`     // Geographical location of the peer.
	Connected    *time.Time      `json:"connected,omitempty"`    // Timestamp of the connection moment.
	Disconnected *time.Time      `json:"disconnected,omitempty"` // Timestamp of the disconnection moment.
	Ingress      ChartEntries    `json:"ingress,omitempty"`      // Ingress samples.
	Egress       ChartEntries    `json:"egress,omitempty"`       // Egress samples.
	Activity     ActivityType    `json:"activity,omitempty"`     // Connection status change.
}
   357  
   358  // trafficMap is a container for the periodically collected peer traffic.
   359  type trafficMap map[string]map[string]float64
   360  
   361  // insert inserts a new value to the traffic map. Overwrites
   362  // the value at the given ip and id if that already exists.
   363  func (m *trafficMap) insert(ip, id string, val float64) {
   364  	if _, ok := (*m)[ip]; !ok {
   365  		(*m)[ip] = make(map[string]float64)
   366  	}
   367  	(*m)[ip][id] = val
   368  }
   369  
   370  // collectPeerData gathers data about the peers and sends it to the clients.
   371  func (db *Dashboard) collectPeerData() {
   372  	defer db.wg.Done()
   373  
   374  	// Open the geodb database for IP to geographical information conversions.
   375  	var err error
   376  	db.geodb, err = openGeoDB()
   377  	if err != nil {
   378  		log.Warn("Failed to open geodb", "err", err)
   379  		return
   380  	}
   381  	defer db.geodb.close()
   382  
   383  	peerCh := make(chan p2p.MeteredPeerEvent, eventBufferLimit) // Peer event channel.
   384  	subPeer := p2p.SubscribeMeteredPeerEvent(peerCh)            // Subscribe to peer events.
   385  	defer subPeer.Unsubscribe()                                 // Unsubscribe at the end.
   386  
   387  	ticker := time.NewTicker(db.config.Refresh)
   388  	defer ticker.Stop()
   389  
   390  	type registryFunc func(name string, i interface{})
   391  	type collectorFunc func(traffic *trafficMap) registryFunc
   392  
   393  	// trafficCollector generates a function that can be passed to
   394  	// the prefixed peer registry in order to collect the metered
   395  	// traffic data from each peer meter.
   396  	trafficCollector := func(prefix string) collectorFunc {
   397  		// This part makes is possible to collect the
   398  		// traffic data into a map from outside.
   399  		return func(traffic *trafficMap) registryFunc {
   400  			// The function which can be passed to the registry.
   401  			return func(name string, i interface{}) {
   402  				if m, ok := i.(metrics.Meter); ok {
   403  					// The name of the meter has the format: <common traffic prefix><IP>/<ID>
   404  					if k := strings.Split(strings.TrimPrefix(name, prefix), "/"); len(k) == 2 {
   405  						traffic.insert(k[0], k[1], float64(m.Count()))
   406  					} else {
   407  						log.Warn("Invalid meter name", "name", name, "prefix", prefix)
   408  					}
   409  				} else {
   410  					log.Warn("Invalid meter type", "name", name)
   411  				}
   412  			}
   413  		}
   414  	}
   415  	collectIngress := trafficCollector(p2p.MetricsInboundTraffic + "/")
   416  	collectEgress := trafficCollector(p2p.MetricsOutboundTraffic + "/")
   417  
   418  	peers := newPeerContainer(db.geodb)
   419  	db.peerLock.Lock()
   420  	db.history.Network = &NetworkMessage{
   421  		Peers: peers,
   422  	}
   423  	db.peerLock.Unlock()
   424  
   425  	// newPeerEvents contains peer events, which trigger operations that
   426  	// will be executed on the peer tree after a metering period.
   427  	newPeerEvents := make([]*peerEvent, 0, eventLimit)
   428  	ingress, egress := new(trafficMap), new(trafficMap)
   429  	*ingress, *egress = make(trafficMap), make(trafficMap)
   430  
   431  	for {
   432  		select {
   433  		case event := <-peerCh:
   434  			now := time.Now()
   435  			switch event.Type {
   436  			case p2p.PeerConnected:
   437  				connected := now.Add(-event.Elapsed)
   438  				newPeerEvents = append(newPeerEvents, &peerEvent{
   439  					IP:        event.IP.String(),
   440  					ID:        event.ID.String(),
   441  					Connected: &connected,
   442  				})
   443  			case p2p.PeerDisconnected:
   444  				ip, id := event.IP.String(), event.ID.String()
   445  				newPeerEvents = append(newPeerEvents, &peerEvent{
   446  					IP:           ip,
   447  					ID:           id,
   448  					Disconnected: &now,
   449  				})
   450  				// The disconnect event comes with the last metered traffic count,
   451  				// because after the disconnection the peer's meter is removed
   452  				// from the registry. It can happen, that between two metering
   453  				// period the same peer disconnects multiple times, and appending
   454  				// all the samples to the traffic arrays would shift the metering,
   455  				// so only the last metering is stored, overwriting the previous one.
   456  				ingress.insert(ip, id, float64(event.Ingress))
   457  				egress.insert(ip, id, float64(event.Egress))
   458  			case p2p.PeerHandshakeFailed:
   459  				connected := now.Add(-event.Elapsed)
   460  				newPeerEvents = append(newPeerEvents, &peerEvent{
   461  					IP:           event.IP.String(),
   462  					Connected:    &connected,
   463  					Disconnected: &now,
   464  				})
   465  			default:
   466  				log.Error("Unknown metered peer event type", "type", event.Type)
   467  			}
   468  		case <-ticker.C:
   469  			// Collect the traffic samples from the registry.
   470  			p2p.PeerIngressRegistry.Each(collectIngress(ingress))
   471  			p2p.PeerEgressRegistry.Each(collectEgress(egress))
   472  
   473  			// Protect 'peers', because it is part of the history.
   474  			db.peerLock.Lock()
   475  
   476  			var diff []*peerEvent
   477  			for i := 0; i < len(newPeerEvents); i++ {
   478  				if newPeerEvents[i].IP == "" {
   479  					log.Warn("Peer event without IP", "event", *newPeerEvents[i])
   480  					continue
   481  				}
   482  				diff = append(diff, newPeerEvents[i])
   483  				// There are two main branches of peer events coming from the event
   484  				// feed, one belongs to the known peers, one to the unknown peers.
   485  				// If the event has node ID, it belongs to a known peer, otherwise
   486  				// to an unknown one, which is considered as connection attempt.
   487  				//
   488  				// The extension can produce additional peer events, such
   489  				// as remove, location and initial samples events.
   490  				if newPeerEvents[i].ID == "" {
   491  					diff = append(diff, peers.handleAttempt(newPeerEvents[i])...)
   492  					continue
   493  				}
   494  				diff = append(diff, peers.extendKnown(newPeerEvents[i])...)
   495  			}
   496  			// Update the peer tree using the traffic maps.
   497  			for ip, bundle := range peers.Bundles {
   498  				for id, peer := range bundle.KnownPeers {
   499  					// Value is 0 if the traffic map doesn't have the
   500  					// entry corresponding to the given IP and ID.
   501  					curIngress, curEgress := (*ingress)[ip][id], (*egress)[ip][id]
   502  					deltaIngress, deltaEgress := curIngress, curEgress
   503  					if deltaIngress >= peer.prevIngress {
   504  						deltaIngress -= peer.prevIngress
   505  					}
   506  					if deltaEgress >= peer.prevEgress {
   507  						deltaEgress -= peer.prevEgress
   508  					}
   509  					peer.prevIngress, peer.prevEgress = curIngress, curEgress
   510  					i := &ChartEntry{
   511  						Value: deltaIngress,
   512  					}
   513  					e := &ChartEntry{
   514  						Value: deltaEgress,
   515  					}
   516  					peer.Ingress = append(peer.Ingress, i)
   517  					peer.Egress = append(peer.Egress, e)
   518  					if first := len(peer.Ingress) - sampleLimit; first > 0 {
   519  						peer.Ingress = peer.Ingress[first:]
   520  					}
   521  					if first := len(peer.Egress) - sampleLimit; first > 0 {
   522  						peer.Egress = peer.Egress[first:]
   523  					}
   524  					// Creating the traffic sample events.
   525  					diff = append(diff, &peerEvent{
   526  						IP:      ip,
   527  						ID:      id,
   528  						Ingress: ChartEntries{i},
   529  						Egress:  ChartEntries{e},
   530  					})
   531  				}
   532  			}
   533  			db.peerLock.Unlock()
   534  
   535  			if len(diff) > 0 {
   536  				db.sendToAll(&Message{Network: &NetworkMessage{
   537  					Diff: diff,
   538  				}})
   539  			}
   540  			// Clear the traffic maps, and the event array,
   541  			// prepare them for the next metering.
   542  			*ingress, *egress = make(trafficMap), make(trafficMap)
   543  			newPeerEvents = newPeerEvents[:0]
   544  		case err := <-subPeer.Err():
   545  			log.Warn("Peer subscription error", "err", err)
   546  			return
   547  		case errc := <-db.quit:
   548  			errc <- nil
   549  			return
   550  		}
   551  	}
   552  }