// Copyright 2018 The go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.

package dashboard

import (
	"container/list"
	"strings"
	"time"

	"github.com/maticnetwork/bor/metrics"

	"github.com/maticnetwork/bor/log"
	"github.com/maticnetwork/bor/p2p"
)

const (
	eventBufferLimit = 128 // Capacity of the channel buffering the incoming metered peer events.
	knownPeerLimit   = 100 // Maximum number of stored peers, which successfully made the handshake.
	attemptLimit     = 200 // Maximum number of stored connection attempts, which failed before/during the handshake.

	// eventLimit is the maximum number of the dashboard's custom peer events,
	// that are collected between two metering periods and sent to the clients
	// as one message. In this file it is only used as the initial capacity of
	// the buffered event slice; the number of events is not capped yet.
	// TODO (kurkomisi): Limit the number of events.
	eventLimit = knownPeerLimit << 2
)

// peerContainer contains information about the node's peers. This data structure
// maintains the metered peer data based on the different behaviours of the peers.
//
// Every peer has an IP address, and the peers that manage to make the handshake
// (known peers) have node IDs too. Several peers can appear with the same IP,
// therefore the peer container data structure is a tree consisting of a map of
// maps, where the first key groups the peers by IP, while the second one groups
// them by the node ID. The known peers can be active if their connection is still
// open, or inactive otherwise. The peers failing before the handshake (unknown
// peers) only have IP addresses, so their connection attempts are stored as part
// of the value of the outer map.
//
// Another criterion is to limit the number of metered peers so that
// they don't fill the memory. The selection order is based on the
// peers' activity: the peers that are inactive for the longest time
// are dropped first. For the selection a fifo list is used which is
// linked to the bottom of the peer tree in a way that every activity
// of the peer pushes the peer to the end of the list, so the inactive
// ones come to the front. When a peer has some activity, it is removed
// from and reinserted into the list. When the number of the stored peers
// exceeds the limit, the first element is removed from the list, as well
// as from the tree.
//
// The active peers have priority over the inactive ones: only the
// inactive peers are kept in the list, while the active ones are merely
// counted. This separation makes sure that the inactive peers are always
// removed before the active ones.
//
// The peers that don't manage to make the handshake are not inserted into the list,
// only their connection attempts are appended to the array belonging to their IP.
// In order to keep the fifo principle, a super array contains the order of the
// attempts, and when the overall count exceeds the limit, the earliest attempt is
// removed from the beginning of its array.
//
// This data structure makes it possible to marshal the peer
// history simply by passing it to the JSON marshaler.
77 type peerContainer struct { 78 // Bundles is the outer map using the peer's IP address as key. 79 Bundles map[string]*peerBundle `json:"bundles,omitempty"` 80 81 activeCount int // Number of the still connected peers 82 83 // inactivePeers contains the peers with closed connection in chronological order. 84 inactivePeers *list.List 85 86 // attemptOrder is the super array containing the IP addresses, from which 87 // the peers attempted to connect then failed before/during the handshake. 88 // Its values are appended in chronological order, which means that the 89 // oldest attempt is at the beginning of the array. When the first element 90 // is removed, the first element of the related bundle's attempt array is 91 // removed too, ensuring that always the latest attempts are stored. 92 attemptOrder []string 93 94 // geodb is the geoip database used to retrieve the peers' geographical location. 95 geodb *geoDB 96 } 97 98 // newPeerContainer returns a new instance of the peer container. 99 func newPeerContainer(geodb *geoDB) *peerContainer { 100 return &peerContainer{ 101 Bundles: make(map[string]*peerBundle), 102 inactivePeers: list.New(), 103 attemptOrder: make([]string, 0, attemptLimit), 104 geodb: geodb, 105 } 106 } 107 108 // bundle inserts a new peer bundle into the map, if the peer belonging 109 // to the given IP wasn't metered so far. In this case retrieves the location of 110 // the IP address from the database and creates a corresponding peer event. 111 // Returns the bundle belonging to the given IP and the events occurring during 112 // the initialization. 
113 func (pc *peerContainer) bundle(ip string) (*peerBundle, []*peerEvent) { 114 var events []*peerEvent 115 if _, ok := pc.Bundles[ip]; !ok { 116 location := pc.geodb.location(ip) 117 events = append(events, &peerEvent{ 118 IP: ip, 119 Location: location, 120 }) 121 pc.Bundles[ip] = &peerBundle{ 122 Location: location, 123 KnownPeers: make(map[string]*knownPeer), 124 } 125 } 126 return pc.Bundles[ip], events 127 } 128 129 // extendKnown handles the events of the successfully connected peers. 130 // Returns the events occurring during the extension. 131 func (pc *peerContainer) extendKnown(event *peerEvent) []*peerEvent { 132 bundle, events := pc.bundle(event.IP) 133 peer, peerEvents := bundle.knownPeer(event.IP, event.ID) 134 events = append(events, peerEvents...) 135 // Append the connect and the disconnect events to 136 // the corresponding arrays keeping the limit. 137 switch { 138 case event.Connected != nil: 139 peer.Connected = append(peer.Connected, event.Connected) 140 if first := len(peer.Connected) - sampleLimit; first > 0 { 141 peer.Connected = peer.Connected[first:] 142 } 143 peer.Active = true 144 events = append(events, &peerEvent{ 145 Activity: Active, 146 IP: peer.ip, 147 ID: peer.id, 148 }) 149 pc.activeCount++ 150 if peer.listElement != nil { 151 _ = pc.inactivePeers.Remove(peer.listElement) 152 peer.listElement = nil 153 } 154 case event.Disconnected != nil: 155 peer.Disconnected = append(peer.Disconnected, event.Disconnected) 156 if first := len(peer.Disconnected) - sampleLimit; first > 0 { 157 peer.Disconnected = peer.Disconnected[first:] 158 } 159 peer.Active = false 160 events = append(events, &peerEvent{ 161 Activity: Inactive, 162 IP: peer.ip, 163 ID: peer.id, 164 }) 165 pc.activeCount-- 166 if peer.listElement != nil { 167 // If the peer is already in the list, remove and reinsert it. 168 _ = pc.inactivePeers.Remove(peer.listElement) 169 } 170 // Insert the peer into the list. 
171 peer.listElement = pc.inactivePeers.PushBack(peer) 172 } 173 for pc.inactivePeers.Len() > 0 && pc.activeCount+pc.inactivePeers.Len() > knownPeerLimit { 174 // While the count of the known peers is greater than the limit, 175 // remove the first element from the inactive peer list and from the map. 176 if removedPeer, ok := pc.inactivePeers.Remove(pc.inactivePeers.Front()).(*knownPeer); ok { 177 events = append(events, pc.removeKnown(removedPeer.ip, removedPeer.id)...) 178 } else { 179 log.Warn("Failed to parse the removed peer") 180 } 181 } 182 if pc.activeCount > knownPeerLimit { 183 log.Warn("Number of active peers is greater than the limit") 184 } 185 return events 186 } 187 188 // handleAttempt handles the events of the peers failing before/during the handshake. 189 // Returns the events occurring during the extension. 190 func (pc *peerContainer) handleAttempt(event *peerEvent) []*peerEvent { 191 bundle, events := pc.bundle(event.IP) 192 bundle.Attempts = append(bundle.Attempts, &peerAttempt{ 193 Connected: *event.Connected, 194 Disconnected: *event.Disconnected, 195 }) 196 pc.attemptOrder = append(pc.attemptOrder, event.IP) 197 for len(pc.attemptOrder) > attemptLimit { 198 // While the length of the connection attempt order array is greater 199 // than the limit, remove the first element from the involved peer's 200 // array and also from the super array. 201 events = append(events, pc.removeAttempt(pc.attemptOrder[0])...) 202 pc.attemptOrder = pc.attemptOrder[1:] 203 } 204 return events 205 } 206 207 // peerBundle contains the peers belonging to a given IP address. 208 type peerBundle struct { 209 // Location contains the geographical location based on the bundle's IP address. 210 Location *geoLocation `json:"location,omitempty"` 211 212 // KnownPeers is the inner map of the metered peer 213 // maintainer data structure using the node ID as key. 
214 KnownPeers map[string]*knownPeer `json:"knownPeers,omitempty"` 215 216 // Attempts contains the failed connection attempts of the 217 // peers belonging to a given IP address in chronological order. 218 Attempts []*peerAttempt `json:"attempts,omitempty"` 219 } 220 221 // removeKnown removes the known peer belonging to the 222 // given IP address and node ID from the peer tree. 223 func (pc *peerContainer) removeKnown(ip, id string) (events []*peerEvent) { 224 // TODO (kurkomisi): Remove peers that don't have traffic samples anymore. 225 if bundle, ok := pc.Bundles[ip]; ok { 226 if _, ok := bundle.KnownPeers[id]; ok { 227 events = append(events, &peerEvent{ 228 Remove: RemoveKnown, 229 IP: ip, 230 ID: id, 231 }) 232 delete(bundle.KnownPeers, id) 233 } else { 234 log.Warn("No peer to remove", "ip", ip, "id", id) 235 } 236 if len(bundle.KnownPeers) < 1 && len(bundle.Attempts) < 1 { 237 events = append(events, &peerEvent{ 238 Remove: RemoveBundle, 239 IP: ip, 240 }) 241 delete(pc.Bundles, ip) 242 } 243 } else { 244 log.Warn("No bundle to remove", "ip", ip) 245 } 246 return events 247 } 248 249 // removeAttempt removes the peer attempt belonging to the 250 // given IP address and node ID from the peer tree. 251 func (pc *peerContainer) removeAttempt(ip string) (events []*peerEvent) { 252 if bundle, ok := pc.Bundles[ip]; ok { 253 if len(bundle.Attempts) > 0 { 254 events = append(events, &peerEvent{ 255 Remove: RemoveAttempt, 256 IP: ip, 257 }) 258 bundle.Attempts = bundle.Attempts[1:] 259 } 260 if len(bundle.Attempts) < 1 && len(bundle.KnownPeers) < 1 { 261 events = append(events, &peerEvent{ 262 Remove: RemoveBundle, 263 IP: ip, 264 }) 265 delete(pc.Bundles, ip) 266 } 267 } 268 return events 269 } 270 271 // knownPeer inserts a new peer into the map, if the peer belonging 272 // to the given IP address and node ID wasn't metered so far. Returns the peer 273 // belonging to the given IP and ID as well as the events occurring during the 274 // initialization. 
func (bundle *peerBundle) knownPeer(ip, id string) (*knownPeer, []*peerEvent) {
	var events []*peerEvent
	if _, ok := bundle.KnownPeers[id]; !ok {
		// First appearance of this node ID under the bundle: start both
		// traffic charts with sampleLimit placeholder entries.
		now := time.Now()
		ingress := emptyChartEntries(now, sampleLimit)
		egress := emptyChartEntries(now, sampleLimit)
		// The event gets its own slices (the entries themselves are shared),
		// so later appends to the peer's charts don't touch the event.
		events = append(events, &peerEvent{
			IP:      ip,
			ID:      id,
			Ingress: append([]*ChartEntry{}, ingress...),
			Egress:  append([]*ChartEntry{}, egress...),
		})
		bundle.KnownPeers[id] = &knownPeer{
			ip:      ip,
			id:      id,
			Ingress: ingress,
			Egress:  egress,
		}
	}
	return bundle.KnownPeers[id], events
}

// knownPeer contains the metered data of a particular peer.
type knownPeer struct {
	// Connected contains the timestamps of the peer's connection events.
	Connected []*time.Time `json:"connected,omitempty"`

	// Disconnected contains the timestamps of the peer's disconnection events.
	Disconnected []*time.Time `json:"disconnected,omitempty"`

	// Ingress and Egress contain the peer's traffic samples, which are collected
	// periodically from the metrics registry.
	//
	// A peer can connect multiple times, and we want to visualize the time
	// passed between two connections, so after the first connection a 0 value
	// is appended to the traffic arrays even if the peer is inactive until the
	// peer is removed.
	Ingress ChartEntries `json:"ingress,omitempty"`
	Egress  ChartEntries `json:"egress,omitempty"`

	Active bool `json:"active"` // Denotes if the peer is still connected.

	listElement *list.Element // Pointer to the peer element in the list.
	ip, id      string        // The IP and the ID by which the peer can be accessed in the tree.
	prevIngress float64       // Ingress counter value seen at the previous metering.
	prevEgress  float64       // Egress counter value seen at the previous metering.
}

// peerAttempt contains a failed peer connection attempt's attributes.
type peerAttempt struct {
	// Connected contains the timestamp of the connection attempt's moment.
	Connected time.Time `json:"connected"`

	// Disconnected contains the timestamp of the
	// moment when the connection attempt failed.
	Disconnected time.Time `json:"disconnected"`
}

// RemovedPeerType denotes what kind of element a remove event refers to.
type RemovedPeerType string

// ActivityType denotes the connection status carried by an activity event.
type ActivityType string

const (
	RemoveKnown   RemovedPeerType = "known"   // A known peer is removed.
	RemoveAttempt RemovedPeerType = "attempt" // The oldest attempt of an IP address is removed.
	RemoveBundle  RemovedPeerType = "bundle"  // The whole bundle of an IP address is removed.

	Active   ActivityType = "active"   // The peer became connected.
	Inactive ActivityType = "inactive" // The peer became disconnected.
)

// peerEvent contains the attributes of a peer event.
type peerEvent struct {
	IP           string          `json:"ip,omitempty"`           // IP address of the peer.
	ID           string          `json:"id,omitempty"`           // Node ID of the peer.
	Remove       RemovedPeerType `json:"remove,omitempty"`       // Type of the peer that is to be removed.
	Location     *geoLocation    `json:"location,omitempty"`     // Geographical location of the peer.
	Connected    *time.Time      `json:"connected,omitempty"`    // Timestamp of the connection moment.
	Disconnected *time.Time      `json:"disconnected,omitempty"` // Timestamp of the disconnection moment.
	Ingress      ChartEntries    `json:"ingress,omitempty"`      // Ingress samples.
	Egress       ChartEntries    `json:"egress,omitempty"`       // Egress samples.
	Activity     ActivityType    `json:"activity,omitempty"`     // Connection status change.
}

// trafficMap is a container for the periodically collected peer traffic.
// The outer key is the IP address, the inner key is the node ID.
type trafficMap map[string]map[string]float64

// insert inserts a new value to the traffic map. Overwrites
// the value at the given ip and id if that already exists.
func (m *trafficMap) insert(ip, id string, val float64) {
	// Create the inner map on the first value of the given IP address.
	if _, ok := (*m)[ip]; !ok {
		(*m)[ip] = make(map[string]float64)
	}
	(*m)[ip][id] = val
}

// collectPeerData gathers data about the peers and sends it to the clients.
//
// It buffers the metered peer events coming from the p2p package, and on
// every refresh tick it applies them to the peer tree, samples the traffic
// of the known peers, and broadcasts the resulting changes as one message.
// The loop ends on a subscription error or on a quit request.
func (db *Dashboard) collectPeerData() {
	defer db.wg.Done()

	// Open the geodb database for IP to geographical information conversions.
	// NOTE(review): on the early return below this goroutine never receives
	// from db.quit - confirm that the shutdown path doesn't wait for its reply.
	var err error
	db.geodb, err = openGeoDB()
	if err != nil {
		log.Warn("Failed to open geodb", "err", err)
		return
	}
	defer db.geodb.close()

	peerCh := make(chan p2p.MeteredPeerEvent, eventBufferLimit) // Peer event channel.
	subPeer := p2p.SubscribeMeteredPeerEvent(peerCh)            // Subscribe to peer events.
	defer subPeer.Unsubscribe()                                 // Unsubscribe at the end.

	ticker := time.NewTicker(db.config.Refresh)
	defer ticker.Stop()

	type registryFunc func(name string, i interface{})
	type collectorFunc func(traffic *trafficMap) registryFunc

	// trafficCollector generates a function that can be passed to
	// the prefixed peer registry in order to collect the metered
	// traffic data from each peer meter.
	trafficCollector := func(prefix string) collectorFunc {
		// This part makes it possible to collect the
		// traffic data into a map from outside.
		return func(traffic *trafficMap) registryFunc {
			// The function which can be passed to the registry.
			return func(name string, i interface{}) {
				if m, ok := i.(metrics.Meter); ok {
					// The name of the meter has the format: <common traffic prefix><IP>/<ID>
					if k := strings.Split(strings.TrimPrefix(name, prefix), "/"); len(k) == 2 {
						traffic.insert(k[0], k[1], float64(m.Count()))
					} else {
						log.Warn("Invalid meter name", "name", name, "prefix", prefix)
					}
				} else {
					log.Warn("Invalid meter type", "name", name)
				}
			}
		}
	}
	collectIngress := trafficCollector(p2p.MetricsInboundTraffic + "/")
	collectEgress := trafficCollector(p2p.MetricsOutboundTraffic + "/")

	// Publish the peer tree as part of the history under the peer lock.
	peers := newPeerContainer(db.geodb)
	db.peerLock.Lock()
	db.history.Network = &NetworkMessage{
		Peers: peers,
	}
	db.peerLock.Unlock()

	// newPeerEvents contains peer events, which trigger operations that
	// will be executed on the peer tree after a metering period.
	newPeerEvents := make([]*peerEvent, 0, eventLimit)
	ingress, egress := new(trafficMap), new(trafficMap)
	*ingress, *egress = make(trafficMap), make(trafficMap)

	for {
		select {
		case event := <-peerCh:
			// The events are only buffered here, the peer
			// tree is updated when the ticker fires.
			now := time.Now()
			switch event.Type {
			case p2p.PeerConnected:
				// The connection moment is reconstructed from the elapsed time.
				connected := now.Add(-event.Elapsed)
				newPeerEvents = append(newPeerEvents, &peerEvent{
					IP:        event.IP.String(),
					ID:        event.ID.String(),
					Connected: &connected,
				})
			case p2p.PeerDisconnected:
				ip, id := event.IP.String(), event.ID.String()
				newPeerEvents = append(newPeerEvents, &peerEvent{
					IP:           ip,
					ID:           id,
					Disconnected: &now,
				})
				// The disconnect event comes with the last metered traffic count,
				// because after the disconnection the peer's meter is removed
				// from the registry. It can happen, that between two metering
				// period the same peer disconnects multiple times, and appending
				// all the samples to the traffic arrays would shift the metering,
				// so only the last metering is stored, overwriting the previous one.
				ingress.insert(ip, id, float64(event.Ingress))
				egress.insert(ip, id, float64(event.Egress))
			case p2p.PeerHandshakeFailed:
				// No node ID is set here, which is what later routes
				// the event to the connection attempt handling.
				connected := now.Add(-event.Elapsed)
				newPeerEvents = append(newPeerEvents, &peerEvent{
					IP:           event.IP.String(),
					Connected:    &connected,
					Disconnected: &now,
				})
			default:
				log.Error("Unknown metered peer event type", "type", event.Type)
			}
		case <-ticker.C:
			// Collect the traffic samples from the registry.
			p2p.PeerIngressRegistry.Each(collectIngress(ingress))
			p2p.PeerEgressRegistry.Each(collectEgress(egress))

			// Protect 'peers', because it is part of the history.
			db.peerLock.Lock()

			// diff accumulates every change that is sent to the clients.
			var diff []*peerEvent
			for i := 0; i < len(newPeerEvents); i++ {
				if newPeerEvents[i].IP == "" {
					log.Warn("Peer event without IP", "event", *newPeerEvents[i])
					continue
				}
				diff = append(diff, newPeerEvents[i])
				// There are two main branches of peer events coming from the event
				// feed, one belongs to the known peers, one to the unknown peers.
				// If the event has node ID, it belongs to a known peer, otherwise
				// to an unknown one, which is considered as connection attempt.
				//
				// The extension can produce additional peer events, such
				// as remove, location and initial samples events.
				if newPeerEvents[i].ID == "" {
					diff = append(diff, peers.handleAttempt(newPeerEvents[i])...)
					continue
				}
				diff = append(diff, peers.extendKnown(newPeerEvents[i])...)
			}
			// Update the peer tree using the traffic maps.
			for ip, bundle := range peers.Bundles {
				for id, peer := range bundle.KnownPeers {
					// Value is 0 if the traffic map doesn't have the
					// entry corresponding to the given IP and ID.
					curIngress, curEgress := (*ingress)[ip][id], (*egress)[ip][id]
					// The sample is the growth of the counter since the previous
					// metering. If the counter is lower than it was before, the
					// current value itself is used as the sample.
					deltaIngress, deltaEgress := curIngress, curEgress
					if deltaIngress >= peer.prevIngress {
						deltaIngress -= peer.prevIngress
					}
					if deltaEgress >= peer.prevEgress {
						deltaEgress -= peer.prevEgress
					}
					peer.prevIngress, peer.prevEgress = curIngress, curEgress
					i := &ChartEntry{
						Value: deltaIngress,
					}
					e := &ChartEntry{
						Value: deltaEgress,
					}
					// Append the new samples keeping at most sampleLimit entries.
					peer.Ingress = append(peer.Ingress, i)
					peer.Egress = append(peer.Egress, e)
					if first := len(peer.Ingress) - sampleLimit; first > 0 {
						peer.Ingress = peer.Ingress[first:]
					}
					if first := len(peer.Egress) - sampleLimit; first > 0 {
						peer.Egress = peer.Egress[first:]
					}
					// Creating the traffic sample events.
					diff = append(diff, &peerEvent{
						IP:      ip,
						ID:      id,
						Ingress: ChartEntries{i},
						Egress:  ChartEntries{e},
					})
				}
			}
			db.peerLock.Unlock()

			if len(diff) > 0 {
				db.sendToAll(&Message{Network: &NetworkMessage{
					Diff: diff,
				}})
			}
			// Clear the traffic maps, and the event array,
			// prepare them for the next metering.
			*ingress, *egress = make(trafficMap), make(trafficMap)
			newPeerEvents = newPeerEvents[:0]
		case err := <-subPeer.Err():
			log.Warn("Peer subscription error", "err", err)
			return
		case errc := <-db.quit:
			// Acknowledge the quit request before leaving.
			errc <- nil
			return
		}
	}
}