github.com/ava-labs/avalanchego@v1.11.11/network/ip_tracker.go (about) 1 // Copyright (C) 2019-2024, Ava Labs, Inc. All rights reserved. 2 // See the file LICENSE for licensing terms. 3 4 package network 5 6 import ( 7 "crypto/rand" 8 "errors" 9 "sync" 10 11 "github.com/prometheus/client_golang/prometheus" 12 "go.uber.org/zap" 13 14 "github.com/ava-labs/avalanchego/ids" 15 "github.com/ava-labs/avalanchego/snow/validators" 16 "github.com/ava-labs/avalanchego/utils/bloom" 17 "github.com/ava-labs/avalanchego/utils/constants" 18 "github.com/ava-labs/avalanchego/utils/crypto/bls" 19 "github.com/ava-labs/avalanchego/utils/ips" 20 "github.com/ava-labs/avalanchego/utils/logging" 21 "github.com/ava-labs/avalanchego/utils/sampler" 22 "github.com/ava-labs/avalanchego/utils/set" 23 ) 24 25 const ( 26 saltSize = 32 27 minCountEstimate = 128 28 targetFalsePositiveProbability = .001 29 maxFalsePositiveProbability = .01 30 // By setting maxIPEntriesPerNode > 1, we allow nodes to update their IP at 31 // least once per bloom filter reset. 32 maxIPEntriesPerNode = 2 33 34 untrackedTimestamp = -2 35 olderTimestamp = -1 36 sameTimestamp = 0 37 newerTimestamp = 1 38 newTimestamp = 2 39 ) 40 41 var _ validators.ManagerCallbackListener = (*ipTracker)(nil) 42 43 func newIPTracker( 44 trackedSubnets set.Set[ids.ID], 45 log logging.Logger, 46 registerer prometheus.Registerer, 47 ) (*ipTracker, error) { 48 bloomMetrics, err := bloom.NewMetrics("ip_bloom", registerer) 49 if err != nil { 50 return nil, err 51 } 52 tracker := &ipTracker{ 53 trackedSubnets: trackedSubnets, 54 log: log, 55 numTrackedPeers: prometheus.NewGauge(prometheus.GaugeOpts{ 56 Name: "tracked_peers", 57 Help: "number of peers this node is monitoring", 58 }), 59 numGossipableIPs: prometheus.NewGauge(prometheus.GaugeOpts{ 60 Name: "gossipable_ips", 61 Help: "number of IPs this node considers able to be gossiped", 62 }), 63 numTrackedSubnets: prometheus.NewGauge(prometheus.GaugeOpts{ 64 Name: "tracked_subnets", 65 Help: "number of subnets this node is monitoring", 66 }), 67 bloomMetrics: bloomMetrics, 68 tracked: make(map[ids.NodeID]*trackedNode), 69 bloomAdditions: make(map[ids.NodeID]int), 70 connected: make(map[ids.NodeID]*connectedNode), 71 subnet: make(map[ids.ID]*gossipableSubnet), 72 } 73 err = errors.Join( 74 registerer.Register(tracker.numTrackedPeers), 75 registerer.Register(tracker.numGossipableIPs), 76 registerer.Register(tracker.numTrackedSubnets), 77 ) 78 if err != nil { 79 return nil, err 80 } 81 return tracker, tracker.resetBloom() 82 } 83 84 // A node is tracked if any of the following conditions are met: 85 // - The node was manually tracked 86 // - The node is a validator on any subnet 87 type trackedNode struct { 88 // manuallyTracked tracks if this node's connection was manually requested. 89 manuallyTracked bool 90 // validatedSubnets contains all the subnets that this node is a validator 91 // of, including potentially the primary network. 92 validatedSubnets set.Set[ids.ID] 93 // subnets contains the subset of [subnets] that the local node also tracks, 94 // including potentially the primary network. 95 trackedSubnets set.Set[ids.ID] 96 // ip is the most recently known IP of this node. 97 ip *ips.ClaimedIPPort 98 } 99 100 func (n *trackedNode) wantsConnection() bool { 101 return n.manuallyTracked || n.trackedSubnets.Len() > 0 102 } 103 104 func (n *trackedNode) canDelete() bool { 105 return !n.manuallyTracked && n.validatedSubnets.Len() == 0 106 } 107 108 type connectedNode struct { 109 // trackedSubnets contains all the subnets that this node is syncing, 110 // including the primary network. 111 trackedSubnets set.Set[ids.ID] 112 // ip this node claimed when connecting. The IP is not necessarily the same 113 // IP as in the tracked map. 114 ip *ips.ClaimedIPPort 115 } 116 117 type gossipableSubnet struct { 118 numGossipableIPs prometheus.Gauge 119 120 // manuallyGossipable contains the nodeIDs of all nodes whose IP was 121 // manually configured to be gossiped for this subnet. 122 manuallyGossipable set.Set[ids.NodeID] 123 124 // gossipableIDs contains the nodeIDs of all nodes whose IP could be 125 // gossiped. This is a superset of manuallyGossipable. 126 gossipableIDs set.Set[ids.NodeID] 127 128 // An IP is marked as gossipable if all of the following conditions are met: 129 // - The node is a validator or was manually requested to be gossiped 130 // - The node is connected 131 // - The node reported that they are syncing this subnet 132 // - The IP the node connected with is its latest IP 133 gossipableIndices map[ids.NodeID]int 134 gossipableIPs []*ips.ClaimedIPPort 135 } 136 137 func (s *gossipableSubnet) setGossipableIP(ip *ips.ClaimedIPPort) { 138 if index, ok := s.gossipableIndices[ip.NodeID]; ok { 139 s.gossipableIPs[index] = ip 140 return 141 } 142 143 s.numGossipableIPs.Inc() 144 s.gossipableIndices[ip.NodeID] = len(s.gossipableIPs) 145 s.gossipableIPs = append(s.gossipableIPs, ip) 146 } 147 148 func (s *gossipableSubnet) removeGossipableIP(nodeID ids.NodeID) { 149 indexToRemove, wasGossipable := s.gossipableIndices[nodeID] 150 if !wasGossipable { 151 return 152 } 153 154 // If we aren't removing the last IP, we need to swap the last IP with the 155 // IP we are removing so that the slice is contiguous. 156 newNumGossipable := len(s.gossipableIPs) - 1 157 if newNumGossipable != indexToRemove { 158 replacementIP := s.gossipableIPs[newNumGossipable] 159 s.gossipableIndices[replacementIP.NodeID] = indexToRemove 160 s.gossipableIPs[indexToRemove] = replacementIP 161 } 162 163 s.numGossipableIPs.Dec() 164 delete(s.gossipableIndices, nodeID) 165 s.gossipableIPs[newNumGossipable] = nil 166 s.gossipableIPs = s.gossipableIPs[:newNumGossipable] 167 } 168 169 // [maxNumIPs] applies to the total number of IPs returned, including the IPs 170 // initially provided in [ips]. 171 // [ips] and [nodeIDs] are extended and returned with the additional IPs added. 172 func (s *gossipableSubnet) getGossipableIPs( 173 exceptNodeID ids.NodeID, 174 exceptIPs *bloom.ReadFilter, 175 salt []byte, 176 maxNumIPs int, 177 ips []*ips.ClaimedIPPort, 178 nodeIDs set.Set[ids.NodeID], 179 ) ([]*ips.ClaimedIPPort, set.Set[ids.NodeID]) { 180 uniform := sampler.NewUniform() 181 uniform.Initialize(uint64(len(s.gossipableIPs))) 182 183 for len(ips) < maxNumIPs { 184 index, hasNext := uniform.Next() 185 if !hasNext { 186 return ips, nodeIDs 187 } 188 189 ip := s.gossipableIPs[index] 190 if ip.NodeID == exceptNodeID || 191 nodeIDs.Contains(ip.NodeID) || 192 bloom.Contains(exceptIPs, ip.GossipID[:], salt) { 193 continue 194 } 195 196 ips = append(ips, ip) 197 nodeIDs.Add(ip.NodeID) 198 } 199 return ips, nodeIDs 200 } 201 202 func (s *gossipableSubnet) canDelete() bool { 203 return s.gossipableIDs.Len() == 0 204 } 205 206 type ipTracker struct { 207 // trackedSubnets does not include the primary network. 208 trackedSubnets set.Set[ids.ID] 209 log logging.Logger 210 numTrackedPeers prometheus.Gauge 211 numGossipableIPs prometheus.Gauge // IPs are not deduplicated across subnets 212 numTrackedSubnets prometheus.Gauge 213 bloomMetrics *bloom.Metrics 214 215 lock sync.RWMutex 216 tracked map[ids.NodeID]*trackedNode 217 218 // The bloom filter contains the most recent tracked IPs to avoid 219 // unnecessary IP gossip. 220 bloom *bloom.Filter 221 // To prevent validators from causing the bloom filter to have too many 222 // false positives, we limit each validator to maxIPEntriesPerValidator in 223 // the bloom filter. 224 bloomAdditions map[ids.NodeID]int // Number of IPs added to the bloom 225 bloomSalt []byte 226 maxBloomCount int 227 228 // Connected tracks the information of currently connected peers, including 229 // tracked and untracked nodes. 230 connected map[ids.NodeID]*connectedNode 231 // subnet tracks all the subnets that have at least one gossipable ID. 232 subnet map[ids.ID]*gossipableSubnet 233 } 234 235 // ManuallyTrack marks the provided nodeID as being desirable to connect to. 236 // 237 // In order for a node to learn about these nodeIDs, other nodes in the network 238 // must have marked them as gossipable. 239 // 240 // Even if nodes disagree on the set of manually tracked nodeIDs, they will not 241 // introduce persistent network gossip. 242 func (i *ipTracker) ManuallyTrack(nodeID ids.NodeID) { 243 i.lock.Lock() 244 defer i.lock.Unlock() 245 246 i.addTrackableID(nodeID, nil) 247 } 248 249 // ManuallyGossip marks the provided nodeID as being desirable to connect to and 250 // marks the IPs that this node provides as being valid to gossip. 251 // 252 // In order to avoid persistent network gossip, it's important for nodes in the 253 // network to agree upon manually gossiped nodeIDs. 254 func (i *ipTracker) ManuallyGossip(subnetID ids.ID, nodeID ids.NodeID) { 255 i.lock.Lock() 256 defer i.lock.Unlock() 257 258 if subnetID == constants.PrimaryNetworkID || i.trackedSubnets.Contains(subnetID) { 259 i.addTrackableID(nodeID, nil) 260 } 261 262 i.addTrackableID(nodeID, &subnetID) 263 i.addGossipableID(nodeID, subnetID, true) 264 } 265 266 // WantsConnection returns true if any of the following conditions are met: 267 // 1. The node has been manually tracked. 268 // 2. The node has been manually gossiped on a tracked subnet. 269 // 3. The node is currently a validator on a tracked subnet. 270 func (i *ipTracker) WantsConnection(nodeID ids.NodeID) bool { 271 i.lock.RLock() 272 defer i.lock.RUnlock() 273 274 node, ok := i.tracked[nodeID] 275 return ok && node.wantsConnection() 276 } 277 278 // ShouldVerifyIP is used as an optimization to avoid unnecessary IP 279 // verification. It returns true if all of the following conditions are met: 280 // 1. The provided IP is from a node whose connection is desired. 281 // 2. This IP is newer than the most recent IP we know of for the node. 282 func (i *ipTracker) ShouldVerifyIP( 283 ip *ips.ClaimedIPPort, 284 trackAllSubnets bool, 285 ) bool { 286 i.lock.RLock() 287 defer i.lock.RUnlock() 288 289 node, ok := i.tracked[ip.NodeID] 290 if !ok { 291 return false 292 } 293 294 if !trackAllSubnets && !node.wantsConnection() { 295 return false 296 } 297 298 return node.ip == nil || // This would be the first IP 299 node.ip.Timestamp < ip.Timestamp // This would be a newer IP 300 } 301 302 // AddIP attempts to update the node's IP to the provided IP. This function 303 // assumes the provided IP has been verified. Returns true if all of the 304 // following conditions are met: 305 // 1. The provided IP is from a node whose connection is desired on a tracked 306 // subnet. 307 // 2. This IP is newer than the most recent IP we know of for the node. 308 // 309 // If this IP is replacing a gossipable IP, this IP will also be marked as 310 // gossipable. 311 func (i *ipTracker) AddIP(ip *ips.ClaimedIPPort) bool { 312 i.lock.Lock() 313 defer i.lock.Unlock() 314 315 timestampComparison, trackedNode := i.addIP(ip) 316 if timestampComparison <= sameTimestamp { 317 return false 318 } 319 320 if connectedNode, ok := i.connected[ip.NodeID]; ok { 321 i.setGossipableIP(trackedNode.ip, connectedNode.trackedSubnets) 322 } 323 return trackedNode.wantsConnection() 324 } 325 326 // GetIP returns the most recent IP of the provided nodeID. Returns true if all 327 // of the following conditions are met: 328 // 1. There is currently an IP for the provided nodeID. 329 // 2. The provided IP is from a node whose connection is desired on a tracked 330 // subnet. 331 func (i *ipTracker) GetIP(nodeID ids.NodeID) (*ips.ClaimedIPPort, bool) { 332 i.lock.RLock() 333 defer i.lock.RUnlock() 334 335 node, ok := i.tracked[nodeID] 336 if !ok || node.ip == nil { 337 return nil, false 338 } 339 return node.ip, node.wantsConnection() 340 } 341 342 // Connected is called when a connection is established. The peer should have 343 // provided [ip] during the handshake. 344 func (i *ipTracker) Connected(ip *ips.ClaimedIPPort, trackedSubnets set.Set[ids.ID]) { 345 i.lock.Lock() 346 defer i.lock.Unlock() 347 348 i.connected[ip.NodeID] = &connectedNode{ 349 trackedSubnets: trackedSubnets, 350 ip: ip, 351 } 352 353 timestampComparison, trackedNode := i.addIP(ip) 354 if timestampComparison != untrackedTimestamp { 355 i.setGossipableIP(trackedNode.ip, trackedSubnets) 356 } 357 } 358 359 func (i *ipTracker) addIP(ip *ips.ClaimedIPPort) (int, *trackedNode) { 360 node, ok := i.tracked[ip.NodeID] 361 if !ok { 362 return untrackedTimestamp, nil 363 } 364 365 if node.ip == nil { 366 // This is the first IP we've heard from the validator, so it is the 367 // most recent. 368 i.updateMostRecentTrackedIP(node, ip) 369 return newTimestamp, node 370 } 371 372 if node.ip.Timestamp > ip.Timestamp { 373 return olderTimestamp, node // This IP is older than the previously known IP. 374 } 375 if node.ip.Timestamp == ip.Timestamp { 376 return sameTimestamp, node // This IP is equal to the previously known IP. 377 } 378 379 // This IP is newer than the previously known IP. 380 i.updateMostRecentTrackedIP(node, ip) 381 return newerTimestamp, node 382 } 383 384 func (i *ipTracker) setGossipableIP(ip *ips.ClaimedIPPort, trackedSubnets set.Set[ids.ID]) { 385 for subnetID := range trackedSubnets { 386 if subnet, ok := i.subnet[subnetID]; ok && subnet.gossipableIDs.Contains(ip.NodeID) { 387 subnet.setGossipableIP(ip) 388 } 389 } 390 } 391 392 // Disconnected is called when a connection to the peer is closed. 393 func (i *ipTracker) Disconnected(nodeID ids.NodeID) { 394 i.lock.Lock() 395 defer i.lock.Unlock() 396 397 connectedNode, ok := i.connected[nodeID] 398 if !ok { 399 return 400 } 401 delete(i.connected, nodeID) 402 403 for subnetID := range connectedNode.trackedSubnets { 404 if subnet, ok := i.subnet[subnetID]; ok { 405 subnet.removeGossipableIP(nodeID) 406 } 407 } 408 } 409 410 func (i *ipTracker) OnValidatorAdded(subnetID ids.ID, nodeID ids.NodeID, _ *bls.PublicKey, _ ids.ID, _ uint64) { 411 i.lock.Lock() 412 defer i.lock.Unlock() 413 414 i.addTrackableID(nodeID, &subnetID) 415 i.addGossipableID(nodeID, subnetID, false) 416 } 417 418 // If [subnetID] is nil, the nodeID is being manually tracked. 419 func (i *ipTracker) addTrackableID(nodeID ids.NodeID, subnetID *ids.ID) { 420 nodeTracker, previouslyTracked := i.tracked[nodeID] 421 if !previouslyTracked { 422 i.numTrackedPeers.Inc() 423 nodeTracker = &trackedNode{} 424 i.tracked[nodeID] = nodeTracker 425 } 426 427 if subnetID == nil { 428 nodeTracker.manuallyTracked = true 429 } else { 430 nodeTracker.validatedSubnets.Add(*subnetID) 431 if *subnetID == constants.PrimaryNetworkID || i.trackedSubnets.Contains(*subnetID) { 432 nodeTracker.trackedSubnets.Add(*subnetID) 433 } 434 } 435 436 if previouslyTracked { 437 return 438 } 439 440 node, connected := i.connected[nodeID] 441 if !connected { 442 return 443 } 444 445 // Because we previously weren't tracking this nodeID, the IP from the 446 // connection is guaranteed to be the most up-to-date IP that we know. 447 i.updateMostRecentTrackedIP(nodeTracker, node.ip) 448 } 449 450 func (i *ipTracker) addGossipableID(nodeID ids.NodeID, subnetID ids.ID, manuallyGossiped bool) { 451 subnet, ok := i.subnet[subnetID] 452 if !ok { 453 i.numTrackedSubnets.Inc() 454 subnet = &gossipableSubnet{ 455 numGossipableIPs: i.numGossipableIPs, 456 gossipableIndices: make(map[ids.NodeID]int), 457 } 458 i.subnet[subnetID] = subnet 459 } 460 461 if manuallyGossiped { 462 subnet.manuallyGossipable.Add(nodeID) 463 } 464 if subnet.gossipableIDs.Contains(nodeID) { 465 return 466 } 467 468 subnet.gossipableIDs.Add(nodeID) 469 node, connected := i.connected[nodeID] 470 if !connected || !node.trackedSubnets.Contains(subnetID) { 471 return 472 } 473 474 if trackedNode, ok := i.tracked[nodeID]; ok { 475 subnet.setGossipableIP(trackedNode.ip) 476 } 477 } 478 479 func (*ipTracker) OnValidatorWeightChanged(ids.ID, ids.NodeID, uint64, uint64) {} 480 481 func (i *ipTracker) OnValidatorRemoved(subnetID ids.ID, nodeID ids.NodeID, _ uint64) { 482 i.lock.Lock() 483 defer i.lock.Unlock() 484 485 subnet, ok := i.subnet[subnetID] 486 if !ok { 487 i.log.Error("attempted removal of validator from untracked subnet", 488 zap.Stringer("subnetID", subnetID), 489 zap.Stringer("nodeID", nodeID), 490 ) 491 return 492 } 493 494 if subnet.manuallyGossipable.Contains(nodeID) { 495 return 496 } 497 498 subnet.gossipableIDs.Remove(nodeID) 499 subnet.removeGossipableIP(nodeID) 500 501 if subnet.canDelete() { 502 i.numTrackedSubnets.Dec() 503 delete(i.subnet, subnetID) 504 } 505 506 trackedNode, ok := i.tracked[nodeID] 507 if !ok { 508 i.log.Error("attempted removal of untracked validator", 509 zap.Stringer("subnetID", subnetID), 510 zap.Stringer("nodeID", nodeID), 511 ) 512 return 513 } 514 515 trackedNode.validatedSubnets.Remove(subnetID) 516 trackedNode.trackedSubnets.Remove(subnetID) 517 518 if trackedNode.canDelete() { 519 i.numTrackedPeers.Dec() 520 delete(i.tracked, nodeID) 521 } 522 } 523 524 func (i *ipTracker) updateMostRecentTrackedIP(node *trackedNode, ip *ips.ClaimedIPPort) { 525 node.ip = ip 526 527 oldCount := i.bloomAdditions[ip.NodeID] 528 if oldCount >= maxIPEntriesPerNode { 529 return 530 } 531 532 // If the validator set is growing rapidly, we should increase the size of 533 // the bloom filter. 534 if count := i.bloom.Count(); count >= i.maxBloomCount { 535 if err := i.resetBloom(); err != nil { 536 i.log.Error("failed to reset validator tracker bloom filter", 537 zap.Int("maxCount", i.maxBloomCount), 538 zap.Int("currentCount", count), 539 zap.Error(err), 540 ) 541 } else { 542 i.log.Info("reset validator tracker bloom filter", 543 zap.Int("currentCount", count), 544 ) 545 } 546 return 547 } 548 549 i.bloomAdditions[ip.NodeID] = oldCount + 1 550 bloom.Add(i.bloom, ip.GossipID[:], i.bloomSalt) 551 i.bloomMetrics.Count.Inc() 552 } 553 554 // ResetBloom prunes the current bloom filter. This must be called periodically 555 // to ensure that validators that change their IPs are updated correctly and 556 // that validators that left the validator set are removed. 557 func (i *ipTracker) ResetBloom() error { 558 i.lock.Lock() 559 defer i.lock.Unlock() 560 561 return i.resetBloom() 562 } 563 564 // Bloom returns the binary representation of the bloom filter along with the 565 // random salt. 566 func (i *ipTracker) Bloom() ([]byte, []byte) { 567 i.lock.RLock() 568 defer i.lock.RUnlock() 569 570 return i.bloom.Marshal(), i.bloomSalt 571 } 572 573 // resetBloom creates a new bloom filter with a reasonable size for the current 574 // validator set size. This function additionally populates the new bloom filter 575 // with the current most recently known IPs of validators. 576 func (i *ipTracker) resetBloom() error { 577 newSalt := make([]byte, saltSize) 578 _, err := rand.Reader.Read(newSalt) 579 if err != nil { 580 return err 581 } 582 583 count := max(maxIPEntriesPerNode*len(i.tracked), minCountEstimate) 584 numHashes, numEntries := bloom.OptimalParameters( 585 count, 586 targetFalsePositiveProbability, 587 ) 588 newFilter, err := bloom.New(numHashes, numEntries) 589 if err != nil { 590 return err 591 } 592 593 i.bloom = newFilter 594 clear(i.bloomAdditions) 595 i.bloomSalt = newSalt 596 i.maxBloomCount = bloom.EstimateCount(numHashes, numEntries, maxFalsePositiveProbability) 597 598 for nodeID, trackedNode := range i.tracked { 599 if trackedNode.ip == nil { 600 continue 601 } 602 603 bloom.Add(newFilter, trackedNode.ip.GossipID[:], newSalt) 604 i.bloomAdditions[nodeID] = 1 605 } 606 i.bloomMetrics.Reset(newFilter, i.maxBloomCount) 607 return nil 608 } 609 610 func getGossipableIPs[T any]( 611 i *ipTracker, 612 iter map[ids.ID]T, // The values in this map aren't actually used. 613 allowed func(ids.ID) bool, 614 exceptNodeID ids.NodeID, 615 exceptIPs *bloom.ReadFilter, 616 salt []byte, 617 maxNumIPs int, 618 ) []*ips.ClaimedIPPort { 619 var ( 620 ips = make([]*ips.ClaimedIPPort, 0, maxNumIPs) 621 nodeIDs = set.NewSet[ids.NodeID](maxNumIPs) 622 ) 623 624 i.lock.RLock() 625 defer i.lock.RUnlock() 626 627 for subnetID := range iter { 628 if !allowed(subnetID) { 629 continue 630 } 631 632 subnet, ok := i.subnet[subnetID] 633 if !ok { 634 continue 635 } 636 637 ips, nodeIDs = subnet.getGossipableIPs( 638 exceptNodeID, 639 exceptIPs, 640 salt, 641 maxNumIPs, 642 ips, 643 nodeIDs, 644 ) 645 if len(ips) >= maxNumIPs { 646 break 647 } 648 } 649 return ips 650 }