github.com/Heebron/moby@v0.0.0-20221111184709-6eab4f55faf7/libnetwork/networkdb/cluster.go

package networkdb

import (
	"bytes"
	"context"
	"crypto/rand"
	"encoding/hex"
	"fmt"
	"log"
	"math/big"
	rnd "math/rand"
	"net"
	"strings"
	"time"

	"github.com/hashicorp/memberlist"
	"github.com/sirupsen/logrus"
)

const (
	reapPeriod       = 5 * time.Second
	retryInterval    = 1 * time.Second
	nodeReapInterval = 24 * time.Hour
	nodeReapPeriod   = 2 * time.Hour
	// considering a cluster with > 20 nodes and a drain speed of 100 msg/s
	// the following is roughly 1 minute
	maxQueueLenBroadcastOnSync = 500
)

type logWriter struct{}

func (l *logWriter) Write(p []byte) (int, error) {
	str := string(p)
	str = strings.TrimSuffix(str, "\n")

	switch {
	case strings.HasPrefix(str, "[WARN] "):
		str = strings.TrimPrefix(str, "[WARN] ")
		logrus.Warn(str)
	case strings.HasPrefix(str, "[DEBUG] "):
		str = strings.TrimPrefix(str, "[DEBUG] ")
		logrus.Debug(str)
	case strings.HasPrefix(str, "[INFO] "):
		str = strings.TrimPrefix(str, "[INFO] ")
		logrus.Info(str)
	case strings.HasPrefix(str, "[ERR] "):
		str = strings.TrimPrefix(str, "[ERR] ")
		logrus.Warn(str)
	}

	return len(p), nil
}

// SetKey adds a new key to the key ring
func (nDB *NetworkDB) SetKey(key []byte) {
	logrus.Debugf("Adding key %.5s", hex.EncodeToString(key))
	nDB.Lock()
	defer nDB.Unlock()
	for _, dbKey := range nDB.config.Keys {
		if bytes.Equal(key, dbKey) {
			return
		}
	}
	nDB.config.Keys = append(nDB.config.Keys, key)
	if nDB.keyring != nil {
		nDB.keyring.AddKey(key)
	}
}

// SetPrimaryKey sets the given key as the primary key. This should have
// been added a priori through SetKey
func (nDB *NetworkDB) SetPrimaryKey(key []byte) {
	logrus.Debugf("Primary Key %.5s", hex.EncodeToString(key))
	nDB.RLock()
	defer nDB.RUnlock()
	for _, dbKey := range nDB.config.Keys {
		if bytes.Equal(key, dbKey) {
			if nDB.keyring != nil {
				nDB.keyring.UseKey(dbKey)
			}
			break
		}
	}
}

// RemoveKey removes a key from the key ring. The key being removed
// can't be the primary key
func (nDB *NetworkDB) RemoveKey(key []byte) {
	logrus.Debugf("Remove Key %.5s", hex.EncodeToString(key))
	nDB.Lock()
	defer nDB.Unlock()
	for i, dbKey := range nDB.config.Keys {
		if bytes.Equal(key, dbKey) {
			nDB.config.Keys = append(nDB.config.Keys[:i], nDB.config.Keys[i+1:]...)
			if nDB.keyring != nil {
				nDB.keyring.RemoveKey(dbKey)
			}
			break
		}
	}
}

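// clusterInit creates the memberlist instance for this node, wiring in the
// NetworkDB delegates, logger and optional keyring, and starts the periodic
// background tasks: state reaping, gossip, bulk table sync, failed-node
// reconnects, dead-node reaping and the bootstrap rejoin check.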
func (nDB *NetworkDB) clusterInit() error {
	nDB.lastStatsTimestamp = time.Now()
	nDB.lastHealthTimestamp = nDB.lastStatsTimestamp

	config := memberlist.DefaultLANConfig()
	config.Name = nDB.config.NodeID
	config.BindAddr = nDB.config.BindAddr
	config.AdvertiseAddr = nDB.config.AdvertiseAddr
	config.UDPBufferSize = nDB.config.PacketBufferSize

	if nDB.config.BindPort != 0 {
		config.BindPort = nDB.config.BindPort
	}

	config.ProtocolVersion = memberlist.ProtocolVersion2Compatible
	config.Delegate = &delegate{nDB: nDB}
	config.Events = &eventDelegate{nDB: nDB}
	// custom logger that does not add time or date, so they are not
	// duplicated by logrus
	config.Logger = log.New(&logWriter{}, "", 0)

	var err error
	if len(nDB.config.Keys) > 0 {
		for i, key := range nDB.config.Keys {
			logrus.Debugf("Encryption key %d: %.5s", i+1, hex.EncodeToString(key))
		}
		nDB.keyring, err = memberlist.NewKeyring(nDB.config.Keys, nDB.config.Keys[0])
		if err != nil {
			return err
		}
		config.Keyring = nDB.keyring
	}

	nDB.networkBroadcasts = &memberlist.TransmitLimitedQueue{
		NumNodes: func() int {
			nDB.RLock()
			num := len(nDB.nodes)
			nDB.RUnlock()
			return num
		},
		RetransmitMult: config.RetransmitMult,
	}

	nDB.nodeBroadcasts = &memberlist.TransmitLimitedQueue{
		NumNodes: func() int {
			nDB.RLock()
			num := len(nDB.nodes)
			nDB.RUnlock()
			return num
		},
		RetransmitMult: config.RetransmitMult,
	}

	mlist, err := memberlist.Create(config)
	if err != nil {
		return fmt.Errorf("failed to create memberlist: %v", err)
	}

	nDB.ctx, nDB.cancelCtx = context.WithCancel(context.Background())
	nDB.memberlist = mlist

	for _, trigger := range []struct {
		interval time.Duration
		fn       func()
	}{
		{reapPeriod, nDB.reapState},
		{config.GossipInterval, nDB.gossip},
		{config.PushPullInterval, nDB.bulkSyncTables},
		{retryInterval, nDB.reconnectNode},
		{nodeReapPeriod, nDB.reapDeadNode},
		{nDB.config.rejoinClusterInterval, nDB.rejoinClusterBootStrap},
	} {
		t := time.NewTicker(trigger.interval)
		go nDB.triggerFunc(trigger.interval, t.C, trigger.fn)
		nDB.tickers = append(nDB.tickers, t)
	}

	return nil
}

func (nDB *NetworkDB) retryJoin(ctx context.Context, members []string) {
	t := time.NewTicker(retryInterval)
	defer t.Stop()

	for {
		select {
		case <-t.C:
			if _, err := nDB.memberlist.Join(members); err != nil {
				logrus.Errorf("Failed to join memberlist %s on retry: %v", members, err)
				continue
			}
			if err := nDB.sendNodeEvent(NodeEventTypeJoin); err != nil {
				logrus.Errorf("failed to send node join on retry: %v", err)
				continue
			}
			return
		case <-ctx.Done():
			return
		}
	}
}

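// clusterJoin makes a single attempt to join the given members through
// memberlist and then announces this node to the cluster with a join event.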
func (nDB *NetworkDB) clusterJoin(members []string) error {
	mlist := nDB.memberlist

	if _, err := mlist.Join(members); err != nil {
		// In case of failure, we no longer need to explicitly call retryJoin.
		// rejoinClusterBootStrap, which runs every nDB.config.rejoinClusterInterval,
		// will retryJoin for nDB.config.rejoinClusterDuration.
		return fmt.Errorf("could not join node to memberlist: %v", err)
	}

	if err := nDB.sendNodeEvent(NodeEventTypeJoin); err != nil {
		return fmt.Errorf("failed to send node join: %v", err)
	}

	return nil
}

func (nDB *NetworkDB) clusterLeave() error {
	mlist := nDB.memberlist

	if err := nDB.sendNodeEvent(NodeEventTypeLeave); err != nil {
		logrus.Errorf("failed to send node leave: %v", err)
	}

	if err := mlist.Leave(time.Second); err != nil {
		return err
	}

	// cancel the context
	nDB.cancelCtx()

	for _, t := range nDB.tickers {
		t.Stop()
	}

	return mlist.Shutdown()
}

func (nDB *NetworkDB) triggerFunc(stagger time.Duration, C <-chan time.Time, f func()) {
	// Use a random stagger to avoid synchronizing
	randStagger := time.Duration(uint64(rnd.Int63()) % uint64(stagger)) //nolint:gosec // gosec complains about the use of rand here. It should be fine.
	select {
	case <-time.After(randStagger):
	case <-nDB.ctx.Done():
		return
	}
	for {
		select {
		case <-C:
			f()
		case <-nDB.ctx.Done():
			return
		}
	}
}

func (nDB *NetworkDB) reapDeadNode() {
	nDB.Lock()
	defer nDB.Unlock()
	for _, nodeMap := range []map[string]*node{
		nDB.failedNodes,
		nDB.leftNodes,
	} {
		for id, n := range nodeMap {
			if n.reapTime > nodeReapPeriod {
				n.reapTime -= nodeReapPeriod
				continue
			}
			logrus.Debugf("Garbage collect node %v", n.Name)
			delete(nodeMap, id)
		}
	}
}

// rejoinClusterBootStrap is called periodically to check whether any of the bootStrap
// nodes are active in the cluster. If none are, it calls cluster join to merge the two
// separate clusters that form when all the managers are stopped/started at the same time.
func (nDB *NetworkDB) rejoinClusterBootStrap() {
	nDB.RLock()
	if len(nDB.bootStrapIP) == 0 {
		nDB.RUnlock()
		return
	}

	myself, ok := nDB.nodes[nDB.config.NodeID]
	if !ok {
		nDB.RUnlock()
		logrus.Warnf("rejoinClusterBootstrap unable to find local node info using ID:%v", nDB.config.NodeID)
		return
	}
	bootStrapIPs := make([]string, 0, len(nDB.bootStrapIP))
	for _, bootIP := range nDB.bootStrapIP {
		// bootstrap IPs are usually IP:port from the Join
		var bootstrapIP net.IP
		ipStr, _, err := net.SplitHostPort(bootIP)
		if err != nil {
			// not in host:port form, try to parse it as a plain IP
			// Note this seems to be the case for swarm nodes that do not specify any port
			ipStr = bootIP
		}
		bootstrapIP = net.ParseIP(ipStr)
		if bootstrapIP != nil {
			for _, node := range nDB.nodes {
				if node.Addr.Equal(bootstrapIP) && !node.Addr.Equal(myself.Addr) {
					// One of the bootstrap nodes (and not myself) is part of the cluster, return
					nDB.RUnlock()
					return
				}
			}
			bootStrapIPs = append(bootStrapIPs, bootIP)
		}
	}
	nDB.RUnlock()
	if len(bootStrapIPs) == 0 {
		// this also avoids calling Join with an empty list, which would erase the current bootstrap IP list
		logrus.Debug("rejoinClusterBootStrap did not find any valid IP")
		return
	}
	// None of the bootStrap nodes are in the cluster, call memberlist join
	logrus.Debugf("rejoinClusterBootStrap, calling cluster join with bootStrap %v", bootStrapIPs)
	ctx, cancel := context.WithTimeout(nDB.ctx, nDB.config.rejoinClusterDuration)
	defer cancel()
	nDB.retryJoin(ctx, bootStrapIPs)
}

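// reconnectNode picks one random node from the failed-node list, tries to
// rejoin it through memberlist and, on success, announces this node again and
// initiates a bulk sync with the reconnected peer.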
func (nDB *NetworkDB) reconnectNode() {
	nDB.RLock()
	if len(nDB.failedNodes) == 0 {
		nDB.RUnlock()
		return
	}

	nodes := make([]*node, 0, len(nDB.failedNodes))
	for _, n := range nDB.failedNodes {
		nodes = append(nodes, n)
	}
	nDB.RUnlock()

	node := nodes[randomOffset(len(nodes))]
	addr := net.UDPAddr{IP: node.Addr, Port: int(node.Port)}

	if _, err := nDB.memberlist.Join([]string{addr.String()}); err != nil {
		return
	}

	if err := nDB.sendNodeEvent(NodeEventTypeJoin); err != nil {
		return
	}

	logrus.Debugf("Initiating bulk sync with node %s after reconnect", node.Name)
	nDB.bulkSync([]string{node.Name}, true)
}

// For timing the entry deletion in the reaper, APIs that don't use a monotonic clock
// source (time.Now, Sub, etc.) should be avoided. Hence we use a reapTime in every
// entry, which is set initially to reapInterval and decremented by reapPeriod every time
// the reaper runs. NOTE: nDB.reapTableEntries updates the reapTime with a read lock. This
// is safe as long as no other concurrent path touches the reapTime field.
func (nDB *NetworkDB) reapState() {
	// reapTableEntries leverages the presence of the network, so garbage collect entries first
	nDB.reapTableEntries()
	nDB.reapNetworks()
}

func (nDB *NetworkDB) reapNetworks() {
	nDB.Lock()
	for _, nn := range nDB.networks {
		for id, n := range nn {
			if n.leaving {
				if n.reapTime <= 0 {
					delete(nn, id)
					continue
				}
				n.reapTime -= reapPeriod
			}
		}
	}
	nDB.Unlock()
}

func (nDB *NetworkDB) reapTableEntries() {
	var nodeNetworks []string
	// This is best effort; if the list of networks changes, it will be picked up in the next cycle
	nDB.RLock()
	for nid := range nDB.networks[nDB.config.NodeID] {
		nodeNetworks = append(nodeNetworks, nid)
	}
	nDB.RUnlock()

	cycleStart := time.Now()
	// In order to avoid blocking the database for a long time, apply the garbage collection logic by network
	// The lock is taken at the beginning of the cycle and the deletion is inline
	for _, nid := range nodeNetworks {
		nDB.Lock()
		nDB.indexes[byNetwork].WalkPrefix("/"+nid, func(path string, v interface{}) bool {
			// timeCompensation compensates in case the lock took some time to be released
			timeCompensation := time.Since(cycleStart)
			entry, ok := v.(*entry)
			if !ok || !entry.deleting {
				return false
			}

			// In this check we add an extra second to guarantee that, when the number is truncated to int32
			// to fit the tableEvent packet, it is always strictly > 1 and never 0
			if entry.reapTime > reapPeriod+timeCompensation+time.Second {
				entry.reapTime -= reapPeriod + timeCompensation
				return false
			}

			params := strings.Split(path[1:], "/")
			nid := params[0]
			tname := params[1]
			key := params[2]

			okTable, okNetwork := nDB.deleteEntry(nid, tname, key)
			if !okTable {
				logrus.Errorf("Table tree delete failed, entry with key:%s does not exist in the table:%s network:%s", key, tname, nid)
			}
			if !okNetwork {
				logrus.Errorf("Network tree delete failed, entry with key:%s does not exist in the network:%s table:%s", key, nid, tname)
			}

			return false
		})
		nDB.Unlock()
	}
}

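// gossip drains the table-event broadcast queue of every network this node is
// attached to and sends the pending events, packed into a compound message per
// network, to up to 3 random peers of that network. It also emits the periodic
// health and per-network queue statistics.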
func (nDB *NetworkDB) gossip() {
	networkNodes := make(map[string][]string)
	nDB.RLock()
	thisNodeNetworks := nDB.networks[nDB.config.NodeID]
	for nid := range thisNodeNetworks {
		networkNodes[nid] = nDB.networkNodes[nid]
	}
	printStats := time.Since(nDB.lastStatsTimestamp) >= nDB.config.StatsPrintPeriod
	printHealth := time.Since(nDB.lastHealthTimestamp) >= nDB.config.HealthPrintPeriod
	nDB.RUnlock()

	if printHealth {
		healthScore := nDB.memberlist.GetHealthScore()
		if healthScore != 0 {
			logrus.Warnf("NetworkDB stats %v(%v) - healthscore:%d (connectivity issues)", nDB.config.Hostname, nDB.config.NodeID, healthScore)
		}
		nDB.lastHealthTimestamp = time.Now()
	}

	for nid, nodes := range networkNodes {
		mNodes := nDB.mRandomNodes(3, nodes)
		bytesAvail := nDB.config.PacketBufferSize - compoundHeaderOverhead

		nDB.RLock()
		network, ok := thisNodeNetworks[nid]
		nDB.RUnlock()
		if !ok || network == nil {
			// It is normal for the network to be removed
			// between the time we collect the network
			// attachments of this node and processing
			// them here.
			continue
		}

		broadcastQ := network.tableBroadcasts

		if broadcastQ == nil {
			logrus.Errorf("Invalid broadcastQ encountered while gossiping for network %s", nid)
			continue
		}

		msgs := broadcastQ.GetBroadcasts(compoundOverhead, bytesAvail)
		// Collect stats and print the queue info; note this code is here also to have a view of the queues when empty
		network.qMessagesSent += len(msgs)
		if printStats {
			logrus.Infof("NetworkDB stats %v(%v) - netID:%s leaving:%t netPeers:%d entries:%d Queue qLen:%d netMsg/s:%d",
				nDB.config.Hostname, nDB.config.NodeID,
				nid, network.leaving, broadcastQ.NumNodes(), network.entriesNumber, broadcastQ.NumQueued(),
				network.qMessagesSent/int((nDB.config.StatsPrintPeriod/time.Second)))
			network.qMessagesSent = 0
		}

		if len(msgs) == 0 {
			continue
		}

		// Create a compound message
		compound := makeCompoundMessage(msgs)

		for _, node := range mNodes {
			nDB.RLock()
			mnode := nDB.nodes[node]
			nDB.RUnlock()

			if mnode == nil {
				break
			}

			// Send the compound message
			if err := nDB.memberlist.SendBestEffort(&mnode.Node, compound); err != nil {
				logrus.Errorf("Failed to send gossip to %s: %s", mnode.Addr, err)
			}
		}
	}
	// Reset the stats
	if printStats {
		nDB.lastStatsTimestamp = time.Now()
	}
}

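// bulkSyncTables runs the periodic bulk sync: for each network this node
// participates in (and is not leaving), it syncs the table entries with the
// network's peers, dropping from the work list any network already covered by
// a previously completed sync in the same run.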
func (nDB *NetworkDB) bulkSyncTables() {
	var networks []string
	nDB.RLock()
	for nid, network := range nDB.networks[nDB.config.NodeID] {
		if network.leaving {
			continue
		}
		networks = append(networks, nid)
	}
	nDB.RUnlock()

	for {
		if len(networks) == 0 {
			break
		}

		nid := networks[0]
		networks = networks[1:]

		nDB.RLock()
		nodes := nDB.networkNodes[nid]
		nDB.RUnlock()

		// No peer nodes on this network. Move on.
		if len(nodes) == 0 {
			continue
		}

		completed, err := nDB.bulkSync(nodes, false)
		if err != nil {
			logrus.Errorf("periodic bulk sync failure for network %s: %v", nid, err)
			continue
		}

		// Remove all the networks for which we have
		// successfully completed bulk sync in this iteration.
		updatedNetworks := make([]string, 0, len(networks))
		for _, nid := range networks {
			var found bool
			for _, completedNid := range completed {
				if nid == completedNid {
					found = true
					break
				}
			}

			if !found {
				updatedNetworks = append(updatedNetworks, nid)
			}
		}

		networks = updatedNetworks
	}
}

func (nDB *NetworkDB) bulkSync(nodes []string, all bool) ([]string, error) {
	if !all {
		// Get 2 random nodes. 2nd node will be tried if the bulk sync to
		// 1st node fails.
		nodes = nDB.mRandomNodes(2, nodes)
	}

	if len(nodes) == 0 {
		return nil, nil
	}

	var err error
	var networks []string
	var success bool
	for _, node := range nodes {
		if node == nDB.config.NodeID {
			continue
		}
		logrus.Debugf("%v(%v): Initiating bulk sync with node %v", nDB.config.Hostname, nDB.config.NodeID, node)
		networks = nDB.findCommonNetworks(node)
		err = nDB.bulkSyncNode(networks, node, true)
		if err != nil {
			err = fmt.Errorf("bulk sync to node %s failed: %v", node, err)
			logrus.Warn(err.Error())
		} else {
			// bulk sync succeeded
			success = true
			// if it's a periodic bulk sync, stop after the first successful sync
			if !all {
				break
			}
		}
	}

	if success {
		// if at least one node sync succeeded
		return networks, nil
	}

	return nil, err
}

// Bulk sync all the table entries belonging to a set of networks to a
// single peer node. It can be unsolicited or can be in response to an
// unsolicited bulk sync
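// The entries are encoded as TableEvents and sent as one compound message over
// memberlist's reliable (TCP) channel; an unsolicited sync waits up to 30 seconds
// for the peer's acknowledgement on the channel registered in nDB.bulkSyncAckTbl,
// while a solicited one returns right after sending.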
func (nDB *NetworkDB) bulkSyncNode(networks []string, node string, unsolicited bool) error {
	var msgs [][]byte

	var unsolMsg string
	if unsolicited {
		unsolMsg = "unsolicited"
	}

	logrus.Debugf("%v(%v): Initiating %s bulk sync for networks %v with node %s",
		nDB.config.Hostname, nDB.config.NodeID, unsolMsg, networks, node)

	nDB.RLock()
	mnode := nDB.nodes[node]
	if mnode == nil {
		nDB.RUnlock()
		return nil
	}

	for _, nid := range networks {
		nDB.indexes[byNetwork].WalkPrefix("/"+nid, func(path string, v interface{}) bool {
			entry, ok := v.(*entry)
			if !ok {
				return false
			}

			eType := TableEventTypeCreate
			if entry.deleting {
				eType = TableEventTypeDelete
			}

			params := strings.Split(path[1:], "/")
			tEvent := TableEvent{
				Type:      eType,
				LTime:     entry.ltime,
				NodeName:  entry.node,
				NetworkID: nid,
				TableName: params[1],
				Key:       params[2],
				Value:     entry.value,
				// The duration in seconds is a float that would be truncated below
				ResidualReapTime: int32(entry.reapTime.Seconds()),
			}

			msg, err := encodeMessage(MessageTypeTableEvent, &tEvent)
			if err != nil {
				logrus.Errorf("Encode failure during bulk sync: %#v", tEvent)
				return false
			}

			msgs = append(msgs, msg)
			return false
		})
	}
	nDB.RUnlock()

	// Create a compound message
	compound := makeCompoundMessage(msgs)

	bsm := BulkSyncMessage{
		LTime:       nDB.tableClock.Time(),
		Unsolicited: unsolicited,
		NodeName:    nDB.config.NodeID,
		Networks:    networks,
		Payload:     compound,
	}

	buf, err := encodeMessage(MessageTypeBulkSync, &bsm)
	if err != nil {
		return fmt.Errorf("failed to encode bulk sync message: %v", err)
	}

	nDB.Lock()
	ch := make(chan struct{})
	nDB.bulkSyncAckTbl[node] = ch
	nDB.Unlock()

	err = nDB.memberlist.SendReliable(&mnode.Node, buf)
	if err != nil {
		nDB.Lock()
		delete(nDB.bulkSyncAckTbl, node)
		nDB.Unlock()

		return fmt.Errorf("failed to send a TCP message during bulk sync: %v", err)
	}

	// Wait on a response only if it is unsolicited.
	if unsolicited {
		startTime := time.Now()
		t := time.NewTimer(30 * time.Second)
		select {
		case <-t.C:
			logrus.Errorf("Bulk sync to node %s timed out", node)
		case <-ch:
			logrus.Debugf("%v(%v): Bulk sync to node %s took %s", nDB.config.Hostname, nDB.config.NodeID, node, time.Since(startTime))
		}
		t.Stop()
	}

	return nil
}

// Returns a random offset in the range [0, n)
func randomOffset(n int) int {
	if n == 0 {
		return 0
	}

	val, err := rand.Int(rand.Reader, big.NewInt(int64(n))) // #nosec G404 -- False positive; see https://github.com/securego/gosec/issues/862
	if err != nil {
		logrus.Errorf("Failed to get a random offset: %v", err)
		return 0
	}

	return int(val.Int64())
}

// mRandomNodes is used to select up to m random nodes. It is possible
// that fewer than m nodes are returned.
func (nDB *NetworkDB) mRandomNodes(m int, nodes []string) []string {
	n := len(nodes)
	mNodes := make([]string, 0, m)
OUTER:
	// Probe up to 3*n times, with large n this is not necessary
	// since m << n, but with small n we want search to be
	// exhaustive
	for i := 0; i < 3*n && len(mNodes) < m; i++ {
		// Get random node
		idx := randomOffset(n)
		node := nodes[idx]

		if node == nDB.config.NodeID {
			continue
		}

		// Check if we have this node already
		for j := 0; j < len(mNodes); j++ {
			if node == mNodes[j] {
				continue OUTER
			}
		}

		// Append the node
		mNodes = append(mNodes, node)
	}

	return mNodes
}