// github.com/docker/engine@v22.0.0-20211208180946-d456264580cf+incompatible/libnetwork/networkdb/cluster.go

package networkdb

import (
	"bytes"
	"context"
	"crypto/rand"
	"encoding/hex"
	"fmt"
	"log"
	"math/big"
	rnd "math/rand"
	"net"
	"strings"
	"time"

	"github.com/hashicorp/memberlist"
	"github.com/sirupsen/logrus"
)

const (
	reapPeriod       = 5 * time.Second
	retryInterval    = 1 * time.Second
	nodeReapInterval = 24 * time.Hour
	nodeReapPeriod   = 2 * time.Hour
	// considering a cluster with > 20 nodes and a drain speed of 100 msg/s,
	// the following is roughly 1 minute
	maxQueueLenBroadcastOnSync = 500
)

// logWriter routes memberlist's log output to logrus, mapping the bracketed
// level prefixes to the corresponding logrus levels.
type logWriter struct{}

func (l *logWriter) Write(p []byte) (int, error) {
	str := string(p)
	str = strings.TrimSuffix(str, "\n")

	switch {
	case strings.HasPrefix(str, "[WARN] "):
		str = strings.TrimPrefix(str, "[WARN] ")
		logrus.Warn(str)
	case strings.HasPrefix(str, "[DEBUG] "):
		str = strings.TrimPrefix(str, "[DEBUG] ")
		logrus.Debug(str)
	case strings.HasPrefix(str, "[INFO] "):
		str = strings.TrimPrefix(str, "[INFO] ")
		logrus.Info(str)
	case strings.HasPrefix(str, "[ERR] "):
		str = strings.TrimPrefix(str, "[ERR] ")
		logrus.Warn(str)
	}

	return len(p), nil
}

// SetKey adds a new key to the key ring
func (nDB *NetworkDB) SetKey(key []byte) {
	logrus.Debugf("Adding key %.5s", hex.EncodeToString(key))
	nDB.Lock()
	defer nDB.Unlock()
	for _, dbKey := range nDB.config.Keys {
		if bytes.Equal(key, dbKey) {
			return
		}
	}
	nDB.config.Keys = append(nDB.config.Keys, key)
	if nDB.keyring != nil {
		nDB.keyring.AddKey(key)
	}
}

// SetPrimaryKey sets the given key as the primary key. The key must have
// been added beforehand through SetKey
func (nDB *NetworkDB) SetPrimaryKey(key []byte) {
	logrus.Debugf("Primary Key %.5s", hex.EncodeToString(key))
	nDB.RLock()
	defer nDB.RUnlock()
	for _, dbKey := range nDB.config.Keys {
		if bytes.Equal(key, dbKey) {
			if nDB.keyring != nil {
				nDB.keyring.UseKey(dbKey)
			}
			break
		}
	}
}

// RemoveKey removes a key from the key ring. The key being removed
// can't be the primary key
func (nDB *NetworkDB) RemoveKey(key []byte) {
	logrus.Debugf("Remove Key %.5s", hex.EncodeToString(key))
	nDB.Lock()
	defer nDB.Unlock()
	for i, dbKey := range nDB.config.Keys {
		if bytes.Equal(key, dbKey) {
			nDB.config.Keys = append(nDB.config.Keys[:i], nDB.config.Keys[i+1:]...)
			if nDB.keyring != nil {
				nDB.keyring.RemoveKey(dbKey)
			}
			break
		}
	}
}
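
// clusterInit creates and configures the memberlist instance for this node,
// wires in the NetworkDB delegates and the broadcast queues, and starts the
// periodic background tasks: state reaping, gossip, bulk table sync,
// failed-node reconnection, dead-node reaping and bootstrap rejoin.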
func (nDB *NetworkDB) clusterInit() error {
	nDB.lastStatsTimestamp = time.Now()
	nDB.lastHealthTimestamp = nDB.lastStatsTimestamp

	config := memberlist.DefaultLANConfig()
	config.Name = nDB.config.NodeID
	config.BindAddr = nDB.config.BindAddr
	config.AdvertiseAddr = nDB.config.AdvertiseAddr
	config.UDPBufferSize = nDB.config.PacketBufferSize

	if nDB.config.BindPort != 0 {
		config.BindPort = nDB.config.BindPort
	}

	config.ProtocolVersion = memberlist.ProtocolVersion2Compatible
	config.Delegate = &delegate{nDB: nDB}
	config.Events = &eventDelegate{nDB: nDB}
	// custom logger that does not add time or date, so they are not
	// duplicated by logrus
	config.Logger = log.New(&logWriter{}, "", 0)

	var err error
	if len(nDB.config.Keys) > 0 {
		for i, key := range nDB.config.Keys {
			logrus.Debugf("Encryption key %d: %.5s", i+1, hex.EncodeToString(key))
		}
		nDB.keyring, err = memberlist.NewKeyring(nDB.config.Keys, nDB.config.Keys[0])
		if err != nil {
			return err
		}
		config.Keyring = nDB.keyring
	}

	nDB.networkBroadcasts = &memberlist.TransmitLimitedQueue{
		NumNodes: func() int {
			nDB.RLock()
			num := len(nDB.nodes)
			nDB.RUnlock()
			return num
		},
		RetransmitMult: config.RetransmitMult,
	}

	nDB.nodeBroadcasts = &memberlist.TransmitLimitedQueue{
		NumNodes: func() int {
			nDB.RLock()
			num := len(nDB.nodes)
			nDB.RUnlock()
			return num
		},
		RetransmitMult: config.RetransmitMult,
	}

	mlist, err := memberlist.Create(config)
	if err != nil {
		return fmt.Errorf("failed to create memberlist: %v", err)
	}

	nDB.ctx, nDB.cancelCtx = context.WithCancel(context.Background())
	nDB.memberlist = mlist

	for _, trigger := range []struct {
		interval time.Duration
		fn       func()
	}{
		{reapPeriod, nDB.reapState},
		{config.GossipInterval, nDB.gossip},
		{config.PushPullInterval, nDB.bulkSyncTables},
		{retryInterval, nDB.reconnectNode},
		{nodeReapPeriod, nDB.reapDeadNode},
		{nDB.config.rejoinClusterInterval, nDB.rejoinClusterBootStrap},
	} {
		t := time.NewTicker(trigger.interval)
		go nDB.triggerFunc(trigger.interval, t.C, trigger.fn)
		nDB.tickers = append(nDB.tickers, t)
	}

	return nil
}

func (nDB *NetworkDB) retryJoin(ctx context.Context, members []string) {
	t := time.NewTicker(retryInterval)
	defer t.Stop()

	for {
		select {
		case <-t.C:
			if _, err := nDB.memberlist.Join(members); err != nil {
				logrus.Errorf("Failed to join memberlist %s on retry: %v", members, err)
				continue
			}
			if err := nDB.sendNodeEvent(NodeEventTypeJoin); err != nil {
				logrus.Errorf("failed to send node join on retry: %v", err)
				continue
			}
			return
		case <-ctx.Done():
			return
		}
	}
}

func (nDB *NetworkDB) clusterJoin(members []string) error {
	mlist := nDB.memberlist

	if _, err := mlist.Join(members); err != nil {
		// In case of failure, we no longer need to explicitly call retryJoin.
		// rejoinClusterBootStrap, which runs every nDB.config.rejoinClusterInterval,
		// will retryJoin for nDB.config.rejoinClusterDuration.
		return fmt.Errorf("could not join node to memberlist: %v", err)
	}

	if err := nDB.sendNodeEvent(NodeEventTypeJoin); err != nil {
		return fmt.Errorf("failed to send node join: %v", err)
	}

	return nil
}
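
// clusterLeave broadcasts a leave event to the cluster, leaves the memberlist
// (blocking up to one second for the leave message to flush), cancels the
// background context, stops all periodic tickers and shuts memberlist down.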
func (nDB *NetworkDB) clusterLeave() error {
	mlist := nDB.memberlist

	if err := nDB.sendNodeEvent(NodeEventTypeLeave); err != nil {
		logrus.Errorf("failed to send node leave: %v", err)
	}

	if err := mlist.Leave(time.Second); err != nil {
		return err
	}

	// cancel the context
	nDB.cancelCtx()

	for _, t := range nDB.tickers {
		t.Stop()
	}

	return mlist.Shutdown()
}

func (nDB *NetworkDB) triggerFunc(stagger time.Duration, C <-chan time.Time, f func()) {
	// Use a random stagger to avoid synchronizing with other nodes
	randStagger := time.Duration(uint64(rnd.Int63()) % uint64(stagger)) //nolint:gosec // gosec complains about the use of rand here. It should be fine.
	select {
	case <-time.After(randStagger):
	case <-nDB.ctx.Done():
		return
	}
	for {
		select {
		case <-C:
			f()
		case <-nDB.ctx.Done():
			return
		}
	}
}

func (nDB *NetworkDB) reapDeadNode() {
	nDB.Lock()
	defer nDB.Unlock()
	for _, nodeMap := range []map[string]*node{
		nDB.failedNodes,
		nDB.leftNodes,
	} {
		for id, n := range nodeMap {
			if n.reapTime > nodeReapPeriod {
				n.reapTime -= nodeReapPeriod
				continue
			}
			logrus.Debugf("Garbage collect node %v", n.Name)
			delete(nodeMap, id)
		}
	}
}

// rejoinClusterBootStrap is called periodically to check whether any of the
// bootstrap nodes is active in the cluster. If none is, it calls the cluster
// join to merge the two separate clusters that can form when all managers are
// stopped/started at the same time.
func (nDB *NetworkDB) rejoinClusterBootStrap() {
	nDB.RLock()
	if len(nDB.bootStrapIP) == 0 {
		nDB.RUnlock()
		return
	}

	myself, ok := nDB.nodes[nDB.config.NodeID]
	if !ok {
		nDB.RUnlock()
		logrus.Warnf("rejoinClusterBootstrap unable to find local node info using ID:%v", nDB.config.NodeID)
		return
	}
	bootStrapIPs := make([]string, 0, len(nDB.bootStrapIP))
	for _, bootIP := range nDB.bootStrapIP {
		// bootstrap IPs are usually IP:port from the Join
		var bootstrapIP net.IP
		ipStr, _, err := net.SplitHostPort(bootIP)
		if err != nil {
			// try to parse it as a plain IP without a port
			// Note this seems to be the case for swarm nodes that do not specify any port
			ipStr = bootIP
		}
		bootstrapIP = net.ParseIP(ipStr)
		if bootstrapIP != nil {
			for _, node := range nDB.nodes {
				if node.Addr.Equal(bootstrapIP) && !node.Addr.Equal(myself.Addr) {
					// One of the bootstrap nodes (and not myself) is part of the cluster, return
					nDB.RUnlock()
					return
				}
			}
			bootStrapIPs = append(bootStrapIPs, bootIP)
		}
	}
	nDB.RUnlock()
	if len(bootStrapIPs) == 0 {
		// This also avoids calling Join with an empty list, which would erase
		// the current bootstrap IP list
		logrus.Debug("rejoinClusterBootStrap did not find any valid IP")
		return
	}
	// None of the bootStrap nodes are in the cluster, call memberlist join
	logrus.Debugf("rejoinClusterBootStrap, calling cluster join with bootStrap %v", bootStrapIPs)
	ctx, cancel := context.WithTimeout(nDB.ctx, nDB.config.rejoinClusterDuration)
	defer cancel()
	nDB.retryJoin(ctx, bootStrapIPs)
}
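
// reconnectNode picks one random node from the failed-node list and attempts
// to rejoin it. On success it re-announces this node to the cluster and
// initiates a full bulk sync with the recovered peer.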
func (nDB *NetworkDB) reconnectNode() {
	nDB.RLock()
	if len(nDB.failedNodes) == 0 {
		nDB.RUnlock()
		return
	}

	nodes := make([]*node, 0, len(nDB.failedNodes))
	for _, n := range nDB.failedNodes {
		nodes = append(nodes, n)
	}
	nDB.RUnlock()

	node := nodes[randomOffset(len(nodes))]
	addr := net.UDPAddr{IP: node.Addr, Port: int(node.Port)}

	if _, err := nDB.memberlist.Join([]string{addr.String()}); err != nil {
		return
	}

	if err := nDB.sendNodeEvent(NodeEventTypeJoin); err != nil {
		return
	}

	logrus.Debugf("Initiating bulk sync with node %s after reconnect", node.Name)
	nDB.bulkSync([]string{node.Name}, true)
}

// For timing entry deletion, the reaper avoids APIs that don't use a
// monotonic clock source (time.Now, Sub, etc.). Instead, every entry carries
// a reapTime that is initially set to reapInterval and decremented by
// reapPeriod each time the reaper runs. NOTE nDB.reapTableEntries updates the
// reapTime with a read lock. This is safe as long as no other concurrent path
// touches the reapTime field.
func (nDB *NetworkDB) reapState() {
	// reapTableEntries relies on the network still being present, so garbage
	// collect the table entries first
	nDB.reapTableEntries()
	nDB.reapNetworks()
}

func (nDB *NetworkDB) reapNetworks() {
	nDB.Lock()
	for _, nn := range nDB.networks {
		for id, n := range nn {
			if n.leaving {
				if n.reapTime <= 0 {
					delete(nn, id)
					continue
				}
				n.reapTime -= reapPeriod
			}
		}
	}
	nDB.Unlock()
}

func (nDB *NetworkDB) reapTableEntries() {
	var nodeNetworks []string
	// This is best effort; if the list of networks changes, the change will be
	// picked up in the next cycle
	nDB.RLock()
	for nid := range nDB.networks[nDB.config.NodeID] {
		nodeNetworks = append(nodeNetworks, nid)
	}
	nDB.RUnlock()

	cycleStart := time.Now()
	// In order to avoid blocking the database for a long time, apply the
	// garbage collection logic network by network. The lock is taken at the
	// beginning of each cycle and the deletion is done inline
	for _, nid := range nodeNetworks {
		nDB.Lock()
		nDB.indexes[byNetwork].WalkPrefix(fmt.Sprintf("/%s", nid), func(path string, v interface{}) bool {
			// timeCompensation compensates for the time the lock took to be released
			timeCompensation := time.Since(cycleStart)
			entry, ok := v.(*entry)
			if !ok || !entry.deleting {
				return false
			}

			// This check adds an extra second to guarantee that when the number is
			// truncated to int32 to fit the packet for the tableEvent, it is always
			// strictly > 1 and never 0
			if entry.reapTime > reapPeriod+timeCompensation+time.Second {
				entry.reapTime -= reapPeriod + timeCompensation
				return false
			}

			params := strings.Split(path[1:], "/")
			nid := params[0]
			tname := params[1]
			key := params[2]

			okTable, okNetwork := nDB.deleteEntry(nid, tname, key)
			if !okTable {
				logrus.Errorf("Table tree delete failed, entry with key:%s does not exist in the table:%s network:%s", key, tname, nid)
			}
			if !okNetwork {
				logrus.Errorf("Network tree delete failed, entry with key:%s does not exist in the network:%s table:%s", key, nid, tname)
			}

			return false
		})
		nDB.Unlock()
	}
}
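
// gossip sends the pending table broadcasts for every network this node
// participates in to up to three random peers of each network, and emits the
// periodic stats and health-score logs when their print periods have elapsed.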
func (nDB *NetworkDB) gossip() {
	networkNodes := make(map[string][]string)
	nDB.RLock()
	thisNodeNetworks := nDB.networks[nDB.config.NodeID]
	for nid := range thisNodeNetworks {
		networkNodes[nid] = nDB.networkNodes[nid]
	}
	printStats := time.Since(nDB.lastStatsTimestamp) >= nDB.config.StatsPrintPeriod
	printHealth := time.Since(nDB.lastHealthTimestamp) >= nDB.config.HealthPrintPeriod
	nDB.RUnlock()

	if printHealth {
		healthScore := nDB.memberlist.GetHealthScore()
		if healthScore != 0 {
			logrus.Warnf("NetworkDB stats %v(%v) - healthscore:%d (connectivity issues)", nDB.config.Hostname, nDB.config.NodeID, healthScore)
		}
		nDB.lastHealthTimestamp = time.Now()
	}

	for nid, nodes := range networkNodes {
		mNodes := nDB.mRandomNodes(3, nodes)
		bytesAvail := nDB.config.PacketBufferSize - compoundHeaderOverhead

		nDB.RLock()
		network, ok := thisNodeNetworks[nid]
		nDB.RUnlock()
		if !ok || network == nil {
			// It is normal for the network to be removed
			// between the time we collect the network
			// attachments of this node and processing
			// them here.
			continue
		}

		broadcastQ := network.tableBroadcasts

		if broadcastQ == nil {
			logrus.Errorf("Invalid broadcastQ encountered while gossiping for network %s", nid)
			continue
		}

		msgs := broadcastQ.GetBroadcasts(compoundOverhead, bytesAvail)
		// Collect stats and print the queue info; note this code also runs for
		// empty queues, so their state is visible in the logs
		network.qMessagesSent += len(msgs)
		if printStats {
			logrus.Infof("NetworkDB stats %v(%v) - netID:%s leaving:%t netPeers:%d entries:%d Queue qLen:%d netMsg/s:%d",
				nDB.config.Hostname, nDB.config.NodeID,
				nid, network.leaving, broadcastQ.NumNodes(), network.entriesNumber, broadcastQ.NumQueued(),
				network.qMessagesSent/int((nDB.config.StatsPrintPeriod/time.Second)))
			network.qMessagesSent = 0
		}

		if len(msgs) == 0 {
			continue
		}

		// Create a compound message
		compound := makeCompoundMessage(msgs)

		for _, node := range mNodes {
			nDB.RLock()
			mnode := nDB.nodes[node]
			nDB.RUnlock()

			if mnode == nil {
				break
			}

			// Send the compound message
			if err := nDB.memberlist.SendBestEffort(&mnode.Node, compound); err != nil {
				logrus.Errorf("Failed to send gossip to %s: %s", mnode.Addr, err)
			}
		}
	}
	// Reset the stats
	if printStats {
		nDB.lastStatsTimestamp = time.Now()
	}
}

func (nDB *NetworkDB) bulkSyncTables() {
	var networks []string
	nDB.RLock()
	for nid, network := range nDB.networks[nDB.config.NodeID] {
		if network.leaving {
			continue
		}
		networks = append(networks, nid)
	}
	nDB.RUnlock()

	for {
		if len(networks) == 0 {
			break
		}

		nid := networks[0]
		networks = networks[1:]

		nDB.RLock()
		nodes := nDB.networkNodes[nid]
		nDB.RUnlock()

		// No peer nodes on this network. Move on.
		if len(nodes) == 0 {
			continue
		}

		completed, err := nDB.bulkSync(nodes, false)
		if err != nil {
			logrus.Errorf("periodic bulk sync failure for network %s: %v", nid, err)
			continue
		}

		// Remove all the networks for which we have
		// successfully completed bulk sync in this iteration.
		updatedNetworks := make([]string, 0, len(networks))
		for _, nid := range networks {
			var found bool
			for _, completedNid := range completed {
				if nid == completedNid {
					found = true
					break
				}
			}

			if !found {
				updatedNetworks = append(updatedNetworks, nid)
			}
		}

		networks = updatedNetworks
	}
}
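
// bulkSync syncs the full table state for the common networks with the given
// candidate nodes. Unless all is set, it narrows the candidates to two random
// nodes and stops after the first successful sync. It returns the list of
// networks that were synced with the successful node.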
func (nDB *NetworkDB) bulkSync(nodes []string, all bool) ([]string, error) {
	if !all {
		// Get 2 random nodes. 2nd node will be tried if the bulk sync to
		// 1st node fails.
		nodes = nDB.mRandomNodes(2, nodes)
	}

	if len(nodes) == 0 {
		return nil, nil
	}

	var err error
	var networks []string
	var success bool
	for _, node := range nodes {
		if node == nDB.config.NodeID {
			continue
		}
		logrus.Debugf("%v(%v): Initiating bulk sync with node %v", nDB.config.Hostname, nDB.config.NodeID, node)
		networks = nDB.findCommonNetworks(node)
		err = nDB.bulkSyncNode(networks, node, true)
		if err != nil {
			err = fmt.Errorf("bulk sync to node %s failed: %v", node, err)
			logrus.Warn(err.Error())
		} else {
			// bulk sync succeeded
			success = true
			// if it is a periodic bulk sync, stop after the first successful sync
			if !all {
				break
			}
		}
	}

	if success {
		// if at least one node sync succeeded
		return networks, nil
	}

	return nil, err
}

// bulkSyncNode bulk syncs all the table entries belonging to a set of
// networks to a single peer node. It can be unsolicited or in response to an
// unsolicited bulk sync
func (nDB *NetworkDB) bulkSyncNode(networks []string, node string, unsolicited bool) error {
	var msgs [][]byte

	var unsolMsg string
	if unsolicited {
		unsolMsg = "unsolicited"
	}

	logrus.Debugf("%v(%v): Initiating %s bulk sync for networks %v with node %s",
		nDB.config.Hostname, nDB.config.NodeID, unsolMsg, networks, node)

	nDB.RLock()
	mnode := nDB.nodes[node]
	if mnode == nil {
		nDB.RUnlock()
		return nil
	}

	for _, nid := range networks {
		nDB.indexes[byNetwork].WalkPrefix(fmt.Sprintf("/%s", nid), func(path string, v interface{}) bool {
			entry, ok := v.(*entry)
			if !ok {
				return false
			}

			eType := TableEventTypeCreate
			if entry.deleting {
				eType = TableEventTypeDelete
			}

			params := strings.Split(path[1:], "/")
			tEvent := TableEvent{
				Type:      eType,
				LTime:     entry.ltime,
				NodeName:  entry.node,
				NetworkID: nid,
				TableName: params[1],
				Key:       params[2],
				Value:     entry.value,
				// The residual reap time is a float64 number of seconds; it is
				// truncated to int32 here
				ResidualReapTime: int32(entry.reapTime.Seconds()),
			}

			msg, err := encodeMessage(MessageTypeTableEvent, &tEvent)
			if err != nil {
				logrus.Errorf("Encode failure during bulk sync: %#v", tEvent)
				return false
			}

			msgs = append(msgs, msg)
			return false
		})
	}
	nDB.RUnlock()

	// Create a compound message
	compound := makeCompoundMessage(msgs)

	bsm := BulkSyncMessage{
		LTime:       nDB.tableClock.Time(),
		Unsolicited: unsolicited,
		NodeName:    nDB.config.NodeID,
		Networks:    networks,
		Payload:     compound,
	}

	buf, err := encodeMessage(MessageTypeBulkSync, &bsm)
	if err != nil {
		return fmt.Errorf("failed to encode bulk sync message: %v", err)
	}

	nDB.Lock()
	ch := make(chan struct{})
	nDB.bulkSyncAckTbl[node] = ch
	nDB.Unlock()

	err = nDB.memberlist.SendReliable(&mnode.Node, buf)
	if err != nil {
		nDB.Lock()
		delete(nDB.bulkSyncAckTbl, node)
		nDB.Unlock()

		return fmt.Errorf("failed to send a TCP message during bulk sync: %v", err)
	}

	// Wait on a response only if the sync was unsolicited.
	if unsolicited {
		startTime := time.Now()
		t := time.NewTimer(30 * time.Second)
		select {
		case <-t.C:
			logrus.Errorf("Bulk sync to node %s timed out", node)
		case <-ch:
			logrus.Debugf("%v(%v): Bulk sync to node %s took %s", nDB.config.Hostname, nDB.config.NodeID, node, time.Since(startTime))
		}
		t.Stop()
	}

	return nil
}
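
// The ack handshake above (register a channel in bulkSyncAckTbl, send, then
// select on the channel and a timeout) is a common Go pattern. Below is a
// minimal standalone sketch of the waiting half; waitAck is a hypothetical
// helper added for illustration and is not part of the original file.
func waitAck(ch <-chan struct{}, timeout time.Duration) bool {
	t := time.NewTimer(timeout)
	defer t.Stop()
	select {
	case <-ch:
		// The peer acknowledged the bulk sync before the deadline.
		return true
	case <-t.C:
		// Timed out waiting for the ack.
		return false
	}
}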
// randomOffset returns a random offset in [0, n)
func randomOffset(n int) int {
	if n == 0 {
		return 0
	}

	val, err := rand.Int(rand.Reader, big.NewInt(int64(n)))
	if err != nil {
		logrus.Errorf("Failed to get a random offset: %v", err)
		return 0
	}

	return int(val.Int64())
}

// mRandomNodes is used to select up to m random nodes, excluding this node
// itself. It is possible that fewer than m nodes are returned.
func (nDB *NetworkDB) mRandomNodes(m int, nodes []string) []string {
	n := len(nodes)
	mNodes := make([]string, 0, m)
OUTER:
	// Probe up to 3*n times; with large n this is not necessary
	// since m << n, but with small n we want the search to be
	// exhaustive
	for i := 0; i < 3*n && len(mNodes) < m; i++ {
		// Get random node
		idx := randomOffset(n)
		node := nodes[idx]

		if node == nDB.config.NodeID {
			continue
		}

		// Check if we have this node already
		for j := 0; j < len(mNodes); j++ {
			if node == mNodes[j] {
				continue OUTER
			}
		}

		// Append the node
		mNodes = append(mNodes, node)
	}

	return mNodes
}
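
// triggerFunc staggers every periodic task by a uniform random fraction of
// its interval so that nodes (re)started at the same moment do not fire in
// lock step. Below is a standalone sketch of the same computation;
// randomStagger is a hypothetical helper added for illustration and is not
// part of the original file.
func randomStagger(interval time.Duration) time.Duration {
	if interval <= 0 {
		return 0
	}
	// Uniform in [0, interval). math/rand is acceptable here because the
	// stagger only desynchronizes timers and has no security impact.
	return time.Duration(uint64(rnd.Int63()) % uint64(interval)) //nolint:gosec
}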