github.com/pwn-term/docker@v0.0.0-20210616085119-6e977cce2565/libnetwork/networkdb/cluster.go

package networkdb

import (
	"bytes"
	"context"
	"crypto/rand"
	"encoding/hex"
	"fmt"
	"log"
	"math/big"
	rnd "math/rand"
	"net"
	"strings"
	"time"

	"github.com/hashicorp/memberlist"
	"github.com/sirupsen/logrus"
)

const (
	reapPeriod            = 5 * time.Second
	rejoinClusterDuration = 10 * time.Second
	rejoinInterval        = 60 * time.Second
	retryInterval         = 1 * time.Second
	nodeReapInterval      = 24 * time.Hour
	nodeReapPeriod        = 2 * time.Hour
	// considering a cluster with > 20 nodes and a drain speed of 100 msg/s
	// the following is roughly 1 minute
	maxQueueLenBroadcastOnSync = 500
)

type logWriter struct{}

func (l *logWriter) Write(p []byte) (int, error) {
	str := string(p)
	str = strings.TrimSuffix(str, "\n")

	switch {
	case strings.HasPrefix(str, "[WARN] "):
		str = strings.TrimPrefix(str, "[WARN] ")
		logrus.Warn(str)
	case strings.HasPrefix(str, "[DEBUG] "):
		str = strings.TrimPrefix(str, "[DEBUG] ")
		logrus.Debug(str)
	case strings.HasPrefix(str, "[INFO] "):
		str = strings.TrimPrefix(str, "[INFO] ")
		logrus.Info(str)
	case strings.HasPrefix(str, "[ERR] "):
		str = strings.TrimPrefix(str, "[ERR] ")
		logrus.Warn(str)
	}

	return len(p), nil
}

// SetKey adds a new key to the key ring
func (nDB *NetworkDB) SetKey(key []byte) {
	logrus.Debugf("Adding key %.5s", hex.EncodeToString(key))
	nDB.Lock()
	defer nDB.Unlock()
	for _, dbKey := range nDB.config.Keys {
		if bytes.Equal(key, dbKey) {
			return
		}
	}
	nDB.config.Keys = append(nDB.config.Keys, key)
	if nDB.keyring != nil {
		nDB.keyring.AddKey(key)
	}
}

// SetPrimaryKey sets the given key as the primary key. This should have
// been added a priori through SetKey
func (nDB *NetworkDB) SetPrimaryKey(key []byte) {
	logrus.Debugf("Primary Key %.5s", hex.EncodeToString(key))
	nDB.RLock()
	defer nDB.RUnlock()
	for _, dbKey := range nDB.config.Keys {
		if bytes.Equal(key, dbKey) {
			if nDB.keyring != nil {
				nDB.keyring.UseKey(dbKey)
			}
			break
		}
	}
}

// RemoveKey removes a key from the key ring. The key being removed
// can't be the primary key
func (nDB *NetworkDB) RemoveKey(key []byte) {
	logrus.Debugf("Remove Key %.5s", hex.EncodeToString(key))
	nDB.Lock()
	defer nDB.Unlock()
	for i, dbKey := range nDB.config.Keys {
		if bytes.Equal(key, dbKey) {
			nDB.config.Keys = append(nDB.config.Keys[:i], nDB.config.Keys[i+1:]...)
			if nDB.keyring != nil {
				nDB.keyring.RemoveKey(dbKey)
			}
			break
		}
	}
}
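// Illustrative sketch (not part of the original file): the three key-ring
// methods above are designed to be driven in a fixed rotation order. A new
// key must be on every node's ring before it becomes primary, and the old
// primary may only be removed afterwards:
//
//	nDB.SetKey(newKey)        // 1. distribute: add newKey to the ring
//	nDB.SetPrimaryKey(newKey) // 2. promote: start encrypting with newKey
//	nDB.RemoveKey(oldKey)     // 3. retire: drop oldKey once rotation completes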
func (nDB *NetworkDB) clusterInit() error {
	nDB.lastStatsTimestamp = time.Now()
	nDB.lastHealthTimestamp = nDB.lastStatsTimestamp

	config := memberlist.DefaultLANConfig()
	config.Name = nDB.config.NodeID
	config.BindAddr = nDB.config.BindAddr
	config.AdvertiseAddr = nDB.config.AdvertiseAddr
	config.UDPBufferSize = nDB.config.PacketBufferSize

	if nDB.config.BindPort != 0 {
		config.BindPort = nDB.config.BindPort
	}

	config.ProtocolVersion = memberlist.ProtocolVersion2Compatible
	config.Delegate = &delegate{nDB: nDB}
	config.Events = &eventDelegate{nDB: nDB}
	// custom logger that does not add time or date, so they are not
	// duplicated by logrus
	config.Logger = log.New(&logWriter{}, "", 0)

	var err error
	if len(nDB.config.Keys) > 0 {
		for i, key := range nDB.config.Keys {
			logrus.Debugf("Encryption key %d: %.5s", i+1, hex.EncodeToString(key))
		}
		nDB.keyring, err = memberlist.NewKeyring(nDB.config.Keys, nDB.config.Keys[0])
		if err != nil {
			return err
		}
		config.Keyring = nDB.keyring
	}

	nDB.networkBroadcasts = &memberlist.TransmitLimitedQueue{
		NumNodes: func() int {
			nDB.RLock()
			num := len(nDB.nodes)
			nDB.RUnlock()
			return num
		},
		RetransmitMult: config.RetransmitMult,
	}

	nDB.nodeBroadcasts = &memberlist.TransmitLimitedQueue{
		NumNodes: func() int {
			nDB.RLock()
			num := len(nDB.nodes)
			nDB.RUnlock()
			return num
		},
		RetransmitMult: config.RetransmitMult,
	}

	mlist, err := memberlist.Create(config)
	if err != nil {
		return fmt.Errorf("failed to create memberlist: %v", err)
	}

	nDB.ctx, nDB.cancelCtx = context.WithCancel(context.Background())
	nDB.memberlist = mlist

	for _, trigger := range []struct {
		interval time.Duration
		fn       func()
	}{
		{reapPeriod, nDB.reapState},
		{config.GossipInterval, nDB.gossip},
		{config.PushPullInterval, nDB.bulkSyncTables},
		{retryInterval, nDB.reconnectNode},
		{nodeReapPeriod, nDB.reapDeadNode},
		{rejoinInterval, nDB.rejoinClusterBootStrap},
	} {
		t := time.NewTicker(trigger.interval)
		go nDB.triggerFunc(trigger.interval, t.C, trigger.fn)
		nDB.tickers = append(nDB.tickers, t)
	}

	return nil
}

func (nDB *NetworkDB) retryJoin(ctx context.Context, members []string) {
	t := time.NewTicker(retryInterval)
	defer t.Stop()

	for {
		select {
		case <-t.C:
			if _, err := nDB.memberlist.Join(members); err != nil {
				logrus.Errorf("Failed to join memberlist %s on retry: %v", members, err)
				continue
			}
			if err := nDB.sendNodeEvent(NodeEventTypeJoin); err != nil {
				logrus.Errorf("failed to send node join on retry: %v", err)
				continue
			}
			return
		case <-ctx.Done():
			return
		}
	}
}
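// Illustrative sketch, with hypothetical names: a new periodic job would be
// wired up the same way as the triggers registered in clusterInit above, one
// ticker plus one goroutine, staggered and cancelled through nDB.ctx:
//
//	t := time.NewTicker(myInterval)                 // myInterval: hypothetical constant
//	go nDB.triggerFunc(myInterval, t.C, nDB.myTask) // myTask: hypothetical method
//	nDB.tickers = append(nDB.tickers, t)
//
// triggerFunc (defined below) delays the first run by a random fraction of the
// interval so the periodic tasks of different nodes do not synchronize.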
func (nDB *NetworkDB) clusterJoin(members []string) error {
	mlist := nDB.memberlist

	if _, err := mlist.Join(members); err != nil {
		// In case of failure, we no longer need to explicitly call retryJoin;
		// rejoinClusterBootStrap, which runs every minute, will retry the join
		// for 10 seconds
		return fmt.Errorf("could not join node to memberlist: %v", err)
	}

	if err := nDB.sendNodeEvent(NodeEventTypeJoin); err != nil {
		return fmt.Errorf("failed to send node join: %v", err)
	}

	return nil
}

func (nDB *NetworkDB) clusterLeave() error {
	mlist := nDB.memberlist

	if err := nDB.sendNodeEvent(NodeEventTypeLeave); err != nil {
		logrus.Errorf("failed to send node leave: %v", err)
	}

	if err := mlist.Leave(time.Second); err != nil {
		return err
	}

	// cancel the context
	nDB.cancelCtx()

	for _, t := range nDB.tickers {
		t.Stop()
	}

	return mlist.Shutdown()
}

func (nDB *NetworkDB) triggerFunc(stagger time.Duration, C <-chan time.Time, f func()) {
	// Use a random stagger to avoid synchronizing
	randStagger := time.Duration(uint64(rnd.Int63()) % uint64(stagger))
	select {
	case <-time.After(randStagger):
	case <-nDB.ctx.Done():
		return
	}
	for {
		select {
		case <-C:
			f()
		case <-nDB.ctx.Done():
			return
		}
	}
}

func (nDB *NetworkDB) reapDeadNode() {
	nDB.Lock()
	defer nDB.Unlock()
	for _, nodeMap := range []map[string]*node{
		nDB.failedNodes,
		nDB.leftNodes,
	} {
		for id, n := range nodeMap {
			if n.reapTime > nodeReapPeriod {
				n.reapTime -= nodeReapPeriod
				continue
			}
			logrus.Debugf("Garbage collect node %v", n.Name)
			delete(nodeMap, id)
		}
	}
}

// rejoinClusterBootStrap is called periodically to check whether any of the
// bootStrap nodes is active in the cluster. If none is, it calls the cluster
// join to merge the two separate clusters that form when all managers are
// stopped/started at the same time
func (nDB *NetworkDB) rejoinClusterBootStrap() {
	nDB.RLock()
	if len(nDB.bootStrapIP) == 0 {
		nDB.RUnlock()
		return
	}

	myself, ok := nDB.nodes[nDB.config.NodeID]
	if !ok {
		nDB.RUnlock()
		logrus.Warnf("rejoinClusterBootstrap unable to find local node info using ID:%v", nDB.config.NodeID)
		return
	}
	bootStrapIPs := make([]string, 0, len(nDB.bootStrapIP))
	for _, bootIP := range nDB.bootStrapIP {
		// bootstrap IPs are usually IP:port from the Join
		var bootstrapIP net.IP
		ipStr, _, err := net.SplitHostPort(bootIP)
		if err != nil {
			// try to parse it as a plain IP without a port.
			// Note this seems to be the case for swarm, which does not specify any port
			ipStr = bootIP
		}
		bootstrapIP = net.ParseIP(ipStr)
		if bootstrapIP != nil {
			for _, node := range nDB.nodes {
				if node.Addr.Equal(bootstrapIP) && !node.Addr.Equal(myself.Addr) {
					// One of the bootstrap nodes (and not myself) is part of the cluster, return
					nDB.RUnlock()
					return
				}
			}
			bootStrapIPs = append(bootStrapIPs, bootIP)
		}
	}
	nDB.RUnlock()
	if len(bootStrapIPs) == 0 {
		// this also avoids calling Join with an empty list, which would erase the current bootstrap IP list
		logrus.Debug("rejoinClusterBootStrap did not find any valid IP")
		return
	}
	// None of the bootStrap nodes are in the cluster, call memberlist join
	logrus.Debugf("rejoinClusterBootStrap, calling cluster join with bootStrap %v", bootStrapIPs)
	ctx, cancel := context.WithTimeout(nDB.ctx, rejoinClusterDuration)
	defer cancel()
	nDB.retryJoin(ctx, bootStrapIPs)
}
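// Illustrative note on the bootstrap list parsed above, using made-up
// addresses from the documentation range: both forms seen at Join time are
// accepted,
//
//	"192.0.2.10:7946" // host:port, split by net.SplitHostPort
//	"192.0.2.10"      // bare IP, as used by swarm when no port is given
//
// and anything that still fails net.ParseIP is skipped silently.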
func (nDB *NetworkDB) reconnectNode() {
	nDB.RLock()
	if len(nDB.failedNodes) == 0 {
		nDB.RUnlock()
		return
	}

	nodes := make([]*node, 0, len(nDB.failedNodes))
	for _, n := range nDB.failedNodes {
		nodes = append(nodes, n)
	}
	nDB.RUnlock()

	node := nodes[randomOffset(len(nodes))]
	addr := net.UDPAddr{IP: node.Addr, Port: int(node.Port)}

	if _, err := nDB.memberlist.Join([]string{addr.String()}); err != nil {
		return
	}

	if err := nDB.sendNodeEvent(NodeEventTypeJoin); err != nil {
		return
	}

	logrus.Debugf("Initiating bulk sync with node %s after reconnect", node.Name)
	nDB.bulkSync([]string{node.Name}, true)
}

// For timing entry deletion in the reaper, APIs that don't use a monotonic
// clock source (time.Now, Sub, etc.) should be avoided. Hence we keep a reapTime
// in every entry, set initially to reapInterval and decremented by reapPeriod every
// time the reaper runs. NOTE nDB.reapTableEntries updates the reapTime with a read
// lock. This is safe as long as no other concurrent path touches the reapTime field.
func (nDB *NetworkDB) reapState() {
	// reapTableEntries leverages the presence of the network, so garbage-collect entries first
	nDB.reapTableEntries()
	nDB.reapNetworks()
}

func (nDB *NetworkDB) reapNetworks() {
	nDB.Lock()
	for _, nn := range nDB.networks {
		for id, n := range nn {
			if n.leaving {
				if n.reapTime <= 0 {
					delete(nn, id)
					continue
				}
				n.reapTime -= reapPeriod
			}
		}
	}
	nDB.Unlock()
}

func (nDB *NetworkDB) reapTableEntries() {
	var nodeNetworks []string
	// This is best effort; if the list of networks changes, it will be picked up in the next cycle
	nDB.RLock()
	for nid := range nDB.networks[nDB.config.NodeID] {
		nodeNetworks = append(nodeNetworks, nid)
	}
	nDB.RUnlock()

	cycleStart := time.Now()
	// In order to avoid blocking the database for a long time, apply the garbage collection logic by network
	// The lock is taken at the beginning of the cycle and the deletion is inline
	for _, nid := range nodeNetworks {
		nDB.Lock()
		nDB.indexes[byNetwork].WalkPrefix(fmt.Sprintf("/%s", nid), func(path string, v interface{}) bool {
			// timeCompensation compensates for the time the lock took to be acquired
			timeCompensation := time.Since(cycleStart)
			entry, ok := v.(*entry)
			if !ok || !entry.deleting {
				return false
			}

			// In this check we add an extra second to guarantee that when the number is truncated
			// to int32 to fit the packet for the tableEvent, it is always strictly > 1 and never 0
			if entry.reapTime > reapPeriod+timeCompensation+time.Second {
				entry.reapTime -= reapPeriod + timeCompensation
				return false
			}

			params := strings.Split(path[1:], "/")
			nid := params[0]
			tname := params[1]
			key := params[2]

			okTable, okNetwork := nDB.deleteEntry(nid, tname, key)
			if !okTable {
				logrus.Errorf("Table tree delete failed, entry with key:%s does not exist in the table:%s network:%s", key, tname, nid)
			}
			if !okNetwork {
				logrus.Errorf("Network tree delete failed, entry with key:%s does not exist in the network:%s table:%s", key, nid, tname)
			}

			return false
		})
		nDB.Unlock()
	}
}
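// Illustrative note: the byNetwork radix tree walked above (and again in
// bulkSyncNode below) keys every entry with a three-segment path, which is
// what the strings.Split call unpacks:
//
//	/<networkID>/<tableName>/<key>
//	params := strings.Split(path[1:], "/") // ["<networkID>", "<tableName>", "<key>"]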
func (nDB *NetworkDB) gossip() {
	networkNodes := make(map[string][]string)
	nDB.RLock()
	thisNodeNetworks := nDB.networks[nDB.config.NodeID]
	for nid := range thisNodeNetworks {
		networkNodes[nid] = nDB.networkNodes[nid]
	}
	printStats := time.Since(nDB.lastStatsTimestamp) >= nDB.config.StatsPrintPeriod
	printHealth := time.Since(nDB.lastHealthTimestamp) >= nDB.config.HealthPrintPeriod
	nDB.RUnlock()

	if printHealth {
		healthScore := nDB.memberlist.GetHealthScore()
		if healthScore != 0 {
			logrus.Warnf("NetworkDB stats %v(%v) - healthscore:%d (connectivity issues)", nDB.config.Hostname, nDB.config.NodeID, healthScore)
		}
		nDB.lastHealthTimestamp = time.Now()
	}

	for nid, nodes := range networkNodes {
		mNodes := nDB.mRandomNodes(3, nodes)
		bytesAvail := nDB.config.PacketBufferSize - compoundHeaderOverhead

		nDB.RLock()
		network, ok := thisNodeNetworks[nid]
		nDB.RUnlock()
		if !ok || network == nil {
			// It is normal for the network to be removed
			// between the time we collect the network
			// attachments of this node and processing
			// them here.
			continue
		}

		broadcastQ := network.tableBroadcasts

		if broadcastQ == nil {
			logrus.Errorf("Invalid broadcastQ encountered while gossiping for network %s", nid)
			continue
		}

		msgs := broadcastQ.GetBroadcasts(compoundOverhead, bytesAvail)
		// Collect stats and print the queue info; note this code also runs for empty queues so we have a view of them
		network.qMessagesSent += len(msgs)
		if printStats {
			logrus.Infof("NetworkDB stats %v(%v) - netID:%s leaving:%t netPeers:%d entries:%d Queue qLen:%d netMsg/s:%d",
				nDB.config.Hostname, nDB.config.NodeID,
				nid, network.leaving, broadcastQ.NumNodes(), network.entriesNumber, broadcastQ.NumQueued(),
				network.qMessagesSent/int((nDB.config.StatsPrintPeriod/time.Second)))
			network.qMessagesSent = 0
		}

		if len(msgs) == 0 {
			continue
		}

		// Create a compound message
		compound := makeCompoundMessage(msgs)

		for _, node := range mNodes {
			nDB.RLock()
			mnode := nDB.nodes[node]
			nDB.RUnlock()

			if mnode == nil {
				break
			}

			// Send the compound message
			if err := nDB.memberlist.SendBestEffort(&mnode.Node, compound); err != nil {
				logrus.Errorf("Failed to send gossip to %s: %s", mnode.Addr, err)
			}
		}
	}
	// Reset the stats
	if printStats {
		nDB.lastStatsTimestamp = time.Now()
	}
}
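// Illustrative note: each gossip round fans out to at most 3 random peers per
// network (mRandomNodes(3, nodes) above), and each peer receives at most one
// UDP packet's worth of queued table broadcasts per network:
//
//	bytesAvail := nDB.config.PacketBufferSize - compoundHeaderOverhead
//	msgs := broadcastQ.GetBroadcasts(compoundOverhead, bytesAvail)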
func (nDB *NetworkDB) bulkSyncTables() {
	var networks []string
	nDB.RLock()
	for nid, network := range nDB.networks[nDB.config.NodeID] {
		if network.leaving {
			continue
		}
		networks = append(networks, nid)
	}
	nDB.RUnlock()

	for {
		if len(networks) == 0 {
			break
		}

		nid := networks[0]
		networks = networks[1:]

		nDB.RLock()
		nodes := nDB.networkNodes[nid]
		nDB.RUnlock()

		// No peer nodes on this network. Move on.
		if len(nodes) == 0 {
			continue
		}

		completed, err := nDB.bulkSync(nodes, false)
		if err != nil {
			logrus.Errorf("periodic bulk sync failure for network %s: %v", nid, err)
			continue
		}

		// Remove all the networks for which we have
		// successfully completed bulk sync in this iteration.
		updatedNetworks := make([]string, 0, len(networks))
		for _, nid := range networks {
			var found bool
			for _, completedNid := range completed {
				if nid == completedNid {
					found = true
					break
				}
			}

			if !found {
				updatedNetworks = append(updatedNetworks, nid)
			}
		}

		networks = updatedNetworks
	}
}

func (nDB *NetworkDB) bulkSync(nodes []string, all bool) ([]string, error) {
	if !all {
		// Get 2 random nodes. The 2nd node will be tried if the bulk sync to
		// the 1st node fails.
		nodes = nDB.mRandomNodes(2, nodes)
	}

	if len(nodes) == 0 {
		return nil, nil
	}

	var err error
	var networks []string
	var success bool
	for _, node := range nodes {
		if node == nDB.config.NodeID {
			continue
		}
		logrus.Debugf("%v(%v): Initiating bulk sync with node %v", nDB.config.Hostname, nDB.config.NodeID, node)
		networks = nDB.findCommonNetworks(node)
		err = nDB.bulkSyncNode(networks, node, true)
		if err != nil {
			err = fmt.Errorf("bulk sync to node %s failed: %v", node, err)
			logrus.Warn(err.Error())
		} else {
			// bulk sync succeeded
			success = true
			// if it's a periodic bulk sync, stop after the first successful sync
			if !all {
				break
			}
		}
	}

	if success {
		// if at least one node sync succeeded
		return networks, nil
	}

	return nil, err
}
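// Illustrative note: the two bulkSync call sites in this file use the `all`
// flag differently,
//
//	nDB.bulkSync(nodes, false)              // periodic: pick up to 2 random peers, stop at first success
//	nDB.bulkSync([]string{node.Name}, true) // targeted, e.g. after a reconnect: sync with every listed node
//
// so only the periodic path goes through the random-peer selection.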
// Bulk sync all the table entries belonging to a set of networks to a
// single peer node. It can be unsolicited or can be in response to an
// unsolicited bulk sync
func (nDB *NetworkDB) bulkSyncNode(networks []string, node string, unsolicited bool) error {
	var msgs [][]byte

	var unsolMsg string
	if unsolicited {
		unsolMsg = "unsolicited"
	}

	logrus.Debugf("%v(%v): Initiating %s bulk sync for networks %v with node %s",
		nDB.config.Hostname, nDB.config.NodeID, unsolMsg, networks, node)

	nDB.RLock()
	mnode := nDB.nodes[node]
	if mnode == nil {
		nDB.RUnlock()
		return nil
	}

	for _, nid := range networks {
		nDB.indexes[byNetwork].WalkPrefix(fmt.Sprintf("/%s", nid), func(path string, v interface{}) bool {
			entry, ok := v.(*entry)
			if !ok {
				return false
			}

			eType := TableEventTypeCreate
			if entry.deleting {
				eType = TableEventTypeDelete
			}

			params := strings.Split(path[1:], "/")
			tEvent := TableEvent{
				Type:      eType,
				LTime:     entry.ltime,
				NodeName:  entry.node,
				NetworkID: nid,
				TableName: params[1],
				Key:       params[2],
				Value:     entry.value,
				// The duration in seconds is a float; it is truncated to an integer below
				ResidualReapTime: int32(entry.reapTime.Seconds()),
			}

			msg, err := encodeMessage(MessageTypeTableEvent, &tEvent)
			if err != nil {
				logrus.Errorf("Encode failure during bulk sync: %#v", tEvent)
				return false
			}

			msgs = append(msgs, msg)
			return false
		})
	}
	nDB.RUnlock()

	// Create a compound message
	compound := makeCompoundMessage(msgs)

	bsm := BulkSyncMessage{
		LTime:       nDB.tableClock.Time(),
		Unsolicited: unsolicited,
		NodeName:    nDB.config.NodeID,
		Networks:    networks,
		Payload:     compound,
	}

	buf, err := encodeMessage(MessageTypeBulkSync, &bsm)
	if err != nil {
		return fmt.Errorf("failed to encode bulk sync message: %v", err)
	}

	nDB.Lock()
	ch := make(chan struct{})
	nDB.bulkSyncAckTbl[node] = ch
	nDB.Unlock()

	err = nDB.memberlist.SendReliable(&mnode.Node, buf)
	if err != nil {
		nDB.Lock()
		delete(nDB.bulkSyncAckTbl, node)
		nDB.Unlock()

		return fmt.Errorf("failed to send a TCP message during bulk sync: %v", err)
	}

	// Wait on a response only if it is unsolicited.
	if unsolicited {
		startTime := time.Now()
		t := time.NewTimer(30 * time.Second)
		select {
		case <-t.C:
			logrus.Errorf("Bulk sync to node %s timed out", node)
		case <-ch:
			logrus.Debugf("%v(%v): Bulk sync to node %s took %s", nDB.config.Hostname, nDB.config.NodeID, node, time.Since(startTime))
		}
		t.Stop()
	}

	return nil
}

// randomOffset returns a random offset in [0, n)
func randomOffset(n int) int {
	if n == 0 {
		return 0
	}

	val, err := rand.Int(rand.Reader, big.NewInt(int64(n)))
	if err != nil {
		logrus.Errorf("Failed to get a random offset: %v", err)
		return 0
	}

	return int(val.Int64())
}

// mRandomNodes is used to select up to m random nodes. It is possible
// that fewer than m nodes are returned.
func (nDB *NetworkDB) mRandomNodes(m int, nodes []string) []string {
	n := len(nodes)
	mNodes := make([]string, 0, m)
OUTER:
	// Probe up to 3*n times; with large n this is not necessary
	// since m << n, but with small n we want the search to be
	// exhaustive
	for i := 0; i < 3*n && len(mNodes) < m; i++ {
		// Get random node
		idx := randomOffset(n)
		node := nodes[idx]

		if node == nDB.config.NodeID {
			continue
		}

		// Check if we have this node already
		for j := 0; j < len(mNodes); j++ {
			if node == mNodes[j] {
				continue OUTER
			}
		}

		// Append the node
		mNodes = append(mNodes, node)
	}

	return mNodes
}
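// Illustrative sketch of mRandomNodes semantics: it never returns the local
// node and never returns duplicates, so given
//
//	peers := nDB.mRandomNodes(2, nDB.networkNodes[nid]) // nid: hypothetical network ID
//
// len(peers) may be 0, 1, or 2 depending on how many distinct remote peers the
// network has; callers like gossip and bulkSync must tolerate a short result.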