github.com/unionj-cloud/go-doudou/v2@v2.3.5/toolkit/memberlist/state.go

package memberlist

import (
	"bytes"
	"fmt"
	"math"
	"math/rand"
	"net"
	"strings"
	"sync/atomic"
	"time"

	metrics "github.com/armon/go-metrics"
	"github.com/shirou/gopsutil/cpu"
)

type NodeStateType int

const (
	StateAlive NodeStateType = iota
	StateSuspect
	StateDead
	StateLeft
)

// Node represents a node in the cluster.
type Node struct {
	Name   string
	Addr   string
	Port   uint16
	Meta   []byte        // Metadata from the delegate for this node.
	State  NodeStateType // State of the node.
	PMin   uint8         // Minimum protocol version this understands
	PMax   uint8         // Maximum protocol version this understands
	PCur   uint8         // Current version node is speaking
	DMin   uint8         // Min protocol version for the delegate to understand
	DMax   uint8         // Max protocol version for the delegate to understand
	DCur   uint8         // Current version delegate is speaking
	Weight int           // node weight for load balancing
}

// Address returns the host:port form of a node's address, suitable for use
// with a transport.
func (n *Node) Address() string {
	return joinHostPort(n.Addr, n.Port)
}

// FullAddress returns the node name and host:port form of a node's address,
// suitable for use with a transport.
func (n *Node) FullAddress() Address {
	return Address{
		Addr: joinHostPort(n.Addr, n.Port),
		Name: n.Name,
	}
}

// String returns the node name
func (n *Node) String() string {
	return n.Name
}

// nodeState is used to manage our state view of another node
type nodeState struct {
	Node
	Incarnation uint32        // Last known incarnation number
	State       NodeStateType // Current state
	StateChange time.Time     // Time last state change happened
	Weight      int           // node weight for load balancing
	WeightAt    int64         // UTC timestamp (milliseconds) at which the node weight was calculated
}

func NewNodeState(node Node, state NodeStateType) *nodeState {
	return &nodeState{Node: node, State: state}
}

// Address returns the host:port form of a node's address, suitable for use
// with a transport.
func (n *nodeState) Address() string {
	return n.Node.Address()
}

// FullAddress returns the node name and host:port form of a node's address,
// suitable for use with a transport.
func (n *nodeState) FullAddress() Address {
	return n.Node.FullAddress()
}

func (n *nodeState) DeadOrLeft() bool {
	return n.State == StateDead || n.State == StateLeft
}

// ackHandler is used to register handlers for incoming acks and nacks.
type ackHandler struct {
	ackFn  func([]byte, time.Time)
	nackFn func()
	timer  *time.Timer
}

// NoPingResponseError is used to indicate a 'ping' packet was
// successfully issued but no response was received
type NoPingResponseError struct {
	node string
}

func (f NoPingResponseError) Error() string {
	return fmt.Sprintf("No response from node %s", f.node)
}
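
// Illustrative sketch (added commentary, not part of the original file): since
// Ping below returns a NoPingResponseError value on timeout, a caller might
// distinguish "no response" from transport errors using errors.As from the
// standard library:
//
//	var npr NoPingResponseError
//	if _, err := m.Ping(name, addr); errors.As(err, &npr) {
//		// the packet went out, but the node never answered
//	}
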
// schedule is used to ensure the Tick is performed periodically. This
// function is safe to call multiple times. If the memberlist is already
// scheduled, then it won't do anything.
func (m *Memberlist) schedule() {
	m.tickerLock.Lock()
	defer m.tickerLock.Unlock()

	// If we already have tickers, then don't do anything, since we're
	// scheduled
	if len(m.tickers) > 0 {
		return
	}

	// Create the stop tick channel, a blocking channel. We close this
	// when we should stop the tickers.
	stopCh := make(chan struct{})

	// Create a new probe ticker
	if m.config.ProbeInterval > 0 {
		t := time.NewTicker(m.config.ProbeInterval)
		go m.triggerFuncDynamic(func() time.Duration {
			return m.config.ProbeInterval
		}, t, stopCh, m.probe)
		m.tickers = append(m.tickers, t)
	}

	// Create a push pull ticker if needed
	if m.config.PushPullInterval > 0 {
		go m.pushPullTrigger(stopCh)
	}

	// Create a gossip ticker if needed
	if m.config.GossipInterval > 0 && m.config.GossipNodes > 0 {
		t := time.NewTicker(m.config.GossipInterval)
		go m.triggerFuncDynamic(func() time.Duration {
			return m.config.GossipInterval
		}, t, stopCh, m.gossip)
		m.tickers = append(m.tickers, t)
	}

	// Create a node weight ticker if needed
	if m.config.WeightInterval > 0 {
		t := time.NewTicker(m.config.WeightInterval)
		go m.triggerFunc(m.config.WeightInterval, t.C, stopCh, m.weight)
		m.tickers = append(m.tickers, t)
	}

	// If we made any tickers, then record the stopTick channel for
	// later.
	if len(m.tickers) > 0 {
		m.stopTick = stopCh
	}
}

// triggerFunc is used to trigger a function call on each tick until a
// stop signal arrives.
func (m *Memberlist) triggerFunc(stagger time.Duration, C <-chan time.Time, stop <-chan struct{}, f func()) {
	// Use a random stagger to avoid synchronizing
	randStagger := time.Duration(uint64(rand.Int63()) % uint64(stagger))
	select {
	case <-time.After(randStagger):
	case <-stop:
		return
	}
	for {
		select {
		case <-C:
			f()
		case <-stop:
			return
		}
	}
}

// triggerFuncDynamic behaves like triggerFunc, but re-reads the interval from
// getter and resets the ticker on every tick, so interval changes take effect
// at runtime.
func (m *Memberlist) triggerFuncDynamic(getter func() time.Duration, t *time.Ticker, stop <-chan struct{}, f func()) {
	// Use a random stagger to avoid synchronizing
	stagger := getter()
	randStagger := time.Duration(uint64(rand.Int63()) % uint64(stagger))
	select {
	case <-time.After(randStagger):
	case <-stop:
		return
	}
	for {
		select {
		case <-t.C:
			t.Reset(getter())
			f()
		case <-stop:
			return
		}
	}
}
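
// A worked example of the stagger above (added commentary): with
// ProbeInterval = 1s, rand.Int63() % 1s delays the first iteration by a
// uniform amount in [0s, 1s), so nodes that started at the same instant do
// not probe in lockstep. After that, triggerFuncDynamic re-reads the interval
// via getter and resets the ticker on every tick, so runtime changes to the
// configured interval take effect on the next cycle.
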
// pushPullTrigger is used to periodically trigger a push/pull until
// a stop tick arrives. We don't use triggerFunc since the push/pull
// timer is dynamically scaled based on cluster size to avoid network
// saturation
func (m *Memberlist) pushPullTrigger(stop <-chan struct{}) {
	interval := m.config.PushPullInterval

	// Use a random stagger to avoid synchronizing
	randStagger := time.Duration(uint64(rand.Int63()) % uint64(interval))
	select {
	case <-time.After(randStagger):
	case <-stop:
		return
	}

	// Tick using a dynamic timer
	for {
		tickTime := pushPullScale(m.config.PushPullInterval, m.estNumNodes())
		select {
		case <-time.After(tickTime):
			m.pushPull()
		case <-stop:
			return
		}
	}
}

// deschedule is used to stop the background maintenance. This is safe
// to call multiple times.
func (m *Memberlist) deschedule() {
	m.tickerLock.Lock()
	defer m.tickerLock.Unlock()

	// If we have no tickers, then we aren't scheduled.
	if len(m.tickers) == 0 {
		return
	}

	// Close the stop channel so all the ticker listeners stop.
	close(m.stopTick)

	// Explicitly stop all the tickers themselves so they don't take
	// up any more resources, and get rid of the list.
	for _, t := range m.tickers {
		t.Stop()
	}
	m.tickers = nil
}

// probe is used to perform a single round of failure detection and gossip
func (m *Memberlist) probe() {
	// Track the number of indexes we've considered probing
	numCheck := 0
START:
	m.nodeLock.RLock()

	// Make sure we don't wrap around infinitely
	if numCheck >= len(m.nodes) {
		m.nodeLock.RUnlock()
		return
	}

	// Handle the wrap around case
	if m.probeIndex >= len(m.nodes) {
		m.nodeLock.RUnlock()
		m.resetNodes()
		m.probeIndex = 0
		numCheck++
		goto START
	}

	// Determine if we should probe this node
	skip := false
	var node nodeState

	node = *m.nodes[m.probeIndex]
	if node.Name == m.config.Name {
		skip = true
	} else if node.DeadOrLeft() {
		skip = true
	}

	// Potentially skip
	m.nodeLock.RUnlock()
	m.probeIndex++
	if skip {
		numCheck++
		goto START
	}

	// Probe the specific node
	m.probeNode(&node)
}

// probeNodeByAddr just safely calls probeNode given only the address of the node (for tests)
func (m *Memberlist) probeNodeByAddr(addr string) {
	m.nodeLock.RLock()
	n := m.nodeMap[addr]
	m.nodeLock.RUnlock()

	m.probeNode(n)
}

// failedRemote checks the error and decides if it indicates a failure on the
// other end.
func failedRemote(err error) bool {
	switch t := err.(type) {
	case *net.OpError:
		if strings.HasPrefix(t.Net, "tcp") {
			switch t.Op {
			case "dial", "read", "write":
				return true
			}
		}
	}
	return false
}
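
// Illustrative note (added commentary): failedRemote only treats TCP-level
// dial/read/write failures as evidence about the remote end, e.g.
//
//	failedRemote(&net.OpError{Net: "tcp", Op: "read"})  // true
//	failedRemote(&net.OpError{Net: "udp", Op: "write"}) // false
//
// so a lost UDP packet never short-circuits the probe straight into the
// indirect ping path below.
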
// probeNode handles a single round of failure checking on a node.
func (m *Memberlist) probeNode(node *nodeState) {
	defer metrics.MeasureSince([]string{"memberlist", "probeNode"}, time.Now())

	// We use our health awareness to scale the overall probe interval, so we
	// slow down if we detect problems. The ticker that calls us can handle
	// us running over the base interval, and will skip missed ticks.
	probeInterval := m.awareness.ScaleTimeout(m.config.ProbeInterval)
	if probeInterval > m.config.ProbeInterval {
		metrics.IncrCounter([]string{"memberlist", "degraded", "probe"}, 1)
	}

	// Prepare a ping message and setup an ack handler.
	selfAddr, selfPort := m.getAdvertise()
	ping := ping{
		SeqNo:      m.nextSeqNo(),
		Node:       node.Name,
		SourceAddr: selfAddr,
		SourcePort: selfPort,
		SourceNode: m.config.Name,
	}
	ackCh := make(chan ackMessage, m.config.IndirectChecks+1)
	nackCh := make(chan struct{}, m.config.IndirectChecks+1)
	m.setProbeChannels(ping.SeqNo, ackCh, nackCh, probeInterval)

	// Mark the sent time here, which should be after any pre-processing but
	// before system calls to do the actual send. This probably over-reports
	// a bit, but it's the best we can do. We had originally put this right
	// after the I/O, but that would sometimes give negative RTT measurements
	// which was not desirable.
	sent := time.Now()

	// Send a ping to the node. If this node looks like it's suspect or dead,
	// also tack on a suspect message so that it has a chance to refute as
	// soon as possible.
	deadline := sent.Add(probeInterval)
	addr := node.Address()

	// Arrange for our self-awareness to get updated.
	var awarenessDelta int
	defer func() {
		m.awareness.ApplyDelta(awarenessDelta)
	}()
	if node.State == StateAlive {
		if err := m.encodeAndSendMsg(node.FullAddress(), pingMsg, &ping); err != nil {
			m.logger.Printf("[ERR] memberlist: Failed to send ping: %s", err)
			if failedRemote(err) {
				goto HANDLE_REMOTE_FAILURE
			} else {
				if _, ok := err.(*net.DNSError); ok {
					// A DNS failure means the probe never reached the network,
					// so penalize our own health for the failed probe and start
					// suspecting the node right away.
					awarenessDelta = 1
					s := suspect{Incarnation: node.Incarnation, Node: node.Name, From: m.config.Name}
					m.suspectNode(&s)
				}
				return
			}
		}
	} else {
		var msgs [][]byte
		if buf, err := encode(pingMsg, &ping); err != nil {
			m.logger.Printf("[ERR] memberlist: Failed to encode ping message: %s", err)
			return
		} else {
			msgs = append(msgs, buf.Bytes())
		}
		s := suspect{Incarnation: node.Incarnation, Node: node.Name, From: m.config.Name}
		if buf, err := encode(suspectMsg, &s); err != nil {
			m.logger.Printf("[ERR] memberlist: Failed to encode suspect message: %s", err)
			return
		} else {
			msgs = append(msgs, buf.Bytes())
		}

		compound := makeCompoundMessage(msgs)
		if err := m.rawSendMsgPacket(node.FullAddress(), &node.Node, compound.Bytes()); err != nil {
			m.logger.Printf("[ERR] memberlist: Failed to send compound ping and suspect message to %s: %s", addr, err)
			if failedRemote(err) {
				goto HANDLE_REMOTE_FAILURE
			} else {
				return
			}
		}
	}

	// Arrange for our self-awareness to get updated. At this point we've
	// sent the ping, so any return statement means the probe succeeded
	// which will improve our health until we get to the failure scenarios
	// at the end of this function, which will alter this delta variable
	// accordingly.
	awarenessDelta = -1

	// Wait for response or round-trip-time.
	select {
	case v := <-ackCh:
		if v.Complete {
			rtt := v.Timestamp.Sub(sent)
			m.logger.Printf("[DEBUG] memberlist: ping remote node %s success in %s", node.Node.Name, rtt.String())
			if m.config.Ping != nil {
				m.config.Ping.NotifyPingComplete(&node.Node, rtt, v.Payload)
			}
			return
		}

		// As an edge case, if we get a timeout, we need to re-enqueue it
		// here to break out of the select below.
		ackCh <- v
	case <-time.After(m.config.ProbeTimeout):
		// Note that we don't scale this timeout based on awareness and
		// the health score. That's because we don't really expect waiting
		// longer to help get UDP through. Since health does extend the
		// probe interval it will give the TCP fallback more time, which
		// is more active in dealing with lost packets, and it gives more
		// time to wait for indirect acks/nacks.
		m.logger.Printf("[DEBUG] memberlist: Failed ping: %s (timeout reached)", node.Name)
	}

HANDLE_REMOTE_FAILURE:
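	// Added explanatory note: everything below fans out the failure check.
	// With, say, IndirectChecks = 3, up to three live peers are asked to ping
	// the target on our behalf; only peers speaking protocol version 4 or
	// newer are asked to nack (expectedNacks counts those), and a TCP
	// fallback ping runs in parallel unless disabled.
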
	// Get some random live nodes.
	m.nodeLock.RLock()
	kNodes := kRandomNodes(m.config.IndirectChecks, m.nodes, func(n *nodeState) bool {
		return n.Name == m.config.Name ||
			n.Name == node.Name ||
			n.State != StateAlive
	})
	m.nodeLock.RUnlock()

	// Attempt an indirect ping.
	expectedNacks := 0
	selfAddr, selfPort = m.getAdvertise()
	ind := indirectPingReq{
		SeqNo:      ping.SeqNo,
		Target:     node.Addr,
		Port:       node.Port,
		Node:       node.Name,
		SourceAddr: selfAddr,
		SourcePort: selfPort,
		SourceNode: m.config.Name,
	}
	for _, peer := range kNodes {
		// We only expect nack to be sent from peers who understand
		// version 4 of the protocol.
		if ind.Nack = peer.PMax >= 4; ind.Nack {
			expectedNacks++
		}

		if err := m.encodeAndSendMsg(peer.FullAddress(), indirectPingMsg, &ind); err != nil {
			m.logger.Printf("[ERR] memberlist: Failed to send indirect ping: %s", err)
		}
	}

	// Also make an attempt to contact the node directly over TCP. This
	// helps prevent confused clients who get isolated from UDP traffic
	// but can still speak TCP (which also means they can possibly report
	// misinformation to other nodes via anti-entropy), avoiding flapping in
	// the cluster.
	//
	// This is a little unusual because we will attempt a TCP ping to any
	// member who understands version 3 of the protocol, regardless of
	// which protocol version we are speaking. That's why we've included a
	// config option to turn this off if desired.
	fallbackCh := make(chan bool, 1)

	disableTcpPings := m.config.DisableTcpPings ||
		(m.config.DisableTcpPingsForNode != nil && m.config.DisableTcpPingsForNode(node.Name))
	if (!disableTcpPings) && (node.PMax >= 3) {
		go func() {
			defer close(fallbackCh)
			didContact, err := m.sendPingAndWaitForAck(node.FullAddress(), ping, deadline)
			if err != nil {
				m.logger.Printf("[ERR] memberlist: Failed fallback ping: %s", err)
			} else {
				fallbackCh <- didContact
			}
		}()
	} else {
		close(fallbackCh)
	}

	// Wait for the acks or timeout. Note that we don't check the fallback
	// channel here because we want to issue a warning below if that's the
	// *only* way we hear back from the peer, so we have to let this time
	// out first to allow the normal UDP-based acks to come in.
	select {
	case v := <-ackCh:
		if v.Complete {
			return
		}
	}

	// Finally, poll the fallback channel. The timeouts are set such that
	// the channel will have something or be closed without having to wait
	// any additional time here.
	for didContact := range fallbackCh {
		if didContact {
			m.logger.Printf("[WARN] memberlist: Was able to connect to %s but other probes failed, network may be misconfigured", node.Name)
			return
		}
	}

	// Update our self-awareness based on the results of this failed probe.
	// If we don't have peers who will send nacks then we penalize for any
	// failed probe as a simple health metric. If we do have peers to nack
	// verify, then we can use that as a more sophisticated measure of self-
	// health because we assume them to be working, and they can help us
	// decide if the probed node was really dead or if it was something wrong
	// with ourselves.
	awarenessDelta = 0
	if expectedNacks > 0 {
		if nackCount := len(nackCh); nackCount < expectedNacks {
			awarenessDelta += expectedNacks - nackCount
		}
	} else {
		awarenessDelta++
	}

	// No acks received from target, suspect it as failed.
	m.logger.Printf("[DEBUG] memberlist: Suspect %s has failed, no acks received", node.Name)
	s := suspect{Incarnation: node.Incarnation, Node: node.Name, From: m.config.Name}
	m.suspectNode(&s)
}
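
// Illustrative usage sketch of the exported Ping below (added commentary, with
// hypothetical node name and address):
//
//	addr := &net.UDPAddr{IP: net.ParseIP("10.0.0.2"), Port: 7946}
//	if rtt, err := m.Ping("node-b", addr); err == nil {
//		log.Printf("rtt to node-b: %s", rtt)
//	}
//
// Ping only waits ProbeTimeout for a UDP ack; unlike probeNode it does not
// fall back to TCP or indirect pings.
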
// Ping initiates a ping to the node with the specified name.
func (m *Memberlist) Ping(node string, addr net.Addr) (time.Duration, error) {
	// Prepare a ping message and setup an ack handler.
	selfAddr, selfPort := m.getAdvertise()
	ping := ping{
		SeqNo:      m.nextSeqNo(),
		Node:       node,
		SourceAddr: selfAddr,
		SourcePort: selfPort,
		SourceNode: m.config.Name,
	}
	ackCh := make(chan ackMessage, m.config.IndirectChecks+1)
	m.setProbeChannels(ping.SeqNo, ackCh, nil, m.config.ProbeInterval)

	a := Address{Addr: addr.String(), Name: node}

	// Send a ping to the node.
	if err := m.encodeAndSendMsg(a, pingMsg, &ping); err != nil {
		return 0, err
	}

	// Mark the sent time here, which should be after any pre-processing and
	// system calls to do the actual send. This probably under-reports a bit,
	// but it's the best we can do.
	sent := time.Now()

	// Wait for response or timeout.
	select {
	case v := <-ackCh:
		if v.Complete {
			return v.Timestamp.Sub(sent), nil
		}
	case <-time.After(m.config.ProbeTimeout):
		// Timeout, return an error below.
	}

	m.logger.Printf("[DEBUG] memberlist: Failed UDP ping: %v (timeout reached)", node)
	return 0, NoPingResponseError{ping.Node}
}

// resetNodes is used when the tick wraps around. It will reap the
// dead nodes and shuffle the node list.
func (m *Memberlist) resetNodes() {
	m.nodeLock.Lock()
	defer m.nodeLock.Unlock()

	// Move dead nodes, but respect gossip to the dead interval
	deadIdx := moveDeadNodes(m.nodes, m.config.GossipToTheDeadTime)

	// Deregister the dead nodes
	for i := deadIdx; i < len(m.nodes); i++ {
		delete(m.nodeMap, m.nodes[i].Name)
		m.nodes[i] = nil
	}

	// Trim the nodes to exclude the dead nodes
	m.nodes = m.nodes[0:deadIdx]

	// Update numNodes after we've trimmed the dead nodes
	atomic.StoreUint32(&m.numNodes, uint32(deadIdx))

	// Shuffle live nodes
	shuffleNodes(m.nodes)
}
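
// Added note: probe() walks m.nodes round-robin via probeIndex, so the
// reshuffle in resetNodes at each wrap-around gives every node a fresh,
// random probe order per cycle while reaping nodes that have been dead
// longer than GossipToTheDeadTime.
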
// gossip is invoked every GossipInterval period to broadcast our gossip
// messages to a few random nodes.
func (m *Memberlist) gossip() {
	defer metrics.MeasureSince([]string{"memberlist", "gossip"}, time.Now())

	// Get some random live, suspect, or recently dead nodes
	m.nodeLock.RLock()
	kNodes := kRandomNodes(m.config.GossipNodes, m.nodes, func(n *nodeState) bool {
		if n.Name == m.config.Name {
			return true
		}

		switch n.State {
		case StateAlive, StateSuspect:
			return false

		case StateDead:
			return time.Since(n.StateChange) > m.config.GossipToTheDeadTime

		default:
			return true
		}
	})
	m.nodeLock.RUnlock()

	// Compute the bytes available
	bytesAvail := m.config.UDPBufferSize - compoundHeaderOverhead
	if m.config.EncryptionEnabled() {
		bytesAvail -= encryptOverhead(m.encryptionVersion())
	}

	for _, node := range kNodes {
		// Get any pending broadcasts
		msgs := m.getBroadcasts(compoundOverhead, bytesAvail)
		if len(msgs) == 0 {
			return
		}

		addr := node.Address()
		if len(msgs) == 1 {
			// Send single message as is
			if err := m.rawSendMsgPacket(node.FullAddress(), &node, msgs[0]); err != nil {
				m.logger.Printf("[ERR] memberlist: Failed to send gossip to %s: %s", addr, err)
			}
		} else {
			// Otherwise create and send a compound message
			compound := makeCompoundMessage(msgs)
			if err := m.rawSendMsgPacket(node.FullAddress(), &node, compound.Bytes()); err != nil {
				m.logger.Printf("[ERR] memberlist: Failed to send gossip to %s: %s", addr, err)
			}
		}
	}
}

// weight is invoked every WeightInterval period to calculate the local node's
// weight and enqueue a message carrying the result.
func (m *Memberlist) weight() {
	defer metrics.MeasureSince([]string{"memberlist", "weight"}, time.Now())

	// Weight = (AwarenessMaxMultiplier - AwarenessScore) * 0.6 + AwarenessMaxMultiplier * CPUIdlePercent * 0.4
	percent, err := cpu.Percent(0, false)
	if err != nil {
		m.logger.Printf("[ERR] memberlist: Failed to get cpu busy percent: %s", err)
		return
	}
	cpuIdlePercent := 100 - percent[0]
	result := int(math.Round(float64(m.config.AwarenessMaxMultiplier-m.awareness.GetHealthScore())*0.6 +
		float64(m.config.AwarenessMaxMultiplier)*cpuIdlePercent/100*0.4))

	w := weight{
		Incarnation: atomic.LoadUint32(&m.incarnation),
		Node:        m.config.Name,
		From:        m.config.Name,
		Weight:      result,
		WeightAt:    time.Now().UTC().UnixNano() / 1000000,
	}
	m.encodeWeightMsgAndBroadcast(m.config.Name, w)
	m.logger.Printf("[DEBUG] memberlist: enqueued latest weight of local node %s: %d", m.config.Name, result)
}
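
// A worked example of the weight formula above (added, hypothetical numbers):
// with AwarenessMaxMultiplier = 8, a health score of 2, and 50% idle CPU,
//
//	weight = round((8-2)*0.6 + 8*(50/100.0)*0.4) = round(3.6 + 1.6) = 5
//
// so a node that is both healthy and idle advertises a higher weight and
// should receive proportionally more load.
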
// pushPull is invoked periodically to randomly perform a complete state
// exchange. Used to ensure a high level of convergence, but is also
// reasonably expensive as the entire state of this node is exchanged
// with the other node.
func (m *Memberlist) pushPull() {
	// Get a random live node
	m.nodeLock.RLock()
	nodes := kRandomNodes(1, m.nodes, func(n *nodeState) bool {
		return n.Name == m.config.Name ||
			n.State != StateAlive
	})
	m.nodeLock.RUnlock()

	// If no nodes, bail
	if len(nodes) == 0 {
		return
	}
	node := nodes[0]

	// Attempt a push pull
	if err := m.pushPullNode(node.FullAddress(), false); err != nil {
		m.logger.Printf("[ERR] memberlist: Push/Pull with %s failed: %s", node.Name, err)
	}
}

// pushPullNode does a complete state exchange with a specific node.
func (m *Memberlist) pushPullNode(a Address, join bool) error {
	defer metrics.MeasureSince([]string{"memberlist", "pushPullNode"}, time.Now())

	// Attempt to send and receive with the node
	remote, userState, err := m.sendAndReceiveState(a, join)
	if err != nil {
		return err
	}

	if err := m.mergeRemoteState(join, remote, userState); err != nil {
		return err
	}
	return nil
}
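
// A worked example of the version intersection below (added, hypothetical
// numbers): if one alive node advertises protocol range [PMin, PMax] = [1, 3]
// and another [2, 5], then maxpmin = 2 and minpmax = 3, and every node's
// spoken version PCur must land in [2, 3] for verifyProtocol to pass.
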
// verifyProtocol verifies that all the remote nodes can speak with our
// nodes and vice versa on both the core protocol as well as the
// delegate protocol level.
//
// The verification works by finding the maximum minimum and
// minimum maximum understood protocol and delegate versions. In other words,
// it finds the common denominator of protocol and delegate version ranges
// for the entire cluster.
//
// After this, it goes through the entire cluster (local and remote) and
// verifies that everyone's speaking protocol versions satisfy this range.
// If this passes, it means that every node can understand each other.
func (m *Memberlist) verifyProtocol(remote []pushNodeState) error {
	m.nodeLock.RLock()
	defer m.nodeLock.RUnlock()

	// Maximum minimum understood and minimum maximum understood for both
	// the protocol and delegate versions. We use this to verify everyone
	// can be understood.
	var maxpmin, minpmax uint8
	var maxdmin, mindmax uint8
	minpmax = math.MaxUint8
	mindmax = math.MaxUint8

	for _, rn := range remote {
		// If the node isn't alive, then skip it
		if rn.State != StateAlive {
			continue
		}

		// Skip nodes that don't have versions set, it just means
		// their version is zero.
		if len(rn.Vsn) == 0 {
			continue
		}

		if rn.Vsn[0] > maxpmin {
			maxpmin = rn.Vsn[0]
		}

		if rn.Vsn[1] < minpmax {
			minpmax = rn.Vsn[1]
		}

		if rn.Vsn[3] > maxdmin {
			maxdmin = rn.Vsn[3]
		}

		if rn.Vsn[4] < mindmax {
			mindmax = rn.Vsn[4]
		}
	}

	for _, n := range m.nodes {
		// Ignore non-alive nodes
		if n.State != StateAlive {
			continue
		}

		if n.PMin > maxpmin {
			maxpmin = n.PMin
		}

		if n.PMax < minpmax {
			minpmax = n.PMax
		}

		if n.DMin > maxdmin {
			maxdmin = n.DMin
		}

		if n.DMax < mindmax {
			mindmax = n.DMax
		}
	}

	// Now that we definitively know the minimum and maximum understood
	// version that satisfies the whole cluster, we verify that every
	// node in the cluster satisfies this.
	for _, n := range remote {
		var nPCur, nDCur uint8
		if len(n.Vsn) > 0 {
			nPCur = n.Vsn[2]
			nDCur = n.Vsn[5]
		}

		if nPCur < maxpmin || nPCur > minpmax {
			return fmt.Errorf(
				"Node '%s' protocol version (%d) is incompatible: [%d, %d]",
				n.Name, nPCur, maxpmin, minpmax)
		}

		if nDCur < maxdmin || nDCur > mindmax {
			return fmt.Errorf(
				"Node '%s' delegate protocol version (%d) is incompatible: [%d, %d]",
				n.Name, nDCur, maxdmin, mindmax)
		}
	}

	for _, n := range m.nodes {
		nPCur := n.PCur
		nDCur := n.DCur

		if nPCur < maxpmin || nPCur > minpmax {
			return fmt.Errorf(
				"Node '%s' protocol version (%d) is incompatible: [%d, %d]",
				n.Name, nPCur, maxpmin, minpmax)
		}

		if nDCur < maxdmin || nDCur > mindmax {
			return fmt.Errorf(
				"Node '%s' delegate protocol version (%d) is incompatible: [%d, %d]",
				n.Name, nDCur, maxdmin, mindmax)
		}
	}

	return nil
}

// nextSeqNo returns a usable sequence number in a thread safe way
func (m *Memberlist) nextSeqNo() uint32 {
	return atomic.AddUint32(&m.sequenceNum, 1)
}

// nextIncarnation returns the next incarnation number in a thread safe way
func (m *Memberlist) nextIncarnation() uint32 {
	return atomic.AddUint32(&m.incarnation, 1)
}

// skipIncarnation adds the positive offset to the incarnation number.
func (m *Memberlist) skipIncarnation(offset uint32) uint32 {
	return atomic.AddUint32(&m.incarnation, offset)
}

// estNumNodes is used to get the current estimate of the number of nodes
func (m *Memberlist) estNumNodes() int {
	return int(atomic.LoadUint32(&m.numNodes))
}

type ackMessage struct {
	Complete  bool
	Payload   []byte
	Timestamp time.Time
}

// setProbeChannels is used to attach the ackCh to receive a message when an ack
// with a given sequence number is received. The `Complete` field of the message
// will be false on timeout. Any nack messages will cause an empty struct to be
// passed to the nackCh, which can be nil if not needed.
func (m *Memberlist) setProbeChannels(seqNo uint32, ackCh chan ackMessage, nackCh chan struct{}, timeout time.Duration) {
	// Create handler functions for acks and nacks
	ackFn := func(payload []byte, timestamp time.Time) {
		select {
		case ackCh <- ackMessage{true, payload, timestamp}:
		default:
		}
	}
	nackFn := func() {
		select {
		case nackCh <- struct{}{}:
		default:
		}
	}

	// Add the handlers
	ah := &ackHandler{ackFn, nackFn, nil}
	m.ackLock.Lock()
	m.ackHandlers[seqNo] = ah
	m.ackLock.Unlock()

	// Setup a reaping routine
	ah.timer = time.AfterFunc(timeout, func() {
		m.ackLock.Lock()
		delete(m.ackHandlers, seqNo)
		m.ackLock.Unlock()
		select {
		case ackCh <- ackMessage{false, nil, time.Now()}:
		default:
		}
	})
}
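
// Added note on channel sizing: probeNode allocates ackCh and nackCh with
// capacity IndirectChecks+1, so one direct ack plus one ack/nack per indirect
// peer can be buffered; ackFn and nackFn above drop on a full channel (select
// with default) rather than block the packet listener.
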
// setAckHandler is used to attach a handler to be invoked when an ack with a
// given sequence number is received. If a timeout is reached, the handler is
// deleted. This is used for indirect pings so does not configure a function
// for nacks.
func (m *Memberlist) setAckHandler(seqNo uint32, ackFn func([]byte, time.Time), timeout time.Duration) {
	// Add the handler
	ah := &ackHandler{ackFn, nil, nil}
	m.ackLock.Lock()
	m.ackHandlers[seqNo] = ah
	m.ackLock.Unlock()

	// Setup a reaping routine
	ah.timer = time.AfterFunc(timeout, func() {
		m.ackLock.Lock()
		delete(m.ackHandlers, seqNo)
		m.ackLock.Unlock()
	})
}

// invokeAckHandler invokes an ack handler if any is associated, and reaps the
// handler immediately
func (m *Memberlist) invokeAckHandler(ack ackResp, timestamp time.Time) {
	m.ackLock.Lock()
	ah, ok := m.ackHandlers[ack.SeqNo]
	delete(m.ackHandlers, ack.SeqNo)
	m.ackLock.Unlock()
	if !ok {
		return
	}
	ah.timer.Stop()
	ah.ackFn(ack.Payload, timestamp)
}

// invokeNackHandler invokes a nack handler if any is associated.
func (m *Memberlist) invokeNackHandler(nack nackResp) {
	m.ackLock.Lock()
	ah, ok := m.ackHandlers[nack.SeqNo]
	m.ackLock.Unlock()
	if !ok || ah.nackFn == nil {
		return
	}
	ah.nackFn()
}

// refute gossips an alive message in response to incoming information that we
// are suspect or dead. It will make sure the incarnation number beats the given
// accusedInc value, or you can supply 0 to just get the next incarnation number.
// This alters the node state that's passed in so this MUST be called while the
// nodeLock is held.
func (m *Memberlist) refute(me *nodeState, accusedInc uint32) {
	// Make sure the incarnation number beats the accusation.
	inc := m.nextIncarnation()
	if accusedInc >= inc {
		inc = m.skipIncarnation(accusedInc - inc + 1)
	}
	me.Incarnation = inc

	// Decrease our health because we are being asked to refute a problem.
	m.awareness.ApplyDelta(1)

	// Format and broadcast an alive message.
	a := alive{
		Incarnation: inc,
		Node:        me.Name,
		Addr:        me.Addr,
		Port:        me.Port,
		Meta:        me.Meta,
		Vsn: []uint8{
			me.PMin, me.PMax, me.PCur,
			me.DMin, me.DMax, me.DCur,
		},
	}
	m.encodeAndBroadcast(me.Addr, aliveMsg, a)
}
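
// A worked example of the incarnation bump in refute (added commentary): if
// our incarnation is 3 and an accusation carries accusedInc = 5, then
// nextIncarnation yields 4, which does not beat 5, so skipIncarnation(5-4+1)
// jumps us to 6 and the broadcast alive message wins against the accusation.
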
// aliveNode is invoked by the network layer when we get a message about a
// live node.
func (m *Memberlist) aliveNode(a *alive, notify chan struct{}, bootstrap bool) {
	m.nodeLock.Lock()
	defer m.nodeLock.Unlock()
	state, ok := m.nodeMap[a.Node]

	// It is possible that during a Leave(), there is already an aliveMsg
	// in-queue to be processed but blocked by the locks above. If we let
	// that aliveMsg process, it'll cause us to re-join the cluster. This
	// ensures that we don't.
	if m.hasLeft() && a.Node == m.config.Name {
		return
	}

	if len(a.Vsn) >= 3 {
		pMin := a.Vsn[0]
		pMax := a.Vsn[1]
		pCur := a.Vsn[2]
		if pMin == 0 || pMax == 0 || pMin > pMax {
			m.logger.Printf("[WARN] memberlist: Ignoring an alive message for '%s' (%v:%d) because protocol version(s) are wrong: %d <= %d <= %d should be >0", a.Node, a.Addr, a.Port, pMin, pCur, pMax)
			return
		}
	}

	// Invoke the Alive delegate if any. This can be used to filter out
	// alive messages based on custom logic. For example, using a cluster name.
	// Using a merge delegate is not enough, as it is possible for passive
	// cluster merging to still occur.
	if m.config.Alive != nil {
		if len(a.Vsn) < 6 {
			m.logger.Printf("[WARN] memberlist: ignoring alive message for '%s' (%v:%d) because Vsn is not present",
				a.Node, a.Addr, a.Port)
			return
		}
		node := &Node{
			Name: a.Node,
			Addr: a.Addr,
			Port: a.Port,
			Meta: a.Meta,
			PMin: a.Vsn[0],
			PMax: a.Vsn[1],
			PCur: a.Vsn[2],
			DMin: a.Vsn[3],
			DMax: a.Vsn[4],
			DCur: a.Vsn[5],
		}
		if err := m.config.Alive.NotifyAlive(node); err != nil {
			m.logger.Printf("[WARN] memberlist: ignoring alive message for '%s': %s",
				a.Node, err)
			return
		}
	}

	// Check if we've never seen this node before, and if not, then
	// store this node in our node map.
	var updatesNode bool
	if !ok {
		errCon := m.config.AddrAllowed(a.Addr)
		if errCon != nil {
			m.logger.Printf("[WARN] memberlist: Rejected node %s (%v): %s", a.Node, a.Addr, errCon)
			return
		}
		state = &nodeState{
			Node: Node{
				Name: a.Node,
				Addr: a.Addr,
				Port: a.Port,
				Meta: a.Meta,
			},
			State: StateDead,
		}
		if len(a.Vsn) > 5 {
			state.PMin = a.Vsn[0]
			state.PMax = a.Vsn[1]
			state.PCur = a.Vsn[2]
			state.DMin = a.Vsn[3]
			state.DMax = a.Vsn[4]
			state.DCur = a.Vsn[5]
		}

		// Add to map
		m.nodeMap[a.Node] = state

		// Get a random offset. This is important to ensure
		// the failure detection bound is low on average. If all
		// nodes did an append, failure detection bound would be
		// very high.
		n := len(m.nodes)
		offset := randomOffset(n)

		// Add at the end and swap with the node at the offset
		m.nodes = append(m.nodes, state)
		m.nodes[offset], m.nodes[n] = m.nodes[n], m.nodes[offset]

		// Update numNodes after we've added a new node
		atomic.AddUint32(&m.numNodes, 1)
	} else {
		// Check if this address is different from the existing node unless the old node is dead.
		if state.Addr != a.Addr || state.Port != a.Port {
			errCon := m.config.AddrAllowed(a.Addr)
			if errCon != nil {
				m.logger.Printf("[WARN] memberlist: Rejected IP update from %v to %v for node %s: %s", state.Addr, a.Addr, a.Node, errCon)
				return
			}
			// If DeadNodeReclaimTime is configured, check if enough time has elapsed since the node died.
			canReclaim := m.config.DeadNodeReclaimTime > 0 &&
				time.Since(state.StateChange) > m.config.DeadNodeReclaimTime

			// Allow the address to be updated if a dead node is being replaced.
			if state.State == StateLeft || (state.State == StateDead && canReclaim) {
				m.logger.Printf("[INFO] memberlist: Updating address for left or failed node %s from %v:%d to %v:%d",
					state.Name, state.Addr, state.Port, a.Addr, a.Port)
				updatesNode = true
			} else {
				m.logger.Printf("[ERR] memberlist: Conflicting address for %s. Mine: %v:%d Theirs: %v:%d Old state: %v",
					state.Name, state.Addr, state.Port, a.Addr, a.Port, state.State)

				// Inform the conflict delegate if provided
				if m.config.Conflict != nil {
					other := Node{
						Name: a.Node,
						Addr: a.Addr,
						Port: a.Port,
						Meta: a.Meta,
					}
					m.config.Conflict.NotifyConflict(&state.Node, &other)
				}
				return
			}
		}
	}
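
	// Added note on the ordering rules below: messages about other nodes must
	// carry a strictly newer incarnation (or an allowed address update) to be
	// applied, but messages about ourselves are processed even at an equal
	// incarnation so we can detect a stale view of us and refute it.
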
	// Bail if the incarnation number is older, and this is not about us
	isLocalNode := state.Name == m.config.Name
	if a.Incarnation <= state.Incarnation && !isLocalNode && !updatesNode {
		return
	}

	// Bail if strictly less and this is about us
	if a.Incarnation < state.Incarnation && isLocalNode {
		return
	}

	// Clear out any suspicion timer that may be in effect.
	delete(m.nodeTimers, a.Node)

	// Store the old state and meta data
	oldState := state.State
	oldMeta := state.Meta

	// If this is us we need to refute, otherwise re-broadcast
	if !bootstrap && isLocalNode {
		// Compute the version vector
		versions := []uint8{
			state.PMin, state.PMax, state.PCur,
			state.DMin, state.DMax, state.DCur,
		}

		// If the Incarnation is the same, we need special handling, since it
		// is possible for the following situation to happen:
		// 1) Start with configuration C, join cluster
		// 2) Hard fail / Kill / Shutdown
		// 3) Restart with configuration C', join cluster
		//
		// In this case, other nodes and the local node see the same incarnation,
		// but the values may not be the same. For this reason, we always
		// need to do an equality check for this Incarnation. In most cases,
		// we just ignore, but we may need to refute.
		if a.Incarnation == state.Incarnation &&
			bytes.Equal(a.Meta, state.Meta) &&
			bytes.Equal(a.Vsn, versions) {
			return
		}
		m.refute(state, a.Incarnation)
		m.logger.Printf("[WARN] memberlist: Refuting an alive message for '%s' (%v:%d) meta:(%v VS %v), vsn:(%v VS %v)", a.Node, a.Addr, a.Port, a.Meta, state.Meta, a.Vsn, versions)
	} else {
		m.encodeBroadcastNotify(a.Node, aliveMsg, a, notify)

		// Update protocol versions if it arrived
		if len(a.Vsn) > 0 {
			state.PMin = a.Vsn[0]
			state.PMax = a.Vsn[1]
			state.PCur = a.Vsn[2]
			state.DMin = a.Vsn[3]
			state.DMax = a.Vsn[4]
			state.DCur = a.Vsn[5]
		}

		// Update the state and incarnation number
		state.Incarnation = a.Incarnation
		state.Meta = a.Meta
		state.Addr = a.Addr
		state.Port = a.Port
		if state.State != StateAlive {
			state.State = StateAlive
			state.StateChange = time.Now()
		}
	}

	// Update metrics
	metrics.IncrCounter([]string{"memberlist", "msg", "alive"}, 1)

	// Notify the delegate of any relevant updates
	if m.config.Events != nil {
		if oldState == StateDead || oldState == StateLeft {
			// if Dead/Left -> Alive, notify of join
			state.Node.State = state.State
			m.config.Events.NotifyJoin(&state.Node)
		} else if oldState == StateSuspect {
			state.Node.State = state.State
			m.config.Events.NotifySuspectSateChange(&state.Node)
		} else if !bytes.Equal(oldMeta, state.Meta) {
			// if Meta changed, trigger an update notification
			m.config.Events.NotifyUpdate(&state.Node)
		}
	}
}

// suspectNode is invoked by the network layer when we get a message
// about a suspect node
func (m *Memberlist) suspectNode(s *suspect) {
	m.nodeLock.Lock()
	defer m.nodeLock.Unlock()
	state, ok := m.nodeMap[s.Node]

	// If we've never heard about this node before, ignore it
	if !ok {
		return
	}

	// Ignore old incarnation numbers
	if s.Incarnation < state.Incarnation {
		return
	}

	// See if there's a suspicion timer we can confirm. If the info is new
	// to us we will go ahead and re-gossip it. This allows for multiple
	// independent confirmations to flow even when a node probes a node
	// that's already suspect.
	if timer, ok := m.nodeTimers[s.Node]; ok {
		if timer.Confirm(s.From) {
			m.encodeAndBroadcast(s.Node, suspectMsg, s)
		}
		return
	}

	// Ignore non-alive nodes
	if state.State != StateAlive {
		return
	}

	// If this is us we need to refute, otherwise re-broadcast
	if state.Name == m.config.Name {
		m.refute(state, s.Incarnation)
		m.logger.Printf("[WARN] memberlist: Refuting a suspect message (from: %s)", s.From)
		return // Do not mark ourself suspect
	}
	m.encodeAndBroadcast(s.Node, suspectMsg, s)

	// Update metrics
	metrics.IncrCounter([]string{"memberlist", "msg", "suspect"}, 1)

	// Update the state
	state.Incarnation = s.Incarnation
	state.State = StateSuspect
	changeTime := time.Now()
	state.StateChange = changeTime
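
	// A worked example for the timeout computed below (added, assuming the
	// package's log-scaled suspicionTimeout helper): with SuspicionMult = 4,
	// a 32-node cluster, and ProbeInterval = 1s, min comes out to roughly
	// 4 * log10(32) * 1s ≈ 6s, and max = SuspicionMaxTimeoutMult * min;
	// confirmations from other suspecting peers shrink the live timeout
	// toward min.
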
	// Setup a suspicion timer. Given that we don't have any known phase
	// relationship with our peers, we set up k such that we hit the nominal
	// timeout two probe intervals short of what we expect given the suspicion
	// multiplier.
	k := m.config.SuspicionMult - 2

	// If there aren't enough nodes to give the expected confirmations, just
	// set k to 0 to say that we don't expect any. Note we subtract 2 from n
	// here to take out ourselves and the node being probed.
	n := m.estNumNodes()
	if n-2 < k {
		k = 0
	}

	// Compute the timeouts based on the size of the cluster.
	min := suspicionTimeout(m.config.SuspicionMult, n, m.config.ProbeInterval)
	max := time.Duration(m.config.SuspicionMaxTimeoutMult) * min
	fn := func(numConfirmations int) {
		var d *dead

		m.nodeLock.Lock()
		state, ok := m.nodeMap[s.Node]
		timeout := ok && state.State == StateSuspect && state.StateChange == changeTime
		if timeout {
			d = &dead{Incarnation: state.Incarnation, Node: state.Name, From: m.config.Name}
		}
		m.nodeLock.Unlock()

		if timeout {
			if k > 0 && numConfirmations < k {
				metrics.IncrCounter([]string{"memberlist", "degraded", "timeout"}, 1)
			}

			m.logger.Printf("[INFO] memberlist: Marking %s as failed, suspect timeout reached (%d peer confirmations)",
				state.Name, numConfirmations)

			m.deadNode(d)
		}
	}
	m.nodeTimers[s.Node] = newSuspicion(s.From, k, min, max, fn)
	if m.config.Events != nil {
		state.Node.State = state.State
		m.config.Events.NotifySuspectSateChange(&state.Node)
	}
}

// deadNode is invoked by the network layer when we get a message
// about a dead node
func (m *Memberlist) deadNode(d *dead) {
	m.nodeLock.Lock()
	defer m.nodeLock.Unlock()
	state, ok := m.nodeMap[d.Node]

	// If we've never heard about this node before, ignore it
	if !ok {
		return
	}

	// Ignore old incarnation numbers
	if d.Incarnation < state.Incarnation {
		return
	}

	// Clear out any suspicion timer that may be in effect.
	delete(m.nodeTimers, d.Node)

	// Ignore if node is already dead
	if state.DeadOrLeft() {
		return
	}

	// Check if this is us
	if state.Name == m.config.Name {
		// If we are not leaving we need to refute
		if !m.hasLeft() {
			m.refute(state, d.Incarnation)
			m.logger.Printf("[WARN] memberlist: Refuting a dead message (from: %s)", d.From)
			return // Do not mark ourself dead
		}

		// If we are leaving, we broadcast and wait
		m.encodeBroadcastNotify(d.Node, deadMsg, d, m.leaveBroadcast)
	} else {
		m.encodeAndBroadcast(d.Node, deadMsg, d)
	}

	// Update metrics
	metrics.IncrCounter([]string{"memberlist", "msg", "dead"}, 1)

	// Update the state
	state.Incarnation = d.Incarnation

	// If the dead message was sent by the node itself, mark it as left
	// instead of dead.
	if d.Node == d.From {
		state.State = StateLeft
	} else {
		state.State = StateDead
	}
	state.StateChange = time.Now()

	// Notify of death
	if m.config.Events != nil {
		m.config.Events.NotifyLeave(&state.Node)
	}
}

// weightNode is invoked by the network layer when we get a message
// about node weight
func (m *Memberlist) weightNode(s *weight) {
	m.nodeLock.Lock()
	defer m.nodeLock.Unlock()
	state, ok := m.nodeMap[s.Node]

	// If we've never heard about this node before, ignore it
	if !ok {
		return
	}

	// Ignore old incarnation numbers
	if s.Incarnation < state.Incarnation {
		return
	}

	// Ignore non-alive nodes and messages about ourselves
	if state.State != StateAlive || state.Name == m.config.Name {
		return
	}

	// Ignore old weight messages
	if s.WeightAt <= state.WeightAt {
		return
	}

	m.encodeWeightMsgAndBroadcast(s.Node, s)

	// Update metrics
	metrics.IncrCounter([]string{"memberlist", "msg", "weight"}, 1)

	// Update the state
	old := state.Weight
	state.Weight = s.Weight
	state.WeightAt = s.WeightAt
	if state.Weight != old {
		if m.config.Events != nil {
			state.Node.Weight = state.Weight
			m.config.Events.NotifyWeight(&state.Node)
		}
		m.logger.Printf("[DEBUG] memberlist: updated weight (calculated at %s) of node %s from %d to %d",
			time.Unix(s.WeightAt/1000, (s.WeightAt%1000)*1000000).Local().Format("2006-01-02T15:04:05-0700"), state.Name, old, state.Weight)
	}
}

// mergeState is invoked by the network layer when we get a Push/Pull
// state transfer
func (m *Memberlist) mergeState(remote []pushNodeState) {
	for _, r := range remote {
		switch r.State {
		case StateAlive:
			a := alive{
				Incarnation: r.Incarnation,
				Node:        r.Name,
				Addr:        r.Addr,
				Port:        r.Port,
				Meta:        r.Meta,
				Vsn:         r.Vsn,
			}
			m.aliveNode(&a, nil, false)

		case StateLeft:
			d := dead{Incarnation: r.Incarnation, Node: r.Name, From: r.Name}
			m.deadNode(&d)

		case StateDead:
			// If the remote node believes a node is dead, we prefer to
			// suspect that node instead of declaring it dead instantly
			fallthrough
		case StateSuspect:
			s := suspect{Incarnation: r.Incarnation, Node: r.Name, From: m.config.Name}
			m.suspectNode(&s)
		}
	}
}
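
// Added closing note: mergeState intentionally downgrades a remote claim of
// StateDead to a local suspicion with ourselves as the accuser, e.g. a remote
// entry {Name: "node-c", State: StateDead} becomes
// suspect{Node: "node-c", From: m.config.Name}, which starts the normal
// suspicion timer and gives "node-c" a chance to refute instead of being
// declared dead instantly.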