github.com/adnan-c/fabric_e2e_couchdb@v0.6.1-preview.0.20170228180935-21ce6b23cf91/gossip/discovery/discovery_impl.go (about) 1 /* 2 Copyright IBM Corp. 2016 All Rights Reserved. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package discovery 18 19 import ( 20 "bytes" 21 "fmt" 22 "sync" 23 "sync/atomic" 24 "time" 25 26 "strconv" 27 "strings" 28 29 "github.com/hyperledger/fabric/gossip/common" 30 "github.com/hyperledger/fabric/gossip/util" 31 proto "github.com/hyperledger/fabric/protos/gossip" 32 "github.com/op/go-logging" 33 "github.com/spf13/viper" 34 ) 35 36 const defaultHelloInterval = time.Duration(5) * time.Second 37 38 var aliveExpirationCheckInterval time.Duration 39 40 // SetAliveTimeInterval sets the alive time interval 41 func SetAliveTimeInterval(interval time.Duration) { 42 viper.Set("peer.gossip.aliveTimeInterval", interval) 43 } 44 45 // SetAliveExpirationTimeout sets the expiration timeout 46 func SetAliveExpirationTimeout(timeout time.Duration) { 47 viper.Set("peer.gossip.aliveExpirationTimeout", timeout) 48 aliveExpirationCheckInterval = time.Duration(timeout / 10) 49 } 50 51 // SetAliveExpirationCheckInterval sets the expiration check interval 52 func SetAliveExpirationCheckInterval(interval time.Duration) { 53 aliveExpirationCheckInterval = interval 54 } 55 56 // SetReconnectInterval sets the reconnect interval 57 func SetReconnectInterval(interval time.Duration) { 58 viper.Set("peer.gossip.reconnectInterval", interval) 59 } 60 61 type timestamp struct { 62 incTime time.Time 63 seqNum uint64 64 lastSeen time.Time 65 } 66 67 func (ts *timestamp) String() string { 68 return fmt.Sprintf("%v, %v", ts.incTime.UnixNano(), ts.seqNum) 69 } 70 71 type gossipDiscoveryImpl struct { 72 incTime uint64 73 seqNum uint64 74 self NetworkMember 75 deadLastTS map[string]*timestamp // H 76 aliveLastTS map[string]*timestamp // V 77 id2Member map[string]*NetworkMember // all known members 78 aliveMembership *util.MembershipStore 79 deadMembership *util.MembershipStore 80 81 bootstrapPeers []string 82 83 comm CommService 84 crypt CryptoService 85 lock *sync.RWMutex 86 87 toDieChan chan struct{} 88 toDieFlag int32 89 logger *logging.Logger 90 } 91 92 // NewDiscoveryService returns a new discovery service with the comm module passed and the crypto service passed 93 func NewDiscoveryService(bootstrapPeers []string, self NetworkMember, comm CommService, crypt CryptoService) Discovery { 94 d := &gossipDiscoveryImpl{ 95 self: self, 96 incTime: uint64(time.Now().UnixNano()), 97 seqNum: uint64(0), 98 deadLastTS: make(map[string]*timestamp), 99 aliveLastTS: make(map[string]*timestamp), 100 id2Member: make(map[string]*NetworkMember), 101 aliveMembership: util.NewMembershipStore(), 102 deadMembership: util.NewMembershipStore(), 103 crypt: crypt, 104 comm: comm, 105 lock: &sync.RWMutex{}, 106 toDieChan: make(chan struct{}, 1), 107 toDieFlag: int32(0), 108 logger: util.GetLogger(util.LoggingDiscoveryModule, self.InternalEndpoint), 109 } 110 111 go d.periodicalSendAlive() 112 go d.periodicalCheckAlive() 113 go d.handleMessages() 114 go d.periodicalReconnectToDead() 115 go d.handlePresumedDeadPeers() 116 117 go d.connect2BootstrapPeers(bootstrapPeers) 118 119 d.logger.Info("Started", self, "incTime is", d.incTime) 120 121 return d 122 } 123 124 // Exists returns whether a peer with given 125 // PKI-ID is known 126 func (d *gossipDiscoveryImpl) Exists(PKIID common.PKIidType) bool { 127 d.lock.RLock() 128 defer d.lock.RUnlock() 129 _, exists := d.id2Member[string(PKIID)] 130 return exists 131 } 132 133 func (d *gossipDiscoveryImpl) Connect(member NetworkMember) { 134 d.logger.Debug("Entering", member) 135 defer d.logger.Debug("Exiting") 136 137 d.lock.Lock() 138 defer d.lock.Unlock() 139 140 if _, exists := d.id2Member[string(member.PKIid)]; exists { 141 d.logger.Info("Member", member, "already known") 142 return 143 } 144 145 d.deadLastTS[string(member.PKIid)] = ×tamp{ 146 incTime: time.Unix(0, 0), 147 lastSeen: time.Now(), 148 seqNum: 0, 149 } 150 d.id2Member[string(member.PKIid)] = &member 151 } 152 153 func (d *gossipDiscoveryImpl) connect2BootstrapPeers(endpoints []string) { 154 if len(d.self.InternalEndpoint) == 0 { 155 d.logger.Panic("Internal endpoint is empty:", d.self.InternalEndpoint) 156 } 157 158 if len(strings.Split(d.self.InternalEndpoint, ":")) != 2 { 159 d.logger.Panicf("Self endpoint %s isn't formatted as 'host:port'", d.self.InternalEndpoint) 160 } 161 162 myPort, err := strconv.ParseInt(strings.Split(d.self.InternalEndpoint, ":")[1], 10, 64) 163 if err != nil { 164 d.logger.Panicf("Self endpoint %s has not valid port'", d.self.InternalEndpoint) 165 } 166 167 d.logger.Info("Entering:", endpoints) 168 defer d.logger.Info("Exiting") 169 endpoints = filterOutLocalhost(endpoints, int(myPort)) 170 if len(endpoints) == 0 { 171 return 172 } 173 174 for !d.somePeerIsKnown() { 175 var wg sync.WaitGroup 176 req := d.createMembershipRequest().NoopSign() 177 wg.Add(len(endpoints)) 178 for _, endpoint := range endpoints { 179 go func(endpoint string) { 180 defer wg.Done() 181 peer := &NetworkMember{ 182 Endpoint: endpoint, 183 InternalEndpoint: endpoint, 184 } 185 if !d.comm.Ping(peer) { 186 return 187 } 188 d.comm.SendToPeer(peer, req) 189 }(endpoint) 190 } 191 wg.Wait() 192 time.Sleep(getReconnectInterval()) 193 } 194 } 195 196 func (d *gossipDiscoveryImpl) somePeerIsKnown() bool { 197 d.lock.RLock() 198 defer d.lock.RUnlock() 199 return len(d.aliveLastTS) != 0 200 } 201 202 func (d *gossipDiscoveryImpl) InitiateSync(peerNum int) { 203 if d.toDie() { 204 return 205 } 206 var peers2SendTo []*NetworkMember 207 memReq := d.createMembershipRequest().NoopSign() 208 209 d.lock.RLock() 210 211 n := d.aliveMembership.Size() 212 k := peerNum 213 if k > n { 214 k = n 215 } 216 217 aliveMembersAsSlice := d.aliveMembership.ToSlice() 218 for _, i := range util.GetRandomIndices(k, n-1) { 219 pulledPeer := aliveMembersAsSlice[i].GetAliveMsg().Membership 220 var internalEndpoint string 221 if aliveMembersAsSlice[i].Envelope.SecretEnvelope != nil { 222 internalEndpoint = aliveMembersAsSlice[i].Envelope.SecretEnvelope.InternalEndpoint() 223 } 224 netMember := &NetworkMember{ 225 Endpoint: pulledPeer.Endpoint, 226 Metadata: pulledPeer.Metadata, 227 PKIid: pulledPeer.PkiID, 228 InternalEndpoint: internalEndpoint, 229 } 230 peers2SendTo = append(peers2SendTo, netMember) 231 } 232 233 d.lock.RUnlock() 234 235 for _, netMember := range peers2SendTo { 236 d.comm.SendToPeer(netMember, memReq) 237 } 238 } 239 240 func (d *gossipDiscoveryImpl) handlePresumedDeadPeers() { 241 defer d.logger.Debug("Stopped") 242 243 for !d.toDie() { 244 select { 245 case deadPeer := <-d.comm.PresumedDead(): 246 if d.isAlive(deadPeer) { 247 d.expireDeadMembers([]common.PKIidType{deadPeer}) 248 } 249 break 250 case s := <-d.toDieChan: 251 d.toDieChan <- s 252 return 253 } 254 } 255 } 256 257 func (d *gossipDiscoveryImpl) isAlive(pkiID common.PKIidType) bool { 258 d.lock.RLock() 259 defer d.lock.RUnlock() 260 _, alive := d.aliveLastTS[string(pkiID)] 261 return alive 262 } 263 264 func (d *gossipDiscoveryImpl) handleMessages() { 265 defer d.logger.Debug("Stopped") 266 267 in := d.comm.Accept() 268 for !d.toDie() { 269 select { 270 case s := <-d.toDieChan: 271 d.toDieChan <- s 272 return 273 case m := <-in: 274 d.handleMsgFromComm(m) 275 break 276 } 277 } 278 } 279 280 func (d *gossipDiscoveryImpl) handleMsgFromComm(m *proto.SignedGossipMessage) { 281 if m == nil { 282 return 283 } 284 if m.GetAliveMsg() == nil && m.GetMemRes() == nil && m.GetMemReq() == nil { 285 d.logger.Warning("Got message with wrong type (expected Alive or MembershipResponse or MembershipRequest message):", m.Content) // TODO: write only message type 286 d.logger.Warning(m) 287 return 288 } 289 290 d.logger.Debug("Got message:", m) 291 defer d.logger.Debug("Exiting") 292 293 // TODO: make sure somehow that the membership request is "fresh" 294 if memReq := m.GetMemReq(); memReq != nil { 295 selfInfoGossipMsg, err := memReq.SelfInformation.ToGossipMessage() 296 if err != nil { 297 d.logger.Warning("Failed deserializing GossipMessage from envelope:", err) 298 return 299 } 300 d.handleAliveMessage(selfInfoGossipMsg) 301 302 var internalEndpoint string 303 if m.Envelope.SecretEnvelope != nil { 304 internalEndpoint = m.Envelope.SecretEnvelope.InternalEndpoint() 305 } 306 307 // Sending a membership response to a peer may block this routine 308 // in case the sending is deliberately slow (i.e attack). 309 // will keep this async until I'll write a timeout detector in the comm layer 310 go d.sendMemResponse(selfInfoGossipMsg.GetAliveMsg().Membership, memReq.Known, internalEndpoint) 311 return 312 } 313 314 if m.IsAliveMsg() { 315 d.handleAliveMessage(m) 316 return 317 } 318 319 if memResp := m.GetMemRes(); memResp != nil { 320 for _, env := range memResp.Alive { 321 am, err := env.ToGossipMessage() 322 if err != nil { 323 d.logger.Warning("Membership response contains an invalid message from an online peer:", err) 324 return 325 } 326 if !am.IsAliveMsg() { 327 d.logger.Warning("Expected alive message, got", am, "instead") 328 return 329 } 330 d.handleAliveMessage(am) 331 } 332 333 for _, env := range memResp.Dead { 334 dm, err := env.ToGossipMessage() 335 if err != nil { 336 d.logger.Warning("Membership response contains an invalid message from an offline peer", err) 337 return 338 } 339 if !d.crypt.ValidateAliveMsg(m) { 340 d.logger.Warningf("Alive message isn't authentic, someone spoofed %s's identity", dm.GetAliveMsg().Membership) 341 continue 342 } 343 344 newDeadMembers := []*proto.SignedGossipMessage{} 345 d.lock.RLock() 346 if _, known := d.id2Member[string(dm.GetAliveMsg().Membership.PkiID)]; !known { 347 newDeadMembers = append(newDeadMembers, dm) 348 } 349 d.lock.RUnlock() 350 d.learnNewMembers([]*proto.SignedGossipMessage{}, newDeadMembers) 351 } 352 } 353 } 354 355 func (d *gossipDiscoveryImpl) sendMemResponse(member *proto.Member, known [][]byte, internalEndpoint string) { 356 d.logger.Debug("Entering", member) 357 358 memResp := d.createMembershipResponse(known) 359 360 defer d.logger.Debug("Exiting, replying with", memResp) 361 362 d.comm.SendToPeer(&NetworkMember{ 363 Endpoint: member.Endpoint, 364 Metadata: member.Metadata, 365 PKIid: member.PkiID, 366 InternalEndpoint: internalEndpoint, 367 }, (&proto.GossipMessage{ 368 Tag: proto.GossipMessage_EMPTY, 369 Nonce: uint64(0), 370 Content: &proto.GossipMessage_MemRes{ 371 MemRes: memResp, 372 }, 373 }).NoopSign()) 374 } 375 376 func (d *gossipDiscoveryImpl) createMembershipResponse(known [][]byte) *proto.MembershipResponse { 377 aliveMsg := d.createAliveMessage() 378 379 d.lock.RLock() 380 defer d.lock.RUnlock() 381 382 deadPeers := []*proto.Envelope{} 383 384 for _, dm := range d.deadMembership.ToSlice() { 385 isKnown := false 386 for _, knownPeer := range known { 387 if equalPKIid(knownPeer, dm.GetAliveMsg().Membership.PkiID) { 388 isKnown = true 389 break 390 } 391 } 392 if !isKnown { 393 deadPeers = append(deadPeers, dm.Envelope) 394 break 395 } 396 } 397 398 aliveMembersAsSlice := d.aliveMembership.ToSlice() 399 aliveSnapshot := make([]*proto.Envelope, len(aliveMembersAsSlice)) 400 for i, msg := range aliveMembersAsSlice { 401 aliveSnapshot[i] = msg.Envelope 402 } 403 404 return &proto.MembershipResponse{ 405 Alive: append(aliveSnapshot, aliveMsg.Envelope), 406 Dead: deadPeers, 407 } 408 } 409 410 func (d *gossipDiscoveryImpl) handleAliveMessage(m *proto.SignedGossipMessage) { 411 d.logger.Debug("Entering", m) 412 defer d.logger.Debug("Exiting") 413 414 if !d.crypt.ValidateAliveMsg(m) { 415 d.logger.Warningf("Alive message isn't authentic, someone must be spoofing %s's identity", m.GetAliveMsg()) 416 return 417 } 418 419 pkiID := m.GetAliveMsg().Membership.PkiID 420 if equalPKIid(pkiID, d.self.PKIid) { 421 d.logger.Debug("Got alive message about ourselves,", m) 422 return 423 } 424 425 ts := m.GetAliveMsg().Timestamp 426 427 d.lock.RLock() 428 _, known := d.id2Member[string(pkiID)] 429 d.lock.RUnlock() 430 431 if !known { 432 d.learnNewMembers([]*proto.SignedGossipMessage{m}, []*proto.SignedGossipMessage{}) 433 return 434 } 435 436 d.lock.RLock() 437 lastAliveTS, isAlive := d.aliveLastTS[string(pkiID)] 438 lastDeadTS, isDead := d.deadLastTS[string(pkiID)] 439 d.lock.RUnlock() 440 441 if !isAlive && !isDead { 442 d.logger.Panicf("Member %s is known but not found neither in alive nor in dead lastTS maps, isAlive=%v, isDead=%v", m.GetAliveMsg().Membership.Endpoint, isAlive, isDead) 443 return 444 } 445 446 if isAlive && isDead { 447 d.logger.Panicf("Member %s is both alive and dead at the same time", m.GetAliveMsg().Membership) 448 return 449 } 450 451 if isDead { 452 if before(lastDeadTS, ts) { 453 // resurrect peer 454 d.resurrectMember(m, *ts) 455 } else if !same(lastDeadTS, ts) { 456 d.logger.Debug(m.GetAliveMsg().Membership, "lastDeadTS:", lastDeadTS, "but got ts:", ts) 457 } 458 return 459 } 460 461 d.lock.RLock() 462 lastAliveTS, isAlive = d.aliveLastTS[string(pkiID)] 463 d.lock.RUnlock() 464 465 if isAlive { 466 if before(lastAliveTS, ts) { 467 d.learnExistingMembers([]*proto.SignedGossipMessage{m}) 468 } else if !same(lastAliveTS, ts) { 469 d.logger.Debug(m.GetAliveMsg().Membership, "lastAliveTS:", lastAliveTS, "but got ts:", ts) 470 } 471 472 } 473 // else, ignore the message because it is too old 474 } 475 476 func (d *gossipDiscoveryImpl) resurrectMember(am *proto.SignedGossipMessage, t proto.PeerTime) { 477 d.logger.Info("Entering, AliveMessage:", am, "t:", t) 478 defer d.logger.Info("Exiting") 479 d.lock.Lock() 480 defer d.lock.Unlock() 481 482 member := am.GetAliveMsg().Membership 483 pkiID := member.PkiID 484 d.aliveLastTS[string(pkiID)] = ×tamp{ 485 lastSeen: time.Now(), 486 seqNum: t.SeqNum, 487 incTime: tsToTime(t.IncNumber), 488 } 489 490 var internalEndpoint string 491 if am.Envelope.SecretEnvelope != nil { 492 internalEndpoint = am.Envelope.SecretEnvelope.InternalEndpoint() 493 } 494 495 d.id2Member[string(pkiID)] = &NetworkMember{ 496 Endpoint: member.Endpoint, 497 Metadata: member.Metadata, 498 PKIid: member.PkiID, 499 InternalEndpoint: internalEndpoint, 500 } 501 502 delete(d.deadLastTS, string(pkiID)) 503 d.deadMembership.Remove(common.PKIidType(pkiID)) 504 d.aliveMembership.Put(common.PKIidType(pkiID), &proto.SignedGossipMessage{GossipMessage: am.GossipMessage, Envelope: am.Envelope}) 505 } 506 507 func (d *gossipDiscoveryImpl) periodicalReconnectToDead() { 508 defer d.logger.Debug("Stopped") 509 510 for !d.toDie() { 511 wg := &sync.WaitGroup{} 512 513 for _, member := range d.copyLastSeen(d.deadLastTS) { 514 wg.Add(1) 515 go func(member NetworkMember) { 516 defer wg.Done() 517 if d.comm.Ping(&member) { 518 d.logger.Debug(member, "is responding, sending membership request") 519 d.sendMembershipRequest(&member) 520 } else { 521 d.logger.Debug(member, "is still dead") 522 } 523 }(member) 524 } 525 526 wg.Wait() 527 d.logger.Debug("Sleeping", getReconnectInterval()) 528 time.Sleep(getReconnectInterval()) 529 } 530 } 531 532 func (d *gossipDiscoveryImpl) sendMembershipRequest(member *NetworkMember) { 533 d.comm.SendToPeer(member, d.createMembershipRequest()) 534 } 535 536 func (d *gossipDiscoveryImpl) createMembershipRequest() *proto.SignedGossipMessage { 537 req := &proto.MembershipRequest{ 538 SelfInformation: d.createAliveMessage().Envelope, 539 Known: d.getKnownPeers(), 540 } 541 return (&proto.GossipMessage{ 542 Tag: proto.GossipMessage_EMPTY, 543 Nonce: uint64(0), 544 Content: &proto.GossipMessage_MemReq{ 545 MemReq: req, 546 }, 547 }).NoopSign() 548 } 549 550 func (d *gossipDiscoveryImpl) getKnownPeers() [][]byte { 551 d.lock.RLock() 552 defer d.lock.RUnlock() 553 554 peers := [][]byte{} 555 for id := range d.id2Member { 556 peers = append(peers, common.PKIidType(id)) 557 } 558 return peers 559 } 560 561 func (d *gossipDiscoveryImpl) copyLastSeen(lastSeenMap map[string]*timestamp) []NetworkMember { 562 d.lock.RLock() 563 defer d.lock.RUnlock() 564 565 res := []NetworkMember{} 566 for pkiIDStr := range lastSeenMap { 567 res = append(res, *(d.id2Member[pkiIDStr])) 568 } 569 return res 570 } 571 572 func (d *gossipDiscoveryImpl) periodicalCheckAlive() { 573 defer d.logger.Debug("Stopped") 574 575 for !d.toDie() { 576 time.Sleep(getAliveExpirationCheckInterval()) 577 dead := d.getDeadMembers() 578 if len(dead) > 0 { 579 d.logger.Debugf("Got %v dead members: %v", len(dead), dead) 580 d.expireDeadMembers(dead) 581 } 582 } 583 } 584 585 func (d *gossipDiscoveryImpl) expireDeadMembers(dead []common.PKIidType) { 586 d.logger.Warning("Entering", dead) 587 defer d.logger.Warning("Exiting") 588 589 var deadMembers2Expire []*NetworkMember 590 591 d.lock.Lock() 592 593 for _, pkiID := range dead { 594 if _, isAlive := d.aliveLastTS[string(pkiID)]; !isAlive { 595 continue 596 } 597 deadMembers2Expire = append(deadMembers2Expire, d.id2Member[string(pkiID)]) 598 // move lastTS from alive to dead 599 lastTS, hasLastTS := d.aliveLastTS[string(pkiID)] 600 if hasLastTS { 601 d.deadLastTS[string(pkiID)] = lastTS 602 delete(d.aliveLastTS, string(pkiID)) 603 } 604 605 if am := d.aliveMembership.MsgByID(pkiID); am != nil { 606 d.deadMembership.Put(pkiID, am) 607 d.aliveMembership.Remove(pkiID) 608 } 609 } 610 611 d.lock.Unlock() 612 613 for _, member2Expire := range deadMembers2Expire { 614 d.logger.Warning("Closing connection to", member2Expire) 615 d.comm.CloseConn(member2Expire) 616 } 617 } 618 619 func (d *gossipDiscoveryImpl) getDeadMembers() []common.PKIidType { 620 d.lock.RLock() 621 defer d.lock.RUnlock() 622 623 dead := []common.PKIidType{} 624 for id, last := range d.aliveLastTS { 625 elapsedNonAliveTime := time.Since(last.lastSeen) 626 if elapsedNonAliveTime.Nanoseconds() > getAliveExpirationTimeout().Nanoseconds() { 627 d.logger.Warning("Haven't heard from", id, "for", elapsedNonAliveTime) 628 dead = append(dead, common.PKIidType(id)) 629 } 630 } 631 return dead 632 } 633 634 func (d *gossipDiscoveryImpl) periodicalSendAlive() { 635 defer d.logger.Debug("Stopped") 636 637 for !d.toDie() { 638 d.logger.Debug("Sleeping", getAliveTimeInterval()) 639 time.Sleep(getAliveTimeInterval()) 640 d.comm.Gossip(d.createAliveMessage()) 641 } 642 } 643 644 func (d *gossipDiscoveryImpl) createAliveMessage() *proto.SignedGossipMessage { 645 d.lock.Lock() 646 d.seqNum++ 647 seqNum := d.seqNum 648 649 endpoint := d.self.Endpoint 650 meta := d.self.Metadata 651 pkiID := d.self.PKIid 652 internalEndpoint := d.self.InternalEndpoint 653 654 d.lock.Unlock() 655 656 msg2Gossip := &proto.GossipMessage{ 657 Tag: proto.GossipMessage_EMPTY, 658 Content: &proto.GossipMessage_AliveMsg{ 659 AliveMsg: &proto.AliveMessage{ 660 Membership: &proto.Member{ 661 Endpoint: endpoint, 662 Metadata: meta, 663 PkiID: pkiID, 664 }, 665 Timestamp: &proto.PeerTime{ 666 IncNumber: uint64(d.incTime), 667 SeqNum: seqNum, 668 }, 669 }, 670 }, 671 } 672 673 return &proto.SignedGossipMessage{ 674 GossipMessage: msg2Gossip, 675 Envelope: d.crypt.SignMessage(msg2Gossip, internalEndpoint), 676 } 677 } 678 679 func (d *gossipDiscoveryImpl) learnExistingMembers(aliveArr []*proto.SignedGossipMessage) { 680 d.logger.Infof("Entering: learnedMembers={%v}", aliveArr) 681 defer d.logger.Debug("Exiting") 682 683 d.lock.Lock() 684 defer d.lock.Unlock() 685 686 for _, m := range aliveArr { 687 am := m.GetAliveMsg() 688 if m == nil { 689 d.logger.Warning("Expected alive message, got instead:", m) 690 return 691 } 692 d.logger.Debug("updating", am) 693 694 var internalEndpoint string 695 696 if m.Envelope.SecretEnvelope != nil { 697 internalEndpoint = m.Envelope.SecretEnvelope.InternalEndpoint() 698 } 699 700 // update member's data 701 member := d.id2Member[string(am.Membership.PkiID)] 702 member.Endpoint = am.Membership.Endpoint 703 member.Metadata = am.Membership.Metadata 704 member.InternalEndpoint = internalEndpoint 705 706 if _, isKnownAsDead := d.deadLastTS[string(am.Membership.PkiID)]; isKnownAsDead { 707 d.logger.Warning(am.Membership, "has already expired") 708 continue 709 } 710 711 if _, isKnownAsAlive := d.aliveLastTS[string(am.Membership.PkiID)]; !isKnownAsAlive { 712 d.logger.Warning(am.Membership, "has already expired") 713 continue 714 } else { 715 d.logger.Debug("Updating aliveness data:", am) 716 // update existing aliveness data 717 alive := d.aliveLastTS[string(am.Membership.PkiID)] 718 alive.incTime = tsToTime(am.Timestamp.IncNumber) 719 alive.lastSeen = time.Now() 720 alive.seqNum = am.Timestamp.SeqNum 721 722 if am := d.aliveMembership.MsgByID(m.GetAliveMsg().Membership.PkiID); am == nil { 723 d.logger.Debug("Adding", am, "to aliveMembership") 724 msg := &proto.SignedGossipMessage{GossipMessage: m.GossipMessage, Envelope: am.Envelope} 725 d.aliveMembership.Put(m.GetAliveMsg().Membership.PkiID, msg) 726 } else { 727 d.logger.Debug("Replacing", am, "in aliveMembership") 728 am.GossipMessage = m.GossipMessage 729 am.Envelope = m.Envelope 730 } 731 } 732 } 733 } 734 735 func (d *gossipDiscoveryImpl) learnNewMembers(aliveMembers []*proto.SignedGossipMessage, deadMembers []*proto.SignedGossipMessage) { 736 d.logger.Debugf("Entering: learnedMembers={%v}, deadMembers={%v}", aliveMembers, deadMembers) 737 defer d.logger.Debugf("Exiting") 738 739 d.lock.Lock() 740 defer d.lock.Unlock() 741 742 for _, am := range aliveMembers { 743 if equalPKIid(am.GetAliveMsg().Membership.PkiID, d.self.PKIid) { 744 continue 745 } 746 d.aliveLastTS[string(am.GetAliveMsg().Membership.PkiID)] = ×tamp{ 747 incTime: tsToTime(am.GetAliveMsg().Timestamp.IncNumber), 748 lastSeen: time.Now(), 749 seqNum: am.GetAliveMsg().Timestamp.SeqNum, 750 } 751 752 d.aliveMembership.Put(am.GetAliveMsg().Membership.PkiID, &proto.SignedGossipMessage{GossipMessage: am.GossipMessage, Envelope: am.Envelope}) 753 d.logger.Infof("Learned about a new alive member: %v", am) 754 } 755 756 for _, dm := range deadMembers { 757 if equalPKIid(dm.GetAliveMsg().Membership.PkiID, d.self.PKIid) { 758 continue 759 } 760 d.deadLastTS[string(dm.GetAliveMsg().Membership.PkiID)] = ×tamp{ 761 incTime: tsToTime(dm.GetAliveMsg().Timestamp.IncNumber), 762 lastSeen: time.Now(), 763 seqNum: dm.GetAliveMsg().Timestamp.SeqNum, 764 } 765 766 d.deadMembership.Put(dm.GetAliveMsg().Membership.PkiID, &proto.SignedGossipMessage{GossipMessage: dm.GossipMessage, Envelope: dm.Envelope}) 767 d.logger.Infof("Learned about a new dead member: %v", dm) 768 } 769 770 // update the member in any case 771 for _, a := range [][]*proto.SignedGossipMessage{aliveMembers, deadMembers} { 772 for _, m := range a { 773 member := m.GetAliveMsg() 774 if member == nil { 775 d.logger.Warning("Expected alive message, got instead:", m) 776 return 777 } 778 779 var internalEndpoint string 780 if m.Envelope.SecretEnvelope != nil { 781 internalEndpoint = m.Envelope.SecretEnvelope.InternalEndpoint() 782 } 783 784 d.id2Member[string(member.Membership.PkiID)] = &NetworkMember{ 785 Endpoint: member.Membership.Endpoint, 786 Metadata: member.Membership.Metadata, 787 PKIid: member.Membership.PkiID, 788 InternalEndpoint: internalEndpoint, 789 } 790 } 791 } 792 } 793 794 func (d *gossipDiscoveryImpl) GetMembership() []NetworkMember { 795 if d.toDie() { 796 return []NetworkMember{} 797 } 798 d.lock.RLock() 799 defer d.lock.RUnlock() 800 801 response := []NetworkMember{} 802 for _, m := range d.aliveMembership.ToSlice() { 803 var internalEndpoint string 804 805 if m.Envelope.SecretEnvelope != nil { 806 internalEndpoint = m.Envelope.SecretEnvelope.InternalEndpoint() 807 } 808 809 member := m.GetAliveMsg() 810 response = append(response, NetworkMember{ 811 PKIid: member.Membership.PkiID, 812 Endpoint: member.Membership.Endpoint, 813 Metadata: member.Membership.Metadata, 814 InternalEndpoint: internalEndpoint, 815 }) 816 } 817 return response 818 819 } 820 821 func tsToTime(ts uint64) time.Time { 822 return time.Unix(int64(0), int64(ts)) 823 } 824 825 func (d *gossipDiscoveryImpl) UpdateMetadata(md []byte) { 826 d.lock.Lock() 827 defer d.lock.Unlock() 828 d.self.Metadata = md 829 } 830 831 func (d *gossipDiscoveryImpl) UpdateEndpoint(endpoint string) { 832 d.lock.Lock() 833 defer d.lock.Unlock() 834 835 d.self.Endpoint = endpoint 836 } 837 838 func (d *gossipDiscoveryImpl) Self() NetworkMember { 839 return NetworkMember{ 840 Endpoint: d.self.Endpoint, 841 Metadata: d.self.Metadata, 842 PKIid: d.self.PKIid, 843 InternalEndpoint: d.self.InternalEndpoint, 844 } 845 } 846 847 func (d *gossipDiscoveryImpl) toDie() bool { 848 toDie := atomic.LoadInt32(&d.toDieFlag) == int32(1) 849 return toDie 850 } 851 852 func (d *gossipDiscoveryImpl) Stop() { 853 defer d.logger.Info("Stopped") 854 d.logger.Info("Stopping") 855 atomic.StoreInt32(&d.toDieFlag, int32(1)) 856 d.toDieChan <- struct{}{} 857 } 858 859 func equalPKIid(a, b common.PKIidType) bool { 860 return bytes.Equal(a, b) 861 } 862 863 func same(a *timestamp, b *proto.PeerTime) bool { 864 return uint64(a.incTime.UnixNano()) == b.IncNumber && a.seqNum == b.SeqNum 865 } 866 867 func before(a *timestamp, b *proto.PeerTime) bool { 868 return (uint64(a.incTime.UnixNano()) == b.IncNumber && a.seqNum < b.SeqNum) || 869 uint64(a.incTime.UnixNano()) < b.IncNumber 870 } 871 872 func getAliveTimeInterval() time.Duration { 873 return util.GetDurationOrDefault("peer.gossip.aliveTimeInterval", defaultHelloInterval) 874 } 875 876 func getAliveExpirationTimeout() time.Duration { 877 return util.GetDurationOrDefault("peer.gossip.aliveExpirationTimeout", 5*getAliveTimeInterval()) 878 } 879 880 func getAliveExpirationCheckInterval() time.Duration { 881 if aliveExpirationCheckInterval != 0 { 882 return aliveExpirationCheckInterval 883 } 884 885 return time.Duration(getAliveExpirationTimeout() / 10) 886 } 887 888 func getReconnectInterval() time.Duration { 889 return util.GetDurationOrDefault("peer.gossip.reconnectInterval", getAliveExpirationTimeout()) 890 } 891 892 func filterOutLocalhost(endpoints []string, port int) []string { 893 var returnedEndpoints []string 894 for _, endpoint := range endpoints { 895 if endpoint == fmt.Sprintf("127.0.0.1:%d", port) || endpoint == fmt.Sprintf("localhost:%d", port) { 896 continue 897 } 898 returnedEndpoints = append(returnedEndpoints, endpoint) 899 } 900 return returnedEndpoints 901 }