github.com/leonlxy/hyperledger@v1.0.0-alpha.0.20170427033203-34922035d248/gossip/discovery/discovery_impl.go (about) 1 /* 2 Copyright IBM Corp. 2016 All Rights Reserved. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package discovery 18 19 import ( 20 "bytes" 21 "fmt" 22 "strconv" 23 "strings" 24 "sync" 25 "sync/atomic" 26 "time" 27 28 "github.com/hyperledger/fabric/gossip/common" 29 "github.com/hyperledger/fabric/gossip/gossip/msgstore" 30 "github.com/hyperledger/fabric/gossip/util" 31 proto "github.com/hyperledger/fabric/protos/gossip" 32 "github.com/op/go-logging" 33 "github.com/spf13/viper" 34 ) 35 36 const defaultHelloInterval = time.Duration(5) * time.Second 37 const msgExpirationFactor = 20 38 39 var aliveExpirationCheckInterval time.Duration 40 var maxConnectionAttempts = 120 41 42 // SetAliveTimeInterval sets the alive time interval 43 func SetAliveTimeInterval(interval time.Duration) { 44 viper.Set("peer.gossip.aliveTimeInterval", interval) 45 } 46 47 // SetAliveExpirationTimeout sets the expiration timeout 48 func SetAliveExpirationTimeout(timeout time.Duration) { 49 viper.Set("peer.gossip.aliveExpirationTimeout", timeout) 50 aliveExpirationCheckInterval = time.Duration(timeout / 10) 51 } 52 53 // SetAliveExpirationCheckInterval sets the expiration check interval 54 func SetAliveExpirationCheckInterval(interval time.Duration) { 55 aliveExpirationCheckInterval = interval 56 } 57 58 // SetReconnectInterval sets the reconnect interval 59 func SetReconnectInterval(interval time.Duration) { 60 viper.Set("peer.gossip.reconnectInterval", interval) 61 } 62 63 type timestamp struct { 64 incTime time.Time 65 seqNum uint64 66 lastSeen time.Time 67 } 68 69 func (ts *timestamp) String() string { 70 return fmt.Sprintf("%v, %v", ts.incTime.UnixNano(), ts.seqNum) 71 } 72 73 type gossipDiscoveryImpl struct { 74 incTime uint64 75 seqNum uint64 76 self NetworkMember 77 deadLastTS map[string]*timestamp // H 78 aliveLastTS map[string]*timestamp // V 79 id2Member map[string]*NetworkMember // all known members 80 aliveMembership *util.MembershipStore 81 deadMembership *util.MembershipStore 82 83 msgStore msgstore.MessageStore 84 85 bootstrapPeers []string 86 87 comm CommService 88 crypt CryptoService 89 lock *sync.RWMutex 90 91 toDieChan chan struct{} 92 toDieFlag int32 93 logger *logging.Logger 94 disclosurePolicy DisclosurePolicy 95 } 96 97 // NewDiscoveryService returns a new discovery service with the comm module passed and the crypto service passed 98 func NewDiscoveryService(bootstrapPeers []string, self NetworkMember, comm CommService, crypt CryptoService, disPol DisclosurePolicy) Discovery { 99 d := &gossipDiscoveryImpl{ 100 self: self, 101 incTime: uint64(time.Now().UnixNano()), 102 seqNum: uint64(0), 103 deadLastTS: make(map[string]*timestamp), 104 aliveLastTS: make(map[string]*timestamp), 105 id2Member: make(map[string]*NetworkMember), 106 aliveMembership: util.NewMembershipStore(), 107 deadMembership: util.NewMembershipStore(), 108 crypt: crypt, 109 comm: comm, 110 lock: &sync.RWMutex{}, 111 toDieChan: make(chan struct{}, 1), 112 toDieFlag: int32(0), 113 logger: util.GetLogger(util.LoggingDiscoveryModule, self.InternalEndpoint), 114 disclosurePolicy: disPol, 115 } 116 117 policy := proto.NewGossipMessageComparator(0) 118 trigger := func(m interface{}) {} 119 aliveMsgTTL := getAliveExpirationTimeout() * msgExpirationFactor 120 externalLock := func() { d.lock.Lock() } 121 externalUnlock := func() { d.lock.Unlock() } 122 callback := func(m interface{}) { 123 msg := m.(*proto.SignedGossipMessage) 124 if !msg.IsAliveMsg() { 125 return 126 } 127 id := msg.GetAliveMsg().Membership.PkiId 128 d.aliveMembership.Remove(id) 129 d.deadMembership.Remove(id) 130 delete(d.id2Member, string(id)) 131 delete(d.deadLastTS, string(id)) 132 delete(d.aliveLastTS, string(id)) 133 } 134 135 d.msgStore = msgstore.NewMessageStoreExpirable(policy, trigger, aliveMsgTTL, externalLock, externalUnlock, callback) 136 137 go d.periodicalSendAlive() 138 go d.periodicalCheckAlive() 139 go d.handleMessages() 140 go d.periodicalReconnectToDead() 141 go d.handlePresumedDeadPeers() 142 143 go d.connect2BootstrapPeers(bootstrapPeers) 144 145 d.logger.Info("Started", self, "incTime is", d.incTime) 146 147 return d 148 } 149 150 // Lookup returns a network member, or nil if not found 151 func (d *gossipDiscoveryImpl) Lookup(PKIID common.PKIidType) *NetworkMember { 152 d.lock.RLock() 153 defer d.lock.RUnlock() 154 nm := d.id2Member[string(PKIID)] 155 return nm 156 } 157 158 func (d *gossipDiscoveryImpl) Connect(member NetworkMember, sendInternalEndpoint func() bool) { 159 d.logger.Debug("Entering", member) 160 defer d.logger.Debug("Exiting") 161 162 go func() { 163 for i := 0; i < maxConnectionAttempts && !d.toDie(); i++ { 164 peer := &NetworkMember{ 165 InternalEndpoint: member.InternalEndpoint, 166 Endpoint: member.Endpoint, 167 } 168 169 if !d.comm.Ping(peer) { 170 if d.toDie() { 171 return 172 } 173 d.logger.Warning("Could not connect to", member) 174 time.Sleep(getReconnectInterval()) 175 continue 176 } 177 req := d.createMembershipRequest(sendInternalEndpoint()).NoopSign() 178 d.comm.SendToPeer(peer, req) 179 return 180 } 181 }() 182 } 183 184 func (d *gossipDiscoveryImpl) connect2BootstrapPeers(endpoints []string) { 185 if len(d.self.InternalEndpoint) == 0 { 186 d.logger.Panic("Internal endpoint is empty:", d.self.InternalEndpoint) 187 } 188 189 if len(strings.Split(d.self.InternalEndpoint, ":")) != 2 { 190 d.logger.Panicf("Self endpoint %s isn't formatted as 'host:port'", d.self.InternalEndpoint) 191 } 192 193 myPort, err := strconv.ParseInt(strings.Split(d.self.InternalEndpoint, ":")[1], 10, 64) 194 if err != nil { 195 d.logger.Panicf("Self endpoint %s has not valid port'", d.self.InternalEndpoint) 196 } 197 198 d.logger.Info("Entering:", endpoints) 199 defer d.logger.Info("Exiting") 200 endpoints = filterOutLocalhost(endpoints, int(myPort)) 201 if len(endpoints) == 0 { 202 return 203 } 204 205 for i := 0; i < maxConnectionAttempts && !d.somePeerIsKnown() && !d.toDie(); i++ { 206 var wg sync.WaitGroup 207 req := d.createMembershipRequest(true).NoopSign() 208 wg.Add(len(endpoints)) 209 for _, endpoint := range endpoints { 210 go func(endpoint string) { 211 defer wg.Done() 212 peer := &NetworkMember{ 213 Endpoint: endpoint, 214 InternalEndpoint: endpoint, 215 } 216 if !d.comm.Ping(peer) { 217 return 218 } 219 d.comm.SendToPeer(peer, req) 220 }(endpoint) 221 } 222 wg.Wait() 223 time.Sleep(getReconnectInterval()) 224 } 225 } 226 227 func (d *gossipDiscoveryImpl) somePeerIsKnown() bool { 228 d.lock.RLock() 229 defer d.lock.RUnlock() 230 return len(d.aliveLastTS) != 0 231 } 232 233 func (d *gossipDiscoveryImpl) InitiateSync(peerNum int) { 234 if d.toDie() { 235 return 236 } 237 var peers2SendTo []*NetworkMember 238 memReq := d.createMembershipRequest(true).NoopSign() 239 240 d.lock.RLock() 241 242 n := d.aliveMembership.Size() 243 k := peerNum 244 if k > n { 245 k = n 246 } 247 248 aliveMembersAsSlice := d.aliveMembership.ToSlice() 249 for _, i := range util.GetRandomIndices(k, n-1) { 250 pulledPeer := aliveMembersAsSlice[i].GetAliveMsg().Membership 251 var internalEndpoint string 252 if aliveMembersAsSlice[i].Envelope.SecretEnvelope != nil { 253 internalEndpoint = aliveMembersAsSlice[i].Envelope.SecretEnvelope.InternalEndpoint() 254 } 255 netMember := &NetworkMember{ 256 Endpoint: pulledPeer.Endpoint, 257 Metadata: pulledPeer.Metadata, 258 PKIid: pulledPeer.PkiId, 259 InternalEndpoint: internalEndpoint, 260 } 261 peers2SendTo = append(peers2SendTo, netMember) 262 } 263 264 d.lock.RUnlock() 265 266 for _, netMember := range peers2SendTo { 267 d.comm.SendToPeer(netMember, memReq) 268 } 269 } 270 271 func (d *gossipDiscoveryImpl) handlePresumedDeadPeers() { 272 defer d.logger.Debug("Stopped") 273 274 for !d.toDie() { 275 select { 276 case deadPeer := <-d.comm.PresumedDead(): 277 if d.isAlive(deadPeer) { 278 d.expireDeadMembers([]common.PKIidType{deadPeer}) 279 } 280 case s := <-d.toDieChan: 281 d.toDieChan <- s 282 return 283 } 284 } 285 } 286 287 func (d *gossipDiscoveryImpl) isAlive(pkiID common.PKIidType) bool { 288 d.lock.RLock() 289 defer d.lock.RUnlock() 290 _, alive := d.aliveLastTS[string(pkiID)] 291 return alive 292 } 293 294 func (d *gossipDiscoveryImpl) handleMessages() { 295 defer d.logger.Debug("Stopped") 296 297 in := d.comm.Accept() 298 for !d.toDie() { 299 select { 300 case s := <-d.toDieChan: 301 d.toDieChan <- s 302 return 303 case m := <-in: 304 d.handleMsgFromComm(m) 305 } 306 } 307 } 308 309 func (d *gossipDiscoveryImpl) handleMsgFromComm(m *proto.SignedGossipMessage) { 310 if m == nil { 311 return 312 } 313 if m.GetAliveMsg() == nil && m.GetMemRes() == nil && m.GetMemReq() == nil { 314 d.logger.Warning("Got message with wrong type (expected Alive or MembershipResponse or MembershipRequest message):", m.GossipMessage) 315 return 316 } 317 318 d.logger.Debug("Got message:", m) 319 defer d.logger.Debug("Exiting") 320 321 if memReq := m.GetMemReq(); memReq != nil { 322 selfInfoGossipMsg, err := memReq.SelfInformation.ToGossipMessage() 323 if err != nil { 324 d.logger.Warning("Failed deserializing GossipMessage from envelope:", err) 325 return 326 } 327 328 if d.msgStore.CheckValid(m) { 329 d.handleAliveMessage(selfInfoGossipMsg) 330 } 331 332 var internalEndpoint string 333 if m.Envelope.SecretEnvelope != nil { 334 internalEndpoint = m.Envelope.SecretEnvelope.InternalEndpoint() 335 } 336 337 // Sending a membership response to a peer may block this routine 338 // in case the sending is deliberately slow (i.e attack). 339 // will keep this async until I'll write a timeout detector in the comm layer 340 go d.sendMemResponse(selfInfoGossipMsg.GetAliveMsg().Membership, internalEndpoint) 341 return 342 } 343 344 if m.IsAliveMsg() { 345 346 if !d.msgStore.Add(m) { 347 return 348 } 349 d.handleAliveMessage(m) 350 351 d.comm.Gossip(m) 352 return 353 } 354 355 if memResp := m.GetMemRes(); memResp != nil { 356 for _, env := range memResp.Alive { 357 am, err := env.ToGossipMessage() 358 if err != nil { 359 d.logger.Warning("Membership response contains an invalid message from an online peer:", err) 360 return 361 } 362 if !am.IsAliveMsg() { 363 d.logger.Warning("Expected alive message, got", am, "instead") 364 return 365 } 366 367 if d.msgStore.CheckValid(m) { 368 d.handleAliveMessage(am) 369 } 370 371 } 372 373 for _, env := range memResp.Dead { 374 dm, err := env.ToGossipMessage() 375 if err != nil { 376 d.logger.Warning("Membership response contains an invalid message from an offline peer", err) 377 return 378 } 379 if !d.crypt.ValidateAliveMsg(dm) { 380 d.logger.Warningf("Alive message isn't authentic, someone spoofed %s's identity", dm.GetAliveMsg().Membership) 381 continue 382 } 383 384 if !d.msgStore.CheckValid(m) { 385 //Newer alive message exist 386 return 387 } 388 389 newDeadMembers := []*proto.SignedGossipMessage{} 390 d.lock.RLock() 391 if _, known := d.id2Member[string(dm.GetAliveMsg().Membership.PkiId)]; !known { 392 newDeadMembers = append(newDeadMembers, dm) 393 } 394 d.lock.RUnlock() 395 d.learnNewMembers([]*proto.SignedGossipMessage{}, newDeadMembers) 396 } 397 } 398 } 399 400 func (d *gossipDiscoveryImpl) sendMemResponse(targetMember *proto.Member, internalEndpoint string) { 401 d.logger.Debug("Entering", targetMember) 402 403 targetPeer := &NetworkMember{ 404 Endpoint: targetMember.Endpoint, 405 Metadata: targetMember.Metadata, 406 PKIid: targetMember.PkiId, 407 InternalEndpoint: internalEndpoint, 408 } 409 410 memResp := d.createMembershipResponse(targetPeer) 411 if memResp == nil { 412 errMsg := `Got a membership request from a peer that shouldn't have sent one: %v, closing connection to the peer as a result.` 413 d.logger.Warningf(errMsg, targetMember) 414 d.comm.CloseConn(targetPeer) 415 return 416 } 417 418 defer d.logger.Debug("Exiting, replying with", memResp) 419 420 d.comm.SendToPeer(targetPeer, (&proto.GossipMessage{ 421 Tag: proto.GossipMessage_EMPTY, 422 Nonce: uint64(0), 423 Content: &proto.GossipMessage_MemRes{ 424 MemRes: memResp, 425 }, 426 }).NoopSign()) 427 } 428 429 func (d *gossipDiscoveryImpl) createMembershipResponse(targetMember *NetworkMember) *proto.MembershipResponse { 430 shouldBeDisclosed, omitConcealedFields := d.disclosurePolicy(targetMember) 431 aliveMsg := d.createAliveMessage(true) 432 433 if !shouldBeDisclosed(aliveMsg) { 434 return nil 435 } 436 437 d.lock.RLock() 438 defer d.lock.RUnlock() 439 440 deadPeers := []*proto.Envelope{} 441 442 for _, dm := range d.deadMembership.ToSlice() { 443 444 if !shouldBeDisclosed(dm) { 445 continue 446 } 447 deadPeers = append(deadPeers, omitConcealedFields(dm)) 448 } 449 450 var aliveSnapshot []*proto.Envelope 451 for _, am := range d.aliveMembership.ToSlice() { 452 if !shouldBeDisclosed(am) { 453 continue 454 } 455 aliveSnapshot = append(aliveSnapshot, omitConcealedFields(am)) 456 } 457 458 return &proto.MembershipResponse{ 459 Alive: append(aliveSnapshot, omitConcealedFields(aliveMsg)), 460 Dead: deadPeers, 461 } 462 } 463 464 func (d *gossipDiscoveryImpl) handleAliveMessage(m *proto.SignedGossipMessage) { 465 d.logger.Debug("Entering", m) 466 defer d.logger.Debug("Exiting") 467 468 if !d.crypt.ValidateAliveMsg(m) { 469 d.logger.Warningf("Alive message isn't authentic, someone must be spoofing %s's identity", m.GetAliveMsg()) 470 return 471 } 472 473 pkiID := m.GetAliveMsg().Membership.PkiId 474 if equalPKIid(pkiID, d.self.PKIid) { 475 d.logger.Debug("Got alive message about ourselves,", m) 476 diffExternalEndpoint := d.self.Endpoint != m.GetAliveMsg().Membership.Endpoint 477 var diffInternalEndpoint bool 478 secretEnvelope := m.GetSecretEnvelope() 479 if secretEnvelope != nil && secretEnvelope.InternalEndpoint() != "" { 480 diffInternalEndpoint = secretEnvelope.InternalEndpoint() != d.self.InternalEndpoint 481 } 482 if diffInternalEndpoint || diffExternalEndpoint { 483 d.logger.Error("Bad configuration detected: Received AliveMessage from a peer with the same PKI-ID as myself:", m.GossipMessage) 484 } 485 486 return 487 } 488 489 ts := m.GetAliveMsg().Timestamp 490 491 d.lock.RLock() 492 _, known := d.id2Member[string(pkiID)] 493 d.lock.RUnlock() 494 495 if !known { 496 d.learnNewMembers([]*proto.SignedGossipMessage{m}, []*proto.SignedGossipMessage{}) 497 return 498 } 499 500 d.lock.RLock() 501 _, isAlive := d.aliveLastTS[string(pkiID)] 502 lastDeadTS, isDead := d.deadLastTS[string(pkiID)] 503 d.lock.RUnlock() 504 505 if !isAlive && !isDead { 506 d.logger.Panicf("Member %s is known but not found neither in alive nor in dead lastTS maps, isAlive=%v, isDead=%v", m.GetAliveMsg().Membership.Endpoint, isAlive, isDead) 507 return 508 } 509 510 if isAlive && isDead { 511 d.logger.Panicf("Member %s is both alive and dead at the same time", m.GetAliveMsg().Membership) 512 return 513 } 514 515 if isDead { 516 if before(lastDeadTS, ts) { 517 // resurrect peer 518 d.resurrectMember(m, *ts) 519 } else if !same(lastDeadTS, ts) { 520 d.logger.Debug(m.GetAliveMsg().Membership, "lastDeadTS:", lastDeadTS, "but got ts:", ts) 521 } 522 return 523 } 524 525 d.lock.RLock() 526 lastAliveTS, isAlive := d.aliveLastTS[string(pkiID)] 527 d.lock.RUnlock() 528 529 if isAlive { 530 if before(lastAliveTS, ts) { 531 d.learnExistingMembers([]*proto.SignedGossipMessage{m}) 532 } else if !same(lastAliveTS, ts) { 533 d.logger.Debug(m.GetAliveMsg().Membership, "lastAliveTS:", lastAliveTS, "but got ts:", ts) 534 } 535 536 } 537 // else, ignore the message because it is too old 538 } 539 540 func (d *gossipDiscoveryImpl) resurrectMember(am *proto.SignedGossipMessage, t proto.PeerTime) { 541 d.logger.Info("Entering, AliveMessage:", am, "t:", t) 542 defer d.logger.Info("Exiting") 543 d.lock.Lock() 544 defer d.lock.Unlock() 545 546 member := am.GetAliveMsg().Membership 547 pkiID := member.PkiId 548 d.aliveLastTS[string(pkiID)] = ×tamp{ 549 lastSeen: time.Now(), 550 seqNum: t.SeqNum, 551 incTime: tsToTime(t.IncNumber), 552 } 553 554 var internalEndpoint string 555 if prevNetMem := d.id2Member[string(pkiID)]; prevNetMem != nil { 556 internalEndpoint = prevNetMem.InternalEndpoint 557 } 558 if am.Envelope.SecretEnvelope != nil { 559 internalEndpoint = am.Envelope.SecretEnvelope.InternalEndpoint() 560 } 561 562 d.id2Member[string(pkiID)] = &NetworkMember{ 563 Endpoint: member.Endpoint, 564 Metadata: member.Metadata, 565 PKIid: member.PkiId, 566 InternalEndpoint: internalEndpoint, 567 } 568 569 delete(d.deadLastTS, string(pkiID)) 570 d.deadMembership.Remove(common.PKIidType(pkiID)) 571 d.aliveMembership.Put(common.PKIidType(pkiID), &proto.SignedGossipMessage{GossipMessage: am.GossipMessage, Envelope: am.Envelope}) 572 } 573 574 func (d *gossipDiscoveryImpl) periodicalReconnectToDead() { 575 defer d.logger.Debug("Stopped") 576 577 for !d.toDie() { 578 wg := &sync.WaitGroup{} 579 580 for _, member := range d.copyLastSeen(d.deadLastTS) { 581 wg.Add(1) 582 go func(member NetworkMember) { 583 defer wg.Done() 584 if d.comm.Ping(&member) { 585 d.logger.Debug(member, "is responding, sending membership request") 586 d.sendMembershipRequest(&member, true) 587 } else { 588 d.logger.Debug(member, "is still dead") 589 } 590 }(member) 591 } 592 593 wg.Wait() 594 d.logger.Debug("Sleeping", getReconnectInterval()) 595 time.Sleep(getReconnectInterval()) 596 } 597 } 598 599 func (d *gossipDiscoveryImpl) sendMembershipRequest(member *NetworkMember, includeInternalEndpoint bool) { 600 d.comm.SendToPeer(member, d.createMembershipRequest(includeInternalEndpoint)) 601 } 602 603 func (d *gossipDiscoveryImpl) createMembershipRequest(includeInternalEndpoint bool) *proto.SignedGossipMessage { 604 req := &proto.MembershipRequest{ 605 SelfInformation: d.createAliveMessage(includeInternalEndpoint).Envelope, 606 // TODO: sending the known peers is not secure because the remote peer might shouldn't know 607 // TODO: about the known peers. I'm deprecating this until a secure mechanism will be implemented. 608 // TODO: See FAB-2570 for tracking this issue. 609 Known: [][]byte{}, 610 } 611 return (&proto.GossipMessage{ 612 Tag: proto.GossipMessage_EMPTY, 613 Nonce: uint64(0), 614 Content: &proto.GossipMessage_MemReq{ 615 MemReq: req, 616 }, 617 }).NoopSign() 618 } 619 620 func (d *gossipDiscoveryImpl) copyLastSeen(lastSeenMap map[string]*timestamp) []NetworkMember { 621 d.lock.RLock() 622 defer d.lock.RUnlock() 623 624 res := []NetworkMember{} 625 for pkiIDStr := range lastSeenMap { 626 res = append(res, *(d.id2Member[pkiIDStr])) 627 } 628 return res 629 } 630 631 func (d *gossipDiscoveryImpl) periodicalCheckAlive() { 632 defer d.logger.Debug("Stopped") 633 634 for !d.toDie() { 635 time.Sleep(getAliveExpirationCheckInterval()) 636 dead := d.getDeadMembers() 637 if len(dead) > 0 { 638 d.logger.Debugf("Got %v dead members: %v", len(dead), dead) 639 d.expireDeadMembers(dead) 640 } 641 } 642 } 643 644 func (d *gossipDiscoveryImpl) expireDeadMembers(dead []common.PKIidType) { 645 d.logger.Warning("Entering", dead) 646 defer d.logger.Warning("Exiting") 647 648 var deadMembers2Expire []*NetworkMember 649 650 d.lock.Lock() 651 652 for _, pkiID := range dead { 653 if _, isAlive := d.aliveLastTS[string(pkiID)]; !isAlive { 654 continue 655 } 656 deadMembers2Expire = append(deadMembers2Expire, d.id2Member[string(pkiID)]) 657 // move lastTS from alive to dead 658 lastTS, hasLastTS := d.aliveLastTS[string(pkiID)] 659 if hasLastTS { 660 d.deadLastTS[string(pkiID)] = lastTS 661 delete(d.aliveLastTS, string(pkiID)) 662 } 663 664 if am := d.aliveMembership.MsgByID(pkiID); am != nil { 665 d.deadMembership.Put(pkiID, am) 666 d.aliveMembership.Remove(pkiID) 667 } 668 } 669 670 d.lock.Unlock() 671 672 for _, member2Expire := range deadMembers2Expire { 673 d.logger.Warning("Closing connection to", member2Expire) 674 d.comm.CloseConn(member2Expire) 675 } 676 } 677 678 func (d *gossipDiscoveryImpl) getDeadMembers() []common.PKIidType { 679 d.lock.RLock() 680 defer d.lock.RUnlock() 681 682 dead := []common.PKIidType{} 683 for id, last := range d.aliveLastTS { 684 elapsedNonAliveTime := time.Since(last.lastSeen) 685 if elapsedNonAliveTime.Nanoseconds() > getAliveExpirationTimeout().Nanoseconds() { 686 d.logger.Warning("Haven't heard from", id, "for", elapsedNonAliveTime) 687 dead = append(dead, common.PKIidType(id)) 688 } 689 } 690 return dead 691 } 692 693 func (d *gossipDiscoveryImpl) periodicalSendAlive() { 694 defer d.logger.Debug("Stopped") 695 696 for !d.toDie() { 697 d.logger.Debug("Sleeping", getAliveTimeInterval()) 698 time.Sleep(getAliveTimeInterval()) 699 d.comm.Gossip(d.createAliveMessage(true)) 700 } 701 } 702 703 func (d *gossipDiscoveryImpl) createAliveMessage(includeInternalEndpoint bool) *proto.SignedGossipMessage { 704 d.lock.Lock() 705 d.seqNum++ 706 seqNum := d.seqNum 707 708 endpoint := d.self.Endpoint 709 meta := d.self.Metadata 710 pkiID := d.self.PKIid 711 internalEndpoint := d.self.InternalEndpoint 712 713 d.lock.Unlock() 714 715 msg2Gossip := &proto.GossipMessage{ 716 Tag: proto.GossipMessage_EMPTY, 717 Content: &proto.GossipMessage_AliveMsg{ 718 AliveMsg: &proto.AliveMessage{ 719 Membership: &proto.Member{ 720 Endpoint: endpoint, 721 Metadata: meta, 722 PkiId: pkiID, 723 }, 724 Timestamp: &proto.PeerTime{ 725 IncNumber: uint64(d.incTime), 726 SeqNum: seqNum, 727 }, 728 }, 729 }, 730 } 731 732 signedMsg := &proto.SignedGossipMessage{ 733 GossipMessage: msg2Gossip, 734 Envelope: d.crypt.SignMessage(msg2Gossip, internalEndpoint), 735 } 736 737 if !includeInternalEndpoint { 738 signedMsg.Envelope.SecretEnvelope = nil 739 } 740 741 return signedMsg 742 } 743 744 func (d *gossipDiscoveryImpl) learnExistingMembers(aliveArr []*proto.SignedGossipMessage) { 745 d.logger.Debugf("Entering: learnedMembers={%v}", aliveArr) 746 defer d.logger.Debug("Exiting") 747 748 d.lock.Lock() 749 defer d.lock.Unlock() 750 751 for _, m := range aliveArr { 752 am := m.GetAliveMsg() 753 if m == nil { 754 d.logger.Warning("Expected alive message, got instead:", m) 755 return 756 } 757 d.logger.Debug("updating", am) 758 759 var internalEndpoint string 760 if prevNetMem := d.id2Member[string(am.Membership.PkiId)]; prevNetMem != nil { 761 internalEndpoint = prevNetMem.InternalEndpoint 762 } 763 if m.Envelope.SecretEnvelope != nil { 764 internalEndpoint = m.Envelope.SecretEnvelope.InternalEndpoint() 765 } 766 767 // update member's data 768 member := d.id2Member[string(am.Membership.PkiId)] 769 member.Endpoint = am.Membership.Endpoint 770 member.Metadata = am.Membership.Metadata 771 member.InternalEndpoint = internalEndpoint 772 773 if _, isKnownAsDead := d.deadLastTS[string(am.Membership.PkiId)]; isKnownAsDead { 774 d.logger.Warning(am.Membership, "has already expired") 775 continue 776 } 777 778 if _, isKnownAsAlive := d.aliveLastTS[string(am.Membership.PkiId)]; !isKnownAsAlive { 779 d.logger.Warning(am.Membership, "has already expired") 780 continue 781 } else { 782 d.logger.Debug("Updating aliveness data:", am) 783 // update existing aliveness data 784 alive := d.aliveLastTS[string(am.Membership.PkiId)] 785 alive.incTime = tsToTime(am.Timestamp.IncNumber) 786 alive.lastSeen = time.Now() 787 alive.seqNum = am.Timestamp.SeqNum 788 789 if am := d.aliveMembership.MsgByID(m.GetAliveMsg().Membership.PkiId); am == nil { 790 d.logger.Debug("Adding", am, "to aliveMembership") 791 msg := &proto.SignedGossipMessage{GossipMessage: m.GossipMessage, Envelope: am.Envelope} 792 d.aliveMembership.Put(m.GetAliveMsg().Membership.PkiId, msg) 793 } else { 794 d.logger.Debug("Replacing", am, "in aliveMembership") 795 am.GossipMessage = m.GossipMessage 796 am.Envelope = m.Envelope 797 } 798 } 799 } 800 } 801 802 func (d *gossipDiscoveryImpl) learnNewMembers(aliveMembers []*proto.SignedGossipMessage, deadMembers []*proto.SignedGossipMessage) { 803 d.logger.Debugf("Entering: learnedMembers={%v}, deadMembers={%v}", aliveMembers, deadMembers) 804 defer d.logger.Debugf("Exiting") 805 806 d.lock.Lock() 807 defer d.lock.Unlock() 808 809 for _, am := range aliveMembers { 810 if equalPKIid(am.GetAliveMsg().Membership.PkiId, d.self.PKIid) { 811 continue 812 } 813 d.aliveLastTS[string(am.GetAliveMsg().Membership.PkiId)] = ×tamp{ 814 incTime: tsToTime(am.GetAliveMsg().Timestamp.IncNumber), 815 lastSeen: time.Now(), 816 seqNum: am.GetAliveMsg().Timestamp.SeqNum, 817 } 818 819 d.aliveMembership.Put(am.GetAliveMsg().Membership.PkiId, &proto.SignedGossipMessage{GossipMessage: am.GossipMessage, Envelope: am.Envelope}) 820 d.logger.Debugf("Learned about a new alive member: %v", am) 821 } 822 823 for _, dm := range deadMembers { 824 if equalPKIid(dm.GetAliveMsg().Membership.PkiId, d.self.PKIid) { 825 continue 826 } 827 d.deadLastTS[string(dm.GetAliveMsg().Membership.PkiId)] = ×tamp{ 828 incTime: tsToTime(dm.GetAliveMsg().Timestamp.IncNumber), 829 lastSeen: time.Now(), 830 seqNum: dm.GetAliveMsg().Timestamp.SeqNum, 831 } 832 833 d.deadMembership.Put(dm.GetAliveMsg().Membership.PkiId, &proto.SignedGossipMessage{GossipMessage: dm.GossipMessage, Envelope: dm.Envelope}) 834 d.logger.Debugf("Learned about a new dead member: %v", dm) 835 } 836 837 // update the member in any case 838 for _, a := range [][]*proto.SignedGossipMessage{aliveMembers, deadMembers} { 839 for _, m := range a { 840 member := m.GetAliveMsg() 841 if member == nil { 842 d.logger.Warning("Expected alive message, got instead:", m) 843 return 844 } 845 846 var internalEndpoint string 847 if m.Envelope.SecretEnvelope != nil { 848 internalEndpoint = m.Envelope.SecretEnvelope.InternalEndpoint() 849 } 850 851 if prevNetMem := d.id2Member[string(member.Membership.PkiId)]; prevNetMem != nil { 852 internalEndpoint = prevNetMem.InternalEndpoint 853 } 854 855 d.id2Member[string(member.Membership.PkiId)] = &NetworkMember{ 856 Endpoint: member.Membership.Endpoint, 857 Metadata: member.Membership.Metadata, 858 PKIid: member.Membership.PkiId, 859 InternalEndpoint: internalEndpoint, 860 } 861 } 862 } 863 } 864 865 func (d *gossipDiscoveryImpl) GetMembership() []NetworkMember { 866 if d.toDie() { 867 return []NetworkMember{} 868 } 869 d.lock.RLock() 870 defer d.lock.RUnlock() 871 872 response := []NetworkMember{} 873 for _, m := range d.aliveMembership.ToSlice() { 874 member := m.GetAliveMsg() 875 response = append(response, NetworkMember{ 876 PKIid: member.Membership.PkiId, 877 Endpoint: member.Membership.Endpoint, 878 Metadata: member.Membership.Metadata, 879 InternalEndpoint: d.id2Member[string(m.GetAliveMsg().Membership.PkiId)].InternalEndpoint, 880 }) 881 } 882 return response 883 884 } 885 886 func tsToTime(ts uint64) time.Time { 887 return time.Unix(int64(0), int64(ts)) 888 } 889 890 func (d *gossipDiscoveryImpl) UpdateMetadata(md []byte) { 891 d.lock.Lock() 892 defer d.lock.Unlock() 893 d.self.Metadata = md 894 } 895 896 func (d *gossipDiscoveryImpl) UpdateEndpoint(endpoint string) { 897 d.lock.Lock() 898 defer d.lock.Unlock() 899 900 d.self.Endpoint = endpoint 901 } 902 903 func (d *gossipDiscoveryImpl) Self() NetworkMember { 904 return NetworkMember{ 905 Endpoint: d.self.Endpoint, 906 Metadata: d.self.Metadata, 907 PKIid: d.self.PKIid, 908 InternalEndpoint: d.self.InternalEndpoint, 909 } 910 } 911 912 func (d *gossipDiscoveryImpl) toDie() bool { 913 toDie := atomic.LoadInt32(&d.toDieFlag) == int32(1) 914 return toDie 915 } 916 917 func (d *gossipDiscoveryImpl) Stop() { 918 defer d.logger.Info("Stopped") 919 d.logger.Info("Stopping") 920 atomic.StoreInt32(&d.toDieFlag, int32(1)) 921 d.msgStore.Stop() 922 d.toDieChan <- struct{}{} 923 } 924 925 func equalPKIid(a, b common.PKIidType) bool { 926 return bytes.Equal(a, b) 927 } 928 929 func same(a *timestamp, b *proto.PeerTime) bool { 930 return uint64(a.incTime.UnixNano()) == b.IncNumber && a.seqNum == b.SeqNum 931 } 932 933 func before(a *timestamp, b *proto.PeerTime) bool { 934 return (uint64(a.incTime.UnixNano()) == b.IncNumber && a.seqNum < b.SeqNum) || 935 uint64(a.incTime.UnixNano()) < b.IncNumber 936 } 937 938 func getAliveTimeInterval() time.Duration { 939 return util.GetDurationOrDefault("peer.gossip.aliveTimeInterval", defaultHelloInterval) 940 } 941 942 func getAliveExpirationTimeout() time.Duration { 943 return util.GetDurationOrDefault("peer.gossip.aliveExpirationTimeout", 5*getAliveTimeInterval()) 944 } 945 946 func getAliveExpirationCheckInterval() time.Duration { 947 if aliveExpirationCheckInterval != 0 { 948 return aliveExpirationCheckInterval 949 } 950 951 return time.Duration(getAliveExpirationTimeout() / 10) 952 } 953 954 func getReconnectInterval() time.Duration { 955 return util.GetDurationOrDefault("peer.gossip.reconnectInterval", getAliveExpirationTimeout()) 956 } 957 958 func filterOutLocalhost(endpoints []string, port int) []string { 959 var returnedEndpoints []string 960 for _, endpoint := range endpoints { 961 if endpoint == fmt.Sprintf("127.0.0.1:%d", port) || endpoint == fmt.Sprintf("localhost:%d", port) { 962 continue 963 } 964 returnedEndpoints = append(returnedEndpoints, endpoint) 965 } 966 return returnedEndpoints 967 }