github.com/kchristidis/fabric@v1.0.4-0.20171028114726-837acd08cde1/gossip/discovery/discovery_impl.go (about) 1 /* 2 Copyright IBM Corp. 2016 All Rights Reserved. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package discovery 18 19 import ( 20 "bytes" 21 "errors" 22 "fmt" 23 "math" 24 "strconv" 25 "strings" 26 "sync" 27 "sync/atomic" 28 "time" 29 30 "github.com/hyperledger/fabric/gossip/common" 31 "github.com/hyperledger/fabric/gossip/gossip/msgstore" 32 "github.com/hyperledger/fabric/gossip/util" 33 proto "github.com/hyperledger/fabric/protos/gossip" 34 "github.com/op/go-logging" 35 ) 36 37 const defaultHelloInterval = time.Duration(5) * time.Second 38 const msgExpirationFactor = 20 39 40 var aliveExpirationCheckInterval time.Duration 41 var maxConnectionAttempts = 120 42 43 // SetAliveTimeInterval sets the alive time interval 44 func SetAliveTimeInterval(interval time.Duration) { 45 util.SetDuration("peer.gossip.aliveTimeInterval", interval) 46 } 47 48 // SetAliveExpirationTimeout sets the expiration timeout 49 func SetAliveExpirationTimeout(timeout time.Duration) { 50 util.SetDuration("peer.gossip.aliveExpirationTimeout", timeout) 51 aliveExpirationCheckInterval = time.Duration(timeout / 10) 52 } 53 54 // SetAliveExpirationCheckInterval sets the expiration check interval 55 func SetAliveExpirationCheckInterval(interval time.Duration) { 56 aliveExpirationCheckInterval = interval 57 } 58 59 // SetReconnectInterval sets the reconnect interval 60 func SetReconnectInterval(interval time.Duration) { 61 util.SetDuration("peer.gossip.reconnectInterval", interval) 62 } 63 64 // SetMaxConnAttempts sets the maximum number of connection 65 // attempts the peer would perform when invoking Connect() 66 func SetMaxConnAttempts(attempts int) { 67 maxConnectionAttempts = attempts 68 } 69 70 type timestamp struct { 71 incTime time.Time 72 seqNum uint64 73 lastSeen time.Time 74 } 75 76 func (ts *timestamp) String() string { 77 return fmt.Sprintf("%v, %v", ts.incTime.UnixNano(), ts.seqNum) 78 } 79 80 type gossipDiscoveryImpl struct { 81 incTime uint64 82 seqNum uint64 83 self NetworkMember 84 deadLastTS map[string]*timestamp // H 85 aliveLastTS map[string]*timestamp // V 86 id2Member map[string]*NetworkMember // all known members 87 aliveMembership *util.MembershipStore 88 deadMembership *util.MembershipStore 89 90 msgStore *aliveMsgStore 91 92 comm CommService 93 crypt CryptoService 94 lock *sync.RWMutex 95 96 toDieChan chan struct{} 97 toDieFlag int32 98 port int 99 logger *logging.Logger 100 disclosurePolicy DisclosurePolicy 101 pubsub *util.PubSub 102 } 103 104 // NewDiscoveryService returns a new discovery service with the comm module passed and the crypto service passed 105 func NewDiscoveryService(self NetworkMember, comm CommService, crypt CryptoService, disPol DisclosurePolicy) Discovery { 106 d := &gossipDiscoveryImpl{ 107 self: self, 108 incTime: uint64(time.Now().UnixNano()), 109 seqNum: uint64(0), 110 deadLastTS: make(map[string]*timestamp), 111 aliveLastTS: make(map[string]*timestamp), 112 id2Member: make(map[string]*NetworkMember), 113 aliveMembership: util.NewMembershipStore(), 114 deadMembership: util.NewMembershipStore(), 115 crypt: crypt, 116 comm: comm, 117 lock: &sync.RWMutex{}, 118 toDieChan: make(chan struct{}, 1), 119 toDieFlag: int32(0), 120 logger: util.GetLogger(util.LoggingDiscoveryModule, self.InternalEndpoint), 121 disclosurePolicy: disPol, 122 pubsub: util.NewPubSub(), 123 } 124 125 d.validateSelfConfig() 126 d.msgStore = newAliveMsgStore(d) 127 128 go d.periodicalSendAlive() 129 go d.periodicalCheckAlive() 130 go d.handleMessages() 131 go d.periodicalReconnectToDead() 132 go d.handlePresumedDeadPeers() 133 134 d.logger.Info("Started", self, "incTime is", d.incTime) 135 136 return d 137 } 138 139 // Lookup returns a network member, or nil if not found 140 func (d *gossipDiscoveryImpl) Lookup(PKIID common.PKIidType) *NetworkMember { 141 if bytes.Equal(PKIID, d.self.PKIid) { 142 return &d.self 143 } 144 d.lock.RLock() 145 defer d.lock.RUnlock() 146 nm := d.id2Member[string(PKIID)] 147 return nm 148 } 149 150 func (d *gossipDiscoveryImpl) Connect(member NetworkMember, id identifier) { 151 for _, endpoint := range []string{member.InternalEndpoint, member.Endpoint} { 152 if d.isMyOwnEndpoint(endpoint) { 153 d.logger.Debug("Skipping connecting to myself") 154 return 155 } 156 } 157 158 d.logger.Debug("Entering", member) 159 defer d.logger.Debug("Exiting") 160 go func() { 161 for i := 0; i < maxConnectionAttempts && !d.toDie(); i++ { 162 id, err := id() 163 if err != nil { 164 if d.toDie() { 165 return 166 } 167 d.logger.Warning("Could not connect to", member, ":", err) 168 time.Sleep(getReconnectInterval()) 169 continue 170 } 171 peer := &NetworkMember{ 172 InternalEndpoint: member.InternalEndpoint, 173 Endpoint: member.Endpoint, 174 PKIid: id.ID, 175 } 176 m, err := d.createMembershipRequest(id.SelfOrg) 177 if err != nil { 178 d.logger.Warning("Failed creating membership request:", err) 179 continue 180 } 181 req, err := m.NoopSign() 182 if err != nil { 183 d.logger.Warning("Failed creating SignedGossipMessage:", err) 184 continue 185 } 186 req.Nonce = util.RandomUInt64() 187 req, err = req.NoopSign() 188 if err != nil { 189 d.logger.Warning("Failed adding NONCE to SignedGossipMessage", err) 190 continue 191 } 192 go d.sendUntilAcked(peer, req) 193 return 194 } 195 196 }() 197 } 198 199 func (d *gossipDiscoveryImpl) isMyOwnEndpoint(endpoint string) bool { 200 return endpoint == fmt.Sprintf("127.0.0.1:%d", d.port) || endpoint == fmt.Sprintf("localhost:%d", d.port) || 201 endpoint == d.self.InternalEndpoint || endpoint == d.self.Endpoint 202 } 203 204 func (d *gossipDiscoveryImpl) validateSelfConfig() { 205 endpoint := d.self.InternalEndpoint 206 if len(endpoint) == 0 { 207 d.logger.Panic("Internal endpoint is empty:", endpoint) 208 } 209 210 internalEndpointSplit := strings.Split(endpoint, ":") 211 if len(internalEndpointSplit) != 2 { 212 d.logger.Panicf("Self endpoint %s isn't formatted as 'host:port'", endpoint) 213 } 214 myPort, err := strconv.ParseInt(internalEndpointSplit[1], 10, 64) 215 if err != nil { 216 d.logger.Panicf("Self endpoint %s has not valid port'", endpoint) 217 } 218 219 if myPort > int64(math.MaxUint16) { 220 d.logger.Panicf("Self endpoint %s's port takes more than 16 bits", endpoint) 221 } 222 223 d.port = int(myPort) 224 } 225 226 func (d *gossipDiscoveryImpl) sendUntilAcked(peer *NetworkMember, message *proto.SignedGossipMessage) { 227 nonce := message.Nonce 228 for i := 0; i < maxConnectionAttempts && !d.toDie(); i++ { 229 sub := d.pubsub.Subscribe(fmt.Sprintf("%d", nonce), time.Second*5) 230 d.comm.SendToPeer(peer, message) 231 if _, timeoutErr := sub.Listen(); timeoutErr == nil { 232 return 233 } 234 time.Sleep(getReconnectInterval()) 235 } 236 } 237 238 func (d *gossipDiscoveryImpl) InitiateSync(peerNum int) { 239 if d.toDie() { 240 return 241 } 242 var peers2SendTo []*NetworkMember 243 m, err := d.createMembershipRequest(true) 244 if err != nil { 245 d.logger.Warning("Failed creating membership request:", err) 246 return 247 } 248 memReq, err := m.NoopSign() 249 if err != nil { 250 d.logger.Warning("Failed creating SignedGossipMessage:", err) 251 return 252 } 253 d.lock.RLock() 254 255 n := d.aliveMembership.Size() 256 k := peerNum 257 if k > n { 258 k = n 259 } 260 261 aliveMembersAsSlice := d.aliveMembership.ToSlice() 262 for _, i := range util.GetRandomIndices(k, n-1) { 263 pulledPeer := aliveMembersAsSlice[i].GetAliveMsg().Membership 264 var internalEndpoint string 265 if aliveMembersAsSlice[i].Envelope.SecretEnvelope != nil { 266 internalEndpoint = aliveMembersAsSlice[i].Envelope.SecretEnvelope.InternalEndpoint() 267 } 268 netMember := &NetworkMember{ 269 Endpoint: pulledPeer.Endpoint, 270 Metadata: pulledPeer.Metadata, 271 PKIid: pulledPeer.PkiId, 272 InternalEndpoint: internalEndpoint, 273 } 274 peers2SendTo = append(peers2SendTo, netMember) 275 } 276 277 d.lock.RUnlock() 278 279 for _, netMember := range peers2SendTo { 280 d.comm.SendToPeer(netMember, memReq) 281 } 282 } 283 284 func (d *gossipDiscoveryImpl) handlePresumedDeadPeers() { 285 defer d.logger.Debug("Stopped") 286 287 for !d.toDie() { 288 select { 289 case deadPeer := <-d.comm.PresumedDead(): 290 if d.isAlive(deadPeer) { 291 d.expireDeadMembers([]common.PKIidType{deadPeer}) 292 } 293 case s := <-d.toDieChan: 294 d.toDieChan <- s 295 return 296 } 297 } 298 } 299 300 func (d *gossipDiscoveryImpl) isAlive(pkiID common.PKIidType) bool { 301 d.lock.RLock() 302 defer d.lock.RUnlock() 303 _, alive := d.aliveLastTS[string(pkiID)] 304 return alive 305 } 306 307 func (d *gossipDiscoveryImpl) handleMessages() { 308 defer d.logger.Debug("Stopped") 309 310 in := d.comm.Accept() 311 for !d.toDie() { 312 select { 313 case s := <-d.toDieChan: 314 d.toDieChan <- s 315 return 316 case m := <-in: 317 d.handleMsgFromComm(m) 318 } 319 } 320 } 321 322 func (d *gossipDiscoveryImpl) handleMsgFromComm(m *proto.SignedGossipMessage) { 323 if m == nil { 324 return 325 } 326 if m.GetAliveMsg() == nil && m.GetMemRes() == nil && m.GetMemReq() == nil { 327 d.logger.Warning("Got message with wrong type (expected Alive or MembershipResponse or MembershipRequest message):", m.GossipMessage) 328 return 329 } 330 331 d.logger.Debug("Got message:", m) 332 defer d.logger.Debug("Exiting") 333 334 if memReq := m.GetMemReq(); memReq != nil { 335 selfInfoGossipMsg, err := memReq.SelfInformation.ToGossipMessage() 336 if err != nil { 337 d.logger.Warning("Failed deserializing GossipMessage from envelope:", err) 338 return 339 } 340 341 if d.msgStore.CheckValid(selfInfoGossipMsg) { 342 d.handleAliveMessage(selfInfoGossipMsg) 343 } 344 345 var internalEndpoint string 346 if m.Envelope.SecretEnvelope != nil { 347 internalEndpoint = m.Envelope.SecretEnvelope.InternalEndpoint() 348 } 349 350 // Sending a membership response to a peer may block this routine 351 // in case the sending is deliberately slow (i.e attack). 352 // will keep this async until I'll write a timeout detector in the comm layer 353 go d.sendMemResponse(selfInfoGossipMsg.GetAliveMsg().Membership, internalEndpoint, m.Nonce) 354 return 355 } 356 357 if m.IsAliveMsg() { 358 359 if !d.msgStore.Add(m) { 360 return 361 } 362 d.handleAliveMessage(m) 363 364 d.comm.Gossip(m) 365 return 366 } 367 368 if memResp := m.GetMemRes(); memResp != nil { 369 d.pubsub.Publish(fmt.Sprintf("%d", m.Nonce), m.Nonce) 370 for _, env := range memResp.Alive { 371 am, err := env.ToGossipMessage() 372 if err != nil { 373 d.logger.Warning("Membership response contains an invalid message from an online peer:", err) 374 return 375 } 376 if !am.IsAliveMsg() { 377 d.logger.Warning("Expected alive message, got", am, "instead") 378 return 379 } 380 381 if d.msgStore.CheckValid(am) { 382 d.handleAliveMessage(am) 383 } 384 } 385 386 for _, env := range memResp.Dead { 387 dm, err := env.ToGossipMessage() 388 if err != nil { 389 d.logger.Warning("Membership response contains an invalid message from an offline peer", err) 390 return 391 } 392 if !d.crypt.ValidateAliveMsg(dm) { 393 d.logger.Debugf("Alive message isn't authentic, someone spoofed %s's identity", dm.GetAliveMsg().Membership) 394 continue 395 } 396 397 if !d.msgStore.CheckValid(dm) { 398 //Newer alive message exist 399 return 400 } 401 402 newDeadMembers := []*proto.SignedGossipMessage{} 403 d.lock.RLock() 404 if _, known := d.id2Member[string(dm.GetAliveMsg().Membership.PkiId)]; !known { 405 newDeadMembers = append(newDeadMembers, dm) 406 } 407 d.lock.RUnlock() 408 d.learnNewMembers([]*proto.SignedGossipMessage{}, newDeadMembers) 409 } 410 } 411 } 412 413 func (d *gossipDiscoveryImpl) sendMemResponse(targetMember *proto.Member, internalEndpoint string, nonce uint64) { 414 d.logger.Debug("Entering", targetMember) 415 416 targetPeer := &NetworkMember{ 417 Endpoint: targetMember.Endpoint, 418 Metadata: targetMember.Metadata, 419 PKIid: targetMember.PkiId, 420 InternalEndpoint: internalEndpoint, 421 } 422 423 aliveMsg, err := d.createAliveMessage(true) 424 if err != nil { 425 d.logger.Warning("Failed creating alive message:", err) 426 return 427 } 428 memResp := d.createMembershipResponse(aliveMsg, targetPeer) 429 if memResp == nil { 430 errMsg := `Got a membership request from a peer that shouldn't have sent one: %v, closing connection to the peer as a result.` 431 d.logger.Warningf(errMsg, targetMember) 432 d.comm.CloseConn(targetPeer) 433 return 434 } 435 436 defer d.logger.Debug("Exiting, replying with", memResp) 437 438 msg, err := (&proto.GossipMessage{ 439 Tag: proto.GossipMessage_EMPTY, 440 Nonce: nonce, 441 Content: &proto.GossipMessage_MemRes{ 442 MemRes: memResp, 443 }, 444 }).NoopSign() 445 if err != nil { 446 d.logger.Warning("Failed creating SignedGossipMessage:", err) 447 return 448 } 449 d.comm.SendToPeer(targetPeer, msg) 450 } 451 452 func (d *gossipDiscoveryImpl) createMembershipResponse(aliveMsg *proto.SignedGossipMessage, targetMember *NetworkMember) *proto.MembershipResponse { 453 shouldBeDisclosed, omitConcealedFields := d.disclosurePolicy(targetMember) 454 455 if !shouldBeDisclosed(aliveMsg) { 456 return nil 457 } 458 459 d.lock.RLock() 460 defer d.lock.RUnlock() 461 462 deadPeers := []*proto.Envelope{} 463 464 for _, dm := range d.deadMembership.ToSlice() { 465 466 if !shouldBeDisclosed(dm) { 467 continue 468 } 469 deadPeers = append(deadPeers, omitConcealedFields(dm)) 470 } 471 472 var aliveSnapshot []*proto.Envelope 473 for _, am := range d.aliveMembership.ToSlice() { 474 if !shouldBeDisclosed(am) { 475 continue 476 } 477 aliveSnapshot = append(aliveSnapshot, omitConcealedFields(am)) 478 } 479 480 return &proto.MembershipResponse{ 481 Alive: append(aliveSnapshot, omitConcealedFields(aliveMsg)), 482 Dead: deadPeers, 483 } 484 } 485 486 func (d *gossipDiscoveryImpl) handleAliveMessage(m *proto.SignedGossipMessage) { 487 d.logger.Debug("Entering", m) 488 defer d.logger.Debug("Exiting") 489 490 if !d.crypt.ValidateAliveMsg(m) { 491 d.logger.Debugf("Alive message isn't authentic, someone must be spoofing %s's identity", m.GetAliveMsg()) 492 return 493 } 494 495 pkiID := m.GetAliveMsg().Membership.PkiId 496 if equalPKIid(pkiID, d.self.PKIid) { 497 d.logger.Debug("Got alive message about ourselves,", m) 498 diffExternalEndpoint := d.self.Endpoint != m.GetAliveMsg().Membership.Endpoint 499 var diffInternalEndpoint bool 500 secretEnvelope := m.GetSecretEnvelope() 501 if secretEnvelope != nil && secretEnvelope.InternalEndpoint() != "" { 502 diffInternalEndpoint = secretEnvelope.InternalEndpoint() != d.self.InternalEndpoint 503 } 504 if diffInternalEndpoint || diffExternalEndpoint { 505 d.logger.Error("Bad configuration detected: Received AliveMessage from a peer with the same PKI-ID as myself:", m.GossipMessage) 506 } 507 508 return 509 } 510 511 ts := m.GetAliveMsg().Timestamp 512 513 d.lock.RLock() 514 _, known := d.id2Member[string(pkiID)] 515 d.lock.RUnlock() 516 517 if !known { 518 d.learnNewMembers([]*proto.SignedGossipMessage{m}, []*proto.SignedGossipMessage{}) 519 return 520 } 521 522 d.lock.RLock() 523 _, isAlive := d.aliveLastTS[string(pkiID)] 524 lastDeadTS, isDead := d.deadLastTS[string(pkiID)] 525 d.lock.RUnlock() 526 527 if !isAlive && !isDead { 528 d.logger.Panicf("Member %s is known but not found neither in alive nor in dead lastTS maps, isAlive=%v, isDead=%v", m.GetAliveMsg().Membership.Endpoint, isAlive, isDead) 529 return 530 } 531 532 if isAlive && isDead { 533 d.logger.Panicf("Member %s is both alive and dead at the same time", m.GetAliveMsg().Membership) 534 return 535 } 536 537 if isDead { 538 if before(lastDeadTS, ts) { 539 // resurrect peer 540 d.resurrectMember(m, *ts) 541 } else if !same(lastDeadTS, ts) { 542 d.logger.Debug(m.GetAliveMsg().Membership, "lastDeadTS:", lastDeadTS, "but got ts:", ts) 543 } 544 return 545 } 546 547 d.lock.RLock() 548 lastAliveTS, isAlive := d.aliveLastTS[string(pkiID)] 549 d.lock.RUnlock() 550 551 if isAlive { 552 if before(lastAliveTS, ts) { 553 d.learnExistingMembers([]*proto.SignedGossipMessage{m}) 554 } else if !same(lastAliveTS, ts) { 555 d.logger.Debug(m.GetAliveMsg().Membership, "lastAliveTS:", lastAliveTS, "but got ts:", ts) 556 } 557 558 } 559 // else, ignore the message because it is too old 560 } 561 562 func (d *gossipDiscoveryImpl) resurrectMember(am *proto.SignedGossipMessage, t proto.PeerTime) { 563 d.logger.Info("Entering, AliveMessage:", am, "t:", t) 564 defer d.logger.Info("Exiting") 565 d.lock.Lock() 566 defer d.lock.Unlock() 567 568 member := am.GetAliveMsg().Membership 569 pkiID := member.PkiId 570 d.aliveLastTS[string(pkiID)] = ×tamp{ 571 lastSeen: time.Now(), 572 seqNum: t.SeqNum, 573 incTime: tsToTime(t.IncNum), 574 } 575 576 var internalEndpoint string 577 if prevNetMem := d.id2Member[string(pkiID)]; prevNetMem != nil { 578 internalEndpoint = prevNetMem.InternalEndpoint 579 } 580 if am.Envelope.SecretEnvelope != nil { 581 internalEndpoint = am.Envelope.SecretEnvelope.InternalEndpoint() 582 } 583 584 d.id2Member[string(pkiID)] = &NetworkMember{ 585 Endpoint: member.Endpoint, 586 Metadata: member.Metadata, 587 PKIid: member.PkiId, 588 InternalEndpoint: internalEndpoint, 589 } 590 591 delete(d.deadLastTS, string(pkiID)) 592 d.deadMembership.Remove(common.PKIidType(pkiID)) 593 d.aliveMembership.Put(common.PKIidType(pkiID), &proto.SignedGossipMessage{GossipMessage: am.GossipMessage, Envelope: am.Envelope}) 594 } 595 596 func (d *gossipDiscoveryImpl) periodicalReconnectToDead() { 597 defer d.logger.Debug("Stopped") 598 599 for !d.toDie() { 600 wg := &sync.WaitGroup{} 601 602 for _, member := range d.copyLastSeen(d.deadLastTS) { 603 wg.Add(1) 604 go func(member NetworkMember) { 605 defer wg.Done() 606 if d.comm.Ping(&member) { 607 d.logger.Debug(member, "is responding, sending membership request") 608 d.sendMembershipRequest(&member, true) 609 } else { 610 d.logger.Debug(member, "is still dead") 611 } 612 }(member) 613 } 614 615 wg.Wait() 616 d.logger.Debug("Sleeping", getReconnectInterval()) 617 time.Sleep(getReconnectInterval()) 618 } 619 } 620 621 func (d *gossipDiscoveryImpl) sendMembershipRequest(member *NetworkMember, includeInternalEndpoint bool) { 622 m, err := d.createMembershipRequest(includeInternalEndpoint) 623 if err != nil { 624 d.logger.Warning("Failed creating membership request:", err) 625 return 626 } 627 req, err := m.NoopSign() 628 if err != nil { 629 d.logger.Error("Failed creating SignedGossipMessage:", err) 630 return 631 } 632 d.comm.SendToPeer(member, req) 633 } 634 635 func (d *gossipDiscoveryImpl) createMembershipRequest(includeInternalEndpoint bool) (*proto.GossipMessage, error) { 636 am, err := d.createAliveMessage(includeInternalEndpoint) 637 if err != nil { 638 return nil, err 639 } 640 req := &proto.MembershipRequest{ 641 SelfInformation: am.Envelope, 642 // TODO: sending the known peers is not secure because the remote peer might shouldn't know 643 // TODO: about the known peers. I'm deprecating this until a secure mechanism will be implemented. 644 // TODO: See FAB-2570 for tracking this issue. 645 Known: [][]byte{}, 646 } 647 return &proto.GossipMessage{ 648 Tag: proto.GossipMessage_EMPTY, 649 Nonce: uint64(0), 650 Content: &proto.GossipMessage_MemReq{ 651 MemReq: req, 652 }, 653 }, nil 654 } 655 656 func (d *gossipDiscoveryImpl) copyLastSeen(lastSeenMap map[string]*timestamp) []NetworkMember { 657 d.lock.RLock() 658 defer d.lock.RUnlock() 659 660 res := []NetworkMember{} 661 for pkiIDStr := range lastSeenMap { 662 res = append(res, *(d.id2Member[pkiIDStr])) 663 } 664 return res 665 } 666 667 func (d *gossipDiscoveryImpl) periodicalCheckAlive() { 668 defer d.logger.Debug("Stopped") 669 670 for !d.toDie() { 671 time.Sleep(getAliveExpirationCheckInterval()) 672 dead := d.getDeadMembers() 673 if len(dead) > 0 { 674 d.logger.Debugf("Got %v dead members: %v", len(dead), dead) 675 d.expireDeadMembers(dead) 676 } 677 } 678 } 679 680 func (d *gossipDiscoveryImpl) expireDeadMembers(dead []common.PKIidType) { 681 d.logger.Warning("Entering", dead) 682 defer d.logger.Warning("Exiting") 683 684 var deadMembers2Expire []*NetworkMember 685 686 d.lock.Lock() 687 688 for _, pkiID := range dead { 689 if _, isAlive := d.aliveLastTS[string(pkiID)]; !isAlive { 690 continue 691 } 692 deadMembers2Expire = append(deadMembers2Expire, d.id2Member[string(pkiID)]) 693 // move lastTS from alive to dead 694 lastTS, hasLastTS := d.aliveLastTS[string(pkiID)] 695 if hasLastTS { 696 d.deadLastTS[string(pkiID)] = lastTS 697 delete(d.aliveLastTS, string(pkiID)) 698 } 699 700 if am := d.aliveMembership.MsgByID(pkiID); am != nil { 701 d.deadMembership.Put(pkiID, am) 702 d.aliveMembership.Remove(pkiID) 703 } 704 } 705 706 d.lock.Unlock() 707 708 for _, member2Expire := range deadMembers2Expire { 709 d.logger.Warning("Closing connection to", member2Expire) 710 d.comm.CloseConn(member2Expire) 711 } 712 } 713 714 func (d *gossipDiscoveryImpl) getDeadMembers() []common.PKIidType { 715 d.lock.RLock() 716 defer d.lock.RUnlock() 717 718 dead := []common.PKIidType{} 719 for id, last := range d.aliveLastTS { 720 elapsedNonAliveTime := time.Since(last.lastSeen) 721 if elapsedNonAliveTime.Nanoseconds() > getAliveExpirationTimeout().Nanoseconds() { 722 d.logger.Warning("Haven't heard from", []byte(id), "for", elapsedNonAliveTime) 723 dead = append(dead, common.PKIidType(id)) 724 } 725 } 726 return dead 727 } 728 729 func (d *gossipDiscoveryImpl) periodicalSendAlive() { 730 defer d.logger.Debug("Stopped") 731 732 for !d.toDie() { 733 d.logger.Debug("Sleeping", getAliveTimeInterval()) 734 time.Sleep(getAliveTimeInterval()) 735 msg, err := d.createAliveMessage(true) 736 if err != nil { 737 d.logger.Warning("Failed creating alive message:", err) 738 return 739 } 740 d.comm.Gossip(msg) 741 } 742 } 743 744 func (d *gossipDiscoveryImpl) createAliveMessage(includeInternalEndpoint bool) (*proto.SignedGossipMessage, error) { 745 d.lock.Lock() 746 d.seqNum++ 747 seqNum := d.seqNum 748 749 endpoint := d.self.Endpoint 750 meta := d.self.Metadata 751 pkiID := d.self.PKIid 752 internalEndpoint := d.self.InternalEndpoint 753 754 d.lock.Unlock() 755 756 msg2Gossip := &proto.GossipMessage{ 757 Tag: proto.GossipMessage_EMPTY, 758 Content: &proto.GossipMessage_AliveMsg{ 759 AliveMsg: &proto.AliveMessage{ 760 Membership: &proto.Member{ 761 Endpoint: endpoint, 762 Metadata: meta, 763 PkiId: pkiID, 764 }, 765 Timestamp: &proto.PeerTime{ 766 IncNum: uint64(d.incTime), 767 SeqNum: seqNum, 768 }, 769 }, 770 }, 771 } 772 773 envp := d.crypt.SignMessage(msg2Gossip, internalEndpoint) 774 if envp == nil { 775 return nil, errors.New("Failed signing message") 776 } 777 signedMsg := &proto.SignedGossipMessage{ 778 GossipMessage: msg2Gossip, 779 Envelope: envp, 780 } 781 782 if !includeInternalEndpoint { 783 signedMsg.Envelope.SecretEnvelope = nil 784 } 785 786 return signedMsg, nil 787 } 788 789 func (d *gossipDiscoveryImpl) learnExistingMembers(aliveArr []*proto.SignedGossipMessage) { 790 d.logger.Debugf("Entering: learnedMembers={%v}", aliveArr) 791 defer d.logger.Debug("Exiting") 792 793 d.lock.Lock() 794 defer d.lock.Unlock() 795 796 for _, m := range aliveArr { 797 am := m.GetAliveMsg() 798 if m == nil { 799 d.logger.Warning("Expected alive message, got instead:", m) 800 return 801 } 802 d.logger.Debug("updating", am) 803 804 var internalEndpoint string 805 if prevNetMem := d.id2Member[string(am.Membership.PkiId)]; prevNetMem != nil { 806 internalEndpoint = prevNetMem.InternalEndpoint 807 } 808 if m.Envelope.SecretEnvelope != nil { 809 internalEndpoint = m.Envelope.SecretEnvelope.InternalEndpoint() 810 } 811 812 // update member's data 813 member := d.id2Member[string(am.Membership.PkiId)] 814 member.Endpoint = am.Membership.Endpoint 815 member.Metadata = am.Membership.Metadata 816 member.InternalEndpoint = internalEndpoint 817 818 if _, isKnownAsDead := d.deadLastTS[string(am.Membership.PkiId)]; isKnownAsDead { 819 d.logger.Warning(am.Membership, "has already expired") 820 continue 821 } 822 823 if _, isKnownAsAlive := d.aliveLastTS[string(am.Membership.PkiId)]; !isKnownAsAlive { 824 d.logger.Warning(am.Membership, "has already expired") 825 continue 826 } else { 827 d.logger.Debug("Updating aliveness data:", am) 828 // update existing aliveness data 829 alive := d.aliveLastTS[string(am.Membership.PkiId)] 830 alive.incTime = tsToTime(am.Timestamp.IncNum) 831 alive.lastSeen = time.Now() 832 alive.seqNum = am.Timestamp.SeqNum 833 834 if am := d.aliveMembership.MsgByID(m.GetAliveMsg().Membership.PkiId); am == nil { 835 d.logger.Debug("Adding", am, "to aliveMembership") 836 msg := &proto.SignedGossipMessage{GossipMessage: m.GossipMessage, Envelope: am.Envelope} 837 d.aliveMembership.Put(m.GetAliveMsg().Membership.PkiId, msg) 838 } else { 839 d.logger.Debug("Replacing", am, "in aliveMembership") 840 am.GossipMessage = m.GossipMessage 841 am.Envelope = m.Envelope 842 } 843 } 844 } 845 } 846 847 func (d *gossipDiscoveryImpl) learnNewMembers(aliveMembers []*proto.SignedGossipMessage, deadMembers []*proto.SignedGossipMessage) { 848 d.logger.Debugf("Entering: learnedMembers={%v}, deadMembers={%v}", aliveMembers, deadMembers) 849 defer d.logger.Debugf("Exiting") 850 851 d.lock.Lock() 852 defer d.lock.Unlock() 853 854 for _, am := range aliveMembers { 855 if equalPKIid(am.GetAliveMsg().Membership.PkiId, d.self.PKIid) { 856 continue 857 } 858 d.aliveLastTS[string(am.GetAliveMsg().Membership.PkiId)] = ×tamp{ 859 incTime: tsToTime(am.GetAliveMsg().Timestamp.IncNum), 860 lastSeen: time.Now(), 861 seqNum: am.GetAliveMsg().Timestamp.SeqNum, 862 } 863 864 d.aliveMembership.Put(am.GetAliveMsg().Membership.PkiId, &proto.SignedGossipMessage{GossipMessage: am.GossipMessage, Envelope: am.Envelope}) 865 d.logger.Debugf("Learned about a new alive member: %v", am) 866 } 867 868 for _, dm := range deadMembers { 869 if equalPKIid(dm.GetAliveMsg().Membership.PkiId, d.self.PKIid) { 870 continue 871 } 872 d.deadLastTS[string(dm.GetAliveMsg().Membership.PkiId)] = ×tamp{ 873 incTime: tsToTime(dm.GetAliveMsg().Timestamp.IncNum), 874 lastSeen: time.Now(), 875 seqNum: dm.GetAliveMsg().Timestamp.SeqNum, 876 } 877 878 d.deadMembership.Put(dm.GetAliveMsg().Membership.PkiId, &proto.SignedGossipMessage{GossipMessage: dm.GossipMessage, Envelope: dm.Envelope}) 879 d.logger.Debugf("Learned about a new dead member: %v", dm) 880 } 881 882 // update the member in any case 883 for _, a := range [][]*proto.SignedGossipMessage{aliveMembers, deadMembers} { 884 for _, m := range a { 885 member := m.GetAliveMsg() 886 if member == nil { 887 d.logger.Warning("Expected alive message, got instead:", m) 888 return 889 } 890 891 var internalEndpoint string 892 if m.Envelope.SecretEnvelope != nil { 893 internalEndpoint = m.Envelope.SecretEnvelope.InternalEndpoint() 894 } 895 896 if prevNetMem := d.id2Member[string(member.Membership.PkiId)]; prevNetMem != nil { 897 internalEndpoint = prevNetMem.InternalEndpoint 898 } 899 900 d.id2Member[string(member.Membership.PkiId)] = &NetworkMember{ 901 Endpoint: member.Membership.Endpoint, 902 Metadata: member.Membership.Metadata, 903 PKIid: member.Membership.PkiId, 904 InternalEndpoint: internalEndpoint, 905 } 906 } 907 } 908 } 909 910 func (d *gossipDiscoveryImpl) GetMembership() []NetworkMember { 911 if d.toDie() { 912 return []NetworkMember{} 913 } 914 d.lock.RLock() 915 defer d.lock.RUnlock() 916 917 response := []NetworkMember{} 918 for _, m := range d.aliveMembership.ToSlice() { 919 member := m.GetAliveMsg() 920 response = append(response, NetworkMember{ 921 PKIid: member.Membership.PkiId, 922 Endpoint: member.Membership.Endpoint, 923 Metadata: member.Membership.Metadata, 924 InternalEndpoint: d.id2Member[string(m.GetAliveMsg().Membership.PkiId)].InternalEndpoint, 925 }) 926 } 927 return response 928 929 } 930 931 func tsToTime(ts uint64) time.Time { 932 return time.Unix(int64(0), int64(ts)) 933 } 934 935 func (d *gossipDiscoveryImpl) UpdateMetadata(md []byte) { 936 d.lock.Lock() 937 defer d.lock.Unlock() 938 d.self.Metadata = md 939 } 940 941 func (d *gossipDiscoveryImpl) UpdateEndpoint(endpoint string) { 942 d.lock.Lock() 943 defer d.lock.Unlock() 944 945 d.self.Endpoint = endpoint 946 } 947 948 func (d *gossipDiscoveryImpl) Self() NetworkMember { 949 return NetworkMember{ 950 Endpoint: d.self.Endpoint, 951 Metadata: d.self.Metadata, 952 PKIid: d.self.PKIid, 953 InternalEndpoint: d.self.InternalEndpoint, 954 } 955 } 956 957 func (d *gossipDiscoveryImpl) toDie() bool { 958 toDie := atomic.LoadInt32(&d.toDieFlag) == int32(1) 959 return toDie 960 } 961 962 func (d *gossipDiscoveryImpl) Stop() { 963 defer d.logger.Info("Stopped") 964 d.logger.Info("Stopping") 965 atomic.StoreInt32(&d.toDieFlag, int32(1)) 966 d.msgStore.Stop() 967 d.toDieChan <- struct{}{} 968 } 969 970 func equalPKIid(a, b common.PKIidType) bool { 971 return bytes.Equal(a, b) 972 } 973 974 func same(a *timestamp, b *proto.PeerTime) bool { 975 return uint64(a.incTime.UnixNano()) == b.IncNum && a.seqNum == b.SeqNum 976 } 977 978 func before(a *timestamp, b *proto.PeerTime) bool { 979 return (uint64(a.incTime.UnixNano()) == b.IncNum && a.seqNum < b.SeqNum) || 980 uint64(a.incTime.UnixNano()) < b.IncNum 981 } 982 983 func getAliveTimeInterval() time.Duration { 984 return util.GetDurationOrDefault("peer.gossip.aliveTimeInterval", defaultHelloInterval) 985 } 986 987 func getAliveExpirationTimeout() time.Duration { 988 return util.GetDurationOrDefault("peer.gossip.aliveExpirationTimeout", 5*getAliveTimeInterval()) 989 } 990 991 func getAliveExpirationCheckInterval() time.Duration { 992 if aliveExpirationCheckInterval != 0 { 993 return aliveExpirationCheckInterval 994 } 995 996 return time.Duration(getAliveExpirationTimeout() / 10) 997 } 998 999 func getReconnectInterval() time.Duration { 1000 return util.GetDurationOrDefault("peer.gossip.reconnectInterval", getAliveExpirationTimeout()) 1001 } 1002 1003 type aliveMsgStore struct { 1004 msgstore.MessageStore 1005 } 1006 1007 func newAliveMsgStore(d *gossipDiscoveryImpl) *aliveMsgStore { 1008 policy := proto.NewGossipMessageComparator(0) 1009 trigger := func(m interface{}) {} 1010 aliveMsgTTL := getAliveExpirationTimeout() * msgExpirationFactor 1011 externalLock := func() { d.lock.Lock() } 1012 externalUnlock := func() { d.lock.Unlock() } 1013 callback := func(m interface{}) { 1014 msg := m.(*proto.SignedGossipMessage) 1015 if !msg.IsAliveMsg() { 1016 return 1017 } 1018 id := msg.GetAliveMsg().Membership.PkiId 1019 d.aliveMembership.Remove(id) 1020 d.deadMembership.Remove(id) 1021 delete(d.id2Member, string(id)) 1022 delete(d.deadLastTS, string(id)) 1023 delete(d.aliveLastTS, string(id)) 1024 } 1025 1026 s := &aliveMsgStore{ 1027 MessageStore: msgstore.NewMessageStoreExpirable(policy, trigger, aliveMsgTTL, externalLock, externalUnlock, callback), 1028 } 1029 return s 1030 } 1031 1032 func (s *aliveMsgStore) Add(msg interface{}) bool { 1033 if !msg.(*proto.SignedGossipMessage).IsAliveMsg() { 1034 panic(fmt.Sprint("Msg ", msg, " is not AliveMsg")) 1035 } 1036 return s.MessageStore.Add(msg) 1037 } 1038 1039 func (s *aliveMsgStore) CheckValid(msg interface{}) bool { 1040 if !msg.(*proto.SignedGossipMessage).IsAliveMsg() { 1041 panic(fmt.Sprint("Msg ", msg, " is not AliveMsg")) 1042 } 1043 return s.MessageStore.CheckValid(msg) 1044 }