github.com/adnan-c/fabric_e2e_couchdb@v0.6.1-preview.0.20170228180935-21ce6b23cf91/gossip/discovery/discovery_impl.go (about)

     1  /*
     2  Copyright IBM Corp. 2016 All Rights Reserved.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8  		 http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package discovery
    18  
    19  import (
    20  	"bytes"
    21  	"fmt"
    22  	"sync"
    23  	"sync/atomic"
    24  	"time"
    25  
    26  	"strconv"
    27  	"strings"
    28  
    29  	"github.com/hyperledger/fabric/gossip/common"
    30  	"github.com/hyperledger/fabric/gossip/util"
    31  	proto "github.com/hyperledger/fabric/protos/gossip"
    32  	"github.com/op/go-logging"
    33  	"github.com/spf13/viper"
    34  )
    35  
    36  const defaultHelloInterval = time.Duration(5) * time.Second
    37  
    38  var aliveExpirationCheckInterval time.Duration
    39  
    40  // SetAliveTimeInterval sets the alive time interval
    41  func SetAliveTimeInterval(interval time.Duration) {
    42  	viper.Set("peer.gossip.aliveTimeInterval", interval)
    43  }
    44  
    45  // SetAliveExpirationTimeout sets the expiration timeout
    46  func SetAliveExpirationTimeout(timeout time.Duration) {
    47  	viper.Set("peer.gossip.aliveExpirationTimeout", timeout)
    48  	aliveExpirationCheckInterval = time.Duration(timeout / 10)
    49  }
    50  
    51  // SetAliveExpirationCheckInterval sets the expiration check interval
    52  func SetAliveExpirationCheckInterval(interval time.Duration) {
    53  	aliveExpirationCheckInterval = interval
    54  }
    55  
    56  // SetReconnectInterval sets the reconnect interval
    57  func SetReconnectInterval(interval time.Duration) {
    58  	viper.Set("peer.gossip.reconnectInterval", interval)
    59  }
    60  
    61  type timestamp struct {
    62  	incTime  time.Time
    63  	seqNum   uint64
    64  	lastSeen time.Time
    65  }
    66  
    67  func (ts *timestamp) String() string {
    68  	return fmt.Sprintf("%v, %v", ts.incTime.UnixNano(), ts.seqNum)
    69  }
    70  
    71  type gossipDiscoveryImpl struct {
    72  	incTime         uint64
    73  	seqNum          uint64
    74  	self            NetworkMember
    75  	deadLastTS      map[string]*timestamp     // H
    76  	aliveLastTS     map[string]*timestamp     // V
    77  	id2Member       map[string]*NetworkMember // all known members
    78  	aliveMembership *util.MembershipStore
    79  	deadMembership  *util.MembershipStore
    80  
    81  	bootstrapPeers []string
    82  
    83  	comm  CommService
    84  	crypt CryptoService
    85  	lock  *sync.RWMutex
    86  
    87  	toDieChan chan struct{}
    88  	toDieFlag int32
    89  	logger    *logging.Logger
    90  }
    91  
    92  // NewDiscoveryService returns a new discovery service with the comm module passed and the crypto service passed
    93  func NewDiscoveryService(bootstrapPeers []string, self NetworkMember, comm CommService, crypt CryptoService) Discovery {
    94  	d := &gossipDiscoveryImpl{
    95  		self:            self,
    96  		incTime:         uint64(time.Now().UnixNano()),
    97  		seqNum:          uint64(0),
    98  		deadLastTS:      make(map[string]*timestamp),
    99  		aliveLastTS:     make(map[string]*timestamp),
   100  		id2Member:       make(map[string]*NetworkMember),
   101  		aliveMembership: util.NewMembershipStore(),
   102  		deadMembership:  util.NewMembershipStore(),
   103  		crypt:           crypt,
   104  		comm:            comm,
   105  		lock:            &sync.RWMutex{},
   106  		toDieChan:       make(chan struct{}, 1),
   107  		toDieFlag:       int32(0),
   108  		logger:          util.GetLogger(util.LoggingDiscoveryModule, self.InternalEndpoint),
   109  	}
   110  
   111  	go d.periodicalSendAlive()
   112  	go d.periodicalCheckAlive()
   113  	go d.handleMessages()
   114  	go d.periodicalReconnectToDead()
   115  	go d.handlePresumedDeadPeers()
   116  
   117  	go d.connect2BootstrapPeers(bootstrapPeers)
   118  
   119  	d.logger.Info("Started", self, "incTime is", d.incTime)
   120  
   121  	return d
   122  }
   123  
   124  // Exists returns whether a peer with given
   125  // PKI-ID is known
   126  func (d *gossipDiscoveryImpl) Exists(PKIID common.PKIidType) bool {
   127  	d.lock.RLock()
   128  	defer d.lock.RUnlock()
   129  	_, exists := d.id2Member[string(PKIID)]
   130  	return exists
   131  }
   132  
   133  func (d *gossipDiscoveryImpl) Connect(member NetworkMember) {
   134  	d.logger.Debug("Entering", member)
   135  	defer d.logger.Debug("Exiting")
   136  
   137  	d.lock.Lock()
   138  	defer d.lock.Unlock()
   139  
   140  	if _, exists := d.id2Member[string(member.PKIid)]; exists {
   141  		d.logger.Info("Member", member, "already known")
   142  		return
   143  	}
   144  
   145  	d.deadLastTS[string(member.PKIid)] = &timestamp{
   146  		incTime:  time.Unix(0, 0),
   147  		lastSeen: time.Now(),
   148  		seqNum:   0,
   149  	}
   150  	d.id2Member[string(member.PKIid)] = &member
   151  }
   152  
   153  func (d *gossipDiscoveryImpl) connect2BootstrapPeers(endpoints []string) {
   154  	if len(d.self.InternalEndpoint) == 0 {
   155  		d.logger.Panic("Internal endpoint is empty:", d.self.InternalEndpoint)
   156  	}
   157  
   158  	if len(strings.Split(d.self.InternalEndpoint, ":")) != 2 {
   159  		d.logger.Panicf("Self endpoint %s isn't formatted as 'host:port'", d.self.InternalEndpoint)
   160  	}
   161  
   162  	myPort, err := strconv.ParseInt(strings.Split(d.self.InternalEndpoint, ":")[1], 10, 64)
   163  	if err != nil {
   164  		d.logger.Panicf("Self endpoint %s has not valid port'", d.self.InternalEndpoint)
   165  	}
   166  
   167  	d.logger.Info("Entering:", endpoints)
   168  	defer d.logger.Info("Exiting")
   169  	endpoints = filterOutLocalhost(endpoints, int(myPort))
   170  	if len(endpoints) == 0 {
   171  		return
   172  	}
   173  
   174  	for !d.somePeerIsKnown() {
   175  		var wg sync.WaitGroup
   176  		req := d.createMembershipRequest().NoopSign()
   177  		wg.Add(len(endpoints))
   178  		for _, endpoint := range endpoints {
   179  			go func(endpoint string) {
   180  				defer wg.Done()
   181  				peer := &NetworkMember{
   182  					Endpoint:         endpoint,
   183  					InternalEndpoint: endpoint,
   184  				}
   185  				if !d.comm.Ping(peer) {
   186  					return
   187  				}
   188  				d.comm.SendToPeer(peer, req)
   189  			}(endpoint)
   190  		}
   191  		wg.Wait()
   192  		time.Sleep(getReconnectInterval())
   193  	}
   194  }
   195  
   196  func (d *gossipDiscoveryImpl) somePeerIsKnown() bool {
   197  	d.lock.RLock()
   198  	defer d.lock.RUnlock()
   199  	return len(d.aliveLastTS) != 0
   200  }
   201  
   202  func (d *gossipDiscoveryImpl) InitiateSync(peerNum int) {
   203  	if d.toDie() {
   204  		return
   205  	}
   206  	var peers2SendTo []*NetworkMember
   207  	memReq := d.createMembershipRequest().NoopSign()
   208  
   209  	d.lock.RLock()
   210  
   211  	n := d.aliveMembership.Size()
   212  	k := peerNum
   213  	if k > n {
   214  		k = n
   215  	}
   216  
   217  	aliveMembersAsSlice := d.aliveMembership.ToSlice()
   218  	for _, i := range util.GetRandomIndices(k, n-1) {
   219  		pulledPeer := aliveMembersAsSlice[i].GetAliveMsg().Membership
   220  		var internalEndpoint string
   221  		if aliveMembersAsSlice[i].Envelope.SecretEnvelope != nil {
   222  			internalEndpoint = aliveMembersAsSlice[i].Envelope.SecretEnvelope.InternalEndpoint()
   223  		}
   224  		netMember := &NetworkMember{
   225  			Endpoint:         pulledPeer.Endpoint,
   226  			Metadata:         pulledPeer.Metadata,
   227  			PKIid:            pulledPeer.PkiID,
   228  			InternalEndpoint: internalEndpoint,
   229  		}
   230  		peers2SendTo = append(peers2SendTo, netMember)
   231  	}
   232  
   233  	d.lock.RUnlock()
   234  
   235  	for _, netMember := range peers2SendTo {
   236  		d.comm.SendToPeer(netMember, memReq)
   237  	}
   238  }
   239  
   240  func (d *gossipDiscoveryImpl) handlePresumedDeadPeers() {
   241  	defer d.logger.Debug("Stopped")
   242  
   243  	for !d.toDie() {
   244  		select {
   245  		case deadPeer := <-d.comm.PresumedDead():
   246  			if d.isAlive(deadPeer) {
   247  				d.expireDeadMembers([]common.PKIidType{deadPeer})
   248  			}
   249  			break
   250  		case s := <-d.toDieChan:
   251  			d.toDieChan <- s
   252  			return
   253  		}
   254  	}
   255  }
   256  
   257  func (d *gossipDiscoveryImpl) isAlive(pkiID common.PKIidType) bool {
   258  	d.lock.RLock()
   259  	defer d.lock.RUnlock()
   260  	_, alive := d.aliveLastTS[string(pkiID)]
   261  	return alive
   262  }
   263  
   264  func (d *gossipDiscoveryImpl) handleMessages() {
   265  	defer d.logger.Debug("Stopped")
   266  
   267  	in := d.comm.Accept()
   268  	for !d.toDie() {
   269  		select {
   270  		case s := <-d.toDieChan:
   271  			d.toDieChan <- s
   272  			return
   273  		case m := <-in:
   274  			d.handleMsgFromComm(m)
   275  			break
   276  		}
   277  	}
   278  }
   279  
   280  func (d *gossipDiscoveryImpl) handleMsgFromComm(m *proto.SignedGossipMessage) {
   281  	if m == nil {
   282  		return
   283  	}
   284  	if m.GetAliveMsg() == nil && m.GetMemRes() == nil && m.GetMemReq() == nil {
   285  		d.logger.Warning("Got message with wrong type (expected Alive or MembershipResponse or MembershipRequest message):", m.Content) // TODO: write only message type
   286  		d.logger.Warning(m)
   287  		return
   288  	}
   289  
   290  	d.logger.Debug("Got message:", m)
   291  	defer d.logger.Debug("Exiting")
   292  
   293  	// TODO: make sure somehow that the membership request is "fresh"
   294  	if memReq := m.GetMemReq(); memReq != nil {
   295  		selfInfoGossipMsg, err := memReq.SelfInformation.ToGossipMessage()
   296  		if err != nil {
   297  			d.logger.Warning("Failed deserializing GossipMessage from envelope:", err)
   298  			return
   299  		}
   300  		d.handleAliveMessage(selfInfoGossipMsg)
   301  
   302  		var internalEndpoint string
   303  		if m.Envelope.SecretEnvelope != nil {
   304  			internalEndpoint = m.Envelope.SecretEnvelope.InternalEndpoint()
   305  		}
   306  
   307  		// Sending a membership response to a peer may block this routine
   308  		// in case the sending is deliberately slow (i.e attack).
   309  		// will keep this async until I'll write a timeout detector in the comm layer
   310  		go d.sendMemResponse(selfInfoGossipMsg.GetAliveMsg().Membership, memReq.Known, internalEndpoint)
   311  		return
   312  	}
   313  
   314  	if m.IsAliveMsg() {
   315  		d.handleAliveMessage(m)
   316  		return
   317  	}
   318  
   319  	if memResp := m.GetMemRes(); memResp != nil {
   320  		for _, env := range memResp.Alive {
   321  			am, err := env.ToGossipMessage()
   322  			if err != nil {
   323  				d.logger.Warning("Membership response contains an invalid message from an online peer:", err)
   324  				return
   325  			}
   326  			if !am.IsAliveMsg() {
   327  				d.logger.Warning("Expected alive message, got", am, "instead")
   328  				return
   329  			}
   330  			d.handleAliveMessage(am)
   331  		}
   332  
   333  		for _, env := range memResp.Dead {
   334  			dm, err := env.ToGossipMessage()
   335  			if err != nil {
   336  				d.logger.Warning("Membership response contains an invalid message from an offline peer", err)
   337  				return
   338  			}
   339  			if !d.crypt.ValidateAliveMsg(m) {
   340  				d.logger.Warningf("Alive message isn't authentic, someone spoofed %s's identity", dm.GetAliveMsg().Membership)
   341  				continue
   342  			}
   343  
   344  			newDeadMembers := []*proto.SignedGossipMessage{}
   345  			d.lock.RLock()
   346  			if _, known := d.id2Member[string(dm.GetAliveMsg().Membership.PkiID)]; !known {
   347  				newDeadMembers = append(newDeadMembers, dm)
   348  			}
   349  			d.lock.RUnlock()
   350  			d.learnNewMembers([]*proto.SignedGossipMessage{}, newDeadMembers)
   351  		}
   352  	}
   353  }
   354  
   355  func (d *gossipDiscoveryImpl) sendMemResponse(member *proto.Member, known [][]byte, internalEndpoint string) {
   356  	d.logger.Debug("Entering", member)
   357  
   358  	memResp := d.createMembershipResponse(known)
   359  
   360  	defer d.logger.Debug("Exiting, replying with", memResp)
   361  
   362  	d.comm.SendToPeer(&NetworkMember{
   363  		Endpoint:         member.Endpoint,
   364  		Metadata:         member.Metadata,
   365  		PKIid:            member.PkiID,
   366  		InternalEndpoint: internalEndpoint,
   367  	}, (&proto.GossipMessage{
   368  		Tag:   proto.GossipMessage_EMPTY,
   369  		Nonce: uint64(0),
   370  		Content: &proto.GossipMessage_MemRes{
   371  			MemRes: memResp,
   372  		},
   373  	}).NoopSign())
   374  }
   375  
   376  func (d *gossipDiscoveryImpl) createMembershipResponse(known [][]byte) *proto.MembershipResponse {
   377  	aliveMsg := d.createAliveMessage()
   378  
   379  	d.lock.RLock()
   380  	defer d.lock.RUnlock()
   381  
   382  	deadPeers := []*proto.Envelope{}
   383  
   384  	for _, dm := range d.deadMembership.ToSlice() {
   385  		isKnown := false
   386  		for _, knownPeer := range known {
   387  			if equalPKIid(knownPeer, dm.GetAliveMsg().Membership.PkiID) {
   388  				isKnown = true
   389  				break
   390  			}
   391  		}
   392  		if !isKnown {
   393  			deadPeers = append(deadPeers, dm.Envelope)
   394  			break
   395  		}
   396  	}
   397  
   398  	aliveMembersAsSlice := d.aliveMembership.ToSlice()
   399  	aliveSnapshot := make([]*proto.Envelope, len(aliveMembersAsSlice))
   400  	for i, msg := range aliveMembersAsSlice {
   401  		aliveSnapshot[i] = msg.Envelope
   402  	}
   403  
   404  	return &proto.MembershipResponse{
   405  		Alive: append(aliveSnapshot, aliveMsg.Envelope),
   406  		Dead:  deadPeers,
   407  	}
   408  }
   409  
   410  func (d *gossipDiscoveryImpl) handleAliveMessage(m *proto.SignedGossipMessage) {
   411  	d.logger.Debug("Entering", m)
   412  	defer d.logger.Debug("Exiting")
   413  
   414  	if !d.crypt.ValidateAliveMsg(m) {
   415  		d.logger.Warningf("Alive message isn't authentic, someone must be spoofing %s's identity", m.GetAliveMsg())
   416  		return
   417  	}
   418  
   419  	pkiID := m.GetAliveMsg().Membership.PkiID
   420  	if equalPKIid(pkiID, d.self.PKIid) {
   421  		d.logger.Debug("Got alive message about ourselves,", m)
   422  		return
   423  	}
   424  
   425  	ts := m.GetAliveMsg().Timestamp
   426  
   427  	d.lock.RLock()
   428  	_, known := d.id2Member[string(pkiID)]
   429  	d.lock.RUnlock()
   430  
   431  	if !known {
   432  		d.learnNewMembers([]*proto.SignedGossipMessage{m}, []*proto.SignedGossipMessage{})
   433  		return
   434  	}
   435  
   436  	d.lock.RLock()
   437  	lastAliveTS, isAlive := d.aliveLastTS[string(pkiID)]
   438  	lastDeadTS, isDead := d.deadLastTS[string(pkiID)]
   439  	d.lock.RUnlock()
   440  
   441  	if !isAlive && !isDead {
   442  		d.logger.Panicf("Member %s is known but not found neither in alive nor in dead lastTS maps, isAlive=%v, isDead=%v", m.GetAliveMsg().Membership.Endpoint, isAlive, isDead)
   443  		return
   444  	}
   445  
   446  	if isAlive && isDead {
   447  		d.logger.Panicf("Member %s is both alive and dead at the same time", m.GetAliveMsg().Membership)
   448  		return
   449  	}
   450  
   451  	if isDead {
   452  		if before(lastDeadTS, ts) {
   453  			// resurrect peer
   454  			d.resurrectMember(m, *ts)
   455  		} else if !same(lastDeadTS, ts) {
   456  			d.logger.Debug(m.GetAliveMsg().Membership, "lastDeadTS:", lastDeadTS, "but got ts:", ts)
   457  		}
   458  		return
   459  	}
   460  
   461  	d.lock.RLock()
   462  	lastAliveTS, isAlive = d.aliveLastTS[string(pkiID)]
   463  	d.lock.RUnlock()
   464  
   465  	if isAlive {
   466  		if before(lastAliveTS, ts) {
   467  			d.learnExistingMembers([]*proto.SignedGossipMessage{m})
   468  		} else if !same(lastAliveTS, ts) {
   469  			d.logger.Debug(m.GetAliveMsg().Membership, "lastAliveTS:", lastAliveTS, "but got ts:", ts)
   470  		}
   471  
   472  	}
   473  	// else, ignore the message because it is too old
   474  }
   475  
   476  func (d *gossipDiscoveryImpl) resurrectMember(am *proto.SignedGossipMessage, t proto.PeerTime) {
   477  	d.logger.Info("Entering, AliveMessage:", am, "t:", t)
   478  	defer d.logger.Info("Exiting")
   479  	d.lock.Lock()
   480  	defer d.lock.Unlock()
   481  
   482  	member := am.GetAliveMsg().Membership
   483  	pkiID := member.PkiID
   484  	d.aliveLastTS[string(pkiID)] = &timestamp{
   485  		lastSeen: time.Now(),
   486  		seqNum:   t.SeqNum,
   487  		incTime:  tsToTime(t.IncNumber),
   488  	}
   489  
   490  	var internalEndpoint string
   491  	if am.Envelope.SecretEnvelope != nil {
   492  		internalEndpoint = am.Envelope.SecretEnvelope.InternalEndpoint()
   493  	}
   494  
   495  	d.id2Member[string(pkiID)] = &NetworkMember{
   496  		Endpoint:         member.Endpoint,
   497  		Metadata:         member.Metadata,
   498  		PKIid:            member.PkiID,
   499  		InternalEndpoint: internalEndpoint,
   500  	}
   501  
   502  	delete(d.deadLastTS, string(pkiID))
   503  	d.deadMembership.Remove(common.PKIidType(pkiID))
   504  	d.aliveMembership.Put(common.PKIidType(pkiID), &proto.SignedGossipMessage{GossipMessage: am.GossipMessage, Envelope: am.Envelope})
   505  }
   506  
   507  func (d *gossipDiscoveryImpl) periodicalReconnectToDead() {
   508  	defer d.logger.Debug("Stopped")
   509  
   510  	for !d.toDie() {
   511  		wg := &sync.WaitGroup{}
   512  
   513  		for _, member := range d.copyLastSeen(d.deadLastTS) {
   514  			wg.Add(1)
   515  			go func(member NetworkMember) {
   516  				defer wg.Done()
   517  				if d.comm.Ping(&member) {
   518  					d.logger.Debug(member, "is responding, sending membership request")
   519  					d.sendMembershipRequest(&member)
   520  				} else {
   521  					d.logger.Debug(member, "is still dead")
   522  				}
   523  			}(member)
   524  		}
   525  
   526  		wg.Wait()
   527  		d.logger.Debug("Sleeping", getReconnectInterval())
   528  		time.Sleep(getReconnectInterval())
   529  	}
   530  }
   531  
   532  func (d *gossipDiscoveryImpl) sendMembershipRequest(member *NetworkMember) {
   533  	d.comm.SendToPeer(member, d.createMembershipRequest())
   534  }
   535  
   536  func (d *gossipDiscoveryImpl) createMembershipRequest() *proto.SignedGossipMessage {
   537  	req := &proto.MembershipRequest{
   538  		SelfInformation: d.createAliveMessage().Envelope,
   539  		Known:           d.getKnownPeers(),
   540  	}
   541  	return (&proto.GossipMessage{
   542  		Tag:   proto.GossipMessage_EMPTY,
   543  		Nonce: uint64(0),
   544  		Content: &proto.GossipMessage_MemReq{
   545  			MemReq: req,
   546  		},
   547  	}).NoopSign()
   548  }
   549  
   550  func (d *gossipDiscoveryImpl) getKnownPeers() [][]byte {
   551  	d.lock.RLock()
   552  	defer d.lock.RUnlock()
   553  
   554  	peers := [][]byte{}
   555  	for id := range d.id2Member {
   556  		peers = append(peers, common.PKIidType(id))
   557  	}
   558  	return peers
   559  }
   560  
   561  func (d *gossipDiscoveryImpl) copyLastSeen(lastSeenMap map[string]*timestamp) []NetworkMember {
   562  	d.lock.RLock()
   563  	defer d.lock.RUnlock()
   564  
   565  	res := []NetworkMember{}
   566  	for pkiIDStr := range lastSeenMap {
   567  		res = append(res, *(d.id2Member[pkiIDStr]))
   568  	}
   569  	return res
   570  }
   571  
   572  func (d *gossipDiscoveryImpl) periodicalCheckAlive() {
   573  	defer d.logger.Debug("Stopped")
   574  
   575  	for !d.toDie() {
   576  		time.Sleep(getAliveExpirationCheckInterval())
   577  		dead := d.getDeadMembers()
   578  		if len(dead) > 0 {
   579  			d.logger.Debugf("Got %v dead members: %v", len(dead), dead)
   580  			d.expireDeadMembers(dead)
   581  		}
   582  	}
   583  }
   584  
   585  func (d *gossipDiscoveryImpl) expireDeadMembers(dead []common.PKIidType) {
   586  	d.logger.Warning("Entering", dead)
   587  	defer d.logger.Warning("Exiting")
   588  
   589  	var deadMembers2Expire []*NetworkMember
   590  
   591  	d.lock.Lock()
   592  
   593  	for _, pkiID := range dead {
   594  		if _, isAlive := d.aliveLastTS[string(pkiID)]; !isAlive {
   595  			continue
   596  		}
   597  		deadMembers2Expire = append(deadMembers2Expire, d.id2Member[string(pkiID)])
   598  		// move lastTS from alive to dead
   599  		lastTS, hasLastTS := d.aliveLastTS[string(pkiID)]
   600  		if hasLastTS {
   601  			d.deadLastTS[string(pkiID)] = lastTS
   602  			delete(d.aliveLastTS, string(pkiID))
   603  		}
   604  
   605  		if am := d.aliveMembership.MsgByID(pkiID); am != nil {
   606  			d.deadMembership.Put(pkiID, am)
   607  			d.aliveMembership.Remove(pkiID)
   608  		}
   609  	}
   610  
   611  	d.lock.Unlock()
   612  
   613  	for _, member2Expire := range deadMembers2Expire {
   614  		d.logger.Warning("Closing connection to", member2Expire)
   615  		d.comm.CloseConn(member2Expire)
   616  	}
   617  }
   618  
   619  func (d *gossipDiscoveryImpl) getDeadMembers() []common.PKIidType {
   620  	d.lock.RLock()
   621  	defer d.lock.RUnlock()
   622  
   623  	dead := []common.PKIidType{}
   624  	for id, last := range d.aliveLastTS {
   625  		elapsedNonAliveTime := time.Since(last.lastSeen)
   626  		if elapsedNonAliveTime.Nanoseconds() > getAliveExpirationTimeout().Nanoseconds() {
   627  			d.logger.Warning("Haven't heard from", id, "for", elapsedNonAliveTime)
   628  			dead = append(dead, common.PKIidType(id))
   629  		}
   630  	}
   631  	return dead
   632  }
   633  
   634  func (d *gossipDiscoveryImpl) periodicalSendAlive() {
   635  	defer d.logger.Debug("Stopped")
   636  
   637  	for !d.toDie() {
   638  		d.logger.Debug("Sleeping", getAliveTimeInterval())
   639  		time.Sleep(getAliveTimeInterval())
   640  		d.comm.Gossip(d.createAliveMessage())
   641  	}
   642  }
   643  
   644  func (d *gossipDiscoveryImpl) createAliveMessage() *proto.SignedGossipMessage {
   645  	d.lock.Lock()
   646  	d.seqNum++
   647  	seqNum := d.seqNum
   648  
   649  	endpoint := d.self.Endpoint
   650  	meta := d.self.Metadata
   651  	pkiID := d.self.PKIid
   652  	internalEndpoint := d.self.InternalEndpoint
   653  
   654  	d.lock.Unlock()
   655  
   656  	msg2Gossip := &proto.GossipMessage{
   657  		Tag: proto.GossipMessage_EMPTY,
   658  		Content: &proto.GossipMessage_AliveMsg{
   659  			AliveMsg: &proto.AliveMessage{
   660  				Membership: &proto.Member{
   661  					Endpoint: endpoint,
   662  					Metadata: meta,
   663  					PkiID:    pkiID,
   664  				},
   665  				Timestamp: &proto.PeerTime{
   666  					IncNumber: uint64(d.incTime),
   667  					SeqNum:    seqNum,
   668  				},
   669  			},
   670  		},
   671  	}
   672  
   673  	return &proto.SignedGossipMessage{
   674  		GossipMessage: msg2Gossip,
   675  		Envelope:      d.crypt.SignMessage(msg2Gossip, internalEndpoint),
   676  	}
   677  }
   678  
   679  func (d *gossipDiscoveryImpl) learnExistingMembers(aliveArr []*proto.SignedGossipMessage) {
   680  	d.logger.Infof("Entering: learnedMembers={%v}", aliveArr)
   681  	defer d.logger.Debug("Exiting")
   682  
   683  	d.lock.Lock()
   684  	defer d.lock.Unlock()
   685  
   686  	for _, m := range aliveArr {
   687  		am := m.GetAliveMsg()
   688  		if m == nil {
   689  			d.logger.Warning("Expected alive message, got instead:", m)
   690  			return
   691  		}
   692  		d.logger.Debug("updating", am)
   693  
   694  		var internalEndpoint string
   695  
   696  		if m.Envelope.SecretEnvelope != nil {
   697  			internalEndpoint = m.Envelope.SecretEnvelope.InternalEndpoint()
   698  		}
   699  
   700  		// update member's data
   701  		member := d.id2Member[string(am.Membership.PkiID)]
   702  		member.Endpoint = am.Membership.Endpoint
   703  		member.Metadata = am.Membership.Metadata
   704  		member.InternalEndpoint = internalEndpoint
   705  
   706  		if _, isKnownAsDead := d.deadLastTS[string(am.Membership.PkiID)]; isKnownAsDead {
   707  			d.logger.Warning(am.Membership, "has already expired")
   708  			continue
   709  		}
   710  
   711  		if _, isKnownAsAlive := d.aliveLastTS[string(am.Membership.PkiID)]; !isKnownAsAlive {
   712  			d.logger.Warning(am.Membership, "has already expired")
   713  			continue
   714  		} else {
   715  			d.logger.Debug("Updating aliveness data:", am)
   716  			// update existing aliveness data
   717  			alive := d.aliveLastTS[string(am.Membership.PkiID)]
   718  			alive.incTime = tsToTime(am.Timestamp.IncNumber)
   719  			alive.lastSeen = time.Now()
   720  			alive.seqNum = am.Timestamp.SeqNum
   721  
   722  			if am := d.aliveMembership.MsgByID(m.GetAliveMsg().Membership.PkiID); am == nil {
   723  				d.logger.Debug("Adding", am, "to aliveMembership")
   724  				msg := &proto.SignedGossipMessage{GossipMessage: m.GossipMessage, Envelope: am.Envelope}
   725  				d.aliveMembership.Put(m.GetAliveMsg().Membership.PkiID, msg)
   726  			} else {
   727  				d.logger.Debug("Replacing", am, "in aliveMembership")
   728  				am.GossipMessage = m.GossipMessage
   729  				am.Envelope = m.Envelope
   730  			}
   731  		}
   732  	}
   733  }
   734  
   735  func (d *gossipDiscoveryImpl) learnNewMembers(aliveMembers []*proto.SignedGossipMessage, deadMembers []*proto.SignedGossipMessage) {
   736  	d.logger.Debugf("Entering: learnedMembers={%v}, deadMembers={%v}", aliveMembers, deadMembers)
   737  	defer d.logger.Debugf("Exiting")
   738  
   739  	d.lock.Lock()
   740  	defer d.lock.Unlock()
   741  
   742  	for _, am := range aliveMembers {
   743  		if equalPKIid(am.GetAliveMsg().Membership.PkiID, d.self.PKIid) {
   744  			continue
   745  		}
   746  		d.aliveLastTS[string(am.GetAliveMsg().Membership.PkiID)] = &timestamp{
   747  			incTime:  tsToTime(am.GetAliveMsg().Timestamp.IncNumber),
   748  			lastSeen: time.Now(),
   749  			seqNum:   am.GetAliveMsg().Timestamp.SeqNum,
   750  		}
   751  
   752  		d.aliveMembership.Put(am.GetAliveMsg().Membership.PkiID, &proto.SignedGossipMessage{GossipMessage: am.GossipMessage, Envelope: am.Envelope})
   753  		d.logger.Infof("Learned about a new alive member: %v", am)
   754  	}
   755  
   756  	for _, dm := range deadMembers {
   757  		if equalPKIid(dm.GetAliveMsg().Membership.PkiID, d.self.PKIid) {
   758  			continue
   759  		}
   760  		d.deadLastTS[string(dm.GetAliveMsg().Membership.PkiID)] = &timestamp{
   761  			incTime:  tsToTime(dm.GetAliveMsg().Timestamp.IncNumber),
   762  			lastSeen: time.Now(),
   763  			seqNum:   dm.GetAliveMsg().Timestamp.SeqNum,
   764  		}
   765  
   766  		d.deadMembership.Put(dm.GetAliveMsg().Membership.PkiID, &proto.SignedGossipMessage{GossipMessage: dm.GossipMessage, Envelope: dm.Envelope})
   767  		d.logger.Infof("Learned about a new dead member: %v", dm)
   768  	}
   769  
   770  	// update the member in any case
   771  	for _, a := range [][]*proto.SignedGossipMessage{aliveMembers, deadMembers} {
   772  		for _, m := range a {
   773  			member := m.GetAliveMsg()
   774  			if member == nil {
   775  				d.logger.Warning("Expected alive message, got instead:", m)
   776  				return
   777  			}
   778  
   779  			var internalEndpoint string
   780  			if m.Envelope.SecretEnvelope != nil {
   781  				internalEndpoint = m.Envelope.SecretEnvelope.InternalEndpoint()
   782  			}
   783  
   784  			d.id2Member[string(member.Membership.PkiID)] = &NetworkMember{
   785  				Endpoint:         member.Membership.Endpoint,
   786  				Metadata:         member.Membership.Metadata,
   787  				PKIid:            member.Membership.PkiID,
   788  				InternalEndpoint: internalEndpoint,
   789  			}
   790  		}
   791  	}
   792  }
   793  
   794  func (d *gossipDiscoveryImpl) GetMembership() []NetworkMember {
   795  	if d.toDie() {
   796  		return []NetworkMember{}
   797  	}
   798  	d.lock.RLock()
   799  	defer d.lock.RUnlock()
   800  
   801  	response := []NetworkMember{}
   802  	for _, m := range d.aliveMembership.ToSlice() {
   803  		var internalEndpoint string
   804  
   805  		if m.Envelope.SecretEnvelope != nil {
   806  			internalEndpoint = m.Envelope.SecretEnvelope.InternalEndpoint()
   807  		}
   808  
   809  		member := m.GetAliveMsg()
   810  		response = append(response, NetworkMember{
   811  			PKIid:            member.Membership.PkiID,
   812  			Endpoint:         member.Membership.Endpoint,
   813  			Metadata:         member.Membership.Metadata,
   814  			InternalEndpoint: internalEndpoint,
   815  		})
   816  	}
   817  	return response
   818  
   819  }
   820  
   821  func tsToTime(ts uint64) time.Time {
   822  	return time.Unix(int64(0), int64(ts))
   823  }
   824  
   825  func (d *gossipDiscoveryImpl) UpdateMetadata(md []byte) {
   826  	d.lock.Lock()
   827  	defer d.lock.Unlock()
   828  	d.self.Metadata = md
   829  }
   830  
   831  func (d *gossipDiscoveryImpl) UpdateEndpoint(endpoint string) {
   832  	d.lock.Lock()
   833  	defer d.lock.Unlock()
   834  
   835  	d.self.Endpoint = endpoint
   836  }
   837  
   838  func (d *gossipDiscoveryImpl) Self() NetworkMember {
   839  	return NetworkMember{
   840  		Endpoint:         d.self.Endpoint,
   841  		Metadata:         d.self.Metadata,
   842  		PKIid:            d.self.PKIid,
   843  		InternalEndpoint: d.self.InternalEndpoint,
   844  	}
   845  }
   846  
   847  func (d *gossipDiscoveryImpl) toDie() bool {
   848  	toDie := atomic.LoadInt32(&d.toDieFlag) == int32(1)
   849  	return toDie
   850  }
   851  
   852  func (d *gossipDiscoveryImpl) Stop() {
   853  	defer d.logger.Info("Stopped")
   854  	d.logger.Info("Stopping")
   855  	atomic.StoreInt32(&d.toDieFlag, int32(1))
   856  	d.toDieChan <- struct{}{}
   857  }
   858  
   859  func equalPKIid(a, b common.PKIidType) bool {
   860  	return bytes.Equal(a, b)
   861  }
   862  
   863  func same(a *timestamp, b *proto.PeerTime) bool {
   864  	return uint64(a.incTime.UnixNano()) == b.IncNumber && a.seqNum == b.SeqNum
   865  }
   866  
   867  func before(a *timestamp, b *proto.PeerTime) bool {
   868  	return (uint64(a.incTime.UnixNano()) == b.IncNumber && a.seqNum < b.SeqNum) ||
   869  		uint64(a.incTime.UnixNano()) < b.IncNumber
   870  }
   871  
   872  func getAliveTimeInterval() time.Duration {
   873  	return util.GetDurationOrDefault("peer.gossip.aliveTimeInterval", defaultHelloInterval)
   874  }
   875  
   876  func getAliveExpirationTimeout() time.Duration {
   877  	return util.GetDurationOrDefault("peer.gossip.aliveExpirationTimeout", 5*getAliveTimeInterval())
   878  }
   879  
   880  func getAliveExpirationCheckInterval() time.Duration {
   881  	if aliveExpirationCheckInterval != 0 {
   882  		return aliveExpirationCheckInterval
   883  	}
   884  
   885  	return time.Duration(getAliveExpirationTimeout() / 10)
   886  }
   887  
   888  func getReconnectInterval() time.Duration {
   889  	return util.GetDurationOrDefault("peer.gossip.reconnectInterval", getAliveExpirationTimeout())
   890  }
   891  
   892  func filterOutLocalhost(endpoints []string, port int) []string {
   893  	var returnedEndpoints []string
   894  	for _, endpoint := range endpoints {
   895  		if endpoint == fmt.Sprintf("127.0.0.1:%d", port) || endpoint == fmt.Sprintf("localhost:%d", port) {
   896  			continue
   897  		}
   898  		returnedEndpoints = append(returnedEndpoints, endpoint)
   899  	}
   900  	return returnedEndpoints
   901  }