github.com/myafeier/fabric@v1.0.1-0.20170722181825-3a4b1f2bce86/gossip/discovery/discovery_impl.go (about)

     1  /*
     2  Copyright IBM Corp. All Rights Reserved.
     3  
     4  SPDX-License-Identifier: Apache-2.0
     5  */
     6  
     7  package discovery
     8  
     9  import (
    10  	"bytes"
    11  	"errors"
    12  	"fmt"
    13  	"math"
    14  	"strconv"
    15  	"strings"
    16  	"sync"
    17  	"sync/atomic"
    18  	"time"
    19  
    20  	"github.com/hyperledger/fabric/gossip/common"
    21  	"github.com/hyperledger/fabric/gossip/gossip/msgstore"
    22  	"github.com/hyperledger/fabric/gossip/util"
    23  	proto "github.com/hyperledger/fabric/protos/gossip"
    24  	"github.com/op/go-logging"
    25  )
    26  
    27  const defaultHelloInterval = time.Duration(5) * time.Second
    28  const msgExpirationFactor = 20
    29  
    30  var aliveExpirationCheckInterval time.Duration
    31  var maxConnectionAttempts = 120
    32  
    33  // SetAliveTimeInterval sets the alive time interval
    34  func SetAliveTimeInterval(interval time.Duration) {
    35  	util.SetDuration("peer.gossip.aliveTimeInterval", interval)
    36  }
    37  
    38  // SetAliveExpirationTimeout sets the expiration timeout
    39  func SetAliveExpirationTimeout(timeout time.Duration) {
    40  	util.SetDuration("peer.gossip.aliveExpirationTimeout", timeout)
    41  	aliveExpirationCheckInterval = time.Duration(timeout / 10)
    42  }
    43  
    44  // SetAliveExpirationCheckInterval sets the expiration check interval
    45  func SetAliveExpirationCheckInterval(interval time.Duration) {
    46  	aliveExpirationCheckInterval = interval
    47  }
    48  
    49  // SetReconnectInterval sets the reconnect interval
    50  func SetReconnectInterval(interval time.Duration) {
    51  	util.SetDuration("peer.gossip.reconnectInterval", interval)
    52  }
    53  
    54  // SetMaxConnAttempts sets the maximum number of connection
    55  // attempts the peer would perform when invoking Connect()
    56  func SetMaxConnAttempts(attempts int) {
    57  	maxConnectionAttempts = attempts
    58  }
    59  
    60  type timestamp struct {
    61  	incTime  time.Time
    62  	seqNum   uint64
    63  	lastSeen time.Time
    64  }
    65  
    66  func (ts *timestamp) String() string {
    67  	return fmt.Sprintf("%v, %v", ts.incTime.UnixNano(), ts.seqNum)
    68  }
    69  
    70  type gossipDiscoveryImpl struct {
    71  	incTime         uint64
    72  	seqNum          uint64
    73  	self            NetworkMember
    74  	deadLastTS      map[string]*timestamp     // H
    75  	aliveLastTS     map[string]*timestamp     // V
    76  	id2Member       map[string]*NetworkMember // all known members
    77  	aliveMembership *util.MembershipStore
    78  	deadMembership  *util.MembershipStore
    79  
    80  	msgStore *aliveMsgStore
    81  
    82  	comm  CommService
    83  	crypt CryptoService
    84  	lock  *sync.RWMutex
    85  
    86  	toDieChan        chan struct{}
    87  	toDieFlag        int32
    88  	port             int
    89  	logger           *logging.Logger
    90  	disclosurePolicy DisclosurePolicy
    91  	pubsub           *util.PubSub
    92  }
    93  
    94  // NewDiscoveryService returns a new discovery service with the comm module passed and the crypto service passed
    95  func NewDiscoveryService(self NetworkMember, comm CommService, crypt CryptoService, disPol DisclosurePolicy) Discovery {
    96  	d := &gossipDiscoveryImpl{
    97  		self:             self,
    98  		incTime:          uint64(time.Now().UnixNano()),
    99  		seqNum:           uint64(0),
   100  		deadLastTS:       make(map[string]*timestamp),
   101  		aliveLastTS:      make(map[string]*timestamp),
   102  		id2Member:        make(map[string]*NetworkMember),
   103  		aliveMembership:  util.NewMembershipStore(),
   104  		deadMembership:   util.NewMembershipStore(),
   105  		crypt:            crypt,
   106  		comm:             comm,
   107  		lock:             &sync.RWMutex{},
   108  		toDieChan:        make(chan struct{}, 1),
   109  		toDieFlag:        int32(0),
   110  		logger:           util.GetLogger(util.LoggingDiscoveryModule, self.InternalEndpoint),
   111  		disclosurePolicy: disPol,
   112  		pubsub:           util.NewPubSub(),
   113  	}
   114  
   115  	d.validateSelfConfig()
   116  	d.msgStore = newAliveMsgStore(d)
   117  
   118  	go d.periodicalSendAlive()
   119  	go d.periodicalCheckAlive()
   120  	go d.handleMessages()
   121  	go d.periodicalReconnectToDead()
   122  	go d.handlePresumedDeadPeers()
   123  
   124  	d.logger.Info("Started", self, "incTime is", d.incTime)
   125  
   126  	return d
   127  }
   128  
   129  // Lookup returns a network member, or nil if not found
   130  func (d *gossipDiscoveryImpl) Lookup(PKIID common.PKIidType) *NetworkMember {
   131  	if bytes.Equal(PKIID, d.self.PKIid) {
   132  		return &d.self
   133  	}
   134  	d.lock.RLock()
   135  	defer d.lock.RUnlock()
   136  	nm := d.id2Member[string(PKIID)]
   137  	return nm
   138  }
   139  
   140  func (d *gossipDiscoveryImpl) Connect(member NetworkMember, id identifier) {
   141  	for _, endpoint := range []string{member.InternalEndpoint, member.Endpoint} {
   142  		if d.isMyOwnEndpoint(endpoint) {
   143  			d.logger.Debug("Skipping connecting to myself")
   144  			return
   145  		}
   146  	}
   147  
   148  	d.logger.Debug("Entering", member)
   149  	defer d.logger.Debug("Exiting")
   150  	go func() {
   151  		for i := 0; i < maxConnectionAttempts && !d.toDie(); i++ {
   152  			id, err := id()
   153  			if err != nil {
   154  				if d.toDie() {
   155  					return
   156  				}
   157  				d.logger.Warning("Could not connect to", member, ":", err)
   158  				time.Sleep(getReconnectInterval())
   159  				continue
   160  			}
   161  			peer := &NetworkMember{
   162  				InternalEndpoint: member.InternalEndpoint,
   163  				Endpoint:         member.Endpoint,
   164  				PKIid:            id.ID,
   165  			}
   166  			m, err := d.createMembershipRequest(id.SelfOrg)
   167  			if err != nil {
   168  				d.logger.Warning("Failed creating membership request:", err)
   169  				continue
   170  			}
   171  			req, err := m.NoopSign()
   172  			if err != nil {
   173  				d.logger.Warning("Failed creating SignedGossipMessage:", err)
   174  				continue
   175  			}
   176  			req.Nonce = util.RandomUInt64()
   177  			req, err = req.NoopSign()
   178  			if err != nil {
   179  				d.logger.Warning("Failed adding NONCE to SignedGossipMessage", err)
   180  				continue
   181  			}
   182  			go d.sendUntilAcked(peer, req)
   183  			return
   184  		}
   185  
   186  	}()
   187  }
   188  
   189  func (d *gossipDiscoveryImpl) isMyOwnEndpoint(endpoint string) bool {
   190  	return endpoint == fmt.Sprintf("127.0.0.1:%d", d.port) || endpoint == fmt.Sprintf("localhost:%d", d.port) ||
   191  		endpoint == d.self.InternalEndpoint || endpoint == d.self.Endpoint
   192  }
   193  
   194  func (d *gossipDiscoveryImpl) validateSelfConfig() {
   195  	endpoint := d.self.InternalEndpoint
   196  	if len(endpoint) == 0 {
   197  		d.logger.Panic("Internal endpoint is empty:", endpoint)
   198  	}
   199  
   200  	internalEndpointSplit := strings.Split(endpoint, ":")
   201  	if len(internalEndpointSplit) != 2 {
   202  		d.logger.Panicf("Self endpoint %s isn't formatted as 'host:port'", endpoint)
   203  	}
   204  	myPort, err := strconv.ParseInt(internalEndpointSplit[1], 10, 64)
   205  	if err != nil {
   206  		d.logger.Panicf("Self endpoint %s has not valid port'", endpoint)
   207  	}
   208  
   209  	if myPort > int64(math.MaxUint16) {
   210  		d.logger.Panicf("Self endpoint %s's port takes more than 16 bits", endpoint)
   211  	}
   212  
   213  	d.port = int(myPort)
   214  }
   215  
   216  func (d *gossipDiscoveryImpl) sendUntilAcked(peer *NetworkMember, message *proto.SignedGossipMessage) {
   217  	nonce := message.Nonce
   218  	for i := 0; i < maxConnectionAttempts && !d.toDie(); i++ {
   219  		sub := d.pubsub.Subscribe(fmt.Sprintf("%d", nonce), time.Second*5)
   220  		d.comm.SendToPeer(peer, message)
   221  		if _, timeoutErr := sub.Listen(); timeoutErr == nil {
   222  			return
   223  		}
   224  		time.Sleep(getReconnectInterval())
   225  	}
   226  }
   227  
   228  func (d *gossipDiscoveryImpl) InitiateSync(peerNum int) {
   229  	if d.toDie() {
   230  		return
   231  	}
   232  	var peers2SendTo []*NetworkMember
   233  	m, err := d.createMembershipRequest(true)
   234  	if err != nil {
   235  		d.logger.Warning("Failed creating membership request:", err)
   236  		return
   237  	}
   238  	memReq, err := m.NoopSign()
   239  	if err != nil {
   240  		d.logger.Warning("Failed creating SignedGossipMessage:", err)
   241  		return
   242  	}
   243  	d.lock.RLock()
   244  
   245  	n := d.aliveMembership.Size()
   246  	k := peerNum
   247  	if k > n {
   248  		k = n
   249  	}
   250  
   251  	aliveMembersAsSlice := d.aliveMembership.ToSlice()
   252  	for _, i := range util.GetRandomIndices(k, n-1) {
   253  		pulledPeer := aliveMembersAsSlice[i].GetAliveMsg().Membership
   254  		var internalEndpoint string
   255  		if aliveMembersAsSlice[i].Envelope.SecretEnvelope != nil {
   256  			internalEndpoint = aliveMembersAsSlice[i].Envelope.SecretEnvelope.InternalEndpoint()
   257  		}
   258  		netMember := &NetworkMember{
   259  			Endpoint:         pulledPeer.Endpoint,
   260  			Metadata:         pulledPeer.Metadata,
   261  			PKIid:            pulledPeer.PkiId,
   262  			InternalEndpoint: internalEndpoint,
   263  		}
   264  		peers2SendTo = append(peers2SendTo, netMember)
   265  	}
   266  
   267  	d.lock.RUnlock()
   268  
   269  	for _, netMember := range peers2SendTo {
   270  		d.comm.SendToPeer(netMember, memReq)
   271  	}
   272  }
   273  
   274  func (d *gossipDiscoveryImpl) handlePresumedDeadPeers() {
   275  	defer d.logger.Debug("Stopped")
   276  
   277  	for !d.toDie() {
   278  		select {
   279  		case deadPeer := <-d.comm.PresumedDead():
   280  			if d.isAlive(deadPeer) {
   281  				d.expireDeadMembers([]common.PKIidType{deadPeer})
   282  			}
   283  		case s := <-d.toDieChan:
   284  			d.toDieChan <- s
   285  			return
   286  		}
   287  	}
   288  }
   289  
   290  func (d *gossipDiscoveryImpl) isAlive(pkiID common.PKIidType) bool {
   291  	d.lock.RLock()
   292  	defer d.lock.RUnlock()
   293  	_, alive := d.aliveLastTS[string(pkiID)]
   294  	return alive
   295  }
   296  
   297  func (d *gossipDiscoveryImpl) handleMessages() {
   298  	defer d.logger.Debug("Stopped")
   299  
   300  	in := d.comm.Accept()
   301  	for !d.toDie() {
   302  		select {
   303  		case s := <-d.toDieChan:
   304  			d.toDieChan <- s
   305  			return
   306  		case m := <-in:
   307  			d.handleMsgFromComm(m)
   308  		}
   309  	}
   310  }
   311  
   312  func (d *gossipDiscoveryImpl) handleMsgFromComm(m *proto.SignedGossipMessage) {
   313  	if m == nil {
   314  		return
   315  	}
   316  	if m.GetAliveMsg() == nil && m.GetMemRes() == nil && m.GetMemReq() == nil {
   317  		d.logger.Warning("Got message with wrong type (expected Alive or MembershipResponse or MembershipRequest message):", m.GossipMessage)
   318  		return
   319  	}
   320  
   321  	d.logger.Debug("Got message:", m)
   322  	defer d.logger.Debug("Exiting")
   323  
   324  	if memReq := m.GetMemReq(); memReq != nil {
   325  		selfInfoGossipMsg, err := memReq.SelfInformation.ToGossipMessage()
   326  		if err != nil {
   327  			d.logger.Warning("Failed deserializing GossipMessage from envelope:", err)
   328  			return
   329  		}
   330  
   331  		if d.msgStore.CheckValid(selfInfoGossipMsg) {
   332  			d.handleAliveMessage(selfInfoGossipMsg)
   333  		}
   334  
   335  		var internalEndpoint string
   336  		if m.Envelope.SecretEnvelope != nil {
   337  			internalEndpoint = m.Envelope.SecretEnvelope.InternalEndpoint()
   338  		}
   339  
   340  		// Sending a membership response to a peer may block this routine
   341  		// in case the sending is deliberately slow (i.e attack).
   342  		// will keep this async until I'll write a timeout detector in the comm layer
   343  		go d.sendMemResponse(selfInfoGossipMsg.GetAliveMsg().Membership, internalEndpoint, m.Nonce)
   344  		return
   345  	}
   346  
   347  	if m.IsAliveMsg() {
   348  
   349  		if !d.msgStore.Add(m) {
   350  			return
   351  		}
   352  		d.handleAliveMessage(m)
   353  
   354  		d.comm.Gossip(m)
   355  		return
   356  	}
   357  
   358  	if memResp := m.GetMemRes(); memResp != nil {
   359  		d.pubsub.Publish(fmt.Sprintf("%d", m.Nonce), m.Nonce)
   360  		for _, env := range memResp.Alive {
   361  			am, err := env.ToGossipMessage()
   362  			if err != nil {
   363  				d.logger.Warning("Membership response contains an invalid message from an online peer:", err)
   364  				return
   365  			}
   366  			if !am.IsAliveMsg() {
   367  				d.logger.Warning("Expected alive message, got", am, "instead")
   368  				return
   369  			}
   370  
   371  			if d.msgStore.CheckValid(am) {
   372  				d.handleAliveMessage(am)
   373  			}
   374  		}
   375  
   376  		for _, env := range memResp.Dead {
   377  			dm, err := env.ToGossipMessage()
   378  			if err != nil {
   379  				d.logger.Warning("Membership response contains an invalid message from an offline peer", err)
   380  				return
   381  			}
   382  			if !d.crypt.ValidateAliveMsg(dm) {
   383  				d.logger.Debugf("Alive message isn't authentic, someone spoofed %s's identity", dm.GetAliveMsg().Membership)
   384  				continue
   385  			}
   386  
   387  			if !d.msgStore.CheckValid(dm) {
   388  				//Newer alive message exist
   389  				return
   390  			}
   391  
   392  			newDeadMembers := []*proto.SignedGossipMessage{}
   393  			d.lock.RLock()
   394  			if _, known := d.id2Member[string(dm.GetAliveMsg().Membership.PkiId)]; !known {
   395  				newDeadMembers = append(newDeadMembers, dm)
   396  			}
   397  			d.lock.RUnlock()
   398  			d.learnNewMembers([]*proto.SignedGossipMessage{}, newDeadMembers)
   399  		}
   400  	}
   401  }
   402  
   403  func (d *gossipDiscoveryImpl) sendMemResponse(targetMember *proto.Member, internalEndpoint string, nonce uint64) {
   404  	d.logger.Debug("Entering", targetMember)
   405  
   406  	targetPeer := &NetworkMember{
   407  		Endpoint:         targetMember.Endpoint,
   408  		Metadata:         targetMember.Metadata,
   409  		PKIid:            targetMember.PkiId,
   410  		InternalEndpoint: internalEndpoint,
   411  	}
   412  
   413  	aliveMsg, err := d.createAliveMessage(true)
   414  	if err != nil {
   415  		d.logger.Warning("Failed creating alive message:", err)
   416  		return
   417  	}
   418  	memResp := d.createMembershipResponse(aliveMsg, targetPeer)
   419  	if memResp == nil {
   420  		errMsg := `Got a membership request from a peer that shouldn't have sent one: %v, closing connection to the peer as a result.`
   421  		d.logger.Warningf(errMsg, targetMember)
   422  		d.comm.CloseConn(targetPeer)
   423  		return
   424  	}
   425  
   426  	defer d.logger.Debug("Exiting, replying with", memResp)
   427  
   428  	msg, err := (&proto.GossipMessage{
   429  		Tag:   proto.GossipMessage_EMPTY,
   430  		Nonce: nonce,
   431  		Content: &proto.GossipMessage_MemRes{
   432  			MemRes: memResp,
   433  		},
   434  	}).NoopSign()
   435  	if err != nil {
   436  		d.logger.Warning("Failed creating SignedGossipMessage:", err)
   437  		return
   438  	}
   439  	d.comm.SendToPeer(targetPeer, msg)
   440  }
   441  
   442  func (d *gossipDiscoveryImpl) createMembershipResponse(aliveMsg *proto.SignedGossipMessage, targetMember *NetworkMember) *proto.MembershipResponse {
   443  	shouldBeDisclosed, omitConcealedFields := d.disclosurePolicy(targetMember)
   444  
   445  	if !shouldBeDisclosed(aliveMsg) {
   446  		return nil
   447  	}
   448  
   449  	d.lock.RLock()
   450  	defer d.lock.RUnlock()
   451  
   452  	deadPeers := []*proto.Envelope{}
   453  
   454  	for _, dm := range d.deadMembership.ToSlice() {
   455  
   456  		if !shouldBeDisclosed(dm) {
   457  			continue
   458  		}
   459  		deadPeers = append(deadPeers, omitConcealedFields(dm))
   460  	}
   461  
   462  	var aliveSnapshot []*proto.Envelope
   463  	for _, am := range d.aliveMembership.ToSlice() {
   464  		if !shouldBeDisclosed(am) {
   465  			continue
   466  		}
   467  		aliveSnapshot = append(aliveSnapshot, omitConcealedFields(am))
   468  	}
   469  
   470  	return &proto.MembershipResponse{
   471  		Alive: append(aliveSnapshot, omitConcealedFields(aliveMsg)),
   472  		Dead:  deadPeers,
   473  	}
   474  }
   475  
   476  func (d *gossipDiscoveryImpl) handleAliveMessage(m *proto.SignedGossipMessage) {
   477  	d.logger.Debug("Entering", m)
   478  	defer d.logger.Debug("Exiting")
   479  
   480  	if !d.crypt.ValidateAliveMsg(m) {
   481  		d.logger.Debugf("Alive message isn't authentic, someone must be spoofing %s's identity", m.GetAliveMsg())
   482  		return
   483  	}
   484  
   485  	pkiID := m.GetAliveMsg().Membership.PkiId
   486  	if equalPKIid(pkiID, d.self.PKIid) {
   487  		d.logger.Debug("Got alive message about ourselves,", m)
   488  		diffExternalEndpoint := d.self.Endpoint != m.GetAliveMsg().Membership.Endpoint
   489  		var diffInternalEndpoint bool
   490  		secretEnvelope := m.GetSecretEnvelope()
   491  		if secretEnvelope != nil && secretEnvelope.InternalEndpoint() != "" {
   492  			diffInternalEndpoint = secretEnvelope.InternalEndpoint() != d.self.InternalEndpoint
   493  		}
   494  		if diffInternalEndpoint || diffExternalEndpoint {
   495  			d.logger.Error("Bad configuration detected: Received AliveMessage from a peer with the same PKI-ID as myself:", m.GossipMessage)
   496  		}
   497  
   498  		return
   499  	}
   500  
   501  	ts := m.GetAliveMsg().Timestamp
   502  
   503  	d.lock.RLock()
   504  	_, known := d.id2Member[string(pkiID)]
   505  	d.lock.RUnlock()
   506  
   507  	if !known {
   508  		d.learnNewMembers([]*proto.SignedGossipMessage{m}, []*proto.SignedGossipMessage{})
   509  		return
   510  	}
   511  
   512  	d.lock.RLock()
   513  	_, isAlive := d.aliveLastTS[string(pkiID)]
   514  	lastDeadTS, isDead := d.deadLastTS[string(pkiID)]
   515  	d.lock.RUnlock()
   516  
   517  	if !isAlive && !isDead {
   518  		d.logger.Panicf("Member %s is known but not found neither in alive nor in dead lastTS maps, isAlive=%v, isDead=%v", m.GetAliveMsg().Membership.Endpoint, isAlive, isDead)
   519  		return
   520  	}
   521  
   522  	if isAlive && isDead {
   523  		d.logger.Panicf("Member %s is both alive and dead at the same time", m.GetAliveMsg().Membership)
   524  		return
   525  	}
   526  
   527  	if isDead {
   528  		if before(lastDeadTS, ts) {
   529  			// resurrect peer
   530  			d.resurrectMember(m, *ts)
   531  		} else if !same(lastDeadTS, ts) {
   532  			d.logger.Debug(m.GetAliveMsg().Membership, "lastDeadTS:", lastDeadTS, "but got ts:", ts)
   533  		}
   534  		return
   535  	}
   536  
   537  	d.lock.RLock()
   538  	lastAliveTS, isAlive := d.aliveLastTS[string(pkiID)]
   539  	d.lock.RUnlock()
   540  
   541  	if isAlive {
   542  		if before(lastAliveTS, ts) {
   543  			d.learnExistingMembers([]*proto.SignedGossipMessage{m})
   544  		} else if !same(lastAliveTS, ts) {
   545  			d.logger.Debug(m.GetAliveMsg().Membership, "lastAliveTS:", lastAliveTS, "but got ts:", ts)
   546  		}
   547  
   548  	}
   549  	// else, ignore the message because it is too old
   550  }
   551  
   552  func (d *gossipDiscoveryImpl) resurrectMember(am *proto.SignedGossipMessage, t proto.PeerTime) {
   553  	d.logger.Info("Entering, AliveMessage:", am, "t:", t)
   554  	defer d.logger.Info("Exiting")
   555  	d.lock.Lock()
   556  	defer d.lock.Unlock()
   557  
   558  	member := am.GetAliveMsg().Membership
   559  	pkiID := member.PkiId
   560  	d.aliveLastTS[string(pkiID)] = &timestamp{
   561  		lastSeen: time.Now(),
   562  		seqNum:   t.SeqNum,
   563  		incTime:  tsToTime(t.IncNum),
   564  	}
   565  
   566  	var internalEndpoint string
   567  	if prevNetMem := d.id2Member[string(pkiID)]; prevNetMem != nil {
   568  		internalEndpoint = prevNetMem.InternalEndpoint
   569  	}
   570  	if am.Envelope.SecretEnvelope != nil {
   571  		internalEndpoint = am.Envelope.SecretEnvelope.InternalEndpoint()
   572  	}
   573  
   574  	d.id2Member[string(pkiID)] = &NetworkMember{
   575  		Endpoint:         member.Endpoint,
   576  		Metadata:         member.Metadata,
   577  		PKIid:            member.PkiId,
   578  		InternalEndpoint: internalEndpoint,
   579  	}
   580  
   581  	delete(d.deadLastTS, string(pkiID))
   582  	d.deadMembership.Remove(common.PKIidType(pkiID))
   583  	d.aliveMembership.Put(common.PKIidType(pkiID), &proto.SignedGossipMessage{GossipMessage: am.GossipMessage, Envelope: am.Envelope})
   584  }
   585  
   586  func (d *gossipDiscoveryImpl) periodicalReconnectToDead() {
   587  	defer d.logger.Debug("Stopped")
   588  
   589  	for !d.toDie() {
   590  		wg := &sync.WaitGroup{}
   591  
   592  		for _, member := range d.copyLastSeen(d.deadLastTS) {
   593  			wg.Add(1)
   594  			go func(member NetworkMember) {
   595  				defer wg.Done()
   596  				if d.comm.Ping(&member) {
   597  					d.logger.Debug(member, "is responding, sending membership request")
   598  					d.sendMembershipRequest(&member, true)
   599  				} else {
   600  					d.logger.Debug(member, "is still dead")
   601  				}
   602  			}(member)
   603  		}
   604  
   605  		wg.Wait()
   606  		d.logger.Debug("Sleeping", getReconnectInterval())
   607  		time.Sleep(getReconnectInterval())
   608  	}
   609  }
   610  
   611  func (d *gossipDiscoveryImpl) sendMembershipRequest(member *NetworkMember, includeInternalEndpoint bool) {
   612  	m, err := d.createMembershipRequest(includeInternalEndpoint)
   613  	if err != nil {
   614  		d.logger.Warning("Failed creating membership request:", err)
   615  		return
   616  	}
   617  	req, err := m.NoopSign()
   618  	if err != nil {
   619  		d.logger.Error("Failed creating SignedGossipMessage:", err)
   620  		return
   621  	}
   622  	d.comm.SendToPeer(member, req)
   623  }
   624  
   625  func (d *gossipDiscoveryImpl) createMembershipRequest(includeInternalEndpoint bool) (*proto.GossipMessage, error) {
   626  	am, err := d.createAliveMessage(includeInternalEndpoint)
   627  	if err != nil {
   628  		return nil, err
   629  	}
   630  	req := &proto.MembershipRequest{
   631  		SelfInformation: am.Envelope,
   632  		// TODO: sending the known peers is not secure because the remote peer might shouldn't know
   633  		// TODO: about the known peers. I'm deprecating this until a secure mechanism will be implemented.
   634  		// TODO: See FAB-2570 for tracking this issue.
   635  		Known: [][]byte{},
   636  	}
   637  	return &proto.GossipMessage{
   638  		Tag:   proto.GossipMessage_EMPTY,
   639  		Nonce: uint64(0),
   640  		Content: &proto.GossipMessage_MemReq{
   641  			MemReq: req,
   642  		},
   643  	}, nil
   644  }
   645  
   646  func (d *gossipDiscoveryImpl) copyLastSeen(lastSeenMap map[string]*timestamp) []NetworkMember {
   647  	d.lock.RLock()
   648  	defer d.lock.RUnlock()
   649  
   650  	res := []NetworkMember{}
   651  	for pkiIDStr := range lastSeenMap {
   652  		res = append(res, *(d.id2Member[pkiIDStr]))
   653  	}
   654  	return res
   655  }
   656  
   657  func (d *gossipDiscoveryImpl) periodicalCheckAlive() {
   658  	defer d.logger.Debug("Stopped")
   659  
   660  	for !d.toDie() {
   661  		time.Sleep(getAliveExpirationCheckInterval())
   662  		dead := d.getDeadMembers()
   663  		if len(dead) > 0 {
   664  			d.logger.Debugf("Got %v dead members: %v", len(dead), dead)
   665  			d.expireDeadMembers(dead)
   666  		}
   667  	}
   668  }
   669  
   670  func (d *gossipDiscoveryImpl) expireDeadMembers(dead []common.PKIidType) {
   671  	d.logger.Warning("Entering", dead)
   672  	defer d.logger.Warning("Exiting")
   673  
   674  	var deadMembers2Expire []*NetworkMember
   675  
   676  	d.lock.Lock()
   677  
   678  	for _, pkiID := range dead {
   679  		if _, isAlive := d.aliveLastTS[string(pkiID)]; !isAlive {
   680  			continue
   681  		}
   682  		deadMembers2Expire = append(deadMembers2Expire, d.id2Member[string(pkiID)])
   683  		// move lastTS from alive to dead
   684  		lastTS, hasLastTS := d.aliveLastTS[string(pkiID)]
   685  		if hasLastTS {
   686  			d.deadLastTS[string(pkiID)] = lastTS
   687  			delete(d.aliveLastTS, string(pkiID))
   688  		}
   689  
   690  		if am := d.aliveMembership.MsgByID(pkiID); am != nil {
   691  			d.deadMembership.Put(pkiID, am)
   692  			d.aliveMembership.Remove(pkiID)
   693  		}
   694  	}
   695  
   696  	d.lock.Unlock()
   697  
   698  	for _, member2Expire := range deadMembers2Expire {
   699  		d.logger.Warning("Closing connection to", member2Expire)
   700  		d.comm.CloseConn(member2Expire)
   701  	}
   702  }
   703  
   704  func (d *gossipDiscoveryImpl) getDeadMembers() []common.PKIidType {
   705  	d.lock.RLock()
   706  	defer d.lock.RUnlock()
   707  
   708  	dead := []common.PKIidType{}
   709  	for id, last := range d.aliveLastTS {
   710  		elapsedNonAliveTime := time.Since(last.lastSeen)
   711  		if elapsedNonAliveTime.Nanoseconds() > getAliveExpirationTimeout().Nanoseconds() {
   712  			d.logger.Warning("Haven't heard from", []byte(id), "for", elapsedNonAliveTime)
   713  			dead = append(dead, common.PKIidType(id))
   714  		}
   715  	}
   716  	return dead
   717  }
   718  
   719  func (d *gossipDiscoveryImpl) periodicalSendAlive() {
   720  	defer d.logger.Debug("Stopped")
   721  
   722  	for !d.toDie() {
   723  		d.logger.Debug("Sleeping", getAliveTimeInterval())
   724  		time.Sleep(getAliveTimeInterval())
   725  		msg, err := d.createAliveMessage(true)
   726  		if err != nil {
   727  			d.logger.Warning("Failed creating alive message:", err)
   728  			return
   729  		}
   730  		d.comm.Gossip(msg)
   731  	}
   732  }
   733  
   734  func (d *gossipDiscoveryImpl) createAliveMessage(includeInternalEndpoint bool) (*proto.SignedGossipMessage, error) {
   735  	d.lock.Lock()
   736  	d.seqNum++
   737  	seqNum := d.seqNum
   738  
   739  	endpoint := d.self.Endpoint
   740  	meta := d.self.Metadata
   741  	pkiID := d.self.PKIid
   742  	internalEndpoint := d.self.InternalEndpoint
   743  
   744  	d.lock.Unlock()
   745  
   746  	msg2Gossip := &proto.GossipMessage{
   747  		Tag: proto.GossipMessage_EMPTY,
   748  		Content: &proto.GossipMessage_AliveMsg{
   749  			AliveMsg: &proto.AliveMessage{
   750  				Membership: &proto.Member{
   751  					Endpoint: endpoint,
   752  					Metadata: meta,
   753  					PkiId:    pkiID,
   754  				},
   755  				Timestamp: &proto.PeerTime{
   756  					IncNum: uint64(d.incTime),
   757  					SeqNum: seqNum,
   758  				},
   759  			},
   760  		},
   761  	}
   762  
   763  	envp := d.crypt.SignMessage(msg2Gossip, internalEndpoint)
   764  	if envp == nil {
   765  		return nil, errors.New("Failed signing message")
   766  	}
   767  	signedMsg := &proto.SignedGossipMessage{
   768  		GossipMessage: msg2Gossip,
   769  		Envelope:      envp,
   770  	}
   771  
   772  	if !includeInternalEndpoint {
   773  		signedMsg.Envelope.SecretEnvelope = nil
   774  	}
   775  
   776  	return signedMsg, nil
   777  }
   778  
   779  func (d *gossipDiscoveryImpl) learnExistingMembers(aliveArr []*proto.SignedGossipMessage) {
   780  	d.logger.Debugf("Entering: learnedMembers={%v}", aliveArr)
   781  	defer d.logger.Debug("Exiting")
   782  
   783  	d.lock.Lock()
   784  	defer d.lock.Unlock()
   785  
   786  	for _, m := range aliveArr {
   787  		am := m.GetAliveMsg()
   788  		if m == nil {
   789  			d.logger.Warning("Expected alive message, got instead:", m)
   790  			return
   791  		}
   792  		d.logger.Debug("updating", am)
   793  
   794  		var internalEndpoint string
   795  		if prevNetMem := d.id2Member[string(am.Membership.PkiId)]; prevNetMem != nil {
   796  			internalEndpoint = prevNetMem.InternalEndpoint
   797  		}
   798  		if m.Envelope.SecretEnvelope != nil {
   799  			internalEndpoint = m.Envelope.SecretEnvelope.InternalEndpoint()
   800  		}
   801  
   802  		// update member's data
   803  		member := d.id2Member[string(am.Membership.PkiId)]
   804  		member.Endpoint = am.Membership.Endpoint
   805  		member.Metadata = am.Membership.Metadata
   806  		member.InternalEndpoint = internalEndpoint
   807  
   808  		if _, isKnownAsDead := d.deadLastTS[string(am.Membership.PkiId)]; isKnownAsDead {
   809  			d.logger.Warning(am.Membership, "has already expired")
   810  			continue
   811  		}
   812  
   813  		if _, isKnownAsAlive := d.aliveLastTS[string(am.Membership.PkiId)]; !isKnownAsAlive {
   814  			d.logger.Warning(am.Membership, "has already expired")
   815  			continue
   816  		} else {
   817  			d.logger.Debug("Updating aliveness data:", am)
   818  			// update existing aliveness data
   819  			alive := d.aliveLastTS[string(am.Membership.PkiId)]
   820  			alive.incTime = tsToTime(am.Timestamp.IncNum)
   821  			alive.lastSeen = time.Now()
   822  			alive.seqNum = am.Timestamp.SeqNum
   823  
   824  			if am := d.aliveMembership.MsgByID(m.GetAliveMsg().Membership.PkiId); am == nil {
   825  				d.logger.Debug("Adding", am, "to aliveMembership")
   826  				msg := &proto.SignedGossipMessage{GossipMessage: m.GossipMessage, Envelope: am.Envelope}
   827  				d.aliveMembership.Put(m.GetAliveMsg().Membership.PkiId, msg)
   828  			} else {
   829  				d.logger.Debug("Replacing", am, "in aliveMembership")
   830  				am.GossipMessage = m.GossipMessage
   831  				am.Envelope = m.Envelope
   832  			}
   833  		}
   834  	}
   835  }
   836  
   837  func (d *gossipDiscoveryImpl) learnNewMembers(aliveMembers []*proto.SignedGossipMessage, deadMembers []*proto.SignedGossipMessage) {
   838  	d.logger.Debugf("Entering: learnedMembers={%v}, deadMembers={%v}", aliveMembers, deadMembers)
   839  	defer d.logger.Debugf("Exiting")
   840  
   841  	d.lock.Lock()
   842  	defer d.lock.Unlock()
   843  
   844  	for _, am := range aliveMembers {
   845  		if equalPKIid(am.GetAliveMsg().Membership.PkiId, d.self.PKIid) {
   846  			continue
   847  		}
   848  		d.aliveLastTS[string(am.GetAliveMsg().Membership.PkiId)] = &timestamp{
   849  			incTime:  tsToTime(am.GetAliveMsg().Timestamp.IncNum),
   850  			lastSeen: time.Now(),
   851  			seqNum:   am.GetAliveMsg().Timestamp.SeqNum,
   852  		}
   853  
   854  		d.aliveMembership.Put(am.GetAliveMsg().Membership.PkiId, &proto.SignedGossipMessage{GossipMessage: am.GossipMessage, Envelope: am.Envelope})
   855  		d.logger.Debugf("Learned about a new alive member: %v", am)
   856  	}
   857  
   858  	for _, dm := range deadMembers {
   859  		if equalPKIid(dm.GetAliveMsg().Membership.PkiId, d.self.PKIid) {
   860  			continue
   861  		}
   862  		d.deadLastTS[string(dm.GetAliveMsg().Membership.PkiId)] = &timestamp{
   863  			incTime:  tsToTime(dm.GetAliveMsg().Timestamp.IncNum),
   864  			lastSeen: time.Now(),
   865  			seqNum:   dm.GetAliveMsg().Timestamp.SeqNum,
   866  		}
   867  
   868  		d.deadMembership.Put(dm.GetAliveMsg().Membership.PkiId, &proto.SignedGossipMessage{GossipMessage: dm.GossipMessage, Envelope: dm.Envelope})
   869  		d.logger.Debugf("Learned about a new dead member: %v", dm)
   870  	}
   871  
   872  	// update the member in any case
   873  	for _, a := range [][]*proto.SignedGossipMessage{aliveMembers, deadMembers} {
   874  		for _, m := range a {
   875  			member := m.GetAliveMsg()
   876  			if member == nil {
   877  				d.logger.Warning("Expected alive message, got instead:", m)
   878  				return
   879  			}
   880  
   881  			var internalEndpoint string
   882  			if m.Envelope.SecretEnvelope != nil {
   883  				internalEndpoint = m.Envelope.SecretEnvelope.InternalEndpoint()
   884  			}
   885  
   886  			if prevNetMem := d.id2Member[string(member.Membership.PkiId)]; prevNetMem != nil {
   887  				internalEndpoint = prevNetMem.InternalEndpoint
   888  			}
   889  
   890  			d.id2Member[string(member.Membership.PkiId)] = &NetworkMember{
   891  				Endpoint:         member.Membership.Endpoint,
   892  				Metadata:         member.Membership.Metadata,
   893  				PKIid:            member.Membership.PkiId,
   894  				InternalEndpoint: internalEndpoint,
   895  			}
   896  		}
   897  	}
   898  }
   899  
   900  func (d *gossipDiscoveryImpl) GetMembership() []NetworkMember {
   901  	if d.toDie() {
   902  		return []NetworkMember{}
   903  	}
   904  	d.lock.RLock()
   905  	defer d.lock.RUnlock()
   906  
   907  	response := []NetworkMember{}
   908  	for _, m := range d.aliveMembership.ToSlice() {
   909  		member := m.GetAliveMsg()
   910  		response = append(response, NetworkMember{
   911  			PKIid:            member.Membership.PkiId,
   912  			Endpoint:         member.Membership.Endpoint,
   913  			Metadata:         member.Membership.Metadata,
   914  			InternalEndpoint: d.id2Member[string(m.GetAliveMsg().Membership.PkiId)].InternalEndpoint,
   915  		})
   916  	}
   917  	return response
   918  
   919  }
   920  
   921  func tsToTime(ts uint64) time.Time {
   922  	return time.Unix(int64(0), int64(ts))
   923  }
   924  
// UpdateMetadata replaces the metadata of this peer's own membership record
// (d.self.Metadata) with md, holding d.lock for writing while doing so.
// The slice is stored as-is, not copied.
func (d *gossipDiscoveryImpl) UpdateMetadata(md []byte) {
	d.lock.Lock()
	defer d.lock.Unlock()
	d.self.Metadata = md
}
   930  
// UpdateEndpoint replaces the endpoint of this peer's own membership record
// (d.self.Endpoint) with endpoint, holding d.lock for writing while doing so.
func (d *gossipDiscoveryImpl) UpdateEndpoint(endpoint string) {
	d.lock.Lock()
	defer d.lock.Unlock()

	d.self.Endpoint = endpoint
}
   937  
   938  func (d *gossipDiscoveryImpl) Self() NetworkMember {
   939  	return NetworkMember{
   940  		Endpoint:         d.self.Endpoint,
   941  		Metadata:         d.self.Metadata,
   942  		PKIid:            d.self.PKIid,
   943  		InternalEndpoint: d.self.InternalEndpoint,
   944  	}
   945  }
   946  
   947  func (d *gossipDiscoveryImpl) toDie() bool {
   948  	toDie := atomic.LoadInt32(&d.toDieFlag) == int32(1)
   949  	return toDie
   950  }
   951  
   952  func (d *gossipDiscoveryImpl) Stop() {
   953  	defer d.logger.Info("Stopped")
   954  	d.logger.Info("Stopping")
   955  	atomic.StoreInt32(&d.toDieFlag, int32(1))
   956  	d.msgStore.Stop()
   957  	d.toDieChan <- struct{}{}
   958  }
   959  
// equalPKIid reports whether the PKI-IDs a and b hold the same bytes.
// Because it uses bytes.Equal, a nil ID and an empty ID compare as equal.
func equalPKIid(a, b common.PKIidType) bool {
	return bytes.Equal(a, b)
}
   963  
   964  func same(a *timestamp, b *proto.PeerTime) bool {
   965  	return uint64(a.incTime.UnixNano()) == b.IncNum && a.seqNum == b.SeqNum
   966  }
   967  
   968  func before(a *timestamp, b *proto.PeerTime) bool {
   969  	return (uint64(a.incTime.UnixNano()) == b.IncNum && a.seqNum < b.SeqNum) ||
   970  		uint64(a.incTime.UnixNano()) < b.IncNum
   971  }
   972  
   973  func getAliveTimeInterval() time.Duration {
   974  	return util.GetDurationOrDefault("peer.gossip.aliveTimeInterval", defaultHelloInterval)
   975  }
   976  
   977  func getAliveExpirationTimeout() time.Duration {
   978  	return util.GetDurationOrDefault("peer.gossip.aliveExpirationTimeout", 5*getAliveTimeInterval())
   979  }
   980  
   981  func getAliveExpirationCheckInterval() time.Duration {
   982  	if aliveExpirationCheckInterval != 0 {
   983  		return aliveExpirationCheckInterval
   984  	}
   985  
   986  	return time.Duration(getAliveExpirationTimeout() / 10)
   987  }
   988  
   989  func getReconnectInterval() time.Duration {
   990  	return util.GetDurationOrDefault("peer.gossip.reconnectInterval", getAliveExpirationTimeout())
   991  }
   992  
// aliveMsgStore embeds a msgstore.MessageStore and overrides Add and
// CheckValid so that both panic when handed a message that is not an
// alive message.
type aliveMsgStore struct {
	msgstore.MessageStore
}
   996  
   997  func newAliveMsgStore(d *gossipDiscoveryImpl) *aliveMsgStore {
   998  	policy := proto.NewGossipMessageComparator(0)
   999  	trigger := func(m interface{}) {}
  1000  	aliveMsgTTL := getAliveExpirationTimeout() * msgExpirationFactor
  1001  	externalLock := func() { d.lock.Lock() }
  1002  	externalUnlock := func() { d.lock.Unlock() }
  1003  	callback := func(m interface{}) {
  1004  		msg := m.(*proto.SignedGossipMessage)
  1005  		if !msg.IsAliveMsg() {
  1006  			return
  1007  		}
  1008  		id := msg.GetAliveMsg().Membership.PkiId
  1009  		d.aliveMembership.Remove(id)
  1010  		d.deadMembership.Remove(id)
  1011  		delete(d.id2Member, string(id))
  1012  		delete(d.deadLastTS, string(id))
  1013  		delete(d.aliveLastTS, string(id))
  1014  	}
  1015  
  1016  	s := &aliveMsgStore{
  1017  		MessageStore: msgstore.NewMessageStoreExpirable(policy, trigger, aliveMsgTTL, externalLock, externalUnlock, callback),
  1018  	}
  1019  	return s
  1020  }
  1021  
  1022  func (s *aliveMsgStore) Add(msg interface{}) bool {
  1023  	if !msg.(*proto.SignedGossipMessage).IsAliveMsg() {
  1024  		panic(fmt.Sprint("Msg ", msg, " is not AliveMsg"))
  1025  	}
  1026  	return s.MessageStore.Add(msg)
  1027  }
  1028  
  1029  func (s *aliveMsgStore) CheckValid(msg interface{}) bool {
  1030  	if !msg.(*proto.SignedGossipMessage).IsAliveMsg() {
  1031  		panic(fmt.Sprint("Msg ", msg, " is not AliveMsg"))
  1032  	}
  1033  	return s.MessageStore.CheckValid(msg)
  1034  }