github.com/aergoio/aergo@v1.3.1/p2p/remotepeer.go

/*
 * @file
 * @copyright defined in aergo/LICENSE.txt
 */

package p2p

import (
	"fmt"
	"runtime/debug"
	"sync"
	"time"

	"github.com/pkg/errors"

	"github.com/aergoio/aergo/p2p/metric"
	"github.com/aergoio/aergo/p2p/p2pcommon"

	lru "github.com/hashicorp/golang-lru"

	"github.com/aergoio/aergo-lib/log"
	"github.com/aergoio/aergo/p2p/p2putil"
	"github.com/aergoio/aergo/types"
)

var TimeoutError = errors.New("timeout")
var CancelError = errors.New("canceled")

type requestInfo struct {
	cTime    time.Time
	reqMO    p2pcommon.MsgOrder
	receiver p2pcommon.ResponseReceiver
}

type queryMsg struct {
	handler p2pcommon.MessageHandler
	msg     p2pcommon.Message
	msgBody p2pcommon.MessageBody
}

// remotePeerImpl represents a remote peer to which the local peer is connected
type remotePeerImpl struct {
	logger       *log.Logger
	pingDuration time.Duration

	manageNum uint32
	meta      p2pcommon.PeerMeta
	name      string
	state     types.PeerState
	role      p2pcommon.PeerRole
	actor     p2pcommon.ActorService
	pm        p2pcommon.PeerManager
	mf        p2pcommon.MoFactory
	signer    p2pcommon.MsgSigner
	metric    *metric.PeerMetric
	tnt       p2pcommon.TxNoticeTracer

	stopChan chan struct{}

	// direct write channel
	dWrite     chan p2pcommon.MsgOrder
	closeWrite chan struct{}

	// used to access request data from response handlers
	requests map[p2pcommon.MsgID]*requestInfo
	reqMutex *sync.Mutex

	handlers map[p2pcommon.SubProtocol]p2pcommon.MessageHandler

	// TODO make automatic disconnect if the remote peer causes too many wrong messages
	blkHashCache *lru.Cache
	txHashCache  *lru.Cache
	lastStatus   *types.LastBlockStatus
	// lastBlkNoticeTime is the time when the local peer last sent a NewBlockNotice to this remote peer
	lastBlkNoticeTime time.Time
	skipCnt           int32

	txQueueLock         *sync.Mutex
	txNoticeQueue       *p2putil.PressableQueue
	maxTxNoticeHashSize int

	rw p2pcommon.MsgReadWriter
}
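
// Concurrency overview: RunPeer owns the lifecycle loop and launches the
// runWrite and runRead goroutines. Only runWrite writes to rw, and all
// outbound traffic is funneled through the buffered dWrite channel; only
// runRead reads from rw. stopChan and closeWrite coordinate shutdown, while
// requests/reqMutex route responses back to their registered ResponseReceiver.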

var _ p2pcommon.RemotePeer = (*remotePeerImpl)(nil)

// newRemotePeer creates an object which represents a remote peer.
func newRemotePeer(meta p2pcommon.PeerMeta, manageNum uint32, pm p2pcommon.PeerManager, actor p2pcommon.ActorService, log *log.Logger, mf p2pcommon.MoFactory, signer p2pcommon.MsgSigner, rw p2pcommon.MsgReadWriter) *remotePeerImpl {
	rPeer := &remotePeerImpl{
		meta: meta, manageNum: manageNum, pm: pm,
		name:  fmt.Sprintf("%s#%d", p2putil.ShortForm(meta.ID), manageNum),
		actor: actor, logger: log, mf: mf, signer: signer, rw: rw,
		pingDuration: defaultPingInterval,
		state:        types.STARTING,

		lastStatus: &types.LastBlockStatus{},
		stopChan:   make(chan struct{}, 1),
		closeWrite: make(chan struct{}),

		requests: make(map[p2pcommon.MsgID]*requestInfo),
		reqMutex: &sync.Mutex{},

		handlers: make(map[p2pcommon.SubProtocol]p2pcommon.MessageHandler),

		txQueueLock:         &sync.Mutex{},
		txNoticeQueue:       p2putil.NewPressableQueue(DefaultPeerTxQueueSize),
		maxTxNoticeHashSize: DefaultPeerTxQueueSize,
	}
	rPeer.dWrite = make(chan p2pcommon.MsgOrder, writeMsgBufferSize)

	var err error
	rPeer.blkHashCache, err = lru.New(DefaultPeerBlockCacheSize)
	if err != nil {
		panic("Failed to create remote peer " + err.Error())
	}
	rPeer.txHashCache, err = lru.New(DefaultPeerTxCacheSize)
	if err != nil {
		panic("Failed to create remote peer " + err.Error())
	}

	return rPeer
}
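
// A rough construction sketch; the concrete caller and the manageNum and rw
// wiring live outside this file, and the names below are illustrative only:
//
//	peer := newRemotePeer(meta, manageNum, pm, actor, logger, mf, signer, rw)
//	go peer.RunPeer() // lifecycle loop; later stopped via peer.Stop()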

// ID returns the peer id, which is the same as peer.meta.ID
func (p *remotePeerImpl) ID() types.PeerID {
	return p.meta.ID
}

func (p *remotePeerImpl) Meta() p2pcommon.PeerMeta {
	return p.meta
}

func (p *remotePeerImpl) ManageNumber() uint32 {
	return p.manageNum
}

func (p *remotePeerImpl) Name() string {
	return p.name
}

func (p *remotePeerImpl) Version() string {
	return p.meta.Version
}

func (p *remotePeerImpl) Role() p2pcommon.PeerRole {
	return p.role
}
func (p *remotePeerImpl) ChangeRole(role p2pcommon.PeerRole) {
	p.role = role
}

func (p *remotePeerImpl) AddMessageHandler(subProtocol p2pcommon.SubProtocol, handler p2pcommon.MessageHandler) {
	p.handlers[subProtocol] = handler
}

func (p *remotePeerImpl) MF() p2pcommon.MoFactory {
	return p.mf
}

// State returns the current state of the peer
func (p *remotePeerImpl) State() types.PeerState {
	return p.state.Get()
}

func (p *remotePeerImpl) LastStatus() *types.LastBlockStatus {
	return p.lastStatus
}

// RunPeer should be called as a goroutine
func (p *remotePeerImpl) RunPeer() {
	p.logger.Debug().Str(p2putil.LogPeerName, p.Name()).Msg("Starting peer")
	pingTicker := time.NewTicker(p.pingDuration)

	go p.runWrite()
	go p.runRead()

	txNoticeTicker := time.NewTicker(txNoticeInterval)

	// peer state is changed to RUNNING after all sub goroutines are ready, and to STOPPED before all sub goroutines are stopped.
	p.state.SetAndGet(types.RUNNING)
READNOPLOOP:
	for {
		select {
		case <-pingTicker.C:
			p.sendPing()
			// no operation for now
		case <-txNoticeTicker.C:
			p.trySendTxNotices()
		case <-p.stopChan:
			break READNOPLOOP
		}
	}

	p.logger.Info().Str(p2putil.LogPeerName, p.Name()).Msg("Finishing peer")
	txNoticeTicker.Stop()
	pingTicker.Stop()
	// finish the write goroutine. the read goroutine will be closed automatically when the connection is closed
	close(p.closeWrite)
	close(p.stopChan)
	p.state.SetAndGet(types.STOPPED)

	p.pm.RemovePeer(p)
}
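
// State transitions driven by RunPeer and Stop: STARTING (set in
// newRemotePeer) -> RUNNING (once the sub goroutines are launched) ->
// STOPPING (set by Stop) -> STOPPED (set just before the peer is removed from
// the peer manager). SendMessage and handleMsg check State() and drop work
// once the peer has passed RUNNING.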

func (p *remotePeerImpl) runWrite() {
	cleanupTicker := time.NewTicker(cleanRequestInterval)
	defer func() {
		if r := recover(); r != nil {
			p.logger.Panic().Str("callStack", string(debug.Stack())).Str(p2putil.LogPeerName, p.Name()).Str("recover", fmt.Sprint(r)).Msg("There was a panic in runWrite")
		}
	}()

WRITELOOP:
	for {
		select {
		case m := <-p.dWrite:
			p.writeToPeer(m)
		case <-cleanupTicker.C:
			p.pruneRequests()
		case <-p.closeWrite:
			p.logger.Debug().Str(p2putil.LogPeerName, p.Name()).Msg("Quitting runWrite")
			break WRITELOOP
		}
	}
	cleanupTicker.Stop()
	p.cleanupWrite()
	p.rw.Close()

	// closing the channel is left to the golang runtime
	// close(p.write)
	// close(p.consumeChan)
}
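
// runWrite is the single writer for rw: serializing every write through one
// goroutine avoids interleaved frames without per-message locking. Closing rw
// here is also what makes the blocking ReadMsg in runRead return an error and
// end the read loop.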

func (p *remotePeerImpl) cleanupWrite() {
	// 1. cleaning receive handlers. TODO add code

	// 2. canceling orders that were not sent yet
	for {
		select {
		case m := <-p.dWrite:
			m.CancelSend(p)
		default:
			return
		}
	}
}

func (p *remotePeerImpl) runRead() {
	for {
		msg, err := p.rw.ReadMsg()
		if err != nil {
			// TODO set different log level by case (i.e. it can be expected if peer is disconnecting)
			p.logger.Warn().Str(p2putil.LogPeerName, p.Name()).Err(err).Msg("Failed to read message")
			p.Stop()
			return
		}
		if err = p.handleMsg(msg); err != nil {
			// TODO set different log level by case (i.e. it can be expected if peer is disconnecting)
			p.logger.Warn().Str(p2putil.LogPeerName, p.Name()).Err(err).Msg("Failed to handle message")
			p.Stop()
			return
		}
	}
}
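
// Any read or handling error tears the whole peer down via Stop(); there is
// no retry at this layer. Re-establishing the connection, if it happens at
// all, is left to the peer manager.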

func (p *remotePeerImpl) handleMsg(msg p2pcommon.Message) (err error) {
	subProto := msg.Subprotocol()
	defer func() {
		if r := recover(); r != nil {
			p.logger.Error().Str(p2putil.LogProtoID, subProto.String()).Str("callStack", string(debug.Stack())).Interface("panic", r).Msg("There was a panic in the handler.")
			err = fmt.Errorf("internal error")
		}
	}()

	if p.State() > types.RUNNING {
		p.logger.Debug().Str(p2putil.LogPeerName, p.Name()).Str(p2putil.LogMsgID, msg.ID().String()).Str(p2putil.LogProtoID, subProto.String()).Str("current_state", p.State().String()).Msg("peer is not running. silently drop input message")
		return nil
	}

	handler, found := p.handlers[subProto]
	if !found {
		p.logger.Debug().Str(p2putil.LogPeerName, p.Name()).Str(p2putil.LogMsgID, msg.ID().String()).Str(p2putil.LogProtoID, subProto.String()).Msg("invalid protocol")
		return fmt.Errorf("invalid protocol %s", subProto)
	}

	handler.PreHandle()

	payload, err := handler.ParsePayload(msg.Payload())
	if err != nil {
		p.logger.Warn().Err(err).Str(p2putil.LogPeerName, p.Name()).Str(p2putil.LogMsgID, msg.ID().String()).Str(p2putil.LogProtoID, subProto.String()).Msg("invalid message data")
		return fmt.Errorf("invalid message data")
	}
	//err = p.signer.verifyMsg(msg, p.meta.ID)
	//if err != nil {
	//	p.logger.Warn().Err(err).Str(LogPeerName, p.Name()).Str(LogMsgID, msg.ID().String()).Str(LogProtoID, subProto.String()).Msg("Failed to check signature")
	//	return fmt.Errorf("Failed to check signature")
	//}
	err = handler.CheckAuth(msg, payload)
	if err != nil {
		p.logger.Warn().Err(err).Str(p2putil.LogPeerName, p.Name()).Str(p2putil.LogMsgID, msg.ID().String()).Str(p2putil.LogProtoID, subProto.String()).Msg("Failed to authenticate message")
		return fmt.Errorf("failed to authenticate message")
	}

	handler.Handle(msg, payload)

	handler.PostHandle(msg, payload)
	return nil
}
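
// Handling pipeline, as implemented above:
//
//	PreHandle() -> ParsePayload(raw) -> CheckAuth(msg, payload) -> Handle(msg, payload) -> PostHandle(msg, payload)
//
// A handler registered via AddMessageHandler only sees messages carrying the
// subprotocol it was registered for; a parse or auth failure aborts the
// pipeline, and the returned error makes runRead stop the peer.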

// Stop stops the peer's operations
func (p *remotePeerImpl) Stop() {
	prevState := p.state.SetAndGet(types.STOPPING)
	if prevState <= types.RUNNING {
		p.stopChan <- struct{}{}
	}

}

func (p *remotePeerImpl) SendMessage(msg p2pcommon.MsgOrder) {
	if p.State() > types.RUNNING {
		p.logger.Debug().Str(p2putil.LogPeerName, p.Name()).Str(p2putil.LogProtoID, msg.GetProtocolID().String()).
			Str(p2putil.LogMsgID, msg.GetMsgID().String()).Str("current_state", p.State().String()).Msg("Cancel sending message, since peer is not in running state")
		return
	}
	select {
	case p.dWrite <- msg:
		// it's OK
	default:
		p.logger.Info().Str(p2putil.LogPeerName, p.Name()).Str(p2putil.LogProtoID, msg.GetProtocolID().String()).
			Str(p2putil.LogMsgID, msg.GetMsgID().String()).Msg("Remote peer is busy or down")
		// TODO find a more elegant way to handle a flooding queue. In many cases, a pending or dropped tx notice or newBlock notice (not a blockProduced notice) is not critical.
		p.Stop()
	}
}
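
// SendMessage is best effort and never blocks: if the dWrite buffer
// (writeMsgBufferSize) is full, the order is dropped and the peer is stopped,
// on the assumption that a peer that cannot drain its write queue is stalled
// or gone.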

func (p *remotePeerImpl) SendAndWaitMessage(msg p2pcommon.MsgOrder, timeout time.Duration) error {
	if p.State() > types.RUNNING {
		p.logger.Debug().Str(p2putil.LogPeerName, p.Name()).Str(p2putil.LogProtoID, msg.GetProtocolID().String()).
			Str(p2putil.LogMsgID, msg.GetMsgID().String()).Str("current_state", p.State().String()).Msg("Cancel sending message, since peer is not in running state")
		return fmt.Errorf("not running")
	}
	select {
	case p.dWrite <- msg:
		return nil
	case <-time.NewTimer(timeout).C:
		p.logger.Info().Str(p2putil.LogPeerName, p.Name()).Str(p2putil.LogProtoID, msg.GetProtocolID().String()).
			Str(p2putil.LogMsgID, msg.GetMsgID().String()).Msg("Remote peer is busy or down")
		// TODO find a more elegant way to handle a flooding queue. In many cases, a pending or dropped tx notice or newBlock notice (not a blockProduced notice) is not critical.
		p.Stop()
		return TimeoutError
	}
}
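
// SendAndWaitMessage is the blocking variant: it waits up to timeout for room
// in dWrite and returns TimeoutError (also stopping the peer) if the queue
// stays full. Note that success only means the order was queued to runWrite,
// not that the remote side received it.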

func (p *remotePeerImpl) PushTxsNotice(txHashes []types.TxID) {
	p.txQueueLock.Lock()
	defer p.txQueueLock.Unlock()
	for _, hash := range txHashes {
		if !p.txNoticeQueue.Offer(hash) {
			p.sendTxNotices()
			// this Offer always succeeds, by invariant: the queue was just flushed
			p.txNoticeQueue.Offer(hash)
		}
	}
}
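
// When the notice queue is full, Offer fails and the queue is flushed right
// away via sendTxNotices (the txQueueLock is already held), after which the
// hash is re-offered into the now-empty queue.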
   364  
   365  // ConsumeRequest remove request from request history.
   366  func (p *remotePeerImpl) ConsumeRequest(originalID p2pcommon.MsgID) {
   367  	p.reqMutex.Lock()
   368  	delete(p.requests, originalID)
   369  	p.reqMutex.Unlock()
   370  }
   371  
   372  // requestIDNotFoundReceiver is to handle response msg which the original message is not identified
   373  func (p *remotePeerImpl) requestIDNotFoundReceiver(msg p2pcommon.Message, msgBody p2pcommon.MessageBody) bool {
   374  	return true
   375  }
   376  
   377  // passThroughReceiver is bypass message to legacy handler.
   378  func (p *remotePeerImpl) passThroughReceiver(msg p2pcommon.Message, msgBody p2pcommon.MessageBody) bool {
   379  	return false
   380  }
   381  
   382  func (p *remotePeerImpl) GetReceiver(originalID p2pcommon.MsgID) p2pcommon.ResponseReceiver {
   383  	p.reqMutex.Lock()
   384  	defer p.reqMutex.Unlock()
   385  	req, found := p.requests[originalID]
   386  	if !found {
   387  		return p.requestIDNotFoundReceiver
   388  	} else if req.receiver == nil {
   389  		return p.passThroughReceiver
   390  	} else {
   391  		return req.receiver
   392  	}
   393  }
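
// Sketch of how a response handler can use these receivers; illustrative, not
// copied from an actual call site:
//
//	receiver := peer.GetReceiver(msg.OriginalID())
//	if !receiver(msg, body) {
//		// not consumed: fall back to the legacy handling path
//	}
//	// and ConsumeRequest(originalID) once the response is complete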

func (p *remotePeerImpl) writeToPeer(m p2pcommon.MsgOrder) {
	if err := m.SendTo(p); err != nil {
		// write failed
		p.Stop()
	}
}

func (p *remotePeerImpl) trySendTxNotices() {
	p.txQueueLock.Lock()
	defer p.txQueueLock.Unlock()
	p.sendTxNotices()
}

// sendTxNotices must be called while holding txQueueLock
func (p *remotePeerImpl) sendTxNotices() {
	// create multiple messages if the queue holds too many hashes.
	for {
		// no need to send if queue is empty
		if p.txNoticeQueue.Size() == 0 {
			return
		}
		hashes := make([][]byte, 0, p.txNoticeQueue.Size())
		skippedTxIDs := make([]types.TxID, 0)
		for element := p.txNoticeQueue.Poll(); element != nil; element = p.txNoticeQueue.Poll() {
			hash := element.(types.TxID)
			if p.txHashCache.Contains(hash) {
				skippedTxIDs = append(skippedTxIDs, hash)
				continue
			}
			hashes = append(hashes, hash[:])
			p.txHashCache.Add(hash, cachePlaceHolder)
		}
		if len(hashes) > 0 {
			mo := p.mf.NewMsgTxBroadcastOrder(&types.NewTransactionsNotice{TxHashes: hashes})
			p.SendMessage(mo)
		}
		if len(skippedTxIDs) > 0 {
			// if the tx is already in the cache, the remote peer is assumed to have that tx.
			p.tnt.ReportSend(skippedTxIDs, p.ID())
		}
	}
}
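
// Each pass drains the queue, skips hashes the remote peer is already assumed
// to have (they are in txHashCache), and sends the rest in a single
// NewTransactionsNotice. Skipped hashes are still reported to the
// TxNoticeTracer so that upper layers treat them as already sent.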

// sendPing MUST be called in the same goroutine as RunPeer()
func (p *remotePeerImpl) sendPing() {
	// find my best block
	bestBlock, err := p.actor.GetChainAccessor().GetBestBlock()
	if err != nil {
		p.logger.Warn().Err(err).Msg("cancel ping. failed to get best block")
		return
	}
	// create message data
	pingMsg := &types.Ping{
		BestBlockHash: bestBlock.BlockHash(),
		BestHeight:    bestBlock.GetHeader().GetBlockNo(),
	}

	p.SendMessage(p.mf.NewMsgRequestOrder(true, p2pcommon.PingRequest, pingMsg))
}

// goAwayMsg sends a notice message and then disconnects. this routine should only run in the RunPeer goroutine
func (p *remotePeerImpl) goAwayMsg(msg string) {
	p.logger.Info().Str(p2putil.LogPeerName, p.Name()).Str("msg", msg).Msg("Peer is closing")
	p.SendAndWaitMessage(p.mf.NewMsgRequestOrder(false, p2pcommon.GoAway, &types.GoAwayNotice{Message: msg}), time.Second)
	p.Stop()
}

func (p *remotePeerImpl) pruneRequests() {
	debugLog := p.logger.IsDebugEnabled()
	deletedCnt := 0
	var deletedReqs []string
	expireTime := time.Now().Add(-1 * time.Hour)
	p.reqMutex.Lock()
	defer p.reqMutex.Unlock()
	for key, m := range p.requests {
		if m.cTime.Before(expireTime) {
			delete(p.requests, key)
			if debugLog {
				deletedReqs = append(deletedReqs, m.reqMO.GetProtocolID().String()+"/"+key.String()+m.cTime.String())
			}
			deletedCnt++
		}
	}
	p.logger.Info().Int("count", deletedCnt).Str(p2putil.LogPeerName, p.Name()).
		Time("until", expireTime).Msg("Pruned requests whose responses never arrived")
	//.Msg("Pruned %d requests but no response to peer %s until %v", deletedCnt, p.meta.ID.Pretty(), time.Unix(expireTime, 0))
	if debugLog {
		p.logger.Debug().Strs("reqs", deletedReqs).Msg("Pruned")
	}
}
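
// Requests older than one hour whose responses never arrived are dropped so
// the requests map cannot grow without bound; their ResponseReceivers are
// simply never invoked after that.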

func (p *remotePeerImpl) UpdateBlkCache(blkHash []byte, blkNumber uint64) bool {
	p.UpdateLastNotice(blkHash, blkNumber)
	hash := types.ToBlockID(blkHash)
	// the lru cache can't accept a byte slice as a key
	found, _ := p.blkHashCache.ContainsOrAdd(hash, true)
	return found
}

func (p *remotePeerImpl) UpdateTxCache(hashes []types.TxID) []types.TxID {
	// the lru cache can't accept a byte slice as a key
	added := make([]types.TxID, 0, len(hashes))
	for _, hash := range hashes {
		if found, _ := p.txHashCache.ContainsOrAdd(hash, true); !found {
			added = append(added, hash)
		}
	}
	return added
}

func (p *remotePeerImpl) UpdateLastNotice(blkHash []byte, blkNumber uint64) {
	p.lastStatus = &types.LastBlockStatus{time.Now(), blkHash, blkNumber}
}

func (p *remotePeerImpl) sendGoAway(msg string) {
	// TODO: send a goaway message and close the connection
}