github.com/aergoio/aergo@v1.3.1/p2p/raftsupport/concclusterreceiver.go

/*
 * @file
 * @copyright defined in aergo/LICENSE.txt
 */

package raftsupport

import (
	"strconv"
	"sync"
	"time"

	"github.com/aergoio/aergo-lib/log"
	"github.com/aergoio/aergo/message"
	"github.com/aergoio/aergo/p2p/p2pcommon"
	"github.com/aergoio/aergo/p2p/p2putil"
	"github.com/aergoio/aergo/types"
	"github.com/golang/protobuf/proto"
	"github.com/pkg/errors"
)

// ConcurrentClusterInfoReceiver sends p2p getClusterInfo requests to all connected peers
// at once and collects their responses. It finishes when every request is answered, when
// the ttl expires, or when it is canceled, and it succeeds only if a majority of the
// candidate peers returned a successful response.
type ConcurrentClusterInfoReceiver struct {
	logger *log.Logger
	mf     p2pcommon.MoFactory

	peers   []p2pcommon.RemotePeer
	mutex   sync.Mutex
	sent    map[p2pcommon.MsgID]p2pcommon.RemotePeer // in-flight requests, keyed by message id
	sentCnt int

	req *message.GetCluster

	ttl          time.Duration
	timeout      time.Time
	respCnt      int
	requiredResp int // minimum number of successful responses: a simple majority
	succResps    map[types.PeerID]*types.GetClusterInfoResponse
	status       receiverStatus

	finished chan bool
}

func NewConcClusterInfoReceiver(actor p2pcommon.ActorService, mf p2pcommon.MoFactory, peers []p2pcommon.RemotePeer, ttl time.Duration, req *message.GetCluster, logger *log.Logger) *ConcurrentClusterInfoReceiver {
	r := &ConcurrentClusterInfoReceiver{logger: logger, mf: mf, peers: peers, ttl: ttl, req: req,
		// a simple majority of the candidate peers, e.g. 3 of 5
		requiredResp: len(peers)/2 + 1,
		succResps:    make(map[types.PeerID]*types.GetClusterInfoResponse),
		sent:         make(map[p2pcommon.MsgID]p2pcommon.RemotePeer), finished: make(chan bool)}

	return r
}

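// A minimal usage sketch (illustrative only; actor, mf, peers, hash, and logger below are
// assumed to come from the caller's p2p setup and are not defined in this file):
//
//	replyC := make(chan *message.GetClusterRsp, 1)
//	req := &message.GetCluster{BestBlockHash: hash, ReplyC: replyC}
//	receiver := NewConcClusterInfoReceiver(actor, mf, peers, time.Second*5, req, logger)
//	receiver.StartGet()
//	rsp := <-replyC // carries either the chosen cluster info or rsp.Err
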
// StartGet records the deadline, sends the request to all live peers in a separate
// goroutine, and arms the expire timer.
func (r *ConcurrentClusterInfoReceiver) StartGet() {
	r.timeout = time.Now().Add(r.ttl)
	// send the request message to all connected peers
	go func() {
		r.mutex.Lock()
		if !r.trySendAllPeers() {
			r.cancelReceiving(errors.New("no live peers"), false)
			r.mutex.Unlock()
			return
		}
		r.mutex.Unlock()
		r.runExpireTimer()
	}()
}

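// runExpireTimer finishes the receiver with whatever responses have been collected when
// ttl elapses; it returns early via the finished channel if the receiver completed first.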
func (r *ConcurrentClusterInfoReceiver) runExpireTimer() {
	t := time.NewTimer(r.ttl)
	select {
	case <-t.C:
		// time is up; finish with the intermediate result collected so far.
		r.mutex.Lock()
		defer r.mutex.Unlock()
		if r.status == receiverStatusWaiting {
			r.finishReceiver(nil)
		}
	case <-r.finished:
	}
	r.logger.Debug().Msg("expire timer finished")
}

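// trySendAllPeers sends a GetClusterInfoRequest to every peer in RUNNING state and
// reports whether the request reached at least requiredResp peers, i.e. a majority.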
func (r *ConcurrentClusterInfoReceiver) trySendAllPeers() bool {
	r.logger.Debug().Array("peers", p2putil.NewLogPeersMarshaller(r.peers, 10)).Msg("sending get cluster request to connected peers")
	req := &types.GetClusterInfoRequest{BestBlockHash: r.req.BestBlockHash}
	for _, peer := range r.peers {
		if peer.State() == types.RUNNING {
			// ReceiveResp is registered as the callback for the response to this request.
			mo := r.mf.NewMsgBlockRequestOrder(r.ReceiveResp, p2pcommon.GetClusterRequest, req)
			peer.SendMessage(mo)
			r.sent[mo.GetMsgID()] = peer
			r.sentCnt++
		}
	}
	r.logger.Debug().Int("sent", r.sentCnt).Msg("sent get cluster requests")
	return r.sentCnt >= r.requiredResp
}

// ReceiveResp must be called only in the read goroutine of the remote peer.
func (r *ConcurrentClusterInfoReceiver) ReceiveResp(msg p2pcommon.Message, msgBody p2pcommon.MessageBody) (ret bool) {
	// while waiting:
	//   valid response, more responses pending => keep waiting
	//   valid response, last pending response  => finish and pick the best result
	//   invalid or error response => count it, but do not add it to succResps
	// after cancel or finish: silently ignore any trailing responses
	ret = true
	r.mutex.Lock()
	defer r.mutex.Unlock()
	// consume the request id first
	peer, exist := r.sent[msg.OriginalID()]
	if exist {
		delete(r.sent, msg.OriginalID())
		peer.ConsumeRequest(msg.OriginalID())
	} else {
		// TODO report unknown message?
		return
	}

	switch r.status {
	case receiverStatusWaiting:
		r.handleInWaiting(peer, msg, msgBody)
		r.respCnt++
		if r.respCnt >= r.sentCnt {
			r.finishReceiver(nil)
		}
	case receiverStatusCanceled:
		fallthrough
	case receiverStatusFinished:
		fallthrough
	default:
		r.ignoreMsg(msg, msgBody)
		return
	}
	return
}

func (r *ConcurrentClusterInfoReceiver) handleInWaiting(peer p2pcommon.RemotePeer, msg p2pcommon.Message, msgBody proto.Message) {
	// the deadline may have passed before the expire timer fired; finish now instead of
	// accepting a late response.
	if r.timeout.Before(time.Now()) {
		r.finishReceiver(nil)
		return
	}

	// the remote peer responded with malformed data.
	body, ok := msgBody.(*types.GetClusterInfoResponse)
	if !ok {
		r.logger.Debug().Str(p2putil.LogPeerName, peer.Name()).Str(p2putil.LogMsgID, msg.ID().String()).Msg("get cluster invalid response data")
		return
	} else if len(body.MbrAttrs) == 0 || body.Error != "" {
		r.logger.Debug().Str(p2putil.LogPeerName, peer.Name()).Str(p2putil.LogMsgID, msg.ID().String()).Err(errors.New(body.Error)).Msg("get cluster response with empty members or error")
		return
	}

	r.logger.Debug().Str(p2putil.LogPeerName, peer.Name()).Str(p2putil.LogMsgID, msg.ID().String()).Object("resp", body).Msg("received get cluster response")
	r.succResps[peer.ID()] = body
}

// cancelReceiving stops waiting and returns a failure result. Remaining (now useless)
// responses will still arrive and are ignored; cancellations are assumed to be rare.
func (r *ConcurrentClusterInfoReceiver) cancelReceiving(err error, hasNext bool) {
	r.status = receiverStatusCanceled
	r.finishReceiver(err)
}

// finishReceiver delivers the final result exactly once, closes the reply channel, and
// releases the expire timer goroutine via the finished channel.
func (r *ConcurrentClusterInfoReceiver) finishReceiver(err error) {
	if r.status == receiverStatusFinished {
		r.logger.Warn().Msg("redundant finish call")
		return
	}
	r.status = receiverStatusFinished
	r.logger.Debug().Msg("finishing receiver")
	r.req.ReplyC <- r.calculate(err)
	close(r.req.ReplyC)
	close(r.finished)
}

// ignoreMsg silently drops responses that arrive after the receiver is finished or
// canceled, since they are no longer useful.
func (r *ConcurrentClusterInfoReceiver) ignoreMsg(msg p2pcommon.Message, msgBody proto.Message) {
	// nothing to do for now
}

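// calculate builds the final GetClusterRsp: it fails unless a majority of successful
// responses was collected, and otherwise returns the one with the highest BestBlockNo.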
func (r *ConcurrentClusterInfoReceiver) calculate(err error) *message.GetClusterRsp {
	rsp := &message.GetClusterRsp{}
	if err != nil {
		rsp.Err = err
	} else if len(r.succResps) < r.requiredResp {
		rsp.Err = errors.New("too few responses: " + strconv.Itoa(len(r.succResps)))
	} else {
		r.logger.Debug().Int("respCnt", len(r.succResps)).Msg("calculating collected responses")
		var bestRsp *types.GetClusterInfoResponse
		var bestPid types.PeerID
		for pid, rsp := range r.succResps {
			if bestRsp == nil || rsp.BestBlockNo > bestRsp.BestBlockNo {
				bestRsp = rsp
				bestPid = pid
			}
		}
		if bestRsp != nil {
			r.logger.Debug().Str(p2putil.LogPeerID, p2putil.ShortForm(bestPid)).Object("resp", bestRsp).Msg("chose best response")
			rsp.ClusterID = bestRsp.GetClusterID()
			rsp.ChainID = bestRsp.GetChainID()
			rsp.Members = bestRsp.GetMbrAttrs()
			rsp.HardStateInfo = bestRsp.HardStateInfo
		} else {
			rsp.Err = errors.New("no successful responses")
		}
	}
	return rsp
}