github.com/aergoio/aergo@v1.3.1/p2p/raftsupport/concclusterreceiver.go

/*
 * @file
 * @copyright defined in aergo/LICENSE.txt
 */

package raftsupport

import (
	"github.com/aergoio/aergo-lib/log"
	"github.com/aergoio/aergo/p2p/p2putil"
	"github.com/pkg/errors"
	"strconv"
	"sync"
	"time"

	"github.com/aergoio/aergo/message"
	"github.com/aergoio/aergo/p2p/p2pcommon"
	"github.com/aergoio/aergo/types"
	"github.com/golang/protobuf/proto"
)

// ConcurrentClusterInfoReceiver sends a p2p getClusterInfo request to all
// connected peers and collects their responses, replying with the best
// successful one. It supersedes the first, simplified version, which sent
// requests and received responses one peer at a time.
type ConcurrentClusterInfoReceiver struct {
	logger *log.Logger
	mf     p2pcommon.MoFactory

	peers   []p2pcommon.RemotePeer
	mutex   sync.Mutex
	sent    map[p2pcommon.MsgID]p2pcommon.RemotePeer
	sentCnt int

	req *message.GetCluster

	ttl          time.Duration
	timeout      time.Time
	respCnt      int
	requiredResp int
	succResps    map[types.PeerID]*types.GetClusterInfoResponse
	status       receiverStatus

	finished chan bool
}

func NewConcClusterInfoReceiver(actor p2pcommon.ActorService, mf p2pcommon.MoFactory, peers []p2pcommon.RemotePeer, ttl time.Duration, req *message.GetCluster, logger *log.Logger) *ConcurrentClusterInfoReceiver {
	r := &ConcurrentClusterInfoReceiver{logger: logger, mf: mf, peers: peers, ttl: ttl, req: req,
		requiredResp: len(peers)/2 + 1,
		succResps:    make(map[types.PeerID]*types.GetClusterInfoResponse),
		sent:         make(map[p2pcommon.MsgID]p2pcommon.RemotePeer), finished: make(chan bool)}

	return r
}

func (r *ConcurrentClusterInfoReceiver) StartGet() {
	r.timeout = time.Now().Add(r.ttl)
	// send the request to all connected peers, then arm the expiry timer
	go func() {
		r.mutex.Lock()
		if !r.trySendAllPeers() {
			r.cancelReceiving(errors.New("no live peers"), false)
			r.mutex.Unlock()
			return
		}
		r.mutex.Unlock()
		r.runExpireTimer()
	}()
}
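// Usage sketch (illustrative only, not part of the original file): a caller
// builds a message.GetCluster carrying a reply channel, starts the receiver,
// and waits for the single aggregated result. getClusterInfo and its
// parameters are hypothetical stand-ins; the actor argument is ignored by
// the constructor above, so nil is passed here.
//
//	func getClusterInfo(mf p2pcommon.MoFactory, peers []p2pcommon.RemotePeer,
//		bestHash []byte, logger *log.Logger) *message.GetClusterRsp {
//		req := &message.GetCluster{BestBlockHash: bestHash,
//			ReplyC: make(chan *message.GetClusterRsp)}
//		receiver := NewConcClusterInfoReceiver(nil, mf, peers, time.Second*5, req, logger)
//		receiver.StartGet()
//		// finishReceiver sends exactly one result and then closes ReplyC,
//		// either once all responses arrive or on timeout.
//		return <-req.ReplyC
//	}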
func (r *ConcurrentClusterInfoReceiver) runExpireTimer() {
	t := time.NewTimer(r.ttl)
	select {
	case <-t.C:
		// time is up; finish with whatever has been collected so far.
		r.mutex.Lock()
		defer r.mutex.Unlock()
		if r.status == receiverStatusWaiting {
			r.finishReceiver(nil)
		}
	case <-r.finished:
	}
	r.logger.Debug().Msg("expire timer finished")
}

func (r *ConcurrentClusterInfoReceiver) trySendAllPeers() bool {
	r.logger.Debug().Array("peers", p2putil.NewLogPeersMarshaller(r.peers, 10)).Msg("sending get cluster request to connected peers")
	req := &types.GetClusterInfoRequest{BestBlockHash: r.req.BestBlockHash}
	for _, peer := range r.peers {
		if peer.State() == types.RUNNING {
			mo := r.mf.NewMsgBlockRequestOrder(r.ReceiveResp, p2pcommon.GetClusterRequest, req)
			peer.SendMessage(mo)
			r.sent[mo.GetMsgID()] = peer
			r.sentCnt++
		}
	}
	r.logger.Debug().Int("sent", r.sentCnt).Msg("sent get cluster requests")
	return r.sentCnt >= r.requiredResp
}

// ReceiveResp must be called only in the read goroutine.
func (r *ConcurrentClusterInfoReceiver) ReceiveResp(msg p2pcommon.Message, msgBody p2pcommon.MessageBody) (ret bool) {
	// While the receiver is waiting, each response is handled and counted;
	// once every sent request has been answered, the receiver finishes.
	// After the receiver is canceled or finished, responses are ignored.
	ret = true
	r.mutex.Lock()
	defer r.mutex.Unlock()
	// consume the request id first
	peer, exist := r.sent[msg.OriginalID()]
	if exist {
		delete(r.sent, msg.OriginalID())
		peer.ConsumeRequest(msg.OriginalID())
	} else {
		// TODO report unknown message?
		return
	}

	status := r.status
	switch status {
	case receiverStatusWaiting:
		r.handleInWaiting(peer, msg, msgBody)
		r.respCnt++
		if r.respCnt >= r.sentCnt {
			r.finishReceiver(nil)
		}
	case receiverStatusCanceled:
		fallthrough
	case receiverStatusFinished:
		fallthrough
	default:
		r.ignoreMsg(msg, msgBody)
		return
	}
	return
}

func (r *ConcurrentClusterInfoReceiver) handleInWaiting(peer p2pcommon.RemotePeer, msg p2pcommon.Message, msgBody proto.Message) {
	// guard against a deadline that passed before runExpireTimer fired
	if r.timeout.Before(time.Now()) {
		// silently finish; the late response is dropped
		r.finishReceiver(nil)
		return
	}

	// the remote peer responded with malformed or empty data
	body, ok := msgBody.(*types.GetClusterInfoResponse)
	if !ok {
		r.logger.Debug().Str(p2putil.LogPeerName, peer.Name()).Str(p2putil.LogMsgID, msg.ID().String()).Msg("get cluster invalid response data")
		return
	} else if len(body.MbrAttrs) == 0 || body.Error != "" {
		r.logger.Debug().Str(p2putil.LogPeerName, peer.Name()).Str(p2putil.LogMsgID, msg.ID().String()).Err(errors.New(body.Error)).Msg("get cluster response empty member")
		return
	}

	r.logger.Debug().Str(p2putil.LogPeerName, peer.Name()).Str(p2putil.LogMsgID, msg.ID().String()).Object("resp", body).Msg("received get cluster response")
	// defensive check; a non-empty Error has already returned above
	if len(body.Error) != 0 {
		r.logger.Debug().Str(p2putil.LogPeerName, peer.Name()).Str(p2putil.LogMsgID, msg.ID().String()).Err(errors.New(body.Error)).Msg("get cluster response error")
		return
	}
	r.succResps[peer.ID()] = body
}
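// The receiverStatus type and its values (receiverStatusWaiting,
// receiverStatusCanceled, receiverStatusFinished) are defined elsewhere in
// this package. A minimal sketch of the state machine this file relies on,
// assuming a definition along these lines:
//
//	type receiverStatus int32
//
//	const (
//		receiverStatusWaiting receiverStatus = iota
//		receiverStatusCanceled
//		receiverStatusFinished
//	)
//
// Transitions only move forward under r.mutex: waiting -> canceled ->
// finished via cancelReceiving, or waiting -> finished directly.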
// cancelReceiving cancels the wait and returns a failure result. Remaining
// (now useless) responses are still consumed as they arrive. Cancellations
// are assumed to occur infrequently.
func (r *ConcurrentClusterInfoReceiver) cancelReceiving(err error, hasNext bool) {
	r.status = receiverStatusCanceled
	r.finishReceiver(err)
}

// finishReceiver sends the aggregated result to the reply channel and marks
// the receiver finished. It must not complete more than once.
func (r *ConcurrentClusterInfoReceiver) finishReceiver(err error) {
	if r.status == receiverStatusFinished {
		r.logger.Warn().Msg("redundant finish call")
		return
	}
	r.status = receiverStatusFinished
	r.logger.Debug().Msg("finishing receiver")
	r.req.ReplyC <- r.calculate(err)
	close(r.req.ReplyC)
	close(r.finished)
}

// ignoreMsg silently drops trailing responses, which are no longer useful.
func (r *ConcurrentClusterInfoReceiver) ignoreMsg(msg p2pcommon.Message, msgBody proto.Message) {
	// nothing to do for now
}

func (r *ConcurrentClusterInfoReceiver) calculate(err error) *message.GetClusterRsp {
	rsp := &message.GetClusterRsp{}
	if err != nil {
		rsp.Err = err
	} else if len(r.succResps) < r.requiredResp {
		rsp.Err = errors.New("too few responses: " + strconv.Itoa(len(r.succResps)))
	} else {
		r.logger.Debug().Int("respCnt", len(r.succResps)).Msg("calculating collected responses")
		var bestRsp *types.GetClusterInfoResponse = nil
		var bestPid types.PeerID
		for pid, rsp := range r.succResps {
			if bestRsp == nil || rsp.BestBlockNo > bestRsp.BestBlockNo {
				bestRsp = rsp
				bestPid = pid
			}
		}
		if bestRsp != nil {
			r.logger.Debug().Str(p2putil.LogPeerID, p2putil.ShortForm(bestPid)).Object("resp", bestRsp).Msg("chose best response")
			rsp.ClusterID = bestRsp.GetClusterID()
			rsp.ChainID = bestRsp.GetChainID()
			rsp.Members = bestRsp.GetMbrAttrs()
			rsp.HardStateInfo = bestRsp.HardStateInfo
		} else {
			rsp.Err = errors.New("no successful responses")
		}
	}
	return rsp
}
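// A worked example of the quorum arithmetic used above (hypothetical
// numbers, not from the original file): requiredResp is a simple majority
// of the peers handed to NewConcClusterInfoReceiver, and trySendAllPeers
// fails fast when fewer than that many peers are in RUNNING state.
//
//	numPeers := 5                   // five connected peers
//	requiredResp := numPeers/2 + 1  // 5/2 + 1 == 3, a strict majority
//	// with 4 peers: 4/2 + 1 == 3, so 2 successful responses are not
//	// enough; calculate() then returns the "too few responses" error.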