github.com/aergoio/aergo@v1.3.1/p2p/remotepeer.go (about) 1 /* 2 * @file 3 * @copyright defined in aergo/LICENSE.txt 4 */ 5 6 package p2p 7 8 import ( 9 "fmt" 10 "github.com/pkg/errors" 11 "runtime/debug" 12 "sync" 13 "time" 14 15 "github.com/aergoio/aergo/p2p/metric" 16 "github.com/aergoio/aergo/p2p/p2pcommon" 17 18 lru "github.com/hashicorp/golang-lru" 19 20 "github.com/aergoio/aergo-lib/log" 21 "github.com/aergoio/aergo/p2p/p2putil" 22 "github.com/aergoio/aergo/types" 23 ) 24 25 var TimeoutError = errors.New("timeout") 26 var CancelError = errors.New("canceled") 27 28 type requestInfo struct { 29 cTime time.Time 30 reqMO p2pcommon.MsgOrder 31 receiver p2pcommon.ResponseReceiver 32 } 33 34 type queryMsg struct { 35 handler p2pcommon.MessageHandler 36 msg p2pcommon.Message 37 msgBody p2pcommon.MessageBody 38 } 39 40 // remotePeerImpl represent remote peer to which is connected 41 type remotePeerImpl struct { 42 logger *log.Logger 43 pingDuration time.Duration 44 45 manageNum uint32 46 meta p2pcommon.PeerMeta 47 name string 48 state types.PeerState 49 role p2pcommon.PeerRole 50 actor p2pcommon.ActorService 51 pm p2pcommon.PeerManager 52 mf p2pcommon.MoFactory 53 signer p2pcommon.MsgSigner 54 metric *metric.PeerMetric 55 tnt p2pcommon.TxNoticeTracer 56 57 stopChan chan struct{} 58 59 // direct write channel 60 dWrite chan p2pcommon.MsgOrder 61 closeWrite chan struct{} 62 63 // used to access request data from response handlers 64 requests map[p2pcommon.MsgID]*requestInfo 65 reqMutex *sync.Mutex 66 67 handlers map[p2pcommon.SubProtocol]p2pcommon.MessageHandler 68 69 // TODO make automatic disconnect if remote peer cause too many wrong message 70 blkHashCache *lru.Cache 71 txHashCache *lru.Cache 72 lastStatus *types.LastBlockStatus 73 // lastBlkNoticeTime is time that local peer sent NewBlockNotice to this remote peer 74 lastBlkNoticeTime time.Time 75 skipCnt int32 76 77 txQueueLock *sync.Mutex 78 txNoticeQueue *p2putil.PressableQueue 79 maxTxNoticeHashSize int 80 81 rw p2pcommon.MsgReadWriter 82 } 83 84 var _ p2pcommon.RemotePeer = (*remotePeerImpl)(nil) 85 86 // newRemotePeer create an object which represent a remote peer. 87 func newRemotePeer(meta p2pcommon.PeerMeta, manageNum uint32, pm p2pcommon.PeerManager, actor p2pcommon.ActorService, log *log.Logger, mf p2pcommon.MoFactory, signer p2pcommon.MsgSigner, rw p2pcommon.MsgReadWriter) *remotePeerImpl { 88 rPeer := &remotePeerImpl{ 89 meta: meta, manageNum: manageNum, pm: pm, 90 name: fmt.Sprintf("%s#%d", p2putil.ShortForm(meta.ID), manageNum), 91 actor: actor, logger: log, mf: mf, signer: signer, rw: rw, 92 pingDuration: defaultPingInterval, 93 state: types.STARTING, 94 95 lastStatus: &types.LastBlockStatus{}, 96 stopChan: make(chan struct{}, 1), 97 closeWrite: make(chan struct{}), 98 99 requests: make(map[p2pcommon.MsgID]*requestInfo), 100 reqMutex: &sync.Mutex{}, 101 102 handlers: make(map[p2pcommon.SubProtocol]p2pcommon.MessageHandler), 103 104 txQueueLock: &sync.Mutex{}, 105 txNoticeQueue: p2putil.NewPressableQueue(DefaultPeerTxQueueSize), 106 maxTxNoticeHashSize: DefaultPeerTxQueueSize, 107 } 108 rPeer.dWrite = make(chan p2pcommon.MsgOrder, writeMsgBufferSize) 109 110 var err error 111 rPeer.blkHashCache, err = lru.New(DefaultPeerBlockCacheSize) 112 if err != nil { 113 panic("Failed to create remote peer " + err.Error()) 114 } 115 rPeer.txHashCache, err = lru.New(DefaultPeerTxCacheSize) 116 if err != nil { 117 panic("Failed to create remote peer " + err.Error()) 118 } 119 120 return rPeer 121 } 122 123 // ID return id of peer, same as peer.meta.ID 124 func (p *remotePeerImpl) ID() types.PeerID { 125 return p.meta.ID 126 } 127 128 func (p *remotePeerImpl) Meta() p2pcommon.PeerMeta { 129 return p.meta 130 } 131 132 func (p *remotePeerImpl) ManageNumber() uint32 { 133 return p.manageNum 134 } 135 136 func (p *remotePeerImpl) Name() string { 137 return p.name 138 } 139 140 func (p *remotePeerImpl) Version() string { 141 return p.meta.Version 142 } 143 144 func (p *remotePeerImpl) Role() p2pcommon.PeerRole { 145 return p.role 146 } 147 func (p *remotePeerImpl) ChangeRole(role p2pcommon.PeerRole) { 148 p.role = role 149 } 150 151 func (p *remotePeerImpl) AddMessageHandler(subProtocol p2pcommon.SubProtocol, handler p2pcommon.MessageHandler) { 152 p.handlers[subProtocol] = handler 153 } 154 155 func (p *remotePeerImpl) MF() p2pcommon.MoFactory { 156 return p.mf 157 } 158 159 // State returns current state of peer 160 func (p *remotePeerImpl) State() types.PeerState { 161 return p.state.Get() 162 } 163 164 func (p *remotePeerImpl) LastStatus() *types.LastBlockStatus { 165 return p.lastStatus 166 } 167 168 // runPeer should be called by go routine 169 func (p *remotePeerImpl) RunPeer() { 170 p.logger.Debug().Str(p2putil.LogPeerName, p.Name()).Msg("Starting peer") 171 pingTicker := time.NewTicker(p.pingDuration) 172 173 go p.runWrite() 174 go p.runRead() 175 176 txNoticeTicker := time.NewTicker(txNoticeInterval) 177 178 // peer state is changed to RUNNING after all sub goroutine is ready, and to STOPPED before fll sub goroutine is stopped. 179 p.state.SetAndGet(types.RUNNING) 180 READNOPLOOP: 181 for { 182 select { 183 case <-pingTicker.C: 184 p.sendPing() 185 // no operation for now 186 case <-txNoticeTicker.C: 187 p.trySendTxNotices() 188 case <-p.stopChan: 189 break READNOPLOOP 190 } 191 } 192 193 p.logger.Info().Str(p2putil.LogPeerName, p.Name()).Msg("Finishing peer") 194 txNoticeTicker.Stop() 195 pingTicker.Stop() 196 // finish goroutine write. read goroutine will be closed automatically when disconnect 197 close(p.closeWrite) 198 close(p.stopChan) 199 p.state.SetAndGet(types.STOPPED) 200 201 p.pm.RemovePeer(p) 202 } 203 204 func (p *remotePeerImpl) runWrite() { 205 cleanupTicker := time.NewTicker(cleanRequestInterval) 206 defer func() { 207 if r := recover(); r != nil { 208 p.logger.Panic().Str("callStack", string(debug.Stack())).Str(p2putil.LogPeerName, p.Name()).Str("recover", fmt.Sprint(r)).Msg("There were panic in runWrite ") 209 } 210 }() 211 212 WRITELOOP: 213 for { 214 select { 215 case m := <-p.dWrite: 216 p.writeToPeer(m) 217 case <-cleanupTicker.C: 218 p.pruneRequests() 219 case <-p.closeWrite: 220 p.logger.Debug().Str(p2putil.LogPeerName, p.Name()).Msg("Quitting runWrite") 221 break WRITELOOP 222 } 223 } 224 cleanupTicker.Stop() 225 p.cleanupWrite() 226 p.rw.Close() 227 228 // closing channel is up to golang runtime 229 // close(p.write) 230 // close(p.consumeChan) 231 } 232 233 func (p *remotePeerImpl) cleanupWrite() { 234 // 1. cleaning receive handlers. TODO add code 235 236 // 2. canceling not sent orders 237 for { 238 select { 239 case m := <-p.dWrite: 240 m.CancelSend(p) 241 default: 242 return 243 } 244 } 245 } 246 247 func (p *remotePeerImpl) runRead() { 248 for { 249 msg, err := p.rw.ReadMsg() 250 if err != nil { 251 // TODO set different log level by case (i.e. it can be expected if peer is disconnecting ) 252 p.logger.Warn().Str(p2putil.LogPeerName, p.Name()).Err(err).Msg("Failed to read message") 253 p.Stop() 254 return 255 } 256 if err = p.handleMsg(msg); err != nil { 257 // TODO set different log level by case (i.e. it can be expected if peer is disconnecting ) 258 p.logger.Warn().Str(p2putil.LogPeerName, p.Name()).Err(err).Msg("Failed to handle message") 259 p.Stop() 260 return 261 } 262 } 263 } 264 265 func (p *remotePeerImpl) handleMsg(msg p2pcommon.Message) (err error) { 266 subProto := msg.Subprotocol() 267 defer func() { 268 if r := recover(); r != nil { 269 p.logger.Error().Str(p2putil.LogProtoID, subProto.String()).Str("callStack", string(debug.Stack())).Interface("panic", r).Msg("There were panic in handler.") 270 err = fmt.Errorf("internal error") 271 } 272 }() 273 274 if p.State() > types.RUNNING { 275 p.logger.Debug().Str(p2putil.LogPeerName, p.Name()).Str(p2putil.LogMsgID, msg.ID().String()).Str(p2putil.LogProtoID, subProto.String()).Str("current_state", p.State().String()).Msg("peer is not running. silently drop input message") 276 return nil 277 } 278 279 handler, found := p.handlers[subProto] 280 if !found { 281 p.logger.Debug().Str(p2putil.LogPeerName, p.Name()).Str(p2putil.LogMsgID, msg.ID().String()).Str(p2putil.LogProtoID, subProto.String()).Msg("invalid protocol") 282 return fmt.Errorf("invalid protocol %s", subProto) 283 } 284 285 handler.PreHandle() 286 287 payload, err := handler.ParsePayload(msg.Payload()) 288 if err != nil { 289 p.logger.Warn().Err(err).Str(p2putil.LogPeerName, p.Name()).Str(p2putil.LogMsgID, msg.ID().String()).Str(p2putil.LogProtoID, subProto.String()).Msg("invalid message data") 290 return fmt.Errorf("invalid message data") 291 } 292 //err = p.signer.verifyMsg(msg, p.meta.ID) 293 //if err != nil { 294 // p.logger.Warn().Err(err).Str(LogPeerName, p.Name()).Str(LogMsgID, msg.ID().String()).Str(LogProtoID, subProto.String()).Msg("Failed to check signature") 295 // return fmt.Errorf("Failed to check signature") 296 //} 297 err = handler.CheckAuth(msg, payload) 298 if err != nil { 299 p.logger.Warn().Err(err).Str(p2putil.LogPeerName, p.Name()).Str(p2putil.LogMsgID, msg.ID().String()).Str(p2putil.LogProtoID, subProto.String()).Msg("Failed to authenticate message") 300 return fmt.Errorf("Failed to authenticate message.") 301 } 302 303 handler.Handle(msg, payload) 304 305 handler.PostHandle(msg, payload) 306 return nil 307 } 308 309 // Stop stops aPeer works 310 func (p *remotePeerImpl) Stop() { 311 prevState := p.state.SetAndGet(types.STOPPING) 312 if prevState <= types.RUNNING { 313 p.stopChan <- struct{}{} 314 } 315 316 } 317 318 func (p *remotePeerImpl) SendMessage(msg p2pcommon.MsgOrder) { 319 if p.State() > types.RUNNING { 320 p.logger.Debug().Str(p2putil.LogPeerName, p.Name()).Str(p2putil.LogProtoID, msg.GetProtocolID().String()). 321 Str(p2putil.LogMsgID, msg.GetMsgID().String()).Str("current_state", p.State().String()).Msg("Cancel sending message, since peer is not running state") 322 return 323 } 324 select { 325 case p.dWrite <- msg: 326 // it's OK 327 default: 328 p.logger.Info().Str(p2putil.LogPeerName, p.Name()).Str(p2putil.LogProtoID, msg.GetProtocolID().String()). 329 Str(p2putil.LogMsgID, msg.GetMsgID().String()).Msg("Remote peer is busy or down") 330 // TODO find more elegant way to handled flooding queue. in lots of cases, pending for dropped tx notice or newBlock notice (not blockProduced notice) are not critical in lots of cases. 331 p.Stop() 332 } 333 } 334 335 func (p *remotePeerImpl) SendAndWaitMessage(msg p2pcommon.MsgOrder, timeout time.Duration) error { 336 if p.State() > types.RUNNING { 337 p.logger.Debug().Str(p2putil.LogPeerName, p.Name()).Str(p2putil.LogProtoID, msg.GetProtocolID().String()). 338 Str(p2putil.LogMsgID, msg.GetMsgID().String()).Str("current_state", p.State().String()).Msg("Cancel sending message, since peer is not running state") 339 return fmt.Errorf("not running") 340 } 341 select { 342 case p.dWrite <- msg: 343 return nil 344 case <-time.NewTimer(timeout).C: 345 p.logger.Info().Str(p2putil.LogPeerName, p.Name()).Str(p2putil.LogProtoID, msg.GetProtocolID().String()). 346 Str(p2putil.LogMsgID, msg.GetMsgID().String()).Msg("Remote peer is busy or down") 347 // TODO find more elegant way to handled flooding queue. in lots of cases, pending for dropped tx notice or newBlock notice (not blockProduced notice) are not critical in lots of cases. 348 p.Stop() 349 return TimeoutError 350 } 351 } 352 353 func (p *remotePeerImpl) PushTxsNotice(txHashes []types.TxID) { 354 p.txQueueLock.Lock() 355 defer p.txQueueLock.Unlock() 356 for _, hash := range txHashes { 357 if !p.txNoticeQueue.Offer(hash) { 358 p.sendTxNotices() 359 // this Offer is always succeeded by invariant 360 p.txNoticeQueue.Offer(hash) 361 } 362 } 363 } 364 365 // ConsumeRequest remove request from request history. 366 func (p *remotePeerImpl) ConsumeRequest(originalID p2pcommon.MsgID) { 367 p.reqMutex.Lock() 368 delete(p.requests, originalID) 369 p.reqMutex.Unlock() 370 } 371 372 // requestIDNotFoundReceiver is to handle response msg which the original message is not identified 373 func (p *remotePeerImpl) requestIDNotFoundReceiver(msg p2pcommon.Message, msgBody p2pcommon.MessageBody) bool { 374 return true 375 } 376 377 // passThroughReceiver is bypass message to legacy handler. 378 func (p *remotePeerImpl) passThroughReceiver(msg p2pcommon.Message, msgBody p2pcommon.MessageBody) bool { 379 return false 380 } 381 382 func (p *remotePeerImpl) GetReceiver(originalID p2pcommon.MsgID) p2pcommon.ResponseReceiver { 383 p.reqMutex.Lock() 384 defer p.reqMutex.Unlock() 385 req, found := p.requests[originalID] 386 if !found { 387 return p.requestIDNotFoundReceiver 388 } else if req.receiver == nil { 389 return p.passThroughReceiver 390 } else { 391 return req.receiver 392 } 393 } 394 395 func (p *remotePeerImpl) writeToPeer(m p2pcommon.MsgOrder) { 396 if err := m.SendTo(p); err != nil { 397 // write fail 398 p.Stop() 399 } 400 } 401 402 func (p *remotePeerImpl) trySendTxNotices() { 403 p.txQueueLock.Lock() 404 defer p.txQueueLock.Unlock() 405 p.sendTxNotices() 406 } 407 408 // sendTxNotices must be called in txQueueLock 409 func (p *remotePeerImpl) sendTxNotices() { 410 // make create multiple message if queue is too many hashes. 411 for { 412 // no need to send if queue is empty 413 if p.txNoticeQueue.Size() == 0 { 414 return 415 } 416 hashes := make([][]byte, 0, p.txNoticeQueue.Size()) 417 skippedTxIDs := make([]types.TxID, 0) 418 for element := p.txNoticeQueue.Poll(); element != nil; element = p.txNoticeQueue.Poll() { 419 hash := element.(types.TxID) 420 if p.txHashCache.Contains(hash) { 421 skippedTxIDs = append(skippedTxIDs, hash) 422 continue 423 } 424 hashes = append(hashes, hash[:]) 425 p.txHashCache.Add(hash, cachePlaceHolder) 426 } 427 if len(hashes) > 0 { 428 mo := p.mf.NewMsgTxBroadcastOrder(&types.NewTransactionsNotice{TxHashes: hashes}) 429 p.SendMessage(mo) 430 } 431 if len(skippedTxIDs) > 0 { 432 // if tx is in cache, the remote peer will have that tx. 433 p.tnt.ReportSend(skippedTxIDs, p.ID()) 434 } 435 } 436 } 437 438 // this method MUST be called in same go routine as AergoPeer.RunPeer() 439 func (p *remotePeerImpl) sendPing() { 440 // find my best block 441 bestBlock, err := p.actor.GetChainAccessor().GetBestBlock() 442 if err != nil { 443 p.logger.Warn().Err(err).Msg("cancel ping. failed to get best block") 444 return 445 } 446 // create message data 447 pingMsg := &types.Ping{ 448 BestBlockHash: bestBlock.BlockHash(), 449 BestHeight: bestBlock.GetHeader().GetBlockNo(), 450 } 451 452 p.SendMessage(p.mf.NewMsgRequestOrder(true, p2pcommon.PingRequest, pingMsg)) 453 } 454 455 // send notice message and then disconnect. this routine should only run in RunPeer go routine 456 func (p *remotePeerImpl) goAwayMsg(msg string) { 457 p.logger.Info().Str(p2putil.LogPeerName, p.Name()).Str("msg", msg).Msg("Peer is closing") 458 p.SendAndWaitMessage(p.mf.NewMsgRequestOrder(false, p2pcommon.GoAway, &types.GoAwayNotice{Message: msg}), time.Second) 459 p.Stop() 460 } 461 462 func (p *remotePeerImpl) pruneRequests() { 463 debugLog := p.logger.IsDebugEnabled() 464 deletedCnt := 0 465 var deletedReqs []string 466 expireTime := time.Now().Add(-1 * time.Hour) 467 p.reqMutex.Lock() 468 defer p.reqMutex.Unlock() 469 for key, m := range p.requests { 470 if m.cTime.Before(expireTime) { 471 delete(p.requests, key) 472 if debugLog { 473 deletedReqs = append(deletedReqs, m.reqMO.GetProtocolID().String()+"/"+key.String()+m.cTime.String()) 474 } 475 deletedCnt++ 476 } 477 } 478 p.logger.Info().Int("count", deletedCnt).Str(p2putil.LogPeerName, p.Name()). 479 Time("until", expireTime).Msg("Pruned requests which response was not came") 480 //.Msg("Pruned %d requests but no response to peer %s until %v", deletedCnt, p.meta.ID.Pretty(), time.Unix(expireTime, 0)) 481 if debugLog { 482 p.logger.Debug().Strs("reqs", deletedReqs).Msg("Pruned") 483 } 484 } 485 486 func (p *remotePeerImpl) UpdateBlkCache(blkHash []byte, blkNumber uint64) bool { 487 p.UpdateLastNotice(blkHash, blkNumber) 488 hash := types.ToBlockID(blkHash) 489 // lru cache can't accept byte slice key 490 found, _ := p.blkHashCache.ContainsOrAdd(hash, true) 491 return found 492 } 493 494 func (p *remotePeerImpl) UpdateTxCache(hashes []types.TxID) []types.TxID { 495 // lru cache can't accept byte slice key 496 added := make([]types.TxID, 0, len(hashes)) 497 for _, hash := range hashes { 498 if found, _ := p.txHashCache.ContainsOrAdd(hash, true); !found { 499 added = append(added, hash) 500 } 501 } 502 return added 503 } 504 505 func (p *remotePeerImpl) UpdateLastNotice(blkHash []byte, blkNumber uint64) { 506 p.lastStatus = &types.LastBlockStatus{time.Now(), blkHash, blkNumber} 507 } 508 509 func (p *remotePeerImpl) sendGoAway(msg string) { 510 // TODO: send goaway message and close connection 511 }