bitbucket.org/number571/tendermint@v0.8.14/internal/mempool/v1/reactor.go

package v1

import (
	"context"
	"errors"
	"fmt"
	"runtime/debug"
	"sync"
	"time"

	cfg "bitbucket.org/number571/tendermint/config"
	tmsync "bitbucket.org/number571/tendermint/internal/libs/sync"
	"bitbucket.org/number571/tendermint/internal/mempool"
	"bitbucket.org/number571/tendermint/internal/p2p"
	"bitbucket.org/number571/tendermint/libs/log"
	"bitbucket.org/number571/tendermint/libs/service"
	protomem "bitbucket.org/number571/tendermint/proto/tendermint/mempool"
	"bitbucket.org/number571/tendermint/types"
)

var (
	_ service.Service = (*Reactor)(nil)
	_ p2p.Wrapper     = (*protomem.Message)(nil)
)

// PeerManager defines the interface contract required for getting necessary
// peer information. This should eventually be replaced with a message-oriented
// approach utilizing the p2p stack.
type PeerManager interface {
	GetHeight(types.NodeID) int64
}

// Reactor implements a service that contains a mempool of txs that are broadcast
// amongst peers. It maintains a map from peer ID to counter, to prevent gossiping
// txs back to the peers we received them from.
type Reactor struct {
	service.BaseService

	config  *cfg.MempoolConfig
	mempool *TxMempool
	ids     *mempool.MempoolIDs

	// XXX: Currently, this is the only way to get information about a peer. Ideally,
	// we rely on message-oriented communication to get necessary peer data.
	// ref: https://bitbucket.org/number571/tendermint/issues/5670
	peerMgr PeerManager

	mempoolCh   *p2p.Channel
	peerUpdates *p2p.PeerUpdates
	closeCh     chan struct{}

	// peerWG is used to coordinate graceful termination of all peer broadcasting
	// goroutines.
	peerWG sync.WaitGroup

	mtx          tmsync.Mutex
	peerRoutines map[types.NodeID]*tmsync.Closer
}

// NewReactor returns a reference to a new reactor.
func NewReactor(
	logger log.Logger,
	config *cfg.MempoolConfig,
	peerMgr PeerManager,
	txmp *TxMempool,
	mempoolCh *p2p.Channel,
	peerUpdates *p2p.PeerUpdates,
) *Reactor {
	r := &Reactor{
		config:       config,
		peerMgr:      peerMgr,
		mempool:      txmp,
		ids:          mempool.NewMempoolIDs(),
		mempoolCh:    mempoolCh,
		peerUpdates:  peerUpdates,
		closeCh:      make(chan struct{}),
		peerRoutines: make(map[types.NodeID]*tmsync.Closer),
	}

	r.BaseService = *service.NewBaseService(logger, "Mempool", r)
	return r
}

// GetChannelShims returns a map of ChannelDescriptorShim objects, where each
// object wraps a reference to a legacy p2p ChannelDescriptor and the corresponding
// p2p proto.Message the new p2p Channel is responsible for handling.
//
// TODO: Remove once p2p refactor is complete.
// ref: https://bitbucket.org/number571/tendermint/issues/5670
func GetChannelShims(config *cfg.MempoolConfig) map[p2p.ChannelID]*p2p.ChannelDescriptorShim {
	largestTx := make([]byte, config.MaxTxBytes)
	batchMsg := protomem.Message{
		Sum: &protomem.Message_Txs{
			Txs: &protomem.Txs{Txs: [][]byte{largestTx}},
		},
	}

	return map[p2p.ChannelID]*p2p.ChannelDescriptorShim{
		mempool.MempoolChannel: {
			MsgType: new(protomem.Message),
			Descriptor: &p2p.ChannelDescriptor{
				ID:                  byte(mempool.MempoolChannel),
				Priority:            5,
				RecvMessageCapacity: batchMsg.Size(),
				RecvBufferCapacity:  128,
				MaxSendBytes:        5000,
			},
		},
	}
}

// OnStart starts separate goroutines for each p2p Channel and listens for
// envelopes on each. In addition, it also listens for peer updates and handles
// messages on that p2p channel accordingly. The caller must be sure to execute
// OnStop to ensure the outbound p2p Channels are closed.
func (r *Reactor) OnStart() error {
	if !r.config.Broadcast {
		r.Logger.Info("tx broadcasting is disabled")
	}

	go r.processMempoolCh()
	go r.processPeerUpdates()

	return nil
}

// OnStop stops the reactor by signaling to all spawned goroutines to exit and
// blocking until they all exit.
func (r *Reactor) OnStop() {
	r.mtx.Lock()
	for _, c := range r.peerRoutines {
		c.Close()
	}
	r.mtx.Unlock()

	// wait for all spawned peer tx broadcasting goroutines to gracefully exit
	r.peerWG.Wait()

	// Close closeCh to signal to all spawned goroutines to gracefully exit. All
	// p2p Channels should execute Close().
	close(r.closeCh)

	// Wait for all p2p Channels to be closed before returning. This ensures we
	// can easily reason about synchronization of all p2p Channels and ensure no
	// panics will occur.
	<-r.mempoolCh.Done()
	<-r.peerUpdates.Done()
}

// handleMempoolMessage handles envelopes sent from peers on the MempoolChannel.
// For every tx in the message, we execute CheckTx. It returns an error if an
// empty set of txs is sent in an envelope or if we receive an unexpected
// message type.
func (r *Reactor) handleMempoolMessage(envelope p2p.Envelope) error {
	logger := r.Logger.With("peer", envelope.From)

	switch msg := envelope.Message.(type) {
	case *protomem.Txs:
		protoTxs := msg.GetTxs()
		if len(protoTxs) == 0 {
			return errors.New("empty txs received from peer")
		}

		txInfo := mempool.TxInfo{SenderID: r.ids.GetForPeer(envelope.From)}
		if len(envelope.From) != 0 {
			txInfo.SenderNodeID = envelope.From
		}

		for _, tx := range protoTxs {
			if err := r.mempool.CheckTx(context.Background(), types.Tx(tx), nil, txInfo); err != nil {
				logger.Error("checktx failed for tx", "tx", fmt.Sprintf("%X", mempool.TxHashFromBytes(tx)), "err", err)
			}
		}

	default:
		return fmt.Errorf("received unknown message: %T", msg)
	}

	return nil
}

// handleMessage handles an Envelope sent from a peer on a specific p2p Channel.
// It will handle errors and any possible panics gracefully. A caller can handle
// any error returned by sending a PeerError on the respective channel.
func (r *Reactor) handleMessage(chID p2p.ChannelID, envelope p2p.Envelope) (err error) {
	defer func() {
		if e := recover(); e != nil {
			err = fmt.Errorf("panic in processing message: %v", e)
			r.Logger.Error(
				"recovering from processing message panic",
				"err", err,
				"stack", string(debug.Stack()),
			)
		}
	}()

	r.Logger.Debug("received message", "peer", envelope.From)

	switch chID {
	case mempool.MempoolChannel:
		err = r.handleMempoolMessage(envelope)

	default:
		err = fmt.Errorf("unknown channel ID (%d) for envelope (%T)", chID, envelope.Message)
	}

	return err
}

// processMempoolCh implements a blocking event loop where we listen for p2p
// Envelope messages from the mempoolCh.
func (r *Reactor) processMempoolCh() {
	defer r.mempoolCh.Close()

	for {
		select {
		case envelope := <-r.mempoolCh.In:
			if err := r.handleMessage(r.mempoolCh.ID, envelope); err != nil {
				r.Logger.Error("failed to process message", "ch_id", r.mempoolCh.ID, "envelope", envelope, "err", err)
				r.mempoolCh.Error <- p2p.PeerError{
					NodeID: envelope.From,
					Err:    err,
				}
			}

		case <-r.closeCh:
			r.Logger.Debug("stopped listening on mempool channel; closing...")
			return
		}
	}
}

// processPeerUpdate processes a PeerUpdate. For added peers, PeerStatusUp, we
// check if the reactor is running and if we've already started a tx broadcasting
// goroutine or not. If not, we start one for the newly added peer. For down or
// removed peers, we remove the peer from the mempool peer ID set and signal to
// stop the tx broadcasting goroutine.
func (r *Reactor) processPeerUpdate(peerUpdate p2p.PeerUpdate) {
	r.Logger.Debug("received peer update", "peer", peerUpdate.NodeID, "status", peerUpdate.Status)

	r.mtx.Lock()
	defer r.mtx.Unlock()

	switch peerUpdate.Status {
	case p2p.PeerStatusUp:
		// Do not allow starting new tx broadcast loops after reactor shutdown
		// has been initiated. This can happen after we've manually closed all
		// peer broadcast loops and closed r.closeCh, but the router still sends
		// in-flight peer updates.
		if !r.IsRunning() {
			return
		}

		if r.config.Broadcast {
			// Check if we've already started a goroutine for this peer, if not we create
			// a new done channel so we can explicitly close the goroutine if the peer
			// is later removed, we increment the waitgroup so the reactor can stop
			// safely, and finally start the goroutine to broadcast txs to that peer.
			_, ok := r.peerRoutines[peerUpdate.NodeID]
			if !ok {
				closer := tmsync.NewCloser()

				r.peerRoutines[peerUpdate.NodeID] = closer
				r.peerWG.Add(1)

				r.ids.ReserveForPeer(peerUpdate.NodeID)

				// start a broadcast routine ensuring all txs are forwarded to the peer
				go r.broadcastTxRoutine(peerUpdate.NodeID, closer)
			}
		}

	case p2p.PeerStatusDown:
		r.ids.Reclaim(peerUpdate.NodeID)

		// Check if we've started a tx broadcasting goroutine for this peer.
		// If we have, we signal to terminate the goroutine via the channel's closure.
		// This will internally decrement the peer waitgroup and remove the peer
		// from the map of peer tx broadcasting goroutines.
		closer, ok := r.peerRoutines[peerUpdate.NodeID]
		if ok {
			closer.Close()
		}
	}
}

// processPeerUpdates initiates a blocking process where we listen for and handle
// PeerUpdate messages. When the reactor is stopped, we will catch the signal and
// close the p2p PeerUpdatesCh gracefully.
func (r *Reactor) processPeerUpdates() {
	defer r.peerUpdates.Close()

	for {
		select {
		case peerUpdate := <-r.peerUpdates.Updates():
			r.processPeerUpdate(peerUpdate)

		case <-r.closeCh:
			r.Logger.Debug("stopped listening on peer updates channel; closing...")
			return
		}
	}
}

func (r *Reactor) broadcastTxRoutine(peerID types.NodeID, closer *tmsync.Closer) {
	peerMempoolID := r.ids.GetForPeer(peerID)
	var memTx *WrappedTx

	// remove the peer ID from the map of routines and mark the waitgroup as done
	defer func() {
		r.mtx.Lock()
		delete(r.peerRoutines, peerID)
		r.mtx.Unlock()

		r.peerWG.Done()

		if e := recover(); e != nil {
			r.Logger.Error(
				"recovering from broadcasting mempool loop",
				"err", e,
				"stack", string(debug.Stack()),
			)
		}
	}()

	for {
		if !r.IsRunning() {
			return
		}

		// memTx is nil when the routine starts and whenever the previously gossiped
		// element had no successor in the gossip index, so wait until a tx becomes
		// available and start from the beginning.
		if memTx == nil {
			select {
			case <-r.mempool.WaitForNextTx(): // wait until a tx is available
				if memTx = r.mempool.NextGossipTx(); memTx == nil {
					continue
				}

			case <-closer.Done():
				// The peer is marked for removal via a PeerUpdate as the doneCh was
				// explicitly closed to signal we should exit.
				return

			case <-r.closeCh:
				// The reactor has signaled that we are stopped and thus we should
				// implicitly exit this peer's goroutine.
				return
			}
		}

		if r.peerMgr != nil {
			height := r.peerMgr.GetHeight(peerID)
			if height > 0 && height < memTx.height-1 {
				// allow for a lag of one block
				time.Sleep(mempool.PeerCatchupSleepIntervalMS * time.Millisecond)
				continue
			}
		}

		// NOTE: Transaction batching was disabled due to:
		// https://bitbucket.org/number571/tendermint/issues/5796
		if ok := r.mempool.txStore.TxHasPeer(memTx.hash, peerMempoolID); !ok {
			// Send the mempool tx to the corresponding peer. Note, the peer may be
			// behind and thus would not be able to process the mempool tx correctly.
			r.mempoolCh.Out <- p2p.Envelope{
				To: peerID,
				Message: &protomem.Txs{
					Txs: [][]byte{memTx.tx},
				},
			}
			r.Logger.Debug(
				"gossiped tx to peer",
				"tx", fmt.Sprintf("%X", mempool.TxHashFromBytes(memTx.tx)),
				"peer", peerID,
			)
		}

		select {
		case <-memTx.gossipEl.NextWaitChan():
			// If there is a next element in the gossip index, we point memTx to that
			// node's value, otherwise we reset memTx to nil which will be checked at
			// the parent for loop.
			next := memTx.gossipEl.Next()
			if next != nil {
				memTx = next.Value.(*WrappedTx)
			} else {
				memTx = nil
			}

		case <-closer.Done():
			// The peer is marked for removal via a PeerUpdate as the doneCh was
			// explicitly closed to signal we should exit.
			return

		case <-r.closeCh:
			// The reactor has signaled that we are stopped and thus we should
			// implicitly exit this peer's goroutine.
			return
		}
	}
}
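For context, here is a minimal wiring sketch (not part of reactor.go) showing how the reactor above would typically be constructed and started during node setup. The *p2p.Channel and *p2p.PeerUpdates values come from the node's p2p router and peer manager; that plumbing is assumed rather than taken from this fork's sources, and the helper name below is hypothetical.

// setupMempoolReactor is a hypothetical helper illustrating how the pieces
// defined in this file fit together. mempoolCh is assumed to be a channel
// opened on the p2p router using the descriptor from GetChannelShims, and
// peerUpdates an update subscription from the peer manager; neither step is
// shown here and the exact router API may differ in this fork.
func setupMempoolReactor(
	logger log.Logger,
	config *cfg.MempoolConfig,
	peerMgr PeerManager,
	txmp *TxMempool,
	mempoolCh *p2p.Channel,
	peerUpdates *p2p.PeerUpdates,
) (*Reactor, error) {
	reactor := NewReactor(logger, config, peerMgr, txmp, mempoolCh, peerUpdates)

	// Start invokes OnStart, which spawns processMempoolCh and processPeerUpdates;
	// a later Stop invokes OnStop, which closes closeCh and waits for the per-peer
	// broadcast goroutines to exit.
	if err := reactor.Start(); err != nil {
		return nil, err
	}
	return reactor, nil
}

Shutdown then amounts to calling Stop on the reactor before tearing down the p2p router, so that the channel and peer-update subscriptions are drained and closed in the order OnStop expects.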