github.com/ari-anchor/sei-tendermint@v0.0.0-20230519144642-dc826b7b56bb/internal/p2p/pex/reactor.go

package pex

import (
	"context"
	"fmt"
	"sync"
	"time"

	"github.com/ari-anchor/sei-tendermint/config"
	"github.com/ari-anchor/sei-tendermint/internal/p2p"
	"github.com/ari-anchor/sei-tendermint/internal/p2p/conn"
	"github.com/ari-anchor/sei-tendermint/libs/log"
	"github.com/ari-anchor/sei-tendermint/libs/service"
	protop2p "github.com/ari-anchor/sei-tendermint/proto/tendermint/p2p"
	"github.com/ari-anchor/sei-tendermint/types"
)

var (
	_ service.Service = (*Reactor)(nil)
	_ p2p.Wrapper     = (*protop2p.PexMessage)(nil)
)

const (
	// PexChannel is a channel for PEX messages
	PexChannel = 0x00

	// over-estimate of max NetAddress size
	// hexID (40) + IP (16) + Port (2) + Name (100) ...
	// NOTE: don't use massive DNS names
	maxAddressSize = 256

	// max addresses returned by GetSelection
	// NOTE: this must match "maxMsgSize"
	maxGetSelection = 250

	// NOTE: amplification factor!
	// a small request results in up to a maxMsgSize response
	maxMsgSize = maxAddressSize * maxGetSelection

	// the minimum interval that must elapse between two requests from the same peer
	minReceiveRequestInterval = 100 * time.Millisecond

	// the maximum number of addresses that can be included in a response
	maxAddresses = 100

	// how long to wait when there are no peers available before trying again
	noAvailablePeersWaitPeriod = 1 * time.Second

	// fullCapacityInterval is the ping rate of the PEX reactor when the peer
	// store is full. The reactor should still look to add new peers in order
	// to flush out low-scoring peers that are still in the peer store.
	fullCapacityInterval = 10 * time.Minute
)

// NoPeersAvailableError is returned when there are no connected peers to send
// a PEX request to.
type NoPeersAvailableError struct {
	error
}

func (e *NoPeersAvailableError) Error() string {
	return "no available peers to send a PEX request to (retrying)"
}
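// isNoPeersAvailableExample is an illustrative, hypothetical helper (not part
// of the original file) showing how callers distinguish the retryable
// NoPeersAvailableError from fatal errors; processPexCh below uses the same
// type assertion when sendRequestForPeers fails.
func isNoPeersAvailableExample(err error) bool {
	_, ok := err.(*NoPeersAvailableError)
	return ok
}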
// TODO: We should decide whether we want channel descriptors to be housed
// within each reactor (as they are now) or, considering that the reactor doesn't
// really need to care about the channel descriptors, whether they should be
// housed in the node module.
func ChannelDescriptor() *conn.ChannelDescriptor {
	return &conn.ChannelDescriptor{
		ID:                  PexChannel,
		MessageType:         new(protop2p.PexMessage),
		Priority:            1,
		SendQueueCapacity:   10,
		RecvMessageCapacity: maxMsgSize,
		RecvBufferCapacity:  128,
		Name:                "pex",
	}
}

// Reactor is the peer exchange (PEX) reactor. It supports the peer manager by
// requesting addresses from other peers that can be handed to the peer manager,
// while at the same time advertising addresses to peers that need more.
//
// The reactor is able to tweak the intensity of its search by decreasing or
// increasing the interval between each request. It tracks connected peers in an
// available-peers set, sending a request to one of them and returning the peer
// to the set once a response is received.
type Reactor struct {
	service.BaseService
	logger log.Logger

	peerManager *p2p.PeerManager
	peerEvents  p2p.PeerEventSubscriber
	// list of available peers to loop through and send peer requests to
	availablePeers map[types.NodeID]struct{}
	// keep track of the last time we saw no available peers, so we can restart
	// the node if it has been too long
	lastNoAvailablePeers time.Time

	mtx sync.RWMutex

	// requestsSent keeps track of which peers the PEX reactor has sent requests
	// to. This prevents the sending of spurious responses.
	// NOTE: If a node never responds, it will remain in this map until a
	// peer down status update is sent.
	requestsSent map[types.NodeID]struct{}

	// lastReceivedRequests keeps track of when peers send a request to prevent
	// peers from sending requests too often (as defined by
	// minReceiveRequestInterval).
	lastReceivedRequests map[types.NodeID]time.Time

	// the total number of unique peers added
	totalPeers int

	channel *p2p.Channel

	// restartCh is used to signal the application layer to restart the node.
	restartCh                     chan struct{}
	restartNoAvailablePeersWindow time.Duration
}

// NewReactor returns a reference to a new reactor.
func NewReactor(
	logger log.Logger,
	peerManager *p2p.PeerManager,
	peerEvents p2p.PeerEventSubscriber,
	restartCh chan struct{},
	selfRemediationConfig *config.SelfRemediationConfig,
) *Reactor {
	r := &Reactor{
		logger:                        logger,
		peerManager:                   peerManager,
		peerEvents:                    peerEvents,
		availablePeers:                make(map[types.NodeID]struct{}),
		lastNoAvailablePeers:          time.Time{},
		requestsSent:                  make(map[types.NodeID]struct{}),
		lastReceivedRequests:          make(map[types.NodeID]time.Time),
		restartCh:                     restartCh,
		restartNoAvailablePeersWindow: time.Duration(selfRemediationConfig.P2pNoPeersRestarWindowSeconds) * time.Second,
	}

	r.BaseService = *service.NewBaseService(logger, "PEX", r)
	return r
}

// SetChannel sets the PEX channel the reactor reads from and writes to. It
// must be called before the reactor is started, since OnStart uses the channel.
func (r *Reactor) SetChannel(ch *p2p.Channel) {
	r.channel = ch
}

// OnStart starts separate goroutines for each p2p Channel and listens for
// envelopes on each. In addition, it also listens for peer updates and handles
// messages on that p2p channel accordingly. The caller must be sure to execute
// OnStop to ensure the outbound p2p Channels are closed.
func (r *Reactor) OnStart(ctx context.Context) error {
	peerUpdates := r.peerEvents(ctx)
	go r.processPexCh(ctx, r.channel)
	go r.processPeerUpdates(ctx, peerUpdates)
	return nil
}

// OnStop stops the reactor by signaling to all spawned goroutines to exit and
// blocking until they all exit.
func (r *Reactor) OnStop() {}
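// startReactorExample is an illustrative, hypothetical sketch (not part of the
// original file) of the expected wiring order: the node layer opens the PEX
// channel using ChannelDescriptor(), hands it to the reactor via SetChannel,
// and only then starts the service. It assumes the context-based
// service.BaseService Start(ctx) entry point used by this codebase.
func startReactorExample(ctx context.Context, r *Reactor, pexChannel *p2p.Channel) error {
	r.SetChannel(pexChannel) // OnStart reads r.channel, so this must happen first
	return r.Start(ctx)
}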
// processPexCh implements a blocking event loop where we listen for p2p
// Envelope messages from the pexCh.
func (r *Reactor) processPexCh(ctx context.Context, pexCh *p2p.Channel) {
	incoming := make(chan *p2p.Envelope)
	go func() {
		defer close(incoming)
		iter := pexCh.Receive(ctx)
		for iter.Next(ctx) {
			select {
			case <-ctx.Done():
				return
			case incoming <- iter.Envelope():
			}
		}
	}()

	// Initially, we will request peers quickly to bootstrap. This duration
	// will be adjusted upward as knowledge of the network grows.
	var nextPeerRequest = minReceiveRequestInterval

	noAvailablePeerFailCounter := 0
	lastNoAvailablePeersTime := time.Now()

	timer := time.NewTimer(0)
	defer timer.Stop()

	for {
		timer.Reset(nextPeerRequest)

		select {
		case <-ctx.Done():
			return

		case <-timer.C:
			// Back off sending peer requests if none are available, but keep the
			// loop running so incoming PEX messages are still handled.
			if noAvailablePeerFailCounter > 0 {
				waitPeriod := noAvailablePeersWaitPeriod * time.Duration(noAvailablePeerFailCounter)
				if time.Since(lastNoAvailablePeersTime) < waitPeriod {
					r.logger.Debug("waiting for more peers to become available", "wait_period", waitPeriod)
					continue
				}
			}

			// Send a request for more peer addresses.
			if err := r.sendRequestForPeers(ctx, pexCh); err != nil {
				r.logger.Error("failed to send request for peers", "err", err)
				if _, ok := err.(*NoPeersAvailableError); ok {
					noAvailablePeerFailCounter++
					lastNoAvailablePeersTime = time.Now()
					continue
				}
				return
			}
			noAvailablePeerFailCounter = 0

		case envelope, ok := <-incoming:
			if !ok {
				return // channel closed
			}

			// A request from another peer, or a response to one of our requests.
			dur, err := r.handlePexMessage(ctx, envelope, pexCh)
			if err != nil {
				r.logger.Error("failed to process message",
					"ch_id", envelope.ChannelID, "envelope", envelope, "err", err)
				if serr := pexCh.SendError(ctx, p2p.PeerError{
					NodeID: envelope.From,
					Err:    err,
				}); serr != nil {
					return
				}
			} else if dur != 0 {
				// We got a useful result; update the poll timer.
				nextPeerRequest = dur
			}
		}
	}
}

// processPeerUpdates initiates a blocking process where we listen for and handle
// PeerUpdate messages. When the reactor is stopped, we will catch the signal and
// close the p2p PeerUpdatesCh gracefully.
func (r *Reactor) processPeerUpdates(ctx context.Context, peerUpdates *p2p.PeerUpdates) {
	for {
		select {
		case <-ctx.Done():
			return
		case peerUpdate := <-peerUpdates.Updates():
			r.processPeerUpdate(peerUpdate)
		}
	}
}
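// buildPexResponseExample is an illustrative, hypothetical helper (not part of
// the original file) showing the conversion performed in handlePexMessage when
// answering a PexRequest: each p2p.NodeAddress is rendered as a URL string and
// packed into a PexResponse, capped elsewhere at maxAddresses entries.
func buildPexResponseExample(addrs []p2p.NodeAddress) *protop2p.PexResponse {
	pexAddresses := make([]protop2p.PexAddress, len(addrs))
	for i, addr := range addrs {
		pexAddresses[i] = protop2p.PexAddress{URL: addr.String()}
	}
	return &protop2p.PexResponse{Addresses: pexAddresses}
}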
// handlePexMessage handles envelopes sent from peers on the PexChannel.
// If an update was received, a new polling interval is returned; otherwise the
// duration is 0.
func (r *Reactor) handlePexMessage(ctx context.Context, envelope *p2p.Envelope, pexCh *p2p.Channel) (time.Duration, error) {
	logger := r.logger.With("peer", envelope.From)

	switch msg := envelope.Message.(type) {
	case *protop2p.PexRequest:
		// Verify that this peer hasn't sent us another request too recently.
		if err := r.markPeerRequest(envelope.From); err != nil {
			r.logger.Error(fmt.Sprintf("PEX mark peer req from %s error %s", envelope.From, err))
			return 0, err
		}

		// Fetch peers from the peer manager, convert NodeAddresses into URL
		// strings, and send them back to the caller.
		nodeAddresses := r.peerManager.Advertise(envelope.From, maxAddresses)
		pexAddresses := make([]protop2p.PexAddress, len(nodeAddresses))
		for idx, addr := range nodeAddresses {
			pexAddresses[idx] = protop2p.PexAddress{
				URL: addr.String(),
			}
		}
		return 0, pexCh.Send(ctx, p2p.Envelope{
			To:      envelope.From,
			Message: &protop2p.PexResponse{Addresses: pexAddresses},
		})

	case *protop2p.PexResponse:
		// Verify that this response corresponds to one of our pending requests.
		if err := r.markPeerResponse(envelope.From); err != nil {
			r.logger.Error(fmt.Sprintf("PEX mark peer resp from %s error %s", envelope.From, err))
			return 0, err
		}

		// Verify that the response does not exceed the safety limit.
		if len(msg.Addresses) > maxAddresses {
			r.logger.Error(fmt.Sprintf("peer %s sent too many addresses (%d > maximum %d)",
				envelope.From, len(msg.Addresses), maxAddresses))
			return 0, fmt.Errorf("peer sent too many addresses (%d > maximum %d)",
				len(msg.Addresses), maxAddresses)
		}

		var numAdded int
		for _, pexAddress := range msg.Addresses {
			peerAddress, err := p2p.ParseNodeAddress(pexAddress.URL)
			if err != nil {
				r.logger.Error(fmt.Sprintf("PEX parse node address error %s", err))
				continue
			}
			added, err := r.peerManager.Add(peerAddress)
			if err != nil {
				logger.Error("failed to add PEX address", "address", peerAddress, "err", err)
				continue
			}
			if added {
				numAdded++
				logger.Debug("added PEX address", "address", peerAddress)
			}
		}

		return r.calculateNextRequestTime(numAdded), nil

	default:
		return 0, fmt.Errorf("received unknown message: %T", msg)
	}
}

// processPeerUpdate processes a PeerUpdate. Peers that come up are added to the
// set of available peers; peers that go down are removed from all bookkeeping.
// If no peers remain available for longer than the configured restart window,
// a router restart is signaled on restartCh.
func (r *Reactor) processPeerUpdate(peerUpdate p2p.PeerUpdate) {
	r.logger.Debug("received PEX peer update", "peer", peerUpdate.NodeID, "status", peerUpdate.Status)

	r.mtx.Lock()
	defer r.mtx.Unlock()

	switch peerUpdate.Status {
	case p2p.PeerStatusUp:
		r.availablePeers[peerUpdate.NodeID] = struct{}{}
		r.lastNoAvailablePeers = time.Time{} // reset
	case p2p.PeerStatusDown:
		delete(r.availablePeers, peerUpdate.NodeID)
		delete(r.requestsSent, peerUpdate.NodeID)
		delete(r.lastReceivedRequests, peerUpdate.NodeID)

		// p2p can be flaky. If no peers have been available for too long, restart
		// the entire router.
		if len(r.availablePeers) == 0 && r.restartNoAvailablePeersWindow > 0 {
			r.logger.Error("no available peers to send a PEX request to (restarting router)")
			if r.lastNoAvailablePeers.IsZero() {
				r.lastNoAvailablePeers = time.Now()
			} else if time.Since(r.lastNoAvailablePeers) > r.restartNoAvailablePeersWindow {
				r.restartCh <- struct{}{}
			}
		}
	default:
	}
}
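// drainRestartSignalExample is an illustrative, hypothetical sketch (not part
// of the original file) of how a node-level supervisor might consume restartCh:
// processPeerUpdate above sends on the channel once no peers have been
// available for restartNoAvailablePeersWindow, and the supervisor reacts by
// restarting the router. restartRouter is a placeholder for whatever restart
// hook the application provides.
func drainRestartSignalExample(ctx context.Context, restartCh <-chan struct{}, restartRouter func()) {
	for {
		select {
		case <-ctx.Done():
			return
		case <-restartCh:
			restartRouter()
		}
	}
}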
// sendRequestForPeers chooses a peer from the set of available peers and sends
// that peer a request for more peer addresses. The chosen peer is moved into
// the requestsSent bucket so that we will not attempt to contact it again
// until it has replied or its status has been updated.
func (r *Reactor) sendRequestForPeers(ctx context.Context, pexCh *p2p.Channel) error {
	r.mtx.Lock()
	defer r.mtx.Unlock()
	if len(r.availablePeers) == 0 {
		return &NoPeersAvailableError{}
	}

	// Select an arbitrary peer from the available set.
	var peerID types.NodeID
	for peerID = range r.availablePeers {
		break
	}

	if err := pexCh.Send(ctx, p2p.Envelope{
		To:      peerID,
		Message: &protop2p.PexRequest{},
	}); err != nil {
		return err
	}

	// Move the peer from available to pending.
	delete(r.availablePeers, peerID)
	r.requestsSent[peerID] = struct{}{}

	return nil
}

// calculateNextRequestTime selects how long we should wait before attempting
// to send out another request for peer addresses.
//
// This implements a simplified proportional control mechanism to poll more
// often when our knowledge of the network is incomplete, and less often as our
// knowledge grows. To estimate our knowledge of the network, we use the
// fraction of "new" peers (addresses we have not previously seen) to the total
// observed so far. When we first join the network, this fraction will be close
// to 1, meaning most peers we hear about are new to us; as we discover more of
// the network, the fraction goes toward zero.
//
// The minimum interval is minReceiveRequestInterval, which ensures we do not
// request addresses from any peer more often than we would allow them to
// request them from us.
func (r *Reactor) calculateNextRequestTime(added int) time.Duration {
	r.mtx.Lock()
	defer r.mtx.Unlock()

	r.totalPeers += added

	// If the peer store is nearly full, wait the maximum interval.
	if ratio := r.peerManager.PeerRatio(); ratio >= 0.95 {
		r.logger.Debug("Peer manager is nearly full",
			"sleep_period", fullCapacityInterval, "ratio", ratio)
		return fullCapacityInterval
	}

	// If there are no available peers to query, poll less aggressively.
	if len(r.availablePeers) == 0 {
		r.logger.Debug("No available peers to send a PEX request",
			"sleep_period", noAvailablePeersWaitPeriod)
		return noAvailablePeersWaitPeriod
	}

	// Reaching here, there are available peers to query and the peer store
	// still has space. Estimate our knowledge of the network from the latest
	// update and choose a new interval.
	base := float64(minReceiveRequestInterval) / float64(len(r.availablePeers))
	multiplier := float64(r.totalPeers+1) / float64(added+1) // +1 to avoid division by zero
	return time.Duration(base*multiplier*multiplier) + minReceiveRequestInterval
}

// markPeerRequest records that the given peer sent us a PEX request, returning
// an error if the peer has already sent one within minReceiveRequestInterval.
func (r *Reactor) markPeerRequest(peer types.NodeID) error {
	r.mtx.Lock()
	defer r.mtx.Unlock()
	if lastRequestTime, ok := r.lastReceivedRequests[peer]; ok {
		if d := time.Since(lastRequestTime); d < minReceiveRequestInterval {
			return fmt.Errorf("peer %v sent PEX request too soon (%v < minimum %v)",
				peer, d, minReceiveRequestInterval)
		}
	}
	r.lastReceivedRequests[peer] = time.Now()
	return nil
}

// markPeerResponse verifies that we actually sent a PEX request to the given
// peer and, if so, returns the peer to the available set.
func (r *Reactor) markPeerResponse(peer types.NodeID) error {
	r.mtx.Lock()
	defer r.mtx.Unlock()
	// Check that a request to this peer was actually sent.
	if _, ok := r.requestsSent[peer]; !ok {
		return fmt.Errorf("peer sent a PEX response when none was requested (%v)", peer)
	}
	delete(r.requestsSent, peer)
	// Return the peer to the available set so it can be queried again by
	// future requests.
	r.availablePeers[peer] = struct{}{}
	return nil
}
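// nextRequestIntervalExample is an illustrative, hypothetical helper (not part
// of the original file) that mirrors the proportional-control math in
// calculateNextRequestTime with plain inputs, to make the back-off concrete.
// For example, with 10 available peers, 50 peers seen in total, and 4 addresses
// newly added by the last response: base = 100ms/10 = 10ms,
// multiplier = 51/5 = 10.2, so the next request fires after roughly
// 10ms*10.2*10.2 + 100ms ≈ 1.14s. The fewer new addresses a response yields,
// the longer the reactor waits before polling again.
func nextRequestIntervalExample(availablePeers, totalPeers, added int) time.Duration {
	base := float64(minReceiveRequestInterval) / float64(availablePeers)
	multiplier := float64(totalPeers+1) / float64(added+1)
	return time.Duration(base*multiplier*multiplier) + minReceiveRequestInterval
}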