github.com/sunrise-zone/sunrise-node@v0.13.1-sr2/share/p2p/peers/manager.go

package peers

import (
	"context"
	"errors"
	"fmt"
	"sync"
	"sync/atomic"
	"time"

	logging "github.com/ipfs/go-log/v2"
	pubsub "github.com/libp2p/go-libp2p-pubsub"
	"github.com/libp2p/go-libp2p/core/event"
	"github.com/libp2p/go-libp2p/core/host"
	"github.com/libp2p/go-libp2p/core/network"
	"github.com/libp2p/go-libp2p/core/peer"
	"github.com/libp2p/go-libp2p/p2p/host/eventbus"
	"github.com/libp2p/go-libp2p/p2p/net/conngater"

	libhead "github.com/celestiaorg/go-header"

	"github.com/sunrise-zone/sunrise-node/header"
	"github.com/sunrise-zone/sunrise-node/share"
	"github.com/sunrise-zone/sunrise-node/share/p2p/shrexsub"
)

const (
	// ResultNoop indicates the operation was successful and no extra action is required.
	ResultNoop result = "result_noop"
	// ResultCooldownPeer puts the returned peer on cooldown, meaning it won't be returned by the
	// Peer method for some time.
	ResultCooldownPeer = "result_cooldown_peer"
	// ResultBlacklistPeer blacklists the peer. Blacklisted peers are disconnected and blocked from
	// any future p2p communication by the libp2p connection gater.
	ResultBlacklistPeer = "result_blacklist_peer"

	// eventbusBufSize is the size of the buffered channel used to handle libp2p events.
	eventbusBufSize = 32

	// storedPoolsAmount is the number of pools for recent headers that will be stored in the peer
	// manager.
	storedPoolsAmount = 10
)

type result string

var log = logging.Logger("shrex/peer-manager")

// Manager keeps track of peers coming from shrex.Sub and from discovery.
type Manager struct {
	lock   sync.Mutex
	params Parameters

	// header subscription is necessary in order to Validate the inbound eds hash
	headerSub libhead.Subscriber[*header.ExtendedHeader]
	shrexSub  *shrexsub.PubSub
	host      host.Host
	connGater *conngater.BasicConnectionGater

	// pools collect peers from shrexSub and store them by datahash
	pools map[string]*syncPool

	// initialHeight is the height of the first header received from headerSub
	initialHeight atomic.Uint64
	// messages from shrex.Sub with height below storeFrom will be ignored, since we don't need to
	// track peers for those headers
	storeFrom atomic.Uint64

	// nodes collects peer.IDs of nodes found via discovery
	nodes *pool

	// blacklistedHashes contains hashes that are not in the chain
	blacklistedHashes map[string]bool

	metrics *metrics

	headerSubDone         chan struct{}
	disconnectedPeersDone chan struct{}
	cancel                context.CancelFunc
}

// DoneFunc updates internal state depending on the call result. It should be called exactly once
// per peer returned by the Peer method.
type DoneFunc func(result)

type syncPool struct {
	*pool

	// isValidatedDataHash indicates whether the datahash was validated by receiving the
	// corresponding extended header from headerSub
	isValidatedDataHash atomic.Bool
	// height is the height of the header that corresponds to the datahash
	height uint64
	// createdAt is the syncPool creation time
	createdAt time.Time
}

func NewManager(
	params Parameters,
	host host.Host,
	connGater *conngater.BasicConnectionGater,
	options ...Option,
) (*Manager, error) {
	if err := params.Validate(); err != nil {
		return nil, err
	}

	s := &Manager{
		params:                params,
		connGater:             connGater,
		host:                  host,
		pools:                 make(map[string]*syncPool),
		blacklistedHashes:     make(map[string]bool),
		headerSubDone:         make(chan struct{}),
		disconnectedPeersDone: make(chan struct{}),
	}

	for _, opt := range options {
		err := opt(s)
		if err != nil {
			return nil, err
		}
	}

	s.nodes = newPool(s.params.PeerCooldown)
	return s, nil
}

func (m *Manager) Start(startCtx context.Context) error {
	ctx, cancel := context.WithCancel(context.Background())
	m.cancel = cancel

	// pools will only be populated with senders of shrexsub notifications if the WithShrexSubPools
	// option is used.
	if m.shrexSub == nil && m.headerSub == nil {
		return nil
	}

	validatorFn := m.metrics.validationObserver(m.Validate)
	err := m.shrexSub.AddValidator(validatorFn)
	if err != nil {
		return fmt.Errorf("registering validator: %w", err)
	}
	err = m.shrexSub.Start(startCtx)
	if err != nil {
		return fmt.Errorf("starting shrexsub: %w", err)
	}

	headerSub, err := m.headerSub.Subscribe()
	if err != nil {
		return fmt.Errorf("subscribing to headersub: %w", err)
	}

	sub, err := m.host.EventBus().Subscribe(&event.EvtPeerConnectednessChanged{}, eventbus.BufSize(eventbusBufSize))
	if err != nil {
		return fmt.Errorf("subscribing to libp2p events: %w", err)
	}

	go m.subscribeHeader(ctx, headerSub)
	go m.subscribeDisconnectedPeers(ctx, sub)
	go m.GC(ctx)
	return nil
}

func (m *Manager) Stop(ctx context.Context) error {
	m.cancel()

	// we do not need to wait for headersub and disconnected peers to finish
	// here, since they were never started
	if m.headerSub == nil && m.shrexSub == nil {
		return nil
	}

	select {
	case <-m.headerSubDone:
	case <-ctx.Done():
		return ctx.Err()
	}

	select {
	case <-m.disconnectedPeersDone:
	case <-ctx.Done():
		return ctx.Err()
	}

	return nil
}
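
// startPeerManager is an illustrative sketch, not part of the upstream API: it only shows the
// intended lifecycle of a Manager as implied by NewManager, Start and Stop above. It assumes the
// caller already has validated Parameters, a libp2p host and a connection gater; how those are
// constructed is outside the scope of this file.
func startPeerManager(
	ctx context.Context,
	params Parameters,
	h host.Host,
	gater *conngater.BasicConnectionGater,
	opts ...Option,
) (*Manager, error) {
	m, err := NewManager(params, h, gater, opts...)
	if err != nil {
		return nil, fmt.Errorf("creating peer manager: %w", err)
	}
	// Start registers the shrexsub validator and launches the background routines;
	// Stop should later be called with a bounded context so they can exit cleanly.
	if err := m.Start(ctx); err != nil {
		return nil, fmt.Errorf("starting peer manager: %w", err)
	}
	return m, nil
}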

// Peer returns a peer collected from shrex.Sub for the given datahash, if any is available.
// If there is none, it will look for nodes collected from discovery. If there are no discovered
// nodes either, it will wait until a peer appears in either source or the context is done.
// After fetching data using the given peer, the caller is required to call the returned DoneFunc
// with the appropriate result value.
func (m *Manager) Peer(ctx context.Context, datahash share.DataHash, height uint64,
) (peer.ID, DoneFunc, error) {
	p := m.validatedPool(datahash.String(), height)

	// first, check if a peer is available for the given datahash
	peerID, ok := p.tryGet()
	if ok {
		if m.removeIfUnreachable(p, peerID) {
			return m.Peer(ctx, datahash, height)
		}
		return m.newPeer(ctx, datahash, peerID, sourceShrexSub, p.len(), 0)
	}

	// if no peer for the datahash is currently available, try to use a node
	// obtained from discovery
	peerID, ok = m.nodes.tryGet()
	if ok {
		return m.newPeer(ctx, datahash, peerID, sourceFullNodes, m.nodes.len(), 0)
	}

	// no peers are available right now, wait for the first one
	start := time.Now()
	select {
	case peerID = <-p.next(ctx):
		if m.removeIfUnreachable(p, peerID) {
			return m.Peer(ctx, datahash, height)
		}
		return m.newPeer(ctx, datahash, peerID, sourceShrexSub, p.len(), time.Since(start))
	case peerID = <-m.nodes.next(ctx):
		return m.newPeer(ctx, datahash, peerID, sourceFullNodes, m.nodes.len(), time.Since(start))
	case <-ctx.Done():
		return "", nil, ctx.Err()
	}
}

// UpdateNodePool is called by discovery when a new node is discovered or removed.
func (m *Manager) UpdateNodePool(peerID peer.ID, isAdded bool) {
	if isAdded {
		if m.isBlacklistedPeer(peerID) {
			log.Debugw("got blacklisted peer from discovery", "peer", peerID.String())
			return
		}
		m.nodes.add(peerID)
		log.Debugw("added to discovered nodes pool", "peer", peerID)
		return
	}

	log.Debugw("removing peer from discovered nodes pool", "peer", peerID.String())
	m.nodes.remove(peerID)
}

func (m *Manager) newPeer(
	ctx context.Context,
	datahash share.DataHash,
	peerID peer.ID,
	source peerSource,
	poolSize int,
	waitTime time.Duration,
) (peer.ID, DoneFunc, error) {
	log.Debugw("got peer",
		"hash", datahash.String(),
		"peer", peerID.String(),
		"source", source,
		"pool_size", poolSize,
		"wait (s)", waitTime)
	m.metrics.observeGetPeer(ctx, source, poolSize, waitTime)
	return peerID, m.doneFunc(datahash, peerID, source), nil
}

func (m *Manager) doneFunc(datahash share.DataHash, peerID peer.ID, source peerSource) DoneFunc {
	return func(result result) {
		log.Debugw("set peer result",
			"hash", datahash.String(),
			"peer", peerID.String(),
			"source", source,
			"result", result)
		m.metrics.observeDoneResult(source, result)
		switch result {
		case ResultNoop:
		case ResultCooldownPeer:
			if source == sourceFullNodes {
				m.nodes.putOnCooldown(peerID)
				return
			}
			m.getPool(datahash.String()).putOnCooldown(peerID)
		case ResultBlacklistPeer:
			m.blacklistPeers(reasonMisbehave, peerID)
		}
	}
}
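
// fetchWithPeer is an illustrative sketch, not part of the upstream API: it shows the contract of
// Peer and DoneFunc in one place. The fetch callback stands in for an actual shrex request, and
// the mapping from errors to results below is an assumption made for the example, not a rule
// enforced by this package.
func fetchWithPeer(
	ctx context.Context,
	m *Manager,
	datahash share.DataHash,
	height uint64,
	fetch func(context.Context, peer.ID) error,
) error {
	peerID, done, err := m.Peer(ctx, datahash, height)
	if err != nil {
		return err
	}

	err = fetch(ctx, peerID)
	switch {
	case err == nil:
		// peer served the data; no extra action required
		done(ResultNoop)
	case errors.Is(err, context.DeadlineExceeded):
		// peer was too slow; keep it, but put it on cooldown for a while
		done(ResultCooldownPeer)
	default:
		// treat any other failure as misbehaviour for the purposes of this sketch
		done(ResultBlacklistPeer)
	}
	return err
}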

// subscribeHeader takes the datahash from each received header and validates the corresponding
// peer pool.
func (m *Manager) subscribeHeader(ctx context.Context, headerSub libhead.Subscription[*header.ExtendedHeader]) {
	defer close(m.headerSubDone)
	defer headerSub.Cancel()

	for {
		h, err := headerSub.NextHeader(ctx)
		if err != nil {
			if errors.Is(err, context.Canceled) {
				return
			}
			log.Errorw("get next header from sub", "err", err)
			continue
		}
		m.validatedPool(h.DataHash.String(), h.Height())

		// store first header for validation purposes
		if m.initialHeight.CompareAndSwap(0, h.Height()) {
			log.Debugw("stored initial height", "height", h.Height())
		}

		// update storeFrom so that only pools for the most recent storedPoolsAmount headers are kept
		m.storeFrom.Store(uint64(max(0, int(h.Height())-storedPoolsAmount)))
		log.Debugw("updated lowest stored height", "height", h.Height())
	}
}

// subscribeDisconnectedPeers subscribes to libp2p connectivity events and removes disconnected
// peers from the nodes pool.
func (m *Manager) subscribeDisconnectedPeers(ctx context.Context, sub event.Subscription) {
	defer close(m.disconnectedPeersDone)
	defer sub.Close()
	for {
		select {
		case <-ctx.Done():
			return
		case e, ok := <-sub.Out():
			if !ok {
				log.Fatal("Subscription for connectedness events is closed.") //nolint:gocritic
				return
			}
			// listen to disconnect event to remove peer from nodes pool
			connStatus := e.(event.EvtPeerConnectednessChanged)
			if connStatus.Connectedness == network.NotConnected {
				peer := connStatus.Peer
				if m.nodes.has(peer) {
					log.Debugw("peer disconnected, removing from discovered nodes pool",
						"peer", peer.String())
					m.nodes.remove(peer)
				}
			}
		}
	}
}

// Validate collects the sender's peer.ID into the pool that corresponds to the notification's datahash.
func (m *Manager) Validate(_ context.Context, peerID peer.ID, msg shrexsub.Notification) pubsub.ValidationResult {
	logger := log.With("peer", peerID.String(), "hash", msg.DataHash.String())

	// messages broadcast from self should bypass the validation with Accept
	if peerID == m.host.ID() {
		logger.Debug("received datahash from self")
		return pubsub.ValidationAccept
	}

	// punish peer for sending invalid hash if it has misbehaved in the past
	if m.isBlacklistedHash(msg.DataHash) {
		logger.Debug("received blacklisted hash, reject validation")
		return pubsub.ValidationReject
	}

	if m.isBlacklistedPeer(peerID) {
		logger.Debug("received message from blacklisted peer, reject validation")
		return pubsub.ValidationReject
	}

	if msg.Height < m.storeFrom.Load() {
		logger.Debug("received message for past header")
		return pubsub.ValidationIgnore
	}

	p := m.getOrCreatePool(msg.DataHash.String(), msg.Height)
	logger.Debugw("got hash from shrex-sub")

	p.add(peerID)
	if p.isValidatedDataHash.Load() {
		// add the peer to the discovered nodes pool only if the datahash has already been validated
		m.nodes.add(peerID)
	}
	return pubsub.ValidationIgnore
}
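
// announceEDS is an illustrative sketch of the producer side of the flow handled by Validate
// above; it is not part of this file's API and assumes shrexsub.PubSub exposes
// Broadcast(ctx, Notification) as in upstream celestia-node. After a node stores an EDS, it
// broadcasts the datahash and height, and peer managers on the receiving side collect the sender
// into the pool for that datahash.
func announceEDS(ctx context.Context, shrexSub *shrexsub.PubSub, datahash share.DataHash, height uint64) error {
	return shrexSub.Broadcast(ctx, shrexsub.Notification{
		DataHash: datahash,
		Height:   height,
	})
}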

func (m *Manager) getPool(datahash string) *syncPool {
	m.lock.Lock()
	defer m.lock.Unlock()
	return m.pools[datahash]
}

func (m *Manager) getOrCreatePool(datahash string, height uint64) *syncPool {
	m.lock.Lock()
	defer m.lock.Unlock()

	p, ok := m.pools[datahash]
	if !ok {
		p = &syncPool{
			height:    height,
			pool:      newPool(m.params.PeerCooldown),
			createdAt: time.Now(),
		}
		m.pools[datahash] = p
	}

	return p
}

func (m *Manager) blacklistPeers(reason blacklistPeerReason, peerIDs ...peer.ID) {
	m.metrics.observeBlacklistPeers(reason, len(peerIDs))

	for _, peerID := range peerIDs {
		// blacklisted peers are logged regardless of the EnableBlackListing option until
		// blacklisting is properly tested and enabled by default.
		log.Debugw("blacklisting peer", "peer", peerID.String(), "reason", reason)
		if !m.params.EnableBlackListing {
			continue
		}

		m.nodes.remove(peerID)
		// add the peer to the blacklist, so we can't connect to it in the future.
		err := m.connGater.BlockPeer(peerID)
		if err != nil {
			log.Warnw("failed to block peer", "peer", peerID, "err", err)
		}
		// close connections to the peer.
		err = m.host.Network().ClosePeer(peerID)
		if err != nil {
			log.Warnw("failed to close connection with peer", "peer", peerID, "err", err)
		}
	}
}

func (m *Manager) isBlacklistedPeer(peerID peer.ID) bool {
	return !m.connGater.InterceptPeerDial(peerID)
}

func (m *Manager) isBlacklistedHash(hash share.DataHash) bool {
	m.lock.Lock()
	defer m.lock.Unlock()
	return m.blacklistedHashes[hash.String()]
}

func (m *Manager) validatedPool(hashStr string, height uint64) *syncPool {
	p := m.getOrCreatePool(hashStr, height)
	if p.isValidatedDataHash.CompareAndSwap(false, true) {
		log.Debugw("pool marked validated", "datahash", hashStr)
		// if the pool is proven to be valid, add all collected peers to the discovered nodes pool
		m.nodes.add(p.peers()...)
	}
	return p
}

// removeIfUnreachable removes the peer from the given pool if it is blacklisted or disconnected.
func (m *Manager) removeIfUnreachable(pool *syncPool, peerID peer.ID) bool {
	if m.isBlacklistedPeer(peerID) || !m.nodes.has(peerID) {
		log.Debugw("removing outdated peer from pool", "peer", peerID.String())
		pool.remove(peerID)
		return true
	}
	return false
}

func (m *Manager) GC(ctx context.Context) {
	ticker := time.NewTicker(m.params.GcInterval)
	defer ticker.Stop()

	var blacklist []peer.ID
	for {
		select {
		case <-ticker.C:
		case <-ctx.Done():
			return
		}

		blacklist = m.cleanUp()
		if len(blacklist) > 0 {
			m.blacklistPeers(reasonInvalidHash, blacklist...)
		}
	}
}

func (m *Manager) cleanUp() []peer.ID {
	if m.initialHeight.Load() == 0 {
		// can't blacklist peers until initialHeight is set
		return nil
	}

	m.lock.Lock()
	defer m.lock.Unlock()

	addToBlackList := make(map[peer.ID]struct{})
	for h, p := range m.pools {
		if p.isValidatedDataHash.Load() {
			// remove pools that are outdated
			if p.height < m.storeFrom.Load() {
				delete(m.pools, h)
			}
			continue
		}

		// can't validate datahashes below the initial height
		if p.height < m.initialHeight.Load() {
			delete(m.pools, h)
			continue
		}

		// find pools that were not validated in time
		if time.Since(p.createdAt) > m.params.PoolValidationTimeout {
			delete(m.pools, h)

			log.Debugw("blacklisting datahash with all corresponding peers",
				"hash", h,
				"peer_list", p.peersList)
			// blacklist the hash
			m.blacklistedHashes[h] = true

			// blacklist the peers
			for _, peer := range p.peersList {
				addToBlackList[peer] = struct{}{}
			}
		}
	}

	blacklist := make([]peer.ID, 0, len(addToBlackList))
	for peerID := range addToBlackList {
		blacklist = append(blacklist, peerID)
	}
	return blacklist
}
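
// onDiscoveredPeersUpdate is an illustrative sketch, not part of the upstream API: it shows how a
// discovery component could feed its results into the Manager. The callback shape
// func(peer.ID, bool) is an assumption; every add/remove event is simply forwarded to
// UpdateNodePool so the fallback pool of discovered full nodes stays current.
func onDiscoveredPeersUpdate(m *Manager) func(peer.ID, bool) {
	return func(peerID peer.ID, isAdded bool) {
		m.UpdateNodePool(peerID, isAdded)
	}
}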