go.uber.org/yarpc@v1.72.1/peer/peerlist/v2/list.go (about) 1 // Copyright (c) 2022 Uber Technologies, Inc. 2 // 3 // Permission is hereby granted, free of charge, to any person obtaining a copy 4 // of this software and associated documentation files (the "Software"), to deal 5 // in the Software without restriction, including without limitation the rights 6 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 // copies of the Software, and to permit persons to whom the Software is 8 // furnished to do so, subject to the following conditions: 9 // 10 // The above copyright notice and this permission notice shall be included in 11 // all copies or substantial portions of the Software. 12 // 13 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 19 // THE SOFTWARE. 20 21 package peerlist 22 23 import ( 24 "context" 25 "fmt" 26 "math/rand" 27 "sync" 28 "time" 29 30 "go.uber.org/atomic" 31 "go.uber.org/multierr" 32 "go.uber.org/yarpc/api/peer" 33 "go.uber.org/yarpc/api/transport" 34 "go.uber.org/yarpc/api/x/introspection" 35 intyarpcerrors "go.uber.org/yarpc/internal/yarpcerrors" 36 "go.uber.org/yarpc/pkg/lifecycle" 37 "go.uber.org/yarpc/yarpcerrors" 38 ) 39 40 var ( 41 _noContextDeadlineError = "can't wait for peer without a context deadline for a %s peer list" 42 ) 43 44 // Implementation is a collection of available peers, with its own 45 // subscribers for peer status change notifications. 
// The available peer list encapsulates the logic for selecting from among
// available peers, whereas the peerlist.List is responsible for retaining,
// releasing, and monitoring peer availability.
// Use "go.uber.org/yarpc/peer/peerlist".List in conjunction with an
// Implementation to produce a "go.uber.org/yarpc/api/peer".List.
//
// peerlist.List and peerlist.Implementation compose well with sharding schemes
// that degenerate to returning the only available peer.
//
// The peerlist.List calls Add, Remove, and Choose under a write lock so the
// implementation is free to perform mutations on its own data without locks.
type Implementation interface {
	transport.Lifecycle

	// Add is called when a peer becomes available. The returned subscriber
	// is stored on the peer's thunk and handed back to Remove later.
	Add(peer.StatusPeer, peer.Identifier) peer.Subscriber
	// Remove is called when a peer stops being available or leaves the list.
	// It receives the subscriber that Add returned for that peer.
	Remove(peer.StatusPeer, peer.Identifier, peer.Subscriber)
	// Choose must return an available peer, or nil when none is available.
	// It is called while the list lock is held, so it must not block.
	Choose(context.Context, *transport.Request) peer.StatusPeer
}

// listOptions collects the settings that ListOption values can adjust.
type listOptions struct {
	// capacity is the initial size hint for the list's peer maps.
	capacity int
	// noShuffle disables shuffling of peers before they are added.
	noShuffle bool
	// failFast makes Choose return an error instead of waiting for a peer.
	failFast bool
	// seed initializes the random source used for shuffling.
	seed int64
}

// defaultListOptions is the starting point for New before any ListOption is
// applied. The seed is taken from the clock once, when the package
// initializes.
var defaultListOptions = listOptions{
	capacity: 10,
	seed:     time.Now().UnixNano(),
}

// ListOption customizes the behavior of a list.
type ListOption interface {
	apply(*listOptions)
}

// listOptionFunc adapts a plain function to the ListOption interface.
type listOptionFunc func(*listOptions)

// apply invokes f on the options being built.
func (f listOptionFunc) apply(options *listOptions) { f(options) }

// Capacity specifies the default capacity of the underlying
// data structures for this list.
//
// Defaults to 10.
func Capacity(capacity int) ListOption {
	return listOptionFunc(func(options *listOptions) {
		options.capacity = capacity
	})
}

// NoShuffle disables the default behavior of shuffling peerlist order.
99 func NoShuffle() ListOption { 100 return listOptionFunc(func(options *listOptions) { 101 options.noShuffle = true 102 }) 103 } 104 105 // FailFast indicates that the peer list should not wait for peers to be added, 106 // when choosing a peer. 107 // 108 // This option is particularly useful for proxies. 109 func FailFast() ListOption { 110 return listOptionFunc(func(options *listOptions) { 111 options.failFast = true 112 }) 113 } 114 115 // Seed specifies the random seed to use for shuffling peers 116 // 117 // Defaults to approximately the process start time in nanoseconds. 118 func Seed(seed int64) ListOption { 119 return listOptionFunc(func(options *listOptions) { 120 options.seed = seed 121 }) 122 } 123 124 // New creates a new peer list with an identifier chooser for available peers. 125 func New(name string, transport peer.Transport, availableChooser Implementation, opts ...ListOption) *List { 126 options := defaultListOptions 127 for _, o := range opts { 128 o.apply(&options) 129 } 130 131 return &List{ 132 once: lifecycle.NewOnce(), 133 name: name, 134 uninitializedPeers: make(map[string]peer.Identifier, options.capacity), 135 unavailablePeers: make(map[string]*peerThunk, options.capacity), 136 availablePeers: make(map[string]*peerThunk, options.capacity), 137 availableChooser: availableChooser, 138 transport: transport, 139 noShuffle: options.noShuffle, 140 failFast: options.failFast, 141 randSrc: rand.NewSource(options.seed), 142 peerAvailableEvent: make(chan struct{}, 1), 143 } 144 } 145 146 // List is an abstract peer list, backed by an Implementation to 147 // determine which peer to choose among available peers. 148 // The abstract list manages available versus unavailable peers, intercepting 149 // these notifications from the transport's concrete implementation of 150 // peer.Peer with the peer.Subscriber API. 151 // The peer list will not choose an unavailable peer, prefering to block until 152 // one becomes available. 
153 // 154 // The list is a suitable basis for concrete implementations like round-robin. 155 type List struct { 156 lock sync.RWMutex 157 158 name string 159 160 shouldRetainPeers atomic.Bool 161 uninitializedPeers map[string]peer.Identifier 162 163 unavailablePeers map[string]*peerThunk 164 availablePeers map[string]*peerThunk 165 availableChooser Implementation 166 peerAvailableEvent chan struct{} 167 transport peer.Transport 168 169 noShuffle bool 170 failFast bool 171 randSrc rand.Source 172 173 once *lifecycle.Once 174 } 175 176 // Update applies the additions and removals of peer Identifiers to the list 177 // it returns a multi-error result of every failure that happened without 178 // circuit breaking due to failures. 179 func (pl *List) Update(updates peer.ListUpdates) error { 180 if len(updates.Additions) == 0 && len(updates.Removals) == 0 { 181 return nil 182 } 183 184 pl.lock.Lock() 185 defer pl.lock.Unlock() 186 187 if pl.shouldRetainPeers.Load() { 188 return pl.updateInitialized(updates) 189 } 190 return pl.updateUninitialized(updates) 191 } 192 193 // updateInitialized applies peer list updates when the peer list 194 // is able to retain peers, putting the updates into the available 195 // or unavailable containers. 196 // 197 // Must be run inside a mutex.Lock() 198 func (pl *List) updateInitialized(updates peer.ListUpdates) error { 199 var errs error 200 for _, pid := range updates.Removals { 201 errs = multierr.Append(errs, pl.removePeerIdentifier(pid)) 202 } 203 204 add := updates.Additions 205 if !pl.noShuffle { 206 add = shuffle(pl.randSrc, add) 207 } 208 209 for _, pid := range add { 210 errs = multierr.Append(errs, pl.addPeerIdentifier(pid)) 211 } 212 return errs 213 } 214 215 // updateUninitialized applies peer list updates when the peer list 216 // is **not** able to retain peers, putting the updates into a single 217 // uninitialized peer list. 
218 // 219 // Must be run inside a mutex.Lock() 220 func (pl *List) updateUninitialized(updates peer.ListUpdates) error { 221 var errs error 222 for _, pid := range updates.Removals { 223 if _, ok := pl.uninitializedPeers[pid.Identifier()]; !ok { 224 errs = multierr.Append(errs, peer.ErrPeerRemoveNotInList(pid.Identifier())) 225 continue 226 } 227 delete(pl.uninitializedPeers, pid.Identifier()) 228 } 229 for _, pid := range updates.Additions { 230 pl.uninitializedPeers[pid.Identifier()] = pid 231 } 232 233 return errs 234 } 235 236 // Must be run inside a mutex.Lock() 237 func (pl *List) addPeerIdentifier(pid peer.Identifier) error { 238 if t := pl.getThunk(pid); t != nil { 239 return peer.ErrPeerAddAlreadyInList(pid.Identifier()) 240 } 241 242 t := &peerThunk{list: pl, id: pid} 243 t.boundOnFinish = t.onFinish 244 p, err := pl.transport.RetainPeer(pid, t) 245 if err != nil { 246 return err 247 } 248 t.peer = p 249 return pl.addPeer(t) 250 } 251 252 // Must be run in a mutex.Lock() 253 func (pl *List) addPeer(t *peerThunk) error { 254 if t.peer.Status().ConnectionStatus != peer.Available { 255 return pl.addToUnavailablePeers(t) 256 } 257 258 return pl.addToAvailablePeers(t) 259 } 260 261 // Must be run in a mutex.Lock() 262 func (pl *List) addToUnavailablePeers(t *peerThunk) error { 263 pl.unavailablePeers[t.peer.Identifier()] = t 264 return nil 265 } 266 267 // Must be run in a mutex.Lock() 268 func (pl *List) addToAvailablePeers(t *peerThunk) error { 269 if pl.availablePeers[t.peer.Identifier()] != nil { 270 return peer.ErrPeerAddAlreadyInList(t.peer.Identifier()) 271 } 272 sub := pl.availableChooser.Add(t, t.id) 273 t.SetSubscriber(sub) 274 pl.availablePeers[t.Identifier()] = t 275 pl.notifyPeerAvailable() 276 return nil 277 } 278 279 // Start notifies the List that requests will start coming 280 func (pl *List) Start() error { 281 return pl.once.Start(pl.start) 282 } 283 284 func (pl *List) start() error { 285 pl.lock.Lock() 286 defer pl.lock.Unlock() 287 288 
if err := pl.availableChooser.Start(); err != nil { 289 return err 290 } 291 292 add := values(pl.uninitializedPeers) 293 if !pl.noShuffle { 294 add = shuffle(pl.randSrc, add) 295 } 296 297 var errs error 298 for _, pid := range add { 299 errs = multierr.Append(errs, pl.addPeerIdentifier(pid)) 300 delete(pl.uninitializedPeers, pid.Identifier()) 301 } 302 303 pl.shouldRetainPeers.Store(true) 304 305 return errs 306 } 307 308 // Stop notifies the List that requests will stop coming 309 func (pl *List) Stop() error { 310 return pl.once.Stop(pl.stop) 311 } 312 313 // stop will release all the peers from the list 314 func (pl *List) stop() error { 315 pl.lock.Lock() 316 defer pl.lock.Unlock() 317 318 var errs error 319 320 if err := pl.availableChooser.Stop(); err != nil { 321 errs = multierr.Append(errs, err) 322 } 323 324 availablePeers := pl.removeAllAvailablePeers(pl.availablePeers) 325 errs = pl.releaseAll(errs, availablePeers) 326 pl.addToUninitialized(availablePeers) 327 328 unavailablePeers := pl.removeAllUnavailablePeers(pl.unavailablePeers) 329 errs = pl.releaseAll(errs, unavailablePeers) 330 pl.addToUninitialized(unavailablePeers) 331 332 pl.shouldRetainPeers.Store(false) 333 334 return errs 335 } 336 337 func (pl *List) addToUninitialized(thunks []*peerThunk) { 338 for _, t := range thunks { 339 pl.uninitializedPeers[t.id.Identifier()] = t.id 340 } 341 } 342 343 // removeAllAvailablePeers will clear the availablePeers list and return all 344 // the Peers in the list in a slice 345 // Must be run in a mutex.Lock() 346 func (pl *List) removeAllAvailablePeers(toRemove map[string]*peerThunk) []*peerThunk { 347 thunks := make([]*peerThunk, 0, len(toRemove)) 348 for id, t := range toRemove { 349 thunks = append(thunks, t) 350 delete(pl.availablePeers, id) 351 pl.availableChooser.Remove(t, t.id, t.Subscriber()) 352 } 353 return thunks 354 } 355 356 // removeAllUnavailablePeers will clear the unavailablePeers list and 357 // return all the Peers in the list in a 
slice 358 // Must be run in a mutex.Lock() 359 func (pl *List) removeAllUnavailablePeers(toRemove map[string]*peerThunk) []*peerThunk { 360 thunks := make([]*peerThunk, 0, len(toRemove)) 361 for id, t := range toRemove { 362 thunks = append(thunks, t) 363 delete(toRemove, id) 364 } 365 return thunks 366 } 367 368 // releaseAll will iterate through a list of peers and call release 369 // on the transport 370 func (pl *List) releaseAll(errs error, peers []*peerThunk) error { 371 for _, t := range peers { 372 if err := pl.transport.ReleasePeer(t.peer, t); err != nil { 373 errs = multierr.Append(errs, err) 374 } 375 } 376 return errs 377 } 378 379 // removePeerIdentifier will go remove references to the peer identifier and release 380 // it from the transport 381 // Must be run in a mutex.Lock() 382 func (pl *List) removePeerIdentifier(pid peer.Identifier) error { 383 t, err := pl.removePeerIdentifierReferences(pid) 384 if err != nil { 385 // The peer has already been removed 386 return err 387 } 388 389 return pl.transport.ReleasePeer(pid, t) 390 } 391 392 // removePeerIdentifierReferences will search through the Available and Unavailable Peers 393 // for the PeerID and remove it 394 // Must be run in a mutex.Lock() 395 func (pl *List) removePeerIdentifierReferences(pid peer.Identifier) (*peerThunk, error) { 396 if t := pl.availablePeers[pid.Identifier()]; t != nil { 397 return t, pl.removeFromAvailablePeers(t) 398 } 399 400 if t, ok := pl.unavailablePeers[pid.Identifier()]; ok && t != nil { 401 pl.removeFromUnavailablePeers(t) 402 return t, nil 403 } 404 405 return nil, peer.ErrPeerRemoveNotInList(pid.Identifier()) 406 } 407 408 // removeFromAvailablePeers remove a peer from the Available Peers list the 409 // Peer should already be validated as non-nil and in the Available list. 
410 // Must be run in a mutex.Lock() 411 func (pl *List) removeFromAvailablePeers(t *peerThunk) error { 412 delete(pl.availablePeers, t.peer.Identifier()) 413 pl.availableChooser.Remove(t, t.id, t.Subscriber()) 414 t.SetSubscriber(nil) 415 return nil 416 } 417 418 // removeFromUnavailablePeers remove a peer from the Unavailable Peers list the 419 // Peer should already be validated as non-nil and in the Unavailable list. 420 // Must be run in a mutex.Lock() 421 func (pl *List) removeFromUnavailablePeers(t *peerThunk) { 422 delete(pl.unavailablePeers, t.peer.Identifier()) 423 } 424 425 // Choose selects the next available peer in the peer list 426 func (pl *List) Choose(ctx context.Context, req *transport.Request) (peer.Peer, func(error), error) { 427 if err := pl.once.WaitUntilRunning(ctx); err != nil { 428 return nil, nil, intyarpcerrors.AnnotateWithInfo(yarpcerrors.FromError(err), "%s peer list is not running", pl.name) 429 } 430 431 for { 432 pl.lock.Lock() 433 p := pl.availableChooser.Choose(ctx, req) 434 pl.lock.Unlock() 435 436 if p != nil { 437 // A nil peer is an indication that there are no more peers 438 // available for pending choices. 439 // A non-nil peer indicates that we have drained the waiting 440 // channel but there may be other peer lists waiting for a peer. 441 // We re-fill the channel enabling those choices to proceed 442 // concurrently. 443 t := p.(*peerThunk) 444 pl.notifyPeerAvailable() 445 t.StartRequest() 446 return t.peer, t.boundOnFinish, nil 447 } else if pl.failFast { 448 return nil, nil, yarpcerrors.Newf(yarpcerrors.CodeUnavailable, "%q peer list has no peer available", pl.name) 449 } 450 if err := pl.waitForPeerAddedEvent(ctx); err != nil { 451 return nil, nil, err 452 } 453 } 454 } 455 456 // IsRunning returns whether the peer list is running. 
457 func (pl *List) IsRunning() bool { 458 return pl.once.IsRunning() 459 } 460 461 // notifyPeerAvailable writes to a channel indicating that a Peer is currently 462 // available for requests 463 func (pl *List) notifyPeerAvailable() { 464 select { 465 case pl.peerAvailableEvent <- struct{}{}: 466 default: 467 } 468 } 469 470 // waitForPeerAddedEvent waits until a peer is added to the peer list or the 471 // given context finishes. 472 // Must NOT be run in a mutex.Lock() 473 func (pl *List) waitForPeerAddedEvent(ctx context.Context) error { 474 if _, ok := ctx.Deadline(); !ok { 475 return pl.newNoContextDeadlineError() 476 } 477 478 select { 479 case <-pl.peerAvailableEvent: 480 return nil 481 case <-ctx.Done(): 482 return pl.newUnavailableError(ctx.Err()) 483 } 484 } 485 486 func (pl *List) newNoContextDeadlineError() error { 487 return yarpcerrors.Newf(yarpcerrors.CodeInvalidArgument, _noContextDeadlineError, pl.name) 488 } 489 490 func (pl *List) newUnavailableError(err error) error { 491 return yarpcerrors.Newf(yarpcerrors.CodeUnavailable, "%s peer list timed out waiting for peer: %s", pl.name, err.Error()) 492 } 493 494 // NotifyStatusChanged receives status change notifications for peers in the 495 // list. 496 func (pl *List) NotifyStatusChanged(pid peer.Identifier) { 497 pl.lock.RLock() 498 t := pl.getThunk(pid) 499 pl.lock.RUnlock() 500 501 if t != nil { 502 t.NotifyStatusChanged(t.id) 503 } 504 } 505 506 // getThunk returns either the available or unavailable peer thunk. 507 // Must be called under a lock. 
508 func (pl *List) getThunk(pid peer.Identifier) *peerThunk { 509 if t := pl.availablePeers[pid.Identifier()]; t != nil { 510 return t 511 } 512 return pl.unavailablePeers[pid.Identifier()] 513 } 514 515 // notifyStatusChanged gets called by peer thunks 516 func (pl *List) notifyStatusChanged(pid peer.Identifier) { 517 pl.lock.Lock() 518 defer pl.lock.Unlock() 519 520 if t := pl.availablePeers[pid.Identifier()]; t != nil { 521 // TODO: log error 522 _ = pl.handleAvailablePeerStatusChange(t) 523 return 524 } 525 526 if t := pl.unavailablePeers[pid.Identifier()]; t != nil { 527 // TODO: log error 528 _ = pl.handleUnavailablePeerStatusChange(t) 529 } 530 // No action required 531 } 532 533 // handleAvailablePeerStatusChange checks the connection status of a connected 534 // peer to potentially move that Peer from the implementation data structure to 535 // the unavailable peer map 536 // Must be run in a mutex.Lock() 537 func (pl *List) handleAvailablePeerStatusChange(t *peerThunk) error { 538 if t.peer.Status().ConnectionStatus == peer.Available { 539 // Peer is in the proper pool, ignore 540 return nil 541 } 542 543 pl.availableChooser.Remove(t, t.id, t.Subscriber()) 544 t.SetSubscriber(nil) 545 delete(pl.availablePeers, t.peer.Identifier()) 546 547 return pl.addToUnavailablePeers(t) 548 549 } 550 551 // handleUnavailablePeerStatusChange checks the connection status of an unavailable peer to potentially 552 // move that Peer from the unavailablePeerMap into the available Peer Ring 553 // Must be run in a mutex.Lock() 554 func (pl *List) handleUnavailablePeerStatusChange(t *peerThunk) error { 555 if t.peer.Status().ConnectionStatus != peer.Available { 556 // Peer is in the proper pool, ignore 557 return nil 558 } 559 560 pl.removeFromUnavailablePeers(t) 561 return pl.addToAvailablePeers(t) 562 } 563 564 // Available returns whether the identifier peer is available for traffic. 
565 func (pl *List) Available(p peer.Identifier) bool { 566 _, ok := pl.availablePeers[p.Identifier()] 567 return ok 568 } 569 570 // Uninitialized returns whether a peer is waiting for the peer list to start. 571 func (pl *List) Uninitialized(p peer.Identifier) bool { 572 _, ok := pl.uninitializedPeers[p.Identifier()] 573 return ok 574 } 575 576 // Peers returns a snapshot of all retained (available and 577 // unavailable) peers. 578 func (pl *List) Peers() []peer.Peer { 579 pl.lock.RLock() 580 defer pl.lock.RUnlock() 581 peers := make([]peer.Peer, 0) 582 for _, t := range pl.availablePeers { 583 peers = append(peers, t.peer) 584 } 585 for _, t := range pl.unavailablePeers { 586 peers = append(peers, t.peer) 587 } 588 return peers 589 } 590 591 // NumAvailable returns how many peers are available. 592 func (pl *List) NumAvailable() int { 593 return len(pl.availablePeers) 594 } 595 596 // NumUnavailable returns how many peers are unavailable. 597 func (pl *List) NumUnavailable() int { 598 return len(pl.unavailablePeers) 599 } 600 601 // NumUninitialized returns how many peers are unavailable. 602 func (pl *List) NumUninitialized() int { 603 return len(pl.uninitializedPeers) 604 } 605 606 // Introspect returns a ChooserStatus with a summary of the Peers. 
607 func (pl *List) Introspect() introspection.ChooserStatus { 608 state := "Stopped" 609 if pl.IsRunning() { 610 state = "Running" 611 } 612 613 pl.lock.Lock() 614 availables := make([]peer.Peer, 0, len(pl.availablePeers)) 615 for _, t := range pl.availablePeers { 616 availables = append(availables, t.peer) 617 } 618 unavailables := make([]peer.Peer, 0, len(pl.unavailablePeers)) 619 for _, t := range pl.unavailablePeers { 620 unavailables = append(unavailables, t.peer) 621 } 622 pl.lock.Unlock() 623 624 peersStatus := make([]introspection.PeerStatus, 0, 625 len(availables)+len(unavailables)) 626 627 buildPeerStatus := func(peer peer.Peer) introspection.PeerStatus { 628 ps := peer.Status() 629 return introspection.PeerStatus{ 630 Identifier: peer.Identifier(), 631 State: fmt.Sprintf("%s, %d pending request(s)", 632 ps.ConnectionStatus.String(), 633 ps.PendingRequestCount), 634 } 635 } 636 637 for _, peer := range availables { 638 peersStatus = append(peersStatus, buildPeerStatus(peer)) 639 } 640 641 for _, peer := range unavailables { 642 peersStatus = append(peersStatus, buildPeerStatus(peer)) 643 } 644 645 return introspection.ChooserStatus{ 646 Name: pl.name, 647 State: fmt.Sprintf("%s (%d/%d available)", state, len(availables), 648 len(availables)+len(unavailables)), 649 Peers: peersStatus, 650 } 651 } 652 653 // shuffle randomizes the order of a slice of peers. 654 // see: https://en.wikipedia.org/wiki/Fisher-Yates_shuffle 655 func shuffle(src rand.Source, in []peer.Identifier) []peer.Identifier { 656 shuffled := make([]peer.Identifier, len(in)) 657 r := rand.New(src) 658 copy(shuffled, in) 659 for i := len(in) - 1; i > 0; i-- { 660 j := r.Intn(i + 1) 661 shuffled[i], shuffled[j] = shuffled[j], shuffled[i] 662 } 663 return shuffled 664 } 665 666 // values returns a slice of the values contained in a map of peers. 
667 func values(m map[string]peer.Identifier) []peer.Identifier { 668 vs := make([]peer.Identifier, 0, len(m)) 669 for _, v := range m { 670 vs = append(vs, v) 671 } 672 return vs 673 }