go.uber.org/yarpc@v1.72.1/peer/peerlist/list.go (about) 1 // Copyright (c) 2022 Uber Technologies, Inc. 2 // 3 // Permission is hereby granted, free of charge, to any person obtaining a copy 4 // of this software and associated documentation files (the "Software"), to deal 5 // in the Software without restriction, including without limitation the rights 6 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 // copies of the Software, and to permit persons to whom the Software is 8 // furnished to do so, subject to the following conditions: 9 // 10 // The above copyright notice and this permission notice shall be included in 11 // all copies or substantial portions of the Software. 12 // 13 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 19 // THE SOFTWARE. 20 21 package peerlist 22 23 import ( 24 "context" 25 "fmt" 26 "math/rand" 27 "sync" 28 "time" 29 30 "go.uber.org/atomic" 31 "go.uber.org/multierr" 32 "go.uber.org/yarpc/api/peer" 33 "go.uber.org/yarpc/api/transport" 34 "go.uber.org/yarpc/api/x/introspection" 35 intyarpcerrors "go.uber.org/yarpc/internal/yarpcerrors" 36 "go.uber.org/yarpc/pkg/lifecycle" 37 "go.uber.org/yarpc/yarpcerrors" 38 ) 39 40 var ( 41 _noContextDeadlineError = "can't wait for peer without a context deadline for a %s peer list" 42 ) 43 44 type listOptions struct { 45 capacity int 46 noShuffle bool 47 seed int64 48 } 49 50 var defaultListOptions = listOptions{ 51 capacity: 10, 52 seed: time.Now().UnixNano(), 53 } 54 55 // ListOption customizes the behavior of a list. 56 type ListOption interface { 57 apply(*listOptions) 58 } 59 60 type listOptionFunc func(*listOptions) 61 62 func (f listOptionFunc) apply(options *listOptions) { f(options) } 63 64 // Capacity specifies the default capacity of the underlying 65 // data structures for this list 66 // 67 // Defaults to 10. 68 func Capacity(capacity int) ListOption { 69 return listOptionFunc(func(options *listOptions) { 70 options.capacity = capacity 71 }) 72 } 73 74 // NoShuffle disables the default behavior of shuffling peerlist order. 75 func NoShuffle() ListOption { 76 return listOptionFunc(func(options *listOptions) { 77 options.noShuffle = true 78 }) 79 } 80 81 // Seed specifies the random seed to use for shuffling peers 82 // 83 // Defaults to approximately the process start time in nanoseconds. 84 func Seed(seed int64) ListOption { 85 return listOptionFunc(func(options *listOptions) { 86 options.seed = seed 87 }) 88 } 89 90 // New creates a new peer list with an identifier chooser for available peers. 91 func New(name string, transport peer.Transport, availableChooser peer.ListImplementation, opts ...ListOption) *List { 92 options := defaultListOptions 93 for _, o := range opts { 94 o.apply(&options) 95 } 96 97 return &List{ 98 once: lifecycle.NewOnce(), 99 name: name, 100 uninitializedPeers: make(map[string]peer.Identifier, options.capacity), 101 unavailablePeers: make(map[string]*peerThunk, options.capacity), 102 availablePeers: make(map[string]*peerThunk, options.capacity), 103 availableChooser: availableChooser, 104 transport: transport, 105 noShuffle: options.noShuffle, 106 randSrc: rand.NewSource(options.seed), 107 peerAvailableEvent: make(chan struct{}, 1), 108 } 109 } 110 111 // List is an abstract peer list, backed by a peer.ListImplementation to 112 // determine which peer to choose among available peers. 113 // The abstract list manages available versus unavailable peers, intercepting 114 // these notifications from the transport's concrete implementation of 115 // peer.Peer with the peer.Subscriber API. 116 // The peer list will not choose an unavailable peer, prefering to block until 117 // one becomes available. 118 // 119 // The list is a suitable basis for concrete implementations like round-robin. 120 type List struct { 121 lock sync.RWMutex 122 123 name string 124 125 shouldRetainPeers atomic.Bool 126 uninitializedPeers map[string]peer.Identifier 127 128 unavailablePeers map[string]*peerThunk 129 availablePeers map[string]*peerThunk 130 availableChooser peer.ListImplementation 131 peerAvailableEvent chan struct{} 132 transport peer.Transport 133 134 noShuffle bool 135 randSrc rand.Source 136 137 once *lifecycle.Once 138 } 139 140 // Update applies the additions and removals of peer Identifiers to the list 141 // it returns a multi-error result of every failure that happened without 142 // circuit breaking due to failures. 143 func (pl *List) Update(updates peer.ListUpdates) error { 144 if len(updates.Additions) == 0 && len(updates.Removals) == 0 { 145 return nil 146 } 147 148 pl.lock.Lock() 149 defer pl.lock.Unlock() 150 151 if pl.shouldRetainPeers.Load() { 152 return pl.updateInitialized(updates) 153 } 154 return pl.updateUninitialized(updates) 155 } 156 157 // updateInitialized applies peer list updates when the peer list 158 // is able to retain peers, putting the updates into the available 159 // or unavailable containers. 160 // 161 // Must be run inside a mutex.Lock() 162 func (pl *List) updateInitialized(updates peer.ListUpdates) error { 163 var errs error 164 for _, pid := range updates.Removals { 165 errs = multierr.Append(errs, pl.removePeerIdentifier(pid)) 166 } 167 168 add := updates.Additions 169 if !pl.noShuffle { 170 add = shuffle(pl.randSrc, add) 171 } 172 173 for _, pid := range add { 174 errs = multierr.Append(errs, pl.addPeerIdentifier(pid)) 175 } 176 return errs 177 } 178 179 // updateUninitialized applies peer list updates when the peer list 180 // is **not** able to retain peers, putting the updates into a single 181 // uninitialized peer list. 182 // 183 // Must be run inside a mutex.Lock() 184 func (pl *List) updateUninitialized(updates peer.ListUpdates) error { 185 var errs error 186 for _, pid := range updates.Removals { 187 if _, ok := pl.uninitializedPeers[pid.Identifier()]; !ok { 188 errs = multierr.Append(errs, peer.ErrPeerRemoveNotInList(pid.Identifier())) 189 continue 190 } 191 delete(pl.uninitializedPeers, pid.Identifier()) 192 } 193 for _, pid := range updates.Additions { 194 pl.uninitializedPeers[pid.Identifier()] = pid 195 } 196 197 return errs 198 } 199 200 // Must be run inside a mutex.Lock() 201 func (pl *List) addPeerIdentifier(pid peer.Identifier) error { 202 if t := pl.getThunk(pid); t != nil { 203 return peer.ErrPeerAddAlreadyInList(pid.Identifier()) 204 } 205 206 t := &peerThunk{list: pl, id: pid} 207 t.boundOnFinish = t.onFinish 208 p, err := pl.transport.RetainPeer(pid, t) 209 if err != nil { 210 return err 211 } 212 t.peer = p 213 return pl.addPeer(t) 214 } 215 216 // Must be run in a mutex.Lock() 217 func (pl *List) addPeer(t *peerThunk) error { 218 if t.peer.Status().ConnectionStatus != peer.Available { 219 return pl.addToUnavailablePeers(t) 220 } 221 222 return pl.addToAvailablePeers(t) 223 } 224 225 // Must be run in a mutex.Lock() 226 func (pl *List) addToUnavailablePeers(t *peerThunk) error { 227 pl.unavailablePeers[t.peer.Identifier()] = t 228 return nil 229 } 230 231 // Must be run in a mutex.Lock() 232 func (pl *List) addToAvailablePeers(t *peerThunk) error { 233 if pl.availablePeers[t.peer.Identifier()] != nil { 234 return peer.ErrPeerAddAlreadyInList(t.peer.Identifier()) 235 } 236 sub := pl.availableChooser.Add(t) 237 t.SetSubscriber(sub) 238 pl.availablePeers[t.Identifier()] = t 239 pl.notifyPeerAvailable() 240 return nil 241 } 242 243 // Start notifies the List that requests will start coming 244 func (pl *List) Start() error { 245 return pl.once.Start(pl.start) 246 } 247 248 func (pl *List) start() error { 249 pl.lock.Lock() 250 defer pl.lock.Unlock() 251 252 if err := pl.availableChooser.Start(); err != nil { 253 return err 254 } 255 256 add := values(pl.uninitializedPeers) 257 if !pl.noShuffle { 258 add = shuffle(pl.randSrc, add) 259 } 260 261 var errs error 262 for _, pid := range add { 263 errs = multierr.Append(errs, pl.addPeerIdentifier(pid)) 264 delete(pl.uninitializedPeers, pid.Identifier()) 265 } 266 267 pl.shouldRetainPeers.Store(true) 268 269 return errs 270 } 271 272 // Stop notifies the List that requests will stop coming 273 func (pl *List) Stop() error { 274 return pl.once.Stop(pl.stop) 275 } 276 277 // stop will release all the peers from the list 278 func (pl *List) stop() error { 279 pl.lock.Lock() 280 defer pl.lock.Unlock() 281 282 var errs error 283 284 if err := pl.availableChooser.Stop(); err != nil { 285 errs = multierr.Append(errs, err) 286 } 287 288 availablePeers := pl.removeAllAvailablePeers(pl.availablePeers) 289 errs = pl.releaseAll(errs, availablePeers) 290 pl.addToUninitialized(availablePeers) 291 292 unavailablePeers := pl.removeAllUnavailablePeers(pl.unavailablePeers) 293 errs = pl.releaseAll(errs, unavailablePeers) 294 pl.addToUninitialized(unavailablePeers) 295 296 pl.shouldRetainPeers.Store(false) 297 298 return errs 299 } 300 301 func (pl *List) addToUninitialized(thunks []*peerThunk) { 302 for _, t := range thunks { 303 pl.uninitializedPeers[t.id.Identifier()] = t.id 304 } 305 } 306 307 // removeAllAvailablePeers will clear the availablePeers list and return all 308 // the Peers in the list in a slice 309 // Must be run in a mutex.Lock() 310 func (pl *List) removeAllAvailablePeers(toRemove map[string]*peerThunk) []*peerThunk { 311 thunks := make([]*peerThunk, 0, len(toRemove)) 312 for id, t := range toRemove { 313 thunks = append(thunks, t) 314 delete(pl.availablePeers, id) 315 pl.availableChooser.Remove(t, t.Subscriber()) 316 } 317 return thunks 318 } 319 320 // removeAllUnavailablePeers will clear the unavailablePeers list and 321 // return all the Peers in the list in a slice 322 // Must be run in a mutex.Lock() 323 func (pl *List) removeAllUnavailablePeers(toRemove map[string]*peerThunk) []*peerThunk { 324 thunks := make([]*peerThunk, 0, len(toRemove)) 325 for id, t := range toRemove { 326 thunks = append(thunks, t) 327 delete(toRemove, id) 328 } 329 return thunks 330 } 331 332 // releaseAll will iterate through a list of peers and call release 333 // on the transport 334 func (pl *List) releaseAll(errs error, peers []*peerThunk) error { 335 for _, t := range peers { 336 if err := pl.transport.ReleasePeer(t.peer, t); err != nil { 337 errs = multierr.Append(errs, err) 338 } 339 } 340 return errs 341 } 342 343 // removePeerIdentifier will go remove references to the peer identifier and release 344 // it from the transport 345 // Must be run in a mutex.Lock() 346 func (pl *List) removePeerIdentifier(pid peer.Identifier) error { 347 t, err := pl.removePeerIdentifierReferences(pid) 348 if err != nil { 349 // The peer has already been removed 350 return err 351 } 352 353 return pl.transport.ReleasePeer(pid, t) 354 } 355 356 // removePeerIdentifierReferences will search through the Available and Unavailable Peers 357 // for the PeerID and remove it 358 // Must be run in a mutex.Lock() 359 func (pl *List) removePeerIdentifierReferences(pid peer.Identifier) (*peerThunk, error) { 360 if t := pl.availablePeers[pid.Identifier()]; t != nil { 361 return t, pl.removeFromAvailablePeers(t) 362 } 363 364 if t, ok := pl.unavailablePeers[pid.Identifier()]; ok && t != nil { 365 pl.removeFromUnavailablePeers(t) 366 return t, nil 367 } 368 369 return nil, peer.ErrPeerRemoveNotInList(pid.Identifier()) 370 } 371 372 // removeFromAvailablePeers remove a peer from the Available Peers list the 373 // Peer should already be validated as non-nil and in the Available list. 374 // Must be run in a mutex.Lock() 375 func (pl *List) removeFromAvailablePeers(t *peerThunk) error { 376 delete(pl.availablePeers, t.peer.Identifier()) 377 pl.availableChooser.Remove(t, t.Subscriber()) 378 t.SetSubscriber(nil) 379 return nil 380 } 381 382 // removeFromUnavailablePeers remove a peer from the Unavailable Peers list the 383 // Peer should already be validated as non-nil and in the Unavailable list. 384 // Must be run in a mutex.Lock() 385 func (pl *List) removeFromUnavailablePeers(t *peerThunk) { 386 delete(pl.unavailablePeers, t.peer.Identifier()) 387 } 388 389 // Choose selects the next available peer in the peer list 390 func (pl *List) Choose(ctx context.Context, req *transport.Request) (peer.Peer, func(error), error) { 391 if err := pl.once.WaitUntilRunning(ctx); err != nil { 392 return nil, nil, intyarpcerrors.AnnotateWithInfo(yarpcerrors.FromError(err), "%s peer list is not running", pl.name) 393 } 394 395 for { 396 pl.lock.Lock() 397 p := pl.availableChooser.Choose(ctx, req) 398 pl.lock.Unlock() 399 400 if p != nil { 401 t := p.(*peerThunk) 402 // A nil peer is an indication that there are no more peers 403 // available for pending choices. 404 // A non-nil peer indicates that we have drained the waiting 405 // channel but there may be other peer lists waiting for a peer. 406 // We re-fill the channel enabling those choices to proceed 407 // concurrently. 408 pl.notifyPeerAvailable() 409 t.onStart() 410 return t.peer, t.boundOnFinish, nil 411 } 412 if err := pl.waitForPeerAddedEvent(ctx); err != nil { 413 return nil, nil, err 414 } 415 } 416 } 417 418 // IsRunning returns whether the peer list is running. 419 func (pl *List) IsRunning() bool { 420 return pl.once.IsRunning() 421 } 422 423 // notifyPeerAvailable writes to a channel indicating that a Peer is currently 424 // available for requests 425 func (pl *List) notifyPeerAvailable() { 426 select { 427 case pl.peerAvailableEvent <- struct{}{}: 428 default: 429 } 430 } 431 432 // waitForPeerAddedEvent waits until a peer is added to the peer list or the 433 // given context finishes. 434 // Must NOT be run in a mutex.Lock() 435 func (pl *List) waitForPeerAddedEvent(ctx context.Context) error { 436 if _, ok := ctx.Deadline(); !ok { 437 return pl.newNoContextDeadlineError() 438 } 439 440 select { 441 case <-pl.peerAvailableEvent: 442 return nil 443 case <-ctx.Done(): 444 return pl.newUnavailableError(ctx.Err()) 445 } 446 } 447 448 func (pl *List) newNoContextDeadlineError() error { 449 return yarpcerrors.Newf(yarpcerrors.CodeInvalidArgument, _noContextDeadlineError, pl.name) 450 } 451 452 func (pl *List) newUnavailableError(err error) error { 453 return yarpcerrors.Newf(yarpcerrors.CodeUnavailable, "%s peer list timed out waiting for peer: %s", pl.name, err.Error()) 454 } 455 456 // NotifyStatusChanged receives status change notifications for peers in the 457 // list. 458 func (pl *List) NotifyStatusChanged(pid peer.Identifier) { 459 pl.lock.RLock() 460 t := pl.getThunk(pid) 461 pl.lock.RUnlock() 462 463 if t != nil { 464 t.NotifyStatusChanged(t.id) 465 } 466 } 467 468 // getThunk returns either the available or unavailable peer thunk. 469 // Must be called under a lock. 470 func (pl *List) getThunk(pid peer.Identifier) *peerThunk { 471 if t := pl.availablePeers[pid.Identifier()]; t != nil { 472 return t 473 } 474 return pl.unavailablePeers[pid.Identifier()] 475 } 476 477 // notifyStatusChanged gets called by peer thunks 478 func (pl *List) notifyStatusChanged(pid peer.Identifier) { 479 pl.lock.Lock() 480 defer pl.lock.Unlock() 481 482 if t := pl.availablePeers[pid.Identifier()]; t != nil { 483 // TODO: log error 484 _ = pl.handleAvailablePeerStatusChange(t) 485 return 486 } 487 488 if t := pl.unavailablePeers[pid.Identifier()]; t != nil { 489 // TODO: log error 490 _ = pl.handleUnavailablePeerStatusChange(t) 491 } 492 // No action required 493 } 494 495 // handleAvailablePeerStatusChange checks the connection status of a connected 496 // peer to potentially move that Peer from the implementation data structure to 497 // the unavailable peer map Must be run in a mutex.Lock() 498 func (pl *List) handleAvailablePeerStatusChange(t *peerThunk) error { 499 if t.peer.Status().ConnectionStatus == peer.Available { 500 // Peer is in the proper pool, ignore 501 return nil 502 } 503 504 pl.availableChooser.Remove(t, t.Subscriber()) 505 t.SetSubscriber(nil) 506 delete(pl.availablePeers, t.peer.Identifier()) 507 508 return pl.addToUnavailablePeers(t) 509 510 } 511 512 // handleUnavailablePeerStatusChange checks the connection status of an unavailable peer to potentially 513 // move that Peer from the unavailablePeerMap into the available Peer Ring 514 // Must be run in a mutex.Lock() 515 func (pl *List) handleUnavailablePeerStatusChange(t *peerThunk) error { 516 if t.peer.Status().ConnectionStatus != peer.Available { 517 // Peer is in the proper pool, ignore 518 return nil 519 } 520 521 pl.removeFromUnavailablePeers(t) 522 return pl.addToAvailablePeers(t) 523 } 524 525 // Available returns whether the identifier peer is available for traffic. 526 func (pl *List) Available(p peer.Identifier) bool { 527 _, ok := pl.availablePeers[p.Identifier()] 528 return ok 529 } 530 531 // Uninitialized returns whether a peer is waiting for the peer list to start. 532 func (pl *List) Uninitialized(p peer.Identifier) bool { 533 _, ok := pl.uninitializedPeers[p.Identifier()] 534 return ok 535 } 536 537 // Peers returns a snapshot of all retained (available and 538 // unavailable) peers. 539 func (pl *List) Peers() []peer.Peer { 540 pl.lock.RLock() 541 defer pl.lock.RUnlock() 542 peers := make([]peer.Peer, 0) 543 for _, t := range pl.availablePeers { 544 peers = append(peers, t.peer) 545 } 546 for _, t := range pl.unavailablePeers { 547 peers = append(peers, t.peer) 548 } 549 return peers 550 } 551 552 // NumAvailable returns how many peers are available. 553 func (pl *List) NumAvailable() int { 554 return len(pl.availablePeers) 555 } 556 557 // NumUnavailable returns how many peers are unavailable. 558 func (pl *List) NumUnavailable() int { 559 return len(pl.unavailablePeers) 560 } 561 562 // NumUninitialized returns how many peers are unavailable. 563 func (pl *List) NumUninitialized() int { 564 return len(pl.uninitializedPeers) 565 } 566 567 // Introspect returns a ChooserStatus with a summary of the Peers. 568 func (pl *List) Introspect() introspection.ChooserStatus { 569 state := "Stopped" 570 if pl.IsRunning() { 571 state = "Running" 572 } 573 574 pl.lock.Lock() 575 availables := make([]peer.Peer, 0, len(pl.availablePeers)) 576 for _, t := range pl.availablePeers { 577 availables = append(availables, t.peer) 578 } 579 unavailables := make([]peer.Peer, 0, len(pl.unavailablePeers)) 580 for _, t := range pl.unavailablePeers { 581 unavailables = append(unavailables, t.peer) 582 } 583 pl.lock.Unlock() 584 585 peersStatus := make([]introspection.PeerStatus, 0, 586 len(availables)+len(unavailables)) 587 588 buildPeerStatus := func(peer peer.Peer) introspection.PeerStatus { 589 ps := peer.Status() 590 return introspection.PeerStatus{ 591 Identifier: peer.Identifier(), 592 State: fmt.Sprintf("%s, %d pending request(s)", 593 ps.ConnectionStatus.String(), 594 ps.PendingRequestCount), 595 } 596 } 597 598 for _, peer := range availables { 599 peersStatus = append(peersStatus, buildPeerStatus(peer)) 600 } 601 602 for _, peer := range unavailables { 603 peersStatus = append(peersStatus, buildPeerStatus(peer)) 604 } 605 606 return introspection.ChooserStatus{ 607 Name: "Single", 608 State: fmt.Sprintf("%s (%d/%d available)", state, len(availables), 609 len(availables)+len(unavailables)), 610 Peers: peersStatus, 611 } 612 } 613 614 // shuffle randomizes the order of a slice of peers. 615 // see: https://en.wikipedia.org/wiki/Fisher-Yates_shuffle 616 func shuffle(src rand.Source, in []peer.Identifier) []peer.Identifier { 617 shuffled := make([]peer.Identifier, len(in)) 618 r := rand.New(src) 619 copy(shuffled, in) 620 for i := len(in) - 1; i > 0; i-- { 621 j := r.Intn(i + 1) 622 shuffled[i], shuffled[j] = shuffled[j], shuffled[i] 623 } 624 return shuffled 625 } 626 627 // values returns a slice of the values contained in a map of peers. 628 func values(m map[string]peer.Identifier) []peer.Identifier { 629 vs := make([]peer.Identifier, 0, len(m)) 630 for _, v := range m { 631 vs = append(vs, v) 632 } 633 return vs 634 }