go.uber.org/yarpc@v1.72.1/peer/x/peerheap/list.go (about)

     1  // Copyright (c) 2022 Uber Technologies, Inc.
     2  //
     3  // Permission is hereby granted, free of charge, to any person obtaining a copy
     4  // of this software and associated documentation files (the "Software"), to deal
     5  // in the Software without restriction, including without limitation the rights
     6  // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
     7  // copies of the Software, and to permit persons to whom the Software is
     8  // furnished to do so, subject to the following conditions:
     9  //
    10  // The above copyright notice and this permission notice shall be included in
    11  // all copies or substantial portions of the Software.
    12  //
    13  // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    14  // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    15  // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
    16  // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    17  // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
    18  // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
    19  // THE SOFTWARE.
    20  
    21  package peerheap
    22  
    23  import (
    24  	"context"
    25  	"math"
    26  	"sync"
    27  	"time"
    28  
    29  	"go.uber.org/multierr"
    30  	"go.uber.org/yarpc/api/peer"
    31  	"go.uber.org/yarpc/api/transport"
    32  	intyarpcerrors "go.uber.org/yarpc/internal/yarpcerrors"
    33  	"go.uber.org/yarpc/pkg/lifecycle"
    34  	"go.uber.org/yarpc/yarpcerrors"
    35  )
    36  
    37  var (
    38  	_noContextDeadlineError = yarpcerrors.Newf(yarpcerrors.CodeInvalidArgument, "can't wait for peer without a context deadline for peerheap")
    39  )
    40  
    41  const unavailablePenalty = math.MaxInt32
    42  
// heapConfig holds the settings that HeapOption functions may adjust before
// New builds a List.
type heapConfig struct {
	// startupWait is copied into List.startupWait by New.
	startupWait time.Duration
}
    46  
// defaultHeapConfig is the configuration New starts from before applying any
// HeapOption.
var defaultHeapConfig = heapConfig{
	startupWait: 5 * time.Second,
}
    50  
// HeapOption customizes the behavior of a peer heap.
// Options are passed to New, which applies them in order to a copy of the
// default configuration.
type HeapOption func(*heapConfig)
    53  
    54  // StartupWait specifies how long updates to the heap will block
    55  // before the list heap been started
    56  //
    57  // Defaults to 5 seconds.
    58  func StartupWait(t time.Duration) HeapOption {
    59  	return func(c *heapConfig) {
    60  		c.startupWait = t
    61  	}
    62  }
    63  
// List is a peer list and peer chooser that favors the peer with the least
// pending requests, and then favors the least recently used or most recently
// introduced peer.
type List struct {
	// mu is held while byScore and byIdentifier are read or modified.
	mu sync.Mutex
	// once tracks the start/stop lifecycle of the list.
	once *lifecycle.Once

	// transport is used to retain and release peers.
	transport peer.Transport

	// byScore is the heap of retained peers.
	byScore peerHeap
	// byIdentifier indexes retained peers by their identifier string.
	byIdentifier map[string]*peerScore

	// peerAvailableEvent is signaled (without blocking) by notifyPeerAvailable
	// and awaited by waitForPeerAvailableEvent. New creates it with a buffer
	// of one.
	peerAvailableEvent chan struct{}

	// startupWait bounds how long Update waits for the list to be running.
	startupWait time.Duration
}
    80  
    81  // IsRunning returns whether the peer list is running.
    82  func (pl *List) IsRunning() bool {
    83  	return pl.once.IsRunning()
    84  }
    85  
    86  // Start starts the peer list.
    87  func (pl *List) Start() error {
    88  	return pl.once.Start(nil)
    89  }
    90  
    91  // Stop stops the peer list. This releases all retained peers.
    92  func (pl *List) Stop() error {
    93  	return pl.once.Stop(pl.clearPeers) // TODO clear peers
    94  }
    95  
    96  // New returns a new peer heap-chooser-list for the given transport.
    97  func New(transport peer.Transport, opts ...HeapOption) *List {
    98  	cfg := defaultHeapConfig
    99  	for _, o := range opts {
   100  		o(&cfg)
   101  	}
   102  
   103  	return &List{
   104  		once:               lifecycle.NewOnce(),
   105  		transport:          transport,
   106  		byIdentifier:       make(map[string]*peerScore),
   107  		peerAvailableEvent: make(chan struct{}, 1),
   108  		startupWait:        cfg.startupWait,
   109  	}
   110  }
   111  
   112  // Update satisfies the peer.List interface, so a peer list updater can manage
   113  // the retained peers.
   114  func (pl *List) Update(updates peer.ListUpdates) error {
   115  	ctx, cancel := context.WithTimeout(context.Background(), pl.startupWait)
   116  	defer cancel()
   117  	if err := pl.once.WaitUntilRunning(ctx); err != nil {
   118  		return intyarpcerrors.AnnotateWithInfo(yarpcerrors.FromError(err), "%s peer list is not running", "peer heap")
   119  	}
   120  
   121  	var errs error
   122  
   123  	pl.mu.Lock()
   124  	defer pl.mu.Unlock()
   125  
   126  	for _, pid := range updates.Removals {
   127  		errs = multierr.Append(errs, pl.releasePeer(pid))
   128  	}
   129  
   130  	for _, pid := range updates.Additions {
   131  		errs = multierr.Append(errs, pl.retainPeer(pid))
   132  	}
   133  
   134  	return errs
   135  }
   136  
   137  // retainPeer must be called with the mutex locked.
   138  func (pl *List) retainPeer(pid peer.Identifier) error {
   139  	if _, ok := pl.byIdentifier[pid.Identifier()]; ok {
   140  		return peer.ErrPeerAddAlreadyInList(pid.Identifier())
   141  	}
   142  
   143  	ps := &peerScore{id: pid, list: pl}
   144  	p, err := pl.transport.RetainPeer(pid, ps)
   145  	if err != nil {
   146  		return err
   147  	}
   148  
   149  	ps.peer = p
   150  	ps.score = scorePeer(p)
   151  	ps.boundFinish = ps.finish
   152  	pl.byIdentifier[pid.Identifier()] = ps
   153  	pl.byScore.pushPeer(ps)
   154  	pl.internalNotifyStatusChanged(ps)
   155  	return nil
   156  }
   157  
   158  // releasePeer must be called with the mutex locked.
   159  func (pl *List) releasePeer(pid peer.Identifier) error {
   160  	ps, ok := pl.byIdentifier[pid.Identifier()]
   161  	if !ok {
   162  		return peer.ErrPeerRemoveNotInList(pid.Identifier())
   163  	}
   164  
   165  	if err := pl.byScore.validate(ps); err != nil {
   166  		return err
   167  	}
   168  
   169  	err := pl.transport.ReleasePeer(pid, ps)
   170  	delete(pl.byIdentifier, pid.Identifier())
   171  	pl.byScore.delete(ps.idx)
   172  	ps.list = nil
   173  	return err
   174  }
   175  
   176  func (pl *List) clearPeers() error {
   177  	pl.mu.Lock()
   178  	defer pl.mu.Unlock()
   179  
   180  	var errs error
   181  
   182  	for {
   183  		ps, ok := pl.byScore.peekPeer()
   184  		if !ok {
   185  			break
   186  		}
   187  
   188  		errs = multierr.Append(errs, pl.releasePeer(ps.id))
   189  	}
   190  
   191  	return errs
   192  }
   193  
   194  // Choose satisfies peer.Chooser, providing a single peer for a request, a
   195  // callback for when the request is finished, or an error if it fails.
   196  // The choose method takes a context that must have a deadline.
   197  // Choose resepects this deadline, waiting for an available peer until the
   198  // deadline.
   199  // The peer heap does not use the given *transport.Request and can safely
   200  // receive nil.
   201  func (pl *List) Choose(ctx context.Context, _ *transport.Request) (peer.Peer, func(error), error) {
   202  	if err := pl.once.WaitUntilRunning(ctx); err != nil {
   203  		return nil, nil, intyarpcerrors.AnnotateWithInfo(yarpcerrors.FromError(err), "%s peer list is not running", "peer heap")
   204  	}
   205  
   206  	for {
   207  		if ps, ok := pl.get(); ok {
   208  			pl.notifyPeerAvailable()
   209  			ps.peer.StartRequest()
   210  			return ps.peer, ps.boundFinish, nil
   211  		}
   212  
   213  		if err := pl.waitForPeerAvailableEvent(ctx); err != nil {
   214  			return nil, nil, err
   215  		}
   216  	}
   217  }
   218  
   219  func (pl *List) get() (*peerScore, bool) {
   220  	pl.mu.Lock()
   221  	defer pl.mu.Unlock()
   222  
   223  	ps, ok := pl.byScore.popPeer()
   224  	if !ok {
   225  		return nil, false
   226  	}
   227  
   228  	// Note: We push the peer back to reset the "next" counter.
   229  	// This gives us round-robin behavior.
   230  	pl.byScore.pushPeer(ps)
   231  
   232  	return ps, ps.status.ConnectionStatus == peer.Available
   233  }
   234  
   235  // waitForPeerAvailableEvent waits until a peer is added to the peer list or the
   236  // given context finishes.
   237  // Must NOT be run in a mutex.Lock()
   238  func (pl *List) waitForPeerAvailableEvent(ctx context.Context) error {
   239  	if _, ok := ctx.Deadline(); !ok {
   240  		return _noContextDeadlineError
   241  	}
   242  
   243  	select {
   244  	case <-pl.peerAvailableEvent:
   245  		return nil
   246  	case <-ctx.Done():
   247  		return newUnavailableError(ctx.Err())
   248  	}
   249  }
   250  
   251  func newUnavailableError(err error) error {
   252  	return yarpcerrors.Newf(yarpcerrors.CodeUnavailable, "peer heap timed out waiting for peer: %s", err.Error())
   253  }
   254  
   255  // notifyPeerAvailable writes to a channel indicating that a Peer is currently
   256  // available for requests
   257  func (pl *List) notifyPeerAvailable() {
   258  	select {
   259  	case pl.peerAvailableEvent <- struct{}{}:
   260  	default:
   261  	}
   262  }
   263  
   264  // NotifyStatusChanged receives notifications when a peer becomes available,
   265  // connected, unavailable, or when its pending request count changes.
   266  // This method satisfies peer.Subscriber and is only used for tests, since
   267  // the peer heap has a subscriber for each invividual peer.
   268  func (pl *List) NotifyStatusChanged(pid peer.Identifier) {
   269  	pl.mu.Lock()
   270  	ps := pl.byIdentifier[pid.Identifier()]
   271  	pl.mu.Unlock()
   272  	ps.NotifyStatusChanged(pid)
   273  }
   274  
   275  func (pl *List) peerScoreChanged(ps *peerScore) {
   276  	pl.mu.Lock()
   277  	pl.rescorePeer(ps)
   278  	pl.mu.Unlock()
   279  
   280  	if ps.peer.Status().ConnectionStatus == peer.Available {
   281  		pl.notifyPeerAvailable()
   282  	}
   283  }
   284  
   285  func (pl *List) internalNotifyStatusChanged(ps *peerScore) {
   286  	pl.rescorePeer(ps)
   287  
   288  	if ps.peer.Status().ConnectionStatus == peer.Available {
   289  		pl.notifyPeerAvailable()
   290  	}
   291  }
   292  
   293  func (pl *List) rescorePeer(ps *peerScore) {
   294  	p := ps.peer
   295  	ps.status = p.Status()
   296  	ps.score = scorePeer(p)
   297  	pl.byScore.update(ps.idx)
   298  }
   299  
   300  func scorePeer(p peer.Peer) int64 {
   301  	status := p.Status()
   302  	score := int64(status.PendingRequestCount)
   303  	if status.ConnectionStatus != peer.Available {
   304  		score += int64(unavailablePenalty)
   305  	}
   306  	return score
   307  }