// Copyright (c) 2022 Uber Technologies, Inc.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.

package peerheap

import (
	"context"
	"math"
	"sync"
	"time"

	"go.uber.org/multierr"
	"go.uber.org/yarpc/api/peer"
	"go.uber.org/yarpc/api/transport"
	intyarpcerrors "go.uber.org/yarpc/internal/yarpcerrors"
	"go.uber.org/yarpc/pkg/lifecycle"
	"go.uber.org/yarpc/yarpcerrors"
)

var (
	// _noContextDeadlineError is returned by waitForPeerAvailableEvent when
	// the caller's context carries no deadline.
	_noContextDeadlineError = yarpcerrors.Newf(yarpcerrors.CodeInvalidArgument, "can't wait for peer without a context deadline for peerheap")
)

// unavailablePenalty is added by scorePeer to the score of any peer whose
// connection status is not peer.Available.
const unavailablePenalty = math.MaxInt32

// heapConfig collects the settings that HeapOption functions may adjust.
type heapConfig struct {
	// startupWait bounds how long Update waits for the list to be running.
	startupWait time.Duration
}

// defaultHeapConfig is the configuration New starts from before applying
// any options.
var defaultHeapConfig = heapConfig{
	startupWait: 5 * time.Second,
}

// HeapOption customizes the behavior of a peer heap.
52 type HeapOption func(*heapConfig) 53 54 // StartupWait specifies how long updates to the heap will block 55 // before the list heap been started 56 // 57 // Defaults to 5 seconds. 58 func StartupWait(t time.Duration) HeapOption { 59 return func(c *heapConfig) { 60 c.startupWait = t 61 } 62 } 63 64 // List is a peer list and peer chooser that favors the peer with the least 65 // pending requests, and then favors the least recently used or most recently 66 // introduced peer. 67 type List struct { 68 mu sync.Mutex 69 once *lifecycle.Once 70 71 transport peer.Transport 72 73 byScore peerHeap 74 byIdentifier map[string]*peerScore 75 76 peerAvailableEvent chan struct{} 77 78 startupWait time.Duration 79 } 80 81 // IsRunning returns whether the peer list is running. 82 func (pl *List) IsRunning() bool { 83 return pl.once.IsRunning() 84 } 85 86 // Start starts the peer list. 87 func (pl *List) Start() error { 88 return pl.once.Start(nil) 89 } 90 91 // Stop stops the peer list. This releases all retained peers. 92 func (pl *List) Stop() error { 93 return pl.once.Stop(pl.clearPeers) // TODO clear peers 94 } 95 96 // New returns a new peer heap-chooser-list for the given transport. 97 func New(transport peer.Transport, opts ...HeapOption) *List { 98 cfg := defaultHeapConfig 99 for _, o := range opts { 100 o(&cfg) 101 } 102 103 return &List{ 104 once: lifecycle.NewOnce(), 105 transport: transport, 106 byIdentifier: make(map[string]*peerScore), 107 peerAvailableEvent: make(chan struct{}, 1), 108 startupWait: cfg.startupWait, 109 } 110 } 111 112 // Update satisfies the peer.List interface, so a peer list updater can manage 113 // the retained peers. 
114 func (pl *List) Update(updates peer.ListUpdates) error { 115 ctx, cancel := context.WithTimeout(context.Background(), pl.startupWait) 116 defer cancel() 117 if err := pl.once.WaitUntilRunning(ctx); err != nil { 118 return intyarpcerrors.AnnotateWithInfo(yarpcerrors.FromError(err), "%s peer list is not running", "peer heap") 119 } 120 121 var errs error 122 123 pl.mu.Lock() 124 defer pl.mu.Unlock() 125 126 for _, pid := range updates.Removals { 127 errs = multierr.Append(errs, pl.releasePeer(pid)) 128 } 129 130 for _, pid := range updates.Additions { 131 errs = multierr.Append(errs, pl.retainPeer(pid)) 132 } 133 134 return errs 135 } 136 137 // retainPeer must be called with the mutex locked. 138 func (pl *List) retainPeer(pid peer.Identifier) error { 139 if _, ok := pl.byIdentifier[pid.Identifier()]; ok { 140 return peer.ErrPeerAddAlreadyInList(pid.Identifier()) 141 } 142 143 ps := &peerScore{id: pid, list: pl} 144 p, err := pl.transport.RetainPeer(pid, ps) 145 if err != nil { 146 return err 147 } 148 149 ps.peer = p 150 ps.score = scorePeer(p) 151 ps.boundFinish = ps.finish 152 pl.byIdentifier[pid.Identifier()] = ps 153 pl.byScore.pushPeer(ps) 154 pl.internalNotifyStatusChanged(ps) 155 return nil 156 } 157 158 // releasePeer must be called with the mutex locked. 
159 func (pl *List) releasePeer(pid peer.Identifier) error { 160 ps, ok := pl.byIdentifier[pid.Identifier()] 161 if !ok { 162 return peer.ErrPeerRemoveNotInList(pid.Identifier()) 163 } 164 165 if err := pl.byScore.validate(ps); err != nil { 166 return err 167 } 168 169 err := pl.transport.ReleasePeer(pid, ps) 170 delete(pl.byIdentifier, pid.Identifier()) 171 pl.byScore.delete(ps.idx) 172 ps.list = nil 173 return err 174 } 175 176 func (pl *List) clearPeers() error { 177 pl.mu.Lock() 178 defer pl.mu.Unlock() 179 180 var errs error 181 182 for { 183 ps, ok := pl.byScore.peekPeer() 184 if !ok { 185 break 186 } 187 188 errs = multierr.Append(errs, pl.releasePeer(ps.id)) 189 } 190 191 return errs 192 } 193 194 // Choose satisfies peer.Chooser, providing a single peer for a request, a 195 // callback for when the request is finished, or an error if it fails. 196 // The choose method takes a context that must have a deadline. 197 // Choose resepects this deadline, waiting for an available peer until the 198 // deadline. 199 // The peer heap does not use the given *transport.Request and can safely 200 // receive nil. 201 func (pl *List) Choose(ctx context.Context, _ *transport.Request) (peer.Peer, func(error), error) { 202 if err := pl.once.WaitUntilRunning(ctx); err != nil { 203 return nil, nil, intyarpcerrors.AnnotateWithInfo(yarpcerrors.FromError(err), "%s peer list is not running", "peer heap") 204 } 205 206 for { 207 if ps, ok := pl.get(); ok { 208 pl.notifyPeerAvailable() 209 ps.peer.StartRequest() 210 return ps.peer, ps.boundFinish, nil 211 } 212 213 if err := pl.waitForPeerAvailableEvent(ctx); err != nil { 214 return nil, nil, err 215 } 216 } 217 } 218 219 func (pl *List) get() (*peerScore, bool) { 220 pl.mu.Lock() 221 defer pl.mu.Unlock() 222 223 ps, ok := pl.byScore.popPeer() 224 if !ok { 225 return nil, false 226 } 227 228 // Note: We push the peer back to reset the "next" counter. 229 // This gives us round-robin behavior. 
230 pl.byScore.pushPeer(ps) 231 232 return ps, ps.status.ConnectionStatus == peer.Available 233 } 234 235 // waitForPeerAvailableEvent waits until a peer is added to the peer list or the 236 // given context finishes. 237 // Must NOT be run in a mutex.Lock() 238 func (pl *List) waitForPeerAvailableEvent(ctx context.Context) error { 239 if _, ok := ctx.Deadline(); !ok { 240 return _noContextDeadlineError 241 } 242 243 select { 244 case <-pl.peerAvailableEvent: 245 return nil 246 case <-ctx.Done(): 247 return newUnavailableError(ctx.Err()) 248 } 249 } 250 251 func newUnavailableError(err error) error { 252 return yarpcerrors.Newf(yarpcerrors.CodeUnavailable, "peer heap timed out waiting for peer: %s", err.Error()) 253 } 254 255 // notifyPeerAvailable writes to a channel indicating that a Peer is currently 256 // available for requests 257 func (pl *List) notifyPeerAvailable() { 258 select { 259 case pl.peerAvailableEvent <- struct{}{}: 260 default: 261 } 262 } 263 264 // NotifyStatusChanged receives notifications when a peer becomes available, 265 // connected, unavailable, or when its pending request count changes. 266 // This method satisfies peer.Subscriber and is only used for tests, since 267 // the peer heap has a subscriber for each invividual peer. 
268 func (pl *List) NotifyStatusChanged(pid peer.Identifier) { 269 pl.mu.Lock() 270 ps := pl.byIdentifier[pid.Identifier()] 271 pl.mu.Unlock() 272 ps.NotifyStatusChanged(pid) 273 } 274 275 func (pl *List) peerScoreChanged(ps *peerScore) { 276 pl.mu.Lock() 277 pl.rescorePeer(ps) 278 pl.mu.Unlock() 279 280 if ps.peer.Status().ConnectionStatus == peer.Available { 281 pl.notifyPeerAvailable() 282 } 283 } 284 285 func (pl *List) internalNotifyStatusChanged(ps *peerScore) { 286 pl.rescorePeer(ps) 287 288 if ps.peer.Status().ConnectionStatus == peer.Available { 289 pl.notifyPeerAvailable() 290 } 291 } 292 293 func (pl *List) rescorePeer(ps *peerScore) { 294 p := ps.peer 295 ps.status = p.Status() 296 ps.score = scorePeer(p) 297 pl.byScore.update(ps.idx) 298 } 299 300 func scorePeer(p peer.Peer) int64 { 301 status := p.Status() 302 score := int64(status.PendingRequestCount) 303 if status.ConnectionStatus != peer.Available { 304 score += int64(unavailablePenalty) 305 } 306 return score 307 }