google.golang.org/grpc@v1.62.1/internal/balancer/gracefulswitch/gracefulswitch.go (about) 1 /* 2 * 3 * Copyright 2022 gRPC authors. 4 * 5 * Licensed under the Apache License, Version 2.0 (the "License"); 6 * you may not use this file except in compliance with the License. 7 * You may obtain a copy of the License at 8 * 9 * http://www.apache.org/licenses/LICENSE-2.0 10 * 11 * Unless required by applicable law or agreed to in writing, software 12 * distributed under the License is distributed on an "AS IS" BASIS, 13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 * See the License for the specific language governing permissions and 15 * limitations under the License. 16 * 17 */ 18 19 // Package gracefulswitch implements a graceful switch load balancer. 20 package gracefulswitch 21 22 import ( 23 "errors" 24 "fmt" 25 "sync" 26 27 "google.golang.org/grpc/balancer" 28 "google.golang.org/grpc/balancer/base" 29 "google.golang.org/grpc/connectivity" 30 "google.golang.org/grpc/resolver" 31 ) 32 33 var errBalancerClosed = errors.New("gracefulSwitchBalancer is closed") 34 var _ balancer.Balancer = (*Balancer)(nil) 35 36 // NewBalancer returns a graceful switch Balancer. 37 func NewBalancer(cc balancer.ClientConn, opts balancer.BuildOptions) *Balancer { 38 return &Balancer{ 39 cc: cc, 40 bOpts: opts, 41 } 42 } 43 44 // Balancer is a utility to gracefully switch from one balancer to 45 // a new balancer. It implements the balancer.Balancer interface. 46 type Balancer struct { 47 bOpts balancer.BuildOptions 48 cc balancer.ClientConn 49 50 // mu protects the following fields and all fields within balancerCurrent 51 // and balancerPending. mu does not need to be held when calling into the 52 // child balancers, as all calls into these children happen only as a direct 53 // result of a call into the gracefulSwitchBalancer, which are also 54 // guaranteed to be synchronous. There is one exception: an UpdateState call 55 // from a child balancer when current and pending are populated can lead to 56 // calling Close() on the current. To prevent that racing with an 57 // UpdateSubConnState from the channel, we hold currentMu during Close and 58 // UpdateSubConnState calls. 59 mu sync.Mutex 60 balancerCurrent *balancerWrapper 61 balancerPending *balancerWrapper 62 closed bool // set to true when this balancer is closed 63 64 // currentMu must be locked before mu. This mutex guards against this 65 // sequence of events: UpdateSubConnState() called, finds the 66 // balancerCurrent, gives up lock, updateState comes in, causes Close() on 67 // balancerCurrent before the UpdateSubConnState is called on the 68 // balancerCurrent. 69 currentMu sync.Mutex 70 } 71 72 // swap swaps out the current lb with the pending lb and updates the ClientConn. 73 // The caller must hold gsb.mu. 74 func (gsb *Balancer) swap() { 75 gsb.cc.UpdateState(gsb.balancerPending.lastState) 76 cur := gsb.balancerCurrent 77 gsb.balancerCurrent = gsb.balancerPending 78 gsb.balancerPending = nil 79 go func() { 80 gsb.currentMu.Lock() 81 defer gsb.currentMu.Unlock() 82 cur.Close() 83 }() 84 } 85 86 // Helper function that checks if the balancer passed in is current or pending. 87 // The caller must hold gsb.mu. 88 func (gsb *Balancer) balancerCurrentOrPending(bw *balancerWrapper) bool { 89 return bw == gsb.balancerCurrent || bw == gsb.balancerPending 90 } 91 92 // SwitchTo initializes the graceful switch process, which completes based on 93 // connectivity state changes on the current/pending balancer. Thus, the switch 94 // process is not complete when this method returns. This method must be called 95 // synchronously alongside the rest of the balancer.Balancer methods this 96 // Graceful Switch Balancer implements. 97 func (gsb *Balancer) SwitchTo(builder balancer.Builder) error { 98 gsb.mu.Lock() 99 if gsb.closed { 100 gsb.mu.Unlock() 101 return errBalancerClosed 102 } 103 bw := &balancerWrapper{ 104 gsb: gsb, 105 lastState: balancer.State{ 106 ConnectivityState: connectivity.Connecting, 107 Picker: base.NewErrPicker(balancer.ErrNoSubConnAvailable), 108 }, 109 subconns: make(map[balancer.SubConn]bool), 110 } 111 balToClose := gsb.balancerPending // nil if there is no pending balancer 112 if gsb.balancerCurrent == nil { 113 gsb.balancerCurrent = bw 114 } else { 115 gsb.balancerPending = bw 116 } 117 gsb.mu.Unlock() 118 balToClose.Close() 119 // This function takes a builder instead of a balancer because builder.Build 120 // can call back inline, and this utility needs to handle the callbacks. 121 newBalancer := builder.Build(bw, gsb.bOpts) 122 if newBalancer == nil { 123 // This is illegal and should never happen; we clear the balancerWrapper 124 // we were constructing if it happens to avoid a potential panic. 125 gsb.mu.Lock() 126 if gsb.balancerPending != nil { 127 gsb.balancerPending = nil 128 } else { 129 gsb.balancerCurrent = nil 130 } 131 gsb.mu.Unlock() 132 return balancer.ErrBadResolverState 133 } 134 135 // This write doesn't need to take gsb.mu because this field never gets read 136 // or written to on any calls from the current or pending. Calls from grpc 137 // to this balancer are guaranteed to be called synchronously, so this 138 // bw.Balancer field will never be forwarded to until this SwitchTo() 139 // function returns. 140 bw.Balancer = newBalancer 141 return nil 142 } 143 144 // Returns nil if the graceful switch balancer is closed. 145 func (gsb *Balancer) latestBalancer() *balancerWrapper { 146 gsb.mu.Lock() 147 defer gsb.mu.Unlock() 148 if gsb.balancerPending != nil { 149 return gsb.balancerPending 150 } 151 return gsb.balancerCurrent 152 } 153 154 // UpdateClientConnState forwards the update to the latest balancer created. 155 func (gsb *Balancer) UpdateClientConnState(state balancer.ClientConnState) error { 156 // The resolver data is only relevant to the most recent LB Policy. 157 balToUpdate := gsb.latestBalancer() 158 if balToUpdate == nil { 159 return errBalancerClosed 160 } 161 // Perform this call without gsb.mu to prevent deadlocks if the child calls 162 // back into the channel. The latest balancer can never be closed during a 163 // call from the channel, even without gsb.mu held. 164 return balToUpdate.UpdateClientConnState(state) 165 } 166 167 // ResolverError forwards the error to the latest balancer created. 168 func (gsb *Balancer) ResolverError(err error) { 169 // The resolver data is only relevant to the most recent LB Policy. 170 balToUpdate := gsb.latestBalancer() 171 if balToUpdate == nil { 172 return 173 } 174 // Perform this call without gsb.mu to prevent deadlocks if the child calls 175 // back into the channel. The latest balancer can never be closed during a 176 // call from the channel, even without gsb.mu held. 177 balToUpdate.ResolverError(err) 178 } 179 180 // ExitIdle forwards the call to the latest balancer created. 181 // 182 // If the latest balancer does not support ExitIdle, the subConns are 183 // re-connected to manually. 184 func (gsb *Balancer) ExitIdle() { 185 balToUpdate := gsb.latestBalancer() 186 if balToUpdate == nil { 187 return 188 } 189 // There is no need to protect this read with a mutex, as the write to the 190 // Balancer field happens in SwitchTo, which completes before this can be 191 // called. 192 if ei, ok := balToUpdate.Balancer.(balancer.ExitIdler); ok { 193 ei.ExitIdle() 194 return 195 } 196 gsb.mu.Lock() 197 defer gsb.mu.Unlock() 198 for sc := range balToUpdate.subconns { 199 sc.Connect() 200 } 201 } 202 203 // updateSubConnState forwards the update to the appropriate child. 204 func (gsb *Balancer) updateSubConnState(sc balancer.SubConn, state balancer.SubConnState, cb func(balancer.SubConnState)) { 205 gsb.currentMu.Lock() 206 defer gsb.currentMu.Unlock() 207 gsb.mu.Lock() 208 // Forward update to the appropriate child. Even if there is a pending 209 // balancer, the current balancer should continue to get SubConn updates to 210 // maintain the proper state while the pending is still connecting. 211 var balToUpdate *balancerWrapper 212 if gsb.balancerCurrent != nil && gsb.balancerCurrent.subconns[sc] { 213 balToUpdate = gsb.balancerCurrent 214 } else if gsb.balancerPending != nil && gsb.balancerPending.subconns[sc] { 215 balToUpdate = gsb.balancerPending 216 } 217 if balToUpdate == nil { 218 // SubConn belonged to a stale lb policy that has not yet fully closed, 219 // or the balancer was already closed. 220 gsb.mu.Unlock() 221 return 222 } 223 if state.ConnectivityState == connectivity.Shutdown { 224 delete(balToUpdate.subconns, sc) 225 } 226 gsb.mu.Unlock() 227 if cb != nil { 228 cb(state) 229 } else { 230 balToUpdate.UpdateSubConnState(sc, state) 231 } 232 } 233 234 // UpdateSubConnState forwards the update to the appropriate child. 235 func (gsb *Balancer) UpdateSubConnState(sc balancer.SubConn, state balancer.SubConnState) { 236 gsb.updateSubConnState(sc, state, nil) 237 } 238 239 // Close closes any active child balancers. 240 func (gsb *Balancer) Close() { 241 gsb.mu.Lock() 242 gsb.closed = true 243 currentBalancerToClose := gsb.balancerCurrent 244 gsb.balancerCurrent = nil 245 pendingBalancerToClose := gsb.balancerPending 246 gsb.balancerPending = nil 247 gsb.mu.Unlock() 248 249 currentBalancerToClose.Close() 250 pendingBalancerToClose.Close() 251 } 252 253 // balancerWrapper wraps a balancer.Balancer, and overrides some Balancer 254 // methods to help cleanup SubConns created by the wrapped balancer. 255 // 256 // It implements the balancer.ClientConn interface and is passed down in that 257 // capacity to the wrapped balancer. It maintains a set of subConns created by 258 // the wrapped balancer and calls from the latter to create/update/shutdown 259 // SubConns update this set before being forwarded to the parent ClientConn. 260 // State updates from the wrapped balancer can result in invocation of the 261 // graceful switch logic. 262 type balancerWrapper struct { 263 balancer.Balancer 264 gsb *Balancer 265 266 lastState balancer.State 267 subconns map[balancer.SubConn]bool // subconns created by this balancer 268 } 269 270 // Close closes the underlying LB policy and shuts down the subconns it 271 // created. bw must not be referenced via balancerCurrent or balancerPending in 272 // gsb when called. gsb.mu must not be held. Does not panic with a nil 273 // receiver. 274 func (bw *balancerWrapper) Close() { 275 // before Close is called. 276 if bw == nil { 277 return 278 } 279 // There is no need to protect this read with a mutex, as Close() is 280 // impossible to be called concurrently with the write in SwitchTo(). The 281 // callsites of Close() for this balancer in Graceful Switch Balancer will 282 // never be called until SwitchTo() returns. 283 bw.Balancer.Close() 284 bw.gsb.mu.Lock() 285 for sc := range bw.subconns { 286 sc.Shutdown() 287 } 288 bw.gsb.mu.Unlock() 289 } 290 291 func (bw *balancerWrapper) UpdateState(state balancer.State) { 292 // Hold the mutex for this entire call to ensure it cannot occur 293 // concurrently with other updateState() calls. This causes updates to 294 // lastState and calls to cc.UpdateState to happen atomically. 295 bw.gsb.mu.Lock() 296 defer bw.gsb.mu.Unlock() 297 bw.lastState = state 298 299 if !bw.gsb.balancerCurrentOrPending(bw) { 300 return 301 } 302 303 if bw == bw.gsb.balancerCurrent { 304 // In the case that the current balancer exits READY, and there is a pending 305 // balancer, you can forward the pending balancer's cached State up to 306 // ClientConn and swap the pending into the current. This is because there 307 // is no reason to gracefully switch from and keep using the old policy as 308 // the ClientConn is not connected to any backends. 309 if state.ConnectivityState != connectivity.Ready && bw.gsb.balancerPending != nil { 310 bw.gsb.swap() 311 return 312 } 313 // Even if there is a pending balancer waiting to be gracefully switched to, 314 // continue to forward current balancer updates to the Client Conn. Ignoring 315 // state + picker from the current would cause undefined behavior/cause the 316 // system to behave incorrectly from the current LB policies perspective. 317 // Also, the current LB is still being used by grpc to choose SubConns per 318 // RPC, and thus should use the most updated form of the current balancer. 319 bw.gsb.cc.UpdateState(state) 320 return 321 } 322 // This method is now dealing with a state update from the pending balancer. 323 // If the current balancer is currently in a state other than READY, the new 324 // policy can be swapped into place immediately. This is because there is no 325 // reason to gracefully switch from and keep using the old policy as the 326 // ClientConn is not connected to any backends. 327 if state.ConnectivityState != connectivity.Connecting || bw.gsb.balancerCurrent.lastState.ConnectivityState != connectivity.Ready { 328 bw.gsb.swap() 329 } 330 } 331 332 func (bw *balancerWrapper) NewSubConn(addrs []resolver.Address, opts balancer.NewSubConnOptions) (balancer.SubConn, error) { 333 bw.gsb.mu.Lock() 334 if !bw.gsb.balancerCurrentOrPending(bw) { 335 bw.gsb.mu.Unlock() 336 return nil, fmt.Errorf("%T at address %p that called NewSubConn is deleted", bw, bw) 337 } 338 bw.gsb.mu.Unlock() 339 340 var sc balancer.SubConn 341 oldListener := opts.StateListener 342 opts.StateListener = func(state balancer.SubConnState) { bw.gsb.updateSubConnState(sc, state, oldListener) } 343 sc, err := bw.gsb.cc.NewSubConn(addrs, opts) 344 if err != nil { 345 return nil, err 346 } 347 bw.gsb.mu.Lock() 348 if !bw.gsb.balancerCurrentOrPending(bw) { // balancer was closed during this call 349 sc.Shutdown() 350 bw.gsb.mu.Unlock() 351 return nil, fmt.Errorf("%T at address %p that called NewSubConn is deleted", bw, bw) 352 } 353 bw.subconns[sc] = true 354 bw.gsb.mu.Unlock() 355 return sc, nil 356 } 357 358 func (bw *balancerWrapper) ResolveNow(opts resolver.ResolveNowOptions) { 359 // Ignore ResolveNow requests from anything other than the most recent 360 // balancer, because older balancers were already removed from the config. 361 if bw != bw.gsb.latestBalancer() { 362 return 363 } 364 bw.gsb.cc.ResolveNow(opts) 365 } 366 367 func (bw *balancerWrapper) RemoveSubConn(sc balancer.SubConn) { 368 // Note: existing third party balancers may call this, so it must remain 369 // until RemoveSubConn is fully removed. 370 sc.Shutdown() 371 } 372 373 func (bw *balancerWrapper) UpdateAddresses(sc balancer.SubConn, addrs []resolver.Address) { 374 bw.gsb.mu.Lock() 375 if !bw.gsb.balancerCurrentOrPending(bw) { 376 bw.gsb.mu.Unlock() 377 return 378 } 379 bw.gsb.mu.Unlock() 380 bw.gsb.cc.UpdateAddresses(sc, addrs) 381 } 382 383 func (bw *balancerWrapper) Target() string { 384 return bw.gsb.cc.Target() 385 }