github.com/hxx258456/ccgo@v0.0.5-0.20230213014102-48b35f46f66f/grpc/xds/internal/balancer/clusterimpl/clusterimpl.go

/*
 *
 * Copyright 2020 gRPC authors.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 */

// Package clusterimpl implements the xds_cluster_impl balancing policy. It
// handles the cluster features (e.g. circuit_breaking, RPC dropping).
//
// Note that it doesn't handle name resolution, which is done by policy
// xds_cluster_resolver.
package clusterimpl

import (
	"encoding/json"
	"fmt"
	"sync"
	"sync/atomic"

	"github.com/hxx258456/ccgo/grpc/balancer"
	"github.com/hxx258456/ccgo/grpc/connectivity"
	"github.com/hxx258456/ccgo/grpc/internal"
	"github.com/hxx258456/ccgo/grpc/internal/buffer"
	"github.com/hxx258456/ccgo/grpc/internal/grpclog"
	"github.com/hxx258456/ccgo/grpc/internal/grpcsync"
	"github.com/hxx258456/ccgo/grpc/internal/pretty"
	"github.com/hxx258456/ccgo/grpc/resolver"
	"github.com/hxx258456/ccgo/grpc/serviceconfig"
	xdsinternal "github.com/hxx258456/ccgo/grpc/xds/internal"
	"github.com/hxx258456/ccgo/grpc/xds/internal/balancer/loadstore"
	"github.com/hxx258456/ccgo/grpc/xds/internal/xdsclient"
	"github.com/hxx258456/ccgo/grpc/xds/internal/xdsclient/load"
)

const (
	// Name is the name of the cluster_impl balancer.
	Name                   = "xds_cluster_impl_experimental"
	defaultRequestCountMax = 1024
)

func init() {
	balancer.Register(bb{})
}

type bb struct{}

func (bb) Build(cc balancer.ClientConn, bOpts balancer.BuildOptions) balancer.Balancer {
	b := &clusterImplBalancer{
		ClientConn:      cc,
		bOpts:           bOpts,
		closed:          grpcsync.NewEvent(),
		done:            grpcsync.NewEvent(),
		loadWrapper:     loadstore.NewWrapper(),
		scWrappers:      make(map[balancer.SubConn]*scWrapper),
		pickerUpdateCh:  buffer.NewUnbounded(),
		requestCountMax: defaultRequestCountMax,
	}
	b.logger = prefixLogger(b)
	go b.run()
	b.logger.Infof("Created")
	return b
}

func (bb) Name() string {
	return Name
}

func (bb) ParseConfig(c json.RawMessage) (serviceconfig.LoadBalancingConfig, error) {
	return parseConfig(c)
}
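
// For orientation, parseConfig (defined in config.go, not shown in this file)
// decodes a JSON LB config into an *LBConfig. An illustrative input is shown
// below; the exact JSON field names are defined by LBConfig in config.go, so
// treat the keys here as an approximation rather than as authoritative:
//
//	{
//	  "cluster": "cluster_a",
//	  "edsServiceName": "service_a",
//	  "lrsLoadReportingServerName": "",
//	  "maxConcurrentRequests": 1024,
//	  "dropCategories": [{"category": "throttle", "requestsPerMillion": 100000}],
//	  "childPolicy": [{"round_robin": {}}]
//	}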

type clusterImplBalancer struct {
	balancer.ClientConn

	// mu guarantees mutual exclusion between Close() and the handling of a
	// picker update to the parent ClientConn in run(). It makes sure that the
	// run() goroutine doesn't send a picker update to the parent after the
	// balancer is closed.
	//
	// It's only used by the run() goroutine, not by the other exported
	// functions, because those are already guaranteed to be synchronized with
	// Close().
	mu     sync.Mutex
	closed *grpcsync.Event
	done   *grpcsync.Event

	bOpts     balancer.BuildOptions
	logger    *grpclog.PrefixLogger
	xdsClient xdsclient.XDSClient

	config           *LBConfig
	childLB          balancer.Balancer
	cancelLoadReport func()
	edsServiceName   string
	lrsServerName    *string
	loadWrapper      *loadstore.Wrapper

	clusterNameMu sync.Mutex
	clusterName   string

	scWrappersMu sync.Mutex
	// The SubConns passed to the child policy are wrapped in a wrapper, to
	// keep the locality ID. But when the parent ClientConn sends updates, it
	// gives the original SubConn, not the wrapper, while the child policies
	// only know about the wrapper; so when forwarding SubConn updates, they
	// must be sent for the wrappers.
	//
	// This keeps a map from the original SubConn to the wrapper, so that when
	// forwarding a SubConn state update, the child policy gets the wrapper.
	scWrappers map[balancer.SubConn]*scWrapper

	// childState/drops/requestCounter keep the state used by the most recently
	// generated picker. All fields can only be accessed in run(), and run() is
	// the only goroutine that sends pickers to the parent ClientConn. All
	// requests to update the picker need to be sent to pickerUpdateCh.
	childState            balancer.State
	dropCategories        []DropConfig // The categories for drops.
	drops                 []*dropper
	requestCounterCluster string // The cluster name for the request counter.
	requestCounterService string // The service name for the request counter.
	requestCounter        *xdsclient.ClusterRequestsCounter
	requestCountMax       uint32
	pickerUpdateCh        *buffer.Unbounded
}

// updateLoadStore checks the config for the load store, and decides whether it
// needs to restart the load reporting stream.
func (b *clusterImplBalancer) updateLoadStore(newConfig *LBConfig) error {
	var updateLoadClusterAndService bool

	// The name reported for loads is built from Cluster and EDSServiceName;
	// if either changes, the reported names need to be updated.
	clusterName := b.getClusterName()
	if clusterName != newConfig.Cluster {
		updateLoadClusterAndService = true
		b.setClusterName(newConfig.Cluster)
		clusterName = newConfig.Cluster
	}
	if b.edsServiceName != newConfig.EDSServiceName {
		updateLoadClusterAndService = true
		b.edsServiceName = newConfig.EDSServiceName
	}
	if updateLoadClusterAndService {
		// This updates the clusterName and serviceName that will be reported
		// for the loads. The update here is too early; the perfect timing is
		// when the picker is updated with the new connection. But from this
		// balancer's point of view, it's impossible to tell.
		//
		// On the other hand, this will almost never happen. Each LRS policy
		// shouldn't get an updated config. The parent should do a graceful
		// switch when the clusterName or serviceName is changed.
		b.loadWrapper.UpdateClusterAndService(clusterName, b.edsServiceName)
	}

	var (
		stopOldLoadReport  bool
		startNewLoadReport bool
	)

	// Check if it's necessary to restart the load report.
	if b.lrsServerName == nil {
		if newConfig.LoadReportingServerName != nil {
			// Old is nil, new is not nil, start new LRS.
			b.lrsServerName = newConfig.LoadReportingServerName
			startNewLoadReport = true
		}
		// Old is nil, new is nil, do nothing.
	} else if newConfig.LoadReportingServerName == nil {
		// Old is not nil, new is nil, stop old, don't start new.
		b.lrsServerName = newConfig.LoadReportingServerName
		stopOldLoadReport = true
	} else {
		// Old is not nil, new is not nil, compare string values, if
		// different, stop old and start new.
		if *b.lrsServerName != *newConfig.LoadReportingServerName {
			b.lrsServerName = newConfig.LoadReportingServerName
			stopOldLoadReport = true
			startNewLoadReport = true
		}
	}

	if stopOldLoadReport {
		if b.cancelLoadReport != nil {
			b.cancelLoadReport()
			b.cancelLoadReport = nil
			if !startNewLoadReport {
				// If a new LRS stream will be started later, no need to update
				// it to nil here.
				b.loadWrapper.UpdateLoadStore(nil)
			}
		}
	}
	if startNewLoadReport {
		var loadStore *load.Store
		if b.xdsClient != nil {
			loadStore, b.cancelLoadReport = b.xdsClient.ReportLoad(*b.lrsServerName)
		}
		b.loadWrapper.UpdateLoadStore(loadStore)
	}

	return nil
}
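
// For reference, the load report restart decision above can be summarized as
// follows (old = b.lrsServerName before the update, new =
// newConfig.LoadReportingServerName):
//
//	old == nil, new == nil              -> do nothing
//	old == nil, new != nil              -> start a new LRS stream
//	old != nil, new == nil              -> stop the old stream
//	old != nil, new != nil, same value  -> do nothing
//	old != nil, new != nil, different   -> stop the old stream, start a new one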

func (b *clusterImplBalancer) UpdateClientConnState(s balancer.ClientConnState) error {
	if b.closed.HasFired() {
		b.logger.Warningf("xds: received ClientConnState {%+v} after clusterImplBalancer was closed", s)
		return nil
	}

	b.logger.Infof("Received update from resolver, balancer config: %+v", pretty.ToJSON(s.BalancerConfig))
	newConfig, ok := s.BalancerConfig.(*LBConfig)
	if !ok {
		return fmt.Errorf("unexpected balancer config with type: %T", s.BalancerConfig)
	}

	// Check for potential errors at the beginning of this function, so that on
	// error we reject the whole config instead of applying part of it.
	bb := balancer.Get(newConfig.ChildPolicy.Name)
	if bb == nil {
		return fmt.Errorf("balancer %q not registered", newConfig.ChildPolicy.Name)
	}

	if b.xdsClient == nil {
		c := xdsclient.FromResolverState(s.ResolverState)
		if c == nil {
			return balancer.ErrBadResolverState
		}
		b.xdsClient = c
	}

	// Update the load reporting config. This needs to be done before updating
	// the child policy because we need the loadStore from the updated client
	// to be passed to the ccWrapper, so that the next picker from the child
	// policy will pick up the new loadStore.
	if err := b.updateLoadStore(newConfig); err != nil {
		return err
	}

	// If the child policy is a different type, recreate the sub-balancer.
	if b.config == nil || b.config.ChildPolicy.Name != newConfig.ChildPolicy.Name {
		if b.childLB != nil {
			b.childLB.Close()
		}
		b.childLB = bb.Build(b, b.bOpts)
	}
	b.config = newConfig

	if b.childLB == nil {
		// This is not an expected situation, and should be super rare in
		// practice.
		//
		// When this happens, we have already applied all the other
		// configurations (drop/circuit breaking), but there's no child policy.
		// This balancer will be stuck, and we report the error to the parent.
		return fmt.Errorf("child policy is nil, this means balancer %q's Build() returned nil", newConfig.ChildPolicy.Name)
	}

	// Notify run() of this new config, in case the drops and the request
	// counter need an update (which means a new picker needs to be generated).
	b.pickerUpdateCh.Put(newConfig)

	// Addresses and sub-balancer config are sent to the sub-balancer.
	return b.childLB.UpdateClientConnState(balancer.ClientConnState{
		ResolverState:  s.ResolverState,
		BalancerConfig: b.config.ChildPolicy.Config,
	})
}

func (b *clusterImplBalancer) ResolverError(err error) {
	if b.closed.HasFired() {
		b.logger.Warningf("xds: received resolver error {%+v} after clusterImplBalancer was closed", err)
		return
	}

	if b.childLB != nil {
		b.childLB.ResolverError(err)
	}
}

func (b *clusterImplBalancer) UpdateSubConnState(sc balancer.SubConn, s balancer.SubConnState) {
	if b.closed.HasFired() {
		b.logger.Warningf("xds: received subconn state change {%+v, %+v} after clusterImplBalancer was closed", sc, s)
		return
	}

	// Trigger re-resolution when a SubConn goes into TransientFailure. This is
	// necessary for the LogicalDNS in the cluster_resolver policy to
	// re-resolve.
	//
	// Note that this happens not only for addresses from DNS, but also for
	// EDS (cluster_impl doesn't know whether it's DNS or EDS; only the parent
	// knows). The parent priority policy is configured to ignore re-resolution
	// signals from the EDS children.
	if s.ConnectivityState == connectivity.TransientFailure {
		b.ClientConn.ResolveNow(resolver.ResolveNowOptions{})
	}

	b.scWrappersMu.Lock()
	if scw, ok := b.scWrappers[sc]; ok {
		sc = scw
		if s.ConnectivityState == connectivity.Shutdown {
			// Remove this SubConn from the map on Shutdown.
			delete(b.scWrappers, scw.SubConn)
		}
	}
	b.scWrappersMu.Unlock()
	if b.childLB != nil {
		b.childLB.UpdateSubConnState(sc, s)
	}
}

func (b *clusterImplBalancer) Close() {
	b.mu.Lock()
	b.closed.Fire()
	b.mu.Unlock()

	if b.childLB != nil {
		b.childLB.Close()
		b.childLB = nil
	}
	<-b.done.Done()
	b.logger.Infof("Shutdown")
}

func (b *clusterImplBalancer) ExitIdle() {
	if b.childLB == nil {
		return
	}
	if ei, ok := b.childLB.(balancer.ExitIdler); ok {
		ei.ExitIdle()
		return
	}
	// Fallback for children that don't support ExitIdle -- connect to all
	// SubConns.
	for _, sc := range b.scWrappers {
		sc.Connect()
	}
}

// Override methods to accept updates from the child LB.

func (b *clusterImplBalancer) UpdateState(state balancer.State) {
	// Instead of updating the parent ClientConn inline, send the state to
	// run().
	b.pickerUpdateCh.Put(state)
}

func (b *clusterImplBalancer) setClusterName(n string) {
	b.clusterNameMu.Lock()
	defer b.clusterNameMu.Unlock()
	b.clusterName = n
}

func (b *clusterImplBalancer) getClusterName() string {
	b.clusterNameMu.Lock()
	defer b.clusterNameMu.Unlock()
	return b.clusterName
}

// scWrapper is a wrapper of SubConn with locality ID. The locality ID can be
// retrieved from the addresses when creating a SubConn.
//
// All SubConns passed to the child policies are wrapped in this, so that the
// picker can get the localityID from the picked SubConn, and do load
// reporting.
//
// After wrapping, all SubConns to and from the parent ClientConn (e.g. for
// SubConn state updates, update/remove SubConn) must be the original SubConns.
// All SubConns to and from the child policy (NewSubConn, forwarding SubConn
// state updates) must be the wrapper. The balancer keeps a map from the
// original SubConn to the wrapper for this purpose.
type scWrapper struct {
	balancer.SubConn
	// locality needs to be atomic because it can be updated while being read
	// by the picker.
	locality atomic.Value // type xdsinternal.LocalityID
}

func (scw *scWrapper) updateLocalityID(lID xdsinternal.LocalityID) {
	scw.locality.Store(lID)
}

func (scw *scWrapper) localityID() xdsinternal.LocalityID {
	lID, _ := scw.locality.Load().(xdsinternal.LocalityID)
	return lID
}

func (b *clusterImplBalancer) NewSubConn(addrs []resolver.Address, opts balancer.NewSubConnOptions) (balancer.SubConn, error) {
	clusterName := b.getClusterName()
	newAddrs := make([]resolver.Address, len(addrs))
	var lID xdsinternal.LocalityID
	for i, addr := range addrs {
		newAddrs[i] = internal.SetXDSHandshakeClusterName(addr, clusterName)
		lID = xdsinternal.GetLocalityID(newAddrs[i])
	}
	sc, err := b.ClientConn.NewSubConn(newAddrs, opts)
	if err != nil {
		return nil, err
	}
	// Wrap this SubConn in a wrapper, and add it to the map.
	b.scWrappersMu.Lock()
	ret := &scWrapper{SubConn: sc}
	ret.updateLocalityID(lID)
	b.scWrappers[sc] = ret
	b.scWrappersMu.Unlock()
	return ret, nil
}

func (b *clusterImplBalancer) RemoveSubConn(sc balancer.SubConn) {
	scw, ok := sc.(*scWrapper)
	if !ok {
		b.ClientConn.RemoveSubConn(sc)
		return
	}
	// Remove the original SubConn from the parent ClientConn.
	//
	// Note that we don't remove this SubConn from the scWrappers map. We will
	// need it to forward the final SubConn state Shutdown to the child policy.
	//
	// The entry is kept in the map until its state changes to Shutdown, and it
	// is deleted in UpdateSubConnState().
	b.ClientConn.RemoveSubConn(scw.SubConn)
}

func (b *clusterImplBalancer) UpdateAddresses(sc balancer.SubConn, addrs []resolver.Address) {
	clusterName := b.getClusterName()
	newAddrs := make([]resolver.Address, len(addrs))
	var lID xdsinternal.LocalityID
	for i, addr := range addrs {
		newAddrs[i] = internal.SetXDSHandshakeClusterName(addr, clusterName)
		lID = xdsinternal.GetLocalityID(newAddrs[i])
	}
	if scw, ok := sc.(*scWrapper); ok {
		scw.updateLocalityID(lID)
		// Need to get the original SubConn from the wrapper before calling
		// the parent ClientConn.
		sc = scw.SubConn
	}
	b.ClientConn.UpdateAddresses(sc, newAddrs)
}

type dropConfigs struct {
	drops           []*dropper
	requestCounter  *xdsclient.ClusterRequestsCounter
	requestCountMax uint32
}

// handleDropAndRequestCount compares the drop and request counter config in
// newConfig with the one currently used by the picker. It returns a new
// dropConfigs if a new picker needs to be generated, and nil otherwise.
func (b *clusterImplBalancer) handleDropAndRequestCount(newConfig *LBConfig) *dropConfigs {
	// Compare the new drop config, and update the picker if it has changed.
	var updatePicker bool
	if !equalDropCategories(b.dropCategories, newConfig.DropCategories) {
		b.dropCategories = newConfig.DropCategories
		b.drops = make([]*dropper, 0, len(newConfig.DropCategories))
		for _, c := range newConfig.DropCategories {
			b.drops = append(b.drops, newDropper(c))
		}
		updatePicker = true
	}

	// Compare the cluster name, and update the picker if it has changed,
	// because circuit breaking's stream counter will be different.
	if b.requestCounterCluster != newConfig.Cluster || b.requestCounterService != newConfig.EDSServiceName {
		b.requestCounterCluster = newConfig.Cluster
		b.requestCounterService = newConfig.EDSServiceName
		b.requestCounter = xdsclient.GetClusterRequestsCounter(newConfig.Cluster, newConfig.EDSServiceName)
		updatePicker = true
	}
	// Compare the upper bound of the stream count, and update the picker if it
	// has changed. This is also for circuit breaking.
	var newRequestCountMax uint32 = defaultRequestCountMax
	if newConfig.MaxConcurrentRequests != nil {
		newRequestCountMax = *newConfig.MaxConcurrentRequests
	}
	if b.requestCountMax != newRequestCountMax {
		b.requestCountMax = newRequestCountMax
		updatePicker = true
	}

	if !updatePicker {
		return nil
	}
	return &dropConfigs{
		drops:           b.drops,
		requestCounter:  b.requestCounter,
		requestCountMax: b.requestCountMax,
	}
}
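
// run is the only goroutine that sends pickers to the parent ClientConn. It
// serializes updates from pickerUpdateCh: a child balancer.State always
// produces a new picker, and an *LBConfig produces one only when the drop or
// circuit breaking configuration changed. When the balancer is closed, run
// cancels any in-flight load reporting and exits.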
func (b *clusterImplBalancer) run() {
	defer b.done.Fire()
	for {
		select {
		case update := <-b.pickerUpdateCh.Get():
			b.pickerUpdateCh.Load()
			b.mu.Lock()
			if b.closed.HasFired() {
				b.mu.Unlock()
				return
			}
			switch u := update.(type) {
			case balancer.State:
				b.childState = u
				b.ClientConn.UpdateState(balancer.State{
					ConnectivityState: b.childState.ConnectivityState,
					Picker: newPicker(b.childState, &dropConfigs{
						drops:           b.drops,
						requestCounter:  b.requestCounter,
						requestCountMax: b.requestCountMax,
					}, b.loadWrapper),
				})
			case *LBConfig:
				dc := b.handleDropAndRequestCount(u)
				if dc != nil && b.childState.Picker != nil {
					b.ClientConn.UpdateState(balancer.State{
						ConnectivityState: b.childState.ConnectivityState,
						Picker:            newPicker(b.childState, dc, b.loadWrapper),
					})
				}
			}
			b.mu.Unlock()
		case <-b.closed.Done():
			if b.cancelLoadReport != nil {
				b.cancelLoadReport()
				b.cancelLoadReport = nil
			}
			return
		}
	}
}
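
// Illustrative usage sketch (an assumption about how the parent drives this
// balancer, not code from this package): applications don't use this policy
// directly. It registers itself under Name in init(), and a parent xDS policy
// builds and drives it roughly as below, where parentCC, buildOpts, state and
// cfg are placeholders for whatever the parent supplies:
//
//	builder := balancer.Get(Name) // "xds_cluster_impl_experimental"
//	child := builder.Build(parentCC, buildOpts)
//	err := child.UpdateClientConnState(balancer.ClientConnState{
//		ResolverState:  state,
//		BalancerConfig: cfg, // *LBConfig, e.g. from ParseConfig
//	})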