gitee.com/ks-custle/core-gm@v0.0.0-20230922171213-b83bdd97b62c/grpc/xds/internal/balancer/cdsbalancer/cluster_handler.go

/*
 * Copyright 2021 gRPC authors.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package cdsbalancer

import (
	"errors"
	"sync"

	"gitee.com/ks-custle/core-gm/grpc/xds/internal/xdsclient"
	"gitee.com/ks-custle/core-gm/grpc/xds/internal/xdsclient/xdsresource"
)

var errNotReceivedUpdate = errors.New("tried to construct a cluster update on a cluster that has not received an update")

// clusterHandlerUpdate wraps the information received from the registered CDS
// watcher. A non-nil error is propagated to the underlying cluster_resolver
// balancer. A valid update results in creating a new cluster_resolver balancer
// (if one doesn't already exist) and pushing the update to it.
type clusterHandlerUpdate struct {
	// securityCfg is the Security Config from the top (root) cluster.
	securityCfg *xdsresource.SecurityConfig
	// lbPolicy is the lb policy from the top (root) cluster.
	//
	// Currently, we only support roundrobin or ringhash, and since roundrobin
	// doesn't need configs, this is only set to the ringhash config, if the
	// policy is ringhash. In the future, if we support more policies, we can
	// make this an interface, and set it to the config of the other policies.
	lbPolicy *xdsresource.ClusterLBPolicyRingHash

	// updates is a list of ClusterUpdates from all the leaf clusters.
	updates []xdsresource.ClusterUpdate
	err     error
}

// clusterHandler will be given a name representing a cluster. It will then
// update the CDS policy constantly with a list of Clusters to pass down to
// XdsClusterResolverLoadBalancingPolicyConfig in a stream-like fashion.
type clusterHandler struct {
	parent *cdsBalancer

	// A mutex to protect the entire tree of clusters.
	clusterMutex    sync.Mutex
	root            *clusterNode
	rootClusterName string

	// A way to ping the CDS balancer about any updates or errors to a Node in
	// the tree. This will either get called from this handler constructing an
	// update or from a child with an error. Capacity of one as the only update
	// the CDS balancer cares about is the most recent update.
	updateChannel chan clusterHandlerUpdate
}

func newClusterHandler(parent *cdsBalancer) *clusterHandler {
	return &clusterHandler{
		parent:        parent,
		updateChannel: make(chan clusterHandlerUpdate, 1),
	}
}

func (ch *clusterHandler) updateRootCluster(rootClusterName string) {
	ch.clusterMutex.Lock()
	defer ch.clusterMutex.Unlock()
	if ch.root == nil {
		// Construct a root node on first update.
		ch.root = createClusterNode(rootClusterName, ch.parent.xdsClient, ch)
		ch.rootClusterName = rootClusterName
		return
	}
	// Check if the root cluster was changed. If it was, delete the old one and
	// start a new one; if not, do nothing.
	if rootClusterName != ch.rootClusterName {
		ch.root.delete()
		ch.root = createClusterNode(rootClusterName, ch.parent.xdsClient, ch)
		ch.rootClusterName = rootClusterName
	}
}

// This function tries to construct a cluster update to send to CDS.
func (ch *clusterHandler) constructClusterUpdate() {
	if ch.root == nil {
		// If root is nil, this handler is closed, ignore the update.
		return
	}
	clusterUpdate, err := ch.root.constructClusterUpdate()
	if err != nil {
		// If an error is returned, no-op, as it simply means one of the
		// children hasn't received an update yet.
		return
	}
	// For a ClusterUpdate, the only update CDS cares about is the most
	// recent one, so opportunistically drain the update channel before
	// sending the new update.
	select {
	case <-ch.updateChannel:
	default:
	}
	ch.updateChannel <- clusterHandlerUpdate{
		securityCfg: ch.root.clusterUpdate.SecurityCfg,
		lbPolicy:    ch.root.clusterUpdate.LBPolicy,
		updates:     clusterUpdate,
	}
}

// close() is meant to be called by CDS when the CDS balancer is closed, and it
// cancels the watches for every cluster in the cluster tree.
func (ch *clusterHandler) close() {
	ch.clusterMutex.Lock()
	defer ch.clusterMutex.Unlock()
	if ch.root == nil {
		return
	}
	ch.root.delete()
	ch.root = nil
	ch.rootClusterName = ""
}

// This logically represents a cluster. This handles all the logic for starting
// and stopping a cluster watch, handling any updates, and constructing a list
// recursively for the ClusterHandler.
type clusterNode struct {
	// A way to cancel the watch for the cluster.
	cancelFunc func()

	// A list of children, as the Node can be an aggregate Cluster.
	children []*clusterNode

	// A ClusterUpdate in order to build a list of cluster updates for CDS to
	// send down to the child XdsClusterResolverLoadBalancingPolicy.
	clusterUpdate xdsresource.ClusterUpdate

	// This boolean determines whether this Node has received an update or not.
	// This isn't the best practice, but this will protect a list of Cluster
	// Updates from being constructed if a cluster in the tree has not received
	// an update yet.
	receivedUpdate bool

	clusterHandler *clusterHandler
}

// createClusterNode creates a cluster node from a given clusterName. This will
// also start the watch for that cluster.
func createClusterNode(clusterName string, xdsClient xdsclient.XDSClient, topLevelHandler *clusterHandler) *clusterNode {
	c := &clusterNode{
		clusterHandler: topLevelHandler,
	}
	// Communicate with the xds client here.
	topLevelHandler.parent.logger.Infof("CDS watch started on %v", clusterName)
	cancel := xdsClient.WatchCluster(clusterName, c.handleResp)
	c.cancelFunc = func() {
		topLevelHandler.parent.logger.Infof("CDS watch canceled on %v", clusterName)
		cancel()
	}
	return c
}

// This function cancels the cluster watch on the cluster and all of its
// children.
func (c *clusterNode) delete() {
	c.cancelFunc()
	for _, child := range c.children {
		child.delete()
	}
}

// Construct cluster update (potentially a list of ClusterUpdates) for a node.
func (c *clusterNode) constructClusterUpdate() ([]xdsresource.ClusterUpdate, error) {
	// If the cluster has not yet received an update, the cluster update is not
	// yet ready.
	if !c.receivedUpdate {
		return nil, errNotReceivedUpdate
	}

	// Base case - LogicalDNS or EDS. Both of these cluster types will be tied
	// to a single ClusterUpdate.
	if c.clusterUpdate.ClusterType != xdsresource.ClusterTypeAggregate {
		return []xdsresource.ClusterUpdate{c.clusterUpdate}, nil
	}

	// If it's an aggregate cluster, construct a list by recursively calling
	// down to all of its children.
	var childrenUpdates []xdsresource.ClusterUpdate
	for _, child := range c.children {
		childUpdateList, err := child.constructClusterUpdate()
		if err != nil {
			return nil, err
		}
		childrenUpdates = append(childrenUpdates, childUpdateList...)
	}
	return childrenUpdates, nil
}

// handleResp handles an xds response for a particular cluster. This function
// also handles any logic with regards to any child state that may have changed.
// At the end of handleResp(), the cluster handler will be pinged in certain
// situations to try and construct an update to send back to CDS.
func (c *clusterNode) handleResp(clusterUpdate xdsresource.ClusterUpdate, err error) {
	c.clusterHandler.clusterMutex.Lock()
	defer c.clusterHandler.clusterMutex.Unlock()
	if err != nil { // Write this error for run() to pick up in CDS LB policy.
		// For a ClusterUpdate, the only update CDS cares about is the most
		// recent one, so opportunistically drain the update channel before
		// sending the new update.
		select {
		case <-c.clusterHandler.updateChannel:
		default:
		}
		c.clusterHandler.updateChannel <- clusterHandlerUpdate{err: err}
		return
	}

	c.receivedUpdate = true
	c.clusterUpdate = clusterUpdate

	// If the cluster is a leaf node, a change in this cluster update changes
	// the overall cluster update, and there is a possibility for the overall
	// update to build, so ping the cluster handler to construct it. Also, if
	// there were any children previously, delete them, as the cluster type is
	// no longer an aggregate cluster.
	if clusterUpdate.ClusterType != xdsresource.ClusterTypeAggregate {
		for _, child := range c.children {
			child.delete()
		}
		c.children = nil
		// This is an update in the one leaf node, should try to send an update
		// to the parent CDS balancer.
		//
		// Note that this update might be a duplicate of the previous one.
		// Because the update contains not only the cluster name to watch, but
		// also the extra fields (e.g. security config), there's no good way to
		// compare all the fields.
		c.clusterHandler.constructClusterUpdate()
		return
	}

	// Aggregate cluster handling.
	newChildren := make(map[string]bool)
	for _, childName := range clusterUpdate.PrioritizedClusterNames {
		newChildren[childName] = true
	}

	// These booleans help determine whether this callback will ping the overall
	// clusterHandler to try and construct an update to send back to CDS. This
	// will be determined by whether there would be a change in the overall
	// clusterUpdate for the whole tree (ex. a change in the clusterUpdate for
	// the current cluster or a deleted child) and also whether there's even a
	// possibility for the update to build (ex. if a child is created and a
	// watch is started, that child hasn't received an update yet due to the
	// mutex lock on this callback).
	var createdChild, deletedChild bool

	// This map represents the current children of the cluster. New children
	// are first added to it, then any children no longer present are deleted
	// from it. It is then used, together with the ordering in the received
	// cluster update, to construct the new child list.
	mapCurrentChildren := make(map[string]*clusterNode)
	for _, child := range c.children {
		mapCurrentChildren[child.clusterUpdate.ClusterName] = child
	}

	// Add and construct any new child nodes.
	for child := range newChildren {
		if _, inChildrenAlready := mapCurrentChildren[child]; !inChildrenAlready {
			createdChild = true
			mapCurrentChildren[child] = createClusterNode(child, c.clusterHandler.parent.xdsClient, c.clusterHandler)
		}
	}

	// Delete any child nodes no longer in the aggregate cluster's children.
	for child := range mapCurrentChildren {
		if _, stillAChild := newChildren[child]; !stillAChild {
			deletedChild = true
			mapCurrentChildren[child].delete()
			delete(mapCurrentChildren, child)
		}
	}

	// The order of the children list matters, so use the clusterUpdate from
	// the xds client as the ordering, and use that logical ordering for the
	// new children list. This will be a mixture of child nodes which are all
	// already constructed in the mapCurrentChildren map.
	var children = make([]*clusterNode, 0, len(clusterUpdate.PrioritizedClusterNames))

	for _, orderedChild := range clusterUpdate.PrioritizedClusterNames {
		// The clusters already have watches started for them in the xds
		// client, so these pointers can be used to construct the new children
		// list; they just have to be put in the correct order using the
		// original cluster update.
		currentChild := mapCurrentChildren[orderedChild]
		children = append(children, currentChild)
	}

	c.children = children

	// If the cluster is an aggregate cluster and this callback created any new
	// child cluster nodes, then there's no possibility for a full cluster
	// update to successfully build, as those created children will not have
	// received an update yet. However, if a child was simply deleted, then
	// there is a possibility that a full cluster update will build, and the
	// overall cluster update will also have changed because of the deleted
	// child.
	if deletedChild && !createdChild {
		c.clusterHandler.constructClusterUpdate()
	}
}
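
// The function below is an illustrative sketch only and is not part of the
// original file: it shows how a consumer, such as the parent cdsBalancer's
// run() goroutine, might read from updateChannel, which by construction only
// ever holds the most recent update. The function name and the stop channel
// are hypothetical and exist purely for illustration.
func exampleConsumeUpdates(ch *clusterHandler, stop <-chan struct{}) {
	for {
		select {
		case u := <-ch.updateChannel:
			if u.err != nil {
				// A watch error from any node in the tree is surfaced here.
				continue
			}
			// u.updates holds one ClusterUpdate per leaf cluster, while
			// u.securityCfg and u.lbPolicy come from the root cluster.
			_ = u.updates
		case <-stop:
			return
		}
	}
}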