gitee.com/ks-custle/core-gm@v0.0.0-20230922171213-b83bdd97b62c/grpc/xds/internal/balancer/cdsbalancer/cluster_handler.go

/*
 * Copyright 2021 gRPC authors.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package cdsbalancer

import (
	"errors"
	"sync"

	"gitee.com/ks-custle/core-gm/grpc/xds/internal/xdsclient"
	"gitee.com/ks-custle/core-gm/grpc/xds/internal/xdsclient/xdsresource"
)

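// errNotReceivedUpdate is returned by constructClusterUpdate when a cluster in
// the tree has not yet received an update from the xDS client.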
var errNotReceivedUpdate = errors.New("tried to construct a cluster update on a cluster that has not received an update")

// clusterHandlerUpdate wraps the information received from the registered CDS
// watcher. A non-nil error is propagated to the underlying cluster_resolver
// balancer. A valid update results in creating a new cluster_resolver balancer
// (if one doesn't already exist) and pushing the update to it.
type clusterHandlerUpdate struct {
	// securityCfg is the Security Config from the top (root) cluster.
	securityCfg *xdsresource.SecurityConfig
	// lbPolicy is the lb policy from the top (root) cluster.
	//
	// Currently, we only support roundrobin or ringhash, and since roundrobin
	// doesn't need a config, this is only set to the ringhash config, if the
	// policy is ringhash. In the future, if we support more policies, we can
	// make this an interface, and set it to the config of the other policies.
	lbPolicy *xdsresource.ClusterLBPolicyRingHash

	// updates is a list of ClusterUpdates from all the leaf clusters.
	updates []xdsresource.ClusterUpdate
	err     error
}

// clusterHandler will be given a name representing a cluster. It will then
// continually update the CDS policy with a list of Clusters to pass down to
// XdsClusterResolverLoadBalancingPolicyConfig in a stream-like fashion.
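//
// For example (hypothetical cluster names): if the root cluster "cluster-agg"
// is an aggregate cluster whose children are "cluster-eds" (an EDS cluster)
// and "cluster-dns" (a LogicalDNS cluster), the handler watches all three
// clusters and, once every leaf has received an update, sends a single
// clusterHandlerUpdate whose updates field is the priority-ordered list of the
// two leaf ClusterUpdates.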
type clusterHandler struct {
	parent *cdsBalancer

	// A mutex to protect the entire tree of clusters.
	clusterMutex    sync.Mutex
	root            *clusterNode
	rootClusterName string

	// A way to ping the CDS balancer about any updates or errors for a node in
	// the tree. This is written to either by this handler when it constructs
	// an update or by a child node that received an error. Capacity of one, as
	// the only update the CDS balancer cares about is the most recent one.
	updateChannel chan clusterHandlerUpdate
}

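// newClusterHandler returns a clusterHandler with the given cdsBalancer as its
// parent and an update channel of capacity one.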
func newClusterHandler(parent *cdsBalancer) *clusterHandler {
	return &clusterHandler{
		parent:        parent,
		updateChannel: make(chan clusterHandlerUpdate, 1),
	}
}

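// updateRootCluster handles the root cluster name received from the CDS
// balancer. It starts a watch for that cluster on the first call, and if the
// root cluster name changes on a later call, it cancels the watches for the
// old tree and starts a new one.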
func (ch *clusterHandler) updateRootCluster(rootClusterName string) {
	ch.clusterMutex.Lock()
	defer ch.clusterMutex.Unlock()
	if ch.root == nil {
		// Construct a root node on first update.
		ch.root = createClusterNode(rootClusterName, ch.parent.xdsClient, ch)
		ch.rootClusterName = rootClusterName
		return
	}
	// Check if the root cluster was changed. If it was, delete the old tree
	// and start a new one; otherwise do nothing.
	if rootClusterName != ch.rootClusterName {
		ch.root.delete()
		ch.root = createClusterNode(rootClusterName, ch.parent.xdsClient, ch)
		ch.rootClusterName = rootClusterName
	}
}

// constructClusterUpdate tries to construct a cluster update to send to CDS.
func (ch *clusterHandler) constructClusterUpdate() {
	if ch.root == nil {
		// If root is nil, this handler is closed, ignore the update.
		return
	}
	clusterUpdate, err := ch.root.constructClusterUpdate()
	if err != nil {
		// If an error was received, no-op, as this simply means one of the
		// children hasn't received an update yet.
		return
	}
	// For a ClusterUpdate, the only update CDS cares about is the most
	// recent one, so opportunistically drain the update channel before
	// sending the new update.
	select {
	case <-ch.updateChannel:
	default:
	}
	ch.updateChannel <- clusterHandlerUpdate{
		securityCfg: ch.root.clusterUpdate.SecurityCfg,
		lbPolicy:    ch.root.clusterUpdate.LBPolicy,
		updates:     clusterUpdate,
	}
}

// close() is meant to be called by CDS when the CDS balancer is closed, and it
// cancels the watches for every cluster in the cluster tree.
func (ch *clusterHandler) close() {
	ch.clusterMutex.Lock()
	defer ch.clusterMutex.Unlock()
	if ch.root == nil {
		return
	}
	ch.root.delete()
	ch.root = nil
	ch.rootClusterName = ""
}

// clusterNode logically represents a cluster. It handles all the logic for
// starting and stopping a cluster watch, handling any updates, and recursively
// constructing a list of cluster updates for the clusterHandler.
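//
// Nodes for aggregate clusters have child nodes, one per child cluster in
// priority order; nodes for EDS or LogicalDNS clusters are leaves.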
type clusterNode struct {
	// A way to cancel the watch for the cluster.
	cancelFunc func()

	// A list of children, as the Node can be an aggregate Cluster.
	children []*clusterNode

	// The ClusterUpdate for this cluster, used to build the list of cluster
	// updates for CDS to send down to the child
	// XdsClusterResolverLoadBalancingPolicy.
	clusterUpdate xdsresource.ClusterUpdate

	// This boolean determines whether this Node has received an update or not.
	// This isn't the best practice, but it protects a list of Cluster Updates
	// from being constructed if a cluster in the tree has not received an
	// update yet.
	receivedUpdate bool

	clusterHandler *clusterHandler
}

// createClusterNode creates a cluster node from a given clusterName. This will
// also start the watch for that cluster.
func createClusterNode(clusterName string, xdsClient xdsclient.XDSClient, topLevelHandler *clusterHandler) *clusterNode {
	c := &clusterNode{
		clusterHandler: topLevelHandler,
	}
	// Communicate with the xds client here.
	topLevelHandler.parent.logger.Infof("CDS watch started on %v", clusterName)
	cancel := xdsClient.WatchCluster(clusterName, c.handleResp)
	c.cancelFunc = func() {
		topLevelHandler.parent.logger.Infof("CDS watch canceled on %v", clusterName)
		cancel()
	}
	return c
}

// delete cancels the cluster watch on this cluster and all of its children.
func (c *clusterNode) delete() {
	c.cancelFunc()
	for _, child := range c.children {
		child.delete()
	}
}

// constructClusterUpdate constructs a cluster update (potentially a list of
// ClusterUpdates) for this node.
func (c *clusterNode) constructClusterUpdate() ([]xdsresource.ClusterUpdate, error) {
	// If the cluster has not yet received an update, the cluster update is not
	// yet ready.
	if !c.receivedUpdate {
		return nil, errNotReceivedUpdate
	}

	// Base case - LogicalDNS or EDS. Both of these cluster types will be tied
	// to a single ClusterUpdate.
	if c.clusterUpdate.ClusterType != xdsresource.ClusterTypeAggregate {
		return []xdsresource.ClusterUpdate{c.clusterUpdate}, nil
	}

	// If this is an aggregate cluster, construct a list by recursively calling
	// down to all of its children.
	var childrenUpdates []xdsresource.ClusterUpdate
	for _, child := range c.children {
		childUpdateList, err := child.constructClusterUpdate()
		if err != nil {
			return nil, err
		}
		childrenUpdates = append(childrenUpdates, childUpdateList...)
	}
	return childrenUpdates, nil
}

// handleResp handles an xDS response for a particular cluster. This function
// also handles any logic with regards to any child state that may have changed.
// At the end of handleResp(), the clusterHandler will be pinged in certain
// situations to try and construct an update to send back to CDS.
func (c *clusterNode) handleResp(clusterUpdate xdsresource.ClusterUpdate, err error) {
	c.clusterHandler.clusterMutex.Lock()
	defer c.clusterHandler.clusterMutex.Unlock()
	if err != nil { // Write this error for run() to pick up in CDS LB policy.
		// For a ClusterUpdate, the only update CDS cares about is the most
		// recent one, so opportunistically drain the update channel before
		// sending the new update.
		select {
		case <-c.clusterHandler.updateChannel:
		default:
		}
		c.clusterHandler.updateChannel <- clusterHandlerUpdate{err: err}
		return
	}

	c.receivedUpdate = true
	c.clusterUpdate = clusterUpdate

	// If the cluster is a leaf node and the received cluster update changed,
	// then the overall cluster update would change, and there is a possibility
	// for the overall update to build, so ping the cluster handler to try to
	// construct one. Also, if there were any children from before, delete
	// them, as the cluster type is no longer an aggregate cluster.
	if clusterUpdate.ClusterType != xdsresource.ClusterTypeAggregate {
		for _, child := range c.children {
			child.delete()
		}
		c.children = nil
		// This is an update for a single leaf node, so try to send an update
		// to the parent CDS balancer.
		//
		// Note that this update might be a duplicate of the previous one,
		// because the update contains not only the cluster name to watch, but
		// also extra fields (e.g. security config); there's no good way to
		// compare all the fields.
		c.clusterHandler.constructClusterUpdate()
		return
	}

	// Aggregate cluster handling.
	newChildren := make(map[string]bool)
	for _, childName := range clusterUpdate.PrioritizedClusterNames {
		newChildren[childName] = true
	}

	// These booleans help determine whether this callback will ping the
	// overall clusterHandler to try to construct an update to send back to
	// CDS. This is determined by whether the overall clusterUpdate for the
	// whole tree would change (e.g. a change in the clusterUpdate for the
	// current cluster, or a deleted child) and also whether there's even a
	// possibility for the update to build (e.g. if a child is created and a
	// watch is started, that child can't have received an update yet because
	// this callback holds the mutex).
	var createdChild, deletedChild bool

	// This map represents the current children of the cluster. New children
	// are first added to it, then any children that are no longer present are
	// deleted from it. It is then used, together with the ordering in the
	// received cluster update, to construct the new child list.
	mapCurrentChildren := make(map[string]*clusterNode)
	for _, child := range c.children {
		mapCurrentChildren[child.clusterUpdate.ClusterName] = child
	}

	// Add and construct any new child nodes.
	for child := range newChildren {
		if _, inChildrenAlready := mapCurrentChildren[child]; !inChildrenAlready {
			createdChild = true
			mapCurrentChildren[child] = createClusterNode(child, c.clusterHandler.parent.xdsClient, c.clusterHandler)
		}
	}

	// Delete any child nodes no longer in the aggregate cluster's children.
	for child := range mapCurrentChildren {
		if _, stillAChild := newChildren[child]; !stillAChild {
			deletedChild = true
			mapCurrentChildren[child].delete()
			delete(mapCurrentChildren, child)
		}
	}

	// The order of the children list matters, so use the ordering from the
	// xdsclient clusterUpdate for the new children list. The list will be a
	// mixture of child nodes, all of which have already been constructed and
	// are present in mapCurrentChildren.
	var children = make([]*clusterNode, 0, len(clusterUpdate.PrioritizedClusterNames))

	for _, orderedChild := range clusterUpdate.PrioritizedClusterNames {
		// The clusters already have watches started for them in the xds
		// client, so these pointers can be used to construct the new children
		// list; they just have to be put in the correct order using the
		// received cluster update.
		currentChild := mapCurrentChildren[orderedChild]
		children = append(children, currentChild)
	}

	c.children = children

	// For an aggregate cluster, if this callback created any new child cluster
	// nodes, then there's no possibility for a full cluster update to
	// successfully build, as those newly created children will not have
	// received an update yet. However, if a child was simply deleted, then
	// there is a possibility that a full cluster update can be built, and the
	// overall cluster update will have changed due to the deleted child.
	if deletedChild && !createdChild {
		c.clusterHandler.constructClusterUpdate()
	}
}