google.golang.org/grpc@v1.72.2/xds/internal/balancer/clusterresolver/resource_resolver.go

/*
 *
 * Copyright 2021 gRPC authors.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 */

package clusterresolver

import (
	"context"
	"sync"

	"google.golang.org/grpc/internal/grpclog"
	"google.golang.org/grpc/internal/grpcsync"
	"google.golang.org/grpc/resolver"
	"google.golang.org/grpc/xds/internal/xdsclient/xdsresource"
)

// resourceUpdate is a combined update from all the resources, in the order of
// priority. For example, it can be {EDS, EDS, DNS}.
type resourceUpdate struct {
	// A discovery mechanism would return an empty update when it runs into
	// errors, and this would result in the priority LB policy reporting
	// TRANSIENT_FAILURE (if there was a single discovery mechanism), or would
	// fall back to the next highest priority that is available.
	priorities []priorityConfig
	// To be invoked once the update is completely processed, or is dropped in
	// favor of a newer update.
	onDone xdsresource.OnDoneFunc
}

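// For concreteness, a combined update for the mechanisms {EDS, DNS} could be
// assembled roughly as below. This is a minimal sketch only: edsMechanism,
// dnsMechanism, edsUpdate, and dnsEndpoints are hypothetical placeholders,
// and real entries also carry a childNameGen (see generateLocked).
//
//	update := &resourceUpdate{
//		priorities: []priorityConfig{
//			{mechanism: edsMechanism, edsResp: edsUpdate},
//			{mechanism: dnsMechanism, endpoints: dnsEndpoints},
//		},
//		onDone: func() { /* unblock the xDS client's receive path */ },
//	}
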
// topLevelResolver is used by concrete endpointsResolver implementations for
// reporting updates and errors. The `resourceResolver` type implements this
// interface and takes appropriate actions upon receipt of updates and errors
// from underlying concrete resolvers.
type topLevelResolver interface {
	// onUpdate is called when a new update is received from the underlying
	// endpointsResolver implementation. The onDone callback is to be invoked
	// once the update is completely processed, or is dropped in favor of a
	// newer update.
	onUpdate(onDone xdsresource.OnDoneFunc)
}

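// A child resolver typically stores its latest result and then notifies the
// parent, which pulls the result back out via lastUpdate. A sketch, assuming
// a hypothetical child type with `mu`, `update`, and `topLevelResolver`
// fields:
//
//	func (cr *childResolver) onNewResult(update any, onDone xdsresource.OnDoneFunc) {
//		cr.mu.Lock()
//		cr.update = update
//		cr.mu.Unlock()
//		// The parent will call cr.lastUpdate() from generateLocked.
//		cr.topLevelResolver.onUpdate(onDone)
//	}
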
// endpointsResolver wraps the functionality to resolve a given resource name
// to a set of endpoints. The mechanism used by concrete implementations
// depends on the supported discovery mechanism type.
type endpointsResolver interface {
	// lastUpdate returns endpoint results from the most recent resolution.
	//
	// The type of the first return value is dependent on the resolver
	// implementation.
	//
	// The second return value indicates whether the resolver was able to
	// successfully resolve the resource name to endpoints. If set to false,
	// the first return value is invalid and must not be used.
	lastUpdate() (any, bool)

	// resolveNow triggers re-resolution of the resource.
	resolveNow()

	// stop stops resolution of the resource. Implementations must not invoke
	// any methods on the topLevelResolver interface once `stop()` returns.
	stop()
}

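// A minimal conforming implementation, in the style of a test fake (all
// names here are hypothetical, not part of this package):
//
//	type fakeResolver struct {
//		mu        sync.Mutex
//		endpoints []resolver.Endpoint
//		resolved  bool
//	}
//
//	func (f *fakeResolver) lastUpdate() (any, bool) {
//		f.mu.Lock()
//		defer f.mu.Unlock()
//		if !f.resolved {
//			return nil, false // no resolution yet; first value must not be used
//		}
//		return f.endpoints, true
//	}
//
//	func (f *fakeResolver) resolveNow() {} // nothing to re-resolve in a fake
//	func (f *fakeResolver) stop()       {} // must not touch topLevelResolver after this
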
// discoveryMechanismKey is {type+resource_name}. It is used as the map key so
// that the same resource resolver can be reused, e.g. when there are two
// mechanisms for the same EDS resource that have different circuit breaking
// configs.
type discoveryMechanismKey struct {
	typ  DiscoveryMechanismType
	name string
}

// discoveryMechanismAndResolver keeps the resolver and the discovery
// mechanism together, because resolvers can be shared, and we need the
// mechanism for fields like circuit breaking and LRS when generating the
// balancer config.
type discoveryMechanismAndResolver struct {
	dm DiscoveryMechanism
	r  endpointsResolver

	childNameGen *nameGenerator
}

type resourceResolver struct {
	parent           *clusterResolverBalancer
	logger           *grpclog.PrefixLogger
	updateChannel    chan *resourceUpdate
	serializer       *grpcsync.CallbackSerializer
	serializerCancel context.CancelFunc

	// mu protects the slice and map, and the content of the resolvers in the
	// slice.
	mu         sync.Mutex
	mechanisms []DiscoveryMechanism
	children   []discoveryMechanismAndResolver
	// childrenMap's value only needs the resolver implementation (of type
	// endpointsResolver) and the childNameGen; the dm field is not used.
	//
	// TODO(cleanup): maybe we can make a new type with just the necessary
	// fields, and use it here instead.
	childrenMap map[discoveryMechanismKey]discoveryMechanismAndResolver
	// Each new discovery mechanism needs a child name generator to reuse
	// child policy names. But to make sure the names across discovery
	// mechanisms don't conflict, we need a seq ID. This ID is incremented for
	// each new discovery mechanism.
	childNameGeneratorSeqID uint64
}

func newResourceResolver(parent *clusterResolverBalancer, logger *grpclog.PrefixLogger) *resourceResolver {
	rr := &resourceResolver{
		parent:        parent,
		logger:        logger,
		updateChannel: make(chan *resourceUpdate, 1),
		childrenMap:   make(map[discoveryMechanismKey]discoveryMechanismAndResolver),
	}
	ctx, cancel := context.WithCancel(context.Background())
	rr.serializer = grpcsync.NewCallbackSerializer(ctx)
	rr.serializerCancel = cancel
	return rr
}

func equalDiscoveryMechanisms(a, b []DiscoveryMechanism) bool {
	if len(a) != len(b) {
		return false
	}
	for i, aa := range a {
		bb := b[i]
		if !aa.Equal(bb) {
			return false
		}
	}
	return true
}

func discoveryMechanismToKey(dm DiscoveryMechanism) discoveryMechanismKey {
	switch dm.Type {
	case DiscoveryMechanismTypeEDS:
		nameToWatch := dm.EDSServiceName
		if nameToWatch == "" {
			nameToWatch = dm.Cluster
		}
		return discoveryMechanismKey{typ: dm.Type, name: nameToWatch}
	case DiscoveryMechanismTypeLogicalDNS:
		return discoveryMechanismKey{typ: dm.Type, name: dm.DNSHostname}
	default:
		return discoveryMechanismKey{}
	}
}

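// For example, two EDS mechanisms that differ only in circuit breaking limits
// map to the same key and therefore share one endpointsResolver. A sketch
// (values are illustrative; newUint32 is a hypothetical helper returning a
// *uint32):
//
//	a := DiscoveryMechanism{Type: DiscoveryMechanismTypeEDS, Cluster: "c", MaxConcurrentRequests: newUint32(100)}
//	b := DiscoveryMechanism{Type: DiscoveryMechanismTypeEDS, Cluster: "c", MaxConcurrentRequests: newUint32(200)}
//	discoveryMechanismToKey(a) == discoveryMechanismToKey(b) // true: both are {EDS, "c"}
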
func (rr *resourceResolver) updateMechanisms(mechanisms []DiscoveryMechanism) {
	rr.mu.Lock()
	defer rr.mu.Unlock()
	if equalDiscoveryMechanisms(rr.mechanisms, mechanisms) {
		return
	}
	rr.mechanisms = mechanisms
	rr.children = make([]discoveryMechanismAndResolver, len(mechanisms))
	newDMs := make(map[discoveryMechanismKey]bool)

	// Start one watch for each new discovery mechanism {type+resource_name}.
	for i, dm := range mechanisms {
		dmKey := discoveryMechanismToKey(dm)
		newDMs[dmKey] = true
		dmAndResolver, ok := rr.childrenMap[dmKey]
		if ok {
			// If this is not new, keep the fields (especially childNameGen),
			// and only update the DiscoveryMechanism.
			//
			// Note that the same dmKey doesn't mean the same
			// DiscoveryMechanism. There are fields (e.g.
			// MaxConcurrentRequests) in DiscoveryMechanism that are not
			// copied to dmKey; we need to keep those updated.
			dmAndResolver.dm = dm
			rr.children[i] = dmAndResolver
			continue
		}

		// Create resolver for a newly seen resource.
		var resolver endpointsResolver
		switch dm.Type {
		case DiscoveryMechanismTypeEDS:
			resolver = newEDSResolver(dmKey.name, rr.parent.xdsClient, rr, rr.logger)
		case DiscoveryMechanismTypeLogicalDNS:
			resolver = newDNSResolver(dmKey.name, rr, rr.logger)
		}
		dmAndResolver = discoveryMechanismAndResolver{
			dm:           dm,
			r:            resolver,
			childNameGen: newNameGenerator(rr.childNameGeneratorSeqID),
		}
		rr.childrenMap[dmKey] = dmAndResolver
		rr.children[i] = dmAndResolver
		rr.childNameGeneratorSeqID++
	}

	// Stop the resources that were removed.
	for dm, r := range rr.childrenMap {
		if !newDMs[dm] {
			delete(rr.childrenMap, dm)
			go r.r.stop()
		}
	}
	// Regenerate even if there's no change in discovery mechanisms, in case
	// the priority order changed.
	rr.generateLocked(func() {})
}

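// A sketch of how the parent balancer might drive this, with two priorities
// (field values are hypothetical):
//
//	rr.updateMechanisms([]DiscoveryMechanism{
//		{Type: DiscoveryMechanismTypeEDS, Cluster: "cluster-a"},
//		{Type: DiscoveryMechanismTypeLogicalDNS, DNSHostname: "backend.example.com:443"},
//	})
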
// resolveNow is typically called to trigger re-resolution of DNS. The EDS
// resolveNow() is a no-op.
func (rr *resourceResolver) resolveNow() {
	rr.mu.Lock()
	defer rr.mu.Unlock()
	for _, r := range rr.childrenMap {
		r.r.resolveNow()
	}
}

func (rr *resourceResolver) stop(closing bool) {
	rr.mu.Lock()

	// Save the previous childrenMap to stop the children outside the mutex,
	// and reinitialize the map.  We only need to reinitialize to allow for the
	// policy to be reused if the resource comes back.  In practice, this does
	// not happen as the parent LB policy will also be closed, causing this to
	// be removed entirely, but a future use case might want to reuse the
	// policy instead.
	cm := rr.childrenMap
	rr.childrenMap = make(map[discoveryMechanismKey]discoveryMechanismAndResolver)
	rr.mechanisms = nil
	rr.children = nil

	rr.mu.Unlock()

	for _, r := range cm {
		r.r.stop()
	}

	if closing {
		rr.serializerCancel()
		<-rr.serializer.Done()
	}

	// stop() is called when the LB policy is closed or when the underlying
	// cluster resource is removed by the management server. In the latter case,
	// an empty config update needs to be pushed to the child policy to ensure
	// that a picker that fails RPCs is sent up to the channel.
	//
	// Resource resolver implementations are expected to not send any updates
	// after they are stopped. Therefore, we don't have to worry about another
	// write to this channel happening at the same time as this one.
	select {
	case ru := <-rr.updateChannel:
		if ru.onDone != nil {
			ru.onDone()
		}
	default:
	}
	rr.updateChannel <- &resourceUpdate{}
}

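// The select-then-send above is the standard idiom for a channel with a
// one-element buffer that should always hold only the latest value: drain any
// stale value (running its cleanup), then send, which cannot block because
// this goroutine is the only sender. A generic sketch (names hypothetical):
//
//	select {
//	case old := <-ch:
//		old.onDone() // release whoever was waiting on the stale update
//	default:
//	}
//	ch <- latest // the buffer slot is free, so this never blocks
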
// generateLocked collects updates from all resolvers. It pushes the combined
// result on the update channel if all child resolvers have received at least
// one update. Otherwise it returns early.
//
// The onDone callback is invoked inline if not all child resolvers have
// received at least one update. If all child resolvers have received at least
// one update, onDone is invoked when the combined update is processed by the
// clusterresolver LB policy.
//
// Caller must hold rr.mu.
func (rr *resourceResolver) generateLocked(onDone xdsresource.OnDoneFunc) {
	var ret []priorityConfig
	for _, rDM := range rr.children {
		u, ok := rDM.r.lastUpdate()
		if !ok {
			// Don't send updates to parent until all resolvers have an update
			// to send.
			onDone()
			return
		}
		switch uu := u.(type) {
		case xdsresource.EndpointsUpdate:
			ret = append(ret, priorityConfig{mechanism: rDM.dm, edsResp: uu, childNameGen: rDM.childNameGen})
		case []resolver.Endpoint:
			ret = append(ret, priorityConfig{mechanism: rDM.dm, endpoints: uu, childNameGen: rDM.childNameGen})
		}
	}
	select {
	// A previously unprocessed update is dropped in favor of the new one, and
	// the former's onDone callback is invoked to unblock the xDS client's
	// receive path.
	case ru := <-rr.updateChannel:
		if ru.onDone != nil {
			ru.onDone()
		}
	default:
	}
	rr.updateChannel <- &resourceUpdate{priorities: ret, onDone: onDone}
}

func (rr *resourceResolver) onUpdate(onDone xdsresource.OnDoneFunc) {
	handleUpdate := func(context.Context) {
		rr.mu.Lock()
		rr.generateLocked(onDone)
		rr.mu.Unlock()
	}
	rr.serializer.ScheduleOr(handleUpdate, func() { onDone() })
}
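
// grpcsync.CallbackSerializer runs scheduled callbacks one at a time on a
// single goroutine. ScheduleOr runs the first callback there, or runs the
// second callback if the serializer has already been stopped (here, via
// serializerCancel in stop). A usage sketch:
//
//	s := grpcsync.NewCallbackSerializer(ctx)
//	s.ScheduleOr(
//		func(ctx context.Context) { /* runs serially with other callbacks */ },
//		func() { /* serializer closed; clean up instead */ },
//	)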