gitee.com/ks-custle/core-gm@v0.0.0-20230922171213-b83bdd97b62c/grpc/xds/internal/xdsclient/controller/transport.go (about)

     1  /*
     2   *
     3   * Copyright 2020 gRPC authors.
     4   *
     5   * Licensed under the Apache License, Version 2.0 (the "License");
     6   * you may not use this file except in compliance with the License.
     7   * You may obtain a copy of the License at
     8   *
     9   *     http://www.apache.org/licenses/LICENSE-2.0
    10   *
    11   * Unless required by applicable law or agreed to in writing, software
    12   * distributed under the License is distributed on an "AS IS" BASIS,
    13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    14   * See the License for the specific language governing permissions and
    15   * limitations under the License.
    16   *
    17   */
    18  
    19  package controller
    20  
    21  import (
    22  	"context"
    23  	"fmt"
    24  	"time"
    25  
    26  	grpc "gitee.com/ks-custle/core-gm/grpc"
    27  	controllerversion "gitee.com/ks-custle/core-gm/grpc/xds/internal/xdsclient/controller/version"
    28  	xdsresourceversion "gitee.com/ks-custle/core-gm/grpc/xds/internal/xdsclient/controller/version"
    29  	"gitee.com/ks-custle/core-gm/grpc/xds/internal/xdsclient/load"
    30  	"gitee.com/ks-custle/core-gm/grpc/xds/internal/xdsclient/xdsresource"
    31  	"github.com/golang/protobuf/proto"
    32  )
    33  
    34  // AddWatch adds a watch for an xDS resource given its type and name.
    35  func (t *Controller) AddWatch(rType xdsresource.ResourceType, resourceName string) {
    36  	t.sendCh.Put(&watchAction{
    37  		rType:    rType,
    38  		remove:   false,
    39  		resource: resourceName,
    40  	})
    41  }
    42  
    43  // RemoveWatch cancels an already registered watch for an xDS resource
    44  // given its type and name.
    45  func (t *Controller) RemoveWatch(rType xdsresource.ResourceType, resourceName string) {
    46  	t.sendCh.Put(&watchAction{
    47  		rType:    rType,
    48  		remove:   true,
    49  		resource: resourceName,
    50  	})
    51  }
    52  
    53  // run starts an ADS stream (and backs off exponentially, if the previous
    54  // stream failed without receiving a single reply) and runs the sender and
    55  // receiver routines to send and receive data from the stream respectively.
    56  func (t *Controller) run(ctx context.Context) {
    57  	go t.send(ctx)
    58  	// TODO: start a goroutine monitoring ClientConn's connectivity state, and
    59  	// report error (and log) when stats is transient failure.
    60  
    61  	retries := 0
    62  	for {
    63  		select {
    64  		case <-ctx.Done():
    65  			return
    66  		default:
    67  		}
    68  
    69  		if retries != 0 {
    70  			timer := time.NewTimer(t.backoff(retries))
    71  			select {
    72  			case <-timer.C:
    73  			case <-ctx.Done():
    74  				if !timer.Stop() {
    75  					<-timer.C
    76  				}
    77  				return
    78  			}
    79  		}
    80  
    81  		retries++
    82  		stream, err := t.vClient.NewStream(ctx, t.cc)
    83  		if err != nil {
    84  			t.updateHandler.NewConnectionError(err)
    85  			t.logger.Warningf("xds: ADS stream creation failed: %v", err)
    86  			continue
    87  		}
    88  		t.logger.Infof("ADS stream created")
    89  
    90  		select {
    91  		case <-t.streamCh:
    92  		default:
    93  		}
    94  		t.streamCh <- stream
    95  		if t.recv(stream) {
    96  			retries = 0
    97  		}
    98  	}
    99  }
   100  
   101  // send is a separate goroutine for sending watch requests on the xds stream.
   102  //
   103  // It watches the stream channel for new streams, and the request channel for
   104  // new requests to send on the stream.
   105  //
   106  // For each new request (watchAction), it's
   107  //   - processed and added to the watch map
   108  //   - so resend will pick them up when there are new streams
   109  //   - sent on the current stream if there's one
   110  //   - the current stream is cleared when any send on it fails
   111  //
   112  // For each new stream, all the existing requests will be resent.
   113  //
   114  // Note that this goroutine doesn't do anything to the old stream when there's a
   115  // new one. In fact, there should be only one stream in progress, and new one
   116  // should only be created when the old one fails (recv returns an error).
   117  func (t *Controller) send(ctx context.Context) {
   118  	var stream grpc.ClientStream
   119  	for {
   120  		select {
   121  		case <-ctx.Done():
   122  			return
   123  		case stream = <-t.streamCh:
   124  			if !t.sendExisting(stream) {
   125  				// send failed, clear the current stream.
   126  				stream = nil
   127  			}
   128  		case u := <-t.sendCh.Get():
   129  			t.sendCh.Load()
   130  
   131  			var (
   132  				target                 []string
   133  				rType                  xdsresource.ResourceType
   134  				version, nonce, errMsg string
   135  				send                   bool
   136  			)
   137  			switch update := u.(type) {
   138  			case *watchAction:
   139  				target, rType, version, nonce = t.processWatchInfo(update)
   140  			case *ackAction:
   141  				target, rType, version, nonce, send = t.processAckInfo(update, stream)
   142  				if !send {
   143  					continue
   144  				}
   145  				errMsg = update.errMsg
   146  			}
   147  			if stream == nil {
   148  				// There's no stream yet. Skip the request. This request
   149  				// will be resent to the new streams. If no stream is
   150  				// created, the watcher will timeout (same as server not
   151  				// sending response back).
   152  				continue
   153  			}
   154  			if err := t.vClient.SendRequest(stream, target, rType, version, nonce, errMsg); err != nil {
   155  				t.logger.Warningf("ADS request for {target: %q, type: %v, version: %q, nonce: %q} failed: %v", target, rType, version, nonce, err)
   156  				// send failed, clear the current stream.
   157  				stream = nil
   158  			}
   159  		}
   160  	}
   161  }
   162  
   163  // sendExisting sends out xDS requests for registered watchers when recovering
   164  // from a broken stream.
   165  //
   166  // We call stream.Send() here with the lock being held. It should be OK to do
   167  // that here because the stream has just started and Send() usually returns
   168  // quickly (once it pushes the message onto the transport layer) and is only
   169  // ever blocked if we don't have enough flow control quota.
   170  func (t *Controller) sendExisting(stream grpc.ClientStream) bool {
   171  	t.mu.Lock()
   172  	defer t.mu.Unlock()
   173  
   174  	// Reset the ack versions when the stream restarts.
   175  	t.versionMap = make(map[xdsresource.ResourceType]string)
   176  	t.nonceMap = make(map[xdsresource.ResourceType]string)
   177  
   178  	for rType, s := range t.watchMap {
   179  		if err := t.vClient.SendRequest(stream, mapToSlice(s), rType, "", "", ""); err != nil {
   180  			t.logger.Warningf("ADS request failed: %v", err)
   181  			return false
   182  		}
   183  	}
   184  
   185  	return true
   186  }
   187  
   188  // recv receives xDS responses on the provided ADS stream and branches out to
   189  // message specific handlers.
   190  func (t *Controller) recv(stream grpc.ClientStream) bool {
   191  	success := false
   192  	for {
   193  		resp, err := t.vClient.RecvResponse(stream)
   194  		if err != nil {
   195  			t.updateHandler.NewConnectionError(err)
   196  			t.logger.Warningf("ADS stream is closed with error: %v", err)
   197  			return success
   198  		}
   199  
   200  		rType, version, nonce, err := t.handleResponse(resp)
   201  
   202  		if e, ok := err.(xdsresourceversion.ErrResourceTypeUnsupported); ok {
   203  			t.logger.Warningf("%s", e.ErrStr)
   204  			continue
   205  		}
   206  		if err != nil {
   207  			t.sendCh.Put(&ackAction{
   208  				rType:   rType,
   209  				version: "",
   210  				nonce:   nonce,
   211  				errMsg:  err.Error(),
   212  				stream:  stream,
   213  			})
   214  			t.logger.Warningf("Sending NACK for response type: %v, version: %v, nonce: %v, reason: %v", rType, version, nonce, err)
   215  			continue
   216  		}
   217  		t.sendCh.Put(&ackAction{
   218  			rType:   rType,
   219  			version: version,
   220  			nonce:   nonce,
   221  			stream:  stream,
   222  		})
   223  		t.logger.Infof("Sending ACK for response type: %v, version: %v, nonce: %v", rType, version, nonce)
   224  		success = true
   225  	}
   226  }
   227  
   228  func (t *Controller) handleResponse(resp proto.Message) (xdsresource.ResourceType, string, string, error) {
   229  	rType, resource, version, nonce, err := t.vClient.ParseResponse(resp)
   230  	if err != nil {
   231  		return rType, version, nonce, err
   232  	}
   233  	opts := &xdsresource.UnmarshalOptions{
   234  		Version:         version,
   235  		Resources:       resource,
   236  		Logger:          t.logger,
   237  		UpdateValidator: t.updateValidator,
   238  	}
   239  	var md xdsresource.UpdateMetadata
   240  	switch rType {
   241  	case xdsresource.ListenerResource:
   242  		var update map[string]xdsresource.ListenerUpdateErrTuple
   243  		update, md, err = xdsresource.UnmarshalListener(opts)
   244  		t.updateHandler.NewListeners(update, md)
   245  	case xdsresource.RouteConfigResource:
   246  		var update map[string]xdsresource.RouteConfigUpdateErrTuple
   247  		update, md, err = xdsresource.UnmarshalRouteConfig(opts)
   248  		t.updateHandler.NewRouteConfigs(update, md)
   249  	case xdsresource.ClusterResource:
   250  		var update map[string]xdsresource.ClusterUpdateErrTuple
   251  		update, md, err = xdsresource.UnmarshalCluster(opts)
   252  		t.updateHandler.NewClusters(update, md)
   253  	case xdsresource.EndpointsResource:
   254  		var update map[string]xdsresource.EndpointsUpdateErrTuple
   255  		update, md, err = xdsresource.UnmarshalEndpoints(opts)
   256  		t.updateHandler.NewEndpoints(update, md)
   257  	default:
   258  		return rType, "", "", xdsresourceversion.ErrResourceTypeUnsupported{
   259  			ErrStr: fmt.Sprintf("Resource type %v unknown in response from server", rType),
   260  		}
   261  	}
   262  	return rType, version, nonce, err
   263  }
   264  
   265  func mapToSlice(m map[string]bool) []string {
   266  	ret := make([]string, 0, len(m))
   267  	for i := range m {
   268  		ret = append(ret, i)
   269  	}
   270  	return ret
   271  }
   272  
   273  type watchAction struct {
   274  	rType    xdsresource.ResourceType
   275  	remove   bool // Whether this is to remove watch for the resource.
   276  	resource string
   277  }
   278  
   279  // processWatchInfo pulls the fields needed by the request from a watchAction.
   280  //
   281  // It also updates the watch map.
   282  func (t *Controller) processWatchInfo(w *watchAction) (target []string, rType xdsresource.ResourceType, ver, nonce string) {
   283  	t.mu.Lock()
   284  	defer t.mu.Unlock()
   285  
   286  	var current map[string]bool
   287  	current, ok := t.watchMap[w.rType]
   288  	if !ok {
   289  		current = make(map[string]bool)
   290  		t.watchMap[w.rType] = current
   291  	}
   292  
   293  	if w.remove {
   294  		delete(current, w.resource)
   295  		if len(current) == 0 {
   296  			delete(t.watchMap, w.rType)
   297  		}
   298  	} else {
   299  		current[w.resource] = true
   300  	}
   301  
   302  	rType = w.rType
   303  	target = mapToSlice(current)
   304  	// We don't reset version or nonce when a new watch is started. The version
   305  	// and nonce from previous response are carried by the request unless the
   306  	// stream is recreated.
   307  	ver = t.versionMap[rType]
   308  	nonce = t.nonceMap[rType]
   309  	return target, rType, ver, nonce
   310  }
   311  
   312  type ackAction struct {
   313  	rType   xdsresource.ResourceType
   314  	version string // NACK if version is an empty string.
   315  	nonce   string
   316  	errMsg  string // Empty unless it's a NACK.
   317  	// ACK/NACK are tagged with the stream it's for. When the stream is down,
   318  	// all the ACK/NACK for this stream will be dropped, and the version/nonce
   319  	// won't be updated.
   320  	stream grpc.ClientStream
   321  }
   322  
   323  // processAckInfo pulls the fields needed by the ack request from a ackAction.
   324  //
   325  // If no active watch is found for this ack, it returns false for send.
   326  func (t *Controller) processAckInfo(ack *ackAction, stream grpc.ClientStream) (target []string, rType xdsresource.ResourceType, version, nonce string, send bool) {
   327  	if ack.stream != stream {
   328  		// If ACK's stream isn't the current sending stream, this means the ACK
   329  		// was pushed to queue before the old stream broke, and a new stream has
   330  		// been started since. Return immediately here so we don't update the
   331  		// nonce for the new stream.
   332  		return nil, xdsresource.UnknownResource, "", "", false
   333  	}
   334  	rType = ack.rType
   335  
   336  	t.mu.Lock()
   337  	defer t.mu.Unlock()
   338  
   339  	// Update the nonce no matter if we are going to send the ACK request on
   340  	// wire. We may not send the request if the watch is canceled. But the nonce
   341  	// needs to be updated so the next request will have the right nonce.
   342  	nonce = ack.nonce
   343  	t.nonceMap[rType] = nonce
   344  
   345  	s, ok := t.watchMap[rType]
   346  	if !ok || len(s) == 0 {
   347  		// We don't send the request ack if there's no active watch (this can be
   348  		// either the server sends responses before any request, or the watch is
   349  		// canceled while the ackAction is in queue), because there's no resource
   350  		// name. And if we send a request with empty resource name list, the
   351  		// server may treat it as a wild card and send us everything.
   352  		return nil, xdsresource.UnknownResource, "", "", false
   353  	}
   354  	send = true
   355  	target = mapToSlice(s)
   356  
   357  	version = ack.version
   358  	if version == "" {
   359  		// This is a nack, get the previous acked version.
   360  		version = t.versionMap[rType]
   361  		// version will still be an empty string if rType isn't
   362  		// found in versionMap, this can happen if there wasn't any ack
   363  		// before.
   364  	} else {
   365  		t.versionMap[rType] = version
   366  	}
   367  	return target, rType, version, nonce, send
   368  }
   369  
   370  // reportLoad starts an LRS stream to report load data to the management server.
   371  // It blocks until the context is cancelled.
   372  func (t *Controller) reportLoad(ctx context.Context, cc *grpc.ClientConn, opts controllerversion.LoadReportingOptions) {
   373  	retries := 0
   374  	for {
   375  		if ctx.Err() != nil {
   376  			return
   377  		}
   378  
   379  		if retries != 0 {
   380  			timer := time.NewTimer(t.backoff(retries))
   381  			select {
   382  			case <-timer.C:
   383  			case <-ctx.Done():
   384  				if !timer.Stop() {
   385  					<-timer.C
   386  				}
   387  				return
   388  			}
   389  		}
   390  
   391  		retries++
   392  		stream, err := t.vClient.NewLoadStatsStream(ctx, cc)
   393  		if err != nil {
   394  			t.logger.Warningf("lrs: failed to create stream: %v", err)
   395  			continue
   396  		}
   397  		t.logger.Infof("lrs: created LRS stream")
   398  
   399  		if err := t.vClient.SendFirstLoadStatsRequest(stream); err != nil {
   400  			t.logger.Warningf("lrs: failed to send first request: %v", err)
   401  			continue
   402  		}
   403  
   404  		clusters, interval, err := t.vClient.HandleLoadStatsResponse(stream)
   405  		if err != nil {
   406  			t.logger.Warningf("%v", err)
   407  			continue
   408  		}
   409  
   410  		retries = 0
   411  		t.sendLoads(ctx, stream, opts.LoadStore, clusters, interval)
   412  	}
   413  }
   414  
   415  func (t *Controller) sendLoads(ctx context.Context, stream grpc.ClientStream, store *load.Store, clusterNames []string, interval time.Duration) {
   416  	tick := time.NewTicker(interval)
   417  	defer tick.Stop()
   418  	for {
   419  		select {
   420  		case <-tick.C:
   421  		case <-ctx.Done():
   422  			return
   423  		}
   424  		if err := t.vClient.SendLoadStatsRequest(stream, store.Stats(clusterNames)); err != nil {
   425  			t.logger.Warningf("%v", err)
   426  			return
   427  		}
   428  	}
   429  }