dubbo.apache.org/dubbo-go/v3@v3.1.1/xds/client/controller/transport.go (about)

     1  /*
     2   * Licensed to the Apache Software Foundation (ASF) under one or more
     3   * contributor license agreements.  See the NOTICE file distributed with
     4   * this work for additional information regarding copyright ownership.
     5   * The ASF licenses this file to You under the Apache License, Version 2.0
     6   * (the "License"); you may not use this file except in compliance with
     7   * the License.  You may obtain a copy of the License at
     8   *
     9   *     http://www.apache.org/licenses/LICENSE-2.0
    10   *
    11   * Unless required by applicable law or agreed to in writing, software
    12   * distributed under the License is distributed on an "AS IS" BASIS,
    13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    14   * See the License for the specific language governing permissions and
    15   * limitations under the License.
    16   */
    17  
    18  /*
    19   *
    20   * Copyright 2021 gRPC authors.
    21   *
    22   */
    23  
    24  package controller
    25  
    26  import (
    27  	"context"
    28  	"fmt"
    29  	"time"
    30  )
    31  
    32  import (
    33  	"github.com/golang/protobuf/proto"
    34  
    35  	"google.golang.org/grpc"
    36  )
    37  
    38  import (
    39  	resourceversion "dubbo.apache.org/dubbo-go/v3/xds/client/controller/version"
    40  	"dubbo.apache.org/dubbo-go/v3/xds/client/load"
    41  	"dubbo.apache.org/dubbo-go/v3/xds/client/resource"
    42  )
    43  
    44  // AddWatch adds a watch for an xDS resource given its type and name.
    45  func (t *Controller) AddWatch(rType resource.ResourceType, resourceName string) {
    46  	t.sendCh.Put(&watchAction{
    47  		rType:    rType,
    48  		remove:   false,
    49  		resource: resourceName,
    50  	})
    51  }
    52  
    53  // RemoveWatch cancels an already registered watch for an xDS resource
    54  // given its type and name.
    55  func (t *Controller) RemoveWatch(rType resource.ResourceType, resourceName string) {
    56  	t.sendCh.Put(&watchAction{
    57  		rType:    rType,
    58  		remove:   true,
    59  		resource: resourceName,
    60  	})
    61  }
    62  
    63  // run starts an ADS stream (and backs off exponentially, if the previous
    64  // stream failed without receiving a single reply) and runs the sender and
    65  // receiver routines to send and receive data from the stream respectively.
    66  func (t *Controller) run(ctx context.Context) {
    67  	go t.send(ctx)
    68  	// TODO: start a goroutine monitoring ClientConn's connectivity state, and
    69  	// report error (and log) when stats is transient failure.
    70  
    71  	retries := 0
    72  	for {
    73  		select {
    74  		case <-ctx.Done():
    75  			return
    76  		default:
    77  		}
    78  
    79  		if retries != 0 {
    80  			timer := time.NewTimer(t.backoff(retries))
    81  			select {
    82  			case <-timer.C:
    83  			case <-ctx.Done():
    84  				if !timer.Stop() {
    85  					<-timer.C
    86  				}
    87  				return
    88  			}
    89  		}
    90  
    91  		retries++
    92  		stream, err := t.vClient.NewStream(ctx, t.cc)
    93  		if err != nil {
    94  			t.updateHandler.NewConnectionError(err)
    95  			t.logger.Warnf("xds: ADS stream creation failed: %v", err)
    96  			continue
    97  		}
    98  		t.logger.Infof("ADS stream created")
    99  
   100  		select {
   101  		case <-t.streamCh:
   102  		default:
   103  		}
   104  		t.streamCh <- stream
   105  		if t.recv(stream) {
   106  			retries = 0
   107  		}
   108  	}
   109  }
   110  
   111  // send is a separate goroutine for sending watch requests on the xds stream.
   112  //
   113  // It watches the stream channel for new streams, and the request channel for
   114  // new requests to send on the stream.
   115  //
   116  // For each new request (watchAction), it's
   117  //   - processed and added to the watch map
   118  //   - so resend will pick them up when there are new streams
   119  //   - sent on the current stream if there's one
   120  //   - the current stream is cleared when any send on it fails
   121  //
   122  // For each new stream, all the existing requests will be resent.
   123  //
   124  // Note that this goroutine doesn't do anything to the old stream when there's a
   125  // new one. In fact, there should be only one stream in progress, and new one
   126  // should only be created when the old one fails (recv returns an error).
   127  func (t *Controller) send(ctx context.Context) {
   128  	var stream grpc.ClientStream
   129  	for {
   130  		select {
   131  		case <-ctx.Done():
   132  			return
   133  		case stream = <-t.streamCh:
   134  			if !t.sendExisting(stream) {
   135  				// send failed, clear the current stream.
   136  				stream = nil
   137  			}
   138  		case u := <-t.sendCh.Get():
   139  			t.sendCh.Load()
   140  
   141  			var (
   142  				target                 []string
   143  				rType                  resource.ResourceType
   144  				version, nonce, errMsg string
   145  				send                   bool
   146  			)
   147  			switch update := u.(type) {
   148  			case *watchAction:
   149  				target, rType, version, nonce = t.processWatchInfo(update)
   150  			case *ackAction:
   151  				target, rType, version, nonce, send = t.processAckInfo(update, stream)
   152  				if !send {
   153  					continue
   154  				}
   155  				errMsg = update.errMsg
   156  			}
   157  			if stream == nil {
   158  				// There's no stream yet. Skip the request. This request
   159  				// will be resent to the new streams. If no stream is
   160  				// created, the watcher will timeout (same as server not
   161  				// sending response back).
   162  				continue
   163  			}
   164  			if err := t.vClient.SendRequest(stream, target, rType, version, nonce, errMsg); err != nil {
   165  				t.logger.Warnf("ADS request for {target: %q, type: %v, version: %q, nonce: %q} failed: %v", target, rType, version, nonce, err)
   166  				// send failed, clear the current stream.
   167  				stream = nil
   168  			}
   169  		}
   170  	}
   171  }
   172  
   173  // sendExisting sends out xDS requests for registered watchers when recovering
   174  // from a broken stream.
   175  //
   176  // We call stream.Send() here with the lock being held. It should be OK to do
   177  // that here because the stream has just started and Send() usually returns
   178  // quickly (once it pushes the message onto the transport layer) and is only
   179  // ever blocked if we don't have enough flow control quota.
   180  func (t *Controller) sendExisting(stream grpc.ClientStream) bool {
   181  	t.mu.Lock()
   182  	defer t.mu.Unlock()
   183  
   184  	// Reset the ack versions when the stream restarts.
   185  	t.versionMap = make(map[resource.ResourceType]string)
   186  	t.nonceMap = make(map[resource.ResourceType]string)
   187  
   188  	for rType, s := range t.watchMap {
   189  		if err := t.vClient.SendRequest(stream, mapToSlice(s), rType, "", "", ""); err != nil {
   190  			t.logger.Warnf("ADS request failed: %v", err)
   191  			return false
   192  		}
   193  	}
   194  
   195  	return true
   196  }
   197  
   198  // recv receives xDS responses on the provided ADS stream and branches out to
   199  // message specific handlers.
   200  func (t *Controller) recv(stream grpc.ClientStream) bool {
   201  	success := false
   202  	for {
   203  		resp, err := t.vClient.RecvResponse(stream)
   204  		if err != nil {
   205  			t.updateHandler.NewConnectionError(err)
   206  			t.logger.Warnf("ADS stream is closed with error: %v", err)
   207  			return success
   208  		}
   209  
   210  		rType, version, nonce, err := t.handleResponse(resp)
   211  
   212  		if e, ok := err.(resourceversion.ErrResourceTypeUnsupported); ok {
   213  			t.logger.Warnf("%s", e.ErrStr)
   214  			continue
   215  		}
   216  		if err != nil {
   217  			t.sendCh.Put(&ackAction{
   218  				rType:   rType,
   219  				version: "",
   220  				nonce:   nonce,
   221  				errMsg:  err.Error(),
   222  				stream:  stream,
   223  			})
   224  			t.logger.Warnf("Sending NACK for response type: %v, version: %v, nonce: %v, reason: %v", rType, version, nonce, err)
   225  			continue
   226  		}
   227  		t.sendCh.Put(&ackAction{
   228  			rType:   rType,
   229  			version: version,
   230  			nonce:   nonce,
   231  			stream:  stream,
   232  		})
   233  		t.logger.Infof("Sending ACK for response type: %v, version: %v, nonce: %v", rType, version, nonce)
   234  		success = true
   235  	}
   236  }
   237  
   238  func (t *Controller) handleResponse(resp proto.Message) (resource.ResourceType, string, string, error) {
   239  	rType, resources, version, nonce, err := t.vClient.ParseResponse(resp)
   240  	if err != nil {
   241  		return rType, version, nonce, err
   242  	}
   243  	opts := &resource.UnmarshalOptions{
   244  		Version:         version,
   245  		Resources:       resources,
   246  		Logger:          t.logger,
   247  		UpdateValidator: t.updateValidator,
   248  	}
   249  	var md resource.UpdateMetadata
   250  	switch rType {
   251  	case resource.ListenerResource:
   252  		var update map[string]resource.ListenerUpdateErrTuple
   253  		update, md, err = resource.UnmarshalListener(opts)
   254  		t.updateHandler.NewListeners(update, md)
   255  	case resource.RouteConfigResource:
   256  		var update map[string]resource.RouteConfigUpdateErrTuple
   257  		update, md, err = resource.UnmarshalRouteConfig(opts)
   258  		t.updateHandler.NewRouteConfigs(update, md)
   259  	case resource.ClusterResource:
   260  		var update map[string]resource.ClusterUpdateErrTuple
   261  		update, md, err = resource.UnmarshalCluster(opts)
   262  		t.updateHandler.NewClusters(update, md)
   263  	case resource.EndpointsResource:
   264  		var update map[string]resource.EndpointsUpdateErrTuple
   265  		update, md, err = resource.UnmarshalEndpoints(opts)
   266  		t.updateHandler.NewEndpoints(update, md)
   267  	default:
   268  		return rType, "", "", resourceversion.ErrResourceTypeUnsupported{
   269  			ErrStr: fmt.Sprintf("Resource type %v unknown in response from server", rType),
   270  		}
   271  	}
   272  	return rType, version, nonce, err
   273  }
   274  
   275  func mapToSlice(m map[string]bool) []string {
   276  	ret := make([]string, 0, len(m))
   277  	for i := range m {
   278  		ret = append(ret, i)
   279  	}
   280  	return ret
   281  }
   282  
   283  type watchAction struct {
   284  	rType    resource.ResourceType
   285  	remove   bool // Whether this is to remove watch for the resource.
   286  	resource string
   287  }
   288  
   289  // processWatchInfo pulls the fields needed by the request from a watchAction.
   290  //
   291  // It also updates the watch map.
   292  func (t *Controller) processWatchInfo(w *watchAction) (target []string, rType resource.ResourceType, ver, nonce string) {
   293  	t.mu.Lock()
   294  	defer t.mu.Unlock()
   295  
   296  	var current map[string]bool
   297  	current, ok := t.watchMap[w.rType]
   298  	if !ok {
   299  		current = make(map[string]bool)
   300  		t.watchMap[w.rType] = current
   301  	}
   302  
   303  	if w.remove {
   304  		delete(current, w.resource)
   305  		if len(current) == 0 {
   306  			delete(t.watchMap, w.rType)
   307  		}
   308  	} else {
   309  		current[w.resource] = true
   310  	}
   311  
   312  	rType = w.rType
   313  	target = mapToSlice(current)
   314  	// We don't reset version or nonce when a new watch is started. The version
   315  	// and nonce from previous response are carried by the request unless the
   316  	// stream is recreated.
   317  	ver = t.versionMap[rType]
   318  	nonce = t.nonceMap[rType]
   319  	return target, rType, ver, nonce
   320  }
   321  
   322  type ackAction struct {
   323  	rType   resource.ResourceType
   324  	version string // NACK if version is an empty string.
   325  	nonce   string
   326  	errMsg  string // Empty unless it's a NACK.
   327  	// ACK/NACK are tagged with the stream it's for. When the stream is down,
   328  	// all the ACK/NACK for this stream will be dropped, and the version/nonce
   329  	// won't be updated.
   330  	stream grpc.ClientStream
   331  }
   332  
   333  // processAckInfo pulls the fields needed by the ack request from a ackAction.
   334  //
   335  // If no active watch is found for this ack, it returns false for send.
   336  func (t *Controller) processAckInfo(ack *ackAction, stream grpc.ClientStream) (target []string, rType resource.ResourceType, version, nonce string, send bool) {
   337  	if ack.stream != stream {
   338  		// If ACK's stream isn't the current sending stream, this means the ACK
   339  		// was pushed to queue before the old stream broke, and a new stream has
   340  		// been started since. Return immediately here so we don't update the
   341  		// nonce for the new stream.
   342  		return nil, resource.UnknownResource, "", "", false
   343  	}
   344  	rType = ack.rType
   345  
   346  	t.mu.Lock()
   347  	defer t.mu.Unlock()
   348  
   349  	// Update the nonce no matter if we are going to send the ACK request on
   350  	// wire. We may not send the request if the watch is canceled. But the nonce
   351  	// needs to be updated so the next request will have the right nonce.
   352  	nonce = ack.nonce
   353  	t.nonceMap[rType] = nonce
   354  
   355  	s, ok := t.watchMap[rType]
   356  	if !ok || len(s) == 0 {
   357  		// We don't send the request ack if there's no active watch (this can be
   358  		// either the server sends responses before any request, or the watch is
   359  		// canceled while the ackAction is in queue), because there's no resource
   360  		// name. And if we send a request with empty resource name list, the
   361  		// server may treat it as a wild card and send us everything.
   362  		return nil, resource.UnknownResource, "", "", false
   363  	}
   364  	send = true
   365  	target = mapToSlice(s)
   366  
   367  	version = ack.version
   368  	if version == "" {
   369  		// This is a nack, get the previous acked version.
   370  		version = t.versionMap[rType]
   371  		// version will still be an empty string if rType isn't
   372  		// found in versionMap, this can happen if there wasn't any ack
   373  		// before.
   374  	} else {
   375  		t.versionMap[rType] = version
   376  	}
   377  	return target, rType, version, nonce, send
   378  }
   379  
   380  // reportLoad starts an LRS stream to report load data to the management server.
   381  // It blocks until the context is canceled.
   382  func (t *Controller) reportLoad(ctx context.Context, cc *grpc.ClientConn, opts resourceversion.LoadReportingOptions) {
   383  	retries := 0
   384  	for {
   385  		if ctx.Err() != nil {
   386  			return
   387  		}
   388  
   389  		if retries != 0 {
   390  			timer := time.NewTimer(t.backoff(retries))
   391  			select {
   392  			case <-timer.C:
   393  			case <-ctx.Done():
   394  				if !timer.Stop() {
   395  					<-timer.C
   396  				}
   397  				return
   398  			}
   399  		}
   400  
   401  		retries++
   402  		stream, err := t.vClient.NewLoadStatsStream(ctx, cc)
   403  		if err != nil {
   404  			t.logger.Warnf("lrs: failed to create stream: %v", err)
   405  			continue
   406  		}
   407  		t.logger.Infof("lrs: created LRS stream")
   408  
   409  		if err = t.vClient.SendFirstLoadStatsRequest(stream); err != nil {
   410  			t.logger.Warnf("lrs: failed to send first request: %v", err)
   411  			continue
   412  		}
   413  
   414  		clusters, interval, err := t.vClient.HandleLoadStatsResponse(stream)
   415  		if err != nil {
   416  			t.logger.Warnf("%v", err)
   417  			continue
   418  		}
   419  
   420  		retries = 0
   421  		t.sendLoads(ctx, stream, opts.LoadStore, clusters, interval)
   422  	}
   423  }
   424  
   425  func (t *Controller) sendLoads(ctx context.Context, stream grpc.ClientStream, store *load.Store, clusterNames []string, interval time.Duration) {
   426  	tick := time.NewTicker(interval)
   427  	defer tick.Stop()
   428  	for {
   429  		select {
   430  		case <-tick.C:
   431  		case <-ctx.Done():
   432  			return
   433  		}
   434  		if err := t.vClient.SendLoadStatsRequest(stream, store.Stats(clusterNames)); err != nil {
   435  			t.logger.Warnf("%v", err)
   436  			return
   437  		}
   438  	}
   439  }