github.phpd.cn/cilium/cilium@v1.6.12/pkg/envoy/xds/server.go (about)

     1  // Copyright 2018 Authors of Cilium
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package xds
    16  
    17  import (
    18  	"context"
    19  	"errors"
    20  	"fmt"
    21  	"io"
    22  	"reflect"
    23  	"strconv"
    24  	"strings"
    25  	"sync/atomic"
    26  	"time"
    27  
    28  	"github.com/cilium/cilium/pkg/logging/logfields"
    29  
    30  	envoy_api_v2 "github.com/cilium/proxy/go/envoy/api/v2"
    31  	"github.com/golang/protobuf/proto"
    32  	"github.com/golang/protobuf/ptypes/any"
    33  	"github.com/sirupsen/logrus"
    34  	"google.golang.org/grpc/codes"
    35  )
    36  
    37  const (
    38  	// AnyTypeURL is the default type URL to use for ADS resource sets.
    39  	AnyTypeURL = ""
    40  )
    41  
    42  var (
    43  	// ErrNoADSTypeURL is the error returned when receiving a request without
    44  	// a type URL from an ADS stream.
    45  	ErrNoADSTypeURL = errors.New("type URL is required for ADS")
    46  
    47  	// ErrUnknownTypeURL is the error returned when receiving a request with
    48  	// an unknown type URL.
    49  	ErrUnknownTypeURL = errors.New("unknown type URL")
    50  
    51  	// ErrInvalidVersionInfo is the error returned when receiving a request
    52  	// with a version info that is not a positive integer.
    53  	ErrInvalidVersionInfo = errors.New("invalid version info")
    54  
    55  	// ErrInvalidNonce is the error returned when receiving a request
    56  	// with a response nonce that is not a positive integer.
    57  	ErrInvalidResponseNonce = errors.New("invalid response nonce info")
    58  
    59  	// ErrInvalidNodeFormat is the error returned when receiving a request
    60  	// with a node that is not a formatted correctly.
    61  	ErrInvalidNodeFormat = errors.New("invalid node format")
    62  
    63  	// ErrResourceWatch is the error returned whenever an internal error
    64  	// occurs while waiting for new versions of resources.
    65  	ErrResourceWatch = errors.New("resource watch failed")
    66  
    67  	// grpcCanceled is the string prefix of any gRPC error related
    68  	// to the stream being canceled. Ignore the description, as it
    69  	// is derived from the client and may vary, while the code is
    70  	// set by the gRPC library we link with.
    71  	//
    72  	// Ref. vendor/google.golang.org/grpc/status/status.go:
    73  	// return fmt.Sprintf("rpc error: code = %s desc = %s", codes.Code(p.GetCode()), p.GetMessage())
    74  	grpcCanceled = fmt.Sprintf("rpc error: code = %s", codes.Canceled.String())
    75  )
    76  
    77  // Server implements the handling of xDS streams.
    78  type Server struct {
    79  	// watchers maps each supported type URL to its corresponding resource
    80  	// watcher.
    81  	watchers map[string]*ResourceWatcher
    82  
    83  	// ackObservers maps each supported type URL to its corresponding observer
    84  	// of ACKs received from Envoy nodes.
    85  	ackObservers map[string]ResourceVersionAckObserver
    86  
    87  	// lastStreamID is the identifier of the last processed stream.
    88  	// It is incremented atomically when starting the handling of a new stream.
    89  	lastStreamID uint64
    90  }
    91  
    92  // ResourceTypeConfiguration is the configuration of the XDS server for a
    93  // resource type.
    94  type ResourceTypeConfiguration struct {
    95  	// Source contains the resources of this type.
    96  	Source ObservableResourceSource
    97  
    98  	// AckObserver is called back whenever a node acknowledges having applied a
    99  	// version of the resources of this type.
   100  	AckObserver ResourceVersionAckObserver
   101  }
   102  
   103  // NewServer creates an xDS gRPC stream handler using the given resource
   104  // sources.
   105  // types maps each supported resource type URL to its corresponding resource
   106  // source and ACK observer.
   107  func NewServer(resourceTypes map[string]*ResourceTypeConfiguration,
   108  	resourceAccessTimeout time.Duration) *Server {
   109  	watchers := make(map[string]*ResourceWatcher, len(resourceTypes))
   110  	ackObservers := make(map[string]ResourceVersionAckObserver, len(resourceTypes))
   111  	for typeURL, resType := range resourceTypes {
   112  		w := NewResourceWatcher(typeURL, resType.Source, resourceAccessTimeout)
   113  		resType.Source.AddResourceVersionObserver(w)
   114  		watchers[typeURL] = w
   115  
   116  		if resType.AckObserver != nil {
   117  			ackObservers[typeURL] = resType.AckObserver
   118  		}
   119  	}
   120  
   121  	// TODO: Unregister the watchers when stopping the server.
   122  
   123  	return &Server{watchers: watchers, ackObservers: ackObservers}
   124  }
   125  
   126  func getXDSRequestFields(req *envoy_api_v2.DiscoveryRequest) logrus.Fields {
   127  	return logrus.Fields{
   128  		logfields.XDSAckedVersion: req.GetVersionInfo(),
   129  		logfields.XDSTypeURL:      req.GetTypeUrl(),
   130  		logfields.XDSNonce:        req.GetResponseNonce(),
   131  	}
   132  }
   133  
   134  // HandleRequestStream receives and processes the requests from an xDS stream.
   135  func (s *Server) HandleRequestStream(ctx context.Context, stream Stream, defaultTypeURL string) error {
   136  	// increment stream count
   137  	streamID := atomic.AddUint64(&s.lastStreamID, 1)
   138  
   139  	streamLog := log.WithField(logfields.XDSStreamID, streamID)
   140  
   141  	reqCh := make(chan *envoy_api_v2.DiscoveryRequest)
   142  
   143  	stopRecv := make(chan struct{})
   144  	defer close(stopRecv)
   145  
   146  	nodeId := ""
   147  
   148  	go func() {
   149  		defer close(reqCh)
   150  		for {
   151  			req, err := stream.Recv()
   152  			if err != nil {
   153  				if err == io.EOF {
   154  					streamLog.Debug("xDS stream closed")
   155  				} else if strings.HasPrefix(err.Error(), grpcCanceled) {
   156  					streamLog.WithError(err).Debug("xDS stream canceled")
   157  				} else {
   158  					streamLog.WithError(err).Error("error while receiving request from xDS stream")
   159  				}
   160  				return
   161  			}
   162  			if req == nil {
   163  				streamLog.Error("received nil request from xDS stream; stopping xDS stream handling")
   164  				return
   165  			}
   166  			if req.GetTypeUrl() == "" {
   167  				req.TypeUrl = defaultTypeURL
   168  			}
   169  			if nodeId == "" {
   170  				nodeId = req.GetNode().GetId()
   171  				streamLog = streamLog.WithField(logfields.XDSClientNode, nodeId)
   172  			}
   173  			streamLog.WithFields(getXDSRequestFields(req)).Debug("received request from xDS stream")
   174  
   175  			select {
   176  			case <-stopRecv:
   177  				streamLog.Debug("stopping xDS stream handling")
   178  				return
   179  			case reqCh <- req:
   180  			}
   181  		}
   182  	}()
   183  
   184  	return s.processRequestStream(ctx, streamLog, stream, reqCh, defaultTypeURL)
   185  }
   186  
   187  // perTypeStreamState is the state maintained per resource type for each
   188  // xDS stream.
   189  type perTypeStreamState struct {
   190  	// typeURL identifies the resource type.
   191  	typeURL string
   192  
   193  	// pendingWatchCancel is a pending watch on this resource type.
   194  	// If nil, no watch is pending.
   195  	pendingWatchCancel context.CancelFunc
   196  
   197  	// version is the last version sent. This is needed so that we'll know
   198  	// if a new request is an ACK (VersionInfo matches current version), or a NACK
   199  	// (VersionInfo matches an earlier version).
   200  	version uint64
   201  
   202  	// resourceNames is the list of names of resources sent in the last
   203  	// response to a request for this resource type.
   204  	resourceNames []string
   205  }
   206  
   207  // processRequestStream processes the requests in an xDS stream from a channel.
   208  func (s *Server) processRequestStream(ctx context.Context, streamLog *logrus.Entry, stream Stream,
   209  	reqCh <-chan *envoy_api_v2.DiscoveryRequest, defaultTypeURL string) error {
   210  	// The request state for every type URL.
   211  	typeStates := make([]perTypeStreamState, len(s.watchers))
   212  	defer func() {
   213  		for _, state := range typeStates {
   214  			if state.pendingWatchCancel != nil {
   215  				state.pendingWatchCancel()
   216  			}
   217  		}
   218  	}()
   219  
   220  	// A map of a resource type's URL to the corresponding index in typeStates
   221  	// for the resource type.
   222  	typeIndexes := make(map[string]int, len(typeStates))
   223  
   224  	// The set of channels to select from. Since the set of channels is
   225  	// dynamic, we use reflection for selection.
   226  	// The indexes in selectCases from 0 to len(typeStates)-1 match the indexes
   227  	// in typeStates.
   228  	selectCases := make([]reflect.SelectCase, len(typeStates)+2)
   229  
   230  	// The last select case index is always the request channel.
   231  	reqChIndex := len(selectCases) - 1
   232  	selectCases[reqChIndex] = reflect.SelectCase{
   233  		Dir:  reflect.SelectRecv,
   234  		Chan: reflect.ValueOf(reqCh),
   235  	}
   236  
   237  	// The next-to-last select case is the context's Done channel.
   238  	doneChIndex := reqChIndex - 1
   239  	selectCases[doneChIndex] = reflect.SelectCase{
   240  		Dir:  reflect.SelectRecv,
   241  		Chan: reflect.ValueOf(ctx.Done()),
   242  	}
   243  
   244  	// Initially there are no pending watches, so just select a dead channel
   245  	// that will never be selected.
   246  	quietCh := make(chan *VersionedResources)
   247  	defer close(quietCh)
   248  	quietChValue := reflect.ValueOf(quietCh)
   249  
   250  	i := 0
   251  	for typeURL := range s.watchers {
   252  		typeStates[i] = perTypeStreamState{
   253  			typeURL: typeURL,
   254  		}
   255  
   256  		selectCases[i] = reflect.SelectCase{
   257  			Dir:  reflect.SelectRecv,
   258  			Chan: quietChValue,
   259  		}
   260  
   261  		typeIndexes[typeURL] = i
   262  
   263  		i++
   264  	}
   265  
   266  	streamLog.Info("starting xDS stream processing")
   267  
   268  	nodeIP := ""
   269  
   270  	for {
   271  		// Process either a new request from the xDS stream or a response
   272  		// from the resource watcher.
   273  		chosen, recv, recvOK := reflect.Select(selectCases)
   274  
   275  		switch chosen {
   276  		case doneChIndex: // Context got canceled, most likely by the client terminating.
   277  			streamLog.WithError(ctx.Err()).Debug("xDS stream context canceled")
   278  			return ctx.Err()
   279  
   280  		case reqChIndex: // Request received from the stream.
   281  			if !recvOK {
   282  				streamLog.Info("xDS stream closed")
   283  				return nil
   284  			}
   285  
   286  			req := recv.Interface().(*envoy_api_v2.DiscoveryRequest)
   287  
   288  			// only require Node to exist in the first request
   289  			if nodeIP == "" {
   290  				id := req.GetNode().GetId()
   291  				streamLog = streamLog.WithField(logfields.XDSClientNode, id)
   292  				var err error
   293  				nodeIP, err = IstioNodeToIP(id)
   294  				if err != nil {
   295  					streamLog.WithError(err).Error("invalid Node in xDS request")
   296  					return ErrInvalidNodeFormat
   297  				}
   298  			}
   299  
   300  			requestLog := streamLog.WithFields(getXDSRequestFields(req))
   301  
   302  			// Ensure that the version info is a string that was sent by this
   303  			// server or the empty string (the first request in a stream should
   304  			// always have an empty version info).
   305  			var versionInfo uint64
   306  			if req.GetVersionInfo() != "" {
   307  				var err error
   308  				versionInfo, err = strconv.ParseUint(req.VersionInfo, 10, 64)
   309  				if err != nil {
   310  					requestLog.Errorf("invalid version info in xDS request, not a uint64")
   311  					return ErrInvalidVersionInfo
   312  				}
   313  			}
   314  			var nonce uint64
   315  			if req.GetResponseNonce() != "" {
   316  				var err error
   317  				nonce, err = strconv.ParseUint(req.ResponseNonce, 10, 64)
   318  				if err != nil {
   319  					requestLog.Error("invalid response nonce info in xDS request, not a uint64")
   320  					return ErrInvalidResponseNonce
   321  				}
   322  			}
   323  			var detail string
   324  			status := req.GetErrorDetail()
   325  			if status != nil {
   326  				detail = status.Message
   327  			}
   328  
   329  			typeURL := req.GetTypeUrl()
   330  			if defaultTypeURL == AnyTypeURL && typeURL == "" {
   331  				requestLog.Error("no type URL given in ADS request")
   332  				return ErrNoADSTypeURL
   333  			}
   334  
   335  			index, exists := typeIndexes[typeURL]
   336  			if !exists {
   337  				requestLog.Error("unknown type URL in xDS request")
   338  				return ErrUnknownTypeURL
   339  			}
   340  
   341  			state := &typeStates[index]
   342  			watcher := s.watchers[typeURL]
   343  
   344  			// Response nonce is always the same as the response version.
   345  			// Request version indicates the last acked version. If the
   346  			// response nonce in the request is different (smaller) than
   347  			// the version, all versions upto that version are acked, but
   348  			// the versions from that to and including the nonce are nacked.
   349  			if versionInfo <= nonce {
   350  				ackObserver := s.ackObservers[typeURL]
   351  				if ackObserver != nil {
   352  					requestLog.Debug("notifying observers of ACKs")
   353  					ackObserver.HandleResourceVersionAck(versionInfo, nonce, nodeIP, state.resourceNames, typeURL, detail)
   354  				} else {
   355  					requestLog.Debug("ACK received but no observers are waiting for ACKs")
   356  				}
   357  				if versionInfo < nonce {
   358  					// versions after VersionInfo, upto and including ResponseNonce are NACKed
   359  					requestLog.WithField(logfields.XDSDetail, detail).Warningf("NACK received for versions after %s and up to %s; waiting for a version update before sending again", req.VersionInfo, req.ResponseNonce)
   360  					// Watcher will behave as if the sent version was acked.
   361  					// Otherwise we will just be sending the same failing
   362  					// version over and over filling logs.
   363  					versionInfo = state.version
   364  				}
   365  
   366  				if state.pendingWatchCancel != nil {
   367  					// A pending watch exists for this type URL. Cancel it to
   368  					// start a new watch.
   369  					requestLog.Debug("canceling pending watch")
   370  					state.pendingWatchCancel()
   371  				}
   372  
   373  				respCh := make(chan *VersionedResources, 1)
   374  				selectCases[index].Chan = reflect.ValueOf(respCh)
   375  
   376  				ctx, cancel := context.WithCancel(ctx)
   377  				state.pendingWatchCancel = cancel
   378  
   379  				requestLog.Debugf("starting watch on %d resources", len(req.GetResourceNames()))
   380  				go watcher.WatchResources(ctx, typeURL, versionInfo, nodeIP, req.GetResourceNames(), respCh)
   381  			} else {
   382  				requestLog.Debug("received invalid nonce in xDS request; ignoring request")
   383  			}
   384  		default: // Pending watch response.
   385  			state := &typeStates[chosen]
   386  			state.pendingWatchCancel()
   387  			state.pendingWatchCancel = nil
   388  
   389  			if !recvOK {
   390  				streamLog.WithField(logfields.XDSTypeURL, state.typeURL).
   391  					Error("xDS resource watch failed; terminating")
   392  				return ErrResourceWatch
   393  			}
   394  
   395  			// Disabling reading from the channel after reading any from it,
   396  			// since the watcher will close it anyway.
   397  			selectCases[chosen].Chan = quietChValue
   398  
   399  			resp := recv.Interface().(*VersionedResources)
   400  
   401  			responseLog := streamLog.WithFields(logrus.Fields{
   402  				logfields.XDSCachedVersion: resp.Version,
   403  				logfields.XDSCanary:        resp.Canary,
   404  				logfields.XDSTypeURL:       state.typeURL,
   405  				logfields.XDSNonce:         resp.Version,
   406  			})
   407  
   408  			resources := make([]*any.Any, len(resp.Resources))
   409  
   410  			// Marshall the resources into protobuf's Any type.
   411  			for i, res := range resp.Resources {
   412  				data, err := proto.Marshal(res)
   413  				if err != nil {
   414  					responseLog.WithError(err).Errorf("error marshalling xDS response (%d resources)", len(resp.Resources))
   415  					return err
   416  				}
   417  				resources[i] = &any.Any{
   418  					TypeUrl: state.typeURL,
   419  					Value:   data,
   420  				}
   421  			}
   422  
   423  			responseLog.Debugf("sending xDS response with %d resources", len(resp.Resources))
   424  
   425  			versionStr := strconv.FormatUint(resp.Version, 10)
   426  			out := &envoy_api_v2.DiscoveryResponse{
   427  				VersionInfo: versionStr,
   428  				Resources:   resources,
   429  				Canary:      resp.Canary,
   430  				TypeUrl:     state.typeURL,
   431  				Nonce:       versionStr,
   432  			}
   433  			err := stream.Send(out)
   434  			if err != nil {
   435  				return err
   436  			}
   437  
   438  			state.version = resp.Version
   439  			state.resourceNames = resp.ResourceNames
   440  		}
   441  	}
   442  }