github.com/imran-kn/cilium-fork@v1.6.9/pkg/envoy/xds/server.go (about)

     1  // Copyright 2018 Authors of Cilium
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package xds
    16  
    17  import (
    18  	"context"
    19  	"errors"
    20  	"fmt"
    21  	"io"
    22  	"reflect"
    23  	"strconv"
    24  	"strings"
    25  	"sync/atomic"
    26  	"time"
    27  
    28  	"github.com/cilium/cilium/pkg/logging/logfields"
    29  
    30  	envoy_api_v2 "github.com/cilium/proxy/go/envoy/api/v2"
    31  	"github.com/golang/protobuf/proto"
    32  	"github.com/golang/protobuf/ptypes/any"
    33  	"github.com/sirupsen/logrus"
    34  	"google.golang.org/grpc/codes"
    35  )
    36  
const (
	// AnyTypeURL is the default type URL to use for ADS resource sets.
	// It is the empty string: on a stream handled with this default, every
	// request must carry its own type URL, otherwise the stream is terminated
	// with ErrNoADSTypeURL.
	AnyTypeURL = ""
)
    41  
var (
	// ErrNoADSTypeURL is the error returned when receiving a request without
	// a type URL from an ADS stream.
	ErrNoADSTypeURL = errors.New("type URL is required for ADS")

	// ErrUnknownTypeURL is the error returned when receiving a request with
	// an unknown type URL.
	ErrUnknownTypeURL = errors.New("unknown type URL")

	// ErrInvalidVersionInfo is the error returned when receiving a request
	// with a non-empty version info that cannot be parsed as an unsigned
	// 64-bit decimal integer.
	ErrInvalidVersionInfo = errors.New("invalid version info")

	// ErrInvalidResponseNonce is the error returned when receiving a request
	// with a non-empty response nonce that cannot be parsed as an unsigned
	// 64-bit decimal integer.
	ErrInvalidResponseNonce = errors.New("invalid response nonce info")

	// ErrInvalidNodeFormat is the error returned when receiving a request
	// with a node that is not formatted correctly.
	ErrInvalidNodeFormat = errors.New("invalid node format")

	// ErrResourceWatch is the error returned whenever an internal error
	// occurs while waiting for new versions of resources.
	ErrResourceWatch = errors.New("resource watch failed")

	// grpcCanceled is the string prefix of any gRPC error related
	// to the stream being canceled. Ignore the description, as it
	// is derived from the client and may vary, while the code is
	// set by the gRPC library we link with.
	//
	// Ref. vendor/google.golang.org/grpc/status/status.go:
	// return fmt.Sprintf("rpc error: code = %s desc = %s", codes.Code(p.GetCode()), p.GetMessage())
	grpcCanceled = fmt.Sprintf("rpc error: code = %s", codes.Canceled.String())
)
    76  
// Server implements the handling of xDS streams.
// A Server is created with NewServer, which populates one resource watcher
// per supported type URL.
type Server struct {
	// watchers maps each supported type URL to its corresponding resource
	// watcher.
	watchers map[string]*ResourceWatcher

	// ackObservers maps each supported type URL to its corresponding observer
	// of ACKs received from Envoy nodes. Type URLs configured without an
	// observer have no entry in this map.
	ackObservers map[string]ResourceVersionAckObserver

	// lastStreamID is the identifier of the last processed stream.
	// It is incremented atomically when starting the handling of a new stream.
	// NOTE(review): it is accessed through sync/atomic, which requires 64-bit
	// alignment of the field on 32-bit platforms — keep that in mind if the
	// fields of this struct are ever reordered.
	lastStreamID uint64
}
    91  
// ResourceTypeConfiguration is the configuration of the XDS server for a
// resource type.
type ResourceTypeConfiguration struct {
	// Source contains the resources of this type.
	// NewServer registers a resource watcher on it as a version observer.
	Source ObservableResourceSource

	// AckObserver is called back whenever a node acknowledges having applied a
	// version of the resources of this type.
	// It may be nil, in which case no observer is registered for the type.
	AckObserver ResourceVersionAckObserver
}
   102  
   103  // NewServer creates an xDS gRPC stream handler using the given resource
   104  // sources.
   105  // types maps each supported resource type URL to its corresponding resource
   106  // source and ACK observer.
   107  func NewServer(resourceTypes map[string]*ResourceTypeConfiguration,
   108  	resourceAccessTimeout time.Duration) *Server {
   109  	watchers := make(map[string]*ResourceWatcher, len(resourceTypes))
   110  	ackObservers := make(map[string]ResourceVersionAckObserver, len(resourceTypes))
   111  	for typeURL, resType := range resourceTypes {
   112  		w := NewResourceWatcher(typeURL, resType.Source, resourceAccessTimeout)
   113  		resType.Source.AddResourceVersionObserver(w)
   114  		watchers[typeURL] = w
   115  
   116  		if resType.AckObserver != nil {
   117  			ackObservers[typeURL] = resType.AckObserver
   118  		}
   119  	}
   120  
   121  	// TODO: Unregister the watchers when stopping the server.
   122  
   123  	return &Server{watchers: watchers, ackObservers: ackObservers}
   124  }
   125  
   126  func getXDSRequestFields(req *envoy_api_v2.DiscoveryRequest) logrus.Fields {
   127  	return logrus.Fields{
   128  		logfields.XDSAckedVersion: req.GetVersionInfo(),
   129  		logfields.XDSClientNode:   req.GetNode(),
   130  		logfields.XDSTypeURL:      req.GetTypeUrl(),
   131  		logfields.XDSNonce:        req.GetResponseNonce(),
   132  	}
   133  }
   134  
   135  // HandleRequestStream receives and processes the requests from an xDS stream.
   136  func (s *Server) HandleRequestStream(ctx context.Context, stream Stream, defaultTypeURL string) error {
   137  	// increment stream count
   138  	streamID := atomic.AddUint64(&s.lastStreamID, 1)
   139  
   140  	streamLog := log.WithField(logfields.XDSStreamID, streamID)
   141  
   142  	reqCh := make(chan *envoy_api_v2.DiscoveryRequest)
   143  
   144  	stopRecv := make(chan struct{})
   145  	defer close(stopRecv)
   146  
   147  	go func() {
   148  		defer close(reqCh)
   149  		for {
   150  			req, err := stream.Recv()
   151  			if err != nil {
   152  				if err == io.EOF {
   153  					streamLog.Debug("xDS stream closed")
   154  				} else if strings.HasPrefix(err.Error(), grpcCanceled) {
   155  					streamLog.WithError(err).Debug("xDS stream canceled")
   156  				} else {
   157  					streamLog.WithError(err).Error("error while receiving request from xDS stream")
   158  				}
   159  				return
   160  			}
   161  			if req == nil {
   162  				streamLog.Error("received nil request from xDS stream; stopping xDS stream handling")
   163  				return
   164  			}
   165  			if req.GetTypeUrl() == "" {
   166  				req.TypeUrl = defaultTypeURL
   167  			}
   168  			streamLog.WithFields(getXDSRequestFields(req)).Debug("received request from xDS stream")
   169  			select {
   170  			case <-stopRecv:
   171  				streamLog.Debug("stopping xDS stream handling")
   172  				return
   173  			case reqCh <- req:
   174  			}
   175  		}
   176  	}()
   177  
   178  	return s.processRequestStream(ctx, streamLog, stream, reqCh, defaultTypeURL)
   179  }
   180  
// perTypeStreamState is the state maintained per resource type for each
// xDS stream.
type perTypeStreamState struct {
	// typeURL identifies the resource type.
	typeURL string

	// pendingWatchCancel cancels the pending watch on this resource type.
	// If nil, no watch is pending.
	pendingWatchCancel context.CancelFunc

	// version is the last version sent. This is needed so that we'll know
	// if a new request is an ACK (VersionInfo matches current version), or a NACK
	// (VersionInfo matches an earlier version).
	version uint64

	// resourceNames is the list of names of resources sent in the last
	// response to a request for this resource type. It is passed to the ACK
	// observer when a subsequent request for this type is received.
	resourceNames []string
}
   200  
// processRequestStream processes the requests in an xDS stream from a channel.
//
// It loops over three kinds of events until the stream ends:
//   - cancellation of ctx, which returns ctx.Err();
//   - a request read from reqCh, which is validated and, if acceptable,
//     (re)starts a watch on the requested resource type; closing reqCh ends
//     the loop with a nil error;
//   - a response from a pending watch, which is marshalled and sent on stream.
//
// A malformed request terminates the stream with one of
// ErrInvalidVersionInfo, ErrInvalidResponseNonce, ErrNoADSTypeURL,
// ErrUnknownTypeURL or ErrInvalidNodeFormat. ErrResourceWatch is returned
// when a watch's response channel is closed without delivering a value, and
// errors from marshalling or from stream.Send are returned unchanged.
// All watches still pending are canceled when the function returns.
func (s *Server) processRequestStream(ctx context.Context, streamLog *logrus.Entry, stream Stream,
	reqCh <-chan *envoy_api_v2.DiscoveryRequest, defaultTypeURL string) error {
	// The request state for every type URL.
	typeStates := make([]perTypeStreamState, len(s.watchers))
	defer func() {
		// Cancel every watch that is still pending on exit.
		for _, state := range typeStates {
			if state.pendingWatchCancel != nil {
				state.pendingWatchCancel()
			}
		}
	}()

	// A map of a resource type's URL to the corresponding index in typeStates
	// for the resource type.
	typeIndexes := make(map[string]int, len(typeStates))

	// The set of channels to select from. Since the set of channels is
	// dynamic, we use reflection for selection.
	// The indexes in selectCases from 0 to len(typeStates)-1 match the indexes
	// in typeStates.
	selectCases := make([]reflect.SelectCase, len(typeStates)+2)

	// The last select case index is always the request channel.
	reqChIndex := len(selectCases) - 1
	selectCases[reqChIndex] = reflect.SelectCase{
		Dir:  reflect.SelectRecv,
		Chan: reflect.ValueOf(reqCh),
	}

	// The next-to-last select case is the context's Done channel.
	doneChIndex := reqChIndex - 1
	selectCases[doneChIndex] = reflect.SelectCase{
		Dir:  reflect.SelectRecv,
		Chan: reflect.ValueOf(ctx.Done()),
	}

	// Initially there are no pending watches, so just select a dead channel
	// that will never be selected.
	quietCh := make(chan *VersionedResources)
	defer close(quietCh)
	quietChValue := reflect.ValueOf(quietCh)

	// Assign each supported type URL a slot shared by typeStates and
	// selectCases, and remember the slot's index by type URL.
	i := 0
	for typeURL := range s.watchers {
		typeStates[i] = perTypeStreamState{
			typeURL: typeURL,
		}

		selectCases[i] = reflect.SelectCase{
			Dir:  reflect.SelectRecv,
			Chan: quietChValue,
		}

		typeIndexes[typeURL] = i

		i++
	}

	streamLog.Info("starting xDS stream processing")

	for {
		// Process either a new request from the xDS stream or a response
		// from the resource watcher.
		chosen, recv, recvOK := reflect.Select(selectCases)

		switch chosen {
		case doneChIndex: // Context got canceled, most likely by the client terminating.
			streamLog.WithError(ctx.Err()).Debug("xDS stream context canceled")
			return ctx.Err()

		case reqChIndex: // Request received from the stream.
			if !recvOK {
				// The request channel was closed: no more requests will come.
				streamLog.Info("xDS stream closed")
				return nil
			}

			req := recv.Interface().(*envoy_api_v2.DiscoveryRequest)

			requestLog := streamLog.WithFields(getXDSRequestFields(req))

			// Ensure that the version info is either the empty string (the
			// first request in a stream should always have an empty version
			// info) or parses as a uint64, which is the format of the versions
			// sent by this server. An empty version info is treated as 0.
			var versionInfo uint64
			if req.GetVersionInfo() != "" {
				var err error
				versionInfo, err = strconv.ParseUint(req.VersionInfo, 10, 64)
				if err != nil {
					requestLog.Errorf("invalid version info in xDS request, not a uint64")
					return ErrInvalidVersionInfo
				}
			}
			// The response nonce follows the same rules as the version info.
			var nonce uint64
			if req.GetResponseNonce() != "" {
				var err error
				nonce, err = strconv.ParseUint(req.ResponseNonce, 10, 64)
				if err != nil {
					requestLog.Error("invalid response nonce info in xDS request, not a uint64")
					return ErrInvalidResponseNonce
				}
			}
			// The error detail message, if any, is forwarded to the ACK
			// observer below.
			var detail string
			status := req.GetErrorDetail()
			if status != nil {
				detail = status.Message
			}

			typeURL := req.GetTypeUrl()
			if defaultTypeURL == AnyTypeURL && typeURL == "" {
				requestLog.Error("no type URL given in ADS request")
				return ErrNoADSTypeURL
			}

			index, exists := typeIndexes[typeURL]
			if !exists {
				requestLog.Error("unknown type URL in xDS request")
				return ErrUnknownTypeURL
			}

			state := &typeStates[index]
			watcher := s.watchers[typeURL]

			nodeIP, err := IstioNodeToIP(req.GetNode())
			if err != nil {
				requestLog.WithError(err).Error("invalid Node in xDS request")
				return ErrInvalidNodeFormat
			}

			// Response nonce is always the same as the response version.
			// Request version indicates the last acked version. If the
			// response nonce in the request is different (smaller) than
			// the version, all versions up to that version are acked, but
			// the versions from that to and including the nonce are nacked.
			if versionInfo <= nonce {
				ackObserver := s.ackObservers[typeURL]
				if ackObserver != nil {
					requestLog.Debug("notifying observers of ACKs")
					ackObserver.HandleResourceVersionAck(versionInfo, nonce, nodeIP, state.resourceNames, typeURL, detail)
				} else {
					requestLog.Debug("ACK received but no observers are waiting for ACKs")
				}
				if versionInfo < nonce {
					// versions after VersionInfo, up to and including ResponseNonce are NACKed
					requestLog.Warningf("NACK received for versions after %s and up to %s; waiting for a version update before sending again", req.VersionInfo, req.ResponseNonce)
					// Watcher will behave as if the sent version was acked.
					// Otherwise we will just be sending the same failing
					// version over and over filling logs.
					versionInfo = state.version
				}

				if state.pendingWatchCancel != nil {
					// A pending watch exists for this type URL. Cancel it to
					// start a new watch.
					requestLog.Debug("canceling pending watch")
					state.pendingWatchCancel()
				}

				// Replace this type's select case with a fresh response
				// channel, so that anything sent on the channel of a canceled
				// watch is no longer observed.
				// NOTE(review): the capacity of 1 presumably lets the watcher
				// deliver its result without blocking — confirm against
				// WatchResources.
				respCh := make(chan *VersionedResources, 1)
				selectCases[index].Chan = reflect.ValueOf(respCh)

				// This ctx shadows the stream's ctx only within this block:
				// every watch gets its own direct child of the stream context.
				ctx, cancel := context.WithCancel(ctx)
				state.pendingWatchCancel = cancel

				requestLog.Debugf("starting watch on %d resources", len(req.GetResourceNames()))
				go watcher.WatchResources(ctx, typeURL, versionInfo, nodeIP, req.GetResourceNames(), respCh)
			} else {
				// The acked version is greater than the nonce: drop the
				// request but keep the stream open.
				requestLog.Debug("received invalid nonce in xDS request; ignoring request")
			}
		default: // Pending watch response.
			// Any other index is a type's slot, shared with typeStates.
			// The watch has completed; release its context.
			state := &typeStates[chosen]
			state.pendingWatchCancel()
			state.pendingWatchCancel = nil

			if !recvOK {
				// The response channel was closed without a value.
				streamLog.WithField(logfields.XDSTypeURL, state.typeURL).
					Error("xDS resource watch failed; terminating")
				return ErrResourceWatch
			}

			// Disabling reading from the channel after reading any from it,
			// since the watcher will close it anyway.
			selectCases[chosen].Chan = quietChValue

			resp := recv.Interface().(*VersionedResources)

			responseLog := streamLog.WithFields(logrus.Fields{
				logfields.XDSCachedVersion: resp.Version,
				logfields.XDSCanary:        resp.Canary,
				logfields.XDSTypeURL:       state.typeURL,
				logfields.XDSNonce:         resp.Version,
			})

			resources := make([]*any.Any, len(resp.Resources))

			// Marshal the resources into protobuf's Any type.
			for i, res := range resp.Resources {
				data, err := proto.Marshal(res)
				if err != nil {
					responseLog.WithError(err).Errorf("error marshalling xDS response (%d resources)", len(resp.Resources))
					return err
				}
				resources[i] = &any.Any{
					TypeUrl: state.typeURL,
					Value:   data,
				}
			}

			responseLog.Debugf("sending xDS response with %d resources", len(resp.Resources))

			// The nonce is set to the version, so that the version and nonce
			// of later requests can be compared numerically.
			versionStr := strconv.FormatUint(resp.Version, 10)
			out := &envoy_api_v2.DiscoveryResponse{
				VersionInfo: versionStr,
				Resources:   resources,
				Canary:      resp.Canary,
				TypeUrl:     state.typeURL,
				Nonce:       versionStr,
			}
			err := stream.Send(out)
			if err != nil {
				return err
			}

			// Record what was sent only once the send has succeeded.
			state.version = resp.Version
			state.resourceNames = resp.ResourceNames
		}
	}
}