github.com/imran-kn/cilium-fork@v1.6.9/pkg/envoy/xds/server.go (about) 1 // Copyright 2018 Authors of Cilium 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package xds 16 17 import ( 18 "context" 19 "errors" 20 "fmt" 21 "io" 22 "reflect" 23 "strconv" 24 "strings" 25 "sync/atomic" 26 "time" 27 28 "github.com/cilium/cilium/pkg/logging/logfields" 29 30 envoy_api_v2 "github.com/cilium/proxy/go/envoy/api/v2" 31 "github.com/golang/protobuf/proto" 32 "github.com/golang/protobuf/ptypes/any" 33 "github.com/sirupsen/logrus" 34 "google.golang.org/grpc/codes" 35 ) 36 37 const ( 38 // AnyTypeURL is the default type URL to use for ADS resource sets. 39 AnyTypeURL = "" 40 ) 41 42 var ( 43 // ErrNoADSTypeURL is the error returned when receiving a request without 44 // a type URL from an ADS stream. 45 ErrNoADSTypeURL = errors.New("type URL is required for ADS") 46 47 // ErrUnknownTypeURL is the error returned when receiving a request with 48 // an unknown type URL. 49 ErrUnknownTypeURL = errors.New("unknown type URL") 50 51 // ErrInvalidVersionInfo is the error returned when receiving a request 52 // with a version info that is not a positive integer. 53 ErrInvalidVersionInfo = errors.New("invalid version info") 54 55 // ErrInvalidNonce is the error returned when receiving a request 56 // with a response nonce that is not a positive integer. 57 ErrInvalidResponseNonce = errors.New("invalid response nonce info") 58 59 // ErrInvalidNodeFormat is the error returned when receiving a request 60 // with a node that is not a formatted correctly. 61 ErrInvalidNodeFormat = errors.New("invalid node format") 62 63 // ErrResourceWatch is the error returned whenever an internal error 64 // occurs while waiting for new versions of resources. 65 ErrResourceWatch = errors.New("resource watch failed") 66 67 // grpcCanceled is the string prefix of any gRPC error related 68 // to the stream being canceled. Ignore the description, as it 69 // is derived from the client and may vary, while the code is 70 // set by the gRPC library we link with. 71 // 72 // Ref. vendor/google.golang.org/grpc/status/status.go: 73 // return fmt.Sprintf("rpc error: code = %s desc = %s", codes.Code(p.GetCode()), p.GetMessage()) 74 grpcCanceled = fmt.Sprintf("rpc error: code = %s", codes.Canceled.String()) 75 ) 76 77 // Server implements the handling of xDS streams. 78 type Server struct { 79 // watchers maps each supported type URL to its corresponding resource 80 // watcher. 81 watchers map[string]*ResourceWatcher 82 83 // ackObservers maps each supported type URL to its corresponding observer 84 // of ACKs received from Envoy nodes. 85 ackObservers map[string]ResourceVersionAckObserver 86 87 // lastStreamID is the identifier of the last processed stream. 88 // It is incremented atomically when starting the handling of a new stream. 89 lastStreamID uint64 90 } 91 92 // ResourceTypeConfiguration is the configuration of the XDS server for a 93 // resource type. 94 type ResourceTypeConfiguration struct { 95 // Source contains the resources of this type. 96 Source ObservableResourceSource 97 98 // AckObserver is called back whenever a node acknowledges having applied a 99 // version of the resources of this type. 100 AckObserver ResourceVersionAckObserver 101 } 102 103 // NewServer creates an xDS gRPC stream handler using the given resource 104 // sources. 105 // types maps each supported resource type URL to its corresponding resource 106 // source and ACK observer. 107 func NewServer(resourceTypes map[string]*ResourceTypeConfiguration, 108 resourceAccessTimeout time.Duration) *Server { 109 watchers := make(map[string]*ResourceWatcher, len(resourceTypes)) 110 ackObservers := make(map[string]ResourceVersionAckObserver, len(resourceTypes)) 111 for typeURL, resType := range resourceTypes { 112 w := NewResourceWatcher(typeURL, resType.Source, resourceAccessTimeout) 113 resType.Source.AddResourceVersionObserver(w) 114 watchers[typeURL] = w 115 116 if resType.AckObserver != nil { 117 ackObservers[typeURL] = resType.AckObserver 118 } 119 } 120 121 // TODO: Unregister the watchers when stopping the server. 122 123 return &Server{watchers: watchers, ackObservers: ackObservers} 124 } 125 126 func getXDSRequestFields(req *envoy_api_v2.DiscoveryRequest) logrus.Fields { 127 return logrus.Fields{ 128 logfields.XDSAckedVersion: req.GetVersionInfo(), 129 logfields.XDSClientNode: req.GetNode(), 130 logfields.XDSTypeURL: req.GetTypeUrl(), 131 logfields.XDSNonce: req.GetResponseNonce(), 132 } 133 } 134 135 // HandleRequestStream receives and processes the requests from an xDS stream. 136 func (s *Server) HandleRequestStream(ctx context.Context, stream Stream, defaultTypeURL string) error { 137 // increment stream count 138 streamID := atomic.AddUint64(&s.lastStreamID, 1) 139 140 streamLog := log.WithField(logfields.XDSStreamID, streamID) 141 142 reqCh := make(chan *envoy_api_v2.DiscoveryRequest) 143 144 stopRecv := make(chan struct{}) 145 defer close(stopRecv) 146 147 go func() { 148 defer close(reqCh) 149 for { 150 req, err := stream.Recv() 151 if err != nil { 152 if err == io.EOF { 153 streamLog.Debug("xDS stream closed") 154 } else if strings.HasPrefix(err.Error(), grpcCanceled) { 155 streamLog.WithError(err).Debug("xDS stream canceled") 156 } else { 157 streamLog.WithError(err).Error("error while receiving request from xDS stream") 158 } 159 return 160 } 161 if req == nil { 162 streamLog.Error("received nil request from xDS stream; stopping xDS stream handling") 163 return 164 } 165 if req.GetTypeUrl() == "" { 166 req.TypeUrl = defaultTypeURL 167 } 168 streamLog.WithFields(getXDSRequestFields(req)).Debug("received request from xDS stream") 169 select { 170 case <-stopRecv: 171 streamLog.Debug("stopping xDS stream handling") 172 return 173 case reqCh <- req: 174 } 175 } 176 }() 177 178 return s.processRequestStream(ctx, streamLog, stream, reqCh, defaultTypeURL) 179 } 180 181 // perTypeStreamState is the state maintained per resource type for each 182 // xDS stream. 183 type perTypeStreamState struct { 184 // typeURL identifies the resource type. 185 typeURL string 186 187 // pendingWatchCancel is a pending watch on this resource type. 188 // If nil, no watch is pending. 189 pendingWatchCancel context.CancelFunc 190 191 // version is the last version sent. This is needed so that we'll know 192 // if a new request is an ACK (VersionInfo matches current version), or a NACK 193 // (VersionInfo matches an earlier version). 194 version uint64 195 196 // resourceNames is the list of names of resources sent in the last 197 // response to a request for this resource type. 198 resourceNames []string 199 } 200 201 // processRequestStream processes the requests in an xDS stream from a channel. 202 func (s *Server) processRequestStream(ctx context.Context, streamLog *logrus.Entry, stream Stream, 203 reqCh <-chan *envoy_api_v2.DiscoveryRequest, defaultTypeURL string) error { 204 // The request state for every type URL. 205 typeStates := make([]perTypeStreamState, len(s.watchers)) 206 defer func() { 207 for _, state := range typeStates { 208 if state.pendingWatchCancel != nil { 209 state.pendingWatchCancel() 210 } 211 } 212 }() 213 214 // A map of a resource type's URL to the corresponding index in typeStates 215 // for the resource type. 216 typeIndexes := make(map[string]int, len(typeStates)) 217 218 // The set of channels to select from. Since the set of channels is 219 // dynamic, we use reflection for selection. 220 // The indexes in selectCases from 0 to len(typeStates)-1 match the indexes 221 // in typeStates. 222 selectCases := make([]reflect.SelectCase, len(typeStates)+2) 223 224 // The last select case index is always the request channel. 225 reqChIndex := len(selectCases) - 1 226 selectCases[reqChIndex] = reflect.SelectCase{ 227 Dir: reflect.SelectRecv, 228 Chan: reflect.ValueOf(reqCh), 229 } 230 231 // The next-to-last select case is the context's Done channel. 232 doneChIndex := reqChIndex - 1 233 selectCases[doneChIndex] = reflect.SelectCase{ 234 Dir: reflect.SelectRecv, 235 Chan: reflect.ValueOf(ctx.Done()), 236 } 237 238 // Initially there are no pending watches, so just select a dead channel 239 // that will never be selected. 240 quietCh := make(chan *VersionedResources) 241 defer close(quietCh) 242 quietChValue := reflect.ValueOf(quietCh) 243 244 i := 0 245 for typeURL := range s.watchers { 246 typeStates[i] = perTypeStreamState{ 247 typeURL: typeURL, 248 } 249 250 selectCases[i] = reflect.SelectCase{ 251 Dir: reflect.SelectRecv, 252 Chan: quietChValue, 253 } 254 255 typeIndexes[typeURL] = i 256 257 i++ 258 } 259 260 streamLog.Info("starting xDS stream processing") 261 262 for { 263 // Process either a new request from the xDS stream or a response 264 // from the resource watcher. 265 chosen, recv, recvOK := reflect.Select(selectCases) 266 267 switch chosen { 268 case doneChIndex: // Context got canceled, most likely by the client terminating. 269 streamLog.WithError(ctx.Err()).Debug("xDS stream context canceled") 270 return ctx.Err() 271 272 case reqChIndex: // Request received from the stream. 273 if !recvOK { 274 streamLog.Info("xDS stream closed") 275 return nil 276 } 277 278 req := recv.Interface().(*envoy_api_v2.DiscoveryRequest) 279 280 requestLog := streamLog.WithFields(getXDSRequestFields(req)) 281 282 // Ensure that the version info is a string that was sent by this 283 // server or the empty string (the first request in a stream should 284 // always have an empty version info). 285 var versionInfo uint64 286 if req.GetVersionInfo() != "" { 287 var err error 288 versionInfo, err = strconv.ParseUint(req.VersionInfo, 10, 64) 289 if err != nil { 290 requestLog.Errorf("invalid version info in xDS request, not a uint64") 291 return ErrInvalidVersionInfo 292 } 293 } 294 var nonce uint64 295 if req.GetResponseNonce() != "" { 296 var err error 297 nonce, err = strconv.ParseUint(req.ResponseNonce, 10, 64) 298 if err != nil { 299 requestLog.Error("invalid response nonce info in xDS request, not a uint64") 300 return ErrInvalidResponseNonce 301 } 302 } 303 var detail string 304 status := req.GetErrorDetail() 305 if status != nil { 306 detail = status.Message 307 } 308 309 typeURL := req.GetTypeUrl() 310 if defaultTypeURL == AnyTypeURL && typeURL == "" { 311 requestLog.Error("no type URL given in ADS request") 312 return ErrNoADSTypeURL 313 } 314 315 index, exists := typeIndexes[typeURL] 316 if !exists { 317 requestLog.Error("unknown type URL in xDS request") 318 return ErrUnknownTypeURL 319 } 320 321 state := &typeStates[index] 322 watcher := s.watchers[typeURL] 323 324 nodeIP, err := IstioNodeToIP(req.GetNode()) 325 if err != nil { 326 requestLog.WithError(err).Error("invalid Node in xDS request") 327 return ErrInvalidNodeFormat 328 } 329 330 // Response nonce is always the same as the response version. 331 // Request version indicates the last acked version. If the 332 // response nonce in the request is different (smaller) than 333 // the version, all versions upto that version are acked, but 334 // the versions from that to and including the nonce are nacked. 335 if versionInfo <= nonce { 336 ackObserver := s.ackObservers[typeURL] 337 if ackObserver != nil { 338 requestLog.Debug("notifying observers of ACKs") 339 ackObserver.HandleResourceVersionAck(versionInfo, nonce, nodeIP, state.resourceNames, typeURL, detail) 340 } else { 341 requestLog.Debug("ACK received but no observers are waiting for ACKs") 342 } 343 if versionInfo < nonce { 344 // versions after VersionInfo, upto and including ResponseNonce are NACKed 345 requestLog.Warningf("NACK received for versions after %s and up to %s; waiting for a version update before sending again", req.VersionInfo, req.ResponseNonce) 346 // Watcher will behave as if the sent version was acked. 347 // Otherwise we will just be sending the same failing 348 // version over and over filling logs. 349 versionInfo = state.version 350 } 351 352 if state.pendingWatchCancel != nil { 353 // A pending watch exists for this type URL. Cancel it to 354 // start a new watch. 355 requestLog.Debug("canceling pending watch") 356 state.pendingWatchCancel() 357 } 358 359 respCh := make(chan *VersionedResources, 1) 360 selectCases[index].Chan = reflect.ValueOf(respCh) 361 362 ctx, cancel := context.WithCancel(ctx) 363 state.pendingWatchCancel = cancel 364 365 requestLog.Debugf("starting watch on %d resources", len(req.GetResourceNames())) 366 go watcher.WatchResources(ctx, typeURL, versionInfo, nodeIP, req.GetResourceNames(), respCh) 367 } else { 368 requestLog.Debug("received invalid nonce in xDS request; ignoring request") 369 } 370 default: // Pending watch response. 371 state := &typeStates[chosen] 372 state.pendingWatchCancel() 373 state.pendingWatchCancel = nil 374 375 if !recvOK { 376 streamLog.WithField(logfields.XDSTypeURL, state.typeURL). 377 Error("xDS resource watch failed; terminating") 378 return ErrResourceWatch 379 } 380 381 // Disabling reading from the channel after reading any from it, 382 // since the watcher will close it anyway. 383 selectCases[chosen].Chan = quietChValue 384 385 resp := recv.Interface().(*VersionedResources) 386 387 responseLog := streamLog.WithFields(logrus.Fields{ 388 logfields.XDSCachedVersion: resp.Version, 389 logfields.XDSCanary: resp.Canary, 390 logfields.XDSTypeURL: state.typeURL, 391 logfields.XDSNonce: resp.Version, 392 }) 393 394 resources := make([]*any.Any, len(resp.Resources)) 395 396 // Marshall the resources into protobuf's Any type. 397 for i, res := range resp.Resources { 398 data, err := proto.Marshal(res) 399 if err != nil { 400 responseLog.WithError(err).Errorf("error marshalling xDS response (%d resources)", len(resp.Resources)) 401 return err 402 } 403 resources[i] = &any.Any{ 404 TypeUrl: state.typeURL, 405 Value: data, 406 } 407 } 408 409 responseLog.Debugf("sending xDS response with %d resources", len(resp.Resources)) 410 411 versionStr := strconv.FormatUint(resp.Version, 10) 412 out := &envoy_api_v2.DiscoveryResponse{ 413 VersionInfo: versionStr, 414 Resources: resources, 415 Canary: resp.Canary, 416 TypeUrl: state.typeURL, 417 Nonce: versionStr, 418 } 419 err := stream.Send(out) 420 if err != nil { 421 return err 422 } 423 424 state.version = resp.Version 425 state.resourceNames = resp.ResourceNames 426 } 427 } 428 }