github.phpd.cn/cilium/cilium@v1.6.12/pkg/envoy/xds/server.go (about) 1 // Copyright 2018 Authors of Cilium 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package xds 16 17 import ( 18 "context" 19 "errors" 20 "fmt" 21 "io" 22 "reflect" 23 "strconv" 24 "strings" 25 "sync/atomic" 26 "time" 27 28 "github.com/cilium/cilium/pkg/logging/logfields" 29 30 envoy_api_v2 "github.com/cilium/proxy/go/envoy/api/v2" 31 "github.com/golang/protobuf/proto" 32 "github.com/golang/protobuf/ptypes/any" 33 "github.com/sirupsen/logrus" 34 "google.golang.org/grpc/codes" 35 ) 36 37 const ( 38 // AnyTypeURL is the default type URL to use for ADS resource sets. 39 AnyTypeURL = "" 40 ) 41 42 var ( 43 // ErrNoADSTypeURL is the error returned when receiving a request without 44 // a type URL from an ADS stream. 45 ErrNoADSTypeURL = errors.New("type URL is required for ADS") 46 47 // ErrUnknownTypeURL is the error returned when receiving a request with 48 // an unknown type URL. 49 ErrUnknownTypeURL = errors.New("unknown type URL") 50 51 // ErrInvalidVersionInfo is the error returned when receiving a request 52 // with a version info that is not a positive integer. 53 ErrInvalidVersionInfo = errors.New("invalid version info") 54 55 // ErrInvalidNonce is the error returned when receiving a request 56 // with a response nonce that is not a positive integer. 57 ErrInvalidResponseNonce = errors.New("invalid response nonce info") 58 59 // ErrInvalidNodeFormat is the error returned when receiving a request 60 // with a node that is not a formatted correctly. 61 ErrInvalidNodeFormat = errors.New("invalid node format") 62 63 // ErrResourceWatch is the error returned whenever an internal error 64 // occurs while waiting for new versions of resources. 65 ErrResourceWatch = errors.New("resource watch failed") 66 67 // grpcCanceled is the string prefix of any gRPC error related 68 // to the stream being canceled. Ignore the description, as it 69 // is derived from the client and may vary, while the code is 70 // set by the gRPC library we link with. 71 // 72 // Ref. vendor/google.golang.org/grpc/status/status.go: 73 // return fmt.Sprintf("rpc error: code = %s desc = %s", codes.Code(p.GetCode()), p.GetMessage()) 74 grpcCanceled = fmt.Sprintf("rpc error: code = %s", codes.Canceled.String()) 75 ) 76 77 // Server implements the handling of xDS streams. 78 type Server struct { 79 // watchers maps each supported type URL to its corresponding resource 80 // watcher. 81 watchers map[string]*ResourceWatcher 82 83 // ackObservers maps each supported type URL to its corresponding observer 84 // of ACKs received from Envoy nodes. 85 ackObservers map[string]ResourceVersionAckObserver 86 87 // lastStreamID is the identifier of the last processed stream. 88 // It is incremented atomically when starting the handling of a new stream. 89 lastStreamID uint64 90 } 91 92 // ResourceTypeConfiguration is the configuration of the XDS server for a 93 // resource type. 94 type ResourceTypeConfiguration struct { 95 // Source contains the resources of this type. 96 Source ObservableResourceSource 97 98 // AckObserver is called back whenever a node acknowledges having applied a 99 // version of the resources of this type. 100 AckObserver ResourceVersionAckObserver 101 } 102 103 // NewServer creates an xDS gRPC stream handler using the given resource 104 // sources. 105 // types maps each supported resource type URL to its corresponding resource 106 // source and ACK observer. 107 func NewServer(resourceTypes map[string]*ResourceTypeConfiguration, 108 resourceAccessTimeout time.Duration) *Server { 109 watchers := make(map[string]*ResourceWatcher, len(resourceTypes)) 110 ackObservers := make(map[string]ResourceVersionAckObserver, len(resourceTypes)) 111 for typeURL, resType := range resourceTypes { 112 w := NewResourceWatcher(typeURL, resType.Source, resourceAccessTimeout) 113 resType.Source.AddResourceVersionObserver(w) 114 watchers[typeURL] = w 115 116 if resType.AckObserver != nil { 117 ackObservers[typeURL] = resType.AckObserver 118 } 119 } 120 121 // TODO: Unregister the watchers when stopping the server. 122 123 return &Server{watchers: watchers, ackObservers: ackObservers} 124 } 125 126 func getXDSRequestFields(req *envoy_api_v2.DiscoveryRequest) logrus.Fields { 127 return logrus.Fields{ 128 logfields.XDSAckedVersion: req.GetVersionInfo(), 129 logfields.XDSTypeURL: req.GetTypeUrl(), 130 logfields.XDSNonce: req.GetResponseNonce(), 131 } 132 } 133 134 // HandleRequestStream receives and processes the requests from an xDS stream. 135 func (s *Server) HandleRequestStream(ctx context.Context, stream Stream, defaultTypeURL string) error { 136 // increment stream count 137 streamID := atomic.AddUint64(&s.lastStreamID, 1) 138 139 streamLog := log.WithField(logfields.XDSStreamID, streamID) 140 141 reqCh := make(chan *envoy_api_v2.DiscoveryRequest) 142 143 stopRecv := make(chan struct{}) 144 defer close(stopRecv) 145 146 nodeId := "" 147 148 go func() { 149 defer close(reqCh) 150 for { 151 req, err := stream.Recv() 152 if err != nil { 153 if err == io.EOF { 154 streamLog.Debug("xDS stream closed") 155 } else if strings.HasPrefix(err.Error(), grpcCanceled) { 156 streamLog.WithError(err).Debug("xDS stream canceled") 157 } else { 158 streamLog.WithError(err).Error("error while receiving request from xDS stream") 159 } 160 return 161 } 162 if req == nil { 163 streamLog.Error("received nil request from xDS stream; stopping xDS stream handling") 164 return 165 } 166 if req.GetTypeUrl() == "" { 167 req.TypeUrl = defaultTypeURL 168 } 169 if nodeId == "" { 170 nodeId = req.GetNode().GetId() 171 streamLog = streamLog.WithField(logfields.XDSClientNode, nodeId) 172 } 173 streamLog.WithFields(getXDSRequestFields(req)).Debug("received request from xDS stream") 174 175 select { 176 case <-stopRecv: 177 streamLog.Debug("stopping xDS stream handling") 178 return 179 case reqCh <- req: 180 } 181 } 182 }() 183 184 return s.processRequestStream(ctx, streamLog, stream, reqCh, defaultTypeURL) 185 } 186 187 // perTypeStreamState is the state maintained per resource type for each 188 // xDS stream. 189 type perTypeStreamState struct { 190 // typeURL identifies the resource type. 191 typeURL string 192 193 // pendingWatchCancel is a pending watch on this resource type. 194 // If nil, no watch is pending. 195 pendingWatchCancel context.CancelFunc 196 197 // version is the last version sent. This is needed so that we'll know 198 // if a new request is an ACK (VersionInfo matches current version), or a NACK 199 // (VersionInfo matches an earlier version). 200 version uint64 201 202 // resourceNames is the list of names of resources sent in the last 203 // response to a request for this resource type. 204 resourceNames []string 205 } 206 207 // processRequestStream processes the requests in an xDS stream from a channel. 208 func (s *Server) processRequestStream(ctx context.Context, streamLog *logrus.Entry, stream Stream, 209 reqCh <-chan *envoy_api_v2.DiscoveryRequest, defaultTypeURL string) error { 210 // The request state for every type URL. 211 typeStates := make([]perTypeStreamState, len(s.watchers)) 212 defer func() { 213 for _, state := range typeStates { 214 if state.pendingWatchCancel != nil { 215 state.pendingWatchCancel() 216 } 217 } 218 }() 219 220 // A map of a resource type's URL to the corresponding index in typeStates 221 // for the resource type. 222 typeIndexes := make(map[string]int, len(typeStates)) 223 224 // The set of channels to select from. Since the set of channels is 225 // dynamic, we use reflection for selection. 226 // The indexes in selectCases from 0 to len(typeStates)-1 match the indexes 227 // in typeStates. 228 selectCases := make([]reflect.SelectCase, len(typeStates)+2) 229 230 // The last select case index is always the request channel. 231 reqChIndex := len(selectCases) - 1 232 selectCases[reqChIndex] = reflect.SelectCase{ 233 Dir: reflect.SelectRecv, 234 Chan: reflect.ValueOf(reqCh), 235 } 236 237 // The next-to-last select case is the context's Done channel. 238 doneChIndex := reqChIndex - 1 239 selectCases[doneChIndex] = reflect.SelectCase{ 240 Dir: reflect.SelectRecv, 241 Chan: reflect.ValueOf(ctx.Done()), 242 } 243 244 // Initially there are no pending watches, so just select a dead channel 245 // that will never be selected. 246 quietCh := make(chan *VersionedResources) 247 defer close(quietCh) 248 quietChValue := reflect.ValueOf(quietCh) 249 250 i := 0 251 for typeURL := range s.watchers { 252 typeStates[i] = perTypeStreamState{ 253 typeURL: typeURL, 254 } 255 256 selectCases[i] = reflect.SelectCase{ 257 Dir: reflect.SelectRecv, 258 Chan: quietChValue, 259 } 260 261 typeIndexes[typeURL] = i 262 263 i++ 264 } 265 266 streamLog.Info("starting xDS stream processing") 267 268 nodeIP := "" 269 270 for { 271 // Process either a new request from the xDS stream or a response 272 // from the resource watcher. 273 chosen, recv, recvOK := reflect.Select(selectCases) 274 275 switch chosen { 276 case doneChIndex: // Context got canceled, most likely by the client terminating. 277 streamLog.WithError(ctx.Err()).Debug("xDS stream context canceled") 278 return ctx.Err() 279 280 case reqChIndex: // Request received from the stream. 281 if !recvOK { 282 streamLog.Info("xDS stream closed") 283 return nil 284 } 285 286 req := recv.Interface().(*envoy_api_v2.DiscoveryRequest) 287 288 // only require Node to exist in the first request 289 if nodeIP == "" { 290 id := req.GetNode().GetId() 291 streamLog = streamLog.WithField(logfields.XDSClientNode, id) 292 var err error 293 nodeIP, err = IstioNodeToIP(id) 294 if err != nil { 295 streamLog.WithError(err).Error("invalid Node in xDS request") 296 return ErrInvalidNodeFormat 297 } 298 } 299 300 requestLog := streamLog.WithFields(getXDSRequestFields(req)) 301 302 // Ensure that the version info is a string that was sent by this 303 // server or the empty string (the first request in a stream should 304 // always have an empty version info). 305 var versionInfo uint64 306 if req.GetVersionInfo() != "" { 307 var err error 308 versionInfo, err = strconv.ParseUint(req.VersionInfo, 10, 64) 309 if err != nil { 310 requestLog.Errorf("invalid version info in xDS request, not a uint64") 311 return ErrInvalidVersionInfo 312 } 313 } 314 var nonce uint64 315 if req.GetResponseNonce() != "" { 316 var err error 317 nonce, err = strconv.ParseUint(req.ResponseNonce, 10, 64) 318 if err != nil { 319 requestLog.Error("invalid response nonce info in xDS request, not a uint64") 320 return ErrInvalidResponseNonce 321 } 322 } 323 var detail string 324 status := req.GetErrorDetail() 325 if status != nil { 326 detail = status.Message 327 } 328 329 typeURL := req.GetTypeUrl() 330 if defaultTypeURL == AnyTypeURL && typeURL == "" { 331 requestLog.Error("no type URL given in ADS request") 332 return ErrNoADSTypeURL 333 } 334 335 index, exists := typeIndexes[typeURL] 336 if !exists { 337 requestLog.Error("unknown type URL in xDS request") 338 return ErrUnknownTypeURL 339 } 340 341 state := &typeStates[index] 342 watcher := s.watchers[typeURL] 343 344 // Response nonce is always the same as the response version. 345 // Request version indicates the last acked version. If the 346 // response nonce in the request is different (smaller) than 347 // the version, all versions upto that version are acked, but 348 // the versions from that to and including the nonce are nacked. 349 if versionInfo <= nonce { 350 ackObserver := s.ackObservers[typeURL] 351 if ackObserver != nil { 352 requestLog.Debug("notifying observers of ACKs") 353 ackObserver.HandleResourceVersionAck(versionInfo, nonce, nodeIP, state.resourceNames, typeURL, detail) 354 } else { 355 requestLog.Debug("ACK received but no observers are waiting for ACKs") 356 } 357 if versionInfo < nonce { 358 // versions after VersionInfo, upto and including ResponseNonce are NACKed 359 requestLog.WithField(logfields.XDSDetail, detail).Warningf("NACK received for versions after %s and up to %s; waiting for a version update before sending again", req.VersionInfo, req.ResponseNonce) 360 // Watcher will behave as if the sent version was acked. 361 // Otherwise we will just be sending the same failing 362 // version over and over filling logs. 363 versionInfo = state.version 364 } 365 366 if state.pendingWatchCancel != nil { 367 // A pending watch exists for this type URL. Cancel it to 368 // start a new watch. 369 requestLog.Debug("canceling pending watch") 370 state.pendingWatchCancel() 371 } 372 373 respCh := make(chan *VersionedResources, 1) 374 selectCases[index].Chan = reflect.ValueOf(respCh) 375 376 ctx, cancel := context.WithCancel(ctx) 377 state.pendingWatchCancel = cancel 378 379 requestLog.Debugf("starting watch on %d resources", len(req.GetResourceNames())) 380 go watcher.WatchResources(ctx, typeURL, versionInfo, nodeIP, req.GetResourceNames(), respCh) 381 } else { 382 requestLog.Debug("received invalid nonce in xDS request; ignoring request") 383 } 384 default: // Pending watch response. 385 state := &typeStates[chosen] 386 state.pendingWatchCancel() 387 state.pendingWatchCancel = nil 388 389 if !recvOK { 390 streamLog.WithField(logfields.XDSTypeURL, state.typeURL). 391 Error("xDS resource watch failed; terminating") 392 return ErrResourceWatch 393 } 394 395 // Disabling reading from the channel after reading any from it, 396 // since the watcher will close it anyway. 397 selectCases[chosen].Chan = quietChValue 398 399 resp := recv.Interface().(*VersionedResources) 400 401 responseLog := streamLog.WithFields(logrus.Fields{ 402 logfields.XDSCachedVersion: resp.Version, 403 logfields.XDSCanary: resp.Canary, 404 logfields.XDSTypeURL: state.typeURL, 405 logfields.XDSNonce: resp.Version, 406 }) 407 408 resources := make([]*any.Any, len(resp.Resources)) 409 410 // Marshall the resources into protobuf's Any type. 411 for i, res := range resp.Resources { 412 data, err := proto.Marshal(res) 413 if err != nil { 414 responseLog.WithError(err).Errorf("error marshalling xDS response (%d resources)", len(resp.Resources)) 415 return err 416 } 417 resources[i] = &any.Any{ 418 TypeUrl: state.typeURL, 419 Value: data, 420 } 421 } 422 423 responseLog.Debugf("sending xDS response with %d resources", len(resp.Resources)) 424 425 versionStr := strconv.FormatUint(resp.Version, 10) 426 out := &envoy_api_v2.DiscoveryResponse{ 427 VersionInfo: versionStr, 428 Resources: resources, 429 Canary: resp.Canary, 430 TypeUrl: state.typeURL, 431 Nonce: versionStr, 432 } 433 err := stream.Send(out) 434 if err != nil { 435 return err 436 } 437 438 state.version = resp.Version 439 state.resourceNames = resp.ResourceNames 440 } 441 } 442 }