gitee.com/ks-custle/core-gm@v0.0.0-20230922171213-b83bdd97b62c/grpc/xds/internal/xdsclient/controller/transport.go (about) 1 /* 2 * 3 * Copyright 2020 gRPC authors. 4 * 5 * Licensed under the Apache License, Version 2.0 (the "License"); 6 * you may not use this file except in compliance with the License. 7 * You may obtain a copy of the License at 8 * 9 * http://www.apache.org/licenses/LICENSE-2.0 10 * 11 * Unless required by applicable law or agreed to in writing, software 12 * distributed under the License is distributed on an "AS IS" BASIS, 13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 * See the License for the specific language governing permissions and 15 * limitations under the License. 16 * 17 */ 18 19 package controller 20 21 import ( 22 "context" 23 "fmt" 24 "time" 25 26 grpc "gitee.com/ks-custle/core-gm/grpc" 27 controllerversion "gitee.com/ks-custle/core-gm/grpc/xds/internal/xdsclient/controller/version" 28 xdsresourceversion "gitee.com/ks-custle/core-gm/grpc/xds/internal/xdsclient/controller/version" 29 "gitee.com/ks-custle/core-gm/grpc/xds/internal/xdsclient/load" 30 "gitee.com/ks-custle/core-gm/grpc/xds/internal/xdsclient/xdsresource" 31 "github.com/golang/protobuf/proto" 32 ) 33 34 // AddWatch adds a watch for an xDS resource given its type and name. 35 func (t *Controller) AddWatch(rType xdsresource.ResourceType, resourceName string) { 36 t.sendCh.Put(&watchAction{ 37 rType: rType, 38 remove: false, 39 resource: resourceName, 40 }) 41 } 42 43 // RemoveWatch cancels an already registered watch for an xDS resource 44 // given its type and name. 45 func (t *Controller) RemoveWatch(rType xdsresource.ResourceType, resourceName string) { 46 t.sendCh.Put(&watchAction{ 47 rType: rType, 48 remove: true, 49 resource: resourceName, 50 }) 51 } 52 53 // run starts an ADS stream (and backs off exponentially, if the previous 54 // stream failed without receiving a single reply) and runs the sender and 55 // receiver routines to send and receive data from the stream respectively. 56 func (t *Controller) run(ctx context.Context) { 57 go t.send(ctx) 58 // TODO: start a goroutine monitoring ClientConn's connectivity state, and 59 // report error (and log) when stats is transient failure. 60 61 retries := 0 62 for { 63 select { 64 case <-ctx.Done(): 65 return 66 default: 67 } 68 69 if retries != 0 { 70 timer := time.NewTimer(t.backoff(retries)) 71 select { 72 case <-timer.C: 73 case <-ctx.Done(): 74 if !timer.Stop() { 75 <-timer.C 76 } 77 return 78 } 79 } 80 81 retries++ 82 stream, err := t.vClient.NewStream(ctx, t.cc) 83 if err != nil { 84 t.updateHandler.NewConnectionError(err) 85 t.logger.Warningf("xds: ADS stream creation failed: %v", err) 86 continue 87 } 88 t.logger.Infof("ADS stream created") 89 90 select { 91 case <-t.streamCh: 92 default: 93 } 94 t.streamCh <- stream 95 if t.recv(stream) { 96 retries = 0 97 } 98 } 99 } 100 101 // send is a separate goroutine for sending watch requests on the xds stream. 102 // 103 // It watches the stream channel for new streams, and the request channel for 104 // new requests to send on the stream. 105 // 106 // For each new request (watchAction), it's 107 // - processed and added to the watch map 108 // - so resend will pick them up when there are new streams 109 // - sent on the current stream if there's one 110 // - the current stream is cleared when any send on it fails 111 // 112 // For each new stream, all the existing requests will be resent. 113 // 114 // Note that this goroutine doesn't do anything to the old stream when there's a 115 // new one. In fact, there should be only one stream in progress, and new one 116 // should only be created when the old one fails (recv returns an error). 117 func (t *Controller) send(ctx context.Context) { 118 var stream grpc.ClientStream 119 for { 120 select { 121 case <-ctx.Done(): 122 return 123 case stream = <-t.streamCh: 124 if !t.sendExisting(stream) { 125 // send failed, clear the current stream. 126 stream = nil 127 } 128 case u := <-t.sendCh.Get(): 129 t.sendCh.Load() 130 131 var ( 132 target []string 133 rType xdsresource.ResourceType 134 version, nonce, errMsg string 135 send bool 136 ) 137 switch update := u.(type) { 138 case *watchAction: 139 target, rType, version, nonce = t.processWatchInfo(update) 140 case *ackAction: 141 target, rType, version, nonce, send = t.processAckInfo(update, stream) 142 if !send { 143 continue 144 } 145 errMsg = update.errMsg 146 } 147 if stream == nil { 148 // There's no stream yet. Skip the request. This request 149 // will be resent to the new streams. If no stream is 150 // created, the watcher will timeout (same as server not 151 // sending response back). 152 continue 153 } 154 if err := t.vClient.SendRequest(stream, target, rType, version, nonce, errMsg); err != nil { 155 t.logger.Warningf("ADS request for {target: %q, type: %v, version: %q, nonce: %q} failed: %v", target, rType, version, nonce, err) 156 // send failed, clear the current stream. 157 stream = nil 158 } 159 } 160 } 161 } 162 163 // sendExisting sends out xDS requests for registered watchers when recovering 164 // from a broken stream. 165 // 166 // We call stream.Send() here with the lock being held. It should be OK to do 167 // that here because the stream has just started and Send() usually returns 168 // quickly (once it pushes the message onto the transport layer) and is only 169 // ever blocked if we don't have enough flow control quota. 170 func (t *Controller) sendExisting(stream grpc.ClientStream) bool { 171 t.mu.Lock() 172 defer t.mu.Unlock() 173 174 // Reset the ack versions when the stream restarts. 175 t.versionMap = make(map[xdsresource.ResourceType]string) 176 t.nonceMap = make(map[xdsresource.ResourceType]string) 177 178 for rType, s := range t.watchMap { 179 if err := t.vClient.SendRequest(stream, mapToSlice(s), rType, "", "", ""); err != nil { 180 t.logger.Warningf("ADS request failed: %v", err) 181 return false 182 } 183 } 184 185 return true 186 } 187 188 // recv receives xDS responses on the provided ADS stream and branches out to 189 // message specific handlers. 190 func (t *Controller) recv(stream grpc.ClientStream) bool { 191 success := false 192 for { 193 resp, err := t.vClient.RecvResponse(stream) 194 if err != nil { 195 t.updateHandler.NewConnectionError(err) 196 t.logger.Warningf("ADS stream is closed with error: %v", err) 197 return success 198 } 199 200 rType, version, nonce, err := t.handleResponse(resp) 201 202 if e, ok := err.(xdsresourceversion.ErrResourceTypeUnsupported); ok { 203 t.logger.Warningf("%s", e.ErrStr) 204 continue 205 } 206 if err != nil { 207 t.sendCh.Put(&ackAction{ 208 rType: rType, 209 version: "", 210 nonce: nonce, 211 errMsg: err.Error(), 212 stream: stream, 213 }) 214 t.logger.Warningf("Sending NACK for response type: %v, version: %v, nonce: %v, reason: %v", rType, version, nonce, err) 215 continue 216 } 217 t.sendCh.Put(&ackAction{ 218 rType: rType, 219 version: version, 220 nonce: nonce, 221 stream: stream, 222 }) 223 t.logger.Infof("Sending ACK for response type: %v, version: %v, nonce: %v", rType, version, nonce) 224 success = true 225 } 226 } 227 228 func (t *Controller) handleResponse(resp proto.Message) (xdsresource.ResourceType, string, string, error) { 229 rType, resource, version, nonce, err := t.vClient.ParseResponse(resp) 230 if err != nil { 231 return rType, version, nonce, err 232 } 233 opts := &xdsresource.UnmarshalOptions{ 234 Version: version, 235 Resources: resource, 236 Logger: t.logger, 237 UpdateValidator: t.updateValidator, 238 } 239 var md xdsresource.UpdateMetadata 240 switch rType { 241 case xdsresource.ListenerResource: 242 var update map[string]xdsresource.ListenerUpdateErrTuple 243 update, md, err = xdsresource.UnmarshalListener(opts) 244 t.updateHandler.NewListeners(update, md) 245 case xdsresource.RouteConfigResource: 246 var update map[string]xdsresource.RouteConfigUpdateErrTuple 247 update, md, err = xdsresource.UnmarshalRouteConfig(opts) 248 t.updateHandler.NewRouteConfigs(update, md) 249 case xdsresource.ClusterResource: 250 var update map[string]xdsresource.ClusterUpdateErrTuple 251 update, md, err = xdsresource.UnmarshalCluster(opts) 252 t.updateHandler.NewClusters(update, md) 253 case xdsresource.EndpointsResource: 254 var update map[string]xdsresource.EndpointsUpdateErrTuple 255 update, md, err = xdsresource.UnmarshalEndpoints(opts) 256 t.updateHandler.NewEndpoints(update, md) 257 default: 258 return rType, "", "", xdsresourceversion.ErrResourceTypeUnsupported{ 259 ErrStr: fmt.Sprintf("Resource type %v unknown in response from server", rType), 260 } 261 } 262 return rType, version, nonce, err 263 } 264 265 func mapToSlice(m map[string]bool) []string { 266 ret := make([]string, 0, len(m)) 267 for i := range m { 268 ret = append(ret, i) 269 } 270 return ret 271 } 272 273 type watchAction struct { 274 rType xdsresource.ResourceType 275 remove bool // Whether this is to remove watch for the resource. 276 resource string 277 } 278 279 // processWatchInfo pulls the fields needed by the request from a watchAction. 280 // 281 // It also updates the watch map. 282 func (t *Controller) processWatchInfo(w *watchAction) (target []string, rType xdsresource.ResourceType, ver, nonce string) { 283 t.mu.Lock() 284 defer t.mu.Unlock() 285 286 var current map[string]bool 287 current, ok := t.watchMap[w.rType] 288 if !ok { 289 current = make(map[string]bool) 290 t.watchMap[w.rType] = current 291 } 292 293 if w.remove { 294 delete(current, w.resource) 295 if len(current) == 0 { 296 delete(t.watchMap, w.rType) 297 } 298 } else { 299 current[w.resource] = true 300 } 301 302 rType = w.rType 303 target = mapToSlice(current) 304 // We don't reset version or nonce when a new watch is started. The version 305 // and nonce from previous response are carried by the request unless the 306 // stream is recreated. 307 ver = t.versionMap[rType] 308 nonce = t.nonceMap[rType] 309 return target, rType, ver, nonce 310 } 311 312 type ackAction struct { 313 rType xdsresource.ResourceType 314 version string // NACK if version is an empty string. 315 nonce string 316 errMsg string // Empty unless it's a NACK. 317 // ACK/NACK are tagged with the stream it's for. When the stream is down, 318 // all the ACK/NACK for this stream will be dropped, and the version/nonce 319 // won't be updated. 320 stream grpc.ClientStream 321 } 322 323 // processAckInfo pulls the fields needed by the ack request from a ackAction. 324 // 325 // If no active watch is found for this ack, it returns false for send. 326 func (t *Controller) processAckInfo(ack *ackAction, stream grpc.ClientStream) (target []string, rType xdsresource.ResourceType, version, nonce string, send bool) { 327 if ack.stream != stream { 328 // If ACK's stream isn't the current sending stream, this means the ACK 329 // was pushed to queue before the old stream broke, and a new stream has 330 // been started since. Return immediately here so we don't update the 331 // nonce for the new stream. 332 return nil, xdsresource.UnknownResource, "", "", false 333 } 334 rType = ack.rType 335 336 t.mu.Lock() 337 defer t.mu.Unlock() 338 339 // Update the nonce no matter if we are going to send the ACK request on 340 // wire. We may not send the request if the watch is canceled. But the nonce 341 // needs to be updated so the next request will have the right nonce. 342 nonce = ack.nonce 343 t.nonceMap[rType] = nonce 344 345 s, ok := t.watchMap[rType] 346 if !ok || len(s) == 0 { 347 // We don't send the request ack if there's no active watch (this can be 348 // either the server sends responses before any request, or the watch is 349 // canceled while the ackAction is in queue), because there's no resource 350 // name. And if we send a request with empty resource name list, the 351 // server may treat it as a wild card and send us everything. 352 return nil, xdsresource.UnknownResource, "", "", false 353 } 354 send = true 355 target = mapToSlice(s) 356 357 version = ack.version 358 if version == "" { 359 // This is a nack, get the previous acked version. 360 version = t.versionMap[rType] 361 // version will still be an empty string if rType isn't 362 // found in versionMap, this can happen if there wasn't any ack 363 // before. 364 } else { 365 t.versionMap[rType] = version 366 } 367 return target, rType, version, nonce, send 368 } 369 370 // reportLoad starts an LRS stream to report load data to the management server. 371 // It blocks until the context is cancelled. 372 func (t *Controller) reportLoad(ctx context.Context, cc *grpc.ClientConn, opts controllerversion.LoadReportingOptions) { 373 retries := 0 374 for { 375 if ctx.Err() != nil { 376 return 377 } 378 379 if retries != 0 { 380 timer := time.NewTimer(t.backoff(retries)) 381 select { 382 case <-timer.C: 383 case <-ctx.Done(): 384 if !timer.Stop() { 385 <-timer.C 386 } 387 return 388 } 389 } 390 391 retries++ 392 stream, err := t.vClient.NewLoadStatsStream(ctx, cc) 393 if err != nil { 394 t.logger.Warningf("lrs: failed to create stream: %v", err) 395 continue 396 } 397 t.logger.Infof("lrs: created LRS stream") 398 399 if err := t.vClient.SendFirstLoadStatsRequest(stream); err != nil { 400 t.logger.Warningf("lrs: failed to send first request: %v", err) 401 continue 402 } 403 404 clusters, interval, err := t.vClient.HandleLoadStatsResponse(stream) 405 if err != nil { 406 t.logger.Warningf("%v", err) 407 continue 408 } 409 410 retries = 0 411 t.sendLoads(ctx, stream, opts.LoadStore, clusters, interval) 412 } 413 } 414 415 func (t *Controller) sendLoads(ctx context.Context, stream grpc.ClientStream, store *load.Store, clusterNames []string, interval time.Duration) { 416 tick := time.NewTicker(interval) 417 defer tick.Stop() 418 for { 419 select { 420 case <-tick.C: 421 case <-ctx.Done(): 422 return 423 } 424 if err := t.vClient.SendLoadStatsRequest(stream, store.Stats(clusterNames)); err != nil { 425 t.logger.Warningf("%v", err) 426 return 427 } 428 } 429 }