dubbo.apache.org/dubbo-go/v3@v3.1.1/xds/client/controller/transport.go (about) 1 /* 2 * Licensed to the Apache Software Foundation (ASF) under one or more 3 * contributor license agreements. See the NOTICE file distributed with 4 * this work for additional information regarding copyright ownership. 5 * The ASF licenses this file to You under the Apache License, Version 2.0 6 * (the "License"); you may not use this file except in compliance with 7 * the License. You may obtain a copy of the License at 8 * 9 * http://www.apache.org/licenses/LICENSE-2.0 10 * 11 * Unless required by applicable law or agreed to in writing, software 12 * distributed under the License is distributed on an "AS IS" BASIS, 13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 * See the License for the specific language governing permissions and 15 * limitations under the License. 16 */ 17 18 /* 19 * 20 * Copyright 2021 gRPC authors. 21 * 22 */ 23 24 package controller 25 26 import ( 27 "context" 28 "fmt" 29 "time" 30 ) 31 32 import ( 33 "github.com/golang/protobuf/proto" 34 35 "google.golang.org/grpc" 36 ) 37 38 import ( 39 resourceversion "dubbo.apache.org/dubbo-go/v3/xds/client/controller/version" 40 "dubbo.apache.org/dubbo-go/v3/xds/client/load" 41 "dubbo.apache.org/dubbo-go/v3/xds/client/resource" 42 ) 43 44 // AddWatch adds a watch for an xDS resource given its type and name. 45 func (t *Controller) AddWatch(rType resource.ResourceType, resourceName string) { 46 t.sendCh.Put(&watchAction{ 47 rType: rType, 48 remove: false, 49 resource: resourceName, 50 }) 51 } 52 53 // RemoveWatch cancels an already registered watch for an xDS resource 54 // given its type and name. 55 func (t *Controller) RemoveWatch(rType resource.ResourceType, resourceName string) { 56 t.sendCh.Put(&watchAction{ 57 rType: rType, 58 remove: true, 59 resource: resourceName, 60 }) 61 } 62 63 // run starts an ADS stream (and backs off exponentially, if the previous 64 // stream failed without receiving a single reply) and runs the sender and 65 // receiver routines to send and receive data from the stream respectively. 66 func (t *Controller) run(ctx context.Context) { 67 go t.send(ctx) 68 // TODO: start a goroutine monitoring ClientConn's connectivity state, and 69 // report error (and log) when stats is transient failure. 70 71 retries := 0 72 for { 73 select { 74 case <-ctx.Done(): 75 return 76 default: 77 } 78 79 if retries != 0 { 80 timer := time.NewTimer(t.backoff(retries)) 81 select { 82 case <-timer.C: 83 case <-ctx.Done(): 84 if !timer.Stop() { 85 <-timer.C 86 } 87 return 88 } 89 } 90 91 retries++ 92 stream, err := t.vClient.NewStream(ctx, t.cc) 93 if err != nil { 94 t.updateHandler.NewConnectionError(err) 95 t.logger.Warnf("xds: ADS stream creation failed: %v", err) 96 continue 97 } 98 t.logger.Infof("ADS stream created") 99 100 select { 101 case <-t.streamCh: 102 default: 103 } 104 t.streamCh <- stream 105 if t.recv(stream) { 106 retries = 0 107 } 108 } 109 } 110 111 // send is a separate goroutine for sending watch requests on the xds stream. 112 // 113 // It watches the stream channel for new streams, and the request channel for 114 // new requests to send on the stream. 115 // 116 // For each new request (watchAction), it's 117 // - processed and added to the watch map 118 // - so resend will pick them up when there are new streams 119 // - sent on the current stream if there's one 120 // - the current stream is cleared when any send on it fails 121 // 122 // For each new stream, all the existing requests will be resent. 123 // 124 // Note that this goroutine doesn't do anything to the old stream when there's a 125 // new one. In fact, there should be only one stream in progress, and new one 126 // should only be created when the old one fails (recv returns an error). 127 func (t *Controller) send(ctx context.Context) { 128 var stream grpc.ClientStream 129 for { 130 select { 131 case <-ctx.Done(): 132 return 133 case stream = <-t.streamCh: 134 if !t.sendExisting(stream) { 135 // send failed, clear the current stream. 136 stream = nil 137 } 138 case u := <-t.sendCh.Get(): 139 t.sendCh.Load() 140 141 var ( 142 target []string 143 rType resource.ResourceType 144 version, nonce, errMsg string 145 send bool 146 ) 147 switch update := u.(type) { 148 case *watchAction: 149 target, rType, version, nonce = t.processWatchInfo(update) 150 case *ackAction: 151 target, rType, version, nonce, send = t.processAckInfo(update, stream) 152 if !send { 153 continue 154 } 155 errMsg = update.errMsg 156 } 157 if stream == nil { 158 // There's no stream yet. Skip the request. This request 159 // will be resent to the new streams. If no stream is 160 // created, the watcher will timeout (same as server not 161 // sending response back). 162 continue 163 } 164 if err := t.vClient.SendRequest(stream, target, rType, version, nonce, errMsg); err != nil { 165 t.logger.Warnf("ADS request for {target: %q, type: %v, version: %q, nonce: %q} failed: %v", target, rType, version, nonce, err) 166 // send failed, clear the current stream. 167 stream = nil 168 } 169 } 170 } 171 } 172 173 // sendExisting sends out xDS requests for registered watchers when recovering 174 // from a broken stream. 175 // 176 // We call stream.Send() here with the lock being held. It should be OK to do 177 // that here because the stream has just started and Send() usually returns 178 // quickly (once it pushes the message onto the transport layer) and is only 179 // ever blocked if we don't have enough flow control quota. 180 func (t *Controller) sendExisting(stream grpc.ClientStream) bool { 181 t.mu.Lock() 182 defer t.mu.Unlock() 183 184 // Reset the ack versions when the stream restarts. 185 t.versionMap = make(map[resource.ResourceType]string) 186 t.nonceMap = make(map[resource.ResourceType]string) 187 188 for rType, s := range t.watchMap { 189 if err := t.vClient.SendRequest(stream, mapToSlice(s), rType, "", "", ""); err != nil { 190 t.logger.Warnf("ADS request failed: %v", err) 191 return false 192 } 193 } 194 195 return true 196 } 197 198 // recv receives xDS responses on the provided ADS stream and branches out to 199 // message specific handlers. 200 func (t *Controller) recv(stream grpc.ClientStream) bool { 201 success := false 202 for { 203 resp, err := t.vClient.RecvResponse(stream) 204 if err != nil { 205 t.updateHandler.NewConnectionError(err) 206 t.logger.Warnf("ADS stream is closed with error: %v", err) 207 return success 208 } 209 210 rType, version, nonce, err := t.handleResponse(resp) 211 212 if e, ok := err.(resourceversion.ErrResourceTypeUnsupported); ok { 213 t.logger.Warnf("%s", e.ErrStr) 214 continue 215 } 216 if err != nil { 217 t.sendCh.Put(&ackAction{ 218 rType: rType, 219 version: "", 220 nonce: nonce, 221 errMsg: err.Error(), 222 stream: stream, 223 }) 224 t.logger.Warnf("Sending NACK for response type: %v, version: %v, nonce: %v, reason: %v", rType, version, nonce, err) 225 continue 226 } 227 t.sendCh.Put(&ackAction{ 228 rType: rType, 229 version: version, 230 nonce: nonce, 231 stream: stream, 232 }) 233 t.logger.Infof("Sending ACK for response type: %v, version: %v, nonce: %v", rType, version, nonce) 234 success = true 235 } 236 } 237 238 func (t *Controller) handleResponse(resp proto.Message) (resource.ResourceType, string, string, error) { 239 rType, resources, version, nonce, err := t.vClient.ParseResponse(resp) 240 if err != nil { 241 return rType, version, nonce, err 242 } 243 opts := &resource.UnmarshalOptions{ 244 Version: version, 245 Resources: resources, 246 Logger: t.logger, 247 UpdateValidator: t.updateValidator, 248 } 249 var md resource.UpdateMetadata 250 switch rType { 251 case resource.ListenerResource: 252 var update map[string]resource.ListenerUpdateErrTuple 253 update, md, err = resource.UnmarshalListener(opts) 254 t.updateHandler.NewListeners(update, md) 255 case resource.RouteConfigResource: 256 var update map[string]resource.RouteConfigUpdateErrTuple 257 update, md, err = resource.UnmarshalRouteConfig(opts) 258 t.updateHandler.NewRouteConfigs(update, md) 259 case resource.ClusterResource: 260 var update map[string]resource.ClusterUpdateErrTuple 261 update, md, err = resource.UnmarshalCluster(opts) 262 t.updateHandler.NewClusters(update, md) 263 case resource.EndpointsResource: 264 var update map[string]resource.EndpointsUpdateErrTuple 265 update, md, err = resource.UnmarshalEndpoints(opts) 266 t.updateHandler.NewEndpoints(update, md) 267 default: 268 return rType, "", "", resourceversion.ErrResourceTypeUnsupported{ 269 ErrStr: fmt.Sprintf("Resource type %v unknown in response from server", rType), 270 } 271 } 272 return rType, version, nonce, err 273 } 274 275 func mapToSlice(m map[string]bool) []string { 276 ret := make([]string, 0, len(m)) 277 for i := range m { 278 ret = append(ret, i) 279 } 280 return ret 281 } 282 283 type watchAction struct { 284 rType resource.ResourceType 285 remove bool // Whether this is to remove watch for the resource. 286 resource string 287 } 288 289 // processWatchInfo pulls the fields needed by the request from a watchAction. 290 // 291 // It also updates the watch map. 292 func (t *Controller) processWatchInfo(w *watchAction) (target []string, rType resource.ResourceType, ver, nonce string) { 293 t.mu.Lock() 294 defer t.mu.Unlock() 295 296 var current map[string]bool 297 current, ok := t.watchMap[w.rType] 298 if !ok { 299 current = make(map[string]bool) 300 t.watchMap[w.rType] = current 301 } 302 303 if w.remove { 304 delete(current, w.resource) 305 if len(current) == 0 { 306 delete(t.watchMap, w.rType) 307 } 308 } else { 309 current[w.resource] = true 310 } 311 312 rType = w.rType 313 target = mapToSlice(current) 314 // We don't reset version or nonce when a new watch is started. The version 315 // and nonce from previous response are carried by the request unless the 316 // stream is recreated. 317 ver = t.versionMap[rType] 318 nonce = t.nonceMap[rType] 319 return target, rType, ver, nonce 320 } 321 322 type ackAction struct { 323 rType resource.ResourceType 324 version string // NACK if version is an empty string. 325 nonce string 326 errMsg string // Empty unless it's a NACK. 327 // ACK/NACK are tagged with the stream it's for. When the stream is down, 328 // all the ACK/NACK for this stream will be dropped, and the version/nonce 329 // won't be updated. 330 stream grpc.ClientStream 331 } 332 333 // processAckInfo pulls the fields needed by the ack request from a ackAction. 334 // 335 // If no active watch is found for this ack, it returns false for send. 336 func (t *Controller) processAckInfo(ack *ackAction, stream grpc.ClientStream) (target []string, rType resource.ResourceType, version, nonce string, send bool) { 337 if ack.stream != stream { 338 // If ACK's stream isn't the current sending stream, this means the ACK 339 // was pushed to queue before the old stream broke, and a new stream has 340 // been started since. Return immediately here so we don't update the 341 // nonce for the new stream. 342 return nil, resource.UnknownResource, "", "", false 343 } 344 rType = ack.rType 345 346 t.mu.Lock() 347 defer t.mu.Unlock() 348 349 // Update the nonce no matter if we are going to send the ACK request on 350 // wire. We may not send the request if the watch is canceled. But the nonce 351 // needs to be updated so the next request will have the right nonce. 352 nonce = ack.nonce 353 t.nonceMap[rType] = nonce 354 355 s, ok := t.watchMap[rType] 356 if !ok || len(s) == 0 { 357 // We don't send the request ack if there's no active watch (this can be 358 // either the server sends responses before any request, or the watch is 359 // canceled while the ackAction is in queue), because there's no resource 360 // name. And if we send a request with empty resource name list, the 361 // server may treat it as a wild card and send us everything. 362 return nil, resource.UnknownResource, "", "", false 363 } 364 send = true 365 target = mapToSlice(s) 366 367 version = ack.version 368 if version == "" { 369 // This is a nack, get the previous acked version. 370 version = t.versionMap[rType] 371 // version will still be an empty string if rType isn't 372 // found in versionMap, this can happen if there wasn't any ack 373 // before. 374 } else { 375 t.versionMap[rType] = version 376 } 377 return target, rType, version, nonce, send 378 } 379 380 // reportLoad starts an LRS stream to report load data to the management server. 381 // It blocks until the context is canceled. 382 func (t *Controller) reportLoad(ctx context.Context, cc *grpc.ClientConn, opts resourceversion.LoadReportingOptions) { 383 retries := 0 384 for { 385 if ctx.Err() != nil { 386 return 387 } 388 389 if retries != 0 { 390 timer := time.NewTimer(t.backoff(retries)) 391 select { 392 case <-timer.C: 393 case <-ctx.Done(): 394 if !timer.Stop() { 395 <-timer.C 396 } 397 return 398 } 399 } 400 401 retries++ 402 stream, err := t.vClient.NewLoadStatsStream(ctx, cc) 403 if err != nil { 404 t.logger.Warnf("lrs: failed to create stream: %v", err) 405 continue 406 } 407 t.logger.Infof("lrs: created LRS stream") 408 409 if err = t.vClient.SendFirstLoadStatsRequest(stream); err != nil { 410 t.logger.Warnf("lrs: failed to send first request: %v", err) 411 continue 412 } 413 414 clusters, interval, err := t.vClient.HandleLoadStatsResponse(stream) 415 if err != nil { 416 t.logger.Warnf("%v", err) 417 continue 418 } 419 420 retries = 0 421 t.sendLoads(ctx, stream, opts.LoadStore, clusters, interval) 422 } 423 } 424 425 func (t *Controller) sendLoads(ctx context.Context, stream grpc.ClientStream, store *load.Store, clusterNames []string, interval time.Duration) { 426 tick := time.NewTicker(interval) 427 defer tick.Stop() 428 for { 429 select { 430 case <-tick.C: 431 case <-ctx.Done(): 432 return 433 } 434 if err := t.vClient.SendLoadStatsRequest(stream, store.Stats(clusterNames)); err != nil { 435 t.logger.Warnf("%v", err) 436 return 437 } 438 } 439 }