istio.io/istio@v0.0.0-20240520182934-d79c90f27776/pkg/istio-agent/xds_proxy_delta.go (about) 1 // Copyright Istio Authors 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package istioagent 16 17 import ( 18 "context" 19 "fmt" 20 "strings" 21 "time" 22 23 discovery "github.com/envoyproxy/go-control-plane/envoy/service/discovery/v3" 24 "go.uber.org/atomic" 25 google_rpc "google.golang.org/genproto/googleapis/rpc/status" 26 "google.golang.org/grpc" 27 "google.golang.org/grpc/codes" 28 "google.golang.org/grpc/metadata" 29 anypb "google.golang.org/protobuf/types/known/anypb" 30 31 "istio.io/istio/pilot/pkg/features" 32 "istio.io/istio/pkg/channels" 33 "istio.io/istio/pkg/istio-agent/metrics" 34 "istio.io/istio/pkg/log" 35 "istio.io/istio/pkg/model" 36 "istio.io/istio/pkg/slices" 37 "istio.io/istio/pkg/wasm" 38 ) 39 40 // sendDeltaRequest is a small wrapper around sending to con.requestsChan. This ensures that we do not 41 // block forever on 42 func (con *ProxyConnection) sendDeltaRequest(req *discovery.DeltaDiscoveryRequest) { 43 con.deltaRequestsChan.Put(req) 44 } 45 46 // DeltaAggregatedResources is an implementation of Delta XDS API used for proxying between Istiod and Envoy. 47 // Every time envoy makes a fresh connection to the agent, we reestablish a new connection to the upstream xds 48 // This ensures that a new connection between istiod and agent doesn't end up consuming pending messages from envoy 49 // as the new connection may not go to the same istiod. Vice versa case also applies. 50 func (p *XdsProxy) DeltaAggregatedResources(downstream DeltaDiscoveryStream) error { 51 proxyLog.Debugf("accepted delta xds connection from envoy, forwarding to upstream") 52 53 con := &ProxyConnection{ 54 conID: connectionNumber.Inc(), 55 upstreamError: make(chan error), // can be produced by recv and send 56 downstreamError: make(chan error), // can be produced by recv and send 57 deltaRequestsChan: channels.NewUnbounded[*discovery.DeltaDiscoveryRequest](), 58 // Allow a buffer of 1. This ensures we queue up at most 2 (one in process, 1 pending) responses before forwarding. 59 deltaResponsesChan: make(chan *discovery.DeltaDiscoveryResponse, 1), 60 stopChan: make(chan struct{}), 61 downstreamDeltas: downstream, 62 } 63 p.registerStream(con) 64 defer p.unregisterStream(con) 65 66 ctx, cancel := context.WithTimeout(context.Background(), time.Second*5) 67 defer cancel() 68 69 upstreamConn, err := p.buildUpstreamConn(ctx) 70 if err != nil { 71 proxyLog.Errorf("failed to connect to upstream %s: %v", p.istiodAddress, err) 72 metrics.IstiodConnectionFailures.Increment() 73 return err 74 } 75 defer upstreamConn.Close() 76 77 xds := discovery.NewAggregatedDiscoveryServiceClient(upstreamConn) 78 ctx = metadata.AppendToOutgoingContext(context.Background(), "ClusterID", p.clusterID) 79 for k, v := range p.xdsHeaders { 80 ctx = metadata.AppendToOutgoingContext(ctx, k, v) 81 } 82 // We must propagate upstream termination to Envoy. This ensures that we resume the full XDS sequence on new connection 83 return p.handleDeltaUpstream(ctx, con, xds) 84 } 85 86 func (p *XdsProxy) handleDeltaUpstream(ctx context.Context, con *ProxyConnection, xds discovery.AggregatedDiscoveryServiceClient) error { 87 log := proxyLog.WithLabels("id", con.conID) 88 deltaUpstream, err := xds.DeltaAggregatedResources(ctx, 89 grpc.MaxCallRecvMsgSize(defaultClientMaxReceiveMessageSize)) 90 if err != nil { 91 // Envoy logs errors again, so no need to log beyond debug level 92 log.Debugf("failed to create delta upstream grpc client: %v", err) 93 // Increase metric when xds connection error, for example: forgot to restart ingressgateway or sidecar after changing root CA. 94 metrics.IstiodConnectionErrors.Increment() 95 return err 96 } 97 log.Infof("connected to delta upstream XDS server: %s", p.istiodAddress) 98 defer log.Debugf("disconnected from delta XDS server: %s", p.istiodAddress) 99 100 con.upstreamDeltas = deltaUpstream 101 102 // handle responses from istiod 103 go func() { 104 for { 105 resp, err := con.upstreamDeltas.Recv() 106 if err != nil { 107 upstreamErr(con, err) 108 return 109 } 110 select { 111 case con.deltaResponsesChan <- resp: 112 case <-con.stopChan: 113 } 114 } 115 }() 116 117 go p.handleUpstreamDeltaRequest(con) 118 go p.handleUpstreamDeltaResponse(con) 119 120 for { 121 select { 122 case err := <-con.upstreamError: 123 return err 124 case err := <-con.downstreamError: 125 // On downstream error, we will return. This propagates the error to downstream envoy which will trigger reconnect 126 return err 127 case <-con.stopChan: 128 log.Debugf("upstream stopped") 129 return nil 130 } 131 } 132 } 133 134 func (p *XdsProxy) handleUpstreamDeltaRequest(con *ProxyConnection) { 135 log := proxyLog.WithLabels("id", con.conID) 136 initialRequestsSent := atomic.NewBool(false) 137 go func() { 138 for { 139 // recv delta xds requests from envoy 140 req, err := con.downstreamDeltas.Recv() 141 if err != nil { 142 downstreamErr(con, err) 143 return 144 } 145 146 // forward to istiod 147 con.sendDeltaRequest(req) 148 if !initialRequestsSent.Load() && req.TypeUrl == model.ListenerType { 149 // fire off an initial NDS request 150 if _, f := p.handlers[model.NameTableType]; f { 151 con.sendDeltaRequest(&discovery.DeltaDiscoveryRequest{ 152 TypeUrl: model.NameTableType, 153 }) 154 } 155 // fire off an initial PCDS request 156 if _, f := p.handlers[model.ProxyConfigType]; f { 157 con.sendDeltaRequest(&discovery.DeltaDiscoveryRequest{ 158 TypeUrl: model.ProxyConfigType, 159 }) 160 } 161 // set flag before sending the initial request to prevent race. 162 initialRequestsSent.Store(true) 163 // Fire of a configured initial request, if there is one 164 p.connectedMutex.RLock() 165 initialRequest := p.initialDeltaHealthRequest 166 if initialRequest != nil { 167 con.sendDeltaRequest(initialRequest) 168 } 169 p.connectedMutex.RUnlock() 170 } 171 } 172 }() 173 174 defer func() { 175 _ = con.upstreamDeltas.CloseSend() 176 }() 177 for { 178 select { 179 case req := <-con.deltaRequestsChan.Get(): 180 con.deltaRequestsChan.Load() 181 if req.TypeUrl == model.HealthInfoType && !initialRequestsSent.Load() { 182 // only send healthcheck probe after LDS request has been sent 183 continue 184 } 185 log.WithLabels( 186 "type", model.GetShortType(req.TypeUrl), 187 "sub", len(req.ResourceNamesSubscribe), 188 "unsub", len(req.ResourceNamesUnsubscribe), 189 "nonce", req.ResponseNonce, 190 "initial", len(req.InitialResourceVersions), 191 ).Debugf("delta request") 192 metrics.XdsProxyRequests.Increment() 193 if req.TypeUrl == model.ExtensionConfigurationType { 194 p.ecdsLastNonce.Store(req.ResponseNonce) 195 } 196 197 if err := con.upstreamDeltas.Send(req); err != nil { 198 err = fmt.Errorf("send error for type url %s: %v", req.TypeUrl, err) 199 upstreamErr(con, err) 200 return 201 } 202 case <-con.stopChan: 203 return 204 } 205 } 206 } 207 208 func (p *XdsProxy) handleUpstreamDeltaResponse(con *ProxyConnection) { 209 forwardEnvoyCh := make(chan *discovery.DeltaDiscoveryResponse, 1) 210 for { 211 select { 212 case resp := <-con.deltaResponsesChan: 213 // TODO: separate upstream response handling from requests sending, which are both time costly 214 proxyLog.WithLabels( 215 "id", con.conID, 216 "type", model.GetShortType(resp.TypeUrl), 217 "nonce", resp.Nonce, 218 "resources", len(resp.Resources), 219 "removes", len(resp.RemovedResources), 220 ).Debugf("upstream response") 221 metrics.XdsProxyResponses.Increment() 222 if h, f := p.handlers[resp.TypeUrl]; f { 223 if len(resp.Resources) == 0 { 224 // Empty response, nothing to do 225 // This assumes internal types are always singleton 226 break 227 } 228 err := h(resp.Resources[0].Resource) 229 var errorResp *google_rpc.Status 230 if err != nil { 231 errorResp = &google_rpc.Status{ 232 Code: int32(codes.Internal), 233 Message: err.Error(), 234 } 235 } 236 // Send ACK/NACK 237 con.sendDeltaRequest(&discovery.DeltaDiscoveryRequest{ 238 TypeUrl: resp.TypeUrl, 239 ResponseNonce: resp.Nonce, 240 ErrorDetail: errorResp, 241 }) 242 continue 243 } 244 switch resp.TypeUrl { 245 case model.ExtensionConfigurationType: 246 if features.WasmRemoteLoadConversion { 247 // If Wasm remote load conversion feature is enabled, rewrite and send. 248 go p.deltaRewriteAndForward(con, resp, func(resp *discovery.DeltaDiscoveryResponse) { 249 // Forward the response using the thread of `handleUpstreamResponse` 250 // to prevent concurrent access to forwardToEnvoy 251 select { 252 case forwardEnvoyCh <- resp: 253 case <-con.stopChan: 254 } 255 }) 256 } else { 257 // Otherwise, forward ECDS resource update directly to Envoy. 258 forwardDeltaToEnvoy(con, resp) 259 } 260 default: 261 if strings.HasPrefix(resp.TypeUrl, model.DebugType) { 262 p.forwardDeltaToTap(resp) 263 } else { 264 forwardDeltaToEnvoy(con, resp) 265 } 266 } 267 case resp := <-forwardEnvoyCh: 268 forwardDeltaToEnvoy(con, resp) 269 case <-con.stopChan: 270 return 271 } 272 } 273 } 274 275 func (p *XdsProxy) deltaRewriteAndForward(con *ProxyConnection, resp *discovery.DeltaDiscoveryResponse, forward func(resp *discovery.DeltaDiscoveryResponse)) { 276 resources := make([]*anypb.Any, 0, len(resp.Resources)) 277 for i := range resp.Resources { 278 resources = append(resources, resp.Resources[i].Resource) 279 } 280 281 if err := wasm.MaybeConvertWasmExtensionConfig(resources, p.wasmCache); err != nil { 282 proxyLog.Debugf("sending NACK for ECDS resources %+v, err: %+v", resp.Resources, err) 283 con.sendDeltaRequest(&discovery.DeltaDiscoveryRequest{ 284 TypeUrl: resp.TypeUrl, 285 ResponseNonce: resp.Nonce, 286 ErrorDetail: &google_rpc.Status{ 287 Code: int32(codes.Internal), 288 Message: err.Error(), 289 }, 290 }) 291 return 292 } 293 294 for i := range resources { 295 resp.Resources[i].Resource = resources[i] 296 } 297 298 proxyLog.WithLabels("resources", slices.Map(resp.Resources, (*discovery.Resource).GetName), "removes", resp.RemovedResources).Debugf("forward ECDS") 299 forward(resp) 300 } 301 302 func forwardDeltaToEnvoy(con *ProxyConnection, resp *discovery.DeltaDiscoveryResponse) { 303 if !model.IsEnvoyType(resp.TypeUrl) && resp.TypeUrl != model.WorkloadType { 304 proxyLog.Errorf("Skipping forwarding type url %s to Envoy as is not a valid Envoy type", resp.TypeUrl) 305 return 306 } 307 if con.isClosed() { 308 proxyLog.WithLabels("id", con.conID).Errorf("downstream dropped delta xds push to Envoy, connection already closed") 309 return 310 } 311 if err := sendDownstreamDelta(con.downstreamDeltas, resp); err != nil { 312 err = fmt.Errorf("send error for type url %s: %v", resp.TypeUrl, err) 313 downstreamErr(con, err) 314 return 315 } 316 } 317 318 func sendDownstreamDelta(deltaDownstream DeltaDiscoveryStream, res *discovery.DeltaDiscoveryResponse) error { 319 tStart := time.Now() 320 defer func() { 321 // This is a hint to help debug slow responses. 322 if time.Since(tStart) > 10*time.Second { 323 proxyLog.Warnf("sendDownstreamDelta took %v", time.Since(tStart)) 324 } 325 }() 326 return deltaDownstream.Send(res) 327 } 328 329 func (p *XdsProxy) sendDeltaHealthRequest(req *discovery.DeltaDiscoveryRequest) { 330 p.connectedMutex.Lock() 331 // Immediately send if we are currently connected. 332 if p.connected != nil && p.connected.deltaRequestsChan != nil { 333 p.connected.deltaRequestsChan.Put(req) 334 } 335 // Otherwise place it as our initial request for new connections 336 p.initialDeltaHealthRequest = req 337 p.connectedMutex.Unlock() 338 } 339 340 func (p *XdsProxy) forwardDeltaToTap(resp *discovery.DeltaDiscoveryResponse) { 341 select { 342 // Convert back to a SotW response 343 case p.tapResponseChannel <- &discovery.DiscoveryResponse{ 344 VersionInfo: resp.SystemVersionInfo, 345 Resources: slices.Map(resp.Resources, (*discovery.Resource).GetResource), 346 Canary: false, 347 TypeUrl: resp.TypeUrl, 348 Nonce: resp.Nonce, 349 ControlPlane: resp.ControlPlane, 350 }: 351 default: 352 log.Infof("tap response %q arrived too late; discarding", resp.TypeUrl) 353 } 354 }