/*
 *
 * Copyright 2025 gRPC authors.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package lrsclient

import (
	"context"
	"fmt"
	"io"
	"time"

	"google.golang.org/grpc/grpclog"
	"google.golang.org/grpc/internal/backoff"
	igrpclog "google.golang.org/grpc/internal/grpclog"
	"google.golang.org/grpc/internal/pretty"
	"google.golang.org/grpc/xds/internal/clients"
	"google.golang.org/protobuf/proto"
	"google.golang.org/protobuf/types/known/durationpb"

	v3corepb "github.com/envoyproxy/go-control-plane/envoy/config/core/v3"
	v3endpointpb "github.com/envoyproxy/go-control-plane/envoy/config/endpoint/v3"
	v3lrspb "github.com/envoyproxy/go-control-plane/envoy/service/load_stats/v3"
)

// Any per-RPC level logs which print complete request or response messages
// should be gated at this verbosity level. Other per-RPC level logs which
// print terse output should be at `INFO` and verbosity 2.
const perRPCVerbosityLevel = 9

// streamImpl provides all the functionality associated with an LRS (Load
// Reporting Service) stream on the client-side. It manages the lifecycle of
// the LRS stream, including starting, stopping, and retrying the stream. It
// also provides a LoadStore that can be used to report load, with a Stop
// function that should be called when the load reporting is no longer
// needed.
type streamImpl struct {
	// The following fields are initialized when a stream instance is created
	// and are read-only afterwards, and hence can be accessed without a mutex.
	transport clients.Transport       // Transport to use for the LRS stream.
	backoff   func(int) time.Duration // Backoff for retries, after stream failures.
	nodeProto *v3corepb.Node          // Identifies the gRPC application.
	doneCh    chan struct{}           // Closed to notify exit of the LRS goroutine.
	logger    *igrpclog.PrefixLogger

	cancelStream context.CancelFunc // Cancel the stream. If nil, the stream is not active.
	loadStore    *LoadStore         // LoadStore returned to the user for pushing loads.

	// Buffered (capacity 1) so requesting/acknowledging the final send never
	// blocks the requester.
	finalSendRequest chan struct{} // To request the final attempt to send loads.
	finalSendDone    chan error    // To signal completion of the final attempt of sending loads.
}

// streamOpts holds the options for creating an lrsStream.
type streamOpts struct {
	transport clients.Transport       // xDS transport to create the stream on.
	backoff   func(int) time.Duration // Backoff for retries, after stream failures.
	nodeProto *v3corepb.Node          // Node proto to identify the gRPC application.
	logPrefix string                  // Prefix to be used for log messages.
}

// newStreamImpl creates a new StreamImpl with the provided options.
//
// The actual streaming RPC call is initiated when the first call to ReportLoad
// is made, and is terminated when the last call to ReportLoad is canceled.
78 func newStreamImpl(opts streamOpts) *streamImpl { 79 ctx, cancel := context.WithCancel(context.Background()) 80 81 lrs := &streamImpl{ 82 transport: opts.transport, 83 backoff: opts.backoff, 84 nodeProto: opts.nodeProto, 85 cancelStream: cancel, 86 doneCh: make(chan struct{}), 87 finalSendRequest: make(chan struct{}, 1), 88 finalSendDone: make(chan error, 1), 89 } 90 91 l := grpclog.Component("xds") 92 lrs.logger = igrpclog.NewPrefixLogger(l, opts.logPrefix+fmt.Sprintf("[lrs-stream %p] ", lrs)) 93 lrs.loadStore = newLoadStore() 94 go lrs.runner(ctx) 95 return lrs 96 } 97 98 // runner is responsible for managing the lifetime of an LRS streaming call. It 99 // creates the stream, sends the initial LoadStatsRequest, receives the first 100 // LoadStatsResponse, and then starts a goroutine to periodically send 101 // LoadStatsRequests. The runner will restart the stream if it encounters any 102 // errors. 103 func (lrs *streamImpl) runner(ctx context.Context) { 104 defer close(lrs.doneCh) 105 106 // This feature indicates that the client supports the 107 // LoadStatsResponse.send_all_clusters field in the LRS response. 108 node := proto.Clone(lrs.nodeProto).(*v3corepb.Node) 109 node.ClientFeatures = append(node.ClientFeatures, "envoy.lrs.supports_send_all_clusters") 110 111 runLoadReportStream := func() error { 112 // streamCtx is created and canceled in case we terminate the stream 113 // early for any reason, to avoid gRPC-Go leaking the RPC's monitoring 114 // goroutine. 
115 streamCtx, cancel := context.WithCancel(ctx) 116 defer cancel() 117 118 stream, err := lrs.transport.NewStream(streamCtx, "/envoy.service.load_stats.v3.LoadReportingService/StreamLoadStats") 119 if err != nil { 120 lrs.logger.Warningf("Failed to create new LRS streaming RPC: %v", err) 121 return nil 122 } 123 if lrs.logger.V(2) { 124 lrs.logger.Infof("LRS stream created") 125 } 126 127 if err := lrs.sendFirstLoadStatsRequest(stream, node); err != nil { 128 lrs.logger.Warningf("Sending first LRS request failed: %v", err) 129 return nil 130 } 131 132 clusters, interval, err := lrs.recvFirstLoadStatsResponse(stream) 133 if err != nil { 134 lrs.logger.Warningf("Reading from LRS streaming RPC failed: %v", err) 135 return nil 136 } 137 138 // We reset backoff state when we successfully receive at least one 139 // message from the server. 140 lrs.sendLoads(streamCtx, stream, clusters, interval) 141 return backoff.ErrResetBackoff 142 } 143 backoff.RunF(ctx, runLoadReportStream, lrs.backoff) 144 } 145 146 // sendLoads is responsible for periodically sending load reports to the LRS 147 // server at the specified interval for the specified clusters, until the passed 148 // in context is canceled. 149 func (lrs *streamImpl) sendLoads(ctx context.Context, stream clients.Stream, clusterNames []string, interval time.Duration) { 150 tick := time.NewTicker(interval) 151 defer tick.Stop() 152 for { 153 select { 154 case <-tick.C: 155 case <-ctx.Done(): 156 return 157 case <-lrs.finalSendRequest: 158 var finalSendErr error 159 if lrs.logger.V(2) { 160 lrs.logger.Infof("Final send request received. Attempting final LRS report.") 161 } 162 if err := lrs.sendLoadStatsRequest(stream, lrs.loadStore.stats(clusterNames)); err != nil { 163 lrs.logger.Warningf("Failed to send final load report. 
Writing to LRS stream failed: %v", err) 164 finalSendErr = err 165 } 166 if lrs.logger.V(2) { 167 lrs.logger.Infof("Successfully sent final load report.") 168 } 169 lrs.finalSendDone <- finalSendErr 170 return 171 } 172 173 if err := lrs.sendLoadStatsRequest(stream, lrs.loadStore.stats(clusterNames)); err != nil { 174 lrs.logger.Warningf("Failed to send periodic load report. Writing to LRS stream failed: %v", err) 175 return 176 } 177 } 178 } 179 180 func (lrs *streamImpl) sendFirstLoadStatsRequest(stream clients.Stream, node *v3corepb.Node) error { 181 req := &v3lrspb.LoadStatsRequest{Node: node} 182 if lrs.logger.V(perRPCVerbosityLevel) { 183 lrs.logger.Infof("Sending initial LoadStatsRequest: %s", pretty.ToJSON(req)) 184 } 185 msg, err := proto.Marshal(req) 186 if err != nil { 187 lrs.logger.Warningf("Failed to marshal LoadStatsRequest: %v", err) 188 return err 189 } 190 err = stream.Send(msg) 191 if err == io.EOF { 192 return getStreamError(stream) 193 } 194 return err 195 } 196 197 // recvFirstLoadStatsResponse receives the first LoadStatsResponse from the LRS 198 // server. 
Returns the following: 199 // - a list of cluster names requested by the server or an empty slice if the 200 // server requested for load from all clusters 201 // - the load reporting interval, and 202 // - any error encountered 203 func (lrs *streamImpl) recvFirstLoadStatsResponse(stream clients.Stream) ([]string, time.Duration, error) { 204 r, err := stream.Recv() 205 if err != nil { 206 return nil, 0, fmt.Errorf("lrs: failed to receive first LoadStatsResponse: %v", err) 207 } 208 var resp v3lrspb.LoadStatsResponse 209 if err := proto.Unmarshal(r, &resp); err != nil { 210 if lrs.logger.V(2) { 211 lrs.logger.Infof("Failed to unmarshal response to LoadStatsResponse: %v", err) 212 } 213 return nil, time.Duration(0), fmt.Errorf("lrs: unexpected message type %T", r) 214 } 215 if lrs.logger.V(perRPCVerbosityLevel) { 216 lrs.logger.Infof("Received first LoadStatsResponse: %s", pretty.ToJSON(&resp)) 217 } 218 219 internal := resp.GetLoadReportingInterval() 220 if internal.CheckValid() != nil { 221 return nil, 0, fmt.Errorf("lrs: invalid load_reporting_interval: %v", err) 222 } 223 loadReportingInterval := internal.AsDuration() 224 225 clusters := resp.Clusters 226 if resp.SendAllClusters { 227 // Return an empty slice to send stats for all clusters. 
228 clusters = []string{} 229 } 230 231 return clusters, loadReportingInterval, nil 232 } 233 234 func (lrs *streamImpl) sendLoadStatsRequest(stream clients.Stream, loads []*loadData) error { 235 clusterStats := make([]*v3endpointpb.ClusterStats, 0, len(loads)) 236 for _, sd := range loads { 237 droppedReqs := make([]*v3endpointpb.ClusterStats_DroppedRequests, 0, len(sd.drops)) 238 for category, count := range sd.drops { 239 droppedReqs = append(droppedReqs, &v3endpointpb.ClusterStats_DroppedRequests{ 240 Category: category, 241 DroppedCount: count, 242 }) 243 } 244 localityStats := make([]*v3endpointpb.UpstreamLocalityStats, 0, len(sd.localityStats)) 245 for lid, localityData := range sd.localityStats { 246 loadMetricStats := make([]*v3endpointpb.EndpointLoadMetricStats, 0, len(localityData.loadStats)) 247 for name, loadData := range localityData.loadStats { 248 loadMetricStats = append(loadMetricStats, &v3endpointpb.EndpointLoadMetricStats{ 249 MetricName: name, 250 NumRequestsFinishedWithMetric: loadData.count, 251 TotalMetricValue: loadData.sum, 252 }) 253 } 254 localityStats = append(localityStats, &v3endpointpb.UpstreamLocalityStats{ 255 Locality: &v3corepb.Locality{ 256 Region: lid.Region, 257 Zone: lid.Zone, 258 SubZone: lid.SubZone, 259 }, 260 TotalSuccessfulRequests: localityData.requestStats.succeeded, 261 TotalRequestsInProgress: localityData.requestStats.inProgress, 262 TotalErrorRequests: localityData.requestStats.errored, 263 TotalIssuedRequests: localityData.requestStats.issued, 264 LoadMetricStats: loadMetricStats, 265 UpstreamEndpointStats: nil, // TODO: populate for per endpoint loads. 
266 }) 267 } 268 269 clusterStats = append(clusterStats, &v3endpointpb.ClusterStats{ 270 ClusterName: sd.cluster, 271 ClusterServiceName: sd.service, 272 UpstreamLocalityStats: localityStats, 273 TotalDroppedRequests: sd.totalDrops, 274 DroppedRequests: droppedReqs, 275 LoadReportInterval: durationpb.New(sd.reportInterval), 276 }) 277 } 278 279 req := &v3lrspb.LoadStatsRequest{ClusterStats: clusterStats} 280 if lrs.logger.V(perRPCVerbosityLevel) { 281 lrs.logger.Infof("Sending LRS loads: %s", pretty.ToJSON(req)) 282 } 283 msg, err := proto.Marshal(req) 284 if err != nil { 285 if lrs.logger.V(2) { 286 lrs.logger.Infof("Failed to marshal LoadStatsRequest: %v", err) 287 } 288 return err 289 } 290 err = stream.Send(msg) 291 if err == io.EOF { 292 return getStreamError(stream) 293 } 294 return err 295 } 296 297 func getStreamError(stream clients.Stream) error { 298 for { 299 if _, err := stream.Recv(); err != nil { 300 return err 301 } 302 } 303 }