google.golang.org/grpc@v1.74.2/stats/opentelemetry/e2e_test.go (about) 1 /* 2 * Copyright 2024 gRPC authors. 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 package opentelemetry_test 18 19 import ( 20 "context" 21 "fmt" 22 "io" 23 "slices" 24 "strconv" 25 "testing" 26 "time" 27 28 otelcodes "go.opentelemetry.io/otel/codes" 29 oteltrace "go.opentelemetry.io/otel/trace" 30 31 v3clusterpb "github.com/envoyproxy/go-control-plane/envoy/config/cluster/v3" 32 v3corepb "github.com/envoyproxy/go-control-plane/envoy/config/core/v3" 33 v3endpointpb "github.com/envoyproxy/go-control-plane/envoy/config/endpoint/v3" 34 v3listenerpb "github.com/envoyproxy/go-control-plane/envoy/config/listener/v3" 35 v3routepb "github.com/envoyproxy/go-control-plane/envoy/config/route/v3" 36 v3clientsideweightedroundrobinpb "github.com/envoyproxy/go-control-plane/envoy/extensions/load_balancing_policies/client_side_weighted_round_robin/v3" 37 v3wrrlocalitypb "github.com/envoyproxy/go-control-plane/envoy/extensions/load_balancing_policies/wrr_locality/v3" 38 "github.com/google/go-cmp/cmp" 39 "github.com/google/go-cmp/cmp/cmpopts" 40 "google.golang.org/protobuf/proto" 41 "google.golang.org/protobuf/types/known/durationpb" 42 "google.golang.org/protobuf/types/known/wrapperspb" 43 44 "go.opentelemetry.io/otel/attribute" 45 "go.opentelemetry.io/otel/propagation" 46 "go.opentelemetry.io/otel/sdk/metric" 47 "go.opentelemetry.io/otel/sdk/metric/metricdata" 48 "go.opentelemetry.io/otel/sdk/metric/metricdata/metricdatatest" 49 "go.opentelemetry.io/otel/sdk/trace" 50 "go.opentelemetry.io/otel/sdk/trace/tracetest" 51 "google.golang.org/grpc" 52 "google.golang.org/grpc/codes" 53 "google.golang.org/grpc/credentials/insecure" 54 "google.golang.org/grpc/encoding/gzip" 55 experimental "google.golang.org/grpc/experimental/opentelemetry" 56 "google.golang.org/grpc/internal" 57 "google.golang.org/grpc/internal/grpcsync" 58 "google.golang.org/grpc/internal/grpctest" 59 "google.golang.org/grpc/internal/stubserver" 60 itestutils "google.golang.org/grpc/internal/testutils" 61 "google.golang.org/grpc/internal/testutils/xds/e2e" 62 setup "google.golang.org/grpc/internal/testutils/xds/e2e/setup" 63 testgrpc "google.golang.org/grpc/interop/grpc_testing" 64 testpb "google.golang.org/grpc/interop/grpc_testing" 65 "google.golang.org/grpc/metadata" 66 "google.golang.org/grpc/orca" 67 "google.golang.org/grpc/resolver" 68 "google.golang.org/grpc/resolver/manual" 69 "google.golang.org/grpc/stats/opentelemetry" 70 "google.golang.org/grpc/stats/opentelemetry/internal/testutils" 71 "google.golang.org/grpc/status" 72 ) 73 74 var defaultTestTimeout = 5 * time.Second 75 76 type s struct { 77 grpctest.Tester 78 } 79 80 func Test(t *testing.T) { 81 grpctest.RunSubTests(t, s{}) 82 } 83 84 // traceSpanInfo is the information received about the trace span. It contains 85 // subset of information that is needed to verify if correct trace is being 86 // attributed to the rpc. 87 type traceSpanInfo struct { 88 spanKind string 89 name string 90 events []trace.Event 91 attributes []attribute.KeyValue 92 } 93 94 // traceSpanInfoMapKey is the key struct for constructing a map of trace spans 95 // retrievable by span name and span kind 96 type traceSpanInfoMapKey struct { 97 spanName string 98 spanKind string 99 } 100 101 // defaultMetricsOptions creates default metrics options 102 func defaultMetricsOptions(_ *testing.T, methodAttributeFilter func(string) bool) (*opentelemetry.MetricsOptions, *metric.ManualReader) { 103 reader := metric.NewManualReader() 104 provider := metric.NewMeterProvider(metric.WithReader(reader)) 105 metricsOptions := &opentelemetry.MetricsOptions{ 106 MeterProvider: provider, 107 Metrics: opentelemetry.DefaultMetrics(), 108 MethodAttributeFilter: methodAttributeFilter, 109 } 110 return metricsOptions, reader 111 } 112 113 // defaultTraceOptions function to create default trace options 114 func defaultTraceOptions(_ *testing.T) (*experimental.TraceOptions, *tracetest.InMemoryExporter) { 115 spanExporter := tracetest.NewInMemoryExporter() 116 spanProcessor := trace.NewSimpleSpanProcessor(spanExporter) 117 tracerProvider := trace.NewTracerProvider(trace.WithSpanProcessor(spanProcessor)) 118 textMapPropagator := propagation.NewCompositeTextMapPropagator(opentelemetry.GRPCTraceBinPropagator{}) 119 traceOptions := &experimental.TraceOptions{ 120 TracerProvider: tracerProvider, 121 TextMapPropagator: textMapPropagator, 122 } 123 return traceOptions, spanExporter 124 } 125 126 // setupStubServer creates a stub server with OpenTelemetry component configured on client 127 // and server side and returns the server. 128 func setupStubServer(t *testing.T, metricsOptions *opentelemetry.MetricsOptions, traceOptions *experimental.TraceOptions) *stubserver.StubServer { 129 ss := &stubserver.StubServer{ 130 UnaryCallF: func(_ context.Context, in *testpb.SimpleRequest) (*testpb.SimpleResponse, error) { 131 return &testpb.SimpleResponse{Payload: &testpb.Payload{ 132 Body: make([]byte, len(in.GetPayload().GetBody())), 133 }}, nil 134 }, 135 FullDuplexCallF: func(stream testgrpc.TestService_FullDuplexCallServer) error { 136 for { 137 _, err := stream.Recv() 138 if err == io.EOF { 139 return nil 140 } 141 } 142 }, 143 } 144 145 otelOptions := opentelemetry.Options{} 146 if metricsOptions != nil { 147 otelOptions.MetricsOptions = *metricsOptions 148 } 149 if traceOptions != nil { 150 otelOptions.TraceOptions = *traceOptions 151 } 152 153 if err := ss.Start([]grpc.ServerOption{opentelemetry.ServerOption(otelOptions)}, 154 opentelemetry.DialOption(otelOptions)); err != nil { 155 t.Fatalf("Error starting endpoint server: %v", err) 156 } 157 return ss 158 } 159 160 // waitForTraceSpans waits until the in-memory span exporter has received the 161 // expected trace spans based on span name and kind. It polls the exporter at a 162 // short interval until the desired spans are available or the context is 163 // cancelled. 164 // 165 // Returns the collected spans or an error if the context deadline is exceeded 166 // before the expected spans are exported. 167 func waitForTraceSpans(ctx context.Context, exporter *tracetest.InMemoryExporter, wantSpans []traceSpanInfo) (tracetest.SpanStubs, error) { 168 for ; ctx.Err() == nil; <-time.After(time.Millisecond) { 169 spans := exporter.GetSpans() 170 missingAnySpan := false 171 for _, wantSpan := range wantSpans { 172 if !slices.ContainsFunc(spans, func(span tracetest.SpanStub) bool { 173 return span.Name == wantSpan.name && span.SpanKind.String() == wantSpan.spanKind 174 }) { 175 missingAnySpan = true 176 } 177 } 178 if !missingAnySpan { 179 return spans, nil 180 } 181 } 182 return nil, fmt.Errorf("error waiting for complete trace spans %v: %v", wantSpans, ctx.Err()) 183 } 184 185 // validateTraces first first groups the received spans by their TraceID. For 186 // each trace group, it identifies the client, server, and attempt spans for 187 // both unary and streaming RPCs. It checks that the expected spans are 188 // present and that the server spans have the correct parent (attempt span). 189 // Finally, it compares the content of each span (name, kind, attributes, 190 // events) against the provided expected spans information. 191 func validateTraces(t *testing.T, spans tracetest.SpanStubs, wantSpanInfos []traceSpanInfo) { 192 // Group spans by TraceID. 193 traceSpans := make(map[oteltrace.TraceID][]tracetest.SpanStub) 194 for _, span := range spans { 195 traceID := span.SpanContext.TraceID() 196 traceSpans[traceID] = append(traceSpans[traceID], span) 197 } 198 199 // For each trace group, verify relationships and content. 200 for traceID, spans := range traceSpans { 201 var unaryClient, unaryServer, unaryAttempt *tracetest.SpanStub 202 var streamClient, streamServer, streamAttempt *tracetest.SpanStub 203 var isUnary, isStream bool 204 205 for _, span := range spans { 206 switch { 207 case span.Name == "Sent.grpc.testing.TestService.UnaryCall": 208 isUnary = true 209 if span.SpanKind == oteltrace.SpanKindClient { 210 unaryClient = &span 211 } 212 case span.Name == "Recv.grpc.testing.TestService.UnaryCall": 213 isUnary = true 214 if span.SpanKind == oteltrace.SpanKindServer { 215 unaryServer = &span 216 } 217 case span.Name == "Attempt.grpc.testing.TestService.UnaryCall": 218 isUnary = true 219 unaryAttempt = &span 220 221 case span.Name == "Sent.grpc.testing.TestService.FullDuplexCall": 222 isStream = true 223 if span.SpanKind == oteltrace.SpanKindClient { 224 streamClient = &span 225 } 226 case span.Name == "Recv.grpc.testing.TestService.FullDuplexCall": 227 isStream = true 228 if span.SpanKind == oteltrace.SpanKindServer { 229 streamServer = &span 230 } 231 case span.Name == "Attempt.grpc.testing.TestService.FullDuplexCall": 232 isStream = true 233 streamAttempt = &span 234 } 235 } 236 237 if isUnary { 238 // Verify Unary Call Spans. 239 if unaryClient == nil { 240 t.Error("Unary call client span not found") 241 } 242 if unaryServer == nil { 243 t.Error("Unary call server span not found") 244 } 245 if unaryAttempt == nil { 246 t.Error("Unary call attempt span not found") 247 } 248 // Check TraceID consistency. 249 if unaryClient != nil && unaryClient.SpanContext.TraceID() != traceID || unaryServer.SpanContext.TraceID() != traceID { 250 t.Error("Unary call spans have inconsistent TraceIDs") 251 } 252 // Check parent-child relationship via SpanID. 253 if unaryServer != nil && unaryServer.Parent.SpanID() != unaryAttempt.SpanContext.SpanID() { 254 t.Error("Unary server span parent does not match attempt span ID") 255 } 256 } 257 258 if isStream { 259 // Verify Streaming Call Spans. 260 if streamClient == nil { 261 t.Error("Streaming call client span not found") 262 } 263 if streamServer == nil { 264 t.Error("Streaming call server span not found") 265 } 266 if streamAttempt == nil { 267 t.Error("Streaming call attempt span not found") 268 } 269 // Check TraceID consistency. 270 if streamClient != nil && streamClient.SpanContext.TraceID() != traceID || streamServer.SpanContext.TraceID() != traceID { 271 t.Error("Streaming call spans have inconsistent TraceIDs") 272 } 273 if streamServer != nil && streamServer.Parent.SpanID() != streamAttempt.SpanContext.SpanID() { 274 t.Error("Streaming server span parent does not match attempt span ID") 275 } 276 } 277 } 278 279 // Constructs a map from a slice of traceSpanInfo to retrieve the 280 // corresponding expected span info based on span name and span kind 281 // for comparison. 282 wantSpanInfosMap := make(map[traceSpanInfoMapKey]traceSpanInfo) 283 for _, info := range wantSpanInfos { 284 key := traceSpanInfoMapKey{spanName: info.name, spanKind: info.spanKind} 285 wantSpanInfosMap[key] = info 286 } 287 288 // Compare retrieved spans with expected spans. 289 for _, span := range spans { 290 // Check that the attempt span has the correct status. 291 if got, want := span.Status.Code, otelcodes.Ok; got != want { 292 t.Errorf("Got status code %v, want %v", got, want) 293 } 294 295 // Retrieve the corresponding expected span info based on span name and 296 // span kind to compare. 297 want, ok := wantSpanInfosMap[traceSpanInfoMapKey{spanName: span.Name, spanKind: span.SpanKind.String()}] 298 if !ok { 299 t.Errorf("Unexpected span: %v", span) 300 continue 301 } 302 303 // comparers 304 attributesSort := cmpopts.SortSlices(func(a, b attribute.KeyValue) bool { 305 return a.Key < b.Key 306 }) 307 attributesValueComparable := cmpopts.EquateComparable(attribute.KeyValue{}.Value) 308 eventsTimeIgnore := cmpopts.IgnoreFields(trace.Event{}, "Time") 309 310 // attributes 311 if diff := cmp.Diff(want.attributes, span.Attributes, attributesSort, attributesValueComparable); diff != "" { 312 t.Errorf("Attributes mismatch for span %s (-want +got):\n%s", span.Name, diff) 313 } 314 // events 315 if diff := cmp.Diff(want.events, span.Events, attributesSort, attributesValueComparable, eventsTimeIgnore); diff != "" { 316 t.Errorf("Events mismatch for span %s (-want +got):\n%s", span.Name, diff) 317 } 318 } 319 } 320 321 // TestMethodAttributeFilter tests the method attribute filter. The method 322 // filter set should bucket the grpc.method attribute into "other" if the method 323 // attribute filter specifies. 324 func (s) TestMethodAttributeFilter(t *testing.T) { 325 maf := func(str string) bool { 326 // Will allow duplex/any other type of RPC. 327 return str != testgrpc.TestService_UnaryCall_FullMethodName 328 } 329 mo, reader := defaultMetricsOptions(t, maf) 330 ss := setupStubServer(t, mo, nil) 331 defer ss.Stop() 332 333 // Make a Unary and Streaming RPC. The Unary RPC should be filtered by the 334 // method attribute filter, and the Full Duplex (Streaming) RPC should not. 335 ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) 336 defer cancel() 337 if _, err := ss.Client.UnaryCall(ctx, &testpb.SimpleRequest{Payload: &testpb.Payload{ 338 Body: make([]byte, 10000), 339 }}); err != nil { 340 t.Fatalf("Unexpected error from UnaryCall: %v", err) 341 } 342 stream, err := ss.Client.FullDuplexCall(ctx) 343 if err != nil { 344 t.Fatalf("ss.Client.FullDuplexCall failed: %f", err) 345 } 346 347 stream.CloseSend() 348 if _, err = stream.Recv(); err != io.EOF { 349 t.Fatalf("stream.Recv received an unexpected error: %v, expected an EOF error", err) 350 } 351 rm := &metricdata.ResourceMetrics{} 352 reader.Collect(ctx, rm) 353 gotMetrics := map[string]metricdata.Metrics{} 354 for _, sm := range rm.ScopeMetrics { 355 for _, m := range sm.Metrics { 356 gotMetrics[m.Name] = m 357 } 358 } 359 360 wantMetrics := []metricdata.Metrics{ 361 { 362 Name: "grpc.client.attempt.started", 363 Description: "Number of client call attempts started.", 364 Unit: "attempt", 365 Data: metricdata.Sum[int64]{ 366 DataPoints: []metricdata.DataPoint[int64]{ 367 { 368 Attributes: attribute.NewSet(attribute.String("grpc.method", "grpc.testing.TestService/UnaryCall"), attribute.String("grpc.target", ss.Target)), 369 Value: 1, 370 }, 371 { 372 Attributes: attribute.NewSet(attribute.String("grpc.method", "grpc.testing.TestService/FullDuplexCall"), attribute.String("grpc.target", ss.Target)), 373 Value: 1, 374 }, 375 }, 376 Temporality: metricdata.CumulativeTemporality, 377 IsMonotonic: true, 378 }, 379 }, 380 { 381 Name: "grpc.server.call.duration", 382 Description: "End-to-end time taken to complete a call from server transport's perspective.", 383 Unit: "s", 384 Data: metricdata.Histogram[float64]{ 385 DataPoints: []metricdata.HistogramDataPoint[float64]{ 386 { // Method should go to "other" due to the method attribute filter. 387 Attributes: attribute.NewSet(attribute.String("grpc.method", "other"), attribute.String("grpc.status", "OK")), 388 Count: 1, 389 Bounds: testutils.DefaultLatencyBounds, 390 }, 391 { 392 Attributes: attribute.NewSet(attribute.String("grpc.method", "grpc.testing.TestService/FullDuplexCall"), attribute.String("grpc.status", "OK")), 393 Count: 1, 394 Bounds: testutils.DefaultLatencyBounds, 395 }, 396 }, 397 Temporality: metricdata.CumulativeTemporality, 398 }, 399 }, 400 } 401 402 gotMetrics = testutils.WaitForServerMetrics(ctx, t, reader, gotMetrics, wantMetrics) 403 testutils.CompareMetrics(t, gotMetrics, wantMetrics) 404 } 405 406 // TestAllMetricsOneFunction tests emitted metrics from OpenTelemetry 407 // instrumentation component. It then configures a system with a gRPC Client and 408 // gRPC server with the OpenTelemetry Dial and Server Option configured 409 // specifying all the metrics provided by this package, and makes a Unary RPC 410 // and a Streaming RPC. These two RPCs should cause certain recording for each 411 // registered metric observed through a Manual Metrics Reader on the provided 412 // OpenTelemetry SDK's Meter Provider. It then makes an RPC that is unregistered 413 // on the Client (no StaticMethodCallOption set) and Server. The method 414 // attribute on subsequent metrics should be bucketed in "other". 415 func (s) TestAllMetricsOneFunction(t *testing.T) { 416 mo, reader := defaultMetricsOptions(t, nil) 417 ss := setupStubServer(t, mo, nil) 418 defer ss.Stop() 419 ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) 420 defer cancel() 421 // Make two RPC's, a unary RPC and a streaming RPC. These should cause 422 // certain metrics to be emitted, which should be observed through the 423 // Metric Reader. 424 if _, err := ss.Client.UnaryCall(ctx, &testpb.SimpleRequest{Payload: &testpb.Payload{ 425 Body: make([]byte, 10000), 426 }}, grpc.UseCompressor(gzip.Name)); err != nil { // Deterministic compression. 427 t.Fatalf("Unexpected error from UnaryCall: %v", err) 428 } 429 stream, err := ss.Client.FullDuplexCall(ctx) 430 if err != nil { 431 t.Fatalf("ss.Client.FullDuplexCall failed: %f", err) 432 } 433 434 stream.CloseSend() 435 if _, err = stream.Recv(); err != io.EOF { 436 t.Fatalf("stream.Recv received an unexpected error: %v, expected an EOF error", err) 437 } 438 439 rm := &metricdata.ResourceMetrics{} 440 reader.Collect(ctx, rm) 441 442 gotMetrics := map[string]metricdata.Metrics{} 443 for _, sm := range rm.ScopeMetrics { 444 for _, m := range sm.Metrics { 445 gotMetrics[m.Name] = m 446 } 447 } 448 449 wantMetrics := testutils.MetricData(testutils.MetricDataOptions{ 450 Target: ss.Target, 451 UnaryCompressedMessageSize: float64(57), 452 }) 453 gotMetrics = testutils.WaitForServerMetrics(ctx, t, reader, gotMetrics, wantMetrics) 454 testutils.CompareMetrics(t, gotMetrics, wantMetrics) 455 456 stream, err = ss.Client.FullDuplexCall(ctx) 457 if err != nil { 458 t.Fatalf("ss.Client.FullDuplexCall failed: %f", err) 459 } 460 461 stream.CloseSend() 462 if _, err = stream.Recv(); err != io.EOF { 463 t.Fatalf("stream.Recv received an unexpected error: %v, expected an EOF error", err) 464 } 465 // This Invoke doesn't pass the StaticMethodCallOption. Thus, the method 466 // attribute should become "other" on client side metrics. Since it is also 467 // not registered on the server either, it should also become "other" on the 468 // server metrics method attribute. 469 ss.CC.Invoke(ctx, "/grpc.testing.TestService/UnregisteredCall", nil, nil, []grpc.CallOption{}...) 470 ss.CC.Invoke(ctx, "/grpc.testing.TestService/UnregisteredCall", nil, nil, []grpc.CallOption{}...) 471 ss.CC.Invoke(ctx, "/grpc.testing.TestService/UnregisteredCall", nil, nil, []grpc.CallOption{}...) 472 473 rm = &metricdata.ResourceMetrics{} 474 reader.Collect(ctx, rm) 475 gotMetrics = map[string]metricdata.Metrics{} 476 for _, sm := range rm.ScopeMetrics { 477 for _, m := range sm.Metrics { 478 gotMetrics[m.Name] = m 479 } 480 } 481 unaryMethodAttr := attribute.String("grpc.method", "grpc.testing.TestService/UnaryCall") 482 duplexMethodAttr := attribute.String("grpc.method", "grpc.testing.TestService/FullDuplexCall") 483 484 targetAttr := attribute.String("grpc.target", ss.Target) 485 otherMethodAttr := attribute.String("grpc.method", "other") 486 wantMetrics = []metricdata.Metrics{ 487 { 488 Name: "grpc.client.attempt.started", 489 Description: "Number of client call attempts started.", 490 Unit: "attempt", 491 Data: metricdata.Sum[int64]{ 492 DataPoints: []metricdata.DataPoint[int64]{ 493 { 494 Attributes: attribute.NewSet(unaryMethodAttr, targetAttr), 495 Value: 1, 496 }, 497 { 498 Attributes: attribute.NewSet(duplexMethodAttr, targetAttr), 499 Value: 2, 500 }, 501 { 502 Attributes: attribute.NewSet(otherMethodAttr, targetAttr), 503 Value: 3, 504 }, 505 }, 506 Temporality: metricdata.CumulativeTemporality, 507 IsMonotonic: true, 508 }, 509 }, 510 { 511 Name: "grpc.server.call.started", 512 Description: "Number of server calls started.", 513 Unit: "call", 514 Data: metricdata.Sum[int64]{ 515 DataPoints: []metricdata.DataPoint[int64]{ 516 { 517 Attributes: attribute.NewSet(unaryMethodAttr), 518 Value: 1, 519 }, 520 { 521 Attributes: attribute.NewSet(duplexMethodAttr), 522 Value: 2, 523 }, 524 { 525 Attributes: attribute.NewSet(otherMethodAttr), 526 Value: 3, 527 }, 528 }, 529 Temporality: metricdata.CumulativeTemporality, 530 IsMonotonic: true, 531 }, 532 }, 533 } 534 for _, metric := range wantMetrics { 535 val, ok := gotMetrics[metric.Name] 536 if !ok { 537 t.Fatalf("Metric %v not present in recorded metrics", metric.Name) 538 } 539 if !metricdatatest.AssertEqual(t, metric, val, metricdatatest.IgnoreTimestamp(), metricdatatest.IgnoreExemplars()) { 540 t.Fatalf("Metrics data type not equal for metric: %v", metric.Name) 541 } 542 } 543 } 544 545 // clusterWithLBConfiguration returns a cluster resource with the proto message 546 // passed Marshaled to an any and specified through the load_balancing_policy 547 // field. 548 func clusterWithLBConfiguration(t *testing.T, clusterName, edsServiceName string, secLevel e2e.SecurityLevel, m proto.Message) *v3clusterpb.Cluster { 549 cluster := e2e.DefaultCluster(clusterName, edsServiceName, secLevel) 550 cluster.LoadBalancingPolicy = &v3clusterpb.LoadBalancingPolicy{ 551 Policies: []*v3clusterpb.LoadBalancingPolicy_Policy{ 552 { 553 TypedExtensionConfig: &v3corepb.TypedExtensionConfig{ 554 TypedConfig: itestutils.MarshalAny(t, m), 555 }, 556 }, 557 }, 558 } 559 return cluster 560 } 561 562 func metricsDataFromReader(ctx context.Context, reader *metric.ManualReader) map[string]metricdata.Metrics { 563 rm := &metricdata.ResourceMetrics{} 564 reader.Collect(ctx, rm) 565 gotMetrics := map[string]metricdata.Metrics{} 566 for _, sm := range rm.ScopeMetrics { 567 for _, m := range sm.Metrics { 568 gotMetrics[m.Name] = m 569 } 570 } 571 return gotMetrics 572 } 573 574 // TestWRRMetrics tests the metrics emitted from the WRR LB Policy. It 575 // configures WRR as an endpoint picking policy through xDS on a ClientConn 576 // alongside an OpenTelemetry stats handler. It makes a few RPC's, and then 577 // sleeps for a bit to allow weight to expire. It then asserts OpenTelemetry 578 // metrics atoms are eventually present for all four WRR Metrics, alongside the 579 // correct target and locality label for each metric. 580 func (s) TestWRRMetrics(t *testing.T) { 581 cmr := orca.NewServerMetricsRecorder().(orca.CallMetricsRecorder) 582 backend1 := stubserver.StartTestService(t, &stubserver.StubServer{ 583 EmptyCallF: func(ctx context.Context, _ *testpb.Empty) (*testpb.Empty, error) { 584 if r := orca.CallMetricsRecorderFromContext(ctx); r != nil { 585 // Copy metrics from what the test set in cmr into r. 586 sm := cmr.(orca.ServerMetricsProvider).ServerMetrics() 587 r.SetApplicationUtilization(sm.AppUtilization) 588 r.SetQPS(sm.QPS) 589 r.SetEPS(sm.EPS) 590 } 591 return &testpb.Empty{}, nil 592 }, 593 }, orca.CallMetricsServerOption(nil)) 594 port1 := itestutils.ParsePort(t, backend1.Address) 595 defer backend1.Stop() 596 597 cmr.SetQPS(10.0) 598 cmr.SetApplicationUtilization(1.0) 599 600 backend2 := stubserver.StartTestService(t, &stubserver.StubServer{ 601 EmptyCallF: func(ctx context.Context, _ *testpb.Empty) (*testpb.Empty, error) { 602 if r := orca.CallMetricsRecorderFromContext(ctx); r != nil { 603 // Copy metrics from what the test set in cmr into r. 604 sm := cmr.(orca.ServerMetricsProvider).ServerMetrics() 605 r.SetApplicationUtilization(sm.AppUtilization) 606 r.SetQPS(sm.QPS) 607 r.SetEPS(sm.EPS) 608 } 609 return &testpb.Empty{}, nil 610 }, 611 }, orca.CallMetricsServerOption(nil)) 612 port2 := itestutils.ParsePort(t, backend2.Address) 613 defer backend2.Stop() 614 615 const serviceName = "my-service-client-side-xds" 616 617 // Start an xDS management server. 618 managementServer, nodeID, _, xdsResolver := setup.ManagementServerAndResolver(t) 619 620 wrrConfig := &v3wrrlocalitypb.WrrLocality{ 621 EndpointPickingPolicy: &v3clusterpb.LoadBalancingPolicy{ 622 Policies: []*v3clusterpb.LoadBalancingPolicy_Policy{ 623 { 624 TypedExtensionConfig: &v3corepb.TypedExtensionConfig{ 625 TypedConfig: itestutils.MarshalAny(t, &v3clientsideweightedroundrobinpb.ClientSideWeightedRoundRobin{ 626 EnableOobLoadReport: &wrapperspb.BoolValue{ 627 Value: false, 628 }, 629 // BlackoutPeriod long enough to cause load report 630 // weight to trigger in the scope of test case. 631 // WeightExpirationPeriod will cause the load report 632 // weight for backend 1 to expire. 633 BlackoutPeriod: durationpb.New(5 * time.Millisecond), 634 WeightExpirationPeriod: durationpb.New(500 * time.Millisecond), 635 WeightUpdatePeriod: durationpb.New(time.Second), 636 ErrorUtilizationPenalty: &wrapperspb.FloatValue{Value: 1}, 637 }), 638 }, 639 }, 640 }, 641 }, 642 } 643 644 routeConfigName := "route-" + serviceName 645 clusterName := "cluster-" + serviceName 646 endpointsName := "endpoints-" + serviceName 647 resources := e2e.UpdateOptions{ 648 NodeID: nodeID, 649 Listeners: []*v3listenerpb.Listener{e2e.DefaultClientListener(serviceName, routeConfigName)}, 650 Routes: []*v3routepb.RouteConfiguration{e2e.DefaultRouteConfig(routeConfigName, serviceName, clusterName)}, 651 Clusters: []*v3clusterpb.Cluster{clusterWithLBConfiguration(t, clusterName, endpointsName, e2e.SecurityLevelNone, wrrConfig)}, 652 Endpoints: []*v3endpointpb.ClusterLoadAssignment{e2e.EndpointResourceWithOptions(e2e.EndpointOptions{ 653 ClusterName: endpointsName, 654 Host: "localhost", 655 Localities: []e2e.LocalityOptions{ 656 { 657 Backends: []e2e.BackendOptions{{Ports: []uint32{port1}}, {Ports: []uint32{port2}}}, 658 Weight: 1, 659 }, 660 }, 661 })}, 662 } 663 664 ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) 665 defer cancel() 666 if err := managementServer.Update(ctx, resources); err != nil { 667 t.Fatal(err) 668 } 669 670 reader := metric.NewManualReader() 671 provider := metric.NewMeterProvider(metric.WithReader(reader)) 672 673 mo := opentelemetry.MetricsOptions{ 674 MeterProvider: provider, 675 Metrics: opentelemetry.DefaultMetrics().Add("grpc.lb.wrr.rr_fallback", "grpc.lb.wrr.endpoint_weight_not_yet_usable", "grpc.lb.wrr.endpoint_weight_stale", "grpc.lb.wrr.endpoint_weights"), 676 OptionalLabels: []string{"grpc.lb.locality"}, 677 } 678 679 target := fmt.Sprintf("xds:///%s", serviceName) 680 cc, err := grpc.NewClient(target, grpc.WithTransportCredentials(insecure.NewCredentials()), grpc.WithResolvers(xdsResolver), opentelemetry.DialOption(opentelemetry.Options{MetricsOptions: mo})) 681 if err != nil { 682 t.Fatalf("Failed to dial local test server: %v", err) 683 } 684 defer cc.Close() 685 686 client := testgrpc.NewTestServiceClient(cc) 687 688 // Make 100 RPC's. The two backends will send back load reports per call 689 // giving the two SubChannels weights which will eventually expire. Two 690 // backends needed as for only one backend, WRR does not recompute the 691 // scheduler. 692 receivedExpectedMetrics := grpcsync.NewEvent() 693 go func() { 694 for !receivedExpectedMetrics.HasFired() && ctx.Err() == nil { 695 client.EmptyCall(ctx, &testpb.Empty{}) 696 time.Sleep(2 * time.Millisecond) 697 } 698 }() 699 700 targetAttr := attribute.String("grpc.target", target) 701 localityAttr := attribute.String("grpc.lb.locality", `{region="region-1", zone="zone-1", sub_zone="subzone-1"}`) 702 703 wantMetrics := []metricdata.Metrics{ 704 { 705 Name: "grpc.lb.wrr.rr_fallback", 706 Description: "EXPERIMENTAL. Number of scheduler updates in which there were not enough endpoints with valid weight, which caused the WRR policy to fall back to RR behavior.", 707 Unit: "update", 708 Data: metricdata.Sum[int64]{ 709 DataPoints: []metricdata.DataPoint[int64]{ 710 { 711 Attributes: attribute.NewSet(targetAttr, localityAttr), 712 Value: 1, // value ignored 713 }, 714 }, 715 Temporality: metricdata.CumulativeTemporality, 716 IsMonotonic: true, 717 }, 718 }, 719 720 { 721 Name: "grpc.lb.wrr.endpoint_weight_not_yet_usable", 722 Description: "EXPERIMENTAL. Number of endpoints from each scheduler update that don't yet have usable weight information (i.e., either the load report has not yet been received, or it is within the blackout period).", 723 Unit: "endpoint", 724 Data: metricdata.Sum[int64]{ 725 DataPoints: []metricdata.DataPoint[int64]{ 726 { 727 Attributes: attribute.NewSet(targetAttr, localityAttr), 728 Value: 1, // value ignored 729 }, 730 }, 731 Temporality: metricdata.CumulativeTemporality, 732 IsMonotonic: true, 733 }, 734 }, 735 { 736 Name: "grpc.lb.wrr.endpoint_weights", 737 Description: "EXPERIMENTAL. Weight of each endpoint, recorded on every scheduler update. Endpoints without usable weights will be recorded as weight 0.", 738 Unit: "endpoint", 739 Data: metricdata.Histogram[float64]{ 740 DataPoints: []metricdata.HistogramDataPoint[float64]{ 741 { 742 Attributes: attribute.NewSet(targetAttr, localityAttr), 743 }, 744 }, 745 Temporality: metricdata.CumulativeTemporality, 746 }, 747 }, 748 } 749 750 if err := pollForWantMetrics(ctx, t, reader, wantMetrics); err != nil { 751 t.Fatal(err) 752 } 753 receivedExpectedMetrics.Fire() 754 755 // Poll for 5 seconds for weight expiration metric. No more RPC's are being 756 // made, so weight should expire on a subsequent scheduler update. 757 eventuallyWantMetric := metricdata.Metrics{ 758 Name: "grpc.lb.wrr.endpoint_weight_stale", 759 Description: "EXPERIMENTAL. Number of endpoints from each scheduler update whose latest weight is older than the expiration period.", 760 Unit: "endpoint", 761 Data: metricdata.Sum[int64]{ 762 DataPoints: []metricdata.DataPoint[int64]{ 763 { 764 Attributes: attribute.NewSet(targetAttr, localityAttr), 765 Value: 1, // value ignored 766 }, 767 }, 768 Temporality: metricdata.CumulativeTemporality, 769 IsMonotonic: true, 770 }, 771 } 772 773 if err := pollForWantMetrics(ctx, t, reader, []metricdata.Metrics{eventuallyWantMetric}); err != nil { 774 t.Fatal(err) 775 } 776 } 777 778 // pollForWantMetrics polls for the wantMetrics to show up on reader. Returns an 779 // error if metric is present but not equal to expected, or if the wantMetrics 780 // do not show up during the context timeout. 781 func pollForWantMetrics(ctx context.Context, t *testing.T, reader *metric.ManualReader, wantMetrics []metricdata.Metrics) error { 782 for ; ctx.Err() == nil; <-time.After(time.Millisecond) { 783 gotMetrics := metricsDataFromReader(ctx, reader) 784 containsAllMetrics := true 785 for _, metric := range wantMetrics { 786 val, ok := gotMetrics[metric.Name] 787 if !ok { 788 containsAllMetrics = false 789 break 790 } 791 if !metricdatatest.AssertEqual(t, metric, val, metricdatatest.IgnoreValue(), metricdatatest.IgnoreTimestamp(), metricdatatest.IgnoreExemplars()) { 792 return fmt.Errorf("metrics data type not equal for metric: %v", metric.Name) 793 } 794 } 795 if containsAllMetrics { 796 return nil 797 } 798 time.Sleep(5 * time.Millisecond) 799 } 800 801 return fmt.Errorf("error waiting for metrics %v: %v", wantMetrics, ctx.Err()) 802 } 803 804 // TestMetricsAndTracesOptionEnabled verifies the integration of metrics and traces 805 // emitted by the OpenTelemetry instrumentation in a gRPC environment. It sets up a 806 // stub server with both metrics and traces enabled, and tests the correct emission 807 // of metrics and traces during a Unary RPC and a Streaming RPC. The test ensures 808 // that the emitted metrics reflect the operations performed, including the size of 809 // the compressed message, and verifies that tracing information is correctly recorded. 810 func (s) TestMetricsAndTracesOptionEnabled(t *testing.T) { 811 // Create default metrics options 812 mo, reader := defaultMetricsOptions(t, nil) 813 // Create default trace options 814 to, exporter := defaultTraceOptions(t) 815 816 ss := setupStubServer(t, mo, to) 817 defer ss.Stop() 818 819 ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout*2) 820 defer cancel() 821 822 // Make two RPC's, a unary RPC and a streaming RPC. These should cause 823 // certain metrics and traces to be emitted which should be observed 824 // through metrics reader and span exporter respectively. 825 if _, err := ss.Client.UnaryCall(ctx, &testpb.SimpleRequest{Payload: &testpb.Payload{ 826 Body: make([]byte, 10000), 827 }}, grpc.UseCompressor(gzip.Name)); err != nil { // Deterministic compression. 828 t.Fatalf("Unexpected error from UnaryCall: %v", err) 829 } 830 stream, err := ss.Client.FullDuplexCall(ctx) 831 if err != nil { 832 t.Fatalf("ss.Client.FullDuplexCall failed: %f", err) 833 } 834 835 stream.CloseSend() 836 if _, err = stream.Recv(); err != io.EOF { 837 t.Fatalf("stream.Recv received an unexpected error: %v, expected an EOF error", err) 838 } 839 840 // Verify metrics 841 rm := &metricdata.ResourceMetrics{} 842 reader.Collect(ctx, rm) 843 844 gotMetrics := map[string]metricdata.Metrics{} 845 for _, sm := range rm.ScopeMetrics { 846 for _, m := range sm.Metrics { 847 gotMetrics[m.Name] = m 848 } 849 } 850 851 wantMetrics := testutils.MetricData(testutils.MetricDataOptions{ 852 Target: ss.Target, 853 UnaryCompressedMessageSize: float64(57), 854 }) 855 gotMetrics = testutils.WaitForServerMetrics(ctx, t, reader, gotMetrics, wantMetrics) 856 testutils.CompareMetrics(t, gotMetrics, wantMetrics) 857 858 wantSpanInfos := []traceSpanInfo{ 859 { 860 name: "Recv.grpc.testing.TestService.UnaryCall", 861 spanKind: oteltrace.SpanKindServer.String(), 862 attributes: []attribute.KeyValue{ 863 { 864 Key: "Client", 865 Value: attribute.BoolValue(false), 866 }, 867 { 868 Key: "FailFast", 869 Value: attribute.BoolValue(false), 870 }, 871 { 872 Key: "previous-rpc-attempts", 873 Value: attribute.IntValue(0), 874 }, 875 { 876 Key: "transparent-retry", 877 Value: attribute.BoolValue(false), 878 }, 879 }, 880 events: []trace.Event{ 881 { 882 Name: "Inbound message", 883 Attributes: []attribute.KeyValue{ 884 { 885 Key: "sequence-number", 886 Value: attribute.IntValue(0), 887 }, 888 { 889 Key: "message-size", 890 Value: attribute.IntValue(10006), 891 }, 892 { 893 Key: "message-size-compressed", 894 Value: attribute.IntValue(57), 895 }, 896 }, 897 }, 898 { 899 Name: "Outbound message", 900 Attributes: []attribute.KeyValue{ 901 { 902 Key: "sequence-number", 903 Value: attribute.IntValue(0), 904 }, 905 { 906 Key: "message-size", 907 Value: attribute.IntValue(10006), 908 }, 909 { 910 Key: "message-size-compressed", 911 Value: attribute.IntValue(57), 912 }, 913 }, 914 }, 915 }, 916 }, 917 { 918 name: "Attempt.grpc.testing.TestService.UnaryCall", 919 spanKind: oteltrace.SpanKindInternal.String(), 920 attributes: []attribute.KeyValue{ 921 { 922 Key: "Client", 923 Value: attribute.BoolValue(true), 924 }, 925 { 926 Key: "FailFast", 927 Value: attribute.BoolValue(true), 928 }, 929 { 930 Key: "previous-rpc-attempts", 931 Value: attribute.IntValue(0), 932 }, 933 { 934 Key: "transparent-retry", 935 Value: attribute.BoolValue(false), 936 }, 937 }, 938 events: []trace.Event{ 939 { 940 Name: "Outbound message", 941 Attributes: []attribute.KeyValue{ 942 { 943 Key: "sequence-number", 944 Value: attribute.IntValue(0), 945 }, 946 { 947 Key: "message-size", 948 Value: attribute.IntValue(10006), 949 }, 950 { 951 Key: "message-size-compressed", 952 Value: attribute.IntValue(57), 953 }, 954 }, 955 }, 956 { 957 Name: "Inbound message", 958 Attributes: []attribute.KeyValue{ 959 { 960 Key: "sequence-number", 961 Value: attribute.IntValue(0), 962 }, 963 { 964 Key: "message-size", 965 Value: attribute.IntValue(10006), 966 }, 967 { 968 Key: "message-size-compressed", 969 Value: attribute.IntValue(57), 970 }, 971 }, 972 }, 973 }, 974 }, 975 { 976 name: "Sent.grpc.testing.TestService.UnaryCall", 977 spanKind: oteltrace.SpanKindClient.String(), 978 attributes: nil, 979 events: nil, 980 }, 981 { 982 name: "Recv.grpc.testing.TestService.FullDuplexCall", 983 spanKind: oteltrace.SpanKindServer.String(), 984 attributes: []attribute.KeyValue{ 985 { 986 Key: "Client", 987 Value: attribute.BoolValue(false), 988 }, 989 { 990 Key: "FailFast", 991 Value: attribute.BoolValue(false), 992 }, 993 { 994 Key: "previous-rpc-attempts", 995 Value: attribute.IntValue(0), 996 }, 997 { 998 Key: "transparent-retry", 999 Value: attribute.BoolValue(false), 1000 }, 1001 }, 1002 events: nil, 1003 }, 1004 { 1005 name: "Sent.grpc.testing.TestService.FullDuplexCall", 1006 spanKind: oteltrace.SpanKindClient.String(), 1007 attributes: nil, 1008 events: nil, 1009 }, 1010 { 1011 name: "Attempt.grpc.testing.TestService.FullDuplexCall", 1012 spanKind: oteltrace.SpanKindInternal.String(), 1013 attributes: []attribute.KeyValue{ 1014 { 1015 Key: "Client", 1016 Value: attribute.BoolValue(true), 1017 }, 1018 { 1019 Key: "FailFast", 1020 Value: attribute.BoolValue(true), 1021 }, 1022 { 1023 Key: "previous-rpc-attempts", 1024 Value: attribute.IntValue(0), 1025 }, 1026 { 1027 Key: "transparent-retry", 1028 Value: attribute.BoolValue(false), 1029 }, 1030 }, 1031 events: nil, 1032 }, 1033 } 1034 1035 spans, err := waitForTraceSpans(ctx, exporter, wantSpanInfos) 1036 if err != nil { 1037 t.Fatal(err) 1038 } 1039 validateTraces(t, spans, wantSpanInfos) 1040 } 1041 1042 // TestSpan verifies that the gRPC Trace Binary propagator correctly 1043 // propagates span context between a client and server using the grpc- 1044 // trace-bin header. It sets up a stub server with OpenTelemetry tracing 1045 // enabled, makes a unary RPC, and streaming RPC as well. 1046 // 1047 // Verification: 1048 // - Verifies that the span context is correctly propagated from the client 1049 // to the server, including the trace ID and span ID. 1050 // - Verifies that the server can access the span context and create 1051 // child spans as expected during the RPC calls. 1052 // - Verifies that the tracing information is recorded accurately in 1053 // the OpenTelemetry backend. 1054 func (s) TestSpan(t *testing.T) { 1055 mo, _ := defaultMetricsOptions(t, nil) 1056 // Using defaultTraceOptions to set up OpenTelemetry with an in-memory exporter. 1057 to, exporter := defaultTraceOptions(t) 1058 // Start the server with trace options. 1059 ss := setupStubServer(t, mo, to) 1060 defer ss.Stop() 1061 1062 ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) 1063 defer cancel() 1064 1065 // Make two RPC's, a unary RPC and a streaming RPC. These should cause 1066 // certain traces to be emitted, which should be observed through the 1067 // span exporter. 1068 if _, err := ss.Client.UnaryCall(ctx, &testpb.SimpleRequest{Payload: &testpb.Payload{ 1069 Body: make([]byte, 10000), 1070 }}); err != nil { 1071 t.Fatalf("Unexpected error from UnaryCall: %v", err) 1072 } 1073 stream, err := ss.Client.FullDuplexCall(ctx) 1074 if err != nil { 1075 t.Fatalf("ss.Client.FullDuplexCall failed: %f", err) 1076 } 1077 stream.CloseSend() 1078 if _, err = stream.Recv(); err != io.EOF { 1079 t.Fatalf("stream.Recv received an unexpected error: %v, expected an EOF error", err) 1080 } 1081 1082 wantSpanInfos := []traceSpanInfo{ 1083 { 1084 name: "Recv.grpc.testing.TestService.UnaryCall", 1085 spanKind: oteltrace.SpanKindServer.String(), 1086 attributes: []attribute.KeyValue{ 1087 { 1088 Key: "Client", 1089 Value: attribute.BoolValue(false), 1090 }, 1091 { 1092 Key: "FailFast", 1093 Value: attribute.BoolValue(false), 1094 }, 1095 { 1096 Key: "previous-rpc-attempts", 1097 Value: attribute.IntValue(0), 1098 }, 1099 { 1100 Key: "transparent-retry", 1101 Value: attribute.BoolValue(false), 1102 }, 1103 }, 1104 events: []trace.Event{ 1105 { 1106 Name: "Inbound message", 1107 Attributes: []attribute.KeyValue{ 1108 { 1109 Key: "sequence-number", 1110 Value: attribute.IntValue(0), 1111 }, 1112 { 1113 Key: "message-size", 1114 Value: attribute.IntValue(10006), 1115 }, 1116 }, 1117 }, 1118 { 1119 Name: "Outbound message", 1120 Attributes: []attribute.KeyValue{ 1121 { 1122 Key: "sequence-number", 1123 Value: attribute.IntValue(0), 1124 }, 1125 { 1126 Key: "message-size", 1127 Value: attribute.IntValue(10006), 1128 }, 1129 }, 1130 }, 1131 }, 1132 }, 1133 { 1134 name: "Attempt.grpc.testing.TestService.UnaryCall", 1135 spanKind: oteltrace.SpanKindInternal.String(), 1136 attributes: []attribute.KeyValue{ 1137 { 1138 Key: "Client", 1139 Value: attribute.BoolValue(true), 1140 }, 1141 { 1142 Key: "FailFast", 1143 Value: attribute.BoolValue(true), 1144 }, 1145 { 1146 Key: "previous-rpc-attempts", 1147 Value: attribute.IntValue(0), 1148 }, 1149 { 1150 Key: "transparent-retry", 1151 Value: attribute.BoolValue(false), 1152 }, 1153 }, 1154 events: []trace.Event{ 1155 { 1156 Name: "Outbound message", 1157 Attributes: []attribute.KeyValue{ 1158 { 1159 Key: "sequence-number", 1160 Value: attribute.IntValue(0), 1161 }, 1162 { 1163 Key: "message-size", 1164 Value: attribute.IntValue(10006), 1165 }, 1166 }, 1167 }, 1168 { 1169 Name: "Inbound message", 1170 Attributes: []attribute.KeyValue{ 1171 { 1172 Key: "sequence-number", 1173 Value: attribute.IntValue(0), 1174 }, 1175 { 1176 Key: "message-size", 1177 Value: attribute.IntValue(10006), 1178 }, 1179 }, 1180 }, 1181 }, 1182 }, 1183 { 1184 name: "Sent.grpc.testing.TestService.UnaryCall", 1185 spanKind: oteltrace.SpanKindClient.String(), 1186 attributes: nil, 1187 events: nil, 1188 }, 1189 { 1190 name: "Recv.grpc.testing.TestService.FullDuplexCall", 1191 spanKind: oteltrace.SpanKindServer.String(), 1192 attributes: []attribute.KeyValue{ 1193 { 1194 Key: "Client", 1195 Value: attribute.BoolValue(false), 1196 }, 1197 { 1198 Key: "FailFast", 1199 Value: attribute.BoolValue(false), 1200 }, 1201 { 1202 Key: "previous-rpc-attempts", 1203 Value: attribute.IntValue(0), 1204 }, 1205 { 1206 Key: "transparent-retry", 1207 Value: attribute.BoolValue(false), 1208 }, 1209 }, 1210 events: nil, 1211 }, 1212 { 1213 name: "Sent.grpc.testing.TestService.FullDuplexCall", 1214 spanKind: oteltrace.SpanKindClient.String(), 1215 attributes: nil, 1216 events: nil, 1217 }, 1218 { 1219 name: "Attempt.grpc.testing.TestService.FullDuplexCall", 1220 spanKind: oteltrace.SpanKindInternal.String(), 1221 attributes: []attribute.KeyValue{ 1222 { 1223 Key: "Client", 1224 Value: attribute.BoolValue(true), 1225 }, 1226 { 1227 Key: "FailFast", 1228 Value: attribute.BoolValue(true), 1229 }, 1230 { 1231 Key: "previous-rpc-attempts", 1232 Value: attribute.IntValue(0), 1233 }, 1234 { 1235 Key: "transparent-retry", 1236 Value: attribute.BoolValue(false), 1237 }, 1238 }, 1239 events: nil, 1240 }, 1241 } 1242 1243 spans, err := waitForTraceSpans(ctx, exporter, wantSpanInfos) 1244 if err != nil { 1245 t.Fatal(err) 1246 } 1247 validateTraces(t, spans, wantSpanInfos) 1248 } 1249 1250 // TestSpan_WithW3CContextPropagator sets up a stub server with OpenTelemetry tracing 1251 // enabled, makes a unary and a streaming RPC, and then asserts that the correct 1252 // number of spans are created with the expected spans. 1253 // 1254 // Verification: 1255 // - Verifies that the correct number of spans are created for both unary and 1256 // streaming RPCs. 1257 // - Verifies that the spans have the expected names and attributes, ensuring 1258 // they accurately reflect the operations performed. 1259 // - Verifies that the trace ID and span ID are correctly assigned and accessible 1260 // in the OpenTelemetry backend. 1261 func (s) TestSpan_WithW3CContextPropagator(t *testing.T) { 1262 mo, _ := defaultMetricsOptions(t, nil) 1263 // Using defaultTraceOptions to set up OpenTelemetry with an in-memory exporter 1264 to, exporter := defaultTraceOptions(t) 1265 // Set the W3CContextPropagator as part of TracingOptions. 1266 to.TextMapPropagator = propagation.NewCompositeTextMapPropagator(propagation.TraceContext{}) 1267 // Start the server with OpenTelemetry options 1268 ss := setupStubServer(t, mo, to) 1269 defer ss.Stop() 1270 1271 ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) 1272 defer cancel() 1273 1274 // Make two RPC's, a unary RPC and a streaming RPC. These should cause 1275 // certain traces to be emitted, which should be observed through the 1276 // span exporter. 1277 if _, err := ss.Client.UnaryCall(ctx, &testpb.SimpleRequest{Payload: &testpb.Payload{ 1278 Body: make([]byte, 10000), 1279 }}); err != nil { 1280 t.Fatalf("Unexpected error from UnaryCall: %v", err) 1281 } 1282 stream, err := ss.Client.FullDuplexCall(ctx) 1283 if err != nil { 1284 t.Fatalf("ss.Client.FullDuplexCall failed: %f", err) 1285 } 1286 1287 stream.CloseSend() 1288 if _, err = stream.Recv(); err != io.EOF { 1289 t.Fatalf("stream.Recv received an unexpected error: %v, expected an EOF error", err) 1290 } 1291 1292 wantSpanInfos := []traceSpanInfo{ 1293 { 1294 name: "Recv.grpc.testing.TestService.UnaryCall", 1295 spanKind: oteltrace.SpanKindServer.String(), 1296 attributes: []attribute.KeyValue{ 1297 { 1298 Key: "Client", 1299 Value: attribute.BoolValue(false), 1300 }, 1301 { 1302 Key: "FailFast", 1303 Value: attribute.BoolValue(false), 1304 }, 1305 { 1306 Key: "previous-rpc-attempts", 1307 Value: attribute.IntValue(0), 1308 }, 1309 { 1310 Key: "transparent-retry", 1311 Value: attribute.BoolValue(false), 1312 }, 1313 }, 1314 events: []trace.Event{ 1315 { 1316 Name: "Inbound message", 1317 Attributes: []attribute.KeyValue{ 1318 { 1319 Key: "sequence-number", 1320 Value: attribute.IntValue(0), 1321 }, 1322 { 1323 Key: "message-size", 1324 Value: attribute.IntValue(10006), 1325 }, 1326 }, 1327 }, 1328 { 1329 Name: "Outbound message", 1330 Attributes: []attribute.KeyValue{ 1331 { 1332 Key: "sequence-number", 1333 Value: attribute.IntValue(0), 1334 }, 1335 { 1336 Key: "message-size", 1337 Value: attribute.IntValue(10006), 1338 }, 1339 }, 1340 }, 1341 }, 1342 }, 1343 { 1344 name: "Attempt.grpc.testing.TestService.UnaryCall", 1345 spanKind: oteltrace.SpanKindInternal.String(), 1346 attributes: []attribute.KeyValue{ 1347 { 1348 Key: "Client", 1349 Value: attribute.BoolValue(true), 1350 }, 1351 { 1352 Key: "FailFast", 1353 Value: attribute.BoolValue(true), 1354 }, 1355 { 1356 Key: "previous-rpc-attempts", 1357 Value: attribute.IntValue(0), 1358 }, 1359 { 1360 Key: "transparent-retry", 1361 Value: attribute.BoolValue(false), 1362 }, 1363 }, 1364 events: []trace.Event{ 1365 { 1366 Name: "Outbound message", 1367 Attributes: []attribute.KeyValue{ 1368 { 1369 Key: "sequence-number", 1370 Value: attribute.IntValue(0), 1371 }, 1372 { 1373 Key: "message-size", 1374 Value: attribute.IntValue(10006), 1375 }, 1376 }, 1377 }, 1378 { 1379 Name: "Inbound message", 1380 Attributes: []attribute.KeyValue{ 1381 { 1382 Key: "sequence-number", 1383 Value: attribute.IntValue(0), 1384 }, 1385 { 1386 Key: "message-size", 1387 Value: attribute.IntValue(10006), 1388 }, 1389 }, 1390 }, 1391 }, 1392 }, 1393 { 1394 name: "Sent.grpc.testing.TestService.UnaryCall", 1395 spanKind: oteltrace.SpanKindClient.String(), 1396 attributes: nil, 1397 events: nil, 1398 }, 1399 { 1400 name: "Recv.grpc.testing.TestService.FullDuplexCall", 1401 spanKind: oteltrace.SpanKindServer.String(), 1402 attributes: []attribute.KeyValue{ 1403 { 1404 Key: "Client", 1405 Value: attribute.BoolValue(false), 1406 }, 1407 { 1408 Key: "FailFast", 1409 Value: attribute.BoolValue(false), 1410 }, 1411 { 1412 Key: "previous-rpc-attempts", 1413 Value: attribute.IntValue(0), 1414 }, 1415 { 1416 Key: "transparent-retry", 1417 Value: attribute.BoolValue(false), 1418 }, 1419 }, 1420 events: nil, 1421 }, 1422 { 1423 name: "Sent.grpc.testing.TestService.FullDuplexCall", 1424 spanKind: oteltrace.SpanKindClient.String(), 1425 attributes: nil, 1426 events: nil, 1427 }, 1428 { 1429 name: "Attempt.grpc.testing.TestService.FullDuplexCall", 1430 spanKind: oteltrace.SpanKindInternal.String(), 1431 attributes: []attribute.KeyValue{ 1432 { 1433 Key: "Client", 1434 Value: attribute.BoolValue(true), 1435 }, 1436 { 1437 Key: "FailFast", 1438 Value: attribute.BoolValue(true), 1439 }, 1440 { 1441 Key: "previous-rpc-attempts", 1442 Value: attribute.IntValue(0), 1443 }, 1444 { 1445 Key: "transparent-retry", 1446 Value: attribute.BoolValue(false), 1447 }, 1448 }, 1449 events: nil, 1450 }, 1451 } 1452 1453 spans, err := waitForTraceSpans(ctx, exporter, wantSpanInfos) 1454 if err != nil { 1455 t.Fatal(err) 1456 } 1457 validateTraces(t, spans, wantSpanInfos) 1458 } 1459 1460 // TestMetricsAndTracesDisabled verifies that RPCs call succeed as expected 1461 // when metrics and traces are disabled in the OpenTelemetry instrumentation. 1462 func (s) TestMetricsAndTracesDisabled(t *testing.T) { 1463 ss := &stubserver.StubServer{ 1464 UnaryCallF: func(_ context.Context, in *testpb.SimpleRequest) (*testpb.SimpleResponse, error) { 1465 return &testpb.SimpleResponse{Payload: &testpb.Payload{ 1466 Body: make([]byte, len(in.GetPayload().GetBody())), 1467 }}, nil 1468 }, 1469 FullDuplexCallF: func(stream testgrpc.TestService_FullDuplexCallServer) error { 1470 for { 1471 _, err := stream.Recv() 1472 if err == io.EOF { 1473 return nil 1474 } 1475 } 1476 }, 1477 } 1478 1479 if err := ss.Start(nil); err != nil { 1480 t.Fatalf("Error starting endpoint server: %v", err) 1481 } 1482 defer ss.Stop() 1483 1484 ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) 1485 defer cancel() 1486 1487 // Make two RPCs, a unary RPC and a streaming RPC. 1488 if _, err := ss.Client.UnaryCall(ctx, &testpb.SimpleRequest{Payload: &testpb.Payload{ 1489 Body: make([]byte, 10000), 1490 }}); err != nil { 1491 t.Fatalf("Unexpected error from UnaryCall: %v", err) 1492 } 1493 stream, err := ss.Client.FullDuplexCall(ctx) 1494 if err != nil { 1495 t.Fatalf("ss.Client.FullDuplexCall failed: %v", err) 1496 } 1497 1498 stream.CloseSend() 1499 if _, err = stream.Recv(); err != io.EOF { 1500 t.Fatalf("stream.Recv received an unexpected error: %v, expected an EOF error", err) 1501 } 1502 } 1503 1504 // TestRPCSpanErrorStatus verifies that errors during RPC calls are correctly 1505 // reflected in the span status. It simulates a unary RPC that returns an error 1506 // and checks that the span's status is set to error with the appropriate message. 1507 func (s) TestRPCSpanErrorStatus(t *testing.T) { 1508 mo, _ := defaultMetricsOptions(t, nil) 1509 // Using defaultTraceOptions to set up OpenTelemetry with an in-memory exporter 1510 to, exporter := defaultTraceOptions(t) 1511 const rpcErrorMsg = "unary call: internal server error" 1512 ss := &stubserver.StubServer{ 1513 UnaryCallF: func(context.Context, *testpb.SimpleRequest) (*testpb.SimpleResponse, error) { 1514 return nil, fmt.Errorf("%v", rpcErrorMsg) 1515 }, 1516 } 1517 1518 otelOptions := opentelemetry.Options{ 1519 MetricsOptions: *mo, 1520 TraceOptions: *to, 1521 } 1522 1523 if err := ss.Start([]grpc.ServerOption{opentelemetry.ServerOption(otelOptions)}, 1524 opentelemetry.DialOption(otelOptions)); err != nil { 1525 t.Fatalf("Error starting endpoint server: %v", err) 1526 } 1527 defer ss.Stop() 1528 1529 ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) 1530 defer cancel() 1531 1532 ss.Client.UnaryCall(ctx, &testpb.SimpleRequest{Payload: &testpb.Payload{ 1533 Body: make([]byte, 10000), 1534 }}) 1535 1536 // Verify spans has error status with rpcErrorMsg as error message. 1537 for ; len(exporter.GetSpans()) == 0 && ctx.Err() == nil; <-time.After(time.Millisecond) { 1538 // wait until trace spans are collected 1539 } 1540 spans := exporter.GetSpans() 1541 if got, want := spans[0].Status.Description, rpcErrorMsg; got != want { 1542 t.Fatalf("got rpc error %s, want %s", spans[0].Status.Description, rpcErrorMsg) 1543 } 1544 } 1545 1546 const delayedResolutionEventName = "Delayed name resolution complete" 1547 1548 // TestTraceSpan_WithRetriesAndNameResolutionDelay verifies that 1549 // "Delayed name resolution complete" event is recorded in the call trace span 1550 // only once if any of the retry attempt encountered a delay in name resolution 1551 func (s) TestTraceSpan_WithRetriesAndNameResolutionDelay(t *testing.T) { 1552 tests := []struct { 1553 name string 1554 setupStub func() *stubserver.StubServer 1555 doCall func(context.Context, testgrpc.TestServiceClient) error 1556 spanName string 1557 }{ 1558 { 1559 name: "unary", 1560 setupStub: func() *stubserver.StubServer { 1561 return &stubserver.StubServer{ 1562 UnaryCallF: func(ctx context.Context, _ *testpb.SimpleRequest) (*testpb.SimpleResponse, error) { 1563 md, _ := metadata.FromIncomingContext(ctx) 1564 headerAttempts := 0 1565 if h := md["grpc-previous-rpc-attempts"]; len(h) > 0 { 1566 headerAttempts, _ = strconv.Atoi(h[0]) 1567 } 1568 if headerAttempts < 2 { 1569 return nil, status.Errorf(codes.Unavailable, "retry (%d)", headerAttempts) 1570 } 1571 return &testpb.SimpleResponse{}, nil 1572 }, 1573 } 1574 }, 1575 doCall: func(ctx context.Context, client testgrpc.TestServiceClient) error { 1576 _, err := client.UnaryCall(ctx, &testpb.SimpleRequest{}) 1577 return err 1578 }, 1579 spanName: "Sent.grpc.testing.TestService.UnaryCall", 1580 }, 1581 { 1582 name: "streaming", 1583 setupStub: func() *stubserver.StubServer { 1584 return &stubserver.StubServer{ 1585 FullDuplexCallF: func(stream testgrpc.TestService_FullDuplexCallServer) error { 1586 md, _ := metadata.FromIncomingContext(stream.Context()) 1587 headerAttempts := 0 1588 if h := md["grpc-previous-rpc-attempts"]; len(h) > 0 { 1589 headerAttempts, _ = strconv.Atoi(h[0]) 1590 } 1591 if headerAttempts < 2 { 1592 return status.Errorf(codes.Unavailable, "retry (%d)", headerAttempts) 1593 } 1594 for { 1595 _, err := stream.Recv() 1596 if err == io.EOF { 1597 return nil 1598 } 1599 if err != nil { 1600 return err 1601 } 1602 } 1603 }, 1604 } 1605 }, 1606 doCall: func(ctx context.Context, client testgrpc.TestServiceClient) error { 1607 stream, err := client.FullDuplexCall(ctx) 1608 if err != nil { 1609 return err 1610 } 1611 if err := stream.Send(&testpb.StreamingOutputCallRequest{}); err != nil { 1612 return err 1613 } 1614 if err := stream.CloseSend(); err != nil { 1615 return err 1616 } 1617 _, err = stream.Recv() 1618 if err != nil && err != io.EOF { 1619 return err 1620 } 1621 return nil 1622 }, 1623 spanName: "Sent.grpc.testing.TestService.FullDuplexCall", 1624 }, 1625 } 1626 1627 for _, tt := range tests { 1628 t.Run(tt.name, func(t *testing.T) { 1629 resolutionWait := grpcsync.NewEvent() 1630 prevHook := internal.NewStreamWaitingForResolver 1631 internal.NewStreamWaitingForResolver = func() { resolutionWait.Fire() } 1632 defer func() { internal.NewStreamWaitingForResolver = prevHook }() 1633 1634 mo, _ := defaultMetricsOptions(t, nil) 1635 to, exporter := defaultTraceOptions(t) 1636 rb := manual.NewBuilderWithScheme("delayed") 1637 ss := tt.setupStub() 1638 opts := opentelemetry.Options{MetricsOptions: *mo, TraceOptions: *to} 1639 if err := ss.Start([]grpc.ServerOption{opentelemetry.ServerOption(opts)}); err != nil { 1640 t.Fatal(err) 1641 } 1642 defer ss.Stop() 1643 1644 retryPolicy := `{ 1645 "methodConfig": [{ 1646 "name": [{"service": "grpc.testing.TestService"}], 1647 "retryPolicy": { 1648 "maxAttempts": 3, 1649 "initialBackoff": "0.05s", 1650 "maxBackoff": "0.2s", 1651 "backoffMultiplier": 1.0, 1652 "retryableStatusCodes": ["UNAVAILABLE"] 1653 } 1654 }] 1655 }` 1656 cc, err := grpc.NewClient( 1657 rb.Scheme()+":///test.server", 1658 grpc.WithTransportCredentials(insecure.NewCredentials()), 1659 grpc.WithResolvers(rb), 1660 opentelemetry.DialOption(opts), 1661 grpc.WithDefaultServiceConfig(retryPolicy), 1662 ) 1663 if err != nil { 1664 t.Fatal(err) 1665 } 1666 defer cc.Close() 1667 1668 client := testgrpc.NewTestServiceClient(cc) 1669 ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) 1670 defer cancel() 1671 1672 go func() { 1673 <-resolutionWait.Done() 1674 rb.UpdateState(resolver.State{Addresses: []resolver.Address{{Addr: ss.Address}}}) 1675 }() 1676 if err := tt.doCall(ctx, client); err != nil { 1677 t.Fatalf("%s call failed: %v", tt.name, err) 1678 } 1679 1680 wantSpanInfo := traceSpanInfo{ 1681 name: tt.spanName, 1682 spanKind: oteltrace.SpanKindClient.String(), 1683 events: []trace.Event{{Name: delayedResolutionEventName}}, 1684 } 1685 spans, err := waitForTraceSpans(ctx, exporter, []traceSpanInfo{wantSpanInfo}) 1686 if err != nil { 1687 t.Fatal(err) 1688 } 1689 verifyTrace(t, spans, wantSpanInfo) 1690 }) 1691 } 1692 } 1693 1694 func verifyTrace(t *testing.T, spans tracetest.SpanStubs, want traceSpanInfo) { 1695 match := false 1696 for _, span := range spans { 1697 if span.Name == want.name && span.SpanKind.String() == want.spanKind { 1698 match = true 1699 if diff := cmp.Diff(want.events, span.Events, cmpopts.IgnoreFields(trace.Event{}, "Time")); diff != "" { 1700 t.Errorf("Span event mismatch for %q (kind: %s) (-want +got):\n%s", 1701 want.name, want.spanKind, diff) 1702 } 1703 break 1704 } 1705 } 1706 if !match { 1707 t.Errorf("Expected span not found: %q (kind: %s)", want.name, want.spanKind) 1708 } 1709 } 1710 1711 // TestStreamingRPC_TraceSequenceNumbers verifies that sequence numbers 1712 // are incremented correctly for multiple messages sent and received 1713 // during a streaming RPC. 1714 func (s) TestStreamingRPC_TraceSequenceNumbers(t *testing.T) { 1715 mo, _ := defaultMetricsOptions(t, nil) 1716 to, exporter := defaultTraceOptions(t) 1717 ss := setupStubServer(t, mo, to) 1718 defer ss.Stop() 1719 1720 ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) 1721 defer cancel() 1722 1723 stream, err := ss.Client.FullDuplexCall(ctx) 1724 if err != nil { 1725 t.Fatalf("ss.Client.FullDuplexCall failed: %f", err) 1726 } 1727 1728 const numMessages = 3 1729 var wantOutboundEvents, wantInboundEvents []trace.Event 1730 for i := range numMessages { 1731 if err := stream.Send(&testpb.StreamingOutputCallRequest{}); err != nil { 1732 t.Fatalf("stream.Send() failed at message %d: %v", i, err) 1733 } 1734 wantOutboundEvents = append(wantOutboundEvents, trace.Event{ 1735 Name: "Outbound message", 1736 Attributes: []attribute.KeyValue{ 1737 attribute.Int("sequence-number", i), 1738 attribute.Int("message-size", 0), 1739 }, 1740 }) 1741 wantInboundEvents = append(wantInboundEvents, trace.Event{ 1742 Name: "Inbound message", 1743 Attributes: []attribute.KeyValue{ 1744 attribute.Int("sequence-number", i), 1745 attribute.Int("message-size", 0), 1746 }, 1747 }) 1748 } 1749 stream.CloseSend() 1750 _, err = stream.Recv() 1751 if err != io.EOF { 1752 t.Fatalf("stream.Recv() got unexpected err=%v; want io.EOF", err) 1753 } 1754 1755 wantSpanInfos := []traceSpanInfo{ 1756 { 1757 name: "Sent.grpc.testing.TestService.FullDuplexCall", 1758 spanKind: oteltrace.SpanKindClient.String(), 1759 events: nil, 1760 attributes: nil, 1761 }, 1762 { 1763 name: "Recv.grpc.testing.TestService.FullDuplexCall", 1764 spanKind: oteltrace.SpanKindServer.String(), 1765 events: wantInboundEvents, 1766 attributes: []attribute.KeyValue{ 1767 attribute.Bool("Client", false), 1768 attribute.Bool("FailFast", false), 1769 attribute.Int("previous-rpc-attempts", 0), 1770 attribute.Bool("transparent-retry", false), 1771 }, 1772 }, 1773 { 1774 name: "Attempt.grpc.testing.TestService.FullDuplexCall", 1775 spanKind: oteltrace.SpanKindInternal.String(), 1776 events: wantOutboundEvents, 1777 attributes: []attribute.KeyValue{ 1778 attribute.Bool("Client", true), 1779 attribute.Bool("FailFast", true), 1780 attribute.Int("previous-rpc-attempts", 0), 1781 attribute.Bool("transparent-retry", false), 1782 }, 1783 }, 1784 } 1785 1786 spans, err := waitForTraceSpans(ctx, exporter, wantSpanInfos) 1787 if err != nil { 1788 t.Fatal(err) 1789 } 1790 validateTraces(t, spans, wantSpanInfos) 1791 }