/*
 * Copyright 2024 gRPC authors.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package opentelemetry_test

import (
	"context"
	"fmt"
	"io"
	"slices"
	"testing"
	"time"

	otelcodes "go.opentelemetry.io/otel/codes"
	oteltrace "go.opentelemetry.io/otel/trace"

	v3clusterpb "github.com/envoyproxy/go-control-plane/envoy/config/cluster/v3"
	v3corepb "github.com/envoyproxy/go-control-plane/envoy/config/core/v3"
	v3endpointpb "github.com/envoyproxy/go-control-plane/envoy/config/endpoint/v3"
	v3listenerpb "github.com/envoyproxy/go-control-plane/envoy/config/listener/v3"
	v3routepb "github.com/envoyproxy/go-control-plane/envoy/config/route/v3"
	v3clientsideweightedroundrobinpb "github.com/envoyproxy/go-control-plane/envoy/extensions/load_balancing_policies/client_side_weighted_round_robin/v3"
	v3wrrlocalitypb "github.com/envoyproxy/go-control-plane/envoy/extensions/load_balancing_policies/wrr_locality/v3"
	"github.com/google/go-cmp/cmp"
	"github.com/google/go-cmp/cmp/cmpopts"
	"google.golang.org/protobuf/proto"
	"google.golang.org/protobuf/types/known/durationpb"
	"google.golang.org/protobuf/types/known/wrapperspb"

	"go.opentelemetry.io/otel/attribute"
	"go.opentelemetry.io/otel/propagation"
	"go.opentelemetry.io/otel/sdk/metric"
	"go.opentelemetry.io/otel/sdk/metric/metricdata"
	"go.opentelemetry.io/otel/sdk/metric/metricdata/metricdatatest"
	"go.opentelemetry.io/otel/sdk/trace"
	"go.opentelemetry.io/otel/sdk/trace/tracetest"
	"google.golang.org/grpc"
	"google.golang.org/grpc/credentials/insecure"
	"google.golang.org/grpc/encoding/gzip"
	experimental "google.golang.org/grpc/experimental/opentelemetry"
	"google.golang.org/grpc/internal/grpcsync"
	"google.golang.org/grpc/internal/grpctest"
	"google.golang.org/grpc/internal/stubserver"
	itestutils "google.golang.org/grpc/internal/testutils"
	"google.golang.org/grpc/internal/testutils/xds/e2e"
	setup "google.golang.org/grpc/internal/testutils/xds/e2e/setup"
	testgrpc "google.golang.org/grpc/interop/grpc_testing"
	testpb "google.golang.org/grpc/interop/grpc_testing"
	"google.golang.org/grpc/orca"
	"google.golang.org/grpc/stats/opentelemetry"
	"google.golang.org/grpc/stats/opentelemetry/internal/testutils"
)

// defaultTestTimeout is the base deadline used for the contexts that the
// tests in this file create for their RPCs and polling loops.
var defaultTestTimeout = 5 * time.Second

// s is the receiver on which the tests in this file are declared as methods.
// It embeds grpctest.Tester and is handed to grpctest.RunSubTests by Test.
type s struct {
	grpctest.Tester
}

// Test is the single `go test` entry point of this file; it delegates to
// grpctest.RunSubTests with an s value.
func Test(t *testing.T) {
	grpctest.RunSubTests(t, s{})
}

// traceSpanInfo is the information received about the trace span. It contains
// subset of information that is needed to verify if correct trace is being
// attributed to the rpc.
80 type traceSpanInfo struct { 81 spanKind string 82 name string 83 events []trace.Event 84 attributes []attribute.KeyValue 85 } 86 87 // traceSpanInfoMapKey is the key struct for constructing a map of trace spans 88 // retrievable by span name and span kind 89 type traceSpanInfoMapKey struct { 90 spanName string 91 spanKind string 92 } 93 94 // defaultMetricsOptions creates default metrics options 95 func defaultMetricsOptions(_ *testing.T, methodAttributeFilter func(string) bool) (*opentelemetry.MetricsOptions, *metric.ManualReader) { 96 reader := metric.NewManualReader() 97 provider := metric.NewMeterProvider(metric.WithReader(reader)) 98 metricsOptions := &opentelemetry.MetricsOptions{ 99 MeterProvider: provider, 100 Metrics: opentelemetry.DefaultMetrics(), 101 MethodAttributeFilter: methodAttributeFilter, 102 } 103 return metricsOptions, reader 104 } 105 106 // defaultTraceOptions function to create default trace options 107 func defaultTraceOptions(_ *testing.T) (*experimental.TraceOptions, *tracetest.InMemoryExporter) { 108 spanExporter := tracetest.NewInMemoryExporter() 109 spanProcessor := trace.NewSimpleSpanProcessor(spanExporter) 110 tracerProvider := trace.NewTracerProvider(trace.WithSpanProcessor(spanProcessor)) 111 textMapPropagator := propagation.NewCompositeTextMapPropagator(opentelemetry.GRPCTraceBinPropagator{}) 112 traceOptions := &experimental.TraceOptions{ 113 TracerProvider: tracerProvider, 114 TextMapPropagator: textMapPropagator, 115 } 116 return traceOptions, spanExporter 117 } 118 119 // setupStubServer creates a stub server with OpenTelemetry component configured on client 120 // and server side and returns the server. 
121 func setupStubServer(t *testing.T, metricsOptions *opentelemetry.MetricsOptions, traceOptions *experimental.TraceOptions) *stubserver.StubServer { 122 ss := &stubserver.StubServer{ 123 UnaryCallF: func(_ context.Context, in *testpb.SimpleRequest) (*testpb.SimpleResponse, error) { 124 return &testpb.SimpleResponse{Payload: &testpb.Payload{ 125 Body: make([]byte, len(in.GetPayload().GetBody())), 126 }}, nil 127 }, 128 FullDuplexCallF: func(stream testgrpc.TestService_FullDuplexCallServer) error { 129 for { 130 _, err := stream.Recv() 131 if err == io.EOF { 132 return nil 133 } 134 } 135 }, 136 } 137 138 otelOptions := opentelemetry.Options{} 139 if metricsOptions != nil { 140 otelOptions.MetricsOptions = *metricsOptions 141 } 142 if traceOptions != nil { 143 otelOptions.TraceOptions = *traceOptions 144 } 145 146 if err := ss.Start([]grpc.ServerOption{opentelemetry.ServerOption(otelOptions)}, 147 opentelemetry.DialOption(otelOptions)); err != nil { 148 t.Fatalf("Error starting endpoint server: %v", err) 149 } 150 return ss 151 } 152 153 // waitForTraceSpans waits until the in-memory span exporter has received the 154 // expected trace spans based on span name and kind. It polls the exporter at a 155 // short interval until the desired spans are available or the context is 156 // cancelled. 157 // 158 // Returns the collected spans or an error if the context deadline is exceeded 159 // before the expected spans are exported. 
160 func waitForTraceSpans(ctx context.Context, exporter *tracetest.InMemoryExporter, wantSpans []traceSpanInfo) (tracetest.SpanStubs, error) { 161 for ; ctx.Err() == nil; <-time.After(time.Millisecond) { 162 spans := exporter.GetSpans() 163 missingAnySpan := false 164 for _, wantSpan := range wantSpans { 165 if !slices.ContainsFunc(spans, func(span tracetest.SpanStub) bool { 166 return span.Name == wantSpan.name && span.SpanKind.String() == wantSpan.spanKind 167 }) { 168 missingAnySpan = true 169 } 170 } 171 if !missingAnySpan { 172 return spans, nil 173 } 174 } 175 return nil, fmt.Errorf("error waiting for complete trace spans %v: %v", wantSpans, ctx.Err()) 176 } 177 178 // validateTraces first first groups the received spans by their TraceID. For 179 // each trace group, it identifies the client, server, and attempt spans for 180 // both unary and streaming RPCs. It checks that the expected spans are 181 // present and that the server spans have the correct parent (attempt span). 182 // Finally, it compares the content of each span (name, kind, attributes, 183 // events) against the provided expected spans information. 184 func validateTraces(t *testing.T, spans tracetest.SpanStubs, wantSpanInfos []traceSpanInfo) { 185 // Group spans by TraceID. 186 traceSpans := make(map[oteltrace.TraceID][]tracetest.SpanStub) 187 for _, span := range spans { 188 traceID := span.SpanContext.TraceID() 189 traceSpans[traceID] = append(traceSpans[traceID], span) 190 } 191 192 // For each trace group, verify relationships and content. 
193 for traceID, spans := range traceSpans { 194 var unaryClient, unaryServer, unaryAttempt *tracetest.SpanStub 195 var streamClient, streamServer, streamAttempt *tracetest.SpanStub 196 var isUnary, isStream bool 197 198 for _, span := range spans { 199 switch { 200 case span.Name == "grpc.testing.TestService.UnaryCall": 201 isUnary = true 202 if span.SpanKind == oteltrace.SpanKindClient { 203 unaryClient = &span 204 } else { 205 unaryServer = &span 206 } 207 case span.Name == "Attempt.grpc.testing.TestService.UnaryCall": 208 isUnary = true 209 unaryAttempt = &span 210 case span.Name == "grpc.testing.TestService.FullDuplexCall": 211 isStream = true 212 if span.SpanKind == oteltrace.SpanKindClient { 213 streamClient = &span 214 } else { 215 streamServer = &span 216 } 217 case span.Name == "Attempt.grpc.testing.TestService.FullDuplexCall": 218 isStream = true 219 streamAttempt = &span 220 } 221 } 222 223 if isUnary { 224 // Verify Unary Call Spans. 225 if unaryClient == nil { 226 t.Error("Unary call client span not found") 227 } 228 if unaryServer == nil { 229 t.Error("Unary call server span not found") 230 } 231 if unaryAttempt == nil { 232 t.Error("Unary call attempt span not found") 233 } 234 // Check TraceID consistency. 235 if unaryClient != nil && unaryClient.SpanContext.TraceID() != traceID || unaryServer.SpanContext.TraceID() != traceID { 236 t.Error("Unary call spans have inconsistent TraceIDs") 237 } 238 // Check parent-child relationship via SpanID. 239 if unaryServer != nil && unaryServer.Parent.SpanID() != unaryAttempt.SpanContext.SpanID() { 240 t.Error("Unary server span parent does not match attempt span ID") 241 } 242 } 243 244 if isStream { 245 // Verify Streaming Call Spans. 
246 if streamClient == nil { 247 t.Error("Streaming call client span not found") 248 } 249 if streamServer == nil { 250 t.Error("Streaming call server span not found") 251 } 252 if streamAttempt == nil { 253 t.Error("Streaming call attempt span not found") 254 } 255 // Check TraceID consistency. 256 if streamClient != nil && streamClient.SpanContext.TraceID() != traceID || streamServer.SpanContext.TraceID() != traceID { 257 t.Error("Streaming call spans have inconsistent TraceIDs") 258 } 259 if streamServer != nil && streamServer.Parent.SpanID() != streamAttempt.SpanContext.SpanID() { 260 t.Error("Streaming server span parent does not match attempt span ID") 261 } 262 } 263 } 264 265 // Constructs a map from a slice of traceSpanInfo to retrieve the 266 // corresponding expected span info based on span name and span kind 267 // for comparison. 268 wantSpanInfosMap := make(map[traceSpanInfoMapKey]traceSpanInfo) 269 for _, info := range wantSpanInfos { 270 key := traceSpanInfoMapKey{spanName: info.name, spanKind: info.spanKind} 271 wantSpanInfosMap[key] = info 272 } 273 274 // Compare retrieved spans with expected spans. 275 for _, span := range spans { 276 // Check that the attempt span has the correct status. 277 if got, want := span.Status.Code, otelcodes.Ok; got != want { 278 t.Errorf("Got status code %v, want %v", got, want) 279 } 280 281 // Retrieve the corresponding expected span info based on span name and 282 // span kind to compare. 
283 want, ok := wantSpanInfosMap[traceSpanInfoMapKey{spanName: span.Name, spanKind: span.SpanKind.String()}] 284 if !ok { 285 t.Errorf("Unexpected span: %v", span) 286 continue 287 } 288 289 // comparers 290 attributesSort := cmpopts.SortSlices(func(a, b attribute.KeyValue) bool { 291 return a.Key < b.Key 292 }) 293 attributesValueComparable := cmpopts.EquateComparable(attribute.KeyValue{}.Value) 294 eventsTimeIgnore := cmpopts.IgnoreFields(trace.Event{}, "Time") 295 296 // attributes 297 if diff := cmp.Diff(want.attributes, span.Attributes, attributesSort, attributesValueComparable); diff != "" { 298 t.Errorf("Attributes mismatch for span %s (-want +got):\n%s", span.Name, diff) 299 } 300 // events 301 if diff := cmp.Diff(want.events, span.Events, attributesSort, attributesValueComparable, eventsTimeIgnore); diff != "" { 302 t.Errorf("Events mismatch for span %s (-want +got):\n%s", span.Name, diff) 303 } 304 } 305 } 306 307 // TestMethodAttributeFilter tests the method attribute filter. The method 308 // filter set should bucket the grpc.method attribute into "other" if the method 309 // attribute filter specifies. 310 func (s) TestMethodAttributeFilter(t *testing.T) { 311 maf := func(str string) bool { 312 // Will allow duplex/any other type of RPC. 313 return str != testgrpc.TestService_UnaryCall_FullMethodName 314 } 315 mo, reader := defaultMetricsOptions(t, maf) 316 ss := setupStubServer(t, mo, nil) 317 defer ss.Stop() 318 319 // Make a Unary and Streaming RPC. The Unary RPC should be filtered by the 320 // method attribute filter, and the Full Duplex (Streaming) RPC should not. 
321 ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) 322 defer cancel() 323 if _, err := ss.Client.UnaryCall(ctx, &testpb.SimpleRequest{Payload: &testpb.Payload{ 324 Body: make([]byte, 10000), 325 }}); err != nil { 326 t.Fatalf("Unexpected error from UnaryCall: %v", err) 327 } 328 stream, err := ss.Client.FullDuplexCall(ctx) 329 if err != nil { 330 t.Fatalf("ss.Client.FullDuplexCall failed: %f", err) 331 } 332 333 stream.CloseSend() 334 if _, err = stream.Recv(); err != io.EOF { 335 t.Fatalf("stream.Recv received an unexpected error: %v, expected an EOF error", err) 336 } 337 rm := &metricdata.ResourceMetrics{} 338 reader.Collect(ctx, rm) 339 gotMetrics := map[string]metricdata.Metrics{} 340 for _, sm := range rm.ScopeMetrics { 341 for _, m := range sm.Metrics { 342 gotMetrics[m.Name] = m 343 } 344 } 345 346 wantMetrics := []metricdata.Metrics{ 347 { 348 Name: "grpc.client.attempt.started", 349 Description: "Number of client call attempts started.", 350 Unit: "attempt", 351 Data: metricdata.Sum[int64]{ 352 DataPoints: []metricdata.DataPoint[int64]{ 353 { 354 Attributes: attribute.NewSet(attribute.String("grpc.method", "grpc.testing.TestService/UnaryCall"), attribute.String("grpc.target", ss.Target)), 355 Value: 1, 356 }, 357 { 358 Attributes: attribute.NewSet(attribute.String("grpc.method", "grpc.testing.TestService/FullDuplexCall"), attribute.String("grpc.target", ss.Target)), 359 Value: 1, 360 }, 361 }, 362 Temporality: metricdata.CumulativeTemporality, 363 IsMonotonic: true, 364 }, 365 }, 366 { 367 Name: "grpc.server.call.duration", 368 Description: "End-to-end time taken to complete a call from server transport's perspective.", 369 Unit: "s", 370 Data: metricdata.Histogram[float64]{ 371 DataPoints: []metricdata.HistogramDataPoint[float64]{ 372 { // Method should go to "other" due to the method attribute filter. 
373 Attributes: attribute.NewSet(attribute.String("grpc.method", "other"), attribute.String("grpc.status", "OK")), 374 Count: 1, 375 Bounds: testutils.DefaultLatencyBounds, 376 }, 377 { 378 Attributes: attribute.NewSet(attribute.String("grpc.method", "grpc.testing.TestService/FullDuplexCall"), attribute.String("grpc.status", "OK")), 379 Count: 1, 380 Bounds: testutils.DefaultLatencyBounds, 381 }, 382 }, 383 Temporality: metricdata.CumulativeTemporality, 384 }, 385 }, 386 } 387 388 gotMetrics = testutils.WaitForServerMetrics(ctx, t, reader, gotMetrics, wantMetrics) 389 testutils.CompareMetrics(t, gotMetrics, wantMetrics) 390 } 391 392 // TestAllMetricsOneFunction tests emitted metrics from OpenTelemetry 393 // instrumentation component. It then configures a system with a gRPC Client and 394 // gRPC server with the OpenTelemetry Dial and Server Option configured 395 // specifying all the metrics provided by this package, and makes a Unary RPC 396 // and a Streaming RPC. These two RPCs should cause certain recording for each 397 // registered metric observed through a Manual Metrics Reader on the provided 398 // OpenTelemetry SDK's Meter Provider. It then makes an RPC that is unregistered 399 // on the Client (no StaticMethodCallOption set) and Server. The method 400 // attribute on subsequent metrics should be bucketed in "other". 401 func (s) TestAllMetricsOneFunction(t *testing.T) { 402 mo, reader := defaultMetricsOptions(t, nil) 403 ss := setupStubServer(t, mo, nil) 404 defer ss.Stop() 405 ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) 406 defer cancel() 407 // Make two RPC's, a unary RPC and a streaming RPC. These should cause 408 // certain metrics to be emitted, which should be observed through the 409 // Metric Reader. 410 if _, err := ss.Client.UnaryCall(ctx, &testpb.SimpleRequest{Payload: &testpb.Payload{ 411 Body: make([]byte, 10000), 412 }}, grpc.UseCompressor(gzip.Name)); err != nil { // Deterministic compression. 
413 t.Fatalf("Unexpected error from UnaryCall: %v", err) 414 } 415 stream, err := ss.Client.FullDuplexCall(ctx) 416 if err != nil { 417 t.Fatalf("ss.Client.FullDuplexCall failed: %f", err) 418 } 419 420 stream.CloseSend() 421 if _, err = stream.Recv(); err != io.EOF { 422 t.Fatalf("stream.Recv received an unexpected error: %v, expected an EOF error", err) 423 } 424 425 rm := &metricdata.ResourceMetrics{} 426 reader.Collect(ctx, rm) 427 428 gotMetrics := map[string]metricdata.Metrics{} 429 for _, sm := range rm.ScopeMetrics { 430 for _, m := range sm.Metrics { 431 gotMetrics[m.Name] = m 432 } 433 } 434 435 wantMetrics := testutils.MetricData(testutils.MetricDataOptions{ 436 Target: ss.Target, 437 UnaryCompressedMessageSize: float64(57), 438 }) 439 gotMetrics = testutils.WaitForServerMetrics(ctx, t, reader, gotMetrics, wantMetrics) 440 testutils.CompareMetrics(t, gotMetrics, wantMetrics) 441 442 stream, err = ss.Client.FullDuplexCall(ctx) 443 if err != nil { 444 t.Fatalf("ss.Client.FullDuplexCall failed: %f", err) 445 } 446 447 stream.CloseSend() 448 if _, err = stream.Recv(); err != io.EOF { 449 t.Fatalf("stream.Recv received an unexpected error: %v, expected an EOF error", err) 450 } 451 // This Invoke doesn't pass the StaticMethodCallOption. Thus, the method 452 // attribute should become "other" on client side metrics. Since it is also 453 // not registered on the server either, it should also become "other" on the 454 // server metrics method attribute. 455 ss.CC.Invoke(ctx, "/grpc.testing.TestService/UnregisteredCall", nil, nil, []grpc.CallOption{}...) 456 ss.CC.Invoke(ctx, "/grpc.testing.TestService/UnregisteredCall", nil, nil, []grpc.CallOption{}...) 457 ss.CC.Invoke(ctx, "/grpc.testing.TestService/UnregisteredCall", nil, nil, []grpc.CallOption{}...) 
458 459 rm = &metricdata.ResourceMetrics{} 460 reader.Collect(ctx, rm) 461 gotMetrics = map[string]metricdata.Metrics{} 462 for _, sm := range rm.ScopeMetrics { 463 for _, m := range sm.Metrics { 464 gotMetrics[m.Name] = m 465 } 466 } 467 unaryMethodAttr := attribute.String("grpc.method", "grpc.testing.TestService/UnaryCall") 468 duplexMethodAttr := attribute.String("grpc.method", "grpc.testing.TestService/FullDuplexCall") 469 470 targetAttr := attribute.String("grpc.target", ss.Target) 471 otherMethodAttr := attribute.String("grpc.method", "other") 472 wantMetrics = []metricdata.Metrics{ 473 { 474 Name: "grpc.client.attempt.started", 475 Description: "Number of client call attempts started.", 476 Unit: "attempt", 477 Data: metricdata.Sum[int64]{ 478 DataPoints: []metricdata.DataPoint[int64]{ 479 { 480 Attributes: attribute.NewSet(unaryMethodAttr, targetAttr), 481 Value: 1, 482 }, 483 { 484 Attributes: attribute.NewSet(duplexMethodAttr, targetAttr), 485 Value: 2, 486 }, 487 { 488 Attributes: attribute.NewSet(otherMethodAttr, targetAttr), 489 Value: 3, 490 }, 491 }, 492 Temporality: metricdata.CumulativeTemporality, 493 IsMonotonic: true, 494 }, 495 }, 496 { 497 Name: "grpc.server.call.started", 498 Description: "Number of server calls started.", 499 Unit: "call", 500 Data: metricdata.Sum[int64]{ 501 DataPoints: []metricdata.DataPoint[int64]{ 502 { 503 Attributes: attribute.NewSet(unaryMethodAttr), 504 Value: 1, 505 }, 506 { 507 Attributes: attribute.NewSet(duplexMethodAttr), 508 Value: 2, 509 }, 510 { 511 Attributes: attribute.NewSet(otherMethodAttr), 512 Value: 3, 513 }, 514 }, 515 Temporality: metricdata.CumulativeTemporality, 516 IsMonotonic: true, 517 }, 518 }, 519 } 520 for _, metric := range wantMetrics { 521 val, ok := gotMetrics[metric.Name] 522 if !ok { 523 t.Fatalf("Metric %v not present in recorded metrics", metric.Name) 524 } 525 if !metricdatatest.AssertEqual(t, metric, val, metricdatatest.IgnoreTimestamp(), metricdatatest.IgnoreExemplars()) { 526 
t.Fatalf("Metrics data type not equal for metric: %v", metric.Name) 527 } 528 } 529 } 530 531 // clusterWithLBConfiguration returns a cluster resource with the proto message 532 // passed Marshaled to an any and specified through the load_balancing_policy 533 // field. 534 func clusterWithLBConfiguration(t *testing.T, clusterName, edsServiceName string, secLevel e2e.SecurityLevel, m proto.Message) *v3clusterpb.Cluster { 535 cluster := e2e.DefaultCluster(clusterName, edsServiceName, secLevel) 536 cluster.LoadBalancingPolicy = &v3clusterpb.LoadBalancingPolicy{ 537 Policies: []*v3clusterpb.LoadBalancingPolicy_Policy{ 538 { 539 TypedExtensionConfig: &v3corepb.TypedExtensionConfig{ 540 TypedConfig: itestutils.MarshalAny(t, m), 541 }, 542 }, 543 }, 544 } 545 return cluster 546 } 547 548 func metricsDataFromReader(ctx context.Context, reader *metric.ManualReader) map[string]metricdata.Metrics { 549 rm := &metricdata.ResourceMetrics{} 550 reader.Collect(ctx, rm) 551 gotMetrics := map[string]metricdata.Metrics{} 552 for _, sm := range rm.ScopeMetrics { 553 for _, m := range sm.Metrics { 554 gotMetrics[m.Name] = m 555 } 556 } 557 return gotMetrics 558 } 559 560 // TestWRRMetrics tests the metrics emitted from the WRR LB Policy. It 561 // configures WRR as an endpoint picking policy through xDS on a ClientConn 562 // alongside an OpenTelemetry stats handler. It makes a few RPC's, and then 563 // sleeps for a bit to allow weight to expire. It then asserts OpenTelemetry 564 // metrics atoms are eventually present for all four WRR Metrics, alongside the 565 // correct target and locality label for each metric. 
566 func (s) TestWRRMetrics(t *testing.T) { 567 cmr := orca.NewServerMetricsRecorder().(orca.CallMetricsRecorder) 568 backend1 := stubserver.StartTestService(t, &stubserver.StubServer{ 569 EmptyCallF: func(ctx context.Context, _ *testpb.Empty) (*testpb.Empty, error) { 570 if r := orca.CallMetricsRecorderFromContext(ctx); r != nil { 571 // Copy metrics from what the test set in cmr into r. 572 sm := cmr.(orca.ServerMetricsProvider).ServerMetrics() 573 r.SetApplicationUtilization(sm.AppUtilization) 574 r.SetQPS(sm.QPS) 575 r.SetEPS(sm.EPS) 576 } 577 return &testpb.Empty{}, nil 578 }, 579 }, orca.CallMetricsServerOption(nil)) 580 port1 := itestutils.ParsePort(t, backend1.Address) 581 defer backend1.Stop() 582 583 cmr.SetQPS(10.0) 584 cmr.SetApplicationUtilization(1.0) 585 586 backend2 := stubserver.StartTestService(t, &stubserver.StubServer{ 587 EmptyCallF: func(ctx context.Context, _ *testpb.Empty) (*testpb.Empty, error) { 588 if r := orca.CallMetricsRecorderFromContext(ctx); r != nil { 589 // Copy metrics from what the test set in cmr into r. 590 sm := cmr.(orca.ServerMetricsProvider).ServerMetrics() 591 r.SetApplicationUtilization(sm.AppUtilization) 592 r.SetQPS(sm.QPS) 593 r.SetEPS(sm.EPS) 594 } 595 return &testpb.Empty{}, nil 596 }, 597 }, orca.CallMetricsServerOption(nil)) 598 port2 := itestutils.ParsePort(t, backend2.Address) 599 defer backend2.Stop() 600 601 const serviceName = "my-service-client-side-xds" 602 603 // Start an xDS management server. 
604 managementServer, nodeID, _, xdsResolver := setup.ManagementServerAndResolver(t) 605 606 wrrConfig := &v3wrrlocalitypb.WrrLocality{ 607 EndpointPickingPolicy: &v3clusterpb.LoadBalancingPolicy{ 608 Policies: []*v3clusterpb.LoadBalancingPolicy_Policy{ 609 { 610 TypedExtensionConfig: &v3corepb.TypedExtensionConfig{ 611 TypedConfig: itestutils.MarshalAny(t, &v3clientsideweightedroundrobinpb.ClientSideWeightedRoundRobin{ 612 EnableOobLoadReport: &wrapperspb.BoolValue{ 613 Value: false, 614 }, 615 // BlackoutPeriod long enough to cause load report 616 // weight to trigger in the scope of test case. 617 // WeightExpirationPeriod will cause the load report 618 // weight for backend 1 to expire. 619 BlackoutPeriod: durationpb.New(5 * time.Millisecond), 620 WeightExpirationPeriod: durationpb.New(500 * time.Millisecond), 621 WeightUpdatePeriod: durationpb.New(time.Second), 622 ErrorUtilizationPenalty: &wrapperspb.FloatValue{Value: 1}, 623 }), 624 }, 625 }, 626 }, 627 }, 628 } 629 630 routeConfigName := "route-" + serviceName 631 clusterName := "cluster-" + serviceName 632 endpointsName := "endpoints-" + serviceName 633 resources := e2e.UpdateOptions{ 634 NodeID: nodeID, 635 Listeners: []*v3listenerpb.Listener{e2e.DefaultClientListener(serviceName, routeConfigName)}, 636 Routes: []*v3routepb.RouteConfiguration{e2e.DefaultRouteConfig(routeConfigName, serviceName, clusterName)}, 637 Clusters: []*v3clusterpb.Cluster{clusterWithLBConfiguration(t, clusterName, endpointsName, e2e.SecurityLevelNone, wrrConfig)}, 638 Endpoints: []*v3endpointpb.ClusterLoadAssignment{e2e.EndpointResourceWithOptions(e2e.EndpointOptions{ 639 ClusterName: endpointsName, 640 Host: "localhost", 641 Localities: []e2e.LocalityOptions{ 642 { 643 Backends: []e2e.BackendOptions{{Ports: []uint32{port1}}, {Ports: []uint32{port2}}}, 644 Weight: 1, 645 }, 646 }, 647 })}, 648 } 649 650 ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) 651 defer cancel() 652 if err := 
managementServer.Update(ctx, resources); err != nil { 653 t.Fatal(err) 654 } 655 656 reader := metric.NewManualReader() 657 provider := metric.NewMeterProvider(metric.WithReader(reader)) 658 659 mo := opentelemetry.MetricsOptions{ 660 MeterProvider: provider, 661 Metrics: opentelemetry.DefaultMetrics().Add("grpc.lb.wrr.rr_fallback", "grpc.lb.wrr.endpoint_weight_not_yet_usable", "grpc.lb.wrr.endpoint_weight_stale", "grpc.lb.wrr.endpoint_weights"), 662 OptionalLabels: []string{"grpc.lb.locality"}, 663 } 664 665 target := fmt.Sprintf("xds:///%s", serviceName) 666 cc, err := grpc.NewClient(target, grpc.WithTransportCredentials(insecure.NewCredentials()), grpc.WithResolvers(xdsResolver), opentelemetry.DialOption(opentelemetry.Options{MetricsOptions: mo})) 667 if err != nil { 668 t.Fatalf("Failed to dial local test server: %v", err) 669 } 670 defer cc.Close() 671 672 client := testgrpc.NewTestServiceClient(cc) 673 674 // Make 100 RPC's. The two backends will send back load reports per call 675 // giving the two SubChannels weights which will eventually expire. Two 676 // backends needed as for only one backend, WRR does not recompute the 677 // scheduler. 678 receivedExpectedMetrics := grpcsync.NewEvent() 679 go func() { 680 for !receivedExpectedMetrics.HasFired() && ctx.Err() == nil { 681 client.EmptyCall(ctx, &testpb.Empty{}) 682 time.Sleep(2 * time.Millisecond) 683 } 684 }() 685 686 targetAttr := attribute.String("grpc.target", target) 687 localityAttr := attribute.String("grpc.lb.locality", `{"region":"region-1","zone":"zone-1","subZone":"subzone-1"}`) 688 689 wantMetrics := []metricdata.Metrics{ 690 { 691 Name: "grpc.lb.wrr.rr_fallback", 692 Description: "EXPERIMENTAL. 
Number of scheduler updates in which there were not enough endpoints with valid weight, which caused the WRR policy to fall back to RR behavior.", 693 Unit: "update", 694 Data: metricdata.Sum[int64]{ 695 DataPoints: []metricdata.DataPoint[int64]{ 696 { 697 Attributes: attribute.NewSet(targetAttr, localityAttr), 698 Value: 1, // value ignored 699 }, 700 }, 701 Temporality: metricdata.CumulativeTemporality, 702 IsMonotonic: true, 703 }, 704 }, 705 706 { 707 Name: "grpc.lb.wrr.endpoint_weight_not_yet_usable", 708 Description: "EXPERIMENTAL. Number of endpoints from each scheduler update that don't yet have usable weight information (i.e., either the load report has not yet been received, or it is within the blackout period).", 709 Unit: "endpoint", 710 Data: metricdata.Sum[int64]{ 711 DataPoints: []metricdata.DataPoint[int64]{ 712 { 713 Attributes: attribute.NewSet(targetAttr, localityAttr), 714 Value: 1, // value ignored 715 }, 716 }, 717 Temporality: metricdata.CumulativeTemporality, 718 IsMonotonic: true, 719 }, 720 }, 721 { 722 Name: "grpc.lb.wrr.endpoint_weights", 723 Description: "EXPERIMENTAL. Weight of each endpoint, recorded on every scheduler update. Endpoints without usable weights will be recorded as weight 0.", 724 Unit: "endpoint", 725 Data: metricdata.Histogram[float64]{ 726 DataPoints: []metricdata.HistogramDataPoint[float64]{ 727 { 728 Attributes: attribute.NewSet(targetAttr, localityAttr), 729 }, 730 }, 731 Temporality: metricdata.CumulativeTemporality, 732 }, 733 }, 734 } 735 736 if err := pollForWantMetrics(ctx, t, reader, wantMetrics); err != nil { 737 t.Fatal(err) 738 } 739 receivedExpectedMetrics.Fire() 740 741 // Poll for 5 seconds for weight expiration metric. No more RPC's are being 742 // made, so weight should expire on a subsequent scheduler update. 743 eventuallyWantMetric := metricdata.Metrics{ 744 Name: "grpc.lb.wrr.endpoint_weight_stale", 745 Description: "EXPERIMENTAL. 
Number of endpoints from each scheduler update whose latest weight is older than the expiration period.",
		Unit:        "endpoint",
		Data: metricdata.Sum[int64]{
			DataPoints: []metricdata.DataPoint[int64]{
				{
					Attributes: attribute.NewSet(targetAttr, localityAttr),
					Value:      1, // value ignored
				},
			},
			Temporality: metricdata.CumulativeTemporality,
			IsMonotonic: true,
		},
	}

	if err := pollForWantMetrics(ctx, t, reader, []metricdata.Metrics{eventuallyWantMetric}); err != nil {
		t.Fatal(err)
	}
}

// pollForWantMetrics polls reader until every metric in wantMetrics has been
// reported, or until ctx is done.
//
// It returns nil once all wanted metrics are present and equal to what was
// read (values, timestamps and exemplars are ignored in the comparison). It
// returns an error as soon as a wanted metric is present but not equal to the
// expected one, or when ctx expires before all wanted metrics show up.
func pollForWantMetrics(ctx context.Context, t *testing.T, reader *metric.ManualReader, wantMetrics []metricdata.Metrics) error {
	t.Helper()
	for ctx.Err() == nil {
		gotMetrics := metricsDataFromReader(ctx, reader)
		foundAll := true
		// The loop variable is deliberately not called "metric" so that it
		// does not shadow the imported metric package.
		for _, want := range wantMetrics {
			got, ok := gotMetrics[want.Name]
			if !ok {
				// Not reported yet; poll again after a short wait.
				foundAll = false
				break
			}
			if !metricdatatest.AssertEqual(t, want, got, metricdatatest.IgnoreValue(), metricdatatest.IgnoreTimestamp(), metricdatatest.IgnoreExemplars()) {
				return fmt.Errorf("metrics data type not equal for metric: %v", want.Name)
			}
		}
		if foundAll {
			return nil
		}
		// Single, context-aware wait between polls so that an expired context
		// ends the poll promptly.
		select {
		case <-ctx.Done():
		case <-time.After(5 * time.Millisecond):
		}
	}

	return fmt.Errorf("error waiting for metrics %v: %v", wantMetrics, ctx.Err())
}

// wantRPCSpanAttributes returns the attributes expected on a span for a
// first, non-retried RPC. With isClient set it describes a client attempt
// span (Client and FailFast are true); otherwise it describes a server span
// (both false).
func wantRPCSpanAttributes(isClient bool) []attribute.KeyValue {
	return []attribute.KeyValue{
		{Key: "Client", Value: attribute.BoolValue(isClient)},
		{Key: "FailFast", Value: attribute.BoolValue(isClient)},
		{Key: "previous-rpc-attempts", Value: attribute.IntValue(0)},
		{Key: "transparent-retry", Value: attribute.BoolValue(false)},
	}
}

// wantMessageEvent returns the expected span event with the given name for
// the first message (sequence number 0) of an RPC, carrying the given
// uncompressed and compressed message sizes.
func wantMessageEvent(name string, messageSize, compressedSize int) trace.Event {
	return trace.Event{
		Name: name,
		Attributes: []attribute.KeyValue{
			{Key: "sequence-number", Value: attribute.IntValue(0)},
			{Key: "message-size", Value: attribute.IntValue(messageSize)},
			{Key: "message-size-compressed", Value: attribute.IntValue(compressedSize)},
		},
	}
}

// wantUnaryAndStreamingSpanInfos returns the span information expected after
// one UnaryCall carrying a 10000-byte payload followed by one FullDuplexCall
// on which no messages are exchanged. compressedSize is the expected
// "message-size-compressed" value of the unary message events.
//
// A fresh slice (with fresh events and attributes) is built on every call, so
// callers never share backing storage.
func wantUnaryAndStreamingSpanInfos(compressedSize int) []traceSpanInfo {
	const (
		unaryMethod  = "grpc.testing.TestService.UnaryCall"
		duplexMethod = "grpc.testing.TestService.FullDuplexCall"
		// messageSize is the "message-size" the tests expect for the unary
		// request and response built around a 10000-byte body.
		messageSize = 10006
		inboundMsg  = "Inbound compressed message"
		outboundMsg = "Outbound compressed message"
	)
	return []traceSpanInfo{
		{
			name:       unaryMethod,
			spanKind:   oteltrace.SpanKindServer.String(),
			attributes: wantRPCSpanAttributes(false),
			// The server receives the request before it sends the response.
			events: []trace.Event{
				wantMessageEvent(inboundMsg, messageSize, compressedSize),
				wantMessageEvent(outboundMsg, messageSize, compressedSize),
			},
		},
		{
			name:       "Attempt." + unaryMethod,
			spanKind:   oteltrace.SpanKindInternal.String(),
			attributes: wantRPCSpanAttributes(true),
			// The client attempt sends the request before it receives the
			// response.
			events: []trace.Event{
				wantMessageEvent(outboundMsg, messageSize, compressedSize),
				wantMessageEvent(inboundMsg, messageSize, compressedSize),
			},
		},
		{
			name:       unaryMethod,
			spanKind:   oteltrace.SpanKindClient.String(),
			attributes: nil,
			events:     nil,
		},
		{
			name:       duplexMethod,
			spanKind:   oteltrace.SpanKindServer.String(),
			attributes: wantRPCSpanAttributes(false),
			events:     nil,
		},
		{
			name:       duplexMethod,
			spanKind:   oteltrace.SpanKindClient.String(),
			attributes: nil,
			events:     nil,
		},
		{
			name:       "Attempt." + duplexMethod,
			spanKind:   oteltrace.SpanKindInternal.String(),
			attributes: wantRPCSpanAttributes(true),
			events:     nil,
		},
	}
}

// TestMetricsAndTracesOptionEnabled verifies the integration of metrics and traces
// emitted by the OpenTelemetry instrumentation in a gRPC environment. It sets up a
// stub server with both metrics and traces enabled, and tests the correct emission
// of metrics and traces during a Unary RPC and a Streaming RPC. The test ensures
// that the emitted metrics reflect the operations performed, including the size of
// the compressed message, and verifies that tracing information is correctly recorded.
func (s) TestMetricsAndTracesOptionEnabled(t *testing.T) {
	// Create default metrics options
	mo, reader := defaultMetricsOptions(t, nil)
	// Create default trace options
	to, exporter := defaultTraceOptions(t)

	ss := setupStubServer(t, mo, to)
	defer ss.Stop()

	ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout*2)
	defer cancel()

	// Make two RPC's, a unary RPC and a streaming RPC. These should cause
	// certain metrics and traces to be emitted which should be observed
	// through metrics reader and span exporter respectively.
	if _, err := ss.Client.UnaryCall(ctx, &testpb.SimpleRequest{Payload: &testpb.Payload{
		Body: make([]byte, 10000),
	}}, grpc.UseCompressor(gzip.Name)); err != nil { // Deterministic compression.
		t.Fatalf("Unexpected error from UnaryCall: %v", err)
	}
	stream, err := ss.Client.FullDuplexCall(ctx)
	if err != nil {
		t.Fatalf("ss.Client.FullDuplexCall failed: %v", err)
	}

	stream.CloseSend()
	if _, err = stream.Recv(); err != io.EOF {
		t.Fatalf("stream.Recv received an unexpected error: %v, expected an EOF error", err)
	}

	// Verify metrics
	rm := &metricdata.ResourceMetrics{}
	if err := reader.Collect(ctx, rm); err != nil {
		t.Fatalf("reader.Collect failed: %v", err)
	}

	gotMetrics := map[string]metricdata.Metrics{}
	for _, sm := range rm.ScopeMetrics {
		for _, m := range sm.Metrics {
			gotMetrics[m.Name] = m
		}
	}

	wantMetrics := testutils.MetricData(testutils.MetricDataOptions{
		Target:                     ss.Target,
		UnaryCompressedMessageSize: float64(57),
	})
	gotMetrics = testutils.WaitForServerMetrics(ctx, t, reader, gotMetrics, wantMetrics)
	testutils.CompareMetrics(t, gotMetrics, wantMetrics)

	// Verify traces. The unary messages were gzip-compressed, so the expected
	// compressed size is 57, matching the metric expectation above.
	wantSpanInfos := wantUnaryAndStreamingSpanInfos(57)

	spans, err := waitForTraceSpans(ctx, exporter, wantSpanInfos)
	if err != nil {
		t.Fatal(err)
	}
	validateTraces(t, spans, wantSpanInfos)
}

// TestSpan verifies that the gRPC Trace Binary propagator correctly
// propagates span context between a client and server using the grpc-
// trace-bin header. It sets up a stub server with OpenTelemetry tracing
// enabled, makes a unary RPC, and streaming RPC as well.
//
// Verification:
//   - Verifies that the span context is correctly propagated from the client
//     to the server, including the trace ID and span ID.
//   - Verifies that the server can access the span context and create
//     child spans as expected during the RPC calls.
//   - Verifies that the tracing information is recorded accurately in
//     the OpenTelemetry backend.
func (s) TestSpan(t *testing.T) {
	mo, _ := defaultMetricsOptions(t, nil)
	// Using defaultTraceOptions to set up OpenTelemetry with an in-memory exporter.
	to, exporter := defaultTraceOptions(t)
	// Start the server with trace options.
	ss := setupStubServer(t, mo, to)
	defer ss.Stop()

	ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
	defer cancel()

	// Make two RPC's, a unary RPC and a streaming RPC. These should cause
	// certain traces to be emitted, which should be observed through the
	// span exporter.
	if _, err := ss.Client.UnaryCall(ctx, &testpb.SimpleRequest{Payload: &testpb.Payload{
		Body: make([]byte, 10000),
	}}); err != nil {
		t.Fatalf("Unexpected error from UnaryCall: %v", err)
	}
	stream, err := ss.Client.FullDuplexCall(ctx)
	if err != nil {
		t.Fatalf("ss.Client.FullDuplexCall failed: %v", err)
	}
	stream.CloseSend()
	if _, err = stream.Recv(); err != io.EOF {
		t.Fatalf("stream.Recv received an unexpected error: %v, expected an EOF error", err)
	}

	// No compressor is used, so the expected compressed size equals the
	// expected message size.
	wantSpanInfos := wantUnaryAndStreamingSpanInfos(10006)

	spans, err := waitForTraceSpans(ctx, exporter, wantSpanInfos)
	if err != nil {
		t.Fatal(err)
	}
	validateTraces(t, spans, wantSpanInfos)
}

// TestSpan_WithW3CContextPropagator sets up a stub server with OpenTelemetry tracing
// enabled, makes a unary and a streaming RPC, and then asserts that the correct
// number of spans are created with the expected spans.
//
// Verification:
//   - Verifies that the correct number of spans are created for both unary and
//     streaming RPCs.
//   - Verifies that the spans have the expected names and attributes, ensuring
//     they accurately reflect the operations performed.
//   - Verifies that the trace ID and span ID are correctly assigned and accessible
//     in the OpenTelemetry backend.
func (s) TestSpan_WithW3CContextPropagator(t *testing.T) {
	mo, _ := defaultMetricsOptions(t, nil)
	// Using defaultTraceOptions to set up OpenTelemetry with an in-memory exporter
	to, exporter := defaultTraceOptions(t)
	// Set the W3CContextPropagator as part of TracingOptions.
	to.TextMapPropagator = propagation.NewCompositeTextMapPropagator(propagation.TraceContext{})
	// Start the server with OpenTelemetry options
	ss := setupStubServer(t, mo, to)
	defer ss.Stop()

	ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
	defer cancel()

	// Make two RPC's, a unary RPC and a streaming RPC. These should cause
	// certain traces to be emitted, which should be observed through the
	// span exporter.
	if _, err := ss.Client.UnaryCall(ctx, &testpb.SimpleRequest{Payload: &testpb.Payload{
		Body: make([]byte, 10000),
	}}); err != nil {
		t.Fatalf("Unexpected error from UnaryCall: %v", err)
	}
	stream, err := ss.Client.FullDuplexCall(ctx)
	if err != nil {
		t.Fatalf("ss.Client.FullDuplexCall failed: %v", err)
	}

	stream.CloseSend()
	if _, err = stream.Recv(); err != io.EOF {
		t.Fatalf("stream.Recv received an unexpected error: %v, expected an EOF error", err)
	}

	// No compressor is used, so the expected compressed size equals the
	// expected message size.
	wantSpanInfos := wantUnaryAndStreamingSpanInfos(10006)

	spans, err := waitForTraceSpans(ctx, exporter, wantSpanInfos)
	if err != nil {
		t.Fatal(err)
	}
	validateTraces(t, spans, wantSpanInfos)
}

// TestMetricsAndTracesDisabled verifies that RPC calls succeed as expected
// when metrics and traces are disabled in the OpenTelemetry instrumentation.
1480 func (s) TestMetricsAndTracesDisabled(t *testing.T) { 1481 ss := &stubserver.StubServer{ 1482 UnaryCallF: func(_ context.Context, in *testpb.SimpleRequest) (*testpb.SimpleResponse, error) { 1483 return &testpb.SimpleResponse{Payload: &testpb.Payload{ 1484 Body: make([]byte, len(in.GetPayload().GetBody())), 1485 }}, nil 1486 }, 1487 FullDuplexCallF: func(stream testgrpc.TestService_FullDuplexCallServer) error { 1488 for { 1489 _, err := stream.Recv() 1490 if err == io.EOF { 1491 return nil 1492 } 1493 } 1494 }, 1495 } 1496 1497 if err := ss.Start(nil); err != nil { 1498 t.Fatalf("Error starting endpoint server: %v", err) 1499 } 1500 defer ss.Stop() 1501 1502 ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) 1503 defer cancel() 1504 1505 // Make two RPCs, a unary RPC and a streaming RPC. 1506 if _, err := ss.Client.UnaryCall(ctx, &testpb.SimpleRequest{Payload: &testpb.Payload{ 1507 Body: make([]byte, 10000), 1508 }}); err != nil { 1509 t.Fatalf("Unexpected error from UnaryCall: %v", err) 1510 } 1511 stream, err := ss.Client.FullDuplexCall(ctx) 1512 if err != nil { 1513 t.Fatalf("ss.Client.FullDuplexCall failed: %v", err) 1514 } 1515 1516 stream.CloseSend() 1517 if _, err = stream.Recv(); err != io.EOF { 1518 t.Fatalf("stream.Recv received an unexpected error: %v, expected an EOF error", err) 1519 } 1520 } 1521 1522 // TestRPCSpanErrorStatus verifies that errors during RPC calls are correctly 1523 // reflected in the span status. It simulates a unary RPC that returns an error 1524 // and checks that the span's status is set to error with the appropriate message. 
1525 func (s) TestRPCSpanErrorStatus(t *testing.T) { 1526 mo, _ := defaultMetricsOptions(t, nil) 1527 // Using defaultTraceOptions to set up OpenTelemetry with an in-memory exporter 1528 to, exporter := defaultTraceOptions(t) 1529 const rpcErrorMsg = "unary call: internal server error" 1530 ss := &stubserver.StubServer{ 1531 UnaryCallF: func(_ context.Context, in *testpb.SimpleRequest) (*testpb.SimpleResponse, error) { 1532 return nil, fmt.Errorf("%v", rpcErrorMsg) 1533 }, 1534 } 1535 1536 otelOptions := opentelemetry.Options{ 1537 MetricsOptions: *mo, 1538 TraceOptions: *to, 1539 } 1540 1541 if err := ss.Start([]grpc.ServerOption{opentelemetry.ServerOption(otelOptions)}, 1542 opentelemetry.DialOption(otelOptions)); err != nil { 1543 t.Fatalf("Error starting endpoint server: %v", err) 1544 } 1545 defer ss.Stop() 1546 1547 ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) 1548 defer cancel() 1549 1550 ss.Client.UnaryCall(ctx, &testpb.SimpleRequest{Payload: &testpb.Payload{ 1551 Body: make([]byte, 10000), 1552 }}) 1553 1554 // Verify spans has error status with rpcErrorMsg as error message. 1555 for ; len(exporter.GetSpans()) == 0 && ctx.Err() == nil; <-time.After(time.Millisecond) { 1556 // wait until trace spans are collected 1557 } 1558 spans := exporter.GetSpans() 1559 if got, want := spans[0].Status.Description, rpcErrorMsg; got != want { 1560 t.Fatalf("got rpc error %s, want %s", spans[0].Status.Description, rpcErrorMsg) 1561 } 1562 }