google.golang.org/grpc@v1.72.2/balancer/rls/metrics_test.go (about)

     1  /*
     2   * Copyright 2024 gRPC authors.
     3   *
     4   * Licensed under the Apache License, Version 2.0 (the "License");
     5   * you may not use this file except in compliance with the License.
     6   * You may obtain a copy of the License at
     7   *
     8   *     http://www.apache.org/licenses/LICENSE-2.0
     9   *
    10   * Unless required by applicable law or agreed to in writing, software
    11   * distributed under the License is distributed on an "AS IS" BASIS,
    12   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13   * See the License for the specific language governing permissions and
    14   * limitations under the License.
    15   */
    16  
    17  package rls
    18  
    19  import (
    20  	"context"
    21  	"math/rand"
    22  	"testing"
    23  
    24  	"github.com/google/uuid"
    25  	"go.opentelemetry.io/otel/attribute"
    26  	"go.opentelemetry.io/otel/sdk/metric"
    27  	"go.opentelemetry.io/otel/sdk/metric/metricdata"
    28  	"go.opentelemetry.io/otel/sdk/metric/metricdata/metricdatatest"
    29  	"google.golang.org/grpc"
    30  	"google.golang.org/grpc/credentials/insecure"
    31  	rlspb "google.golang.org/grpc/internal/proto/grpc_lookup_v1"
    32  	"google.golang.org/grpc/internal/stubserver"
    33  	rlstest "google.golang.org/grpc/internal/testutils/rls"
    34  	testgrpc "google.golang.org/grpc/interop/grpc_testing"
    35  	testpb "google.golang.org/grpc/interop/grpc_testing"
    36  	"google.golang.org/grpc/stats/opentelemetry"
    37  )
    38  
    39  func metricsDataFromReader(ctx context.Context, reader *metric.ManualReader) map[string]metricdata.Metrics {
    40  	rm := &metricdata.ResourceMetrics{}
    41  	reader.Collect(ctx, rm)
    42  	gotMetrics := map[string]metricdata.Metrics{}
    43  	for _, sm := range rm.ScopeMetrics {
    44  		for _, m := range sm.Metrics {
    45  			gotMetrics[m.Name] = m
    46  		}
    47  	}
    48  	return gotMetrics
    49  }
    50  
    51  // TestRLSTargetPickMetric tests RLS Metrics in the case an RLS Balancer picks a
    52  // target from an RLS Response for a RPC. This should emit a
    53  // "grpc.lb.rls.target_picks" with certain labels and cache metrics with certain
    54  // labels.
    55  func (s) TestRLSTargetPickMetric(t *testing.T) {
    56  	// Overwrite the uuid random number generator to be deterministic.
    57  	uuid.SetRand(rand.New(rand.NewSource(1)))
    58  	defer uuid.SetRand(nil)
    59  	rlsServer, _ := rlstest.SetupFakeRLSServer(t, nil)
    60  	rlsConfig := buildBasicRLSConfigWithChildPolicy(t, t.Name(), rlsServer.Address)
    61  	backend := &stubserver.StubServer{
    62  		EmptyCallF: func(context.Context, *testpb.Empty) (*testpb.Empty, error) {
    63  			return &testpb.Empty{}, nil
    64  		},
    65  	}
    66  	if err := backend.StartServer(); err != nil {
    67  		t.Fatalf("Failed to start backend: %v", err)
    68  	}
    69  	t.Logf("Started TestService backend at: %q", backend.Address)
    70  	defer backend.Stop()
    71  
    72  	rlsServer.SetResponseCallback(func(context.Context, *rlspb.RouteLookupRequest) *rlstest.RouteLookupResponse {
    73  		return &rlstest.RouteLookupResponse{Resp: &rlspb.RouteLookupResponse{Targets: []string{backend.Address}}}
    74  	})
    75  	r := startManualResolverWithConfig(t, rlsConfig)
    76  	reader := metric.NewManualReader()
    77  	provider := metric.NewMeterProvider(metric.WithReader(reader))
    78  	mo := opentelemetry.MetricsOptions{
    79  		MeterProvider: provider,
    80  		Metrics:       opentelemetry.DefaultMetrics().Add("grpc.lb.rls.cache_entries", "grpc.lb.rls.cache_size", "grpc.lb.rls.default_target_picks", "grpc.lb.rls.target_picks", "grpc.lb.rls.failed_picks"),
    81  	}
    82  	grpcTarget := r.Scheme() + ":///"
    83  	cc, err := grpc.NewClient(grpcTarget, grpc.WithResolvers(r), grpc.WithTransportCredentials(insecure.NewCredentials()), opentelemetry.DialOption(opentelemetry.Options{MetricsOptions: mo}))
    84  	if err != nil {
    85  		t.Fatalf("Failed to dial local test server: %v", err)
    86  	}
    87  	defer cc.Close()
    88  
    89  	wantMetrics := []metricdata.Metrics{
    90  		{
    91  			Name:        "grpc.lb.rls.target_picks",
    92  			Description: "EXPERIMENTAL. Number of LB picks sent to each RLS target. Note that if the default target is also returned by the RLS server, RPCs sent to that target from the cache will be counted in this metric, not in grpc.rls.default_target_picks.",
    93  			Unit:        "pick",
    94  			Data: metricdata.Sum[int64]{
    95  				DataPoints: []metricdata.DataPoint[int64]{
    96  					{
    97  						Attributes: attribute.NewSet(attribute.String("grpc.target", grpcTarget), attribute.String("grpc.lb.rls.server_target", rlsServer.Address), attribute.String("grpc.lb.rls.data_plane_target", backend.Address), attribute.String("grpc.lb.pick_result", "complete")),
    98  						Value:      1,
    99  					},
   100  				},
   101  				Temporality: metricdata.CumulativeTemporality,
   102  				IsMonotonic: true,
   103  			},
   104  		},
   105  
   106  		// Receives an empty RLS Response, so a single cache entry with no size.
   107  		{
   108  			Name:        "grpc.lb.rls.cache_entries",
   109  			Description: "EXPERIMENTAL. Number of entries in the RLS cache.",
   110  			Unit:        "entry",
   111  			Data: metricdata.Gauge[int64]{
   112  				DataPoints: []metricdata.DataPoint[int64]{
   113  					{
   114  						Attributes: attribute.NewSet(attribute.String("grpc.target", grpcTarget), attribute.String("grpc.lb.rls.server_target", rlsServer.Address), attribute.String("grpc.lb.rls.instance_uuid", "52fdfc07-2182-454f-963f-5f0f9a621d72")),
   115  						Value:      1,
   116  					},
   117  				},
   118  			},
   119  		},
   120  		{
   121  			Name:        "grpc.lb.rls.cache_size",
   122  			Description: "EXPERIMENTAL. The current size of the RLS cache.",
   123  			Unit:        "By",
   124  			Data: metricdata.Gauge[int64]{
   125  				DataPoints: []metricdata.DataPoint[int64]{
   126  					{
   127  						Attributes: attribute.NewSet(attribute.String("grpc.target", grpcTarget), attribute.String("grpc.lb.rls.server_target", rlsServer.Address), attribute.String("grpc.lb.rls.instance_uuid", "52fdfc07-2182-454f-963f-5f0f9a621d72")),
   128  						Value:      35,
   129  					},
   130  				},
   131  			},
   132  		},
   133  	}
   134  	client := testgrpc.NewTestServiceClient(cc)
   135  	ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
   136  	defer cancel()
   137  	_, err = client.EmptyCall(ctx, &testpb.Empty{})
   138  	if err != nil {
   139  		t.Fatalf("client.EmptyCall failed with error: %v", err)
   140  	}
   141  
   142  	gotMetrics := metricsDataFromReader(ctx, reader)
   143  	for _, metric := range wantMetrics {
   144  		val, ok := gotMetrics[metric.Name]
   145  		if !ok {
   146  			t.Fatalf("Metric %v not present in recorded metrics", metric.Name)
   147  		}
   148  		if !metricdatatest.AssertEqual(t, metric, val, metricdatatest.IgnoreTimestamp(), metricdatatest.IgnoreExemplars()) {
   149  			t.Fatalf("Metrics data type not equal for metric: %v", metric.Name)
   150  		}
   151  	}
   152  
   153  	// Only one pick was made, which was a target pick, so no default target
   154  	// pick or failed pick metric should emit.
   155  	for _, metric := range []string{"grpc.lb.rls.default_target_picks", "grpc.lb.rls.failed_picks"} {
   156  		if _, ok := gotMetrics[metric]; ok {
   157  			t.Fatalf("Metric %v present in recorded metrics", metric)
   158  		}
   159  	}
   160  }
   161  
   162  // TestRLSDefaultTargetPickMetric tests RLS Metrics in the case an RLS Balancer
   163  // falls back to the default target for an RPC. This should emit a
   164  // "grpc.lb.rls.default_target_picks" with certain labels and cache metrics with
   165  // certain labels.
   166  func (s) TestRLSDefaultTargetPickMetric(t *testing.T) {
   167  	// Overwrite the uuid random number generator to be deterministic.
   168  	uuid.SetRand(rand.New(rand.NewSource(1)))
   169  	defer uuid.SetRand(nil)
   170  
   171  	rlsServer, _ := rlstest.SetupFakeRLSServer(t, nil)
   172  	// Build RLS service config with a default target.
   173  	rlsConfig := buildBasicRLSConfigWithChildPolicy(t, t.Name(), rlsServer.Address)
   174  	backend := &stubserver.StubServer{
   175  		EmptyCallF: func(context.Context, *testpb.Empty) (*testpb.Empty, error) {
   176  			return &testpb.Empty{}, nil
   177  		},
   178  	}
   179  	if err := backend.StartServer(); err != nil {
   180  		t.Fatalf("Failed to start backend: %v", err)
   181  	}
   182  	t.Logf("Started TestService backend at: %q", backend.Address)
   183  	defer backend.Stop()
   184  	rlsConfig.RouteLookupConfig.DefaultTarget = backend.Address
   185  
   186  	r := startManualResolverWithConfig(t, rlsConfig)
   187  	reader := metric.NewManualReader()
   188  	provider := metric.NewMeterProvider(metric.WithReader(reader))
   189  	mo := opentelemetry.MetricsOptions{
   190  		MeterProvider: provider,
   191  		Metrics:       opentelemetry.DefaultMetrics().Add("grpc.lb.rls.cache_entries", "grpc.lb.rls.cache_size", "grpc.lb.rls.default_target_picks", "grpc.lb.rls.target_picks", "grpc.lb.rls.failed_picks"),
   192  	}
   193  	grpcTarget := r.Scheme() + ":///"
   194  	cc, err := grpc.NewClient(grpcTarget, grpc.WithResolvers(r), grpc.WithTransportCredentials(insecure.NewCredentials()), opentelemetry.DialOption(opentelemetry.Options{MetricsOptions: mo}))
   195  	if err != nil {
   196  		t.Fatalf("Failed to dial local test server: %v", err)
   197  	}
   198  	defer cc.Close()
   199  
   200  	wantMetrics := []metricdata.Metrics{
   201  		{
   202  			Name:        "grpc.lb.rls.default_target_picks",
   203  			Description: "EXPERIMENTAL. Number of LB picks sent to the default target.",
   204  			Unit:        "pick",
   205  			Data: metricdata.Sum[int64]{
   206  				DataPoints: []metricdata.DataPoint[int64]{
   207  					{
   208  						Attributes: attribute.NewSet(attribute.String("grpc.target", grpcTarget), attribute.String("grpc.lb.rls.server_target", rlsServer.Address), attribute.String("grpc.lb.rls.data_plane_target", backend.Address), attribute.String("grpc.lb.pick_result", "complete")),
   209  						Value:      1,
   210  					},
   211  				},
   212  				Temporality: metricdata.CumulativeTemporality,
   213  				IsMonotonic: true,
   214  			},
   215  		},
   216  		// Receives a RLS Response with target information, so a single cache
   217  		// entry with a certain size.
   218  		{
   219  			Name:        "grpc.lb.rls.cache_entries",
   220  			Description: "EXPERIMENTAL. Number of entries in the RLS cache.",
   221  			Unit:        "entry",
   222  			Data: metricdata.Gauge[int64]{
   223  				DataPoints: []metricdata.DataPoint[int64]{
   224  					{
   225  						Attributes: attribute.NewSet(attribute.String("grpc.target", grpcTarget), attribute.String("grpc.lb.rls.server_target", rlsServer.Address), attribute.String("grpc.lb.rls.instance_uuid", "52fdfc07-2182-454f-963f-5f0f9a621d72")),
   226  						Value:      1,
   227  					},
   228  				},
   229  			},
   230  		},
   231  		{
   232  			Name:        "grpc.lb.rls.cache_size",
   233  			Description: "EXPERIMENTAL. The current size of the RLS cache.",
   234  			Unit:        "By",
   235  			Data: metricdata.Gauge[int64]{
   236  				DataPoints: []metricdata.DataPoint[int64]{
   237  					{
   238  						Attributes: attribute.NewSet(attribute.String("grpc.target", grpcTarget), attribute.String("grpc.lb.rls.server_target", rlsServer.Address), attribute.String("grpc.lb.rls.instance_uuid", "52fdfc07-2182-454f-963f-5f0f9a621d72")),
   239  						Value:      0,
   240  					},
   241  				},
   242  			},
   243  		},
   244  	}
   245  	client := testgrpc.NewTestServiceClient(cc)
   246  	ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
   247  	defer cancel()
   248  	if _, err = client.EmptyCall(ctx, &testpb.Empty{}); err != nil {
   249  		t.Fatalf("client.EmptyCall failed with error: %v", err)
   250  	}
   251  
   252  	gotMetrics := metricsDataFromReader(ctx, reader)
   253  	for _, metric := range wantMetrics {
   254  		val, ok := gotMetrics[metric.Name]
   255  		if !ok {
   256  			t.Fatalf("Metric %v not present in recorded metrics", metric.Name)
   257  		}
   258  		if !metricdatatest.AssertEqual(t, metric, val, metricdatatest.IgnoreTimestamp(), metricdatatest.IgnoreExemplars()) {
   259  			t.Fatalf("Metrics data type not equal for metric: %v", metric.Name)
   260  		}
   261  	}
   262  	// No target picks and failed pick metrics should be emitted, as the test
   263  	// made only one RPC which recorded as a default target pick.
   264  	for _, metric := range []string{"grpc.lb.rls.target_picks", "grpc.lb.rls.failed_picks"} {
   265  		if _, ok := gotMetrics[metric]; ok {
   266  			t.Fatalf("Metric %v present in recorded metrics", metric)
   267  		}
   268  	}
   269  }
   270  
   271  // TestRLSFailedRPCMetric tests RLS Metrics in the case an RLS Balancer fails an
   272  // RPC due to an RLS failure. This should emit a
   273  // "grpc.lb.rls.default_target_picks" with certain labels and cache metrics with
   274  // certain labels.
   275  func (s) TestRLSFailedRPCMetric(t *testing.T) {
   276  	// Overwrite the uuid random number generator to be deterministic.
   277  	uuid.SetRand(rand.New(rand.NewSource(1)))
   278  	defer uuid.SetRand(nil)
   279  
   280  	rlsServer, _ := rlstest.SetupFakeRLSServer(t, nil)
   281  	// Build an RLS config without a default target.
   282  	rlsConfig := buildBasicRLSConfigWithChildPolicy(t, t.Name(), rlsServer.Address)
   283  	// Register a manual resolver and push the RLS service config through it.
   284  	r := startManualResolverWithConfig(t, rlsConfig)
   285  	reader := metric.NewManualReader()
   286  	provider := metric.NewMeterProvider(metric.WithReader(reader))
   287  	mo := opentelemetry.MetricsOptions{
   288  		MeterProvider: provider,
   289  		Metrics:       opentelemetry.DefaultMetrics().Add("grpc.lb.rls.cache_entries", "grpc.lb.rls.cache_size", "grpc.lb.rls.default_target_picks", "grpc.lb.rls.target_picks", "grpc.lb.rls.failed_picks"),
   290  	}
   291  	grpcTarget := r.Scheme() + ":///"
   292  	cc, err := grpc.NewClient(grpcTarget, grpc.WithResolvers(r), grpc.WithTransportCredentials(insecure.NewCredentials()), opentelemetry.DialOption(opentelemetry.Options{MetricsOptions: mo}))
   293  	if err != nil {
   294  		t.Fatalf("Failed to dial local test server: %v", err)
   295  	}
   296  	defer cc.Close()
   297  
   298  	wantMetrics := []metricdata.Metrics{
   299  		{
   300  			Name:        "grpc.lb.rls.failed_picks",
   301  			Description: "EXPERIMENTAL. Number of LB picks failed due to either a failed RLS request or the RLS channel being throttled.",
   302  			Unit:        "pick",
   303  			Data: metricdata.Sum[int64]{
   304  				DataPoints: []metricdata.DataPoint[int64]{
   305  					{
   306  						Attributes: attribute.NewSet(attribute.String("grpc.target", grpcTarget), attribute.String("grpc.lb.rls.server_target", rlsServer.Address)),
   307  						Value:      1,
   308  					},
   309  				},
   310  				Temporality: metricdata.CumulativeTemporality,
   311  				IsMonotonic: true,
   312  			},
   313  		},
   314  		// Receives an empty RLS Response, so a single cache entry with no size.
   315  		{
   316  			Name:        "grpc.lb.rls.cache_entries",
   317  			Description: "EXPERIMENTAL. Number of entries in the RLS cache.",
   318  			Unit:        "entry",
   319  			Data: metricdata.Gauge[int64]{
   320  				DataPoints: []metricdata.DataPoint[int64]{
   321  					{
   322  						Attributes: attribute.NewSet(attribute.String("grpc.target", grpcTarget), attribute.String("grpc.lb.rls.server_target", rlsServer.Address), attribute.String("grpc.lb.rls.instance_uuid", "52fdfc07-2182-454f-963f-5f0f9a621d72")),
   323  						Value:      1,
   324  					},
   325  				},
   326  			},
   327  		},
   328  		{
   329  			Name:        "grpc.lb.rls.cache_size",
   330  			Description: "EXPERIMENTAL. The current size of the RLS cache.",
   331  			Unit:        "By",
   332  			Data: metricdata.Gauge[int64]{
   333  				DataPoints: []metricdata.DataPoint[int64]{
   334  					{
   335  						Attributes: attribute.NewSet(attribute.String("grpc.target", grpcTarget), attribute.String("grpc.lb.rls.server_target", rlsServer.Address), attribute.String("grpc.lb.rls.instance_uuid", "52fdfc07-2182-454f-963f-5f0f9a621d72")),
   336  						Value:      0,
   337  					},
   338  				},
   339  			},
   340  		},
   341  	}
   342  
   343  	ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
   344  	defer cancel()
   345  	client := testgrpc.NewTestServiceClient(cc)
   346  	if _, err = client.EmptyCall(ctx, &testpb.Empty{}); err == nil {
   347  		t.Fatalf("client.EmptyCall error = %v, expected a non nil error", err)
   348  	}
   349  
   350  	gotMetrics := metricsDataFromReader(ctx, reader)
   351  	for _, metric := range wantMetrics {
   352  		val, ok := gotMetrics[metric.Name]
   353  		if !ok {
   354  			t.Fatalf("Metric %v not present in recorded metrics", metric.Name)
   355  		}
   356  		if !metricdatatest.AssertEqual(t, metric, val, metricdatatest.IgnoreTimestamp(), metricdatatest.IgnoreExemplars()) {
   357  			t.Fatalf("Metrics data type not equal for metric: %v", metric.Name)
   358  		}
   359  	}
   360  	// Only one RPC was made, which was a failed pick due to an RLS failure, so
   361  	// no metrics for target picks or default target picks should have emitted.
   362  	for _, metric := range []string{"grpc.lb.rls.target_picks", "grpc.lb.rls.default_target_picks"} {
   363  		if _, ok := gotMetrics[metric]; ok {
   364  			t.Fatalf("Metric %v present in recorded metrics", metric)
   365  		}
   366  	}
   367  }