google.golang.org/grpc@v1.72.2/xds/internal/xdsclient/metrics_test.go (about)

     1  /*
     2   *
     3   * Copyright 2025 gRPC authors.
     4   *
     5   * Licensed under the Apache License, Version 2.0 (the "License");
     6   * you may not use this file except in compliance with the License.
     7   * You may obtain a copy of the License at
     8   *
     9   *     http://www.apache.org/licenses/LICENSE-2.0
    10   *
    11   * Unless required by applicable law or agreed to in writing, software
    12   * distributed under the License is distributed on an "AS IS" BASIS,
    13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    14   * See the License for the specific language governing permissions and
    15   * limitations under the License.
    16   *
    17   */
    18  
    19  package xdsclient
    20  
    21  import (
    22  	"context"
    23  	"encoding/json"
    24  	"fmt"
    25  	"testing"
    26  
    27  	"github.com/google/uuid"
    28  	"google.golang.org/grpc/internal/testutils"
    29  	"google.golang.org/grpc/internal/testutils/stats"
    30  	"google.golang.org/grpc/internal/testutils/xds/e2e"
    31  	"google.golang.org/grpc/internal/xds/bootstrap"
    32  	"google.golang.org/grpc/xds/internal/xdsclient/xdsresource"
    33  
    34  	v3listenerpb "github.com/envoyproxy/go-control-plane/envoy/config/listener/v3"
    35  )
    36  
    37  type noopListenerWatcher struct{}
    38  
    39  func (noopListenerWatcher) OnUpdate(_ *xdsresource.ListenerResourceData, onDone xdsresource.OnDoneFunc) {
    40  	onDone()
    41  }
    42  
    43  func (noopListenerWatcher) OnError(_ error, onDone xdsresource.OnDoneFunc) {
    44  	onDone()
    45  }
    46  
    47  func (noopListenerWatcher) OnResourceDoesNotExist(onDone xdsresource.OnDoneFunc) {
    48  	onDone()
    49  }
    50  
    51  // TestResourceUpdateMetrics configures an xDS client, and a management server
    52  // to send valid and invalid LDS updates, and verifies that the expected metrics
    53  // for both good and bad updates are emitted.
    54  func (s) TestResourceUpdateMetrics(t *testing.T) {
    55  	ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
    56  	defer cancel()
    57  
    58  	tmr := stats.NewTestMetricsRecorder()
    59  	l, err := testutils.LocalTCPListener()
    60  	if err != nil {
    61  		t.Fatalf("net.Listen() failed: %v", err)
    62  	}
    63  	mgmtServer := e2e.StartManagementServer(t, e2e.ManagementServerOptions{Listener: l})
    64  	const listenerResourceName = "test-listener-resource"
    65  	const routeConfigurationName = "test-route-configuration-resource"
    66  	nodeID := uuid.New().String()
    67  	resources := e2e.UpdateOptions{
    68  		NodeID:         nodeID,
    69  		Listeners:      []*v3listenerpb.Listener{e2e.DefaultClientListener(listenerResourceName, routeConfigurationName)},
    70  		SkipValidation: true,
    71  	}
    72  	if err := mgmtServer.Update(ctx, resources); err != nil {
    73  		t.Fatalf("Failed to update management server with resources: %v, err: %v", resources, err)
    74  	}
    75  
    76  	bootstrapContents, err := bootstrap.NewContentsForTesting(bootstrap.ConfigOptionsForTesting{
    77  		Servers: []byte(fmt.Sprintf(`[{
    78  			"server_uri": %q,
    79  			"channel_creds": [{"type": "insecure"}]
    80  		}]`, mgmtServer.Address)),
    81  		Node: []byte(fmt.Sprintf(`{"id": "%s"}`, nodeID)),
    82  		Authorities: map[string]json.RawMessage{
    83  			"authority": []byte("{}"),
    84  		},
    85  	})
    86  	if err != nil {
    87  		t.Fatalf("Failed to create bootstrap configuration: %v", err)
    88  	}
    89  
    90  	config, err := bootstrap.NewConfigFromContents(bootstrapContents)
    91  	if err != nil {
    92  		t.Fatalf("Failed to parse bootstrap contents: %s, %v", string(bootstrapContents), err)
    93  	}
    94  	pool := NewPool(config)
    95  	client, close, err := pool.NewClientForTesting(OptionsForTesting{
    96  		Name:               t.Name(),
    97  		WatchExpiryTimeout: defaultTestWatchExpiryTimeout,
    98  		MetricsRecorder:    tmr,
    99  	})
   100  	if err != nil {
   101  		t.Fatalf("Failed to create an xDS client: %v", err)
   102  	}
   103  	defer close()
   104  
   105  	// Watch the valid listener configured on the management server. This should
   106  	// cause a resource updates valid count to emit eventually.
   107  	xdsresource.WatchListener(client, listenerResourceName, noopListenerWatcher{})
   108  	mdWant := stats.MetricsData{
   109  		Handle:    xdsClientResourceUpdatesValidMetric.Descriptor(),
   110  		IntIncr:   1,
   111  		LabelKeys: []string{"grpc.target", "grpc.xds.server", "grpc.xds.resource_type"},
   112  		LabelVals: []string{"Test/ResourceUpdateMetrics", mgmtServer.Address, "ListenerResource"},
   113  	}
   114  	if err := tmr.WaitForInt64Count(ctx, mdWant); err != nil {
   115  		t.Fatal(err.Error())
   116  	}
   117  	// Invalid should have no recording point.
   118  	if got, _ := tmr.Metric("grpc.xds_client.resource_updates_invalid"); got != 0 {
   119  		t.Fatalf("Unexpected data for metric \"grpc.xds_client.resource_updates_invalid\", got: %v, want: %v", got, 0)
   120  	}
   121  
   122  	// Update management server with a bad update. Eventually, tmr should
   123  	// receive an invalid count received metric. The successful metric should
   124  	// stay the same.
   125  	resources = e2e.UpdateOptions{
   126  		NodeID:         nodeID,
   127  		Listeners:      []*v3listenerpb.Listener{e2e.DefaultClientListener(listenerResourceName, routeConfigurationName)},
   128  		SkipValidation: true,
   129  	}
   130  	resources.Listeners[0].ApiListener = nil
   131  	if err := mgmtServer.Update(ctx, resources); err != nil {
   132  		t.Fatalf("Failed to update management server with resources: %v, err: %v", resources, err)
   133  	}
   134  
   135  	mdWant = stats.MetricsData{
   136  		Handle:    xdsClientResourceUpdatesInvalidMetric.Descriptor(),
   137  		IntIncr:   1,
   138  		LabelKeys: []string{"grpc.target", "grpc.xds.server", "grpc.xds.resource_type"},
   139  		LabelVals: []string{"Test/ResourceUpdateMetrics", mgmtServer.Address, "ListenerResource"},
   140  	}
   141  	if err := tmr.WaitForInt64Count(ctx, mdWant); err != nil {
   142  		t.Fatal(err.Error())
   143  	}
   144  	// Valid should stay the same at 1.
   145  	if got, _ := tmr.Metric("grpc.xds_client.resource_updates_valid"); got != 1 {
   146  		t.Fatalf("Unexpected data for metric \"grpc.xds_client.resource_updates_invalid\", got: %v, want: %v", got, 1)
   147  	}
   148  }
   149  
   150  // TestServerFailureMetrics_BeforeResponseRecv configures an xDS client, and a
   151  // management server. It then register a watcher and stops the management
   152  // server before sending a resource update, and verifies that the expected
   153  // metrics for server failure are emitted.
   154  func (s) TestServerFailureMetrics_BeforeResponseRecv(t *testing.T) {
   155  	ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
   156  	defer cancel()
   157  
   158  	tmr := stats.NewTestMetricsRecorder()
   159  	l, err := testutils.LocalTCPListener()
   160  	if err != nil {
   161  		t.Fatalf("net.Listen() failed: %v", err)
   162  	}
   163  	lis := testutils.NewRestartableListener(l)
   164  	streamOpened := make(chan struct{}, 1)
   165  	mgmtServer := e2e.StartManagementServer(t, e2e.ManagementServerOptions{
   166  		Listener: lis,
   167  		OnStreamOpen: func(context.Context, int64, string) error {
   168  			select {
   169  			case streamOpened <- struct{}{}:
   170  			default:
   171  			}
   172  			return nil
   173  		},
   174  	})
   175  
   176  	nodeID := uuid.New().String()
   177  
   178  	bootstrapContents, err := bootstrap.NewContentsForTesting(bootstrap.ConfigOptionsForTesting{
   179  		Servers: []byte(fmt.Sprintf(`[{
   180  			"server_uri": %q,
   181  			"channel_creds": [{"type": "insecure"}]
   182  		}]`, mgmtServer.Address)),
   183  		Node: []byte(fmt.Sprintf(`{"id": "%s"}`, nodeID)),
   184  		Authorities: map[string]json.RawMessage{
   185  			"authority": []byte("{}"),
   186  		},
   187  	})
   188  	if err != nil {
   189  		t.Fatalf("Failed to create bootstrap configuration: %v", err)
   190  	}
   191  
   192  	config, err := bootstrap.NewConfigFromContents(bootstrapContents)
   193  	if err != nil {
   194  		t.Fatalf("Failed to parse bootstrap contents: %s, %v", string(bootstrapContents), err)
   195  	}
   196  	pool := NewPool(config)
   197  	client, close, err := pool.NewClientForTesting(OptionsForTesting{
   198  		Name:               t.Name(),
   199  		WatchExpiryTimeout: defaultTestWatchExpiryTimeout,
   200  		MetricsRecorder:    tmr,
   201  	})
   202  	if err != nil {
   203  		t.Fatalf("Failed to create an xDS client: %v", err)
   204  	}
   205  	defer close()
   206  
   207  	const listenerResourceName = "test-listener-resource"
   208  
   209  	// Watch for the listener on the above management server.
   210  	xdsresource.WatchListener(client, listenerResourceName, noopListenerWatcher{})
   211  	// Verify that an ADS stream is opened and an LDS request with the above
   212  	// resource name is sent.
   213  	select {
   214  	case <-streamOpened:
   215  	case <-ctx.Done():
   216  		t.Fatal("Timeout when waiting for ADS stream to open")
   217  	}
   218  
   219  	// Close the listener and ensure that the ADS stream breaks. This should
   220  	// cause a server failure count to emit eventually.
   221  	lis.Stop()
   222  
   223  	// Restart to prevent the attempt to create a new ADS stream after back off.
   224  	lis.Restart()
   225  
   226  	mdWant := stats.MetricsData{
   227  		Handle:    xdsClientServerFailureMetric.Descriptor(),
   228  		IntIncr:   1,
   229  		LabelKeys: []string{"grpc.target", "grpc.xds.server"},
   230  		LabelVals: []string{"Test/ServerFailureMetrics_BeforeResponseRecv", mgmtServer.Address},
   231  	}
   232  	if err := tmr.WaitForInt64Count(ctx, mdWant); err != nil {
   233  		t.Fatal(err.Error())
   234  	}
   235  }
   236  
   237  // TestServerFailureMetrics_AfterResponseRecv configures an xDS client, and a
   238  // management server to send a valid LDS updates, and verifies that the
   239  // server failure metric is not emitted. It then closes the management server
   240  // listener to close the ADS stream and verifies that the server failure metric
   241  // is still not emitted because the the ADS stream was closed after having
   242  // received a response on the stream.
   243  func (s) TestServerFailureMetrics_AfterResponseRecv(t *testing.T) {
   244  	ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
   245  	defer cancel()
   246  
   247  	tmr := stats.NewTestMetricsRecorder()
   248  	l, err := testutils.LocalTCPListener()
   249  	if err != nil {
   250  		t.Fatalf("net.Listen() failed: %v", err)
   251  	}
   252  	lis := testutils.NewRestartableListener(l)
   253  	mgmtServer := e2e.StartManagementServer(t, e2e.ManagementServerOptions{Listener: lis})
   254  	const listenerResourceName = "test-listener-resource"
   255  	const routeConfigurationName = "test-route-configuration-resource"
   256  	nodeID := uuid.New().String()
   257  	resources := e2e.UpdateOptions{
   258  		NodeID:         nodeID,
   259  		Listeners:      []*v3listenerpb.Listener{e2e.DefaultClientListener(listenerResourceName, routeConfigurationName)},
   260  		SkipValidation: true,
   261  	}
   262  	if err := mgmtServer.Update(ctx, resources); err != nil {
   263  		t.Fatalf("Failed to update management server with resources: %v, err: %v", resources, err)
   264  	}
   265  
   266  	bootstrapContents, err := bootstrap.NewContentsForTesting(bootstrap.ConfigOptionsForTesting{
   267  		Servers: []byte(fmt.Sprintf(`[{
   268  			"server_uri": %q,
   269  			"channel_creds": [{"type": "insecure"}]
   270  		}]`, mgmtServer.Address)),
   271  		Node: []byte(fmt.Sprintf(`{"id": "%s"}`, nodeID)),
   272  		Authorities: map[string]json.RawMessage{
   273  			"authority": []byte("{}"),
   274  		},
   275  	})
   276  	if err != nil {
   277  		t.Fatalf("Failed to create bootstrap configuration: %v", err)
   278  	}
   279  
   280  	config, err := bootstrap.NewConfigFromContents(bootstrapContents)
   281  	if err != nil {
   282  		t.Fatalf("Failed to parse bootstrap contents: %s, %v", string(bootstrapContents), err)
   283  	}
   284  	pool := NewPool(config)
   285  	client, close, err := pool.NewClientForTesting(OptionsForTesting{
   286  		Name:            t.Name(),
   287  		MetricsRecorder: tmr,
   288  	})
   289  	if err != nil {
   290  		t.Fatalf("Failed to create an xDS client: %v", err)
   291  	}
   292  	defer close()
   293  
   294  	// Watch the valid listener configured on the management server. This should
   295  	// cause a resource updates valid count to emit eventually.
   296  	xdsresource.WatchListener(client, listenerResourceName, noopListenerWatcher{})
   297  	mdWant := stats.MetricsData{
   298  		Handle:    xdsClientResourceUpdatesValidMetric.Descriptor(),
   299  		IntIncr:   1,
   300  		LabelKeys: []string{"grpc.target", "grpc.xds.server", "grpc.xds.resource_type"},
   301  		LabelVals: []string{"Test/ServerFailureMetrics_AfterResponseRecv", mgmtServer.Address, "ListenerResource"},
   302  	}
   303  	if err := tmr.WaitForInt64Count(ctx, mdWant); err != nil {
   304  		t.Fatal(err.Error())
   305  	}
   306  	// Server failure should have no recording point.
   307  	if got, _ := tmr.Metric("grpc.xds_client.server_failure"); got != 0 {
   308  		t.Fatalf("Unexpected data for metric \"grpc.xds_client.server_failure\", got: %v, want: %v", got, 0)
   309  	}
   310  
   311  	// Close the listener and ensure that the ADS stream breaks. This should
   312  	// cause a server failure count to emit eventually.
   313  	lis.Stop()
   314  	if ctx.Err() != nil {
   315  		t.Fatalf("Timeout when waiting for ADS stream to close")
   316  	}
   317  	// Restart to prevent the attempt to create a new ADS stream after back off.
   318  	lis.Restart()
   319  
   320  	mdWant = stats.MetricsData{
   321  		Handle:    xdsClientServerFailureMetric.Descriptor(),
   322  		IntIncr:   1,
   323  		LabelKeys: []string{"grpc.target", "grpc.xds.server"},
   324  		LabelVals: []string{"Test/ServerFailureMetrics_AfterResponseRecv", mgmtServer.Address},
   325  	}
   326  	// Server failure should still have no recording point.
   327  	if err := tmr.WaitForInt64Count(ctx, mdWant); err == nil {
   328  		t.Fatal("tmr.WaitForInt64Count(ctx, mdWant) succeeded when expected to timeout.")
   329  	}
   330  }