google.golang.org/grpc@v1.74.2/xds/internal/clients/xdsclient/test/metrics_test.go (about)

     1  /*
     2   *
     3   * Copyright 2025 gRPC authors.
     4   *
     5   * Licensed under the Apache License, Version 2.0 (the "License");
     6   * you may not use this file except in compliance with the License.
     7   * You may obtain a copy of the License at
     8   *
     9   *     http://www.apache.org/licenses/LICENSE-2.0
    10   *
    11   * Unless required by applicable law or agreed to in writing, software
    12   * distributed under the License is distributed on an "AS IS" BASIS,
    13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    14   * See the License for the specific language governing permissions and
    15   * limitations under the License.
    16   *
    17   */
    18  
    19  package xdsclient_test
    20  
    21  import (
    22  	"context"
    23  	"errors"
    24  	"net"
    25  	"testing"
    26  
    27  	"github.com/google/uuid"
    28  	"google.golang.org/grpc/credentials/insecure"
    29  	"google.golang.org/grpc/internal/testutils"
    30  	"google.golang.org/grpc/xds/internal/clients"
    31  	"google.golang.org/grpc/xds/internal/clients/grpctransport"
    32  	"google.golang.org/grpc/xds/internal/clients/internal/testutils/e2e"
    33  	"google.golang.org/grpc/xds/internal/clients/xdsclient"
    34  	"google.golang.org/grpc/xds/internal/clients/xdsclient/internal/xdsresource"
    35  	"google.golang.org/grpc/xds/internal/clients/xdsclient/metrics"
    36  
    37  	v3listenerpb "github.com/envoyproxy/go-control-plane/envoy/config/listener/v3"
    38  	v3discoverypb "github.com/envoyproxy/go-control-plane/envoy/service/discovery/v3"
    39  )
    40  
    41  // TestResourceUpdateMetrics configures an xDS client, and a management server
    42  // to send valid and invalid LDS updates, and verifies that the expected metrics
    43  // for both good and bad updates are emitted.
    44  func (s) TestResourceUpdateMetrics(t *testing.T) {
    45  	ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
    46  	defer cancel()
    47  
    48  	tmr := newTestMetricsReporter()
    49  	l, err := net.Listen("tcp", "localhost:0")
    50  	if err != nil {
    51  		t.Fatalf("net.Listen() failed: %v", err)
    52  	}
    53  
    54  	mgmtServer := e2e.StartManagementServer(t, e2e.ManagementServerOptions{Listener: l})
    55  	const listenerResourceName = "test-listener-resource"
    56  	const routeConfigurationName = "test-route-configuration-resource"
    57  	nodeID := uuid.New().String()
    58  	resources := e2e.UpdateOptions{
    59  		NodeID:         nodeID,
    60  		Listeners:      []*v3listenerpb.Listener{e2e.DefaultClientListener(listenerResourceName, routeConfigurationName)},
    61  		SkipValidation: true,
    62  	}
    63  	if err := mgmtServer.Update(ctx, resources); err != nil {
    64  		t.Fatalf("Failed to update management server with resources: %v, err: %v", resources, err)
    65  	}
    66  
    67  	resourceTypes := map[string]xdsclient.ResourceType{xdsresource.V3ListenerURL: listenerType}
    68  	si := clients.ServerIdentifier{
    69  		ServerURI:  mgmtServer.Address,
    70  		Extensions: grpctransport.ServerIdentifierExtension{ConfigName: "insecure"},
    71  	}
    72  	configs := map[string]grpctransport.Config{"insecure": {Credentials: insecure.NewBundle()}}
    73  	xdsClientConfig := xdsclient.Config{
    74  		Servers:          []xdsclient.ServerConfig{{ServerIdentifier: si}},
    75  		Node:             clients.Node{ID: nodeID},
    76  		TransportBuilder: grpctransport.NewBuilder(configs),
    77  		ResourceTypes:    resourceTypes,
    78  		// Xdstp resource names used in this test do not specify an
    79  		// authority. These will end up looking up an entry with the
    80  		// empty key in the authorities map. Having an entry with an
    81  		// empty key and empty configuration, results in these
    82  		// resources also using the top-level configuration.
    83  		Authorities: map[string]xdsclient.Authority{
    84  			"": {XDSServers: []xdsclient.ServerConfig{}},
    85  		},
    86  		MetricsReporter: tmr,
    87  	}
    88  	// Create an xDS client with the above config.
    89  	client, err := xdsclient.New(xdsClientConfig)
    90  	if err != nil {
    91  		t.Fatalf("Failed to create xDS client: %v", err)
    92  	}
    93  	defer client.Close()
    94  
    95  	// Watch the valid listener configured on the management server. This should
    96  	// cause a resource update valid metric to emit eventually.
    97  	client.WatchResource(listenerType.TypeURL, listenerResourceName, noopListenerWatcher{})
    98  	if err := tmr.waitForMetric(ctx, &metrics.ResourceUpdateValid{ServerURI: mgmtServer.Address, ResourceType: "ListenerResource"}); err != nil {
    99  		t.Fatal(err.Error())
   100  	}
   101  
   102  	// Update management server with a bad update. This should cause a resource
   103  	// update invalid metric to emit eventually.
   104  	resources = e2e.UpdateOptions{
   105  		NodeID:         nodeID,
   106  		Listeners:      []*v3listenerpb.Listener{e2e.DefaultClientListener(listenerResourceName, routeConfigurationName)},
   107  		SkipValidation: true,
   108  	}
   109  	resources.Listeners[0].ApiListener = nil
   110  	if err := mgmtServer.Update(ctx, resources); err != nil {
   111  		t.Fatalf("Failed to update management server with resources: %v, err: %v", resources, err)
   112  	}
   113  	if err := tmr.waitForMetric(ctx, &metrics.ResourceUpdateInvalid{ServerURI: mgmtServer.Address, ResourceType: "ListenerResource"}); err != nil {
   114  		t.Fatal(err.Error())
   115  	}
   116  
   117  	// Resource update valid metric should have not emitted.
   118  	sCtx, sCancel := context.WithTimeout(ctx, defaultTestShortTimeout)
   119  	defer sCancel()
   120  	if err := tmr.waitForMetric(sCtx, &metrics.ResourceUpdateValid{ServerURI: mgmtServer.Address, ResourceType: "ListenerResource"}); err == nil {
   121  		t.Fatal("tmr.WaitForInt64Count(ctx, mdWant) succeeded when expected to timeout.")
   122  	}
   123  }
   124  
   125  // TestServerFailureMetrics_BeforeResponseRecv configures an xDS client, and a
   126  // management server. It then register a watcher and stops the management
   127  // server before sending a resource update, and verifies that the expected
   128  // metric for server failure is emitted.
   129  func (s) TestServerFailureMetrics_BeforeResponseRecv(t *testing.T) {
   130  	ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
   131  	defer cancel()
   132  
   133  	tmr := newTestMetricsReporter()
   134  	l, err := net.Listen("tcp", "localhost:0")
   135  	if err != nil {
   136  		t.Fatalf("net.Listen() failed: %v", err)
   137  	}
   138  
   139  	lis := testutils.NewRestartableListener(l)
   140  	streamOpened := make(chan struct{}, 1)
   141  	mgmtServer := e2e.StartManagementServer(t, e2e.ManagementServerOptions{
   142  		Listener: lis,
   143  		OnStreamOpen: func(context.Context, int64, string) error {
   144  			select {
   145  			case streamOpened <- struct{}{}:
   146  			default:
   147  			}
   148  			return nil
   149  		},
   150  	})
   151  
   152  	nodeID := uuid.New().String()
   153  
   154  	resourceTypes := map[string]xdsclient.ResourceType{xdsresource.V3ListenerURL: listenerType}
   155  	si := clients.ServerIdentifier{
   156  		ServerURI:  mgmtServer.Address,
   157  		Extensions: grpctransport.ServerIdentifierExtension{ConfigName: "insecure"},
   158  	}
   159  	configs := map[string]grpctransport.Config{"insecure": {Credentials: insecure.NewBundle()}}
   160  	xdsClientConfig := xdsclient.Config{
   161  		Servers:          []xdsclient.ServerConfig{{ServerIdentifier: si}},
   162  		Node:             clients.Node{ID: nodeID},
   163  		TransportBuilder: grpctransport.NewBuilder(configs),
   164  		ResourceTypes:    resourceTypes,
   165  		// Xdstp resource names used in this test do not specify an
   166  		// authority. These will end up looking up an entry with the
   167  		// empty key in the authorities map. Having an entry with an
   168  		// empty key and empty configuration, results in these
   169  		// resources also using the top-level configuration.
   170  		Authorities: map[string]xdsclient.Authority{
   171  			"": {XDSServers: []xdsclient.ServerConfig{}},
   172  		},
   173  		MetricsReporter: tmr,
   174  	}
   175  	// Create an xDS client with the above config.
   176  	client, err := xdsclient.New(xdsClientConfig)
   177  	if err != nil {
   178  		t.Fatalf("Failed to create xDS client: %v", err)
   179  	}
   180  	defer client.Close()
   181  
   182  	const listenerResourceName = "test-listener-resource"
   183  
   184  	// Watch for the listener on the above management server.
   185  	client.WatchResource(listenerType.TypeURL, listenerResourceName, noopListenerWatcher{})
   186  	// Verify that an ADS stream is opened and an LDS request with the above
   187  	// resource name is sent.
   188  	select {
   189  	case <-streamOpened:
   190  	case <-ctx.Done():
   191  		t.Fatal("Timeout when waiting for ADS stream to open")
   192  	}
   193  
   194  	// Close the listener and ensure that the ADS stream breaks. This should
   195  	// cause a server failure metric to emit eventually.
   196  	lis.Stop()
   197  
   198  	// Restart to prevent the attempt to create a new ADS stream after back off.
   199  	lis.Restart()
   200  
   201  	if err := tmr.waitForMetric(ctx, &metrics.ServerFailure{ServerURI: mgmtServer.Address}); err != nil {
   202  		t.Fatal(err.Error())
   203  	}
   204  }
   205  
   206  // TestServerFailureMetrics_AfterResponseRecv configures an xDS client and a
   207  // management server to send a valid LDS update, and verifies that the
   208  // successful update metric is emitted. When the client ACKs the update, the
   209  // server returns an error, breaking the stream. The test then verifies that the
   210  // server failure metric is not emitted, because the ADS stream was closed after
   211  // a response was received on the stream. Finally, the test waits for the client
   212  // to establish a new stream and verifies that the client emits a metric after
   213  // receiving a successful update.
   214  func (s) TestServerFailureMetrics_AfterResponseRecv(t *testing.T) {
   215  	ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
   216  	defer cancel()
   217  
   218  	tmr := newTestMetricsReporter()
   219  	l, err := testutils.LocalTCPListener()
   220  	if err != nil {
   221  		t.Fatalf("net.Listen() failed: %v", err)
   222  	}
   223  	lis := testutils.NewRestartableListener(l)
   224  	streamCreationQuota := make(chan struct{}, 1)
   225  	streamCreationQuota <- struct{}{}
   226  
   227  	mgmtServer := e2e.StartManagementServer(t, e2e.ManagementServerOptions{
   228  		Listener: lis,
   229  		OnStreamOpen: func(context.Context, int64, string) error {
   230  			// The following select block is used to block stream creation after
   231  			// the first stream has failed, but while we are waiting to verify
   232  			// that the failure metric is not reported.
   233  			select {
   234  			case <-streamCreationQuota:
   235  			case <-ctx.Done():
   236  			}
   237  			return nil
   238  		},
   239  		OnStreamRequest: func(streamID int64, req *v3discoverypb.DiscoveryRequest) error {
   240  			// We only want the ACK on the first stream to return an error
   241  			// (leading to stream closure), without effecting subsequent stream
   242  			// attempts.
   243  			if streamID == 1 && req.GetVersionInfo() != "" {
   244  				return errors.New("test configured error")
   245  			}
   246  			return nil
   247  		}},
   248  	)
   249  	const listenerResourceName = "test-listener-resource"
   250  	const routeConfigurationName = "test-route-configuration-resource"
   251  	nodeID := uuid.New().String()
   252  	resources := e2e.UpdateOptions{
   253  		NodeID:         nodeID,
   254  		Listeners:      []*v3listenerpb.Listener{e2e.DefaultClientListener(listenerResourceName, routeConfigurationName)},
   255  		SkipValidation: true,
   256  	}
   257  	if err := mgmtServer.Update(ctx, resources); err != nil {
   258  		t.Fatalf("Failed to update management server with resources: %v, err: %v", resources, err)
   259  	}
   260  
   261  	resourceTypes := map[string]xdsclient.ResourceType{xdsresource.V3ListenerURL: listenerType}
   262  	si := clients.ServerIdentifier{
   263  		ServerURI:  mgmtServer.Address,
   264  		Extensions: grpctransport.ServerIdentifierExtension{ConfigName: "insecure"},
   265  	}
   266  	configs := map[string]grpctransport.Config{"insecure": {Credentials: insecure.NewBundle()}}
   267  	xdsClientConfig := xdsclient.Config{
   268  		Servers:          []xdsclient.ServerConfig{{ServerIdentifier: si}},
   269  		Node:             clients.Node{ID: nodeID},
   270  		TransportBuilder: grpctransport.NewBuilder(configs),
   271  		ResourceTypes:    resourceTypes,
   272  		// Xdstp resource names used in this test do not specify an
   273  		// authority. These will end up looking up an entry with the
   274  		// empty key in the authorities map. Having an entry with an
   275  		// empty key and empty configuration, results in these
   276  		// resources also using the top-level configuration.
   277  		Authorities: map[string]xdsclient.Authority{
   278  			"": {XDSServers: []xdsclient.ServerConfig{}},
   279  		},
   280  		MetricsReporter: tmr,
   281  	}
   282  	// Create an xDS client with the above config.
   283  	client, err := xdsclient.New(xdsClientConfig)
   284  	if err != nil {
   285  		t.Fatalf("Failed to create xDS client: %v", err)
   286  	}
   287  	defer client.Close()
   288  
   289  	// Watch the valid listener configured on the management server. This should
   290  	// cause a resource update valid metric to emit eventually.
   291  	client.WatchResource(listenerType.TypeURL, listenerResourceName, noopListenerWatcher{})
   292  	if err := tmr.waitForMetric(ctx, &metrics.ResourceUpdateValid{ServerURI: mgmtServer.Address, ResourceType: "ListenerResource"}); err != nil {
   293  		t.Fatal(err.Error())
   294  	}
   295  
   296  	// When the client sends an ACK, the management server would reply with an
   297  	// error, breaking the stream.
   298  	// Server failure should still have no recording point.
   299  	sCtx, sCancel := context.WithTimeout(ctx, defaultTestShortTimeout)
   300  	defer sCancel()
   301  	failureMetric := &metrics.ServerFailure{ServerURI: mgmtServer.Address}
   302  	if err := tmr.waitForMetric(sCtx, failureMetric); err == nil {
   303  		t.Fatalf("tmr.waitForMetric(%v) succeeded when expected to timeout.", failureMetric)
   304  	} else if sCtx.Err() == nil {
   305  		t.Fatalf("tmr.WaitForInt64Count(%v) = %v, want context deadline exceeded", failureMetric, err)
   306  	}
   307  	// Unblock stream creation and verify that an update is received
   308  	// successfully.
   309  	close(streamCreationQuota)
   310  	if err := tmr.waitForMetric(ctx, &metrics.ResourceUpdateValid{ServerURI: mgmtServer.Address, ResourceType: "ListenerResource"}); err != nil {
   311  		t.Fatal(err.Error())
   312  	}
   313  }