google.golang.org/grpc@v1.74.2/xds/internal/clients/xdsclient/test/metrics_test.go (about) 1 /* 2 * 3 * Copyright 2025 gRPC authors. 4 * 5 * Licensed under the Apache License, Version 2.0 (the "License"); 6 * you may not use this file except in compliance with the License. 7 * You may obtain a copy of the License at 8 * 9 * http://www.apache.org/licenses/LICENSE-2.0 10 * 11 * Unless required by applicable law or agreed to in writing, software 12 * distributed under the License is distributed on an "AS IS" BASIS, 13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 * See the License for the specific language governing permissions and 15 * limitations under the License. 16 * 17 */ 18 19 package xdsclient_test 20 21 import ( 22 "context" 23 "errors" 24 "net" 25 "testing" 26 27 "github.com/google/uuid" 28 "google.golang.org/grpc/credentials/insecure" 29 "google.golang.org/grpc/internal/testutils" 30 "google.golang.org/grpc/xds/internal/clients" 31 "google.golang.org/grpc/xds/internal/clients/grpctransport" 32 "google.golang.org/grpc/xds/internal/clients/internal/testutils/e2e" 33 "google.golang.org/grpc/xds/internal/clients/xdsclient" 34 "google.golang.org/grpc/xds/internal/clients/xdsclient/internal/xdsresource" 35 "google.golang.org/grpc/xds/internal/clients/xdsclient/metrics" 36 37 v3listenerpb "github.com/envoyproxy/go-control-plane/envoy/config/listener/v3" 38 v3discoverypb "github.com/envoyproxy/go-control-plane/envoy/service/discovery/v3" 39 ) 40 41 // TestResourceUpdateMetrics configures an xDS client, and a management server 42 // to send valid and invalid LDS updates, and verifies that the expected metrics 43 // for both good and bad updates are emitted. 44 func (s) TestResourceUpdateMetrics(t *testing.T) { 45 ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) 46 defer cancel() 47 48 tmr := newTestMetricsReporter() 49 l, err := net.Listen("tcp", "localhost:0") 50 if err != nil { 51 t.Fatalf("net.Listen() failed: %v", err) 52 } 53 54 mgmtServer := e2e.StartManagementServer(t, e2e.ManagementServerOptions{Listener: l}) 55 const listenerResourceName = "test-listener-resource" 56 const routeConfigurationName = "test-route-configuration-resource" 57 nodeID := uuid.New().String() 58 resources := e2e.UpdateOptions{ 59 NodeID: nodeID, 60 Listeners: []*v3listenerpb.Listener{e2e.DefaultClientListener(listenerResourceName, routeConfigurationName)}, 61 SkipValidation: true, 62 } 63 if err := mgmtServer.Update(ctx, resources); err != nil { 64 t.Fatalf("Failed to update management server with resources: %v, err: %v", resources, err) 65 } 66 67 resourceTypes := map[string]xdsclient.ResourceType{xdsresource.V3ListenerURL: listenerType} 68 si := clients.ServerIdentifier{ 69 ServerURI: mgmtServer.Address, 70 Extensions: grpctransport.ServerIdentifierExtension{ConfigName: "insecure"}, 71 } 72 configs := map[string]grpctransport.Config{"insecure": {Credentials: insecure.NewBundle()}} 73 xdsClientConfig := xdsclient.Config{ 74 Servers: []xdsclient.ServerConfig{{ServerIdentifier: si}}, 75 Node: clients.Node{ID: nodeID}, 76 TransportBuilder: grpctransport.NewBuilder(configs), 77 ResourceTypes: resourceTypes, 78 // Xdstp resource names used in this test do not specify an 79 // authority. These will end up looking up an entry with the 80 // empty key in the authorities map. Having an entry with an 81 // empty key and empty configuration, results in these 82 // resources also using the top-level configuration. 83 Authorities: map[string]xdsclient.Authority{ 84 "": {XDSServers: []xdsclient.ServerConfig{}}, 85 }, 86 MetricsReporter: tmr, 87 } 88 // Create an xDS client with the above config. 89 client, err := xdsclient.New(xdsClientConfig) 90 if err != nil { 91 t.Fatalf("Failed to create xDS client: %v", err) 92 } 93 defer client.Close() 94 95 // Watch the valid listener configured on the management server. This should 96 // cause a resource update valid metric to emit eventually. 97 client.WatchResource(listenerType.TypeURL, listenerResourceName, noopListenerWatcher{}) 98 if err := tmr.waitForMetric(ctx, &metrics.ResourceUpdateValid{ServerURI: mgmtServer.Address, ResourceType: "ListenerResource"}); err != nil { 99 t.Fatal(err.Error()) 100 } 101 102 // Update management server with a bad update. This should cause a resource 103 // update invalid metric to emit eventually. 104 resources = e2e.UpdateOptions{ 105 NodeID: nodeID, 106 Listeners: []*v3listenerpb.Listener{e2e.DefaultClientListener(listenerResourceName, routeConfigurationName)}, 107 SkipValidation: true, 108 } 109 resources.Listeners[0].ApiListener = nil 110 if err := mgmtServer.Update(ctx, resources); err != nil { 111 t.Fatalf("Failed to update management server with resources: %v, err: %v", resources, err) 112 } 113 if err := tmr.waitForMetric(ctx, &metrics.ResourceUpdateInvalid{ServerURI: mgmtServer.Address, ResourceType: "ListenerResource"}); err != nil { 114 t.Fatal(err.Error()) 115 } 116 117 // Resource update valid metric should have not emitted. 118 sCtx, sCancel := context.WithTimeout(ctx, defaultTestShortTimeout) 119 defer sCancel() 120 if err := tmr.waitForMetric(sCtx, &metrics.ResourceUpdateValid{ServerURI: mgmtServer.Address, ResourceType: "ListenerResource"}); err == nil { 121 t.Fatal("tmr.WaitForInt64Count(ctx, mdWant) succeeded when expected to timeout.") 122 } 123 } 124 125 // TestServerFailureMetrics_BeforeResponseRecv configures an xDS client, and a 126 // management server. It then register a watcher and stops the management 127 // server before sending a resource update, and verifies that the expected 128 // metric for server failure is emitted. 129 func (s) TestServerFailureMetrics_BeforeResponseRecv(t *testing.T) { 130 ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) 131 defer cancel() 132 133 tmr := newTestMetricsReporter() 134 l, err := net.Listen("tcp", "localhost:0") 135 if err != nil { 136 t.Fatalf("net.Listen() failed: %v", err) 137 } 138 139 lis := testutils.NewRestartableListener(l) 140 streamOpened := make(chan struct{}, 1) 141 mgmtServer := e2e.StartManagementServer(t, e2e.ManagementServerOptions{ 142 Listener: lis, 143 OnStreamOpen: func(context.Context, int64, string) error { 144 select { 145 case streamOpened <- struct{}{}: 146 default: 147 } 148 return nil 149 }, 150 }) 151 152 nodeID := uuid.New().String() 153 154 resourceTypes := map[string]xdsclient.ResourceType{xdsresource.V3ListenerURL: listenerType} 155 si := clients.ServerIdentifier{ 156 ServerURI: mgmtServer.Address, 157 Extensions: grpctransport.ServerIdentifierExtension{ConfigName: "insecure"}, 158 } 159 configs := map[string]grpctransport.Config{"insecure": {Credentials: insecure.NewBundle()}} 160 xdsClientConfig := xdsclient.Config{ 161 Servers: []xdsclient.ServerConfig{{ServerIdentifier: si}}, 162 Node: clients.Node{ID: nodeID}, 163 TransportBuilder: grpctransport.NewBuilder(configs), 164 ResourceTypes: resourceTypes, 165 // Xdstp resource names used in this test do not specify an 166 // authority. These will end up looking up an entry with the 167 // empty key in the authorities map. Having an entry with an 168 // empty key and empty configuration, results in these 169 // resources also using the top-level configuration. 170 Authorities: map[string]xdsclient.Authority{ 171 "": {XDSServers: []xdsclient.ServerConfig{}}, 172 }, 173 MetricsReporter: tmr, 174 } 175 // Create an xDS client with the above config. 176 client, err := xdsclient.New(xdsClientConfig) 177 if err != nil { 178 t.Fatalf("Failed to create xDS client: %v", err) 179 } 180 defer client.Close() 181 182 const listenerResourceName = "test-listener-resource" 183 184 // Watch for the listener on the above management server. 185 client.WatchResource(listenerType.TypeURL, listenerResourceName, noopListenerWatcher{}) 186 // Verify that an ADS stream is opened and an LDS request with the above 187 // resource name is sent. 188 select { 189 case <-streamOpened: 190 case <-ctx.Done(): 191 t.Fatal("Timeout when waiting for ADS stream to open") 192 } 193 194 // Close the listener and ensure that the ADS stream breaks. This should 195 // cause a server failure metric to emit eventually. 196 lis.Stop() 197 198 // Restart to prevent the attempt to create a new ADS stream after back off. 199 lis.Restart() 200 201 if err := tmr.waitForMetric(ctx, &metrics.ServerFailure{ServerURI: mgmtServer.Address}); err != nil { 202 t.Fatal(err.Error()) 203 } 204 } 205 206 // TestServerFailureMetrics_AfterResponseRecv configures an xDS client and a 207 // management server to send a valid LDS update, and verifies that the 208 // successful update metric is emitted. When the client ACKs the update, the 209 // server returns an error, breaking the stream. The test then verifies that the 210 // server failure metric is not emitted, because the ADS stream was closed after 211 // a response was received on the stream. Finally, the test waits for the client 212 // to establish a new stream and verifies that the client emits a metric after 213 // receiving a successful update. 214 func (s) TestServerFailureMetrics_AfterResponseRecv(t *testing.T) { 215 ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) 216 defer cancel() 217 218 tmr := newTestMetricsReporter() 219 l, err := testutils.LocalTCPListener() 220 if err != nil { 221 t.Fatalf("net.Listen() failed: %v", err) 222 } 223 lis := testutils.NewRestartableListener(l) 224 streamCreationQuota := make(chan struct{}, 1) 225 streamCreationQuota <- struct{}{} 226 227 mgmtServer := e2e.StartManagementServer(t, e2e.ManagementServerOptions{ 228 Listener: lis, 229 OnStreamOpen: func(context.Context, int64, string) error { 230 // The following select block is used to block stream creation after 231 // the first stream has failed, but while we are waiting to verify 232 // that the failure metric is not reported. 233 select { 234 case <-streamCreationQuota: 235 case <-ctx.Done(): 236 } 237 return nil 238 }, 239 OnStreamRequest: func(streamID int64, req *v3discoverypb.DiscoveryRequest) error { 240 // We only want the ACK on the first stream to return an error 241 // (leading to stream closure), without effecting subsequent stream 242 // attempts. 243 if streamID == 1 && req.GetVersionInfo() != "" { 244 return errors.New("test configured error") 245 } 246 return nil 247 }}, 248 ) 249 const listenerResourceName = "test-listener-resource" 250 const routeConfigurationName = "test-route-configuration-resource" 251 nodeID := uuid.New().String() 252 resources := e2e.UpdateOptions{ 253 NodeID: nodeID, 254 Listeners: []*v3listenerpb.Listener{e2e.DefaultClientListener(listenerResourceName, routeConfigurationName)}, 255 SkipValidation: true, 256 } 257 if err := mgmtServer.Update(ctx, resources); err != nil { 258 t.Fatalf("Failed to update management server with resources: %v, err: %v", resources, err) 259 } 260 261 resourceTypes := map[string]xdsclient.ResourceType{xdsresource.V3ListenerURL: listenerType} 262 si := clients.ServerIdentifier{ 263 ServerURI: mgmtServer.Address, 264 Extensions: grpctransport.ServerIdentifierExtension{ConfigName: "insecure"}, 265 } 266 configs := map[string]grpctransport.Config{"insecure": {Credentials: insecure.NewBundle()}} 267 xdsClientConfig := xdsclient.Config{ 268 Servers: []xdsclient.ServerConfig{{ServerIdentifier: si}}, 269 Node: clients.Node{ID: nodeID}, 270 TransportBuilder: grpctransport.NewBuilder(configs), 271 ResourceTypes: resourceTypes, 272 // Xdstp resource names used in this test do not specify an 273 // authority. These will end up looking up an entry with the 274 // empty key in the authorities map. Having an entry with an 275 // empty key and empty configuration, results in these 276 // resources also using the top-level configuration. 277 Authorities: map[string]xdsclient.Authority{ 278 "": {XDSServers: []xdsclient.ServerConfig{}}, 279 }, 280 MetricsReporter: tmr, 281 } 282 // Create an xDS client with the above config. 283 client, err := xdsclient.New(xdsClientConfig) 284 if err != nil { 285 t.Fatalf("Failed to create xDS client: %v", err) 286 } 287 defer client.Close() 288 289 // Watch the valid listener configured on the management server. This should 290 // cause a resource update valid metric to emit eventually. 291 client.WatchResource(listenerType.TypeURL, listenerResourceName, noopListenerWatcher{}) 292 if err := tmr.waitForMetric(ctx, &metrics.ResourceUpdateValid{ServerURI: mgmtServer.Address, ResourceType: "ListenerResource"}); err != nil { 293 t.Fatal(err.Error()) 294 } 295 296 // When the client sends an ACK, the management server would reply with an 297 // error, breaking the stream. 298 // Server failure should still have no recording point. 299 sCtx, sCancel := context.WithTimeout(ctx, defaultTestShortTimeout) 300 defer sCancel() 301 failureMetric := &metrics.ServerFailure{ServerURI: mgmtServer.Address} 302 if err := tmr.waitForMetric(sCtx, failureMetric); err == nil { 303 t.Fatalf("tmr.waitForMetric(%v) succeeded when expected to timeout.", failureMetric) 304 } else if sCtx.Err() == nil { 305 t.Fatalf("tmr.WaitForInt64Count(%v) = %v, want context deadline exceeded", failureMetric, err) 306 } 307 // Unblock stream creation and verify that an update is received 308 // successfully. 309 close(streamCreationQuota) 310 if err := tmr.waitForMetric(ctx, &metrics.ResourceUpdateValid{ServerURI: mgmtServer.Address, ResourceType: "ListenerResource"}); err != nil { 311 t.Fatal(err.Error()) 312 } 313 }