google.golang.org/grpc@v1.72.2/xds/internal/xdsclient/metrics_test.go (about) 1 /* 2 * 3 * Copyright 2025 gRPC authors. 4 * 5 * Licensed under the Apache License, Version 2.0 (the "License"); 6 * you may not use this file except in compliance with the License. 7 * You may obtain a copy of the License at 8 * 9 * http://www.apache.org/licenses/LICENSE-2.0 10 * 11 * Unless required by applicable law or agreed to in writing, software 12 * distributed under the License is distributed on an "AS IS" BASIS, 13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 * See the License for the specific language governing permissions and 15 * limitations under the License. 16 * 17 */ 18 19 package xdsclient 20 21 import ( 22 "context" 23 "encoding/json" 24 "fmt" 25 "testing" 26 27 "github.com/google/uuid" 28 "google.golang.org/grpc/internal/testutils" 29 "google.golang.org/grpc/internal/testutils/stats" 30 "google.golang.org/grpc/internal/testutils/xds/e2e" 31 "google.golang.org/grpc/internal/xds/bootstrap" 32 "google.golang.org/grpc/xds/internal/xdsclient/xdsresource" 33 34 v3listenerpb "github.com/envoyproxy/go-control-plane/envoy/config/listener/v3" 35 ) 36 37 type noopListenerWatcher struct{} 38 39 func (noopListenerWatcher) OnUpdate(_ *xdsresource.ListenerResourceData, onDone xdsresource.OnDoneFunc) { 40 onDone() 41 } 42 43 func (noopListenerWatcher) OnError(_ error, onDone xdsresource.OnDoneFunc) { 44 onDone() 45 } 46 47 func (noopListenerWatcher) OnResourceDoesNotExist(onDone xdsresource.OnDoneFunc) { 48 onDone() 49 } 50 51 // TestResourceUpdateMetrics configures an xDS client, and a management server 52 // to send valid and invalid LDS updates, and verifies that the expected metrics 53 // for both good and bad updates are emitted. 54 func (s) TestResourceUpdateMetrics(t *testing.T) { 55 ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) 56 defer cancel() 57 58 tmr := stats.NewTestMetricsRecorder() 59 l, err := testutils.LocalTCPListener() 60 if err != nil { 61 t.Fatalf("net.Listen() failed: %v", err) 62 } 63 mgmtServer := e2e.StartManagementServer(t, e2e.ManagementServerOptions{Listener: l}) 64 const listenerResourceName = "test-listener-resource" 65 const routeConfigurationName = "test-route-configuration-resource" 66 nodeID := uuid.New().String() 67 resources := e2e.UpdateOptions{ 68 NodeID: nodeID, 69 Listeners: []*v3listenerpb.Listener{e2e.DefaultClientListener(listenerResourceName, routeConfigurationName)}, 70 SkipValidation: true, 71 } 72 if err := mgmtServer.Update(ctx, resources); err != nil { 73 t.Fatalf("Failed to update management server with resources: %v, err: %v", resources, err) 74 } 75 76 bootstrapContents, err := bootstrap.NewContentsForTesting(bootstrap.ConfigOptionsForTesting{ 77 Servers: []byte(fmt.Sprintf(`[{ 78 "server_uri": %q, 79 "channel_creds": [{"type": "insecure"}] 80 }]`, mgmtServer.Address)), 81 Node: []byte(fmt.Sprintf(`{"id": "%s"}`, nodeID)), 82 Authorities: map[string]json.RawMessage{ 83 "authority": []byte("{}"), 84 }, 85 }) 86 if err != nil { 87 t.Fatalf("Failed to create bootstrap configuration: %v", err) 88 } 89 90 config, err := bootstrap.NewConfigFromContents(bootstrapContents) 91 if err != nil { 92 t.Fatalf("Failed to parse bootstrap contents: %s, %v", string(bootstrapContents), err) 93 } 94 pool := NewPool(config) 95 client, close, err := pool.NewClientForTesting(OptionsForTesting{ 96 Name: t.Name(), 97 WatchExpiryTimeout: defaultTestWatchExpiryTimeout, 98 MetricsRecorder: tmr, 99 }) 100 if err != nil { 101 t.Fatalf("Failed to create an xDS client: %v", err) 102 } 103 defer close() 104 105 // Watch the valid listener configured on the management server. This should 106 // cause a resource updates valid count to emit eventually. 107 xdsresource.WatchListener(client, listenerResourceName, noopListenerWatcher{}) 108 mdWant := stats.MetricsData{ 109 Handle: xdsClientResourceUpdatesValidMetric.Descriptor(), 110 IntIncr: 1, 111 LabelKeys: []string{"grpc.target", "grpc.xds.server", "grpc.xds.resource_type"}, 112 LabelVals: []string{"Test/ResourceUpdateMetrics", mgmtServer.Address, "ListenerResource"}, 113 } 114 if err := tmr.WaitForInt64Count(ctx, mdWant); err != nil { 115 t.Fatal(err.Error()) 116 } 117 // Invalid should have no recording point. 118 if got, _ := tmr.Metric("grpc.xds_client.resource_updates_invalid"); got != 0 { 119 t.Fatalf("Unexpected data for metric \"grpc.xds_client.resource_updates_invalid\", got: %v, want: %v", got, 0) 120 } 121 122 // Update management server with a bad update. Eventually, tmr should 123 // receive an invalid count received metric. The successful metric should 124 // stay the same. 125 resources = e2e.UpdateOptions{ 126 NodeID: nodeID, 127 Listeners: []*v3listenerpb.Listener{e2e.DefaultClientListener(listenerResourceName, routeConfigurationName)}, 128 SkipValidation: true, 129 } 130 resources.Listeners[0].ApiListener = nil 131 if err := mgmtServer.Update(ctx, resources); err != nil { 132 t.Fatalf("Failed to update management server with resources: %v, err: %v", resources, err) 133 } 134 135 mdWant = stats.MetricsData{ 136 Handle: xdsClientResourceUpdatesInvalidMetric.Descriptor(), 137 IntIncr: 1, 138 LabelKeys: []string{"grpc.target", "grpc.xds.server", "grpc.xds.resource_type"}, 139 LabelVals: []string{"Test/ResourceUpdateMetrics", mgmtServer.Address, "ListenerResource"}, 140 } 141 if err := tmr.WaitForInt64Count(ctx, mdWant); err != nil { 142 t.Fatal(err.Error()) 143 } 144 // Valid should stay the same at 1. 145 if got, _ := tmr.Metric("grpc.xds_client.resource_updates_valid"); got != 1 { 146 t.Fatalf("Unexpected data for metric \"grpc.xds_client.resource_updates_invalid\", got: %v, want: %v", got, 1) 147 } 148 } 149 150 // TestServerFailureMetrics_BeforeResponseRecv configures an xDS client, and a 151 // management server. It then register a watcher and stops the management 152 // server before sending a resource update, and verifies that the expected 153 // metrics for server failure are emitted. 154 func (s) TestServerFailureMetrics_BeforeResponseRecv(t *testing.T) { 155 ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) 156 defer cancel() 157 158 tmr := stats.NewTestMetricsRecorder() 159 l, err := testutils.LocalTCPListener() 160 if err != nil { 161 t.Fatalf("net.Listen() failed: %v", err) 162 } 163 lis := testutils.NewRestartableListener(l) 164 streamOpened := make(chan struct{}, 1) 165 mgmtServer := e2e.StartManagementServer(t, e2e.ManagementServerOptions{ 166 Listener: lis, 167 OnStreamOpen: func(context.Context, int64, string) error { 168 select { 169 case streamOpened <- struct{}{}: 170 default: 171 } 172 return nil 173 }, 174 }) 175 176 nodeID := uuid.New().String() 177 178 bootstrapContents, err := bootstrap.NewContentsForTesting(bootstrap.ConfigOptionsForTesting{ 179 Servers: []byte(fmt.Sprintf(`[{ 180 "server_uri": %q, 181 "channel_creds": [{"type": "insecure"}] 182 }]`, mgmtServer.Address)), 183 Node: []byte(fmt.Sprintf(`{"id": "%s"}`, nodeID)), 184 Authorities: map[string]json.RawMessage{ 185 "authority": []byte("{}"), 186 }, 187 }) 188 if err != nil { 189 t.Fatalf("Failed to create bootstrap configuration: %v", err) 190 } 191 192 config, err := bootstrap.NewConfigFromContents(bootstrapContents) 193 if err != nil { 194 t.Fatalf("Failed to parse bootstrap contents: %s, %v", string(bootstrapContents), err) 195 } 196 pool := NewPool(config) 197 client, close, err := pool.NewClientForTesting(OptionsForTesting{ 198 Name: t.Name(), 199 WatchExpiryTimeout: defaultTestWatchExpiryTimeout, 200 MetricsRecorder: tmr, 201 }) 202 if err != nil { 203 t.Fatalf("Failed to create an xDS client: %v", err) 204 } 205 defer close() 206 207 const listenerResourceName = "test-listener-resource" 208 209 // Watch for the listener on the above management server. 210 xdsresource.WatchListener(client, listenerResourceName, noopListenerWatcher{}) 211 // Verify that an ADS stream is opened and an LDS request with the above 212 // resource name is sent. 213 select { 214 case <-streamOpened: 215 case <-ctx.Done(): 216 t.Fatal("Timeout when waiting for ADS stream to open") 217 } 218 219 // Close the listener and ensure that the ADS stream breaks. This should 220 // cause a server failure count to emit eventually. 221 lis.Stop() 222 223 // Restart to prevent the attempt to create a new ADS stream after back off. 224 lis.Restart() 225 226 mdWant := stats.MetricsData{ 227 Handle: xdsClientServerFailureMetric.Descriptor(), 228 IntIncr: 1, 229 LabelKeys: []string{"grpc.target", "grpc.xds.server"}, 230 LabelVals: []string{"Test/ServerFailureMetrics_BeforeResponseRecv", mgmtServer.Address}, 231 } 232 if err := tmr.WaitForInt64Count(ctx, mdWant); err != nil { 233 t.Fatal(err.Error()) 234 } 235 } 236 237 // TestServerFailureMetrics_AfterResponseRecv configures an xDS client, and a 238 // management server to send a valid LDS updates, and verifies that the 239 // server failure metric is not emitted. It then closes the management server 240 // listener to close the ADS stream and verifies that the server failure metric 241 // is still not emitted because the the ADS stream was closed after having 242 // received a response on the stream. 243 func (s) TestServerFailureMetrics_AfterResponseRecv(t *testing.T) { 244 ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) 245 defer cancel() 246 247 tmr := stats.NewTestMetricsRecorder() 248 l, err := testutils.LocalTCPListener() 249 if err != nil { 250 t.Fatalf("net.Listen() failed: %v", err) 251 } 252 lis := testutils.NewRestartableListener(l) 253 mgmtServer := e2e.StartManagementServer(t, e2e.ManagementServerOptions{Listener: lis}) 254 const listenerResourceName = "test-listener-resource" 255 const routeConfigurationName = "test-route-configuration-resource" 256 nodeID := uuid.New().String() 257 resources := e2e.UpdateOptions{ 258 NodeID: nodeID, 259 Listeners: []*v3listenerpb.Listener{e2e.DefaultClientListener(listenerResourceName, routeConfigurationName)}, 260 SkipValidation: true, 261 } 262 if err := mgmtServer.Update(ctx, resources); err != nil { 263 t.Fatalf("Failed to update management server with resources: %v, err: %v", resources, err) 264 } 265 266 bootstrapContents, err := bootstrap.NewContentsForTesting(bootstrap.ConfigOptionsForTesting{ 267 Servers: []byte(fmt.Sprintf(`[{ 268 "server_uri": %q, 269 "channel_creds": [{"type": "insecure"}] 270 }]`, mgmtServer.Address)), 271 Node: []byte(fmt.Sprintf(`{"id": "%s"}`, nodeID)), 272 Authorities: map[string]json.RawMessage{ 273 "authority": []byte("{}"), 274 }, 275 }) 276 if err != nil { 277 t.Fatalf("Failed to create bootstrap configuration: %v", err) 278 } 279 280 config, err := bootstrap.NewConfigFromContents(bootstrapContents) 281 if err != nil { 282 t.Fatalf("Failed to parse bootstrap contents: %s, %v", string(bootstrapContents), err) 283 } 284 pool := NewPool(config) 285 client, close, err := pool.NewClientForTesting(OptionsForTesting{ 286 Name: t.Name(), 287 MetricsRecorder: tmr, 288 }) 289 if err != nil { 290 t.Fatalf("Failed to create an xDS client: %v", err) 291 } 292 defer close() 293 294 // Watch the valid listener configured on the management server. This should 295 // cause a resource updates valid count to emit eventually. 296 xdsresource.WatchListener(client, listenerResourceName, noopListenerWatcher{}) 297 mdWant := stats.MetricsData{ 298 Handle: xdsClientResourceUpdatesValidMetric.Descriptor(), 299 IntIncr: 1, 300 LabelKeys: []string{"grpc.target", "grpc.xds.server", "grpc.xds.resource_type"}, 301 LabelVals: []string{"Test/ServerFailureMetrics_AfterResponseRecv", mgmtServer.Address, "ListenerResource"}, 302 } 303 if err := tmr.WaitForInt64Count(ctx, mdWant); err != nil { 304 t.Fatal(err.Error()) 305 } 306 // Server failure should have no recording point. 307 if got, _ := tmr.Metric("grpc.xds_client.server_failure"); got != 0 { 308 t.Fatalf("Unexpected data for metric \"grpc.xds_client.server_failure\", got: %v, want: %v", got, 0) 309 } 310 311 // Close the listener and ensure that the ADS stream breaks. This should 312 // cause a server failure count to emit eventually. 313 lis.Stop() 314 if ctx.Err() != nil { 315 t.Fatalf("Timeout when waiting for ADS stream to close") 316 } 317 // Restart to prevent the attempt to create a new ADS stream after back off. 318 lis.Restart() 319 320 mdWant = stats.MetricsData{ 321 Handle: xdsClientServerFailureMetric.Descriptor(), 322 IntIncr: 1, 323 LabelKeys: []string{"grpc.target", "grpc.xds.server"}, 324 LabelVals: []string{"Test/ServerFailureMetrics_AfterResponseRecv", mgmtServer.Address}, 325 } 326 // Server failure should still have no recording point. 327 if err := tmr.WaitForInt64Count(ctx, mdWant); err == nil { 328 t.Fatal("tmr.WaitForInt64Count(ctx, mdWant) succeeded when expected to timeout.") 329 } 330 }