// Source: google.golang.org/grpc@v1.72.2/test/xds/xds_client_outlier_detection_test.go

/*
 *
 * Copyright 2022 gRPC authors.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 */

package xds_test

import (
	"context"
	"errors"
	"fmt"
	"testing"
	"time"

	v3clusterpb "github.com/envoyproxy/go-control-plane/envoy/config/cluster/v3"
	v3endpointpb "github.com/envoyproxy/go-control-plane/envoy/config/endpoint/v3"
	v3listenerpb "github.com/envoyproxy/go-control-plane/envoy/config/listener/v3"
	v3routepb "github.com/envoyproxy/go-control-plane/envoy/config/route/v3"
	"github.com/google/go-cmp/cmp"
	"google.golang.org/grpc"
	"google.golang.org/grpc/credentials/insecure"
	"google.golang.org/grpc/internal/stubserver"
	"google.golang.org/grpc/internal/testutils"
	"google.golang.org/grpc/internal/testutils/xds/e2e"
	"google.golang.org/grpc/internal/testutils/xds/e2e/setup"
	testgrpc "google.golang.org/grpc/interop/grpc_testing"
	testpb "google.golang.org/grpc/interop/grpc_testing"
	"google.golang.org/grpc/peer"
	"google.golang.org/grpc/resolver"
	"google.golang.org/protobuf/types/known/durationpb"
	"google.golang.org/protobuf/types/known/wrapperspb"
)

// TestOutlierDetection_NoopConfig tests the scenario where the Outlier
// Detection feature is enabled on the gRPC client, but it receives no Outlier
// Detection configuration from the management server.
This should result in a 50 // no-op Outlier Detection configuration being used to configure the Outlier 51 // Detection balancer. This test verifies that an RPC is able to proceed 52 // normally with this configuration. 53 func (s) TestOutlierDetection_NoopConfig(t *testing.T) { 54 managementServer, nodeID, _, xdsResolver := setup.ManagementServerAndResolver(t) 55 56 server := &stubserver.StubServer{ 57 EmptyCallF: func(context.Context, *testpb.Empty) (*testpb.Empty, error) { return &testpb.Empty{}, nil }, 58 } 59 server.StartServer() 60 t.Logf("Started test service backend at %q", server.Address) 61 defer server.Stop() 62 63 const serviceName = "my-service-client-side-xds" 64 resources := e2e.DefaultClientResources(e2e.ResourceParams{ 65 DialTarget: serviceName, 66 NodeID: nodeID, 67 Host: "localhost", 68 Port: testutils.ParsePort(t, server.Address), 69 SecLevel: e2e.SecurityLevelNone, 70 }) 71 ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) 72 defer cancel() 73 if err := managementServer.Update(ctx, resources); err != nil { 74 t.Fatal(err) 75 } 76 77 // Create a ClientConn and make a successful RPC. 78 cc, err := grpc.NewClient(fmt.Sprintf("xds:///%s", serviceName), grpc.WithTransportCredentials(insecure.NewCredentials()), grpc.WithResolvers(xdsResolver)) 79 if err != nil { 80 t.Fatalf("failed to dial local test server: %v", err) 81 } 82 defer cc.Close() 83 84 client := testgrpc.NewTestServiceClient(cc) 85 if _, err := client.EmptyCall(ctx, &testpb.Empty{}, grpc.WaitForReady(true)); err != nil { 86 t.Fatalf("rpc EmptyCall() failed: %v", err) 87 } 88 } 89 90 // clientResourcesMultipleBackendsAndOD returns xDS resources which correspond 91 // to multiple upstreams, corresponding different backends listening on 92 // different localhost:port combinations. The resources also configure an 93 // Outlier Detection Balancer configured through the passed in Outlier Detection 94 // proto. 
95 func clientResourcesMultipleBackendsAndOD(params e2e.ResourceParams, ports []uint32, od *v3clusterpb.OutlierDetection) e2e.UpdateOptions { 96 routeConfigName := "route-" + params.DialTarget 97 clusterName := "cluster-" + params.DialTarget 98 endpointsName := "endpoints-" + params.DialTarget 99 return e2e.UpdateOptions{ 100 NodeID: params.NodeID, 101 Listeners: []*v3listenerpb.Listener{e2e.DefaultClientListener(params.DialTarget, routeConfigName)}, 102 Routes: []*v3routepb.RouteConfiguration{e2e.DefaultRouteConfig(routeConfigName, params.DialTarget, clusterName)}, 103 Clusters: []*v3clusterpb.Cluster{clusterWithOutlierDetection(clusterName, endpointsName, params.SecLevel, od)}, 104 Endpoints: []*v3endpointpb.ClusterLoadAssignment{e2e.DefaultEndpoint(endpointsName, params.Host, ports)}, 105 } 106 } 107 108 func clusterWithOutlierDetection(clusterName, edsServiceName string, secLevel e2e.SecurityLevel, od *v3clusterpb.OutlierDetection) *v3clusterpb.Cluster { 109 cluster := e2e.DefaultCluster(clusterName, edsServiceName, secLevel) 110 cluster.OutlierDetection = od 111 return cluster 112 } 113 114 // checkRoundRobinRPCs verifies that EmptyCall RPCs on the given ClientConn, 115 // connected to a server exposing the test.grpc_testing.TestService, are 116 // roundrobined across the given backend addresses. 117 // 118 // Returns a non-nil error if context deadline expires before RPCs start to get 119 // roundrobined across the given backends. 120 func checkRoundRobinRPCs(ctx context.Context, client testgrpc.TestServiceClient, addrs []resolver.Address) error { 121 wantAddrCount := make(map[string]int) 122 for _, addr := range addrs { 123 wantAddrCount[addr.Addr]++ 124 } 125 for ; ctx.Err() == nil; <-time.After(time.Millisecond) { 126 // Perform 3 iterations. 
127 var iterations [][]string 128 for i := 0; i < 3; i++ { 129 iteration := make([]string, len(addrs)) 130 for c := 0; c < len(addrs); c++ { 131 var peer peer.Peer 132 client.EmptyCall(ctx, &testpb.Empty{}, grpc.Peer(&peer)) 133 if peer.Addr != nil { 134 iteration[c] = peer.Addr.String() 135 } 136 } 137 iterations = append(iterations, iteration) 138 } 139 // Ensure the first iteration contains all addresses in addrs. 140 gotAddrCount := make(map[string]int) 141 for _, addr := range iterations[0] { 142 gotAddrCount[addr]++ 143 } 144 if diff := cmp.Diff(gotAddrCount, wantAddrCount); diff != "" { 145 continue 146 } 147 // Ensure all three iterations contain the same addresses. 148 if !cmp.Equal(iterations[0], iterations[1]) || !cmp.Equal(iterations[0], iterations[2]) { 149 continue 150 } 151 return nil 152 } 153 return fmt.Errorf("timeout when waiting for roundrobin distribution of RPCs across addresses: %v", addrs) 154 } 155 156 // TestOutlierDetectionWithOutlier tests the Outlier Detection Balancer e2e. It 157 // spins up three backends, one which consistently errors, and configures the 158 // ClientConn using xDS to connect to all three of those backends. The Outlier 159 // Detection Balancer should eject the connection to the backend which 160 // constantly errors, causing RPC's to not be routed to that upstream, and only 161 // be Round Robined across the two healthy upstreams. Other than the intervals 162 // the unhealthy upstream is ejected, RPC's should regularly round robin across 163 // all three upstreams. 164 func (s) TestOutlierDetectionWithOutlier(t *testing.T) { 165 managementServer, nodeID, _, xdsResolver := setup.ManagementServerAndResolver(t) 166 167 // Working backend 1. 168 backend1 := stubserver.StartTestService(t, nil) 169 port1 := testutils.ParsePort(t, backend1.Address) 170 defer backend1.Stop() 171 172 // Working backend 2. 
173 backend2 := stubserver.StartTestService(t, nil) 174 port2 := testutils.ParsePort(t, backend2.Address) 175 defer backend2.Stop() 176 177 // Backend 3 that will always return an error and eventually ejected. 178 backend3 := stubserver.StartTestService(t, &stubserver.StubServer{ 179 EmptyCallF: func(context.Context, *testpb.Empty) (*testpb.Empty, error) { return nil, errors.New("some error") }, 180 }) 181 port3 := testutils.ParsePort(t, backend3.Address) 182 defer backend3.Stop() 183 184 const serviceName = "my-service-client-side-xds" 185 resources := clientResourcesMultipleBackendsAndOD(e2e.ResourceParams{ 186 DialTarget: serviceName, 187 NodeID: nodeID, 188 Host: "localhost", 189 SecLevel: e2e.SecurityLevelNone, 190 }, []uint32{port1, port2, port3}, &v3clusterpb.OutlierDetection{ 191 Interval: &durationpb.Duration{Nanos: 50000000}, // .5 seconds 192 BaseEjectionTime: &durationpb.Duration{Seconds: 30}, 193 MaxEjectionTime: &durationpb.Duration{Seconds: 300}, 194 MaxEjectionPercent: &wrapperspb.UInt32Value{Value: 1}, 195 FailurePercentageThreshold: &wrapperspb.UInt32Value{Value: 50}, 196 EnforcingFailurePercentage: &wrapperspb.UInt32Value{Value: 100}, 197 FailurePercentageRequestVolume: &wrapperspb.UInt32Value{Value: 8}, 198 FailurePercentageMinimumHosts: &wrapperspb.UInt32Value{Value: 3}, 199 }) 200 ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) 201 defer cancel() 202 if err := managementServer.Update(ctx, resources); err != nil { 203 t.Fatal(err) 204 } 205 206 cc, err := grpc.NewClient(fmt.Sprintf("xds:///%s", serviceName), grpc.WithTransportCredentials(insecure.NewCredentials()), grpc.WithResolvers(xdsResolver)) 207 if err != nil { 208 t.Fatalf("failed to dial local test server: %v", err) 209 } 210 defer cc.Close() 211 212 client := testgrpc.NewTestServiceClient(cc) 213 214 fullAddresses := []resolver.Address{ 215 {Addr: backend1.Address}, 216 {Addr: backend2.Address}, 217 {Addr: backend3.Address}, 218 } 219 // At first, due to no 
statistics on each of the backends, the 3 220 // upstreams should all be round robined across. 221 if err = checkRoundRobinRPCs(ctx, client, fullAddresses); err != nil { 222 t.Fatalf("error in expected round robin: %v", err) 223 } 224 225 // The addresses which don't return errors. 226 okAddresses := []resolver.Address{ 227 {Addr: backend1.Address}, 228 {Addr: backend2.Address}, 229 } 230 // After calling the three upstreams, one of them constantly error 231 // and should eventually be ejected for a period of time. This 232 // period of time should cause the RPC's to be round robined only 233 // across the two that are healthy. 234 if err = checkRoundRobinRPCs(ctx, client, okAddresses); err != nil { 235 t.Fatalf("error in expected round robin: %v", err) 236 } 237 } 238 239 // TestOutlierDetectionXDSDefaultOn tests that Outlier Detection is by default 240 // configured on in the xDS Flow. If the Outlier Detection proto message is 241 // present with SuccessRateEjection unset, then Outlier Detection should be 242 // turned on. The test setups and xDS system with xDS resources with Outlier 243 // Detection present in the CDS update, but with SuccessRateEjection unset, and 244 // asserts that Outlier Detection is turned on and ejects upstreams. 245 func (s) TestOutlierDetectionXDSDefaultOn(t *testing.T) { 246 managementServer, nodeID, _, xdsResolver := setup.ManagementServerAndResolver(t) 247 248 // Working backend 1. 249 backend1 := stubserver.StartTestService(t, nil) 250 port1 := testutils.ParsePort(t, backend1.Address) 251 defer backend1.Stop() 252 253 // Working backend 2. 254 backend2 := stubserver.StartTestService(t, nil) 255 port2 := testutils.ParsePort(t, backend2.Address) 256 defer backend2.Stop() 257 258 // Backend 3 that will always return an error and eventually ejected. 
259 backend3 := stubserver.StartTestService(t, &stubserver.StubServer{ 260 EmptyCallF: func(context.Context, *testpb.Empty) (*testpb.Empty, error) { return nil, errors.New("some error") }, 261 }) 262 port3 := testutils.ParsePort(t, backend3.Address) 263 defer backend3.Stop() 264 265 // Configure CDS resources with Outlier Detection set but 266 // EnforcingSuccessRate unset. This should cause Outlier Detection to be 267 // configured with SuccessRateEjection present in configuration, which will 268 // eventually be populated with its default values along with the knobs set 269 // as SuccessRate fields in the proto, and thus Outlier Detection should be 270 // on and actively eject upstreams. 271 const serviceName = "my-service-client-side-xds" 272 resources := clientResourcesMultipleBackendsAndOD(e2e.ResourceParams{ 273 DialTarget: serviceName, 274 NodeID: nodeID, 275 Host: "localhost", 276 SecLevel: e2e.SecurityLevelNone, 277 }, []uint32{port1, port2, port3}, &v3clusterpb.OutlierDetection{ 278 // Need to set knobs to trigger ejection within the test time frame. 279 Interval: &durationpb.Duration{Nanos: 50000000}, 280 // EnforcingSuccessRateSet to nil, causes success rate algorithm to be 281 // turned on. 
282 SuccessRateMinimumHosts: &wrapperspb.UInt32Value{Value: 1}, 283 SuccessRateRequestVolume: &wrapperspb.UInt32Value{Value: 8}, 284 SuccessRateStdevFactor: &wrapperspb.UInt32Value{Value: 1}, 285 }) 286 ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) 287 defer cancel() 288 if err := managementServer.Update(ctx, resources); err != nil { 289 t.Fatal(err) 290 } 291 292 cc, err := grpc.NewClient(fmt.Sprintf("xds:///%s", serviceName), grpc.WithTransportCredentials(insecure.NewCredentials()), grpc.WithResolvers(xdsResolver)) 293 if err != nil { 294 t.Fatalf("failed to dial local test server: %v", err) 295 } 296 defer cc.Close() 297 298 client := testgrpc.NewTestServiceClient(cc) 299 300 fullAddresses := []resolver.Address{ 301 {Addr: backend1.Address}, 302 {Addr: backend2.Address}, 303 {Addr: backend3.Address}, 304 } 305 // At first, due to no statistics on each of the backends, the 3 306 // upstreams should all be round robined across. 307 if err = checkRoundRobinRPCs(ctx, client, fullAddresses); err != nil { 308 t.Fatalf("error in expected round robin: %v", err) 309 } 310 311 // The addresses which don't return errors. 312 okAddresses := []resolver.Address{ 313 {Addr: backend1.Address}, 314 {Addr: backend2.Address}, 315 } 316 // After calling the three upstreams, one of them constantly error 317 // and should eventually be ejected for a period of time. This 318 // period of time should cause the RPC's to be round robined only 319 // across the two that are healthy. 320 if err = checkRoundRobinRPCs(ctx, client, okAddresses); err != nil { 321 t.Fatalf("error in expected round robin: %v", err) 322 } 323 }