google.golang.org/grpc@v1.62.1/xds/internal/balancer/outlierdetection/e2e_test/outlierdetection_test.go (about) 1 /* 2 * 3 * Copyright 2022 gRPC authors. 4 * 5 * Licensed under the Apache License, Version 2.0 (the "License"); 6 * you may not use this file except in compliance with the License. 7 * You may obtain a copy of the License at 8 * 9 * http://www.apache.org/licenses/LICENSE-2.0 10 * 11 * Unless required by applicable law or agreed to in writing, software 12 * distributed under the License is distributed on an "AS IS" BASIS, 13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 * See the License for the specific language governing permissions and 15 * limitations under the License. 16 * 17 */ 18 19 // Package e2e_test contains e2e test cases for the Outlier Detection LB Policy. 20 package e2e_test 21 22 import ( 23 "context" 24 "errors" 25 "fmt" 26 "testing" 27 "time" 28 29 "github.com/google/go-cmp/cmp" 30 "google.golang.org/grpc" 31 "google.golang.org/grpc/credentials/insecure" 32 "google.golang.org/grpc/internal" 33 "google.golang.org/grpc/internal/grpctest" 34 "google.golang.org/grpc/internal/stubserver" 35 "google.golang.org/grpc/peer" 36 "google.golang.org/grpc/resolver" 37 "google.golang.org/grpc/resolver/manual" 38 "google.golang.org/grpc/serviceconfig" 39 40 testgrpc "google.golang.org/grpc/interop/grpc_testing" 41 testpb "google.golang.org/grpc/interop/grpc_testing" 42 43 _ "google.golang.org/grpc/xds/internal/balancer/outlierdetection" // To register helper functions which register/unregister Outlier Detection LB Policy. 44 ) 45 46 var defaultTestTimeout = 5 * time.Second 47 48 type s struct { 49 grpctest.Tester 50 } 51 52 func Test(t *testing.T) { 53 grpctest.RunSubTests(t, s{}) 54 } 55 56 // Setup spins up three test backends, each listening on a port on localhost. 57 // Two of the backends are configured to always reply with an empty response and 58 // no error and one is configured to always return an error. 59 func setupBackends(t *testing.T) ([]string, func()) { 60 t.Helper() 61 62 backends := make([]*stubserver.StubServer, 3) 63 addresses := make([]string, 3) 64 // Construct and start 2 working backends. 65 for i := 0; i < 2; i++ { 66 backend := &stubserver.StubServer{ 67 EmptyCallF: func(ctx context.Context, in *testpb.Empty) (*testpb.Empty, error) { 68 return &testpb.Empty{}, nil 69 }, 70 } 71 if err := backend.StartServer(); err != nil { 72 t.Fatalf("Failed to start backend: %v", err) 73 } 74 t.Logf("Started good TestService backend at: %q", backend.Address) 75 backends[i] = backend 76 addresses[i] = backend.Address 77 } 78 79 // Construct and start a failing backend. 80 backend := &stubserver.StubServer{ 81 EmptyCallF: func(ctx context.Context, in *testpb.Empty) (*testpb.Empty, error) { 82 return nil, errors.New("some error") 83 }, 84 } 85 if err := backend.StartServer(); err != nil { 86 t.Fatalf("Failed to start backend: %v", err) 87 } 88 t.Logf("Started bad TestService backend at: %q", backend.Address) 89 backends[2] = backend 90 addresses[2] = backend.Address 91 cancel := func() { 92 for _, backend := range backends { 93 backend.Stop() 94 } 95 } 96 return addresses, cancel 97 } 98 99 // checkRoundRobinRPCs verifies that EmptyCall RPCs on the given ClientConn, 100 // connected to a server exposing the test.grpc_testing.TestService, are 101 // roundrobined across the given backend addresses. 102 // 103 // Returns a non-nil error if context deadline expires before RPCs start to get 104 // roundrobined across the given backends. 105 func checkRoundRobinRPCs(ctx context.Context, client testgrpc.TestServiceClient, addrs []resolver.Address) error { 106 wantAddrCount := make(map[string]int) 107 for _, addr := range addrs { 108 wantAddrCount[addr.Addr]++ 109 } 110 gotAddrCount := make(map[string]int) 111 for ; ctx.Err() == nil; <-time.After(time.Millisecond) { 112 gotAddrCount = make(map[string]int) 113 // Perform 3 iterations. 114 var iterations [][]string 115 for i := 0; i < 3; i++ { 116 iteration := make([]string, len(addrs)) 117 for c := 0; c < len(addrs); c++ { 118 var peer peer.Peer 119 client.EmptyCall(ctx, &testpb.Empty{}, grpc.Peer(&peer)) 120 if peer.Addr != nil { 121 iteration[c] = peer.Addr.String() 122 } 123 } 124 iterations = append(iterations, iteration) 125 } 126 // Ensure the the first iteration contains all addresses in addrs. 127 for _, addr := range iterations[0] { 128 gotAddrCount[addr]++ 129 } 130 if diff := cmp.Diff(gotAddrCount, wantAddrCount); diff != "" { 131 continue 132 } 133 // Ensure all three iterations contain the same addresses. 134 if !cmp.Equal(iterations[0], iterations[1]) || !cmp.Equal(iterations[0], iterations[2]) { 135 continue 136 } 137 return nil 138 } 139 return fmt.Errorf("timeout when waiting for roundrobin distribution of RPCs across addresses: %v; got: %v", addrs, gotAddrCount) 140 } 141 142 // TestOutlierDetectionAlgorithmsE2E tests the Outlier Detection Success Rate 143 // and Failure Percentage algorithms in an e2e fashion. The Outlier Detection 144 // Balancer is configured as the top level LB Policy of the channel with a Round 145 // Robin child, and connects to three upstreams. Two of the upstreams are healthy and 146 // one is unhealthy. The two algorithms should at some point eject the failing 147 // upstream, causing RPC's to not be routed to that upstream, and only be 148 // Round Robined across the two healthy upstreams. Other than the intervals the 149 // unhealthy upstream is ejected, RPC's should regularly round robin 150 // across all three upstreams. 151 func (s) TestOutlierDetectionAlgorithmsE2E(t *testing.T) { 152 tests := []struct { 153 name string 154 odscJSON string 155 }{ 156 { 157 name: "Success Rate Algorithm", 158 odscJSON: ` 159 { 160 "loadBalancingConfig": [ 161 { 162 "outlier_detection_experimental": { 163 "interval": "0.050s", 164 "baseEjectionTime": "0.100s", 165 "maxEjectionTime": "300s", 166 "maxEjectionPercent": 33, 167 "successRateEjection": { 168 "stdevFactor": 50, 169 "enforcementPercentage": 100, 170 "minimumHosts": 3, 171 "requestVolume": 5 172 }, 173 "childPolicy": [{"round_robin": {}}] 174 } 175 } 176 ] 177 }`, 178 }, 179 { 180 name: "Failure Percentage Algorithm", 181 odscJSON: ` 182 { 183 "loadBalancingConfig": [ 184 { 185 "outlier_detection_experimental": { 186 "interval": "0.050s", 187 "baseEjectionTime": "0.100s", 188 "maxEjectionTime": "300s", 189 "maxEjectionPercent": 33, 190 "failurePercentageEjection": { 191 "threshold": 50, 192 "enforcementPercentage": 100, 193 "minimumHosts": 3, 194 "requestVolume": 5 195 }, 196 "childPolicy": [{"round_robin": {}} 197 ] 198 } 199 } 200 ] 201 }`, 202 }, 203 } 204 for _, test := range tests { 205 t.Run(test.name, func(t *testing.T) { 206 addresses, cancel := setupBackends(t) 207 defer cancel() 208 209 mr := manual.NewBuilderWithScheme("od-e2e") 210 defer mr.Close() 211 212 sc := internal.ParseServiceConfig.(func(string) *serviceconfig.ParseResult)(test.odscJSON) 213 // The full list of addresses. 214 fullAddresses := []resolver.Address{ 215 {Addr: addresses[0]}, 216 {Addr: addresses[1]}, 217 {Addr: addresses[2]}, 218 } 219 mr.InitialState(resolver.State{ 220 Addresses: fullAddresses, 221 ServiceConfig: sc, 222 }) 223 224 cc, err := grpc.Dial(mr.Scheme()+":///", grpc.WithResolvers(mr), grpc.WithTransportCredentials(insecure.NewCredentials())) 225 if err != nil { 226 t.Fatalf("grpc.Dial() failed: %v", err) 227 } 228 defer cc.Close() 229 ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) 230 defer cancel() 231 testServiceClient := testgrpc.NewTestServiceClient(cc) 232 233 // At first, due to no statistics on each of the backends, the 3 234 // upstreams should all be round robined across. 235 if err = checkRoundRobinRPCs(ctx, testServiceClient, fullAddresses); err != nil { 236 t.Fatalf("error in expected round robin: %v", err) 237 } 238 239 // The addresses which don't return errors. 240 okAddresses := []resolver.Address{ 241 {Addr: addresses[0]}, 242 {Addr: addresses[1]}, 243 } 244 // After calling the three upstreams, one of them constantly error 245 // and should eventually be ejected for a period of time. This 246 // period of time should cause the RPC's to be round robined only 247 // across the two that are healthy. 248 if err = checkRoundRobinRPCs(ctx, testServiceClient, okAddresses); err != nil { 249 t.Fatalf("error in expected round robin: %v", err) 250 } 251 252 // The failing upstream isn't ejected indefinitely, and eventually 253 // should be unejected in subsequent iterations of the interval 254 // algorithm as per the spec for the two specific algorithms. 255 if err = checkRoundRobinRPCs(ctx, testServiceClient, fullAddresses); err != nil { 256 t.Fatalf("error in expected round robin: %v", err) 257 } 258 }) 259 } 260 } 261 262 // TestNoopConfiguration tests the Outlier Detection Balancer configured with a 263 // noop configuration. The noop configuration should cause the Outlier Detection 264 // Balancer to not count RPC's, and thus never eject any upstreams and continue 265 // to route to every upstream connected to, even if they continuously error. 266 // Once the Outlier Detection Balancer gets reconfigured with configuration 267 // requiring counting RPC's, the Outlier Detection Balancer should start 268 // ejecting any upstreams as specified in the configuration. 269 func (s) TestNoopConfiguration(t *testing.T) { 270 addresses, cancel := setupBackends(t) 271 defer cancel() 272 273 mr := manual.NewBuilderWithScheme("od-e2e") 274 defer mr.Close() 275 276 noopODServiceConfigJSON := ` 277 { 278 "loadBalancingConfig": [ 279 { 280 "outlier_detection_experimental": { 281 "interval": "0.050s", 282 "baseEjectionTime": "0.100s", 283 "maxEjectionTime": "300s", 284 "maxEjectionPercent": 33, 285 "childPolicy": [{"round_robin": {}}] 286 } 287 } 288 ] 289 }` 290 sc := internal.ParseServiceConfig.(func(string) *serviceconfig.ParseResult)(noopODServiceConfigJSON) 291 // The full list of addresses. 292 fullAddresses := []resolver.Address{ 293 {Addr: addresses[0]}, 294 {Addr: addresses[1]}, 295 {Addr: addresses[2]}, 296 } 297 mr.InitialState(resolver.State{ 298 Addresses: fullAddresses, 299 ServiceConfig: sc, 300 }) 301 cc, err := grpc.Dial(mr.Scheme()+":///", grpc.WithResolvers(mr), grpc.WithTransportCredentials(insecure.NewCredentials())) 302 if err != nil { 303 t.Fatalf("grpc.Dial() failed: %v", err) 304 } 305 defer cc.Close() 306 ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) 307 defer cancel() 308 testServiceClient := testgrpc.NewTestServiceClient(cc) 309 310 for i := 0; i < 2; i++ { 311 // Since the Outlier Detection Balancer starts with a noop 312 // configuration, it shouldn't count RPCs or eject any upstreams. Thus, 313 // even though an upstream it connects to constantly errors, it should 314 // continue to Round Robin across every upstream. 315 if err := checkRoundRobinRPCs(ctx, testServiceClient, fullAddresses); err != nil { 316 t.Fatalf("error in expected round robin: %v", err) 317 } 318 } 319 320 // Reconfigure the Outlier Detection Balancer with a configuration that 321 // specifies to count RPC's and eject upstreams. Due to the balancer no 322 // longer being a noop, it should eject any unhealthy addresses as specified 323 // by the failure percentage portion of the configuration. 324 countingODServiceConfigJSON := ` 325 { 326 "loadBalancingConfig": [ 327 { 328 "outlier_detection_experimental": { 329 "interval": "0.050s", 330 "baseEjectionTime": "0.100s", 331 "maxEjectionTime": "300s", 332 "maxEjectionPercent": 33, 333 "failurePercentageEjection": { 334 "threshold": 50, 335 "enforcementPercentage": 100, 336 "minimumHosts": 3, 337 "requestVolume": 5 338 }, 339 "childPolicy": [{"round_robin": {}}] 340 } 341 } 342 ] 343 }` 344 sc = internal.ParseServiceConfig.(func(string) *serviceconfig.ParseResult)(countingODServiceConfigJSON) 345 346 mr.UpdateState(resolver.State{ 347 Addresses: fullAddresses, 348 ServiceConfig: sc, 349 }) 350 351 // At first on the reconfigured balancer, the balancer has no stats 352 // collected about upstreams. Thus, it should at first route across the full 353 // upstream list. 354 if err = checkRoundRobinRPCs(ctx, testServiceClient, fullAddresses); err != nil { 355 t.Fatalf("error in expected round robin: %v", err) 356 } 357 358 // The addresses which don't return errors. 359 okAddresses := []resolver.Address{ 360 {Addr: addresses[0]}, 361 {Addr: addresses[1]}, 362 } 363 // Now that the reconfigured balancer has data about the failing upstream, 364 // it should eject the upstream and only route across the two healthy 365 // upstreams. 366 if err = checkRoundRobinRPCs(ctx, testServiceClient, okAddresses); err != nil { 367 t.Fatalf("error in expected round robin: %v", err) 368 } 369 }