google.golang.org/grpc@v1.74.2/balancer/endpointsharding/endpointsharding_test.go (about) 1 /* 2 * 3 * Copyright 2024 gRPC authors. 4 * 5 * Licensed under the Apache License, Version 2.0 (the "License"); 6 * you may not use this file except in compliance with the License. 7 * You may obtain a copy of the License at 8 * 9 * http://www.apache.org/licenses/LICENSE-2.0 10 * 11 * Unless required by applicable law or agreed to in writing, software 12 * distributed under the License is distributed on an "AS IS" BASIS, 13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 * See the License for the specific language governing permissions and 15 * limitations under the License. 16 * 17 */ 18 19 package endpointsharding_test 20 21 import ( 22 "context" 23 "encoding/json" 24 "errors" 25 "fmt" 26 "strings" 27 "testing" 28 "time" 29 30 "google.golang.org/grpc" 31 "google.golang.org/grpc/backoff" 32 "google.golang.org/grpc/balancer" 33 "google.golang.org/grpc/balancer/endpointsharding" 34 "google.golang.org/grpc/balancer/pickfirst/pickfirstleaf" 35 "google.golang.org/grpc/codes" 36 "google.golang.org/grpc/connectivity" 37 "google.golang.org/grpc/credentials/insecure" 38 "google.golang.org/grpc/grpclog" 39 "google.golang.org/grpc/internal" 40 "google.golang.org/grpc/internal/balancer/stub" 41 "google.golang.org/grpc/internal/grpctest" 42 "google.golang.org/grpc/internal/stubserver" 43 "google.golang.org/grpc/internal/testutils" 44 "google.golang.org/grpc/internal/testutils/roundrobin" 45 "google.golang.org/grpc/peer" 46 "google.golang.org/grpc/resolver" 47 "google.golang.org/grpc/resolver/manual" 48 "google.golang.org/grpc/serviceconfig" 49 "google.golang.org/grpc/status" 50 51 testgrpc "google.golang.org/grpc/interop/grpc_testing" 52 testpb "google.golang.org/grpc/interop/grpc_testing" 53 ) 54 55 var ( 56 defaultTestTimeout = time.Second * 10 57 defaultTestShortTimeout = time.Millisecond * 10 58 ) 59 60 type s struct { 61 grpctest.Tester 62 } 63 64 func Test(t *testing.T) { 65 grpctest.RunSubTests(t, s{}) 66 } 67 68 var logger = grpclog.Component("endpoint-sharding-test") 69 70 func init() { 71 balancer.Register(fakePetioleBuilder{}) 72 } 73 74 const fakePetioleName = "fake_petiole" 75 76 type fakePetioleBuilder struct{} 77 78 func (fakePetioleBuilder) Name() string { 79 return fakePetioleName 80 } 81 82 func (fakePetioleBuilder) Build(cc balancer.ClientConn, opts balancer.BuildOptions) balancer.Balancer { 83 fp := &fakePetiole{ 84 ClientConn: cc, 85 bOpts: opts, 86 } 87 fp.Balancer = endpointsharding.NewBalancer(fp, opts, balancer.Get(pickfirstleaf.Name).Build, endpointsharding.Options{}) 88 return fp 89 } 90 91 func (fakePetioleBuilder) ParseConfig(json.RawMessage) (serviceconfig.LoadBalancingConfig, error) { 92 return nil, nil 93 } 94 95 // fakePetiole is a load balancer that wraps the endpointShardingBalancer, and 96 // forwards ClientConnUpdates with a child config of graceful switch that wraps 97 // pick first. It also intercepts UpdateState to make sure it can access the 98 // child state maintained by EndpointSharding. 99 type fakePetiole struct { 100 balancer.Balancer 101 balancer.ClientConn 102 bOpts balancer.BuildOptions 103 } 104 105 func (fp *fakePetiole) UpdateClientConnState(state balancer.ClientConnState) error { 106 if el := state.ResolverState.Endpoints; len(el) != 2 { 107 return fmt.Errorf("UpdateClientConnState wants two endpoints, got: %v", el) 108 } 109 110 return fp.Balancer.UpdateClientConnState(state) 111 } 112 113 func (fp *fakePetiole) UpdateState(state balancer.State) { 114 childStates := endpointsharding.ChildStatesFromPicker(state.Picker) 115 // Both child states should be present in the child picker. States and 116 // picker change over the lifecycle of test, but there should always be two. 117 if len(childStates) != 2 { 118 logger.Fatal(fmt.Errorf("length of child states received: %v, want 2", len(childStates))) 119 } 120 121 fp.ClientConn.UpdateState(state) 122 } 123 124 // TestEndpointShardingBasic tests the basic functionality of the endpoint 125 // sharding balancer. It specifies a petiole policy that is essentially a 126 // wrapper around the endpoint sharder. Two backends are started, with each 127 // backend's address specified in an endpoint. The petiole does not have a 128 // special picker, so it should fallback to the default behavior, which is to 129 // round_robin amongst the endpoint children that are in the aggregated state. 130 // It also verifies the petiole has access to the raw child state in case it 131 // wants to implement a custom picker. The test sends a resolver error to the 132 // endpointsharding balancer and verifies an error picker from the children 133 // is used while making an RPC. 134 func (s) TestEndpointShardingBasic(t *testing.T) { 135 backend1 := stubserver.StartTestService(t, nil) 136 defer backend1.Stop() 137 backend2 := stubserver.StartTestService(t, nil) 138 defer backend2.Stop() 139 140 mr := manual.NewBuilderWithScheme("e2e-test") 141 defer mr.Close() 142 143 json := fmt.Sprintf(`{"loadBalancingConfig": [{"%s":{}}]}`, fakePetioleName) 144 sc := internal.ParseServiceConfig.(func(string) *serviceconfig.ParseResult)(json) 145 mr.InitialState(resolver.State{ 146 Endpoints: []resolver.Endpoint{ 147 {Addresses: []resolver.Address{{Addr: backend1.Address}}}, 148 {Addresses: []resolver.Address{{Addr: backend2.Address}}}, 149 }, 150 ServiceConfig: sc, 151 }) 152 153 dOpts := []grpc.DialOption{ 154 grpc.WithResolvers(mr), grpc.WithTransportCredentials(insecure.NewCredentials()), 155 // Use a large backoff delay to avoid the error picker being updated 156 // too quickly. 157 grpc.WithConnectParams(grpc.ConnectParams{ 158 Backoff: backoff.Config{ 159 BaseDelay: 2 * defaultTestTimeout, 160 Multiplier: float64(0), 161 Jitter: float64(0), 162 MaxDelay: 2 * defaultTestTimeout, 163 }, 164 }), 165 } 166 cc, err := grpc.NewClient(mr.Scheme()+":///", dOpts...) 167 if err != nil { 168 t.Fatalf("Failed to create new client: %v", err) 169 } 170 defer cc.Close() 171 ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) 172 defer cancel() 173 client := testgrpc.NewTestServiceClient(cc) 174 // Assert a round robin distribution between the two spun up backends. This 175 // requires a poll and eventual consistency as both endpoint children do not 176 // start in state READY. 177 if err = roundrobin.CheckRoundRobinRPCs(ctx, client, []resolver.Address{{Addr: backend1.Address}, {Addr: backend2.Address}}); err != nil { 178 t.Fatalf("error in expected round robin: %v", err) 179 } 180 181 // Stopping both the backends should make the channel enter 182 // TransientFailure. 183 backend1.Stop() 184 backend2.Stop() 185 testutils.AwaitState(ctx, t, cc, connectivity.TransientFailure) 186 187 // When the resolver reports an error, the picker should get updated to 188 // return the resolver error. 189 mr.CC().ReportError(errors.New("test error")) 190 testutils.AwaitState(ctx, t, cc, connectivity.TransientFailure) 191 for ; ctx.Err() == nil; <-time.After(time.Millisecond) { 192 _, err := client.EmptyCall(ctx, &testpb.Empty{}) 193 if err == nil { 194 t.Fatalf("EmptyCall succeeded when expected to fail with %q", "test error") 195 } 196 if strings.Contains(err.Error(), "test error") { 197 break 198 } 199 } 200 if ctx.Err() != nil { 201 t.Fatalf("Context timed out waiting for picker with resolver error.") 202 } 203 } 204 205 // Tests that endpointsharding doesn't automatically re-connect IDLE children. 206 // The test creates an endpoint with two servers and another with a single 207 // server. The active service in endpoint 1 is closed to make the child 208 // pickfirst enter IDLE state. The test verifies that the child pickfirst 209 // doesn't connect to the second address in the endpoint. 210 func (s) TestEndpointShardingReconnectDisabled(t *testing.T) { 211 backend1 := stubserver.StartTestService(t, nil) 212 defer backend1.Stop() 213 backend2 := stubserver.StartTestService(t, nil) 214 defer backend2.Stop() 215 backend3 := stubserver.StartTestService(t, nil) 216 defer backend3.Stop() 217 218 mr := manual.NewBuilderWithScheme("e2e-test") 219 defer mr.Close() 220 221 name := strings.ReplaceAll(strings.ToLower(t.Name()), "/", "") 222 bf := stub.BalancerFuncs{ 223 Init: func(bd *stub.BalancerData) { 224 epOpts := endpointsharding.Options{DisableAutoReconnect: true} 225 bd.ChildBalancer = endpointsharding.NewBalancer(bd.ClientConn, bd.BuildOptions, balancer.Get(pickfirstleaf.Name).Build, epOpts) 226 }, 227 UpdateClientConnState: func(bd *stub.BalancerData, ccs balancer.ClientConnState) error { 228 return bd.ChildBalancer.UpdateClientConnState(ccs) 229 }, 230 Close: func(bd *stub.BalancerData) { 231 bd.ChildBalancer.Close() 232 }, 233 } 234 stub.Register(name, bf) 235 236 json := fmt.Sprintf(`{"loadBalancingConfig": [{"%s":{}}]}`, name) 237 sc := internal.ParseServiceConfig.(func(string) *serviceconfig.ParseResult)(json) 238 mr.InitialState(resolver.State{ 239 Endpoints: []resolver.Endpoint{ 240 {Addresses: []resolver.Address{{Addr: backend1.Address}, {Addr: backend2.Address}}}, 241 {Addresses: []resolver.Address{{Addr: backend3.Address}}}, 242 }, 243 ServiceConfig: sc, 244 }) 245 246 cc, err := grpc.NewClient(mr.Scheme()+":///", grpc.WithResolvers(mr), grpc.WithTransportCredentials(insecure.NewCredentials())) 247 if err != nil { 248 t.Fatalf("Failed to create new client: %v", err) 249 } 250 defer cc.Close() 251 ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) 252 defer cancel() 253 client := testgrpc.NewTestServiceClient(cc) 254 // Assert a round robin distribution between the two spun up backends. This 255 // requires a poll and eventual consistency as both endpoint children do not 256 // start in state READY. 257 if err = roundrobin.CheckRoundRobinRPCs(ctx, client, []resolver.Address{{Addr: backend1.Address}, {Addr: backend3.Address}}); err != nil { 258 t.Fatalf("error in expected round robin: %v", err) 259 } 260 261 // On closing the first server, the first child balancer should enter 262 // IDLE. Since endpointsharding is configured not to auto-reconnect, it will 263 // remain IDLE and will not try to connect to the second backend in the same 264 // endpoint. 265 backend1.Stop() 266 // CheckRoundRobinRPCs waits for all the backends to become reachable, we 267 // call it to ensure the picker no longer sends RPCs to closed backend. 268 if err = roundrobin.CheckRoundRobinRPCs(ctx, client, []resolver.Address{{Addr: backend3.Address}}); err != nil { 269 t.Fatalf("error in expected round robin: %v", err) 270 } 271 272 // Verify requests go only to backend3 for a short time. 273 shortCtx, cancel := context.WithTimeout(ctx, defaultTestShortTimeout) 274 defer cancel() 275 for ; shortCtx.Err() == nil; <-time.After(time.Millisecond) { 276 var peer peer.Peer 277 if _, err := client.EmptyCall(ctx, &testpb.Empty{}, grpc.Peer(&peer)); err != nil { 278 if status.Code(err) != codes.DeadlineExceeded { 279 t.Fatalf("EmptyCall() returned unexpected error %v", err) 280 } 281 break 282 } 283 if got, want := peer.Addr.String(), backend3.Address; got != want { 284 t.Fatalf("EmptyCall() went to unexpected backend: got %q, want %q", got, want) 285 } 286 } 287 } 288 289 // Tests that endpointsharding doesn't automatically re-connect IDLE children 290 // until cc.Connect() is called. The test creates an endpoint with a single 291 // address. The client is connected and the active server is closed to make the 292 // child pickfirst enter IDLE state. The test verifies that the child pickfirst 293 // doesn't re-connect automatically. The test calls cc.Connect() and verified 294 // that the balancer connects causing the channel to enter TransientFailure. 295 func (s) TestEndpointShardingExitIdle(t *testing.T) { 296 backend := stubserver.StartTestService(t, nil) 297 defer backend.Stop() 298 299 mr := manual.NewBuilderWithScheme("e2e-test") 300 defer mr.Close() 301 302 name := strings.ReplaceAll(strings.ToLower(t.Name()), "/", "") 303 bf := stub.BalancerFuncs{ 304 Init: func(bd *stub.BalancerData) { 305 epOpts := endpointsharding.Options{DisableAutoReconnect: true} 306 bd.ChildBalancer = endpointsharding.NewBalancer(bd.ClientConn, bd.BuildOptions, balancer.Get(pickfirstleaf.Name).Build, epOpts) 307 }, 308 UpdateClientConnState: func(bd *stub.BalancerData, ccs balancer.ClientConnState) error { 309 return bd.ChildBalancer.UpdateClientConnState(ccs) 310 }, 311 Close: func(bd *stub.BalancerData) { 312 bd.ChildBalancer.Close() 313 }, 314 ExitIdle: func(bd *stub.BalancerData) { 315 bd.ChildBalancer.ExitIdle() 316 }, 317 } 318 stub.Register(name, bf) 319 320 json := fmt.Sprintf(`{"loadBalancingConfig": [{"%s":{}}]}`, name) 321 sc := internal.ParseServiceConfig.(func(string) *serviceconfig.ParseResult)(json) 322 mr.InitialState(resolver.State{ 323 Endpoints: []resolver.Endpoint{ 324 {Addresses: []resolver.Address{{Addr: backend.Address}}}, 325 }, 326 ServiceConfig: sc, 327 }) 328 329 cc, err := grpc.NewClient(mr.Scheme()+":///", grpc.WithResolvers(mr), grpc.WithTransportCredentials(insecure.NewCredentials())) 330 if err != nil { 331 t.Fatalf("Failed to create new client: %v", err) 332 } 333 defer cc.Close() 334 ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) 335 defer cancel() 336 client := testgrpc.NewTestServiceClient(cc) 337 if _, err := client.EmptyCall(ctx, &testpb.Empty{}); err != nil { 338 t.Errorf("client.EmptyCall() returned unexpected error: %v", err) 339 } 340 341 // On closing the first server, the first child balancer should enter 342 // IDLE. Since endpointsharding is configured not to auto-reconnect, it will 343 // remain IDLE and will not try to re-connect 344 backend.Stop() 345 testutils.AwaitState(ctx, t, cc, connectivity.Idle) 346 shortCtx, shortCancel := context.WithTimeout(ctx, defaultTestShortTimeout) 347 defer shortCancel() 348 testutils.AwaitNoStateChange(shortCtx, t, cc, connectivity.Idle) 349 350 // The balancer should try to re-connect and fail. 351 cc.Connect() 352 testutils.AwaitState(ctx, t, cc, connectivity.TransientFailure) 353 }