google.golang.org/grpc@v1.74.2/balancer/endpointsharding/endpointsharding_test.go (about)

     1  /*
     2   *
     3   * Copyright 2024 gRPC authors.
     4   *
     5   * Licensed under the Apache License, Version 2.0 (the "License");
     6   * you may not use this file except in compliance with the License.
     7   * You may obtain a copy of the License at
     8   *
     9   *     http://www.apache.org/licenses/LICENSE-2.0
    10   *
    11   * Unless required by applicable law or agreed to in writing, software
    12   * distributed under the License is distributed on an "AS IS" BASIS,
    13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    14   * See the License for the specific language governing permissions and
    15   * limitations under the License.
    16   *
    17   */
    18  
    19  package endpointsharding_test
    20  
    21  import (
    22  	"context"
    23  	"encoding/json"
    24  	"errors"
    25  	"fmt"
    26  	"strings"
    27  	"testing"
    28  	"time"
    29  
    30  	"google.golang.org/grpc"
    31  	"google.golang.org/grpc/backoff"
    32  	"google.golang.org/grpc/balancer"
    33  	"google.golang.org/grpc/balancer/endpointsharding"
    34  	"google.golang.org/grpc/balancer/pickfirst/pickfirstleaf"
    35  	"google.golang.org/grpc/codes"
    36  	"google.golang.org/grpc/connectivity"
    37  	"google.golang.org/grpc/credentials/insecure"
    38  	"google.golang.org/grpc/grpclog"
    39  	"google.golang.org/grpc/internal"
    40  	"google.golang.org/grpc/internal/balancer/stub"
    41  	"google.golang.org/grpc/internal/grpctest"
    42  	"google.golang.org/grpc/internal/stubserver"
    43  	"google.golang.org/grpc/internal/testutils"
    44  	"google.golang.org/grpc/internal/testutils/roundrobin"
    45  	"google.golang.org/grpc/peer"
    46  	"google.golang.org/grpc/resolver"
    47  	"google.golang.org/grpc/resolver/manual"
    48  	"google.golang.org/grpc/serviceconfig"
    49  	"google.golang.org/grpc/status"
    50  
    51  	testgrpc "google.golang.org/grpc/interop/grpc_testing"
    52  	testpb "google.golang.org/grpc/interop/grpc_testing"
    53  )
    54  
    55  var (
    56  	defaultTestTimeout      = time.Second * 10
    57  	defaultTestShortTimeout = time.Millisecond * 10
    58  )
    59  
    60  type s struct {
    61  	grpctest.Tester
    62  }
    63  
    64  func Test(t *testing.T) {
    65  	grpctest.RunSubTests(t, s{})
    66  }
    67  
// logger is the grpclog component logger for this test file. fakePetiole uses
// it to log a fatal error when the child state invariant is violated.
var logger = grpclog.Component("endpoint-sharding-test")

// init registers fakePetioleBuilder with the balancer registry so that a
// service config can select it by name.
func init() {
	balancer.Register(fakePetioleBuilder{})
}
    73  
    74  const fakePetioleName = "fake_petiole"
    75  
    76  type fakePetioleBuilder struct{}
    77  
    78  func (fakePetioleBuilder) Name() string {
    79  	return fakePetioleName
    80  }
    81  
    82  func (fakePetioleBuilder) Build(cc balancer.ClientConn, opts balancer.BuildOptions) balancer.Balancer {
    83  	fp := &fakePetiole{
    84  		ClientConn: cc,
    85  		bOpts:      opts,
    86  	}
    87  	fp.Balancer = endpointsharding.NewBalancer(fp, opts, balancer.Get(pickfirstleaf.Name).Build, endpointsharding.Options{})
    88  	return fp
    89  }
    90  
    91  func (fakePetioleBuilder) ParseConfig(json.RawMessage) (serviceconfig.LoadBalancingConfig, error) {
    92  	return nil, nil
    93  }
    94  
// fakePetiole is a load balancer that wraps the endpoint sharding balancer. It
// forwards ClientConn state updates to the wrapped balancer unchanged, after
// checking that exactly two endpoints are present. It also intercepts
// UpdateState to make sure it can access the child state maintained by
// EndpointSharding.
type fakePetiole struct {
	// Balancer is the wrapped endpoint sharding balancer.
	balancer.Balancer
	// ClientConn is the parent ClientConn that state updates are forwarded to.
	balancer.ClientConn
	// bOpts holds the build options this balancer was created with.
	bOpts balancer.BuildOptions
}
   104  
   105  func (fp *fakePetiole) UpdateClientConnState(state balancer.ClientConnState) error {
   106  	if el := state.ResolverState.Endpoints; len(el) != 2 {
   107  		return fmt.Errorf("UpdateClientConnState wants two endpoints, got: %v", el)
   108  	}
   109  
   110  	return fp.Balancer.UpdateClientConnState(state)
   111  }
   112  
   113  func (fp *fakePetiole) UpdateState(state balancer.State) {
   114  	childStates := endpointsharding.ChildStatesFromPicker(state.Picker)
   115  	// Both child states should be present in the child picker. States and
   116  	// picker change over the lifecycle of test, but there should always be two.
   117  	if len(childStates) != 2 {
   118  		logger.Fatal(fmt.Errorf("length of child states received: %v, want 2", len(childStates)))
   119  	}
   120  
   121  	fp.ClientConn.UpdateState(state)
   122  }
   123  
   124  // TestEndpointShardingBasic tests the basic functionality of the endpoint
   125  // sharding balancer. It specifies a petiole policy that is essentially a
   126  // wrapper around the endpoint sharder. Two backends are started, with each
   127  // backend's address specified in an endpoint. The petiole does not have a
   128  // special picker, so it should fallback to the default behavior, which is to
   129  // round_robin amongst the endpoint children that are in the aggregated state.
   130  // It also verifies the petiole has access to the raw child state in case it
   131  // wants to implement a custom picker. The test sends a resolver error to the
   132  // endpointsharding balancer and verifies an error picker from the children
   133  // is used while making an RPC.
   134  func (s) TestEndpointShardingBasic(t *testing.T) {
   135  	backend1 := stubserver.StartTestService(t, nil)
   136  	defer backend1.Stop()
   137  	backend2 := stubserver.StartTestService(t, nil)
   138  	defer backend2.Stop()
   139  
   140  	mr := manual.NewBuilderWithScheme("e2e-test")
   141  	defer mr.Close()
   142  
   143  	json := fmt.Sprintf(`{"loadBalancingConfig": [{"%s":{}}]}`, fakePetioleName)
   144  	sc := internal.ParseServiceConfig.(func(string) *serviceconfig.ParseResult)(json)
   145  	mr.InitialState(resolver.State{
   146  		Endpoints: []resolver.Endpoint{
   147  			{Addresses: []resolver.Address{{Addr: backend1.Address}}},
   148  			{Addresses: []resolver.Address{{Addr: backend2.Address}}},
   149  		},
   150  		ServiceConfig: sc,
   151  	})
   152  
   153  	dOpts := []grpc.DialOption{
   154  		grpc.WithResolvers(mr), grpc.WithTransportCredentials(insecure.NewCredentials()),
   155  		// Use a large backoff delay to avoid the error picker being updated
   156  		// too quickly.
   157  		grpc.WithConnectParams(grpc.ConnectParams{
   158  			Backoff: backoff.Config{
   159  				BaseDelay:  2 * defaultTestTimeout,
   160  				Multiplier: float64(0),
   161  				Jitter:     float64(0),
   162  				MaxDelay:   2 * defaultTestTimeout,
   163  			},
   164  		}),
   165  	}
   166  	cc, err := grpc.NewClient(mr.Scheme()+":///", dOpts...)
   167  	if err != nil {
   168  		t.Fatalf("Failed to create new client: %v", err)
   169  	}
   170  	defer cc.Close()
   171  	ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
   172  	defer cancel()
   173  	client := testgrpc.NewTestServiceClient(cc)
   174  	// Assert a round robin distribution between the two spun up backends. This
   175  	// requires a poll and eventual consistency as both endpoint children do not
   176  	// start in state READY.
   177  	if err = roundrobin.CheckRoundRobinRPCs(ctx, client, []resolver.Address{{Addr: backend1.Address}, {Addr: backend2.Address}}); err != nil {
   178  		t.Fatalf("error in expected round robin: %v", err)
   179  	}
   180  
   181  	// Stopping both the backends should make the channel enter
   182  	// TransientFailure.
   183  	backend1.Stop()
   184  	backend2.Stop()
   185  	testutils.AwaitState(ctx, t, cc, connectivity.TransientFailure)
   186  
   187  	// When the resolver reports an error, the picker should get updated to
   188  	// return the resolver error.
   189  	mr.CC().ReportError(errors.New("test error"))
   190  	testutils.AwaitState(ctx, t, cc, connectivity.TransientFailure)
   191  	for ; ctx.Err() == nil; <-time.After(time.Millisecond) {
   192  		_, err := client.EmptyCall(ctx, &testpb.Empty{})
   193  		if err == nil {
   194  			t.Fatalf("EmptyCall succeeded when expected to fail with %q", "test error")
   195  		}
   196  		if strings.Contains(err.Error(), "test error") {
   197  			break
   198  		}
   199  	}
   200  	if ctx.Err() != nil {
   201  		t.Fatalf("Context timed out waiting for picker with resolver error.")
   202  	}
   203  }
   204  
   205  // Tests that endpointsharding doesn't automatically re-connect IDLE children.
   206  // The test creates an endpoint with two servers and another with a single
   207  // server. The active service in endpoint 1 is closed to make the child
   208  // pickfirst enter IDLE state. The test verifies that the child pickfirst
   209  // doesn't connect to the second address in the endpoint.
   210  func (s) TestEndpointShardingReconnectDisabled(t *testing.T) {
   211  	backend1 := stubserver.StartTestService(t, nil)
   212  	defer backend1.Stop()
   213  	backend2 := stubserver.StartTestService(t, nil)
   214  	defer backend2.Stop()
   215  	backend3 := stubserver.StartTestService(t, nil)
   216  	defer backend3.Stop()
   217  
   218  	mr := manual.NewBuilderWithScheme("e2e-test")
   219  	defer mr.Close()
   220  
   221  	name := strings.ReplaceAll(strings.ToLower(t.Name()), "/", "")
   222  	bf := stub.BalancerFuncs{
   223  		Init: func(bd *stub.BalancerData) {
   224  			epOpts := endpointsharding.Options{DisableAutoReconnect: true}
   225  			bd.ChildBalancer = endpointsharding.NewBalancer(bd.ClientConn, bd.BuildOptions, balancer.Get(pickfirstleaf.Name).Build, epOpts)
   226  		},
   227  		UpdateClientConnState: func(bd *stub.BalancerData, ccs balancer.ClientConnState) error {
   228  			return bd.ChildBalancer.UpdateClientConnState(ccs)
   229  		},
   230  		Close: func(bd *stub.BalancerData) {
   231  			bd.ChildBalancer.Close()
   232  		},
   233  	}
   234  	stub.Register(name, bf)
   235  
   236  	json := fmt.Sprintf(`{"loadBalancingConfig": [{"%s":{}}]}`, name)
   237  	sc := internal.ParseServiceConfig.(func(string) *serviceconfig.ParseResult)(json)
   238  	mr.InitialState(resolver.State{
   239  		Endpoints: []resolver.Endpoint{
   240  			{Addresses: []resolver.Address{{Addr: backend1.Address}, {Addr: backend2.Address}}},
   241  			{Addresses: []resolver.Address{{Addr: backend3.Address}}},
   242  		},
   243  		ServiceConfig: sc,
   244  	})
   245  
   246  	cc, err := grpc.NewClient(mr.Scheme()+":///", grpc.WithResolvers(mr), grpc.WithTransportCredentials(insecure.NewCredentials()))
   247  	if err != nil {
   248  		t.Fatalf("Failed to create new client: %v", err)
   249  	}
   250  	defer cc.Close()
   251  	ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
   252  	defer cancel()
   253  	client := testgrpc.NewTestServiceClient(cc)
   254  	// Assert a round robin distribution between the two spun up backends. This
   255  	// requires a poll and eventual consistency as both endpoint children do not
   256  	// start in state READY.
   257  	if err = roundrobin.CheckRoundRobinRPCs(ctx, client, []resolver.Address{{Addr: backend1.Address}, {Addr: backend3.Address}}); err != nil {
   258  		t.Fatalf("error in expected round robin: %v", err)
   259  	}
   260  
   261  	// On closing the first server, the first child balancer should enter
   262  	// IDLE. Since endpointsharding is configured not to auto-reconnect, it will
   263  	// remain IDLE and will not try to connect to the second backend in the same
   264  	// endpoint.
   265  	backend1.Stop()
   266  	// CheckRoundRobinRPCs waits for all the backends to become reachable, we
   267  	// call it to ensure the picker no longer sends RPCs to closed backend.
   268  	if err = roundrobin.CheckRoundRobinRPCs(ctx, client, []resolver.Address{{Addr: backend3.Address}}); err != nil {
   269  		t.Fatalf("error in expected round robin: %v", err)
   270  	}
   271  
   272  	// Verify requests go only to backend3 for a short time.
   273  	shortCtx, cancel := context.WithTimeout(ctx, defaultTestShortTimeout)
   274  	defer cancel()
   275  	for ; shortCtx.Err() == nil; <-time.After(time.Millisecond) {
   276  		var peer peer.Peer
   277  		if _, err := client.EmptyCall(ctx, &testpb.Empty{}, grpc.Peer(&peer)); err != nil {
   278  			if status.Code(err) != codes.DeadlineExceeded {
   279  				t.Fatalf("EmptyCall() returned unexpected error %v", err)
   280  			}
   281  			break
   282  		}
   283  		if got, want := peer.Addr.String(), backend3.Address; got != want {
   284  			t.Fatalf("EmptyCall() went to unexpected backend: got %q, want %q", got, want)
   285  		}
   286  	}
   287  }
   288  
   289  // Tests that endpointsharding doesn't automatically re-connect IDLE children
   290  // until cc.Connect() is called. The test creates an endpoint with a single
   291  // address. The client is connected and the active server is closed to make the
   292  // child pickfirst enter IDLE state. The test verifies that the child pickfirst
   293  // doesn't re-connect automatically. The test calls cc.Connect() and verified
   294  // that the balancer connects causing the channel to enter TransientFailure.
   295  func (s) TestEndpointShardingExitIdle(t *testing.T) {
   296  	backend := stubserver.StartTestService(t, nil)
   297  	defer backend.Stop()
   298  
   299  	mr := manual.NewBuilderWithScheme("e2e-test")
   300  	defer mr.Close()
   301  
   302  	name := strings.ReplaceAll(strings.ToLower(t.Name()), "/", "")
   303  	bf := stub.BalancerFuncs{
   304  		Init: func(bd *stub.BalancerData) {
   305  			epOpts := endpointsharding.Options{DisableAutoReconnect: true}
   306  			bd.ChildBalancer = endpointsharding.NewBalancer(bd.ClientConn, bd.BuildOptions, balancer.Get(pickfirstleaf.Name).Build, epOpts)
   307  		},
   308  		UpdateClientConnState: func(bd *stub.BalancerData, ccs balancer.ClientConnState) error {
   309  			return bd.ChildBalancer.UpdateClientConnState(ccs)
   310  		},
   311  		Close: func(bd *stub.BalancerData) {
   312  			bd.ChildBalancer.Close()
   313  		},
   314  		ExitIdle: func(bd *stub.BalancerData) {
   315  			bd.ChildBalancer.ExitIdle()
   316  		},
   317  	}
   318  	stub.Register(name, bf)
   319  
   320  	json := fmt.Sprintf(`{"loadBalancingConfig": [{"%s":{}}]}`, name)
   321  	sc := internal.ParseServiceConfig.(func(string) *serviceconfig.ParseResult)(json)
   322  	mr.InitialState(resolver.State{
   323  		Endpoints: []resolver.Endpoint{
   324  			{Addresses: []resolver.Address{{Addr: backend.Address}}},
   325  		},
   326  		ServiceConfig: sc,
   327  	})
   328  
   329  	cc, err := grpc.NewClient(mr.Scheme()+":///", grpc.WithResolvers(mr), grpc.WithTransportCredentials(insecure.NewCredentials()))
   330  	if err != nil {
   331  		t.Fatalf("Failed to create new client: %v", err)
   332  	}
   333  	defer cc.Close()
   334  	ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
   335  	defer cancel()
   336  	client := testgrpc.NewTestServiceClient(cc)
   337  	if _, err := client.EmptyCall(ctx, &testpb.Empty{}); err != nil {
   338  		t.Errorf("client.EmptyCall() returned unexpected error: %v", err)
   339  	}
   340  
   341  	// On closing the first server, the first child balancer should enter
   342  	// IDLE. Since endpointsharding is configured not to auto-reconnect, it will
   343  	// remain IDLE and will not try to re-connect
   344  	backend.Stop()
   345  	testutils.AwaitState(ctx, t, cc, connectivity.Idle)
   346  	shortCtx, shortCancel := context.WithTimeout(ctx, defaultTestShortTimeout)
   347  	defer shortCancel()
   348  	testutils.AwaitNoStateChange(shortCtx, t, cc, connectivity.Idle)
   349  
   350  	// The balancer should try to re-connect and fail.
   351  	cc.Connect()
   352  	testutils.AwaitState(ctx, t, cc, connectivity.TransientFailure)
   353  }