google.golang.org/grpc@v1.62.1/internal/testutils/roundrobin/roundrobin.go (about)

     1  /*
     2   *
     3   * Copyright 2022 gRPC authors.
     4   *
     5   * Licensed under the Apache License, Version 2.0 (the "License");
     6   * you may not use this file except in compliance with the License.
     7   * You may obtain a copy of the License at
     8   *
     9   *     http://www.apache.org/licenses/LICENSE-2.0
    10   *
    11   * Unless required by applicable law or agreed to in writing, software
    12   * distributed under the License is distributed on an "AS IS" BASIS,
    13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    14   * See the License for the specific language governing permissions and
    15   * limitations under the License.
    16   *
    17   */
    18  
    19  // Package roundrobin contains helper functions to check for roundrobin and
    20  // weighted-roundrobin load balancing of RPCs in tests.
    21  package roundrobin
    22  
    23  import (
    24  	"context"
    25  	"fmt"
    26  	"math"
    27  	"time"
    28  
    29  	"github.com/google/go-cmp/cmp"
    30  	"google.golang.org/grpc"
    31  	"google.golang.org/grpc/grpclog"
    32  	"google.golang.org/grpc/peer"
    33  	"google.golang.org/grpc/resolver"
    34  
    35  	testgrpc "google.golang.org/grpc/interop/grpc_testing"
    36  	testpb "google.golang.org/grpc/interop/grpc_testing"
    37  )
    38  
// logger emits diagnostic info when an observed RPC distribution does not yet
// match the expected round-robin / weighted-round-robin pattern.
var logger = grpclog.Component("testutils-roundrobin")
    40  
    41  // waitForTrafficToReachBackends repeatedly makes RPCs using the provided
    42  // TestServiceClient until RPCs reach all backends specified in addrs, or the
    43  // context expires, in which case a non-nil error is returned.
    44  func waitForTrafficToReachBackends(ctx context.Context, client testgrpc.TestServiceClient, addrs []resolver.Address) error {
    45  	// Make sure connections to all backends are up. We need to do this two
    46  	// times (to be sure that round_robin has kicked in) because the channel
    47  	// could have been configured with a different LB policy before the switch
    48  	// to round_robin. And the previous LB policy could be sharing backends with
    49  	// round_robin, and therefore in the first iteration of this loop, RPCs
    50  	// could land on backends owned by the previous LB policy.
    51  	for j := 0; j < 2; j++ {
    52  		for i := 0; i < len(addrs); i++ {
    53  			for {
    54  				time.Sleep(time.Millisecond)
    55  				if ctx.Err() != nil {
    56  					return fmt.Errorf("timeout waiting for connection to %q to be up", addrs[i].Addr)
    57  				}
    58  				var peer peer.Peer
    59  				if _, err := client.EmptyCall(ctx, &testpb.Empty{}, grpc.Peer(&peer)); err != nil {
    60  					// Some tests remove backends and check if round robin is
    61  					// happening across the remaining backends. In such cases,
    62  					// RPCs can initially fail on the connection using the
    63  					// removed backend. Just keep retrying and eventually the
    64  					// connection using the removed backend will shutdown and
    65  					// will be removed.
    66  					continue
    67  				}
    68  				if peer.Addr.String() == addrs[i].Addr {
    69  					break
    70  				}
    71  			}
    72  		}
    73  	}
    74  	return nil
    75  }
    76  
    77  // CheckRoundRobinRPCs verifies that EmptyCall RPCs on the given ClientConn,
    78  // connected to a server exposing the test.grpc_testing.TestService, are
    79  // roundrobined across the given backend addresses.
    80  //
    81  // Returns a non-nil error if context deadline expires before RPCs start to get
    82  // roundrobined across the given backends.
    83  func CheckRoundRobinRPCs(ctx context.Context, client testgrpc.TestServiceClient, addrs []resolver.Address) error {
    84  	if err := waitForTrafficToReachBackends(ctx, client, addrs); err != nil {
    85  		return err
    86  	}
    87  
    88  	// At this point, RPCs are getting successfully executed at the backends
    89  	// that we care about. To support duplicate addresses (in addrs) and
    90  	// backends being removed from the list of addresses passed to the
    91  	// roundrobin LB, we do the following:
    92  	// 1. Determine the count of RPCs that we expect each of our backends to
    93  	//    receive per iteration.
    94  	// 2. Wait until the same pattern repeats a few times, or the context
    95  	//    deadline expires.
    96  	wantAddrCount := make(map[string]int)
    97  	for _, addr := range addrs {
    98  		wantAddrCount[addr.Addr]++
    99  	}
   100  	for ; ctx.Err() == nil; <-time.After(time.Millisecond) {
   101  		// Perform 3 more iterations.
   102  		var iterations [][]string
   103  		for i := 0; i < 3; i++ {
   104  			iteration := make([]string, len(addrs))
   105  			for c := 0; c < len(addrs); c++ {
   106  				var peer peer.Peer
   107  				if _, err := client.EmptyCall(ctx, &testpb.Empty{}, grpc.Peer(&peer)); err != nil {
   108  					return fmt.Errorf("EmptyCall() = %v, want <nil>", err)
   109  				}
   110  				iteration[c] = peer.Addr.String()
   111  			}
   112  			iterations = append(iterations, iteration)
   113  		}
   114  		// Ensure the first iteration contains all addresses in addrs.
   115  		gotAddrCount := make(map[string]int)
   116  		for _, addr := range iterations[0] {
   117  			gotAddrCount[addr]++
   118  		}
   119  		if diff := cmp.Diff(gotAddrCount, wantAddrCount); diff != "" {
   120  			logger.Infof("non-roundrobin, got address count in one iteration: %v, want: %v, Diff: %s", gotAddrCount, wantAddrCount, diff)
   121  			continue
   122  		}
   123  		// Ensure all three iterations contain the same addresses.
   124  		if !cmp.Equal(iterations[0], iterations[1]) || !cmp.Equal(iterations[0], iterations[2]) {
   125  			logger.Infof("non-roundrobin, first iter: %v, second iter: %v, third iter: %v", iterations[0], iterations[1], iterations[2])
   126  			continue
   127  		}
   128  		return nil
   129  	}
   130  	return fmt.Errorf("timeout when waiting for roundrobin distribution of RPCs across addresses: %v", addrs)
   131  }
   132  
   133  // CheckWeightedRoundRobinRPCs verifies that EmptyCall RPCs on the given
   134  // ClientConn, connected to a server exposing the test.grpc_testing.TestService,
   135  // are weighted roundrobined (with randomness) across the given backend
   136  // addresses.
   137  //
   138  // Returns a non-nil error if context deadline expires before RPCs start to get
   139  // roundrobined across the given backends.
   140  func CheckWeightedRoundRobinRPCs(ctx context.Context, client testgrpc.TestServiceClient, addrs []resolver.Address) error {
   141  	if err := waitForTrafficToReachBackends(ctx, client, addrs); err != nil {
   142  		return err
   143  	}
   144  
   145  	// At this point, RPCs are getting successfully executed at the backends
   146  	// that we care about. To take the randomness of the WRR into account, we
   147  	// look for approximate distribution instead of exact.
   148  	wantAddrCount := make(map[string]int)
   149  	for _, addr := range addrs {
   150  		wantAddrCount[addr.Addr]++
   151  	}
   152  	wantRatio := make(map[string]float64)
   153  	for addr, count := range wantAddrCount {
   154  		wantRatio[addr] = float64(count) / float64(len(addrs))
   155  	}
   156  
   157  	// There is a small possibility that RPCs are reaching backends that we
   158  	// don't expect them to reach here. The can happen because:
   159  	// - at time T0, the list of backends [A, B, C, D].
   160  	// - at time T1, the test updates the list of backends to [A, B, C], and
   161  	//   immediately starts attempting to check the distribution of RPCs to the
   162  	//   new backends.
   163  	// - there is no way for the test to wait for a new picker to be pushed on
   164  	//   to the channel (which contains the updated list of backends) before
   165  	//   starting to attempt the RPC distribution checks.
   166  	// - This is usually a transitory state and will eventually fix itself when
   167  	//   the new picker is pushed on the channel, and RPCs will start getting
   168  	//   routed to only backends that we care about.
   169  	//
   170  	// We work around this situation by using two loops. The inner loop contains
   171  	// the meat of the calculations, and includes the logic which factors out
   172  	// the randomness in weighted roundrobin. If we ever see an RPCs getting
   173  	// routed to a backend that we dont expect it to get routed to, we break
   174  	// from the inner loop thereby resetting all state and start afresh.
   175  	for {
   176  		results := make(map[string]float64)
   177  		totalCount := float64(0)
   178  	InnerLoop:
   179  		for {
   180  			if ctx.Err() != nil {
   181  				return fmt.Errorf("timeout when waiting for roundrobin distribution of RPCs across addresses: %v", addrs)
   182  			}
   183  			for i := 0; i < len(addrs); i++ {
   184  				var peer peer.Peer
   185  				if _, err := client.EmptyCall(ctx, &testpb.Empty{}, grpc.Peer(&peer)); err != nil {
   186  					return fmt.Errorf("EmptyCall() = %v, want <nil>", err)
   187  				}
   188  				if addr := peer.Addr.String(); wantAddrCount[addr] == 0 {
   189  					break InnerLoop
   190  				}
   191  				results[peer.Addr.String()]++
   192  			}
   193  			totalCount += float64(len(addrs))
   194  
   195  			gotRatio := make(map[string]float64)
   196  			for addr, count := range results {
   197  				gotRatio[addr] = count / totalCount
   198  			}
   199  			if equalApproximate(gotRatio, wantRatio) {
   200  				return nil
   201  			}
   202  			logger.Infof("non-weighted-roundrobin, gotRatio: %v, wantRatio: %v", gotRatio, wantRatio)
   203  		}
   204  		<-time.After(time.Millisecond)
   205  	}
   206  }
   207  
   208  func equalApproximate(got, want map[string]float64) bool {
   209  	if len(got) != len(want) {
   210  		return false
   211  	}
   212  	opt := cmp.Comparer(func(x, y float64) bool {
   213  		delta := math.Abs(x - y)
   214  		mean := math.Abs(x+y) / 2.0
   215  		return delta/mean < 0.05
   216  	})
   217  	for addr := range want {
   218  		if !cmp.Equal(got[addr], want[addr], opt) {
   219  			return false
   220  		}
   221  	}
   222  	return true
   223  }