google.golang.org/grpc@v1.62.1/xds/internal/balancer/outlierdetection/balancer_test.go (about)

     1  /*
     2   *
     3   * Copyright 2022 gRPC authors.
     4   *
     5   * Licensed under the Apache License, Version 2.0 (the "License");
     6   * you may not use this file except in compliance with the License.
     7   * You may obtain a copy of the License at
     8   *
     9   *     http://www.apache.org/licenses/LICENSE-2.0
    10   *
    11   * Unless required by applicable law or agreed to in writing, software
    12   * distributed under the License is distributed on an "AS IS" BASIS,
    13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    14   * See the License for the specific language governing permissions and
    15   * limitations under the License.
    16   *
    17   */
    18  
    19  package outlierdetection
    20  
    21  import (
    22  	"context"
    23  	"encoding/json"
    24  	"errors"
    25  	"fmt"
    26  	"math"
    27  	"strings"
    28  	"sync"
    29  	"testing"
    30  	"time"
    31  
    32  	"github.com/google/go-cmp/cmp"
    33  	"github.com/google/go-cmp/cmp/cmpopts"
    34  	"google.golang.org/grpc/balancer"
    35  	"google.golang.org/grpc/connectivity"
    36  	"google.golang.org/grpc/internal/balancer/stub"
    37  	"google.golang.org/grpc/internal/channelz"
    38  	"google.golang.org/grpc/internal/grpcsync"
    39  	"google.golang.org/grpc/internal/grpctest"
    40  	iserviceconfig "google.golang.org/grpc/internal/serviceconfig"
    41  	"google.golang.org/grpc/internal/testutils"
    42  	"google.golang.org/grpc/resolver"
    43  	"google.golang.org/grpc/serviceconfig"
    44  	"google.golang.org/grpc/xds/internal/balancer/clusterimpl"
    45  )
    46  
    47  var (
    48  	defaultTestTimeout      = 5 * time.Second
    49  	defaultTestShortTimeout = 10 * time.Millisecond
    50  )
    51  
    52  type s struct {
    53  	grpctest.Tester
    54  }
    55  
    56  func Test(t *testing.T) {
    57  	grpctest.RunSubTests(t, s{})
    58  }
    59  
    60  // TestParseConfig verifies the ParseConfig() method in the Outlier Detection
    61  // Balancer.
    62  func (s) TestParseConfig(t *testing.T) {
    63  	const errParseConfigName = "errParseConfigBalancer"
    64  	stub.Register(errParseConfigName, stub.BalancerFuncs{
    65  		ParseConfig: func(json.RawMessage) (serviceconfig.LoadBalancingConfig, error) {
    66  			return nil, errors.New("some error")
    67  		},
    68  	})
    69  
    70  	parser := bb{}
    71  	const (
    72  		defaultInterval                       = iserviceconfig.Duration(10 * time.Second)
    73  		defaultBaseEjectionTime               = iserviceconfig.Duration(30 * time.Second)
    74  		defaultMaxEjectionTime                = iserviceconfig.Duration(300 * time.Second)
    75  		defaultMaxEjectionPercent             = 10
    76  		defaultSuccessRateStdevFactor         = 1900
    77  		defaultEnforcingSuccessRate           = 100
    78  		defaultSuccessRateMinimumHosts        = 5
    79  		defaultSuccessRateRequestVolume       = 100
    80  		defaultFailurePercentageThreshold     = 85
    81  		defaultEnforcingFailurePercentage     = 0
    82  		defaultFailurePercentageMinimumHosts  = 5
    83  		defaultFailurePercentageRequestVolume = 50
    84  	)
    85  	tests := []struct {
    86  		name    string
    87  		input   string
    88  		wantCfg serviceconfig.LoadBalancingConfig
    89  		wantErr string
    90  	}{
    91  		{
    92  			name: "no-fields-set-should-get-default",
    93  			input: `{
    94  				"childPolicy": [
    95  				{
    96  					"xds_cluster_impl_experimental": {
    97  						"cluster": "test_cluster"
    98  					}
    99  				}
   100  				]
   101  			}`,
   102  			wantCfg: &LBConfig{
   103  				Interval:           defaultInterval,
   104  				BaseEjectionTime:   defaultBaseEjectionTime,
   105  				MaxEjectionTime:    defaultMaxEjectionTime,
   106  				MaxEjectionPercent: defaultMaxEjectionPercent,
   107  				ChildPolicy: &iserviceconfig.BalancerConfig{
   108  					Name: "xds_cluster_impl_experimental",
   109  					Config: &clusterimpl.LBConfig{
   110  						Cluster: "test_cluster",
   111  					},
   112  				},
   113  			},
   114  		},
   115  
   116  		{
   117  			name: "some-top-level-fields-set",
   118  			input: `{
   119  				"interval": "15s",
   120  				"maxEjectionTime": "350s",
   121  				"childPolicy": [
   122  				{
   123  					"xds_cluster_impl_experimental": {
   124  						"cluster": "test_cluster"
   125  					}
   126  				}
   127  				]
   128  			}`,
   129  			// Should get set fields + defaults for unset fields.
   130  			wantCfg: &LBConfig{
   131  				Interval:           iserviceconfig.Duration(15 * time.Second),
   132  				BaseEjectionTime:   defaultBaseEjectionTime,
   133  				MaxEjectionTime:    iserviceconfig.Duration(350 * time.Second),
   134  				MaxEjectionPercent: defaultMaxEjectionPercent,
   135  				ChildPolicy: &iserviceconfig.BalancerConfig{
   136  					Name: "xds_cluster_impl_experimental",
   137  					Config: &clusterimpl.LBConfig{
   138  						Cluster: "test_cluster",
   139  					},
   140  				},
   141  			},
   142  		},
   143  		{
   144  			name: "success-rate-ejection-present-but-no-fields",
   145  			input: `{
   146  				"successRateEjection": {},
   147                  "childPolicy": [
   148  				{
   149  					"xds_cluster_impl_experimental": {
   150  						"cluster": "test_cluster"
   151  					}
   152  				}
   153  				]
   154  			}`,
   155  			// Should get defaults of success-rate-ejection struct.
   156  			wantCfg: &LBConfig{
   157  				Interval:           defaultInterval,
   158  				BaseEjectionTime:   defaultBaseEjectionTime,
   159  				MaxEjectionTime:    defaultMaxEjectionTime,
   160  				MaxEjectionPercent: defaultMaxEjectionPercent,
   161  				SuccessRateEjection: &SuccessRateEjection{
   162  					StdevFactor:           defaultSuccessRateStdevFactor,
   163  					EnforcementPercentage: defaultEnforcingSuccessRate,
   164  					MinimumHosts:          defaultSuccessRateMinimumHosts,
   165  					RequestVolume:         defaultSuccessRateRequestVolume,
   166  				},
   167  				ChildPolicy: &iserviceconfig.BalancerConfig{
   168  					Name: "xds_cluster_impl_experimental",
   169  					Config: &clusterimpl.LBConfig{
   170  						Cluster: "test_cluster",
   171  					},
   172  				},
   173  			},
   174  		},
   175  		{
   176  			name: "success-rate-ejection-present-partially-set",
   177  			input: `{
   178  				"successRateEjection": {
   179  					"stdevFactor": 1000,
   180  					"minimumHosts": 5
   181  				},
   182                  "childPolicy": [
   183  				{
   184  					"xds_cluster_impl_experimental": {
   185  						"cluster": "test_cluster"
   186  					}
   187  				}
   188  				]
   189  			}`,
   190  			// Should get set fields + defaults for others in success rate
   191  			// ejection layer.
   192  			wantCfg: &LBConfig{
   193  				Interval:           defaultInterval,
   194  				BaseEjectionTime:   defaultBaseEjectionTime,
   195  				MaxEjectionTime:    defaultMaxEjectionTime,
   196  				MaxEjectionPercent: defaultMaxEjectionPercent,
   197  				SuccessRateEjection: &SuccessRateEjection{
   198  					StdevFactor:           1000,
   199  					EnforcementPercentage: defaultEnforcingSuccessRate,
   200  					MinimumHosts:          5,
   201  					RequestVolume:         defaultSuccessRateRequestVolume,
   202  				},
   203  				ChildPolicy: &iserviceconfig.BalancerConfig{
   204  					Name: "xds_cluster_impl_experimental",
   205  					Config: &clusterimpl.LBConfig{
   206  						Cluster: "test_cluster",
   207  					},
   208  				},
   209  			},
   210  		},
   211  		{
   212  			name: "success-rate-ejection-present-fully-set",
   213  			input: `{
   214  				"successRateEjection": {
   215  					"stdevFactor": 1000,
   216  					"enforcementPercentage": 50,
   217  					"minimumHosts": 5,
   218  					"requestVolume": 50
   219  				},
   220                  "childPolicy": [
   221  				{
   222  					"xds_cluster_impl_experimental": {
   223  						"cluster": "test_cluster"
   224  					}
   225  				}
   226  				]
   227  			}`,
   228  			wantCfg: &LBConfig{
   229  				Interval:           defaultInterval,
   230  				BaseEjectionTime:   defaultBaseEjectionTime,
   231  				MaxEjectionTime:    defaultMaxEjectionTime,
   232  				MaxEjectionPercent: defaultMaxEjectionPercent,
   233  				SuccessRateEjection: &SuccessRateEjection{
   234  					StdevFactor:           1000,
   235  					EnforcementPercentage: 50,
   236  					MinimumHosts:          5,
   237  					RequestVolume:         50,
   238  				},
   239  				ChildPolicy: &iserviceconfig.BalancerConfig{
   240  					Name: "xds_cluster_impl_experimental",
   241  					Config: &clusterimpl.LBConfig{
   242  						Cluster: "test_cluster",
   243  					},
   244  				},
   245  			},
   246  		},
   247  		{
   248  			name: "failure-percentage-ejection-present-but-no-fields",
   249  			input: `{
   250  				"failurePercentageEjection": {},
   251                  "childPolicy": [
   252  				{
   253  					"xds_cluster_impl_experimental": {
   254  						"cluster": "test_cluster"
   255  					}
   256  				}
   257  				]
   258  			}`,
   259  			// Should get defaults of failure percentage ejection layer.
   260  			wantCfg: &LBConfig{
   261  				Interval:           defaultInterval,
   262  				BaseEjectionTime:   defaultBaseEjectionTime,
   263  				MaxEjectionTime:    defaultMaxEjectionTime,
   264  				MaxEjectionPercent: defaultMaxEjectionPercent,
   265  				FailurePercentageEjection: &FailurePercentageEjection{
   266  					Threshold:             defaultFailurePercentageThreshold,
   267  					EnforcementPercentage: defaultEnforcingFailurePercentage,
   268  					MinimumHosts:          defaultFailurePercentageMinimumHosts,
   269  					RequestVolume:         defaultFailurePercentageRequestVolume,
   270  				},
   271  				ChildPolicy: &iserviceconfig.BalancerConfig{
   272  					Name: "xds_cluster_impl_experimental",
   273  					Config: &clusterimpl.LBConfig{
   274  						Cluster: "test_cluster",
   275  					},
   276  				},
   277  			},
   278  		},
   279  		{
   280  			name: "failure-percentage-ejection-present-partially-set",
   281  			input: `{
   282  				"failurePercentageEjection": {
   283  					"threshold": 80,
   284  					"minimumHosts": 10
   285  				},
   286                  "childPolicy": [
   287  				{
   288  					"xds_cluster_impl_experimental": {
   289  						"cluster": "test_cluster"
   290  					}
   291  				}
   292  				]
   293  			}`,
   294  			// Should get set fields + defaults for others in success rate
   295  			// ejection layer.
   296  			wantCfg: &LBConfig{
   297  				Interval:           defaultInterval,
   298  				BaseEjectionTime:   defaultBaseEjectionTime,
   299  				MaxEjectionTime:    defaultMaxEjectionTime,
   300  				MaxEjectionPercent: defaultMaxEjectionPercent,
   301  				FailurePercentageEjection: &FailurePercentageEjection{
   302  					Threshold:             80,
   303  					EnforcementPercentage: defaultEnforcingFailurePercentage,
   304  					MinimumHosts:          10,
   305  					RequestVolume:         defaultFailurePercentageRequestVolume,
   306  				},
   307  				ChildPolicy: &iserviceconfig.BalancerConfig{
   308  					Name: "xds_cluster_impl_experimental",
   309  					Config: &clusterimpl.LBConfig{
   310  						Cluster: "test_cluster",
   311  					},
   312  				},
   313  			},
   314  		},
   315  		{
   316  			name: "failure-percentage-ejection-present-fully-set",
   317  			input: `{
   318  				"failurePercentageEjection": {
   319  					"threshold": 80,
   320  					"enforcementPercentage": 100,
   321  					"minimumHosts": 10,
   322  					"requestVolume": 40
   323                  },
   324                  "childPolicy": [
   325  				{
   326  					"xds_cluster_impl_experimental": {
   327  						"cluster": "test_cluster"
   328  					}
   329  				}
   330  				]
   331  			}`,
   332  			wantCfg: &LBConfig{
   333  				Interval:           defaultInterval,
   334  				BaseEjectionTime:   defaultBaseEjectionTime,
   335  				MaxEjectionTime:    defaultMaxEjectionTime,
   336  				MaxEjectionPercent: defaultMaxEjectionPercent,
   337  				FailurePercentageEjection: &FailurePercentageEjection{
   338  					Threshold:             80,
   339  					EnforcementPercentage: 100,
   340  					MinimumHosts:          10,
   341  					RequestVolume:         40,
   342  				},
   343  				ChildPolicy: &iserviceconfig.BalancerConfig{
   344  					Name: "xds_cluster_impl_experimental",
   345  					Config: &clusterimpl.LBConfig{
   346  						Cluster: "test_cluster",
   347  					},
   348  				},
   349  			},
   350  		},
   351  		{ // to make sure zero values aren't overwritten by defaults
   352  			name: "lb-config-every-field-set-zero-value",
   353  			input: `{
   354  				"interval": "0s",
   355  				"baseEjectionTime": "0s",
   356  				"maxEjectionTime": "0s",
   357  				"maxEjectionPercent": 0,
   358  				"successRateEjection": {
   359  					"stdevFactor": 0,
   360  					"enforcementPercentage": 0,
   361  					"minimumHosts": 0,
   362  					"requestVolume": 0
   363  				},
   364  				"failurePercentageEjection": {
   365  					"threshold": 0,
   366  					"enforcementPercentage": 0,
   367  					"minimumHosts": 0,
   368  					"requestVolume": 0
   369  				},
   370                  "childPolicy": [
   371  				{
   372  					"xds_cluster_impl_experimental": {
   373  						"cluster": "test_cluster"
   374  					}
   375  				}
   376  				]
   377  			}`,
   378  			wantCfg: &LBConfig{
   379  				SuccessRateEjection:       &SuccessRateEjection{},
   380  				FailurePercentageEjection: &FailurePercentageEjection{},
   381  				ChildPolicy: &iserviceconfig.BalancerConfig{
   382  					Name: "xds_cluster_impl_experimental",
   383  					Config: &clusterimpl.LBConfig{
   384  						Cluster: "test_cluster",
   385  					},
   386  				},
   387  			},
   388  		},
   389  		{
   390  			name: "lb-config-every-field-set",
   391  			input: `{
   392  				"interval": "10s",
   393  				"baseEjectionTime": "30s",
   394  				"maxEjectionTime": "300s",
   395  				"maxEjectionPercent": 10,
   396  				"successRateEjection": {
   397  					"stdevFactor": 1900,
   398  					"enforcementPercentage": 100,
   399  					"minimumHosts": 5,
   400  					"requestVolume": 100
   401  				},
   402  				"failurePercentageEjection": {
   403  					"threshold": 85,
   404  					"enforcementPercentage": 5,
   405  					"minimumHosts": 5,
   406  					"requestVolume": 50
   407  				},
   408                  "childPolicy": [
   409  				{
   410  					"xds_cluster_impl_experimental": {
   411  						"cluster": "test_cluster"
   412  					}
   413  				}
   414  				]
   415  			}`,
   416  			wantCfg: &LBConfig{
   417  				Interval:           iserviceconfig.Duration(10 * time.Second),
   418  				BaseEjectionTime:   iserviceconfig.Duration(30 * time.Second),
   419  				MaxEjectionTime:    iserviceconfig.Duration(300 * time.Second),
   420  				MaxEjectionPercent: 10,
   421  				SuccessRateEjection: &SuccessRateEjection{
   422  					StdevFactor:           1900,
   423  					EnforcementPercentage: 100,
   424  					MinimumHosts:          5,
   425  					RequestVolume:         100,
   426  				},
   427  				FailurePercentageEjection: &FailurePercentageEjection{
   428  					Threshold:             85,
   429  					EnforcementPercentage: 5,
   430  					MinimumHosts:          5,
   431  					RequestVolume:         50,
   432  				},
   433  				ChildPolicy: &iserviceconfig.BalancerConfig{
   434  					Name: "xds_cluster_impl_experimental",
   435  					Config: &clusterimpl.LBConfig{
   436  						Cluster: "test_cluster",
   437  					},
   438  				},
   439  			},
   440  		},
   441  		{
   442  			name:    "interval-is-negative",
   443  			input:   `{"interval": "-10s"}`,
   444  			wantErr: "OutlierDetectionLoadBalancingConfig.interval = -10s; must be >= 0",
   445  		},
   446  		{
   447  			name:    "base-ejection-time-is-negative",
   448  			input:   `{"baseEjectionTime": "-10s"}`,
   449  			wantErr: "OutlierDetectionLoadBalancingConfig.base_ejection_time = -10s; must be >= 0",
   450  		},
   451  		{
   452  			name:    "max-ejection-time-is-negative",
   453  			input:   `{"maxEjectionTime": "-10s"}`,
   454  			wantErr: "OutlierDetectionLoadBalancingConfig.max_ejection_time = -10s; must be >= 0",
   455  		},
   456  		{
   457  			name:    "max-ejection-percent-is-greater-than-100",
   458  			input:   `{"maxEjectionPercent": 150}`,
   459  			wantErr: "OutlierDetectionLoadBalancingConfig.max_ejection_percent = 150; must be <= 100",
   460  		},
   461  		{
   462  			name: "enforcement-percentage-success-rate-is-greater-than-100",
   463  			input: `{
   464  				"successRateEjection": {
   465  					"enforcementPercentage": 150
   466  				}
   467  			}`,
   468  			wantErr: "OutlierDetectionLoadBalancingConfig.SuccessRateEjection.enforcement_percentage = 150; must be <= 100",
   469  		},
   470  		{
   471  			name: "failure-percentage-threshold-is-greater-than-100",
   472  			input: `{
   473  				"failurePercentageEjection": {
   474  					"threshold": 150
   475  				}
   476  			}`,
   477  			wantErr: "OutlierDetectionLoadBalancingConfig.FailurePercentageEjection.threshold = 150; must be <= 100",
   478  		},
   479  		{
   480  			name: "enforcement-percentage-failure-percentage-ejection-is-greater-than-100",
   481  			input: `{
   482  				"failurePercentageEjection": {
   483  					"enforcementPercentage": 150
   484  				}
   485  			}`,
   486  			wantErr: "OutlierDetectionLoadBalancingConfig.FailurePercentageEjection.enforcement_percentage = 150; must be <= 100",
   487  		},
   488  		{
   489  			name: "child-policy-present-but-parse-error",
   490  			input: `{
   491  				"childPolicy": [
   492  				{
   493  					"errParseConfigBalancer": {
   494  						"cluster": "test_cluster"
   495  					}
   496  				}
   497  			]
   498  			}`,
   499  			wantErr: "error parsing loadBalancingConfig for policy \"errParseConfigBalancer\"",
   500  		},
   501  		{
   502  			name: "no-supported-child-policy",
   503  			input: `{
   504  				"childPolicy": [
   505  				{
   506  					"doesNotExistBalancer": {
   507  						"cluster": "test_cluster"
   508  					}
   509  				}
   510  			]
   511  			}`,
   512  			wantErr: "invalid loadBalancingConfig: no supported policies found",
   513  		},
   514  	}
   515  	for _, test := range tests {
   516  		t.Run(test.name, func(t *testing.T) {
   517  			gotCfg, gotErr := parser.ParseConfig(json.RawMessage(test.input))
   518  			if gotErr != nil && !strings.Contains(gotErr.Error(), test.wantErr) {
   519  				t.Fatalf("ParseConfig(%v) = %v, wantErr %v", test.input, gotErr, test.wantErr)
   520  			}
   521  			if (gotErr != nil) != (test.wantErr != "") {
   522  				t.Fatalf("ParseConfig(%v) = %v, wantErr %v", test.input, gotErr, test.wantErr)
   523  			}
   524  			if test.wantErr != "" {
   525  				return
   526  			}
   527  			if diff := cmp.Diff(gotCfg, test.wantCfg); diff != "" {
   528  				t.Fatalf("parseConfig(%v) got unexpected output, diff (-got +want): %v", string(test.input), diff)
   529  			}
   530  		})
   531  	}
   532  }
   533  
   534  func (lbc *LBConfig) Equal(lbc2 *LBConfig) bool {
   535  	if !lbc.EqualIgnoringChildPolicy(lbc2) {
   536  		return false
   537  	}
   538  	return cmp.Equal(lbc.ChildPolicy, lbc2.ChildPolicy)
   539  }
   540  
   541  type subConnWithState struct {
   542  	sc    balancer.SubConn
   543  	state balancer.SubConnState
   544  }
   545  
   546  func setup(t *testing.T) (*outlierDetectionBalancer, *testutils.BalancerClientConn, func()) {
   547  	t.Helper()
   548  	builder := balancer.Get(Name)
   549  	if builder == nil {
   550  		t.Fatalf("balancer.Get(%q) returned nil", Name)
   551  	}
   552  	tcc := testutils.NewBalancerClientConn(t)
   553  	odB := builder.Build(tcc, balancer.BuildOptions{ChannelzParentID: channelz.NewIdentifierForTesting(channelz.RefChannel, time.Now().Unix(), nil)})
   554  	return odB.(*outlierDetectionBalancer), tcc, odB.Close
   555  }
   556  
   557  type emptyChildConfig struct {
   558  	serviceconfig.LoadBalancingConfig
   559  }
   560  
   561  // TestChildBasicOperations tests basic operations of the Outlier Detection
   562  // Balancer and it's interaction with it's child. The following scenarios are
   563  // tested, in a step by step fashion:
   564  // 1. The Outlier Detection Balancer receives it's first good configuration. The
   565  // balancer is expected to create a child and sent the child it's configuration.
   566  // 2. The Outlier Detection Balancer receives new configuration that specifies a
   567  // child's type, and the new type immediately reports READY inline. The first
   568  // child balancer should be closed and the second child balancer should receive
   569  // a config update.
   570  // 3. The Outlier Detection Balancer is closed. The second child balancer should
   571  // be closed.
   572  func (s) TestChildBasicOperations(t *testing.T) {
   573  	bc := emptyChildConfig{}
   574  
   575  	ccsCh := testutils.NewChannel()
   576  	closeCh := testutils.NewChannel()
   577  
   578  	stub.Register(t.Name()+"child1", stub.BalancerFuncs{
   579  		UpdateClientConnState: func(bd *stub.BalancerData, ccs balancer.ClientConnState) error {
   580  			ccsCh.Send(ccs.BalancerConfig)
   581  			return nil
   582  		},
   583  		Close: func(bd *stub.BalancerData) {
   584  			closeCh.Send(nil)
   585  		},
   586  	})
   587  
   588  	stub.Register(t.Name()+"child2", stub.BalancerFuncs{
   589  		UpdateClientConnState: func(bd *stub.BalancerData, _ balancer.ClientConnState) error {
   590  			// UpdateState inline to READY to complete graceful switch process
   591  			// synchronously from any UpdateClientConnState call.
   592  			bd.ClientConn.UpdateState(balancer.State{
   593  				ConnectivityState: connectivity.Ready,
   594  				Picker:            &testutils.TestConstPicker{},
   595  			})
   596  			ccsCh.Send(nil)
   597  			return nil
   598  		},
   599  		Close: func(bd *stub.BalancerData) {
   600  			closeCh.Send(nil)
   601  		},
   602  	})
   603  
   604  	od, tcc, _ := setup(t)
   605  
   606  	// This first config update should cause a child to be built and forwarded
   607  	// it's first update.
   608  	od.UpdateClientConnState(balancer.ClientConnState{
   609  		BalancerConfig: &LBConfig{
   610  			ChildPolicy: &iserviceconfig.BalancerConfig{
   611  				Name:   t.Name() + "child1",
   612  				Config: bc,
   613  			},
   614  		},
   615  	})
   616  
   617  	ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
   618  	defer cancel()
   619  	cr, err := ccsCh.Receive(ctx)
   620  	if err != nil {
   621  		t.Fatalf("timed out waiting for UpdateClientConnState on the first child balancer: %v", err)
   622  	}
   623  	if _, ok := cr.(emptyChildConfig); !ok {
   624  		t.Fatalf("Received child policy config of type %T, want %T", cr, emptyChildConfig{})
   625  	}
   626  
   627  	// This Update Client Conn State call should cause the first child balancer
   628  	// to close, and a new child to be created and also forwarded it's first
   629  	// config update.
   630  	od.UpdateClientConnState(balancer.ClientConnState{
   631  		BalancerConfig: &LBConfig{
   632  			Interval: math.MaxInt64,
   633  			ChildPolicy: &iserviceconfig.BalancerConfig{
   634  				Name:   t.Name() + "child2",
   635  				Config: emptyChildConfig{},
   636  			},
   637  		},
   638  	})
   639  
   640  	// Verify inline UpdateState() call from the new child eventually makes it's
   641  	// way to the Test Client Conn.
   642  	select {
   643  	case <-ctx.Done():
   644  		t.Fatalf("timeout while waiting for a UpdateState call on the ClientConn")
   645  	case state := <-tcc.NewStateCh:
   646  		if state != connectivity.Ready {
   647  			t.Fatalf("ClientConn received connectivity state %v, want %v", state, connectivity.Ready)
   648  		}
   649  	}
   650  
   651  	// Verify the first child balancer closed.
   652  	if _, err = closeCh.Receive(ctx); err != nil {
   653  		t.Fatalf("timed out waiting for the first child balancer to be closed: %v", err)
   654  	}
   655  	// Verify the second child balancer received it's first config update.
   656  	if _, err = ccsCh.Receive(ctx); err != nil {
   657  		t.Fatalf("timed out waiting for UpdateClientConnState on the second child balancer: %v", err)
   658  	}
   659  	// Closing the Outlier Detection Balancer should close the newly created
   660  	// child.
   661  	od.Close()
   662  	if _, err = closeCh.Receive(ctx); err != nil {
   663  		t.Fatalf("timed out waiting for the second child balancer to be closed: %v", err)
   664  	}
   665  }
   666  
   667  // TestUpdateAddresses tests the functionality of UpdateAddresses and any
   668  // changes in the addresses/plurality of those addresses for a SubConn. The
   669  // Balancer is set up with two upstreams, with one of the upstreams being
   670  // ejected. Initially, there is one SubConn for each address. The following
   671  // scenarios are tested, in a step by step fashion:
   672  // 1. The SubConn not currently ejected switches addresses to the address that
   673  // is ejected. This should cause the SubConn to get ejected.
   674  // 2. Update this same SubConn to multiple addresses. This should cause the
   675  // SubConn to get unejected, as it is no longer being tracked by Outlier
   676  // Detection at that point.
   677  // 3. Update this same SubConn to different addresses, still multiple. This
   678  // should be a noop, as the SubConn is still no longer being tracked by Outlier
   679  // Detection.
   680  // 4. Update this same SubConn to the a single address which is ejected. This
   681  // should cause the SubConn to be ejected.
   682  func (s) TestUpdateAddresses(t *testing.T) {
   683  	scsCh := testutils.NewChannel()
   684  	var scw1, scw2 balancer.SubConn
   685  	var err error
   686  	stub.Register(t.Name(), stub.BalancerFuncs{
   687  		UpdateClientConnState: func(bd *stub.BalancerData, _ balancer.ClientConnState) error {
   688  			scw1, err = bd.ClientConn.NewSubConn([]resolver.Address{{Addr: "address1"}}, balancer.NewSubConnOptions{
   689  				StateListener: func(state balancer.SubConnState) { scsCh.Send(subConnWithState{sc: scw1, state: state}) },
   690  			})
   691  			if err != nil {
   692  				t.Errorf("error in od.NewSubConn call: %v", err)
   693  			}
   694  			scw2, err = bd.ClientConn.NewSubConn([]resolver.Address{{Addr: "address2"}}, balancer.NewSubConnOptions{
   695  				StateListener: func(state balancer.SubConnState) { scsCh.Send(subConnWithState{sc: scw2, state: state}) },
   696  			})
   697  			if err != nil {
   698  				t.Errorf("error in od.NewSubConn call: %v", err)
   699  			}
   700  			bd.ClientConn.UpdateState(balancer.State{
   701  				ConnectivityState: connectivity.Ready,
   702  				Picker: &rrPicker{
   703  					scs: []balancer.SubConn{scw1, scw2},
   704  				},
   705  			})
   706  			return nil
   707  		},
   708  	})
   709  
   710  	od, tcc, cleanup := setup(t)
   711  	defer cleanup()
   712  
   713  	od.UpdateClientConnState(balancer.ClientConnState{
   714  		ResolverState: resolver.State{
   715  			Addresses: []resolver.Address{
   716  				{Addr: "address1"},
   717  				{Addr: "address2"},
   718  			},
   719  		},
   720  		BalancerConfig: &LBConfig{
   721  			Interval:           iserviceconfig.Duration(10 * time.Second),
   722  			BaseEjectionTime:   iserviceconfig.Duration(30 * time.Second),
   723  			MaxEjectionTime:    iserviceconfig.Duration(300 * time.Second),
   724  			MaxEjectionPercent: 10,
   725  			FailurePercentageEjection: &FailurePercentageEjection{
   726  				Threshold:             50,
   727  				EnforcementPercentage: 100,
   728  				MinimumHosts:          2,
   729  				RequestVolume:         3,
   730  			},
   731  			ChildPolicy: &iserviceconfig.BalancerConfig{
   732  				Name:   t.Name(),
   733  				Config: emptyChildConfig{},
   734  			},
   735  		},
   736  	})
   737  
   738  	ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
   739  	defer cancel()
   740  
   741  	// Setup the system to where one address is ejected and one address
   742  	// isn't.
   743  	select {
   744  	case <-ctx.Done():
   745  		t.Fatal("timeout while waiting for a UpdateState call on the ClientConn")
   746  	case picker := <-tcc.NewPickerCh:
   747  		pi, err := picker.Pick(balancer.PickInfo{})
   748  		if err != nil {
   749  			t.Fatalf("picker.Pick failed with error: %v", err)
   750  		}
   751  		// Simulate 5 successful RPC calls on the first SubConn (the first call
   752  		// to picker.Pick).
   753  		for c := 0; c < 5; c++ {
   754  			pi.Done(balancer.DoneInfo{})
   755  		}
   756  		pi, err = picker.Pick(balancer.PickInfo{})
   757  		if err != nil {
   758  			t.Fatalf("picker.Pick failed with error: %v", err)
   759  		}
   760  		// Simulate 5 failed RPC calls on the second SubConn (the second call to
   761  		// picker.Pick). Thus, when the interval timer algorithm is run, the
   762  		// second SubConn's address should be ejected, which will allow us to
   763  		// further test UpdateAddresses() logic.
   764  		for c := 0; c < 5; c++ {
   765  			pi.Done(balancer.DoneInfo{Err: errors.New("some error")})
   766  		}
   767  		od.intervalTimerAlgorithm()
   768  		// verify StateListener() got called with TRANSIENT_FAILURE for child
   769  		// with address that was ejected.
   770  		gotSCWS, err := scsCh.Receive(ctx)
   771  		if err != nil {
   772  			t.Fatalf("Error waiting for Sub Conn update: %v", err)
   773  		}
   774  		if err = scwsEqual(gotSCWS.(subConnWithState), subConnWithState{
   775  			sc:    scw2,
   776  			state: balancer.SubConnState{ConnectivityState: connectivity.TransientFailure},
   777  		}); err != nil {
   778  			t.Fatalf("Error in Sub Conn update: %v", err)
   779  		}
   780  	}
   781  
   782  	// Update scw1 to another address that is currently ejected. This should
   783  	// cause scw1 to get ejected.
   784  	od.UpdateAddresses(scw1, []resolver.Address{{Addr: "address2"}})
   785  
   786  	// Verify that update addresses gets forwarded to ClientConn.
   787  	select {
   788  	case <-ctx.Done():
   789  		t.Fatal("timeout while waiting for a UpdateState call on the ClientConn")
   790  	case <-tcc.UpdateAddressesAddrsCh:
   791  	}
   792  	// Verify scw1 got ejected (StateListener called with TRANSIENT_FAILURE).
   793  	gotSCWS, err := scsCh.Receive(ctx)
   794  	if err != nil {
   795  		t.Fatalf("Error waiting for Sub Conn update: %v", err)
   796  	}
   797  	if err = scwsEqual(gotSCWS.(subConnWithState), subConnWithState{
   798  		sc:    scw1,
   799  		state: balancer.SubConnState{ConnectivityState: connectivity.TransientFailure},
   800  	}); err != nil {
   801  		t.Fatalf("Error in Sub Conn update: %v", err)
   802  	}
   803  
   804  	// Update scw1 to multiple addresses. This should cause scw1 to get
   805  	// unejected, as is it no longer being tracked for Outlier Detection.
   806  	od.UpdateAddresses(scw1, []resolver.Address{
   807  		{Addr: "address1"},
   808  		{Addr: "address2"},
   809  	})
   810  	// Verify scw1 got unejected (StateListener called with recent state).
   811  	gotSCWS, err = scsCh.Receive(ctx)
   812  	if err != nil {
   813  		t.Fatalf("Error waiting for Sub Conn update: %v", err)
   814  	}
   815  	if err = scwsEqual(gotSCWS.(subConnWithState), subConnWithState{
   816  		sc:    scw1,
   817  		state: balancer.SubConnState{ConnectivityState: connectivity.Idle},
   818  	}); err != nil {
   819  		t.Fatalf("Error in Sub Conn update: %v", err)
   820  	}
   821  
   822  	// Update scw1 to a different multiple addresses list. A change of addresses
   823  	// in which the plurality goes from multiple to multiple should be a no-op,
   824  	// as the address continues to be ignored by outlier detection.
   825  	od.UpdateAddresses(scw1, []resolver.Address{
   826  		{Addr: "address2"},
   827  		{Addr: "address3"},
   828  	})
   829  	// Verify no downstream effects.
   830  	sCtx, cancel := context.WithTimeout(context.Background(), defaultTestShortTimeout)
   831  	defer cancel()
   832  	if _, err := scsCh.Receive(sCtx); err == nil {
   833  		t.Fatalf("no SubConn update should have been sent (no SubConn got ejected/unejected)")
   834  	}
   835  
   836  	// Update scw1 back to a single address, which is ejected. This should cause
   837  	// the SubConn to be re-ejected.
   838  	od.UpdateAddresses(scw1, []resolver.Address{{Addr: "address2"}})
   839  	// Verify scw1 got ejected (StateListener called with TRANSIENT FAILURE).
   840  	gotSCWS, err = scsCh.Receive(ctx)
   841  	if err != nil {
   842  		t.Fatalf("Error waiting for Sub Conn update: %v", err)
   843  	}
   844  	if err = scwsEqual(gotSCWS.(subConnWithState), subConnWithState{
   845  		sc:    scw1,
   846  		state: balancer.SubConnState{ConnectivityState: connectivity.TransientFailure},
   847  	}); err != nil {
   848  		t.Fatalf("Error in Sub Conn update: %v", err)
   849  	}
   850  }
   851  
   852  func scwsEqual(gotSCWS subConnWithState, wantSCWS subConnWithState) error {
   853  	if gotSCWS.sc != wantSCWS.sc || !cmp.Equal(gotSCWS.state, wantSCWS.state, cmp.AllowUnexported(subConnWrapper{}, addressInfo{}), cmpopts.IgnoreFields(subConnWrapper{}, "scUpdateCh")) {
   854  		return fmt.Errorf("received SubConnState: %+v, want %+v", gotSCWS, wantSCWS)
   855  	}
   856  	return nil
   857  }
   858  
// rrPicker is a picker used by the tests in this file that hands out the
// SubConns in scs one after another, wrapping around at the end of the list.
type rrPicker struct {
	// scs is the list of SubConns that Pick cycles through.
	scs []balancer.SubConn
	// next is the index into scs of the SubConn returned by the next Pick.
	next int
}
   863  
   864  func (rrp *rrPicker) Pick(balancer.PickInfo) (balancer.PickResult, error) {
   865  	sc := rrp.scs[rrp.next]
   866  	rrp.next = (rrp.next + 1) % len(rrp.scs)
   867  	return balancer.PickResult{SubConn: sc}, nil
   868  }
   869  
   870  // TestDurationOfInterval tests the configured interval timer.
   871  // The following scenarios are tested:
   872  // 1. The Outlier Detection Balancer receives it's first config. The balancer
   873  // should configure the timer with whatever is directly specified on the config.
   874  // 2. The Outlier Detection Balancer receives a subsequent config. The balancer
   875  // should configure with whatever interval is configured minus the difference
   876  // between the current time and the previous start timestamp.
   877  // 3. The Outlier Detection Balancer receives a no-op configuration. The
   878  // balancer should not configure a timer at all.
   879  func (s) TestDurationOfInterval(t *testing.T) {
   880  	stub.Register(t.Name(), stub.BalancerFuncs{})
   881  
   882  	od, _, cleanup := setup(t)
   883  	defer func(af func(d time.Duration, f func()) *time.Timer) {
   884  		cleanup()
   885  		afterFunc = af
   886  	}(afterFunc)
   887  
   888  	durationChan := testutils.NewChannel()
   889  	afterFunc = func(dur time.Duration, _ func()) *time.Timer {
   890  		durationChan.Send(dur)
   891  		return time.NewTimer(math.MaxInt64)
   892  	}
   893  
   894  	od.UpdateClientConnState(balancer.ClientConnState{
   895  		BalancerConfig: &LBConfig{
   896  			Interval: iserviceconfig.Duration(8 * time.Second),
   897  			SuccessRateEjection: &SuccessRateEjection{
   898  				StdevFactor:           1900,
   899  				EnforcementPercentage: 100,
   900  				MinimumHosts:          5,
   901  				RequestVolume:         100,
   902  			},
   903  			ChildPolicy: &iserviceconfig.BalancerConfig{
   904  				Name:   t.Name(),
   905  				Config: emptyChildConfig{},
   906  			},
   907  		},
   908  	})
   909  	ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
   910  	defer cancel()
   911  	d, err := durationChan.Receive(ctx)
   912  	if err != nil {
   913  		t.Fatalf("Error receiving duration from afterFunc() call: %v", err)
   914  	}
   915  	dur := d.(time.Duration)
   916  	// The configured duration should be 8 seconds - what the balancer was
   917  	// configured with.
   918  	if dur != 8*time.Second {
   919  		t.Fatalf("configured duration should have been 8 seconds to start timer")
   920  	}
   921  
   922  	// Override time.Now to time.Now() + 5 seconds. This will represent 5
   923  	// seconds already passing for the next check in UpdateClientConnState.
   924  	defer func(n func() time.Time) {
   925  		now = n
   926  	}(now)
   927  	now = func() time.Time {
   928  		return time.Now().Add(time.Second * 5)
   929  	}
   930  
   931  	// UpdateClientConnState with an interval of 9 seconds. Due to 5 seconds
   932  	// already passing (from overridden time.Now function), this should start an
   933  	// interval timer of ~4 seconds.
   934  	od.UpdateClientConnState(balancer.ClientConnState{
   935  		BalancerConfig: &LBConfig{
   936  			Interval: iserviceconfig.Duration(9 * time.Second),
   937  			SuccessRateEjection: &SuccessRateEjection{
   938  				StdevFactor:           1900,
   939  				EnforcementPercentage: 100,
   940  				MinimumHosts:          5,
   941  				RequestVolume:         100,
   942  			},
   943  			ChildPolicy: &iserviceconfig.BalancerConfig{
   944  				Name:   t.Name(),
   945  				Config: emptyChildConfig{},
   946  			},
   947  		},
   948  	})
   949  
   950  	d, err = durationChan.Receive(ctx)
   951  	if err != nil {
   952  		t.Fatalf("Error receiving duration from afterFunc() call: %v", err)
   953  	}
   954  	dur = d.(time.Duration)
   955  	if dur.Seconds() < 3.5 || 4.5 < dur.Seconds() {
   956  		t.Fatalf("configured duration should have been around 4 seconds to start timer")
   957  	}
   958  
   959  	// UpdateClientConnState with a no-op config. This shouldn't configure the
   960  	// interval timer at all due to it being a no-op.
   961  	od.UpdateClientConnState(balancer.ClientConnState{
   962  		BalancerConfig: &LBConfig{
   963  			Interval: iserviceconfig.Duration(10 * time.Second),
   964  			ChildPolicy: &iserviceconfig.BalancerConfig{
   965  				Name:   t.Name(),
   966  				Config: emptyChildConfig{},
   967  			},
   968  		},
   969  	})
   970  
   971  	// No timer should have been started.
   972  	sCtx, cancel := context.WithTimeout(context.Background(), defaultTestShortTimeout)
   973  	defer cancel()
   974  	if _, err = durationChan.Receive(sCtx); err == nil {
   975  		t.Fatal("No timer should have started.")
   976  	}
   977  }
   978  
   979  // TestEjectUnejectSuccessRate tests the functionality of the interval timer
   980  // algorithm when configured with SuccessRateEjection. The Outlier Detection
   981  // Balancer will be set up with 3 SubConns, each with a different address.
   982  // It tests the following scenarios, in a step by step fashion:
   983  // 1. The three addresses each have 5 successes. The interval timer algorithm should
   984  // not eject any of the addresses.
   985  // 2. Two of the addresses have 5 successes, the third has five failures. The
   986  // interval timer algorithm should eject the third address with five failures.
   987  // 3. The interval timer algorithm is run at a later time past max ejection
   988  // time. The interval timer algorithm should uneject the third address.
   989  func (s) TestEjectUnejectSuccessRate(t *testing.T) {
   990  	scsCh := testutils.NewChannel()
   991  	var scw1, scw2, scw3 balancer.SubConn
   992  	var err error
   993  	stub.Register(t.Name(), stub.BalancerFuncs{
   994  		UpdateClientConnState: func(bd *stub.BalancerData, _ balancer.ClientConnState) error {
   995  			scw1, err = bd.ClientConn.NewSubConn([]resolver.Address{{Addr: "address1"}}, balancer.NewSubConnOptions{
   996  				StateListener: func(state balancer.SubConnState) { scsCh.Send(subConnWithState{sc: scw1, state: state}) },
   997  			})
   998  			if err != nil {
   999  				t.Errorf("error in od.NewSubConn call: %v", err)
  1000  			}
  1001  			scw2, err = bd.ClientConn.NewSubConn([]resolver.Address{{Addr: "address2"}}, balancer.NewSubConnOptions{
  1002  				StateListener: func(state balancer.SubConnState) { scsCh.Send(subConnWithState{sc: scw2, state: state}) },
  1003  			})
  1004  			if err != nil {
  1005  				t.Errorf("error in od.NewSubConn call: %v", err)
  1006  			}
  1007  			scw3, err = bd.ClientConn.NewSubConn([]resolver.Address{{Addr: "address3"}}, balancer.NewSubConnOptions{
  1008  				StateListener: func(state balancer.SubConnState) { scsCh.Send(subConnWithState{sc: scw3, state: state}) },
  1009  			})
  1010  			if err != nil {
  1011  				t.Errorf("error in od.NewSubConn call: %v", err)
  1012  			}
  1013  			bd.ClientConn.UpdateState(balancer.State{
  1014  				ConnectivityState: connectivity.Ready,
  1015  				Picker: &rrPicker{
  1016  					scs: []balancer.SubConn{scw1, scw2, scw3},
  1017  				},
  1018  			})
  1019  			return nil
  1020  		},
  1021  	})
  1022  
  1023  	od, tcc, cleanup := setup(t)
  1024  	defer func() {
  1025  		cleanup()
  1026  	}()
  1027  
  1028  	od.UpdateClientConnState(balancer.ClientConnState{
  1029  		ResolverState: resolver.State{
  1030  			Addresses: []resolver.Address{
  1031  				{Addr: "address1"},
  1032  				{Addr: "address2"},
  1033  				{Addr: "address3"},
  1034  			},
  1035  		},
  1036  		BalancerConfig: &LBConfig{
  1037  			Interval:           math.MaxInt64, // so the interval will never run unless called manually in test.
  1038  			BaseEjectionTime:   iserviceconfig.Duration(30 * time.Second),
  1039  			MaxEjectionTime:    iserviceconfig.Duration(300 * time.Second),
  1040  			MaxEjectionPercent: 10,
  1041  			FailurePercentageEjection: &FailurePercentageEjection{
  1042  				Threshold:             50,
  1043  				EnforcementPercentage: 100,
  1044  				MinimumHosts:          3,
  1045  				RequestVolume:         3,
  1046  			},
  1047  			ChildPolicy: &iserviceconfig.BalancerConfig{
  1048  				Name:   t.Name(),
  1049  				Config: emptyChildConfig{},
  1050  			},
  1051  		},
  1052  	})
  1053  
  1054  	ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
  1055  	defer cancel()
  1056  
  1057  	select {
  1058  	case <-ctx.Done():
  1059  		t.Fatalf("timeout while waiting for a UpdateState call on the ClientConn")
  1060  	case picker := <-tcc.NewPickerCh:
  1061  		// Set each of the three upstream addresses to have five successes each.
  1062  		// This should cause none of the addresses to be ejected as none of them
  1063  		// are outliers according to the success rate algorithm.
  1064  		for i := 0; i < 3; i++ {
  1065  			pi, err := picker.Pick(balancer.PickInfo{})
  1066  			if err != nil {
  1067  				t.Fatalf("picker.Pick failed with error: %v", err)
  1068  			}
  1069  			for c := 0; c < 5; c++ {
  1070  				pi.Done(balancer.DoneInfo{})
  1071  			}
  1072  		}
  1073  
  1074  		od.intervalTimerAlgorithm()
  1075  
  1076  		// verify no StateListener() call on the child, as no addresses got
  1077  		// ejected (ejected address will cause an StateListener call).
  1078  		sCtx, cancel := context.WithTimeout(context.Background(), defaultTestShortTimeout)
  1079  		defer cancel()
  1080  		if _, err := scsCh.Receive(sCtx); err == nil {
  1081  			t.Fatalf("no SubConn update should have been sent (no SubConn got ejected)")
  1082  		}
  1083  
  1084  		// Since no addresses are ejected, a SubConn update should forward down
  1085  		// to the child.
  1086  		od.updateSubConnState(scw1.(*subConnWrapper).SubConn, balancer.SubConnState{
  1087  			ConnectivityState: connectivity.Connecting,
  1088  		})
  1089  
  1090  		gotSCWS, err := scsCh.Receive(ctx)
  1091  		if err != nil {
  1092  			t.Fatalf("Error waiting for Sub Conn update: %v", err)
  1093  		}
  1094  		if err = scwsEqual(gotSCWS.(subConnWithState), subConnWithState{
  1095  			sc:    scw1,
  1096  			state: balancer.SubConnState{ConnectivityState: connectivity.Connecting},
  1097  		}); err != nil {
  1098  			t.Fatalf("Error in Sub Conn update: %v", err)
  1099  		}
  1100  
  1101  		// Set two of the upstream addresses to have five successes each, and
  1102  		// one of the upstream addresses to have five failures. This should
  1103  		// cause the address which has five failures to be ejected according to
  1104  		// the SuccessRateAlgorithm.
  1105  		for i := 0; i < 2; i++ {
  1106  			pi, err := picker.Pick(balancer.PickInfo{})
  1107  			if err != nil {
  1108  				t.Fatalf("picker.Pick failed with error: %v", err)
  1109  			}
  1110  			for c := 0; c < 5; c++ {
  1111  				pi.Done(balancer.DoneInfo{})
  1112  			}
  1113  		}
  1114  		pi, err := picker.Pick(balancer.PickInfo{})
  1115  		if err != nil {
  1116  			t.Fatalf("picker.Pick failed with error: %v", err)
  1117  		}
  1118  		for c := 0; c < 5; c++ {
  1119  			pi.Done(balancer.DoneInfo{Err: errors.New("some error")})
  1120  		}
  1121  
  1122  		// should eject address that always errored.
  1123  		od.intervalTimerAlgorithm()
  1124  		// Due to the address being ejected, the SubConn with that address
  1125  		// should be ejected, meaning a TRANSIENT_FAILURE connectivity state
  1126  		// gets reported to the child.
  1127  		gotSCWS, err = scsCh.Receive(ctx)
  1128  		if err != nil {
  1129  			t.Fatalf("Error waiting for Sub Conn update: %v", err)
  1130  		}
  1131  		if err = scwsEqual(gotSCWS.(subConnWithState), subConnWithState{
  1132  			sc:    scw3,
  1133  			state: balancer.SubConnState{ConnectivityState: connectivity.TransientFailure},
  1134  		}); err != nil {
  1135  			t.Fatalf("Error in Sub Conn update: %v", err)
  1136  		}
  1137  		// Only one address should be ejected.
  1138  		sCtx, cancel = context.WithTimeout(context.Background(), defaultTestShortTimeout)
  1139  		defer cancel()
  1140  		if _, err := scsCh.Receive(sCtx); err == nil {
  1141  			t.Fatalf("Only one SubConn update should have been sent (only one SubConn got ejected)")
  1142  		}
  1143  
  1144  		// Now that an address is ejected, SubConn updates for SubConns using
  1145  		// that address should not be forwarded downward. These SubConn updates
  1146  		// will be cached to update the child sometime in the future when the
  1147  		// address gets unejected.
  1148  		od.updateSubConnState(pi.SubConn, balancer.SubConnState{
  1149  			ConnectivityState: connectivity.Connecting,
  1150  		})
  1151  		sCtx, cancel = context.WithTimeout(context.Background(), defaultTestShortTimeout)
  1152  		defer cancel()
  1153  		if _, err := scsCh.Receive(sCtx); err == nil {
  1154  			t.Fatalf("SubConn update should not have been forwarded (the SubConn is ejected)")
  1155  		}
  1156  
  1157  		// Override now to cause the interval timer algorithm to always uneject
  1158  		// the ejected address. This will always uneject the ejected address
  1159  		// because this time is set way past the max ejection time set in the
  1160  		// configuration, which will make the next interval timer algorithm run
  1161  		// uneject any ejected addresses.
  1162  		defer func(n func() time.Time) {
  1163  			now = n
  1164  		}(now)
  1165  		now = func() time.Time {
  1166  			return time.Now().Add(time.Second * 1000)
  1167  		}
  1168  		od.intervalTimerAlgorithm()
  1169  
  1170  		// unejected SubConn should report latest persisted state - which is
  1171  		// connecting from earlier.
  1172  		gotSCWS, err = scsCh.Receive(ctx)
  1173  		if err != nil {
  1174  			t.Fatalf("Error waiting for Sub Conn update: %v", err)
  1175  		}
  1176  		if err = scwsEqual(gotSCWS.(subConnWithState), subConnWithState{
  1177  			sc:    scw3,
  1178  			state: balancer.SubConnState{ConnectivityState: connectivity.Connecting},
  1179  		}); err != nil {
  1180  			t.Fatalf("Error in Sub Conn update: %v", err)
  1181  		}
  1182  	}
  1183  }
  1184  
  1185  // TestEjectFailureRate tests the functionality of the interval timer algorithm
  1186  // when configured with FailurePercentageEjection, and also the functionality of
  1187  // noop configuration. The Outlier Detection Balancer will be set up with 3
  1188  // SubConns, each with a different address. It tests the following scenarios, in
  1189  // a step by step fashion:
  1190  // 1. The three addresses each have 5 successes. The interval timer algorithm
  1191  // should not eject any of the addresses.
  1192  // 2. Two of the addresses have 5 successes, the third has five failures. The
  1193  // interval timer algorithm should eject the third address with five failures.
  1194  // 3. The Outlier Detection Balancer receives a subsequent noop config update.
  1195  // The balancer should uneject all ejected addresses.
  1196  func (s) TestEjectFailureRate(t *testing.T) {
  1197  	scsCh := testutils.NewChannel()
  1198  	var scw1, scw2, scw3 balancer.SubConn
  1199  	var err error
  1200  	stub.Register(t.Name(), stub.BalancerFuncs{
  1201  		UpdateClientConnState: func(bd *stub.BalancerData, _ balancer.ClientConnState) error {
  1202  			if scw1 != nil { // UpdateClientConnState was already called, no need to recreate SubConns.
  1203  				return nil
  1204  			}
  1205  			scw1, err = bd.ClientConn.NewSubConn([]resolver.Address{{Addr: "address1"}}, balancer.NewSubConnOptions{
  1206  				StateListener: func(state balancer.SubConnState) { scsCh.Send(subConnWithState{sc: scw1, state: state}) },
  1207  			})
  1208  			if err != nil {
  1209  				t.Errorf("error in od.NewSubConn call: %v", err)
  1210  			}
  1211  			scw2, err = bd.ClientConn.NewSubConn([]resolver.Address{{Addr: "address2"}}, balancer.NewSubConnOptions{
  1212  				StateListener: func(state balancer.SubConnState) { scsCh.Send(subConnWithState{sc: scw2, state: state}) },
  1213  			})
  1214  			if err != nil {
  1215  				t.Errorf("error in od.NewSubConn call: %v", err)
  1216  			}
  1217  			scw3, err = bd.ClientConn.NewSubConn([]resolver.Address{{Addr: "address3"}}, balancer.NewSubConnOptions{
  1218  				StateListener: func(state balancer.SubConnState) { scsCh.Send(subConnWithState{sc: scw3, state: state}) },
  1219  			})
  1220  			if err != nil {
  1221  				t.Errorf("error in od.NewSubConn call: %v", err)
  1222  			}
  1223  			return nil
  1224  		},
  1225  	})
  1226  
  1227  	od, tcc, cleanup := setup(t)
  1228  	defer func() {
  1229  		cleanup()
  1230  	}()
  1231  
  1232  	od.UpdateClientConnState(balancer.ClientConnState{
  1233  		ResolverState: resolver.State{
  1234  			Addresses: []resolver.Address{
  1235  				{Addr: "address1"},
  1236  				{Addr: "address2"},
  1237  				{Addr: "address3"},
  1238  			},
  1239  		},
  1240  		BalancerConfig: &LBConfig{
  1241  			Interval:           math.MaxInt64, // so the interval will never run unless called manually in test.
  1242  			BaseEjectionTime:   iserviceconfig.Duration(30 * time.Second),
  1243  			MaxEjectionTime:    iserviceconfig.Duration(300 * time.Second),
  1244  			MaxEjectionPercent: 10,
  1245  			SuccessRateEjection: &SuccessRateEjection{
  1246  				StdevFactor:           500,
  1247  				EnforcementPercentage: 100,
  1248  				MinimumHosts:          3,
  1249  				RequestVolume:         3,
  1250  			},
  1251  			ChildPolicy: &iserviceconfig.BalancerConfig{
  1252  				Name:   t.Name(),
  1253  				Config: emptyChildConfig{},
  1254  			},
  1255  		},
  1256  	})
  1257  
  1258  	od.UpdateState(balancer.State{
  1259  		ConnectivityState: connectivity.Ready,
  1260  		Picker: &rrPicker{
  1261  			scs: []balancer.SubConn{scw1, scw2, scw3},
  1262  		},
  1263  	})
  1264  
  1265  	ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
  1266  	defer cancel()
  1267  
  1268  	select {
  1269  	case <-ctx.Done():
  1270  		t.Fatalf("timeout while waiting for a UpdateState call on the ClientConn")
  1271  	case picker := <-tcc.NewPickerCh:
  1272  		// Set each upstream address to have five successes each. This should
  1273  		// cause none of the addresses to be ejected as none of them are below
  1274  		// the failure percentage threshold.
  1275  		for i := 0; i < 3; i++ {
  1276  			pi, err := picker.Pick(balancer.PickInfo{})
  1277  			if err != nil {
  1278  				t.Fatalf("picker.Pick failed with error: %v", err)
  1279  			}
  1280  			for c := 0; c < 5; c++ {
  1281  				pi.Done(balancer.DoneInfo{})
  1282  			}
  1283  		}
  1284  
  1285  		od.intervalTimerAlgorithm()
  1286  		sCtx, cancel := context.WithTimeout(context.Background(), defaultTestShortTimeout)
  1287  		defer cancel()
  1288  		if _, err := scsCh.Receive(sCtx); err == nil {
  1289  			t.Fatalf("no SubConn update should have been sent (no SubConn got ejected)")
  1290  		}
  1291  
  1292  		// Set two upstream addresses to have five successes each, and one
  1293  		// upstream address to have five failures. This should cause the address
  1294  		// with five failures to be ejected according to the Failure Percentage
  1295  		// Algorithm.
  1296  		for i := 0; i < 2; i++ {
  1297  			pi, err := picker.Pick(balancer.PickInfo{})
  1298  			if err != nil {
  1299  				t.Fatalf("picker.Pick failed with error: %v", err)
  1300  			}
  1301  			for c := 0; c < 5; c++ {
  1302  				pi.Done(balancer.DoneInfo{})
  1303  			}
  1304  		}
  1305  		pi, err := picker.Pick(balancer.PickInfo{})
  1306  		if err != nil {
  1307  			t.Fatalf("picker.Pick failed with error: %v", err)
  1308  		}
  1309  		for c := 0; c < 5; c++ {
  1310  			pi.Done(balancer.DoneInfo{Err: errors.New("some error")})
  1311  		}
  1312  
  1313  		// should eject address that always errored.
  1314  		od.intervalTimerAlgorithm()
  1315  
  1316  		// verify StateListener() got called with TRANSIENT_FAILURE for child
  1317  		// in address that was ejected.
  1318  		gotSCWS, err := scsCh.Receive(ctx)
  1319  		if err != nil {
  1320  			t.Fatalf("Error waiting for Sub Conn update: %v", err)
  1321  		}
  1322  		if err = scwsEqual(gotSCWS.(subConnWithState), subConnWithState{
  1323  			sc:    scw3,
  1324  			state: balancer.SubConnState{ConnectivityState: connectivity.TransientFailure},
  1325  		}); err != nil {
  1326  			t.Fatalf("Error in Sub Conn update: %v", err)
  1327  		}
  1328  
  1329  		// verify only one address got ejected.
  1330  		sCtx, cancel = context.WithTimeout(context.Background(), defaultTestShortTimeout)
  1331  		defer cancel()
  1332  		if _, err := scsCh.Receive(sCtx); err == nil {
  1333  			t.Fatalf("Only one SubConn update should have been sent (only one SubConn got ejected)")
  1334  		}
  1335  
  1336  		// upon the Outlier Detection balancer being reconfigured with a noop
  1337  		// configuration, every ejected SubConn should be unejected.
  1338  		od.UpdateClientConnState(balancer.ClientConnState{
  1339  			ResolverState: resolver.State{
  1340  				Addresses: []resolver.Address{
  1341  					{Addr: "address1"},
  1342  					{Addr: "address2"},
  1343  					{Addr: "address3"},
  1344  				},
  1345  			},
  1346  			BalancerConfig: &LBConfig{
  1347  				Interval:           math.MaxInt64,
  1348  				BaseEjectionTime:   iserviceconfig.Duration(30 * time.Second),
  1349  				MaxEjectionTime:    iserviceconfig.Duration(300 * time.Second),
  1350  				MaxEjectionPercent: 10,
  1351  				ChildPolicy: &iserviceconfig.BalancerConfig{
  1352  					Name:   t.Name(),
  1353  					Config: emptyChildConfig{},
  1354  				},
  1355  			},
  1356  		})
  1357  		gotSCWS, err = scsCh.Receive(ctx)
  1358  		if err != nil {
  1359  			t.Fatalf("Error waiting for Sub Conn update: %v", err)
  1360  		}
  1361  		if err = scwsEqual(gotSCWS.(subConnWithState), subConnWithState{
  1362  			sc:    scw3,
  1363  			state: balancer.SubConnState{ConnectivityState: connectivity.Idle},
  1364  		}); err != nil {
  1365  			t.Fatalf("Error in Sub Conn update: %v", err)
  1366  		}
  1367  	}
  1368  }
  1369  
  1370  // TestConcurrentOperations calls different operations on the balancer in
  1371  // separate goroutines to test for any race conditions and deadlocks. It also
  1372  // uses a child balancer which verifies that no operations on the child get
  1373  // called after the child balancer is closed.
  1374  func (s) TestConcurrentOperations(t *testing.T) {
  1375  	closed := grpcsync.NewEvent()
  1376  	stub.Register(t.Name(), stub.BalancerFuncs{
  1377  		UpdateClientConnState: func(*stub.BalancerData, balancer.ClientConnState) error {
  1378  			if closed.HasFired() {
  1379  				t.Error("UpdateClientConnState was called after Close(), which breaks the balancer API")
  1380  			}
  1381  			return nil
  1382  		},
  1383  		ResolverError: func(*stub.BalancerData, error) {
  1384  			if closed.HasFired() {
  1385  				t.Error("ResolverError was called after Close(), which breaks the balancer API")
  1386  			}
  1387  		},
  1388  		Close: func(*stub.BalancerData) {
  1389  			closed.Fire()
  1390  		},
  1391  		ExitIdle: func(*stub.BalancerData) {
  1392  			if closed.HasFired() {
  1393  				t.Error("ExitIdle was called after Close(), which breaks the balancer API")
  1394  			}
  1395  		},
  1396  	})
  1397  
  1398  	od, tcc, cleanup := setup(t)
  1399  	defer func() {
  1400  		cleanup()
  1401  	}()
  1402  
  1403  	od.UpdateClientConnState(balancer.ClientConnState{
  1404  		ResolverState: resolver.State{
  1405  			Addresses: []resolver.Address{
  1406  				{Addr: "address1"},
  1407  				{Addr: "address2"},
  1408  				{Addr: "address3"},
  1409  			},
  1410  		},
  1411  		BalancerConfig: &LBConfig{
  1412  			Interval:           math.MaxInt64, // so the interval will never run unless called manually in test.
  1413  			BaseEjectionTime:   iserviceconfig.Duration(30 * time.Second),
  1414  			MaxEjectionTime:    iserviceconfig.Duration(300 * time.Second),
  1415  			MaxEjectionPercent: 10,
  1416  			SuccessRateEjection: &SuccessRateEjection{ // Have both Success Rate and Failure Percentage to step through all the interval timer code
  1417  				StdevFactor:           500,
  1418  				EnforcementPercentage: 100,
  1419  				MinimumHosts:          3,
  1420  				RequestVolume:         3,
  1421  			},
  1422  			FailurePercentageEjection: &FailurePercentageEjection{
  1423  				Threshold:             50,
  1424  				EnforcementPercentage: 100,
  1425  				MinimumHosts:          3,
  1426  				RequestVolume:         3,
  1427  			},
  1428  			ChildPolicy: &iserviceconfig.BalancerConfig{
  1429  				Name:   t.Name(),
  1430  				Config: emptyChildConfig{},
  1431  			},
  1432  		},
  1433  	})
  1434  	ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
  1435  	defer cancel()
  1436  
  1437  	scw1, err := od.NewSubConn([]resolver.Address{{Addr: "address1"}}, balancer.NewSubConnOptions{})
  1438  	if err != nil {
  1439  		t.Fatalf("error in od.NewSubConn call: %v", err)
  1440  	}
  1441  	if err != nil {
  1442  		t.Fatalf("error in od.NewSubConn call: %v", err)
  1443  	}
  1444  
  1445  	scw2, err := od.NewSubConn([]resolver.Address{{Addr: "address2"}}, balancer.NewSubConnOptions{})
  1446  	if err != nil {
  1447  		t.Fatalf("error in od.NewSubConn call: %v", err)
  1448  	}
  1449  
  1450  	scw3, err := od.NewSubConn([]resolver.Address{{Addr: "address3"}}, balancer.NewSubConnOptions{})
  1451  	if err != nil {
  1452  		t.Fatalf("error in od.NewSubConn call: %v", err)
  1453  	}
  1454  
  1455  	od.UpdateState(balancer.State{
  1456  		ConnectivityState: connectivity.Ready,
  1457  		Picker: &rrPicker{
  1458  			scs: []balancer.SubConn{scw2, scw3},
  1459  		},
  1460  	})
  1461  
  1462  	var picker balancer.Picker
  1463  	select {
  1464  	case <-ctx.Done():
  1465  		t.Fatalf("timeout while waiting for a UpdateState call on the ClientConn")
  1466  	case picker = <-tcc.NewPickerCh:
  1467  	}
  1468  
  1469  	finished := make(chan struct{})
  1470  	var wg sync.WaitGroup
  1471  	wg.Add(1)
  1472  	go func() {
  1473  		defer wg.Done()
  1474  		for {
  1475  			select {
  1476  			case <-finished:
  1477  				return
  1478  			default:
  1479  			}
  1480  			pi, err := picker.Pick(balancer.PickInfo{})
  1481  			if err != nil {
  1482  				continue
  1483  			}
  1484  			pi.Done(balancer.DoneInfo{})
  1485  			pi.Done(balancer.DoneInfo{Err: errors.New("some error")})
  1486  			time.Sleep(1 * time.Nanosecond)
  1487  		}
  1488  	}()
  1489  
  1490  	wg.Add(1)
  1491  	go func() {
  1492  		defer wg.Done()
  1493  		for {
  1494  			select {
  1495  			case <-finished:
  1496  				return
  1497  			default:
  1498  			}
  1499  			od.intervalTimerAlgorithm()
  1500  		}
  1501  	}()
  1502  
  1503  	// call Outlier Detection's balancer.ClientConn operations asynchronously.
  1504  	// balancer.ClientConn operations have no guarantee from the API to be
  1505  	// called synchronously.
  1506  	wg.Add(1)
  1507  	go func() {
  1508  		defer wg.Done()
  1509  		for {
  1510  			select {
  1511  			case <-finished:
  1512  				return
  1513  			default:
  1514  			}
  1515  			od.UpdateState(balancer.State{
  1516  				ConnectivityState: connectivity.Ready,
  1517  				Picker: &rrPicker{
  1518  					scs: []balancer.SubConn{scw2, scw3},
  1519  				},
  1520  			})
  1521  			time.Sleep(1 * time.Nanosecond)
  1522  		}
  1523  	}()
  1524  
  1525  	wg.Add(1)
  1526  	go func() {
  1527  		defer wg.Done()
  1528  		od.NewSubConn([]resolver.Address{{Addr: "address4"}}, balancer.NewSubConnOptions{})
  1529  	}()
  1530  
  1531  	wg.Add(1)
  1532  	go func() {
  1533  		defer wg.Done()
  1534  		scw1.Shutdown()
  1535  	}()
  1536  
  1537  	wg.Add(1)
  1538  	go func() {
  1539  		defer wg.Done()
  1540  		od.UpdateAddresses(scw2, []resolver.Address{{Addr: "address3"}})
  1541  	}()
  1542  
  1543  	// Call balancer.Balancers synchronously in this goroutine, upholding the
  1544  	// balancer.Balancer API guarantee of synchronous calls.
  1545  	od.UpdateClientConnState(balancer.ClientConnState{ // This will delete addresses and flip to no op
  1546  		ResolverState: resolver.State{
  1547  			Addresses: []resolver.Address{{Addr: "address1"}},
  1548  		},
  1549  		BalancerConfig: &LBConfig{
  1550  			Interval: math.MaxInt64,
  1551  			ChildPolicy: &iserviceconfig.BalancerConfig{
  1552  				Name:   t.Name(),
  1553  				Config: emptyChildConfig{},
  1554  			},
  1555  		},
  1556  	})
  1557  
  1558  	// Call balancer.Balancers synchronously in this goroutine, upholding the
  1559  	// balancer.Balancer API guarantee.
  1560  	od.updateSubConnState(scw1.(*subConnWrapper).SubConn, balancer.SubConnState{
  1561  		ConnectivityState: connectivity.Connecting,
  1562  	})
  1563  	od.ResolverError(errors.New("some error"))
  1564  	od.ExitIdle()
  1565  	od.Close()
  1566  	close(finished)
  1567  	wg.Wait()
  1568  }