github.com/m3db/m3@v1.5.0/src/dbnode/client/connection_pool_test.go

// Copyright (c) 2016 Uber Technologies, Inc.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.

package client

import (
	"fmt"
	"sync"
	"sync/atomic"
	"testing"
	"time"

	"github.com/m3db/m3/src/dbnode/generated/thrift/rpc"
	"github.com/m3db/m3/src/dbnode/topology"
	xclock "github.com/m3db/m3/src/x/clock"

	"github.com/golang/mock/gomock"
	"github.com/stretchr/testify/require"
	"github.com/uber/tchannel-go"
)

const (
	testHostStr  = "testhost"
	testHostAddr = testHostStr + ":9000"
)

var (
	h = topology.NewHost(testHostStr, testHostAddr)
)

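// noopPooledChannel is a no-op Channel implementation that records how many
// times Close is called, so tests can assert that connections are torn down.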
type noopPooledChannel struct {
	address    string
	closeCount int32
}

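// asNoopPooledChannel asserts that the given Channel is a *noopPooledChannel
// and panics if it is not.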
func asNoopPooledChannel(c Channel) *noopPooledChannel {
	cc, ok := c.(*noopPooledChannel)
	if !ok {
		panic("not a noopPooledChannel")
	}
	return cc
}

func (c *noopPooledChannel) CloseCount() int {
	return int(atomic.LoadInt32(&c.closeCount))
}

func (c *noopPooledChannel) Close() {
	atomic.AddInt32(&c.closeCount, 1)
}

func (c *noopPooledChannel) GetSubChannel(
	serviceName string,
	opts ...tchannel.SubChannelOption,
) *tchannel.SubChannel {
	return nil
}

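// newConnectionPoolTestOptions returns session test options with short
// background connect and health check intervals so the pool's background
// loops run quickly under test.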
func newConnectionPoolTestOptions() Options {
	return newSessionTestOptions().
		SetBackgroundConnectInterval(5 * time.Millisecond).
		SetBackgroundConnectStutter(2 * time.Millisecond).
		SetBackgroundHealthCheckInterval(5 * time.Millisecond).
		SetBackgroundHealthCheckStutter(2 * time.Millisecond)
}

func TestConnectionPoolConnectsAndRetriesConnects(t *testing.T) {
	// Scenario:
	// 1. Try to fill 4 connections
	// > Fail 1 on connection step, have 3 connections
	// 2. Try to fill remaining connection
	// > Fail 1 on health check, have 3 connections
	// 3. Try to fill remaining connection
	// > Fulfill remaining connection, have 4 connections
	// 4. Don't attempt any more connections, pool is full

	var (
		attempts        int32
		sleeps          int32
		rounds          int32
		sleepWgs        [4]sync.WaitGroup
		proceedSleepWgs [3]sync.WaitGroup
		doneWg          sync.WaitGroup
	)
	for i := range sleepWgs {
		sleepWgs[i].Add(1)
	}
	for i := range proceedSleepWgs {
		proceedSleepWgs[i].Add(1)
	}
	doneWg.Add(1)

	opts := newConnectionPoolTestOptions()
	opts = opts.SetMaxConnectionCount(4)

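	// The new-connection fn fails only the very first attempt, leaving the
	// pool one connection short after the initial fill round.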
	fn := func(
		ch string, addr string, opts Options,
	) (Channel, rpc.TChanNode, error) {
		attempt := int(atomic.AddInt32(&attempts, 1))
		if attempt == 1 {
			return nil, nil, fmt.Errorf("a connect error")
		}
		return &noopPooledChannel{}, nil, nil
	}

	opts = opts.SetNewConnectionFn(fn)
	conns := newConnectionPool(h, opts).(*connPool)
	conns.healthCheckNewConn = func(client rpc.TChanNode, opts Options, checkBootstrapped bool) error {
		if atomic.LoadInt32(&rounds) == 1 {
			// If second round then fail health check
			return fmt.Errorf("a health check error")
		}
		return nil
	}
	conns.healthCheck = func(client rpc.TChanNode, opts Options, checkBootstrapped bool) error {
		return nil
	}
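	// sleepConnect is stubbed so the test can step through each background
	// connect round in lockstep using the wait groups declared above.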
	conns.sleepConnect = func(t time.Duration) {
		sleep := int(atomic.AddInt32(&sleeps, 1))
		if sleep <= 4 {
			if sleep <= len(sleepWgs) {
				sleepWgs[sleep-1].Done()
			}
			if sleep <= len(proceedSleepWgs) {
				proceedSleepWgs[sleep-1].Wait()
			}
		}
		if sleep == 4 {
			doneWg.Wait()
			return // All done
		}
		atomic.AddInt32(&rounds, 1)
		time.Sleep(time.Millisecond)
	}

	require.Equal(t, 0, conns.ConnectionCount())

	conns.Open()

	// Wait for first round, should've created all conns except first
	sleepWgs[0].Wait()
	require.Equal(t, 3, conns.ConnectionCount())
	proceedSleepWgs[0].Done()

	// Wait for second round, all attempts should succeed but all fail health checks
	sleepWgs[1].Wait()
	require.Equal(t, 3, conns.ConnectionCount())
	proceedSleepWgs[1].Done()

	// Wait for third round, now should succeed and all connections accounted for
	sleepWgs[2].Wait()
	require.Equal(t, 4, conns.ConnectionCount())
	doneAll := attempts
	proceedSleepWgs[2].Done()

	// Wait for fourth round, should not involve attempting to spawn connections
	sleepWgs[3].Wait()
	// Ensure no more attempts done in final round
	require.Equal(t, doneAll, attempts)

	conns.Close()
	doneWg.Done()

	nextClient, _, err := conns.NextClient()
	require.Nil(t, nextClient)
	require.Equal(t, errConnectionPoolClosed, err)
}

func TestConnectionPoolHealthChecks(t *testing.T) {
	ctrl := gomock.NewController(t)
	defer ctrl.Finish()

	// Scenario:
	// 1. Fill 2 connections
	// 2. Round 1, fail conn 0 health checks
	// > Take connection out
	// 3. Round 2, fail conn 1 health checks
	// > Take connection out
	opts := newConnectionPoolTestOptions()
	opts = opts.SetMaxConnectionCount(2)
	opts = opts.SetHostConnectTimeout(10 * time.Second)
	healthCheckFailLimit := opts.BackgroundHealthCheckFailLimit()
	healthCheckFailThrottleFactor := opts.BackgroundHealthCheckFailThrottleFactor()

	var (
		newConnAttempt int32
		connectRounds  int32
		healthRounds   int32
		invokeFail     int32
		client1        = rpc.TChanNode(rpc.NewMockTChanNode(ctrl))
		client2        = rpc.TChanNode(rpc.NewMockTChanNode(ctrl))
		overrides      = []healthCheckFn{}
		overridesMut   sync.RWMutex
		pushOverride   = func(fn healthCheckFn, count int) {
			overridesMut.Lock()
			defer overridesMut.Unlock()
			for i := 0; i < count; i++ {
				overrides = append(overrides, fn)
			}
		}
		popOverride = func() healthCheckFn {
			overridesMut.Lock()
			defer overridesMut.Unlock()
			if len(overrides) == 0 {
				return nil
			}
			next := overrides[0]
			overrides = overrides[1:]
			return next
		}
		pushFailClientOverride = func(failTargetClient rpc.TChanNode) {
			var failOverride healthCheckFn
			failOverride = func(client rpc.TChanNode, opts Options, checkBootstrapped bool) error {
				if client == failTargetClient {
					atomic.AddInt32(&invokeFail, 1)
					return fmt.Errorf("fail client")
				}
				// Not failing this client, re-enqueue
				pushOverride(failOverride, 1)
				return nil
			}
			pushOverride(failOverride, healthCheckFailLimit)
		}
		onNextSleepHealth     []func()
		onNextSleepHealthMut  sync.RWMutex
		pushOnNextSleepHealth = func(fn func()) {
			onNextSleepHealthMut.Lock()
			defer onNextSleepHealthMut.Unlock()
			onNextSleepHealth = append(onNextSleepHealth, fn)
		}
		popOnNextSleepHealth = func() func() {
			onNextSleepHealthMut.Lock()
			defer onNextSleepHealthMut.Unlock()
			if len(onNextSleepHealth) == 0 {
				return nil
			}
			next := onNextSleepHealth[0]
			onNextSleepHealth = onNextSleepHealth[1:]
			return next
		}
		failsDoneWg [2]sync.WaitGroup
		failsDone   [2]int32
	)
	for i := range failsDoneWg {
		failsDoneWg[i].Add(1)
	}

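	// Only the first two connection attempts succeed, handing out exactly
	// client1 and client2; any further attempts fail.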
	fn := func(
		ch string, addr string, opts Options,
	) (Channel, rpc.TChanNode, error) {
		attempt := atomic.AddInt32(&newConnAttempt, 1)
		if attempt == 1 {
			return &noopPooledChannel{}, client1, nil
		} else if attempt == 2 {
			return &noopPooledChannel{}, client2, nil
		}
		return nil, nil, fmt.Errorf("spawning only 2 connections")
	}
	opts = opts.SetNewConnectionFn(fn)

	conns := newConnectionPool(h, opts).(*connPool)
	conns.healthCheckNewConn = func(client rpc.TChanNode, opts Options, checkBootstrapped bool) error {
		return nil
	}
	conns.healthCheck = func(client rpc.TChanNode, opts Options, checkBootstrapped bool) error {
		if fn := popOverride(); fn != nil {
			return fn(client, opts, checkBootstrapped)
		}
		return nil
	}
	conns.sleepConnect = func(d time.Duration) {
		atomic.AddInt32(&connectRounds, 1)
		time.Sleep(time.Millisecond)
	}
	conns.sleepHealth = func(d time.Duration) {
		atomic.AddInt32(&healthRounds, 1)
		if int(atomic.LoadInt32(&invokeFail)) == 1*healthCheckFailLimit &&
			atomic.CompareAndSwapInt32(&failsDone[0], 0, 1) {
			failsDoneWg[0].Done()
		} else if int(atomic.LoadInt32(&invokeFail)) == 2*healthCheckFailLimit &&
			atomic.CompareAndSwapInt32(&failsDone[1], 0, 1) {
			failsDoneWg[1].Done()
		}
		time.Sleep(time.Millisecond)
		if fn := popOnNextSleepHealth(); fn != nil {
			fn()
		}
	}
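	// Health check retries should be throttled by the configured factor of
	// the host connect timeout.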
	conns.sleepHealthRetry = func(d time.Duration) {
		expected := healthCheckFailThrottleFactor * float64(opts.HostConnectTimeout())
		require.Equal(t, time.Duration(expected), d)
	}

	require.Equal(t, 0, conns.ConnectionCount())

	conns.Open()

	// Wait for first connect round, should've created both conns
	for atomic.LoadInt32(&connectRounds) < 1 {
		time.Sleep(time.Millisecond)
	}

	require.Equal(t, 2, conns.ConnectionCount())

	// Fail client1 health check
	pushOnNextSleepHealth(func() {
		pushFailClientOverride(client1)
	})

	// Wait for health check round to take action
	failsDoneWg[0].Wait()

	// Verify only 1 connection remains and it's client2
	xclock.WaitUntil(func() bool {
		// Need WaitUntil() because there is a delay between the health check failing
		// and the connection actually being removed.
		return conns.ConnectionCount() == 1
	}, 5*time.Second)
	for i := 0; i < 2; i++ {
		nextClient, _, err := conns.NextClient()
		require.NoError(t, err)
		require.Equal(t, client2, nextClient)
	}

	// Fail client2 health check
	pushOnNextSleepHealth(func() {
		pushFailClientOverride(client2)
	})

	// Wait for health check round to take action
	failsDoneWg[1].Wait()
	xclock.WaitUntil(func() bool {
		// Need WaitUntil() because there is a delay between the health check failing
		// and the connection actually being removed.
		return conns.ConnectionCount() == 0
	}, 5*time.Second)
	nextClient, _, err := conns.NextClient()
	require.Nil(t, nextClient)
	require.Equal(t, errConnectionPoolHasNoConnections, err)

	conns.Close()

	nextClient, _, err = conns.NextClient()
	require.Nil(t, nextClient)
	require.Equal(t, errConnectionPoolClosed, err)
}