github.com/m3db/m3@v1.5.0/src/dbnode/client/connection_pool_test.go

// Copyright (c) 2016 Uber Technologies, Inc.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.

package client

import (
	"fmt"
	"sync"
	"sync/atomic"
	"testing"
	"time"

	"github.com/m3db/m3/src/dbnode/generated/thrift/rpc"
	"github.com/m3db/m3/src/dbnode/topology"
	xclock "github.com/m3db/m3/src/x/clock"

	"github.com/golang/mock/gomock"
	"github.com/stretchr/testify/require"
	"github.com/uber/tchannel-go"
)

const (
	testHostStr  = "testhost"
	testHostAddr = testHostStr + ":9000"
)

var (
	h = topology.NewHost(testHostStr, testHostAddr)
)

type noopPooledChannel struct {
	address    string
	closeCount int32
}

func asNoopPooledChannel(c Channel) *noopPooledChannel {
	cc, ok := c.(*noopPooledChannel)
	if !ok {
		panic("not a noopPooledChannel")
	}
	return cc
}

func (c *noopPooledChannel) CloseCount() int {
	return int(atomic.LoadInt32(&c.closeCount))
}

func (c *noopPooledChannel) Close() {
	atomic.AddInt32(&c.closeCount, 1)
}

func (c *noopPooledChannel) GetSubChannel(
	serviceName string,
	opts ...tchannel.SubChannelOption,
) *tchannel.SubChannel {
	return nil
}

func newConnectionPoolTestOptions() Options {
	return newSessionTestOptions().
		SetBackgroundConnectInterval(5 * time.Millisecond).
		SetBackgroundConnectStutter(2 * time.Millisecond).
		SetBackgroundHealthCheckInterval(5 * time.Millisecond).
		SetBackgroundHealthCheckStutter(2 * time.Millisecond)
}

func TestConnectionPoolConnectsAndRetriesConnects(t *testing.T) {
	// Scenario:
	// 1. Try fill 4 connections
	// > Fail 1 on connection step, have 3 connections
	// 2. Try fill remaining connection
	// > Fail 1 on health check, have 3 connections
	// 3. Try fill remaining connection
	// > Fulfill remaining connection, have 4 connections
	// 4. Don't bother
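	//
	// The new-connection fn, health checks, and connect sleep are stubbed below;
	// sleepWgs/proceedSleepWgs gate each background connect round so the
	// connection count can be asserted deterministically after every round.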

	var (
		attempts        int32
		sleeps          int32
		rounds          int32
		sleepWgs        [4]sync.WaitGroup
		proceedSleepWgs [3]sync.WaitGroup
		doneWg          sync.WaitGroup
	)
	for i := range sleepWgs {
		sleepWgs[i].Add(1)
	}
	for i := range proceedSleepWgs {
		proceedSleepWgs[i].Add(1)
	}
	doneWg.Add(1)

	opts := newConnectionPoolTestOptions()
	opts = opts.SetMaxConnectionCount(4)

	fn := func(
		ch string, addr string, opts Options,
	) (Channel, rpc.TChanNode, error) {
		attempt := int(atomic.AddInt32(&attempts, 1))
		if attempt == 1 {
			return nil, nil, fmt.Errorf("a connect error")
		}
		return &noopPooledChannel{}, nil, nil
	}

	opts = opts.SetNewConnectionFn(fn)
	conns := newConnectionPool(h, opts).(*connPool)
	conns.healthCheckNewConn = func(client rpc.TChanNode, opts Options, checkBootstrapped bool) error {
		if atomic.LoadInt32(&rounds) == 1 {
			// If second round then fail health check
			return fmt.Errorf("a health check error")
		}
		return nil
	}
	conns.healthCheck = func(client rpc.TChanNode, opts Options, checkBootstrapped bool) error {
		return nil
	}
	conns.sleepConnect = func(t time.Duration) {
		sleep := int(atomic.AddInt32(&sleeps, 1))
		if sleep <= 4 {
			if sleep <= len(sleepWgs) {
				sleepWgs[sleep-1].Done()
			}
			if sleep <= len(proceedSleepWgs) {
				proceedSleepWgs[sleep-1].Wait()
			}
		}
		if sleep == 4 {
			doneWg.Wait()
			return // All done
		}
		atomic.AddInt32(&rounds, 1)
		time.Sleep(time.Millisecond)
	}

	require.Equal(t, 0, conns.ConnectionCount())

	conns.Open()

	// Wait for first round, should've created all conns except first
	sleepWgs[0].Wait()
	require.Equal(t, 3, conns.ConnectionCount())
	proceedSleepWgs[0].Done()

	// Wait for second round, all attempts should succeed but all fail health checks
	sleepWgs[1].Wait()
	require.Equal(t, 3, conns.ConnectionCount())
	proceedSleepWgs[1].Done()

	// Wait for third round, now should succeed and all connections accounted for
	sleepWgs[2].Wait()
	require.Equal(t, 4, conns.ConnectionCount())
	doneAll := attempts
	proceedSleepWgs[2].Done()

	// Wait for fourth round, now should not involve attempting to spawn connections
	sleepWgs[3].Wait()
	// Ensure no more attempts done in final round
	require.Equal(t, doneAll, attempts)

	conns.Close()
	doneWg.Done()

	nextClient, _, err := conns.NextClient()
	require.Nil(t, nextClient)
	require.Equal(t, errConnectionPoolClosed, err)
}

func TestConnectionPoolHealthChecks(t *testing.T) {
	ctrl := gomock.NewController(t)
	defer ctrl.Finish()

	// Scenario:
	// 1. Fill 2 connections
	// 2. Round 1, fail conn 0 health checks
	// > Take connection out
	// 3. Round 2, fail conn 1 health checks
	// > Take connection out
	opts := newConnectionPoolTestOptions()
	opts = opts.SetMaxConnectionCount(2)
	opts = opts.SetHostConnectTimeout(10 * time.Second)
	healthCheckFailLimit := opts.BackgroundHealthCheckFailLimit()
	healthCheckFailThrottleFactor := opts.BackgroundHealthCheckFailThrottleFactor()

	var (
		newConnAttempt int32
		connectRounds  int32
		healthRounds   int32
		invokeFail     int32
		client1        = rpc.TChanNode(rpc.NewMockTChanNode(ctrl))
		client2        = rpc.TChanNode(rpc.NewMockTChanNode(ctrl))
		overrides      = []healthCheckFn{}
		overridesMut   sync.RWMutex
		pushOverride   = func(fn healthCheckFn, count int) {
			overridesMut.Lock()
			defer overridesMut.Unlock()
			for i := 0; i < count; i++ {
				overrides = append(overrides, fn)
			}
		}
		popOverride = func() healthCheckFn {
			overridesMut.Lock()
			defer overridesMut.Unlock()
			if len(overrides) == 0 {
				return nil
			}
			next := overrides[0]
			overrides = overrides[1:]
			return next
		}
		pushFailClientOverride = func(failTargetClient rpc.TChanNode) {
			var failOverride healthCheckFn
			failOverride = func(client rpc.TChanNode, opts Options, checkBootstrapped bool) error {
				if client == failTargetClient {
					atomic.AddInt32(&invokeFail, 1)
					return fmt.Errorf("fail client")
				}
				// Not failing this client, re-enqueue
				pushOverride(failOverride, 1)
				return nil
			}
			pushOverride(failOverride, healthCheckFailLimit)
		}
		onNextSleepHealth     []func()
		onNextSleepHealthMut  sync.RWMutex
		pushOnNextSleepHealth = func(fn func()) {
			onNextSleepHealthMut.Lock()
			defer onNextSleepHealthMut.Unlock()
			onNextSleepHealth = append(onNextSleepHealth, fn)
		}
		popOnNextSleepHealth = func() func() {
			onNextSleepHealthMut.Lock()
			defer onNextSleepHealthMut.Unlock()
			if len(onNextSleepHealth) == 0 {
				return nil
			}
			next := onNextSleepHealth[0]
			onNextSleepHealth = onNextSleepHealth[1:]
			return next
		}
		failsDoneWg [2]sync.WaitGroup
		failsDone   [2]int32
	)
	for i := range failsDoneWg {
		failsDoneWg[i].Add(1)
	}

	fn := func(
		ch string, addr string, opts Options,
	) (Channel, rpc.TChanNode, error) {
		attempt := atomic.AddInt32(&newConnAttempt, 1)
		if attempt == 1 {
			return &noopPooledChannel{}, client1, nil
		} else if attempt == 2 {
			return &noopPooledChannel{}, client2, nil
		}
		return nil, nil, fmt.Errorf("spawning only 2 connections")
	}
	opts = opts.SetNewConnectionFn(fn)

	conns := newConnectionPool(h, opts).(*connPool)
	conns.healthCheckNewConn = func(client rpc.TChanNode, opts Options, checkBootstrapped bool) error {
		return nil
	}
	conns.healthCheck = func(client rpc.TChanNode, opts Options, checkBootstrapped bool) error {
		if fn := popOverride(); fn != nil {
			return fn(client, opts, checkBootstrapped)
		}
		return nil
	}
	conns.sleepConnect = func(d time.Duration) {
		atomic.AddInt32(&connectRounds, 1)
		time.Sleep(time.Millisecond)
	}
	conns.sleepHealth = func(d time.Duration) {
		atomic.AddInt32(&healthRounds, 1)
		if int(atomic.LoadInt32(&invokeFail)) == 1*healthCheckFailLimit &&
			atomic.CompareAndSwapInt32(&failsDone[0], 0, 1) {
			failsDoneWg[0].Done()
		} else if int(atomic.LoadInt32(&invokeFail)) == 2*healthCheckFailLimit &&
			atomic.CompareAndSwapInt32(&failsDone[1], 0, 1) {
			failsDoneWg[1].Done()
		}
		time.Sleep(time.Millisecond)
		if fn := popOnNextSleepHealth(); fn != nil {
			fn()
		}
	}
	conns.sleepHealthRetry = func(d time.Duration) {
		expected := healthCheckFailThrottleFactor * float64(opts.HostConnectTimeout())
		require.Equal(t, time.Duration(expected), d)
	}

	require.Equal(t, 0, conns.ConnectionCount())

	conns.Open()

	// Wait for first connect round, should've created both connections
	for atomic.LoadInt32(&connectRounds) < 1 {
		time.Sleep(time.Millisecond)
	}

	require.Equal(t, 2, conns.ConnectionCount())

	// Fail client1 health check
	pushOnNextSleepHealth(func() {
		pushFailClientOverride(client1)
	})

	// Wait for health check round to take action
	failsDoneWg[0].Wait()

	// Verify only 1 connection and it's client2
	xclock.WaitUntil(func() bool {
		// Need WaitUntil() because there is a delay between the health check failing
		// and the connection actually being removed.
		return conns.ConnectionCount() == 1
	}, 5*time.Second)
	for i := 0; i < 2; i++ {
		nextClient, _, err := conns.NextClient()
		require.NoError(t, err)
		require.Equal(t, client2, nextClient)
	}

	// Fail client2 health check
	pushOnNextSleepHealth(func() {
		pushFailClientOverride(client2)
	})

	// Wait for health check round to take action
	failsDoneWg[1].Wait()
	xclock.WaitUntil(func() bool {
		// Need WaitUntil() because there is a delay between the health check failing
		// and the connection actually being removed.
		return conns.ConnectionCount() == 0
	}, 5*time.Second)
	nextClient, _, err := conns.NextClient()
	require.Nil(t, nextClient)
	require.Equal(t, errConnectionPoolHasNoConnections, err)

	conns.Close()

	nextClient, _, err = conns.NextClient()
	require.Nil(t, nextClient)
	require.Equal(t, errConnectionPoolClosed, err)
}
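
// TestConnectionPoolOpenCloseSketch is a minimal illustrative sketch of the
// stubbing pattern used by the tests above (its name and the 5s wait bound are
// assumptions, not requirements of the pool): every dial succeeds via a stubbed
// NewConnectionFn, the health check hooks are no-ops, and the test only checks
// that Open() fills the pool and that NextClient() fails after Close().
func TestConnectionPoolOpenCloseSketch(t *testing.T) {
	opts := newConnectionPoolTestOptions().SetMaxConnectionCount(2)
	opts = opts.SetNewConnectionFn(func(
		ch string, addr string, opts Options,
	) (Channel, rpc.TChanNode, error) {
		// Every dial succeeds with a no-op channel and no thrift client.
		return &noopPooledChannel{}, nil, nil
	})

	conns := newConnectionPool(h, opts).(*connPool)
	conns.healthCheckNewConn = func(client rpc.TChanNode, opts Options, checkBootstrapped bool) error {
		return nil
	}
	conns.healthCheck = func(client rpc.TChanNode, opts Options, checkBootstrapped bool) error {
		return nil
	}

	require.Equal(t, 0, conns.ConnectionCount())
	conns.Open()

	// The background connect loop runs on the shortened intervals from
	// newConnectionPoolTestOptions, so the pool should fill shortly after Open().
	xclock.WaitUntil(func() bool {
		return conns.ConnectionCount() == 2
	}, 5*time.Second)
	require.Equal(t, 2, conns.ConnectionCount())

	conns.Close()

	nextClient, _, err := conns.NextClient()
	require.Nil(t, nextClient)
	require.Equal(t, errConnectionPoolClosed, err)
}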