github.com/m3db/m3@v1.5.1-0.20231129193456-75a402aa583b/src/dbnode/client/connection_pool.go (about) 1 // Copyright (c) 2016 Uber Technologies, Inc. 2 // 3 // Permission is hereby granted, free of charge, to any person obtaining a copy 4 // of this software and associated documentation files (the "Software"), to deal 5 // in the Software without restriction, including without limitation the rights 6 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 // copies of the Software, and to permit persons to whom the Software is 8 // furnished to do so, subject to the following conditions: 9 // 10 // The above copyright notice and this permission notice shall be included in 11 // all copies or substantial portions of the Software. 12 // 13 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 19 // THE SOFTWARE. 20 21 package client 22 23 import ( 24 "errors" 25 "fmt" 26 "math" 27 "math/rand" 28 "sync" 29 "sync/atomic" 30 "time" 31 32 murmur3 "github.com/m3db/stackmurmur3/v2" 33 "github.com/uber-go/tally" 34 "github.com/uber/tchannel-go/thrift" 35 "go.uber.org/zap" 36 37 "github.com/m3db/m3/src/dbnode/generated/thrift/rpc" 38 "github.com/m3db/m3/src/dbnode/topology" 39 ) 40 41 const ( 42 channelName = "Client" 43 ) 44 45 var ( 46 errConnectionPoolClosed = errors.New("connection pool closed") 47 errConnectionPoolHasNoConnections = newHostNotAvailableError(errors.New("connection pool has no connections")) 48 errNodeNotBootstrapped = errors.New("node not bootstrapped") 49 ) 50 51 type connPool struct { 52 sync.RWMutex 53 54 opts Options 55 host topology.Host 56 pool []conn 57 poolLen int64 58 used int64 59 connectRand rand.Source 60 healthCheckRand rand.Source 61 healthCheckNewConn healthCheckFn 62 healthCheck healthCheckFn 63 sleepConnect sleepFn 64 sleepHealth sleepFn 65 sleepHealthRetry sleepFn 66 status status 67 healthStatus tally.Gauge 68 } 69 70 type conn struct { 71 channel Channel 72 client rpc.TChanNode 73 } 74 75 // NewConnectionFn is a function that creates a connection. 76 type NewConnectionFn func( 77 channelName string, addr string, opts Options, 78 ) (Channel, rpc.TChanNode, error) 79 80 type healthCheckFn func(client rpc.TChanNode, opts Options, checkBootstrapped bool) error 81 82 type sleepFn func(t time.Duration) 83 84 func newConnectionPool(host topology.Host, opts Options) connectionPool { 85 seed := int64(murmur3.StringSum32(host.Address())) 86 87 scope := opts.InstrumentOptions(). 88 MetricsScope(). 89 Tagged(map[string]string{ 90 "hostID": host.ID(), 91 }) 92 93 p := &connPool{ 94 opts: opts, 95 host: host, 96 pool: make([]conn, 0, opts.MaxConnectionCount()), 97 poolLen: 0, 98 connectRand: rand.NewSource(seed), 99 healthCheckRand: rand.NewSource(seed + 1), 100 healthCheckNewConn: healthCheck, 101 healthCheck: healthCheck, 102 sleepConnect: time.Sleep, 103 sleepHealth: time.Sleep, 104 sleepHealthRetry: time.Sleep, 105 healthStatus: scope.Gauge("health-status"), 106 } 107 108 return p 109 } 110 111 func (p *connPool) Open() { 112 p.Lock() 113 defer p.Unlock() 114 115 if p.status != statusNotOpen { 116 return 117 } 118 119 p.status = statusOpen 120 121 connectEvery := p.opts.BackgroundConnectInterval() 122 connectStutter := p.opts.BackgroundConnectStutter() 123 go p.connectEvery(connectEvery, connectStutter) 124 125 healthCheckEvery := p.opts.BackgroundHealthCheckInterval() 126 healthCheckStutter := p.opts.BackgroundHealthCheckStutter() 127 go p.healthCheckEvery(healthCheckEvery, healthCheckStutter) 128 } 129 130 func (p *connPool) ConnectionCount() int { 131 p.RLock() 132 poolLen := p.poolLen 133 p.RUnlock() 134 return int(poolLen) 135 } 136 137 func (p *connPool) NextClient() (rpc.TChanNode, Channel, error) { 138 p.RLock() 139 if p.status != statusOpen { 140 p.RUnlock() 141 return nil, nil, errConnectionPoolClosed 142 } 143 if p.poolLen < 1 { 144 p.RUnlock() 145 return nil, nil, errConnectionPoolHasNoConnections 146 } 147 n := atomic.AddInt64(&p.used, 1) 148 conn := p.pool[n%p.poolLen] 149 p.RUnlock() 150 return conn.client, conn.channel, nil 151 } 152 153 func (p *connPool) Close() { 154 p.Lock() 155 if p.status != statusOpen { 156 p.Unlock() 157 return 158 } 159 p.status = statusClosed 160 p.Unlock() 161 162 for i := range p.pool { 163 p.pool[i].channel.Close() 164 } 165 } 166 167 func (p *connPool) connectEvery(interval time.Duration, stutter time.Duration) { 168 log := p.opts.InstrumentOptions().Logger() 169 target := p.opts.MaxConnectionCount() 170 171 for { 172 p.RLock() 173 state := p.status 174 poolLen := int(p.poolLen) 175 p.RUnlock() 176 if state != statusOpen { 177 return 178 } 179 180 address := p.host.Address() 181 182 var wg sync.WaitGroup 183 for i := 0; i < target-poolLen; i++ { 184 wg.Add(1) 185 newConnFn := p.opts.NewConnectionFn() 186 go func() { 187 defer wg.Done() 188 189 // Create connection 190 channel, client, err := newConnFn(channelName, address, p.opts) 191 if err != nil { 192 log.Debug("could not connect", zap.String("host", address), zap.Error(err)) 193 return 194 } 195 196 // Health check the connection 197 if err := p.healthCheckNewConn(client, p.opts, false); err != nil { 198 p.maybeEmitHealthStatus(healthStatusCheckFailed) 199 log.Debug("could not connect, failed health check", zap.String("host", address), zap.Error(err)) 200 channel.Close() 201 return 202 } 203 204 p.maybeEmitHealthStatus(healthStatusOK) 205 p.Lock() 206 if p.status == statusOpen { 207 p.pool = append(p.pool, conn{channel, client}) 208 p.poolLen = int64(len(p.pool)) 209 } else { 210 // NB(antanas): just being defensive. 211 // It's likely a corner case and happens only during server shutdown. 212 channel.Close() 213 } 214 p.Unlock() 215 }() 216 } 217 218 wg.Wait() 219 220 p.sleepConnect(interval + randStutter(p.connectRand, stutter)) 221 } 222 } 223 224 func (p *connPool) maybeEmitHealthStatus(hs healthStatus) { 225 if p.opts.HostQueueEmitsHealthStatus() { 226 p.healthStatus.Update(float64(hs)) 227 } 228 } 229 230 func (p *connPool) healthCheckEvery(interval time.Duration, stutter time.Duration) { 231 log := p.opts.InstrumentOptions().Logger() 232 nowFn := p.opts.ClockOptions().NowFn() 233 234 for { 235 p.RLock() 236 state := p.status 237 p.RUnlock() 238 if state != statusOpen { 239 return 240 } 241 242 var ( 243 wg sync.WaitGroup 244 start = nowFn() 245 deadline = start.Add(interval + randStutter(p.healthCheckRand, stutter)) 246 ) 247 248 p.RLock() 249 for i := int64(0); i < p.poolLen; i++ { 250 wg.Add(1) 251 go func(client rpc.TChanNode) { 252 defer wg.Done() 253 254 var ( 255 attempts = p.opts.BackgroundHealthCheckFailLimit() 256 failed = 0 257 checkErr error 258 ) 259 for j := 0; j < attempts; j++ { 260 if err := p.healthCheck(client, p.opts, false); err != nil { 261 checkErr = err 262 failed++ 263 throttleDuration := time.Duration(math.Max( 264 float64(time.Second), 265 p.opts.BackgroundHealthCheckFailThrottleFactor()* 266 float64(p.opts.HostConnectTimeout()))) 267 p.sleepHealthRetry(throttleDuration) 268 continue 269 } 270 // Healthy 271 break 272 } 273 274 healthy := failed < attempts 275 if !healthy { 276 // Log health check error 277 log.Debug("health check failed", zap.String("host", p.host.Address()), zap.Error(checkErr)) 278 279 // Swap with tail and decrement pool size 280 p.Lock() 281 if p.status != statusOpen { 282 p.Unlock() 283 return 284 } 285 var c conn 286 for j := int64(0); j < p.poolLen; j++ { 287 if client == p.pool[j].client { 288 c = p.pool[j] 289 p.pool[j] = p.pool[p.poolLen-1] 290 p.pool = p.pool[:p.poolLen-1] 291 p.poolLen = int64(len(p.pool)) 292 break 293 } 294 } 295 p.Unlock() 296 297 // Close the client's channel 298 c.channel.Close() 299 } 300 }(p.pool[i].client) 301 } 302 p.RUnlock() 303 304 wg.Wait() 305 306 now := nowFn() 307 if !now.Before(deadline) { 308 // Exceeded deadline, start next health check loop 309 p.sleepHealth(0) // Call sleep 0 for tests to intercept this loop continuation 310 continue 311 } 312 313 p.sleepHealth(deadline.Sub(now)) 314 } 315 } 316 317 func healthCheck(client rpc.TChanNode, opts Options, checkBootstrapped bool) error { 318 tctx, _ := thrift.NewContext(opts.HostConnectTimeout()) 319 result, err := client.Health(tctx) 320 if err != nil { 321 return err 322 } 323 if !result.Ok { 324 return fmt.Errorf("status not ok: %s", result.Status) 325 } 326 if checkBootstrapped && !result.Bootstrapped { 327 return errNodeNotBootstrapped 328 } 329 return nil 330 } 331 332 func randStutter(source rand.Source, t time.Duration) time.Duration { 333 amount := float64(source.Int63()) / float64(math.MaxInt64) 334 return time.Duration(float64(t) * amount) 335 }