github.com/m3db/m3@v1.5.1-0.20231129193456-75a402aa583b/src/aggregator/client/conn.go (about) 1 // Copyright (c) 2018 Uber Technologies, Inc. 2 // 3 // Permission is hereby granted, free of charge, to any person obtaining a copy 4 // of this software and associated documentation files (the "Software"), to deal 5 // in the Software without restriction, including without limitation the rights 6 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 // copies of the Software, and to permit persons to whom the Software is 8 // furnished to do so, subject to the following conditions: 9 // 10 // The above copyright notice and this permission notice shall be included in 11 // all copies or substantial portions of the Software. 12 // 13 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 19 // THE SOFTWARE. 
package client

import (
	"context"
	"errors"
	"math/rand"
	"net"
	"sync"
	"time"

	"github.com/m3db/m3/src/x/clock"
	xio "github.com/m3db/m3/src/x/io"
	xnet "github.com/m3db/m3/src/x/net"
	"github.com/m3db/m3/src/x/retry"

	"github.com/uber-go/tally"
)

const (
	// tcpProtocol is the network name handed to the dialer.
	tcpProtocol = "tcp"
)

var (
	// errNoActiveConnection is returned when there is no connection and the
	// reconnect policy decides not to dial yet.
	errNoActiveConnection = errors.New("no active connection")
	// errInvalidConnection is returned by uninitializedWriter.Write.
	errInvalidConnection = errors.New("connection is invalid")
	// uninitWriter is the placeholder destination given to the resettable
	// writer before any connection has been established.
	uninitWriter uninitializedWriter
)

// Function hooks stored on connection; newConnection installs the real
// implementations (presumably swappable in tests — confirm against callers).
type (
	sleepFn           func(time.Duration)
	connectWithLockFn func() error
	writeWithLockFn   func([]byte) error
)

// connection is a persistent connection that retries establishing
// connection with exponential backoff if the connection goes down.
type connection struct {
	// metrics holds the error and retry counters.
	metrics connectionMetrics
	// writeRetryOpts controls the retry count and backoff used by Write.
	writeRetryOpts retry.Options
	// writer is reset onto conn after each successful dial.
	writer xio.ResettableWriter
	// connectWithLockFn defaults to connectWithLock.
	connectWithLockFn connectWithLockFn
	// sleepFn defaults to time.Sleep; used for backoff between write retries.
	sleepFn sleepFn
	// nowFn is the clock used for connect timestamps and write deadlines.
	nowFn clock.NowFn
	// conn is the current connection, or nil when there is none.
	conn net.Conn
	// rngFn is the random source passed to the retry backoff computation.
	rngFn retry.RngFn
	// writeWithLockFn defaults to writeWithLock.
	writeWithLockFn writeWithLockFn
	// addr is the address dialed.
	addr string
	// maxDuration is the time since the last connect attempt after which a
	// reconnect is attempted regardless of the failure count.
	maxDuration time.Duration
	// maxThreshold caps threshold; once numFailures reaches it, reconnects
	// are attempted based on elapsed time only.
	maxThreshold int
	// multiplier scales threshold after a failed reconnect that was
	// triggered by crossing the threshold.
	multiplier int
	// initThreshold is the value threshold is reset to after a successful
	// reconnect.
	initThreshold int
	// threshold is the number of failures required before a reconnect is
	// attempted.
	threshold int
	// lastConnectAttemptNanos is the Unix-nanosecond time of the most recent
	// dial attempt.
	lastConnectAttemptNanos int64
	// writeTimeout is added to the current time to form the write deadline.
	writeTimeout time.Duration
	// connTimeout bounds each dial attempt.
	connTimeout time.Duration
	// numFailures counts failed Write calls; it is zeroed by resetWithLock.
	numFailures int
	// mtx is held by Write and Close for their full duration.
	mtx sync.Mutex
	// keepAlive is passed to SetKeepAlive on connections that support it.
	keepAlive bool
	// dialer is an optional custom dialer; when nil a net.Dialer is used.
	dialer xnet.ContextDialerFn
}

// newConnection creates a new connection.
83 func newConnection(addr string, opts ConnectionOptions) *connection { 84 c := &connection{ 85 addr: addr, 86 connTimeout: opts.ConnectionTimeout(), 87 writeTimeout: opts.WriteTimeout(), 88 keepAlive: opts.ConnectionKeepAlive(), 89 initThreshold: opts.InitReconnectThreshold(), 90 multiplier: opts.ReconnectThresholdMultiplier(), 91 maxThreshold: opts.MaxReconnectThreshold(), 92 maxDuration: opts.MaxReconnectDuration(), 93 writeRetryOpts: opts.WriteRetryOptions(), 94 dialer: opts.ContextDialer(), 95 rngFn: rand.New(rand.NewSource(time.Now().UnixNano())).Int63n, 96 nowFn: opts.ClockOptions().NowFn(), 97 sleepFn: time.Sleep, 98 threshold: opts.InitReconnectThreshold(), 99 writer: opts.RWOptions().ResettableWriterFn()( 100 uninitWriter, 101 xio.ResettableWriterOptions{WriteBufferSize: 0}, 102 ), 103 metrics: newConnectionMetrics(opts.InstrumentOptions().MetricsScope()), 104 } 105 c.connectWithLockFn = c.connectWithLock 106 c.writeWithLockFn = c.writeWithLock 107 108 return c 109 } 110 111 // Write sends data onto the connection, and attempts to re-establish 112 // connection if the connection is down. 
113 func (c *connection) Write(data []byte) error { 114 var err error 115 c.mtx.Lock() 116 if c.conn == nil { 117 if err = c.checkReconnectWithLock(); err != nil { 118 c.numFailures++ 119 c.mtx.Unlock() 120 return err 121 } 122 } 123 if err = c.writeAttemptWithLock(data); err == nil { 124 c.mtx.Unlock() 125 return nil 126 } 127 for i := 1; i <= c.writeRetryOpts.MaxRetries(); i++ { 128 if backoffDur := time.Duration(retry.BackoffNanos( 129 i, 130 c.writeRetryOpts.Jitter(), 131 c.writeRetryOpts.BackoffFactor(), 132 c.writeRetryOpts.InitialBackoff(), 133 c.writeRetryOpts.MaxBackoff(), 134 c.rngFn, 135 )); backoffDur > 0 { 136 c.sleepFn(backoffDur) 137 } 138 c.metrics.writeRetries.Inc(1) 139 if err = c.writeAttemptWithLock(data); err == nil { 140 c.mtx.Unlock() 141 return nil 142 } 143 } 144 c.numFailures++ 145 c.mtx.Unlock() 146 return err 147 } 148 149 func (c *connection) Close() { 150 c.mtx.Lock() 151 c.closeWithLock() 152 c.mtx.Unlock() 153 } 154 155 // writeAttemptWithLock attempts to establish a new connection and writes raw bytes 156 // to the connection while holding the write lock. 157 // If the write succeeds, c.conn is guaranteed to be a valid connection on return. 158 // If the write fails, c.conn is guaranteed to be nil on return. 159 func (c *connection) writeAttemptWithLock(data []byte) error { 160 if c.conn == nil { 161 if err := c.connectWithLockFn(); err != nil { 162 return err 163 } 164 } 165 if err := c.writeWithLockFn(data); err != nil { 166 c.closeWithLock() 167 return err 168 } 169 return nil 170 } 171 172 func (c *connection) connectWithLock() error { 173 // TODO: propagate this all the way up the callstack. 
174 ctx := context.TODO() 175 176 c.lastConnectAttemptNanos = c.nowFn().UnixNano() 177 178 ctx, cancel := context.WithTimeout(ctx, c.connTimeout) 179 defer cancel() 180 181 conn, err := c.dialContext(ctx, c.addr) 182 if err != nil { 183 c.metrics.connectError.Inc(1) 184 return err 185 } 186 187 // N.B.: If using a custom dialer which doesn't return *net.TCPConn, users are responsible for TCP keep alive options 188 // themselves. 189 if tcpConn, ok := conn.(keepAlivable); ok { 190 if err := tcpConn.SetKeepAlive(c.keepAlive); err != nil { 191 c.metrics.setKeepAliveError.Inc(1) 192 } 193 } 194 195 if c.conn != nil { 196 c.conn.Close() // nolint: errcheck 197 } 198 199 c.conn = conn 200 c.writer.Reset(conn) 201 return nil 202 } 203 204 // Make sure net.TCPConn implements this; otherwise bad things will happen. 205 var _ keepAlivable = (*net.TCPConn)(nil) 206 207 type keepAlivable interface { 208 SetKeepAlive(shouldKeepAlive bool) error 209 } 210 211 func (c *connection) dialContext(ctx context.Context, addr string) (net.Conn, error) { 212 if dialer := c.dialer; dialer != nil { 213 return dialer(ctx, tcpProtocol, addr) 214 } 215 var dialer net.Dialer 216 return dialer.DialContext(ctx, tcpProtocol, addr) 217 } 218 219 func (c *connection) checkReconnectWithLock() error { 220 // If we haven't accumulated enough failures to warrant another reconnect 221 // and we haven't past the maximum duration since the last time we attempted 222 // to connect then we simply return false without reconnecting. 223 // If we exhausted maximum allowed failures then we will retry only based on 224 // maximum duration since the last attempt. 
225 enoughFailuresToRetry := c.numFailures >= c.threshold 226 exhaustedMaxFailures := c.numFailures >= c.maxThreshold 227 sufficientTimePassed := c.nowFn().UnixNano()-c.lastConnectAttemptNanos >= c.maxDuration.Nanoseconds() 228 if !sufficientTimePassed && (exhaustedMaxFailures || !enoughFailuresToRetry) { 229 return errNoActiveConnection 230 } 231 err := c.connectWithLockFn() 232 if err == nil { 233 c.resetWithLock() 234 return nil 235 } 236 237 // Only raise the threshold when it is crossed, not when the max duration is reached. 238 if enoughFailuresToRetry && c.threshold < c.maxThreshold { 239 newThreshold := c.threshold * c.multiplier 240 if newThreshold > c.maxThreshold { 241 newThreshold = c.maxThreshold 242 } 243 c.threshold = newThreshold 244 } 245 return err 246 } 247 248 func (c *connection) writeWithLock(data []byte) error { 249 if err := c.conn.SetWriteDeadline(c.nowFn().Add(c.writeTimeout)); err != nil { 250 c.metrics.setWriteDeadlineError.Inc(1) 251 } 252 if _, err := c.writer.Write(data); err != nil { 253 c.metrics.writeError.Inc(1) 254 return err 255 } 256 if err := c.writer.Flush(); err != nil { 257 c.metrics.writeError.Inc(1) 258 return err 259 } 260 return nil 261 } 262 263 func (c *connection) resetWithLock() { 264 c.numFailures = 0 265 c.threshold = c.initThreshold 266 } 267 268 func (c *connection) closeWithLock() { 269 if c.conn != nil { 270 c.conn.Close() // nolint: errcheck 271 } 272 c.conn = nil 273 } 274 275 const ( 276 errorMetric = "errors" 277 errorMetricType = "error-type" 278 ) 279 280 type connectionMetrics struct { 281 connectError tally.Counter 282 writeError tally.Counter 283 writeRetries tally.Counter 284 setKeepAliveError tally.Counter 285 setWriteDeadlineError tally.Counter 286 } 287 288 func newConnectionMetrics(scope tally.Scope) connectionMetrics { 289 return connectionMetrics{ 290 connectError: scope.Tagged(map[string]string{errorMetricType: "connect"}). 
291 Counter(errorMetric), 292 writeError: scope.Tagged(map[string]string{errorMetricType: "write"}). 293 Counter(errorMetric), 294 writeRetries: scope.Tagged(map[string]string{"action": "write"}).Counter("retries"), 295 setKeepAliveError: scope.Tagged(map[string]string{errorMetricType: "tcp-keep-alive"}). 296 Counter(errorMetric), 297 setWriteDeadlineError: scope.Tagged(map[string]string{errorMetricType: "set-write-deadline"}). 298 Counter(errorMetric), 299 } 300 } 301 302 type uninitializedWriter struct{} 303 304 func (u uninitializedWriter) Write(p []byte) (int, error) { return 0, errInvalidConnection } 305 func (u uninitializedWriter) Close() error { return nil }