github.com/m3db/m3@v1.5.0/src/aggregator/client/conn.go (about) 1 // Copyright (c) 2018 Uber Technologies, Inc. 2 // 3 // Permission is hereby granted, free of charge, to any person obtaining a copy 4 // of this software and associated documentation files (the "Software"), to deal 5 // in the Software without restriction, including without limitation the rights 6 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 // copies of the Software, and to permit persons to whom the Software is 8 // furnished to do so, subject to the following conditions: 9 // 10 // The above copyright notice and this permission notice shall be included in 11 // all copies or substantial portions of the Software. 12 // 13 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 19 // THE SOFTWARE. 20 21 package client 22 23 import ( 24 "errors" 25 "math/rand" 26 "net" 27 "sync" 28 "time" 29 30 "github.com/m3db/m3/src/x/clock" 31 xio "github.com/m3db/m3/src/x/io" 32 "github.com/m3db/m3/src/x/retry" 33 34 "github.com/uber-go/tally" 35 ) 36 37 const ( 38 tcpProtocol = "tcp" 39 ) 40 41 var ( 42 errNoActiveConnection = errors.New("no active connection") 43 errInvalidConnection = errors.New("connection is invalid") 44 uninitWriter uninitializedWriter 45 ) 46 47 type ( 48 sleepFn func(time.Duration) 49 connectWithLockFn func() error 50 writeWithLockFn func([]byte) error 51 ) 52 53 // connection is a persistent connection that retries establishing 54 // connection with exponential backoff if the connection goes down. 55 type connection struct { 56 metrics connectionMetrics 57 writeRetryOpts retry.Options 58 writer xio.ResettableWriter 59 connectWithLockFn connectWithLockFn 60 sleepFn sleepFn 61 nowFn clock.NowFn 62 conn *net.TCPConn 63 rngFn retry.RngFn 64 writeWithLockFn writeWithLockFn 65 addr string 66 maxDuration time.Duration 67 maxThreshold int 68 multiplier int 69 initThreshold int 70 threshold int 71 lastConnectAttemptNanos int64 72 writeTimeout time.Duration 73 connTimeout time.Duration 74 numFailures int 75 mtx sync.Mutex 76 keepAlive bool 77 } 78 79 // newConnection creates a new connection. 80 func newConnection(addr string, opts ConnectionOptions) *connection { 81 c := &connection{ 82 addr: addr, 83 connTimeout: opts.ConnectionTimeout(), 84 writeTimeout: opts.WriteTimeout(), 85 keepAlive: opts.ConnectionKeepAlive(), 86 initThreshold: opts.InitReconnectThreshold(), 87 multiplier: opts.ReconnectThresholdMultiplier(), 88 maxThreshold: opts.MaxReconnectThreshold(), 89 maxDuration: opts.MaxReconnectDuration(), 90 writeRetryOpts: opts.WriteRetryOptions(), 91 rngFn: rand.New(rand.NewSource(time.Now().UnixNano())).Int63n, 92 nowFn: opts.ClockOptions().NowFn(), 93 sleepFn: time.Sleep, 94 threshold: opts.InitReconnectThreshold(), 95 writer: opts.RWOptions().ResettableWriterFn()( 96 uninitWriter, 97 xio.ResettableWriterOptions{WriteBufferSize: 0}, 98 ), 99 metrics: newConnectionMetrics(opts.InstrumentOptions().MetricsScope()), 100 } 101 c.connectWithLockFn = c.connectWithLock 102 c.writeWithLockFn = c.writeWithLock 103 104 return c 105 } 106 107 // Write sends data onto the connection, and attempts to re-establish 108 // connection if the connection is down. 109 func (c *connection) Write(data []byte) error { 110 var err error 111 c.mtx.Lock() 112 if c.conn == nil { 113 if err = c.checkReconnectWithLock(); err != nil { 114 c.numFailures++ 115 c.mtx.Unlock() 116 return err 117 } 118 } 119 if err = c.writeAttemptWithLock(data); err == nil { 120 c.mtx.Unlock() 121 return nil 122 } 123 for i := 1; i <= c.writeRetryOpts.MaxRetries(); i++ { 124 if backoffDur := time.Duration(retry.BackoffNanos( 125 i, 126 c.writeRetryOpts.Jitter(), 127 c.writeRetryOpts.BackoffFactor(), 128 c.writeRetryOpts.InitialBackoff(), 129 c.writeRetryOpts.MaxBackoff(), 130 c.rngFn, 131 )); backoffDur > 0 { 132 c.sleepFn(backoffDur) 133 } 134 c.metrics.writeRetries.Inc(1) 135 if err = c.writeAttemptWithLock(data); err == nil { 136 c.mtx.Unlock() 137 return nil 138 } 139 } 140 c.numFailures++ 141 c.mtx.Unlock() 142 return err 143 } 144 145 func (c *connection) Close() { 146 c.mtx.Lock() 147 c.closeWithLock() 148 c.mtx.Unlock() 149 } 150 151 // writeAttemptWithLock attempts to establish a new connection and writes raw bytes 152 // to the connection while holding the write lock. 153 // If the write succeeds, c.conn is guaranteed to be a valid connection on return. 154 // If the write fails, c.conn is guaranteed to be nil on return. 155 func (c *connection) writeAttemptWithLock(data []byte) error { 156 if c.conn == nil { 157 if err := c.connectWithLockFn(); err != nil { 158 return err 159 } 160 } 161 if err := c.writeWithLockFn(data); err != nil { 162 c.closeWithLock() 163 return err 164 } 165 return nil 166 } 167 168 func (c *connection) connectWithLock() error { 169 c.lastConnectAttemptNanos = c.nowFn().UnixNano() 170 conn, err := net.DialTimeout(tcpProtocol, c.addr, c.connTimeout) 171 if err != nil { 172 c.metrics.connectError.Inc(1) 173 return err 174 } 175 176 tcpConn := conn.(*net.TCPConn) 177 if err := tcpConn.SetKeepAlive(c.keepAlive); err != nil { 178 c.metrics.setKeepAliveError.Inc(1) 179 } 180 181 if c.conn != nil { 182 c.conn.Close() // nolint: errcheck 183 } 184 185 c.conn = tcpConn 186 c.writer.Reset(tcpConn) 187 return nil 188 } 189 190 func (c *connection) checkReconnectWithLock() error { 191 // If we haven't accumulated enough failures to warrant another reconnect 192 // and we haven't past the maximum duration since the last time we attempted 193 // to connect then we simply return false without reconnecting. 194 // If we exhausted maximum allowed failures then we will retry only based on 195 // maximum duration since the last attempt. 196 enoughFailuresToRetry := c.numFailures >= c.threshold 197 exhaustedMaxFailures := c.numFailures >= c.maxThreshold 198 sufficientTimePassed := c.nowFn().UnixNano()-c.lastConnectAttemptNanos >= c.maxDuration.Nanoseconds() 199 if !sufficientTimePassed && (exhaustedMaxFailures || !enoughFailuresToRetry) { 200 return errNoActiveConnection 201 } 202 err := c.connectWithLockFn() 203 if err == nil { 204 c.resetWithLock() 205 return nil 206 } 207 208 // Only raise the threshold when it is crossed, not when the max duration is reached. 209 if enoughFailuresToRetry && c.threshold < c.maxThreshold { 210 newThreshold := c.threshold * c.multiplier 211 if newThreshold > c.maxThreshold { 212 newThreshold = c.maxThreshold 213 } 214 c.threshold = newThreshold 215 } 216 return err 217 } 218 219 func (c *connection) writeWithLock(data []byte) error { 220 if err := c.conn.SetWriteDeadline(c.nowFn().Add(c.writeTimeout)); err != nil { 221 c.metrics.setWriteDeadlineError.Inc(1) 222 } 223 if _, err := c.writer.Write(data); err != nil { 224 c.metrics.writeError.Inc(1) 225 return err 226 } 227 if err := c.writer.Flush(); err != nil { 228 c.metrics.writeError.Inc(1) 229 return err 230 } 231 return nil 232 } 233 234 func (c *connection) resetWithLock() { 235 c.numFailures = 0 236 c.threshold = c.initThreshold 237 } 238 239 func (c *connection) closeWithLock() { 240 if c.conn != nil { 241 c.conn.Close() // nolint: errcheck 242 } 243 c.conn = nil 244 } 245 246 const ( 247 errorMetric = "errors" 248 errorMetricType = "error-type" 249 ) 250 251 type connectionMetrics struct { 252 connectError tally.Counter 253 writeError tally.Counter 254 writeRetries tally.Counter 255 setKeepAliveError tally.Counter 256 setWriteDeadlineError tally.Counter 257 } 258 259 func newConnectionMetrics(scope tally.Scope) connectionMetrics { 260 return connectionMetrics{ 261 connectError: scope.Tagged(map[string]string{errorMetricType: "connect"}). 262 Counter(errorMetric), 263 writeError: scope.Tagged(map[string]string{errorMetricType: "write"}). 264 Counter(errorMetric), 265 writeRetries: scope.Tagged(map[string]string{"action": "write"}).Counter("retries"), 266 setKeepAliveError: scope.Tagged(map[string]string{errorMetricType: "tcp-keep-alive"}). 267 Counter(errorMetric), 268 setWriteDeadlineError: scope.Tagged(map[string]string{errorMetricType: "set-write-deadline"}). 269 Counter(errorMetric), 270 } 271 } 272 273 type uninitializedWriter struct{} 274 275 func (u uninitializedWriter) Write(p []byte) (int, error) { return 0, errInvalidConnection } 276 func (u uninitializedWriter) Close() error { return nil }