github.com/m3db/m3@v1.5.0/src/aggregator/client/conn.go (about)

     1  // Copyright (c) 2018 Uber Technologies, Inc.
     2  //
     3  // Permission is hereby granted, free of charge, to any person obtaining a copy
     4  // of this software and associated documentation files (the "Software"), to deal
     5  // in the Software without restriction, including without limitation the rights
     6  // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
     7  // copies of the Software, and to permit persons to whom the Software is
     8  // furnished to do so, subject to the following conditions:
     9  //
    10  // The above copyright notice and this permission notice shall be included in
    11  // all copies or substantial portions of the Software.
    12  //
    13  // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    14  // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    15  // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
    16  // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    17  // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
    18  // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
    19  // THE SOFTWARE.
    20  
    21  package client
    22  
    23  import (
    24  	"errors"
    25  	"math/rand"
    26  	"net"
    27  	"sync"
    28  	"time"
    29  
    30  	"github.com/m3db/m3/src/x/clock"
    31  	xio "github.com/m3db/m3/src/x/io"
    32  	"github.com/m3db/m3/src/x/retry"
    33  
    34  	"github.com/uber-go/tally"
    35  )
    36  
    37  const (
    38  	tcpProtocol = "tcp"
    39  )
    40  
    41  var (
    42  	errNoActiveConnection = errors.New("no active connection")
    43  	errInvalidConnection  = errors.New("connection is invalid")
    44  	uninitWriter          uninitializedWriter
    45  )
    46  
    47  type (
    48  	sleepFn           func(time.Duration)
    49  	connectWithLockFn func() error
    50  	writeWithLockFn   func([]byte) error
    51  )
    52  
    53  // connection is a persistent connection that retries establishing
    54  // connection with exponential backoff if the connection goes down.
    55  type connection struct {
    56  	metrics                 connectionMetrics
    57  	writeRetryOpts          retry.Options
    58  	writer                  xio.ResettableWriter
    59  	connectWithLockFn       connectWithLockFn
    60  	sleepFn                 sleepFn
    61  	nowFn                   clock.NowFn
    62  	conn                    *net.TCPConn
    63  	rngFn                   retry.RngFn
    64  	writeWithLockFn         writeWithLockFn
    65  	addr                    string
    66  	maxDuration             time.Duration
    67  	maxThreshold            int
    68  	multiplier              int
    69  	initThreshold           int
    70  	threshold               int
    71  	lastConnectAttemptNanos int64
    72  	writeTimeout            time.Duration
    73  	connTimeout             time.Duration
    74  	numFailures             int
    75  	mtx                     sync.Mutex
    76  	keepAlive               bool
    77  }
    78  
    79  // newConnection creates a new connection.
    80  func newConnection(addr string, opts ConnectionOptions) *connection {
    81  	c := &connection{
    82  		addr:           addr,
    83  		connTimeout:    opts.ConnectionTimeout(),
    84  		writeTimeout:   opts.WriteTimeout(),
    85  		keepAlive:      opts.ConnectionKeepAlive(),
    86  		initThreshold:  opts.InitReconnectThreshold(),
    87  		multiplier:     opts.ReconnectThresholdMultiplier(),
    88  		maxThreshold:   opts.MaxReconnectThreshold(),
    89  		maxDuration:    opts.MaxReconnectDuration(),
    90  		writeRetryOpts: opts.WriteRetryOptions(),
    91  		rngFn:          rand.New(rand.NewSource(time.Now().UnixNano())).Int63n,
    92  		nowFn:          opts.ClockOptions().NowFn(),
    93  		sleepFn:        time.Sleep,
    94  		threshold:      opts.InitReconnectThreshold(),
    95  		writer: opts.RWOptions().ResettableWriterFn()(
    96  			uninitWriter,
    97  			xio.ResettableWriterOptions{WriteBufferSize: 0},
    98  		),
    99  		metrics: newConnectionMetrics(opts.InstrumentOptions().MetricsScope()),
   100  	}
   101  	c.connectWithLockFn = c.connectWithLock
   102  	c.writeWithLockFn = c.writeWithLock
   103  
   104  	return c
   105  }
   106  
   107  // Write sends data onto the connection, and attempts to re-establish
   108  // connection if the connection is down.
   109  func (c *connection) Write(data []byte) error {
   110  	var err error
   111  	c.mtx.Lock()
   112  	if c.conn == nil {
   113  		if err = c.checkReconnectWithLock(); err != nil {
   114  			c.numFailures++
   115  			c.mtx.Unlock()
   116  			return err
   117  		}
   118  	}
   119  	if err = c.writeAttemptWithLock(data); err == nil {
   120  		c.mtx.Unlock()
   121  		return nil
   122  	}
   123  	for i := 1; i <= c.writeRetryOpts.MaxRetries(); i++ {
   124  		if backoffDur := time.Duration(retry.BackoffNanos(
   125  			i,
   126  			c.writeRetryOpts.Jitter(),
   127  			c.writeRetryOpts.BackoffFactor(),
   128  			c.writeRetryOpts.InitialBackoff(),
   129  			c.writeRetryOpts.MaxBackoff(),
   130  			c.rngFn,
   131  		)); backoffDur > 0 {
   132  			c.sleepFn(backoffDur)
   133  		}
   134  		c.metrics.writeRetries.Inc(1)
   135  		if err = c.writeAttemptWithLock(data); err == nil {
   136  			c.mtx.Unlock()
   137  			return nil
   138  		}
   139  	}
   140  	c.numFailures++
   141  	c.mtx.Unlock()
   142  	return err
   143  }
   144  
   145  func (c *connection) Close() {
   146  	c.mtx.Lock()
   147  	c.closeWithLock()
   148  	c.mtx.Unlock()
   149  }
   150  
   151  // writeAttemptWithLock attempts to establish a new connection and writes raw bytes
   152  // to the connection while holding the write lock.
   153  // If the write succeeds, c.conn is guaranteed to be a valid connection on return.
   154  // If the write fails, c.conn is guaranteed to be nil on return.
   155  func (c *connection) writeAttemptWithLock(data []byte) error {
   156  	if c.conn == nil {
   157  		if err := c.connectWithLockFn(); err != nil {
   158  			return err
   159  		}
   160  	}
   161  	if err := c.writeWithLockFn(data); err != nil {
   162  		c.closeWithLock()
   163  		return err
   164  	}
   165  	return nil
   166  }
   167  
   168  func (c *connection) connectWithLock() error {
   169  	c.lastConnectAttemptNanos = c.nowFn().UnixNano()
   170  	conn, err := net.DialTimeout(tcpProtocol, c.addr, c.connTimeout)
   171  	if err != nil {
   172  		c.metrics.connectError.Inc(1)
   173  		return err
   174  	}
   175  
   176  	tcpConn := conn.(*net.TCPConn)
   177  	if err := tcpConn.SetKeepAlive(c.keepAlive); err != nil {
   178  		c.metrics.setKeepAliveError.Inc(1)
   179  	}
   180  
   181  	if c.conn != nil {
   182  		c.conn.Close() // nolint: errcheck
   183  	}
   184  
   185  	c.conn = tcpConn
   186  	c.writer.Reset(tcpConn)
   187  	return nil
   188  }
   189  
   190  func (c *connection) checkReconnectWithLock() error {
   191  	// If we haven't accumulated enough failures to warrant another reconnect
   192  	// and we haven't past the maximum duration since the last time we attempted
   193  	// to connect then we simply return false without reconnecting.
   194  	// If we exhausted maximum allowed failures then we will retry only based on
   195  	// maximum duration since the last attempt.
   196  	enoughFailuresToRetry := c.numFailures >= c.threshold
   197  	exhaustedMaxFailures := c.numFailures >= c.maxThreshold
   198  	sufficientTimePassed := c.nowFn().UnixNano()-c.lastConnectAttemptNanos >= c.maxDuration.Nanoseconds()
   199  	if !sufficientTimePassed && (exhaustedMaxFailures || !enoughFailuresToRetry) {
   200  		return errNoActiveConnection
   201  	}
   202  	err := c.connectWithLockFn()
   203  	if err == nil {
   204  		c.resetWithLock()
   205  		return nil
   206  	}
   207  
   208  	// Only raise the threshold when it is crossed, not when the max duration is reached.
   209  	if enoughFailuresToRetry && c.threshold < c.maxThreshold {
   210  		newThreshold := c.threshold * c.multiplier
   211  		if newThreshold > c.maxThreshold {
   212  			newThreshold = c.maxThreshold
   213  		}
   214  		c.threshold = newThreshold
   215  	}
   216  	return err
   217  }
   218  
   219  func (c *connection) writeWithLock(data []byte) error {
   220  	if err := c.conn.SetWriteDeadline(c.nowFn().Add(c.writeTimeout)); err != nil {
   221  		c.metrics.setWriteDeadlineError.Inc(1)
   222  	}
   223  	if _, err := c.writer.Write(data); err != nil {
   224  		c.metrics.writeError.Inc(1)
   225  		return err
   226  	}
   227  	if err := c.writer.Flush(); err != nil {
   228  		c.metrics.writeError.Inc(1)
   229  		return err
   230  	}
   231  	return nil
   232  }
   233  
   234  func (c *connection) resetWithLock() {
   235  	c.numFailures = 0
   236  	c.threshold = c.initThreshold
   237  }
   238  
   239  func (c *connection) closeWithLock() {
   240  	if c.conn != nil {
   241  		c.conn.Close() // nolint: errcheck
   242  	}
   243  	c.conn = nil
   244  }
   245  
   246  const (
   247  	errorMetric     = "errors"
   248  	errorMetricType = "error-type"
   249  )
   250  
   251  type connectionMetrics struct {
   252  	connectError          tally.Counter
   253  	writeError            tally.Counter
   254  	writeRetries          tally.Counter
   255  	setKeepAliveError     tally.Counter
   256  	setWriteDeadlineError tally.Counter
   257  }
   258  
   259  func newConnectionMetrics(scope tally.Scope) connectionMetrics {
   260  	return connectionMetrics{
   261  		connectError: scope.Tagged(map[string]string{errorMetricType: "connect"}).
   262  			Counter(errorMetric),
   263  		writeError: scope.Tagged(map[string]string{errorMetricType: "write"}).
   264  			Counter(errorMetric),
   265  		writeRetries: scope.Tagged(map[string]string{"action": "write"}).Counter("retries"),
   266  		setKeepAliveError: scope.Tagged(map[string]string{errorMetricType: "tcp-keep-alive"}).
   267  			Counter(errorMetric),
   268  		setWriteDeadlineError: scope.Tagged(map[string]string{errorMetricType: "set-write-deadline"}).
   269  			Counter(errorMetric),
   270  	}
   271  }
   272  
   273  type uninitializedWriter struct{}
   274  
   275  func (u uninitializedWriter) Write(p []byte) (int, error) { return 0, errInvalidConnection }
   276  func (u uninitializedWriter) Close() error                { return nil }