go.uber.org/yarpc@v1.72.1/transport/http/peer.go (about)

     1  // Copyright (c) 2022 Uber Technologies, Inc.
     2  //
     3  // Permission is hereby granted, free of charge, to any person obtaining a copy
     4  // of this software and associated documentation files (the "Software"), to deal
     5  // in the Software without restriction, including without limitation the rights
     6  // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
     7  // copies of the Software, and to permit persons to whom the Software is
     8  // furnished to do so, subject to the following conditions:
     9  //
    10  // The above copyright notice and this permission notice shall be included in
    11  // all copies or substantial portions of the Software.
    12  //
    13  // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    14  // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    15  // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
    16  // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    17  // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
    18  // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
    19  // THE SOFTWARE.
    20  
    21  package http
    22  
    23  import (
    24  	"net"
    25  	"time"
    26  
    27  	"go.uber.org/atomic"
    28  	"go.uber.org/yarpc/api/peer"
    29  	"go.uber.org/yarpc/peer/abstractpeer"
    30  	"go.uber.org/zap"
    31  )
    32  
    33  type httpPeer struct {
    34  	*abstractpeer.Peer
    35  
    36  	transport             *Transport
    37  	addr                  string
    38  	changed               chan struct{}
    39  	released              chan struct{}
    40  	timer                 *time.Timer
    41  	innocentUntilUnixNano *atomic.Int64
    42  }
    43  
    44  func newPeer(addr string, t *Transport) *httpPeer {
    45  	// Create a defused timer for later use.
    46  	timer := time.NewTimer(0)
    47  	if !timer.Stop() {
    48  		// not reachable, but if the timer wins the race, it would mean
    49  		// deadlock later, so best to conditionally drain the channel just in
    50  		// that case.
    51  		<-timer.C
    52  	}
    53  
    54  	return &httpPeer{
    55  		Peer:                  abstractpeer.NewPeer(abstractpeer.PeerIdentifier(addr), t),
    56  		transport:             t,
    57  		addr:                  addr,
    58  		changed:               make(chan struct{}, 1),
    59  		released:              make(chan struct{}),
    60  		timer:                 timer,
    61  		innocentUntilUnixNano: atomic.NewInt64(0),
    62  	}
    63  }
    64  
    65  // The HTTP transport polls for whether a peer is available by attempting to
    66  // connect. The transport does not preserve the connection because HTTP servers
    67  // may behave oddly if they don't receive a request immediately.
    68  // Instead, we treat the peer as available until proven otherwise with a fresh
    69  // connection attempt.
    70  func (p *httpPeer) isAvailable() bool {
    71  	// If there's no open connection, we probe by connecting.
    72  	dialer := &net.Dialer{Timeout: p.transport.connTimeout}
    73  	conn, err := dialer.Dial("tcp", p.addr)
    74  	if conn != nil {
    75  		conn.Close()
    76  	}
    77  	if conn != nil && err == nil {
    78  		return true
    79  	}
    80  
    81  	p.transport.logger.Debug(
    82  		"unable to connect to peer, marking as unavailable",
    83  		zap.String("peer", p.addr),
    84  		zap.String("transport", "http"),
    85  	)
    86  
    87  	return false
    88  }
    89  
    90  // StartRequest and EndRequest are no-ops now.
// They previously aggregated pending request count from all subscribed peer
    92  // lists and distributed change notifications.
    93  // This was fraught with concurrency hazards so we moved pending request count
    94  // tracking into the lists themselves.
    95  
    96  func (p *httpPeer) StartRequest() {}
    97  
    98  func (p *httpPeer) EndRequest() {}
    99  
   100  func (p *httpPeer) notifyStatusChanged() {
   101  	// Kick the state change channel (if it hasn't been kicked already).
   102  	// The peer connection management loop broadcasts status changes, to avoid
   103  	// deadlock on the stack.
   104  	select {
   105  	case p.changed <- struct{}{}:
   106  	default:
   107  	}
   108  }
   109  
   110  func (p *httpPeer) onSuspect() {
   111  	now := time.Now().UnixNano()
   112  	innocentUntil := p.innocentUntilUnixNano.Load()
   113  
   114  	// Do not check for connectivity after every request timeout.
   115  	// Spread them out so they only occur once in every innocence window.
   116  	if now < innocentUntil {
   117  		return
   118  	}
   119  
   120  	// Extend the window of innocence from the current time.
   121  	// Use Store instead of CAS since races at worst extend the innocence
   122  	// window to relatively similar distant times.
   123  	innocentDurationUnixNano := p.transport.jitter(p.transport.innocenceWindow.Nanoseconds())
   124  	p.innocentUntilUnixNano.Store(now + innocentDurationUnixNano)
   125  
   126  	p.transport.logger.Debug(
   127  		"peer marked suspicious due to timeout",
   128  		zap.String("peer", p.addr),
   129  		zap.Duration("duration", time.Duration(innocentDurationUnixNano)),
   130  		zap.Time("until", time.Unix(0, innocentDurationUnixNano)),
   131  		zap.String("transport", "http"),
   132  	)
   133  
   134  	p.notifyStatusChanged()
   135  }
   136  
   137  func (p *httpPeer) onDisconnected() {
   138  	p.Peer.SetStatus(peer.Connecting)
   139  	p.notifyStatusChanged()
   140  }
   141  
   142  func (p *httpPeer) Release() {
   143  	close(p.released)
   144  }
   145  
   146  func (p *httpPeer) MaintainConn() {
   147  	var attempts uint
   148  
   149  	backoff := p.transport.connBackoffStrategy.Backoff()
   150  
   151  	// Wait for start (so we can be certain that we have a channel).
   152  	<-p.transport.once.Started()
   153  
   154  	// Attempt to retain an open connection to each peer so long as it is
   155  	// retained.
   156  	p.setStatus(peer.Connecting)
   157  	for {
   158  		// Invariant: Status is Connecting initially, or after exponential
   159  		// back-off, or after onDisconnected, but still Available after
   160  		// onSuspect.
   161  		if p.isAvailable() {
   162  			p.setStatus(peer.Available)
   163  			// Reset on success
   164  			attempts = 0
   165  			if !p.waitForChange() {
   166  				break
   167  			}
   168  			// Invariant: the status is Connecting if change is triggered by
   169  			// onDisconnected, but remains Available if triggered by onSuspect.
   170  		} else {
   171  			p.setStatus(peer.Unavailable)
   172  			// Back-off on fail
   173  			dur := backoff.Duration(attempts)
   174  			p.transport.logger.Debug(
   175  				"peer connect retry back-off",
   176  				zap.String("peer", p.addr),
   177  				zap.Duration("sleep", dur),
   178  				zap.Time("until", time.Now().Add(dur)),
   179  				zap.Int("attempt", int(attempts)),
   180  				zap.String("transport", "http"),
   181  			)
   182  			if !p.sleep(dur) {
   183  				break
   184  			}
   185  			attempts++
   186  			p.setStatus(peer.Connecting)
   187  		}
   188  	}
   189  	p.setStatus(peer.Unavailable)
   190  
   191  	p.transport.connectorsGroup.Done()
   192  }
   193  
   194  func (p *httpPeer) setStatus(status peer.ConnectionStatus) {
   195  	p.transport.logger.Debug(
   196  		"peer status change",
   197  		zap.String("status", status.String()),
   198  		zap.String("peer", p.Peer.Identifier()),
   199  		zap.String("transport", "http"),
   200  	)
   201  	p.Peer.SetStatus(status)
   202  	p.Peer.NotifyStatusChanged()
   203  }
   204  
   205  // waitForChange waits for the transport to send a peer connection status
   206  // change notification, but exits early if the transport releases the peer or
   207  // stops.  waitForChange returns whether it is resuming due to a connection
   208  // status change event.
   209  func (p *httpPeer) waitForChange() (changed bool) {
   210  	for {
   211  		select {
   212  		case <-p.changed:
   213  			return true
   214  		case <-p.released:
   215  			return false
   216  		}
   217  	}
   218  }
   219  
   220  // sleep waits for a duration, but exits early if the transport releases the
   221  // peer or stops.  sleep returns whether it successfully waited the entire
   222  // duration.
   223  func (p *httpPeer) sleep(delay time.Duration) (completed bool) {
   224  	p.timer.Reset(delay)
   225  
   226  	select {
   227  	case <-p.timer.C:
   228  		return true
   229  	case <-p.released:
   230  	case <-p.transport.once.Stopping():
   231  	}
   232  
   233  	if !p.timer.Stop() {
   234  		// This branch is very difficult to reach, as stopping a timer almost
   235  		// always succeeds.
   236  		<-p.timer.C
   237  	}
   238  	return false
   239  }