go.uber.org/yarpc@v1.72.1/transport/http/peer.go (about) 1 // Copyright (c) 2022 Uber Technologies, Inc. 2 // 3 // Permission is hereby granted, free of charge, to any person obtaining a copy 4 // of this software and associated documentation files (the "Software"), to deal 5 // in the Software without restriction, including without limitation the rights 6 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 // copies of the Software, and to permit persons to whom the Software is 8 // furnished to do so, subject to the following conditions: 9 // 10 // The above copyright notice and this permission notice shall be included in 11 // all copies or substantial portions of the Software. 12 // 13 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 19 // THE SOFTWARE. 20 21 package http 22 23 import ( 24 "net" 25 "time" 26 27 "go.uber.org/atomic" 28 "go.uber.org/yarpc/api/peer" 29 "go.uber.org/yarpc/peer/abstractpeer" 30 "go.uber.org/zap" 31 ) 32 33 type httpPeer struct { 34 *abstractpeer.Peer 35 36 transport *Transport 37 addr string 38 changed chan struct{} 39 released chan struct{} 40 timer *time.Timer 41 innocentUntilUnixNano *atomic.Int64 42 } 43 44 func newPeer(addr string, t *Transport) *httpPeer { 45 // Create a defused timer for later use. 46 timer := time.NewTimer(0) 47 if !timer.Stop() { 48 // not reachable, but if the timer wins the race, it would mean 49 // deadlock later, so best to conditionally drain the channel just in 50 // that case. 51 <-timer.C 52 } 53 54 return &httpPeer{ 55 Peer: abstractpeer.NewPeer(abstractpeer.PeerIdentifier(addr), t), 56 transport: t, 57 addr: addr, 58 changed: make(chan struct{}, 1), 59 released: make(chan struct{}), 60 timer: timer, 61 innocentUntilUnixNano: atomic.NewInt64(0), 62 } 63 } 64 65 // The HTTP transport polls for whether a peer is available by attempting to 66 // connect. The transport does not preserve the connection because HTTP servers 67 // may behave oddly if they don't receive a request immediately. 68 // Instead, we treat the peer as available until proven otherwise with a fresh 69 // connection attempt. 70 func (p *httpPeer) isAvailable() bool { 71 // If there's no open connection, we probe by connecting. 72 dialer := &net.Dialer{Timeout: p.transport.connTimeout} 73 conn, err := dialer.Dial("tcp", p.addr) 74 if conn != nil { 75 conn.Close() 76 } 77 if conn != nil && err == nil { 78 return true 79 } 80 81 p.transport.logger.Debug( 82 "unable to connect to peer, marking as unavailable", 83 zap.String("peer", p.addr), 84 zap.String("transport", "http"), 85 ) 86 87 return false 88 } 89 90 // StartRequest and EndRequest are no-ops now. 91 // They previously aggregated pending request count from all subscibed peer 92 // lists and distributed change notifications. 93 // This was fraught with concurrency hazards so we moved pending request count 94 // tracking into the lists themselves. 95 96 func (p *httpPeer) StartRequest() {} 97 98 func (p *httpPeer) EndRequest() {} 99 100 func (p *httpPeer) notifyStatusChanged() { 101 // Kick the state change channel (if it hasn't been kicked already). 102 // The peer connection management loop broadcasts status changes, to avoid 103 // deadlock on the stack. 104 select { 105 case p.changed <- struct{}{}: 106 default: 107 } 108 } 109 110 func (p *httpPeer) onSuspect() { 111 now := time.Now().UnixNano() 112 innocentUntil := p.innocentUntilUnixNano.Load() 113 114 // Do not check for connectivity after every request timeout. 115 // Spread them out so they only occur once in every innocence window. 116 if now < innocentUntil { 117 return 118 } 119 120 // Extend the window of innocence from the current time. 121 // Use Store instead of CAS since races at worst extend the innocence 122 // window to relatively similar distant times. 123 innocentDurationUnixNano := p.transport.jitter(p.transport.innocenceWindow.Nanoseconds()) 124 p.innocentUntilUnixNano.Store(now + innocentDurationUnixNano) 125 126 p.transport.logger.Debug( 127 "peer marked suspicious due to timeout", 128 zap.String("peer", p.addr), 129 zap.Duration("duration", time.Duration(innocentDurationUnixNano)), 130 zap.Time("until", time.Unix(0, innocentDurationUnixNano)), 131 zap.String("transport", "http"), 132 ) 133 134 p.notifyStatusChanged() 135 } 136 137 func (p *httpPeer) onDisconnected() { 138 p.Peer.SetStatus(peer.Connecting) 139 p.notifyStatusChanged() 140 } 141 142 func (p *httpPeer) Release() { 143 close(p.released) 144 } 145 146 func (p *httpPeer) MaintainConn() { 147 var attempts uint 148 149 backoff := p.transport.connBackoffStrategy.Backoff() 150 151 // Wait for start (so we can be certain that we have a channel). 152 <-p.transport.once.Started() 153 154 // Attempt to retain an open connection to each peer so long as it is 155 // retained. 156 p.setStatus(peer.Connecting) 157 for { 158 // Invariant: Status is Connecting initially, or after exponential 159 // back-off, or after onDisconnected, but still Available after 160 // onSuspect. 161 if p.isAvailable() { 162 p.setStatus(peer.Available) 163 // Reset on success 164 attempts = 0 165 if !p.waitForChange() { 166 break 167 } 168 // Invariant: the status is Connecting if change is triggered by 169 // onDisconnected, but remains Available if triggered by onSuspect. 170 } else { 171 p.setStatus(peer.Unavailable) 172 // Back-off on fail 173 dur := backoff.Duration(attempts) 174 p.transport.logger.Debug( 175 "peer connect retry back-off", 176 zap.String("peer", p.addr), 177 zap.Duration("sleep", dur), 178 zap.Time("until", time.Now().Add(dur)), 179 zap.Int("attempt", int(attempts)), 180 zap.String("transport", "http"), 181 ) 182 if !p.sleep(dur) { 183 break 184 } 185 attempts++ 186 p.setStatus(peer.Connecting) 187 } 188 } 189 p.setStatus(peer.Unavailable) 190 191 p.transport.connectorsGroup.Done() 192 } 193 194 func (p *httpPeer) setStatus(status peer.ConnectionStatus) { 195 p.transport.logger.Debug( 196 "peer status change", 197 zap.String("status", status.String()), 198 zap.String("peer", p.Peer.Identifier()), 199 zap.String("transport", "http"), 200 ) 201 p.Peer.SetStatus(status) 202 p.Peer.NotifyStatusChanged() 203 } 204 205 // waitForChange waits for the transport to send a peer connection status 206 // change notification, but exits early if the transport releases the peer or 207 // stops. waitForChange returns whether it is resuming due to a connection 208 // status change event. 209 func (p *httpPeer) waitForChange() (changed bool) { 210 for { 211 select { 212 case <-p.changed: 213 return true 214 case <-p.released: 215 return false 216 } 217 } 218 } 219 220 // sleep waits for a duration, but exits early if the transport releases the 221 // peer or stops. sleep returns whether it successfully waited the entire 222 // duration. 223 func (p *httpPeer) sleep(delay time.Duration) (completed bool) { 224 p.timer.Reset(delay) 225 226 select { 227 case <-p.timer.C: 228 return true 229 case <-p.released: 230 case <-p.transport.once.Stopping(): 231 } 232 233 if !p.timer.Stop() { 234 // This branch is very difficult to reach, as stopping a timer almost 235 // always succeeds. 236 <-p.timer.C 237 } 238 return false 239 }