github.com/rigado/snapd@v2.42.5-go-mod+incompatible/httputil/retry.go (about) 1 // -*- Mode: Go; indent-tabs-mode: t -*- 2 3 /* 4 * Copyright (C) 2014-2016 Canonical Ltd 5 * 6 * This program is free software: you can redistribute it and/or modify 7 * it under the terms of the GNU General Public License version 3 as 8 * published by the Free Software Foundation. 9 * 10 * This program is distributed in the hope that it will be useful, 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 * GNU General Public License for more details. 14 * 15 * You should have received a copy of the GNU General Public License 16 * along with this program. If not, see <http://www.gnu.org/licenses/>. 17 * 18 */ 19 20 package httputil 21 22 import ( 23 "fmt" 24 "io" 25 "net" 26 "net/http" 27 "net/url" 28 "os" 29 "strings" 30 "syscall" 31 "time" 32 33 "gopkg.in/retry.v1" 34 35 "github.com/snapcore/snapd/logger" 36 "github.com/snapcore/snapd/osutil" 37 ) 38 39 type PerstistentNetworkError struct { 40 Err error 41 } 42 43 func (e *PerstistentNetworkError) Error() string { 44 return fmt.Sprintf("persistent network error: %v", e.Err) 45 } 46 47 func MaybeLogRetryAttempt(url string, attempt *retry.Attempt, startTime time.Time) { 48 if osutil.GetenvBool("SNAPD_DEBUG") || attempt.Count() > 1 { 49 logger.Debugf("Retrying %s, attempt %d, elapsed time=%v", url, attempt.Count(), time.Since(startTime)) 50 } 51 } 52 53 func maybeLogRetrySummary(startTime time.Time, url string, attempt *retry.Attempt, resp *http.Response, err error) { 54 if osutil.GetenvBool("SNAPD_DEBUG") || attempt.Count() > 1 { 55 var status string 56 if err != nil { 57 status = err.Error() 58 } else if resp != nil { 59 status = fmt.Sprintf("%d", resp.StatusCode) 60 } 61 logger.Debugf("The retry loop for %s finished after %d retries, elapsed time=%v, status: %s", url, attempt.Count(), time.Since(startTime), status) 62 } 63 } 64 65 func ShouldRetryHttpResponse(attempt *retry.Attempt, resp *http.Response) bool { 66 if !attempt.More() { 67 return false 68 } 69 return resp.StatusCode >= 500 70 } 71 72 // isHttp2ProtocolError returns true if the given error is a http2 73 // stream error with code 0x1 (PROTOCOL_ERROR). 74 // 75 // Unfortunately it seems this is not easy to detect. In e3be142 this 76 // code tried to be smart and detect this via http2.StreamError but it 77 // seems like with the h2_bundle.go in the go distro this does not 78 // work, i.e. in https://travis-ci.org/snapcore/snapd/jobs/575471665 79 // we still got protocol errors even with this detection code. 80 // 81 // So this code falls back to simple and naive detection. 82 func isHttp2ProtocolError(err error) bool { 83 if strings.Contains(err.Error(), "PROTOCOL_ERROR") { 84 return true 85 } 86 // here is what a protocol error may look like: 87 // "DEBUG: Not retrying: http.http2StreamError{StreamID:0x1, Code:0x1, Cause:error(nil)}" 88 if strings.Contains(err.Error(), "http2StreamError") && strings.Contains(err.Error(), "Code:0x1,") { 89 return true 90 } 91 return false 92 } 93 94 func ShouldRetryError(attempt *retry.Attempt, err error) bool { 95 if !attempt.More() { 96 return false 97 } 98 if urlErr, ok := err.(*url.Error); ok { 99 err = urlErr.Err 100 } 101 if netErr, ok := err.(net.Error); ok { 102 if netErr.Timeout() { 103 logger.Debugf("Retrying because of: %s", netErr) 104 return true 105 } 106 } 107 // The CDN sometimes resets the connection (LP:#1617765), also 108 // retry in this case 109 if opErr, ok := err.(*net.OpError); ok { 110 // "no such host" is a permanent error and should not be retried. 111 if opErr.Op == "dial" && strings.Contains(opErr.Error(), "no such host") { 112 return false 113 } 114 // peeling the onion 115 if syscallErr, ok := opErr.Err.(*os.SyscallError); ok { 116 if syscallErr.Err == syscall.ECONNRESET { 117 logger.Debugf("Retrying because of: %s", opErr) 118 return true 119 } 120 // FIXME: code below is not (unit) tested and 121 // it is unclear if we need it with the new 122 // opErr.Temporary() "if" below 123 if opErr.Op == "dial" { 124 logger.Debugf("Retrying because of: %#v (syscall error: %#v)", opErr, syscallErr.Err) 125 return true 126 } 127 logger.Debugf("Encountered syscall error: %#v", syscallErr) 128 } 129 130 // If we are unable to talk to a DNS go1.9+ will set 131 // opErr.IsTemporary - we also support go1.6 so we need to 132 // add a workaround here. This block can go away once we 133 // use go1.9+ only. 134 if dnsErr, ok := opErr.Err.(*net.DNSError); ok { 135 // The horror, the horror 136 // TODO: stop Arch to use the cgo resolver 137 // which requires the right side of the OR 138 if strings.Contains(dnsErr.Err, "connection refused") || strings.Contains(dnsErr.Err, "Temporary failure in name resolution") { 139 logger.Debugf("Retrying because of temporary net error (DNS): %#v", dnsErr) 140 return true 141 } 142 } 143 144 // Retry for temporary network errors (like dns errors in 1.9+) 145 if opErr.Temporary() { 146 logger.Debugf("Retrying because of temporary net error: %#v", opErr) 147 return true 148 } 149 logger.Debugf("Encountered non temporary net.OpError: %#v", opErr) 150 } 151 152 // we see this from http2 downloads sometimes - it is unclear what 153 // is causing it but https://github.com/golang/go/issues/29125 154 // indicates a retry might be enough. Note that we get the 155 // PROTOCOL_ERROR *from* the remote side (fastly it seems) 156 if isHttp2ProtocolError(err) { 157 logger.Debugf("Retrying because of: %s", err) 158 return true 159 } 160 161 if err == io.ErrUnexpectedEOF || err == io.EOF { 162 logger.Debugf("Retrying because of: %s (%s)", err, err) 163 return true 164 } 165 166 if osutil.GetenvBool("SNAPD_DEBUG") { 167 logger.Debugf("Not retrying: %#v", err) 168 } 169 170 return false 171 } 172 173 func isNetworkDown(err error) bool { 174 urlErr, ok := err.(*url.Error) 175 if !ok { 176 return false 177 } 178 opErr, ok := urlErr.Err.(*net.OpError) 179 if !ok { 180 return false 181 } 182 183 switch lowerErr := opErr.Err.(type) { 184 case *net.DNSError: 185 // on 16.04 we will not have SyscallError here, but DNSError, with 186 // no further details other than error message 187 return strings.Contains(lowerErr.Err, "connect: network is unreachable") 188 case *os.SyscallError: 189 if errnoErr, ok := lowerErr.Err.(syscall.Errno); ok { 190 // the errno codes from kernel/libc when the network is down 191 return errnoErr == syscall.ENETUNREACH || errnoErr == syscall.ENETDOWN 192 } 193 } 194 return false 195 } 196 197 func isDnsUnavailable(err error) bool { 198 urlErr, ok := err.(*url.Error) 199 if !ok { 200 return false 201 } 202 opErr, ok := urlErr.Err.(*net.OpError) 203 if !ok { 204 return false 205 } 206 207 dnsErr, ok := opErr.Err.(*net.DNSError) 208 if !ok { 209 return false 210 } 211 212 // We really want to check for EAI_AGAIN error here - but this is 213 // not exposed in net.DNSError and in go-1.10 it is not even 214 // a temporary error so there is no way to distiguish it other 215 // than a fugly string compare on a (potentially) localized string 216 return strings.Contains(dnsErr.Err, "Temporary failure in name resolution") 217 } 218 219 // RetryRequest calls doRequest and read the response body in a retry loop using the given retryStrategy. 220 func RetryRequest(endpoint string, doRequest func() (*http.Response, error), readResponseBody func(resp *http.Response) error, retryStrategy retry.Strategy) (resp *http.Response, err error) { 221 var attempt *retry.Attempt 222 startTime := time.Now() 223 for attempt = retry.Start(retryStrategy, nil); attempt.Next(); { 224 MaybeLogRetryAttempt(endpoint, attempt, startTime) 225 226 resp, err = doRequest() 227 if err != nil { 228 if ShouldRetryError(attempt, err) { 229 continue 230 } 231 232 if isNetworkDown(err) || isDnsUnavailable(err) { 233 err = &PerstistentNetworkError{Err: err} 234 } 235 break 236 } 237 238 if ShouldRetryHttpResponse(attempt, resp) { 239 resp.Body.Close() 240 continue 241 } else { 242 err := readResponseBody(resp) 243 resp.Body.Close() 244 if err != nil { 245 if ShouldRetryError(attempt, err) { 246 continue 247 } else { 248 maybeLogRetrySummary(startTime, endpoint, attempt, resp, err) 249 return nil, err 250 } 251 } 252 } 253 // break out from retry loop 254 break 255 } 256 maybeLogRetrySummary(startTime, endpoint, attempt, resp, err) 257 258 return resp, err 259 }