github.com/hugh712/snapd@v0.0.0-20200910133618-1a99902bd583/httputil/retry.go (about)

     1  // -*- Mode: Go; indent-tabs-mode: t -*-
     2  
     3  /*
     4   * Copyright (C) 2014-2016 Canonical Ltd
     5   *
     6   * This program is free software: you can redistribute it and/or modify
     7   * it under the terms of the GNU General Public License version 3 as
     8   * published by the Free Software Foundation.
     9   *
    10   * This program is distributed in the hope that it will be useful,
    11   * but WITHOUT ANY WARRANTY; without even the implied warranty of
    12   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    13   * GNU General Public License for more details.
    14   *
    15   * You should have received a copy of the GNU General Public License
    16   * along with this program.  If not, see <http://www.gnu.org/licenses/>.
    17   *
    18   */
    19  
    20  package httputil
    21  
    22  import (
    23  	"fmt"
    24  	"io"
    25  	"net"
    26  	"net/http"
    27  	"net/url"
    28  	"os"
    29  	"strings"
    30  	"syscall"
    31  	"time"
    32  
    33  	"gopkg.in/retry.v1"
    34  
    35  	"github.com/snapcore/snapd/logger"
    36  	"github.com/snapcore/snapd/osutil"
    37  )
    38  
    39  type PerstistentNetworkError struct {
    40  	Err error
    41  }
    42  
    43  func (e *PerstistentNetworkError) Error() string {
    44  	return fmt.Sprintf("persistent network error: %v", e.Err)
    45  }
    46  
    47  func MaybeLogRetryAttempt(url string, attempt *retry.Attempt, startTime time.Time) {
    48  	if osutil.GetenvBool("SNAPD_DEBUG") || attempt.Count() > 1 {
    49  		logger.Debugf("Retrying %s, attempt %d, elapsed time=%v", url, attempt.Count(), time.Since(startTime))
    50  	}
    51  }
    52  
    53  func maybeLogRetrySummary(startTime time.Time, url string, attempt *retry.Attempt, resp *http.Response, err error) {
    54  	if osutil.GetenvBool("SNAPD_DEBUG") || attempt.Count() > 1 {
    55  		var status string
    56  		if err != nil {
    57  			status = err.Error()
    58  		} else if resp != nil {
    59  			status = fmt.Sprintf("%d", resp.StatusCode)
    60  		}
    61  		logger.Debugf("The retry loop for %s finished after %d retries, elapsed time=%v, status: %s", url, attempt.Count(), time.Since(startTime), status)
    62  	}
    63  }
    64  
    65  func ShouldRetryHttpResponse(attempt *retry.Attempt, resp *http.Response) bool {
    66  	if !attempt.More() {
    67  		return false
    68  	}
    69  	return resp.StatusCode >= 500
    70  }
    71  
    72  // isHttp2ProtocolError returns true if the given error is a http2
    73  // stream error with code 0x1 (PROTOCOL_ERROR).
    74  //
    75  // Unfortunately it seems this is not easy to detect. In e3be142 this
    76  // code tried to be smart and detect this via http2.StreamError but it
    77  // seems like with the h2_bundle.go in the go distro this does not
    78  // work, i.e. in https://travis-ci.org/snapcore/snapd/jobs/575471665
    79  // we still got protocol errors even with this detection code.
    80  //
    81  // So this code falls back to simple and naive detection.
    82  func isHttp2ProtocolError(err error) bool {
    83  	if strings.Contains(err.Error(), "PROTOCOL_ERROR") {
    84  		return true
    85  	}
    86  	// here is what a protocol error may look like:
    87  	// "DEBUG: Not retrying: http.http2StreamError{StreamID:0x1, Code:0x1, Cause:error(nil)}"
    88  	if strings.Contains(err.Error(), "http2StreamError") && strings.Contains(err.Error(), "Code:0x1,") {
    89  		return true
    90  	}
    91  	return false
    92  }
    93  
    94  func ShouldRetryAttempt(attempt *retry.Attempt, err error) bool {
    95  	if !attempt.More() {
    96  		return false
    97  	}
    98  	return ShouldRetryError(err)
    99  }
   100  
   101  // ShouldRetryError returns true for transient network errors like when
   102  // the remote side returns a connection reset and it's sensible to retry
   103  // after a short time.
   104  //
   105  // XXX: Note that currently also NoNetwork(err) errors are reported
   106  // with true here.
   107  func ShouldRetryError(err error) (b bool) {
   108  	if err == nil {
   109  		return false
   110  	}
   111  	defer func() {
   112  		logger.Debugf("ShouldRetryError: %v %T -> %v", err, err, b)
   113  	}()
   114  
   115  	if urlErr, ok := err.(*url.Error); ok {
   116  		err = urlErr.Err
   117  	}
   118  	if netErr, ok := err.(net.Error); ok {
   119  		if netErr.Timeout() {
   120  			logger.Debugf("Retrying because of: %s", netErr)
   121  			return true
   122  		}
   123  	}
   124  	// The CDN sometimes resets the connection (LP:#1617765), also
   125  	// retry in this case
   126  	if opErr, ok := err.(*net.OpError); ok {
   127  		// "no such host" is a permanent error and should not be retried.
   128  		if opErr.Op == "dial" && strings.Contains(opErr.Error(), "no such host") {
   129  			return false
   130  		}
   131  		// peeling the onion
   132  		if syscallErr, ok := opErr.Err.(*os.SyscallError); ok {
   133  			if syscallErr.Err == syscall.ECONNRESET {
   134  				logger.Debugf("Retrying because of: %s", opErr)
   135  				return true
   136  			}
   137  			// FIXME: code below is not (unit) tested and
   138  			// it is unclear if we need it with the new
   139  			// opErr.Temporary() "if" below
   140  			if opErr.Op == "dial" {
   141  				logger.Debugf("Retrying because of: %#v (syscall error: %#v)", opErr, syscallErr.Err)
   142  				return true
   143  			}
   144  			logger.Debugf("Encountered syscall error: %#v", syscallErr)
   145  		}
   146  
   147  		// If we are unable to talk to a DNS go1.9+ will set
   148  		// opErr.IsTemporary - we also support go1.6 so we need to
   149  		// add a workaround here. This block can go away once we
   150  		// use go1.9+ only.
   151  		if dnsErr, ok := opErr.Err.(*net.DNSError); ok {
   152  			// The horror, the horror
   153  			// TODO: stop Arch to use the cgo resolver
   154  			// which requires the right side of the OR
   155  			if strings.Contains(dnsErr.Err, "connection refused") || strings.Contains(dnsErr.Err, "Temporary failure in name resolution") {
   156  				logger.Debugf("Retrying because of temporary net error (DNS): %#v", dnsErr)
   157  				return true
   158  			}
   159  		}
   160  
   161  		// Retry for temporary network errors (like dns errors in 1.9+)
   162  		if opErr.Temporary() {
   163  			logger.Debugf("Retrying because of temporary net error: %#v", opErr)
   164  			return true
   165  		}
   166  		logger.Debugf("Encountered non temporary net.OpError: %#v", opErr)
   167  	}
   168  
   169  	// we see this from http2 downloads sometimes - it is unclear what
   170  	// is causing it but https://github.com/golang/go/issues/29125
   171  	// indicates a retry might be enough. Note that we get the
   172  	// PROTOCOL_ERROR *from* the remote side (fastly it seems)
   173  	if isHttp2ProtocolError(err) {
   174  		logger.Debugf("Retrying because of: %s", err)
   175  		return true
   176  	}
   177  
   178  	if err == io.ErrUnexpectedEOF || err == io.EOF {
   179  		logger.Debugf("Retrying because of: %s (%s)", err, err)
   180  		return true
   181  	}
   182  
   183  	if osutil.GetenvBool("SNAPD_DEBUG") {
   184  		logger.Debugf("Not retrying: %#v", err)
   185  	}
   186  
   187  	return false
   188  }
   189  
   190  // NoNetwork returns true if the error indicates that there is no network
   191  // connection available, i.e. network unreachable or down or DNS unavailable.
   192  func NoNetwork(err error) (b bool) {
   193  	defer func() {
   194  		logger.Debugf("NoNetwork: %v %T -> %v", err, err, b)
   195  	}()
   196  
   197  	return isNetworkDown(err) || isDnsUnavailable(err)
   198  }
   199  
   200  func isNetworkDown(err error) bool {
   201  	if err == nil {
   202  		return false
   203  	}
   204  	urlErr, ok := err.(*url.Error)
   205  	if !ok {
   206  		return false
   207  	}
   208  	opErr, ok := urlErr.Err.(*net.OpError)
   209  	if !ok {
   210  		return false
   211  	}
   212  
   213  	switch lowerErr := opErr.Err.(type) {
   214  	case *net.DNSError:
   215  		// on 16.04 we will not have SyscallError here, but DNSError, with
   216  		// no further details other than error message
   217  		return strings.Contains(lowerErr.Err, "connect: network is unreachable")
   218  	case *os.SyscallError:
   219  		if errnoErr, ok := lowerErr.Err.(syscall.Errno); ok {
   220  			// the errno codes from kernel/libc when the network is down
   221  			return errnoErr == syscall.ENETUNREACH || errnoErr == syscall.ENETDOWN
   222  		}
   223  	}
   224  	return false
   225  }
   226  
   227  func isDnsUnavailable(err error) bool {
   228  	if err == nil {
   229  		return false
   230  	}
   231  
   232  	urlErr, ok := err.(*url.Error)
   233  	if !ok {
   234  		return false
   235  	}
   236  	opErr, ok := urlErr.Err.(*net.OpError)
   237  	if !ok {
   238  		return false
   239  	}
   240  
   241  	dnsErr, ok := opErr.Err.(*net.DNSError)
   242  	if !ok {
   243  		return false
   244  	}
   245  
   246  	// We really want to check for EAI_AGAIN error here - but this is
   247  	// not exposed in net.DNSError and in go-1.10 it is not even
   248  	// a temporary error so there is no way to distiguish it other
   249  	// than a fugly string compare on a (potentially) localized string
   250  	return strings.Contains(dnsErr.Err, "Temporary failure in name resolution")
   251  }
   252  
   253  // RetryRequest calls doRequest and read the response body in a retry loop using the given retryStrategy.
   254  func RetryRequest(endpoint string, doRequest func() (*http.Response, error), readResponseBody func(resp *http.Response) error, retryStrategy retry.Strategy) (resp *http.Response, err error) {
   255  	var attempt *retry.Attempt
   256  	startTime := time.Now()
   257  	for attempt = retry.Start(retryStrategy, nil); attempt.Next(); {
   258  		MaybeLogRetryAttempt(endpoint, attempt, startTime)
   259  
   260  		resp, err = doRequest()
   261  		if err != nil {
   262  			if ShouldRetryAttempt(attempt, err) {
   263  				continue
   264  			}
   265  
   266  			if isNetworkDown(err) || isDnsUnavailable(err) {
   267  				err = &PerstistentNetworkError{Err: err}
   268  			}
   269  			break
   270  		}
   271  
   272  		if ShouldRetryHttpResponse(attempt, resp) {
   273  			resp.Body.Close()
   274  			continue
   275  		} else {
   276  			err := readResponseBody(resp)
   277  			resp.Body.Close()
   278  			if err != nil {
   279  				if ShouldRetryAttempt(attempt, err) {
   280  					continue
   281  				} else {
   282  					maybeLogRetrySummary(startTime, endpoint, attempt, resp, err)
   283  					return nil, err
   284  				}
   285  			}
   286  		}
   287  		// break out from retry loop
   288  		break
   289  	}
   290  	maybeLogRetrySummary(startTime, endpoint, attempt, resp, err)
   291  
   292  	return resp, err
   293  }