github.com/rigado/snapd@v2.42.5-go-mod+incompatible/httputil/retry.go (about)

     1  // -*- Mode: Go; indent-tabs-mode: t -*-
     2  
     3  /*
     4   * Copyright (C) 2014-2016 Canonical Ltd
     5   *
     6   * This program is free software: you can redistribute it and/or modify
     7   * it under the terms of the GNU General Public License version 3 as
     8   * published by the Free Software Foundation.
     9   *
    10   * This program is distributed in the hope that it will be useful,
    11   * but WITHOUT ANY WARRANTY; without even the implied warranty of
    12   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    13   * GNU General Public License for more details.
    14   *
    15   * You should have received a copy of the GNU General Public License
    16   * along with this program.  If not, see <http://www.gnu.org/licenses/>.
    17   *
    18   */
    19  
    20  package httputil
    21  
    22  import (
    23  	"fmt"
    24  	"io"
    25  	"net"
    26  	"net/http"
    27  	"net/url"
    28  	"os"
    29  	"strings"
    30  	"syscall"
    31  	"time"
    32  
    33  	"gopkg.in/retry.v1"
    34  
    35  	"github.com/snapcore/snapd/logger"
    36  	"github.com/snapcore/snapd/osutil"
    37  )
    38  
    39  type PerstistentNetworkError struct {
    40  	Err error
    41  }
    42  
    43  func (e *PerstistentNetworkError) Error() string {
    44  	return fmt.Sprintf("persistent network error: %v", e.Err)
    45  }
    46  
    47  func MaybeLogRetryAttempt(url string, attempt *retry.Attempt, startTime time.Time) {
    48  	if osutil.GetenvBool("SNAPD_DEBUG") || attempt.Count() > 1 {
    49  		logger.Debugf("Retrying %s, attempt %d, elapsed time=%v", url, attempt.Count(), time.Since(startTime))
    50  	}
    51  }
    52  
    53  func maybeLogRetrySummary(startTime time.Time, url string, attempt *retry.Attempt, resp *http.Response, err error) {
    54  	if osutil.GetenvBool("SNAPD_DEBUG") || attempt.Count() > 1 {
    55  		var status string
    56  		if err != nil {
    57  			status = err.Error()
    58  		} else if resp != nil {
    59  			status = fmt.Sprintf("%d", resp.StatusCode)
    60  		}
    61  		logger.Debugf("The retry loop for %s finished after %d retries, elapsed time=%v, status: %s", url, attempt.Count(), time.Since(startTime), status)
    62  	}
    63  }
    64  
    65  func ShouldRetryHttpResponse(attempt *retry.Attempt, resp *http.Response) bool {
    66  	if !attempt.More() {
    67  		return false
    68  	}
    69  	return resp.StatusCode >= 500
    70  }
    71  
    72  // isHttp2ProtocolError returns true if the given error is a http2
    73  // stream error with code 0x1 (PROTOCOL_ERROR).
    74  //
    75  // Unfortunately it seems this is not easy to detect. In e3be142 this
    76  // code tried to be smart and detect this via http2.StreamError but it
    77  // seems like with the h2_bundle.go in the go distro this does not
    78  // work, i.e. in https://travis-ci.org/snapcore/snapd/jobs/575471665
    79  // we still got protocol errors even with this detection code.
    80  //
    81  // So this code falls back to simple and naive detection.
    82  func isHttp2ProtocolError(err error) bool {
    83  	if strings.Contains(err.Error(), "PROTOCOL_ERROR") {
    84  		return true
    85  	}
    86  	// here is what a protocol error may look like:
    87  	// "DEBUG: Not retrying: http.http2StreamError{StreamID:0x1, Code:0x1, Cause:error(nil)}"
    88  	if strings.Contains(err.Error(), "http2StreamError") && strings.Contains(err.Error(), "Code:0x1,") {
    89  		return true
    90  	}
    91  	return false
    92  }
    93  
    94  func ShouldRetryError(attempt *retry.Attempt, err error) bool {
    95  	if !attempt.More() {
    96  		return false
    97  	}
    98  	if urlErr, ok := err.(*url.Error); ok {
    99  		err = urlErr.Err
   100  	}
   101  	if netErr, ok := err.(net.Error); ok {
   102  		if netErr.Timeout() {
   103  			logger.Debugf("Retrying because of: %s", netErr)
   104  			return true
   105  		}
   106  	}
   107  	// The CDN sometimes resets the connection (LP:#1617765), also
   108  	// retry in this case
   109  	if opErr, ok := err.(*net.OpError); ok {
   110  		// "no such host" is a permanent error and should not be retried.
   111  		if opErr.Op == "dial" && strings.Contains(opErr.Error(), "no such host") {
   112  			return false
   113  		}
   114  		// peeling the onion
   115  		if syscallErr, ok := opErr.Err.(*os.SyscallError); ok {
   116  			if syscallErr.Err == syscall.ECONNRESET {
   117  				logger.Debugf("Retrying because of: %s", opErr)
   118  				return true
   119  			}
   120  			// FIXME: code below is not (unit) tested and
   121  			// it is unclear if we need it with the new
   122  			// opErr.Temporary() "if" below
   123  			if opErr.Op == "dial" {
   124  				logger.Debugf("Retrying because of: %#v (syscall error: %#v)", opErr, syscallErr.Err)
   125  				return true
   126  			}
   127  			logger.Debugf("Encountered syscall error: %#v", syscallErr)
   128  		}
   129  
   130  		// If we are unable to talk to a DNS go1.9+ will set
   131  		// opErr.IsTemporary - we also support go1.6 so we need to
   132  		// add a workaround here. This block can go away once we
   133  		// use go1.9+ only.
   134  		if dnsErr, ok := opErr.Err.(*net.DNSError); ok {
   135  			// The horror, the horror
   136  			// TODO: stop Arch to use the cgo resolver
   137  			// which requires the right side of the OR
   138  			if strings.Contains(dnsErr.Err, "connection refused") || strings.Contains(dnsErr.Err, "Temporary failure in name resolution") {
   139  				logger.Debugf("Retrying because of temporary net error (DNS): %#v", dnsErr)
   140  				return true
   141  			}
   142  		}
   143  
   144  		// Retry for temporary network errors (like dns errors in 1.9+)
   145  		if opErr.Temporary() {
   146  			logger.Debugf("Retrying because of temporary net error: %#v", opErr)
   147  			return true
   148  		}
   149  		logger.Debugf("Encountered non temporary net.OpError: %#v", opErr)
   150  	}
   151  
   152  	// we see this from http2 downloads sometimes - it is unclear what
   153  	// is causing it but https://github.com/golang/go/issues/29125
   154  	// indicates a retry might be enough. Note that we get the
   155  	// PROTOCOL_ERROR *from* the remote side (fastly it seems)
   156  	if isHttp2ProtocolError(err) {
   157  		logger.Debugf("Retrying because of: %s", err)
   158  		return true
   159  	}
   160  
   161  	if err == io.ErrUnexpectedEOF || err == io.EOF {
   162  		logger.Debugf("Retrying because of: %s (%s)", err, err)
   163  		return true
   164  	}
   165  
   166  	if osutil.GetenvBool("SNAPD_DEBUG") {
   167  		logger.Debugf("Not retrying: %#v", err)
   168  	}
   169  
   170  	return false
   171  }
   172  
   173  func isNetworkDown(err error) bool {
   174  	urlErr, ok := err.(*url.Error)
   175  	if !ok {
   176  		return false
   177  	}
   178  	opErr, ok := urlErr.Err.(*net.OpError)
   179  	if !ok {
   180  		return false
   181  	}
   182  
   183  	switch lowerErr := opErr.Err.(type) {
   184  	case *net.DNSError:
   185  		// on 16.04 we will not have SyscallError here, but DNSError, with
   186  		// no further details other than error message
   187  		return strings.Contains(lowerErr.Err, "connect: network is unreachable")
   188  	case *os.SyscallError:
   189  		if errnoErr, ok := lowerErr.Err.(syscall.Errno); ok {
   190  			// the errno codes from kernel/libc when the network is down
   191  			return errnoErr == syscall.ENETUNREACH || errnoErr == syscall.ENETDOWN
   192  		}
   193  	}
   194  	return false
   195  }
   196  
   197  func isDnsUnavailable(err error) bool {
   198  	urlErr, ok := err.(*url.Error)
   199  	if !ok {
   200  		return false
   201  	}
   202  	opErr, ok := urlErr.Err.(*net.OpError)
   203  	if !ok {
   204  		return false
   205  	}
   206  
   207  	dnsErr, ok := opErr.Err.(*net.DNSError)
   208  	if !ok {
   209  		return false
   210  	}
   211  
   212  	// We really want to check for EAI_AGAIN error here - but this is
   213  	// not exposed in net.DNSError and in go-1.10 it is not even
   214  	// a temporary error so there is no way to distiguish it other
   215  	// than a fugly string compare on a (potentially) localized string
   216  	return strings.Contains(dnsErr.Err, "Temporary failure in name resolution")
   217  }
   218  
   219  // RetryRequest calls doRequest and read the response body in a retry loop using the given retryStrategy.
   220  func RetryRequest(endpoint string, doRequest func() (*http.Response, error), readResponseBody func(resp *http.Response) error, retryStrategy retry.Strategy) (resp *http.Response, err error) {
   221  	var attempt *retry.Attempt
   222  	startTime := time.Now()
   223  	for attempt = retry.Start(retryStrategy, nil); attempt.Next(); {
   224  		MaybeLogRetryAttempt(endpoint, attempt, startTime)
   225  
   226  		resp, err = doRequest()
   227  		if err != nil {
   228  			if ShouldRetryError(attempt, err) {
   229  				continue
   230  			}
   231  
   232  			if isNetworkDown(err) || isDnsUnavailable(err) {
   233  				err = &PerstistentNetworkError{Err: err}
   234  			}
   235  			break
   236  		}
   237  
   238  		if ShouldRetryHttpResponse(attempt, resp) {
   239  			resp.Body.Close()
   240  			continue
   241  		} else {
   242  			err := readResponseBody(resp)
   243  			resp.Body.Close()
   244  			if err != nil {
   245  				if ShouldRetryError(attempt, err) {
   246  					continue
   247  				} else {
   248  					maybeLogRetrySummary(startTime, endpoint, attempt, resp, err)
   249  					return nil, err
   250  				}
   251  			}
   252  		}
   253  		// break out from retry loop
   254  		break
   255  	}
   256  	maybeLogRetrySummary(startTime, endpoint, attempt, resp, err)
   257  
   258  	return resp, err
   259  }