go.chromium.org/luci@v0.0.0-20240309015107-7cdc2e660f33/cipd/appengine/impl/gs/retry.go (about)

     1  // Copyright 2017 The LUCI Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package gs
    16  
    17  import (
    18  	"context"
    19  	"net/http"
    20  	"time"
    21  
    22  	"google.golang.org/api/googleapi"
    23  
    24  	"go.chromium.org/luci/common/errors"
    25  	"go.chromium.org/luci/common/logging"
    26  	"go.chromium.org/luci/common/retry"
    27  	"go.chromium.org/luci/common/retry/transient"
    28  )
    29  
    30  var statusCodeTagKey = errors.NewTagKey("Google Storage API Status Code")
    31  
    32  // StatusCode returns HTTP status code embedded inside the annotated error.
    33  //
    34  // Returns http.StatusOK if err is nil and 0 if the error doesn't have a status
    35  // code.
    36  func StatusCode(err error) int {
    37  	if err == nil {
    38  		return http.StatusOK
    39  	}
    40  	if val, ok := errors.TagValueIn(statusCodeTagKey, err); ok {
    41  		return val.(int)
    42  	}
    43  	return 0
    44  }
    45  
    46  // StatusCodeTag can be used to attach HTTP status code to the error.
    47  //
    48  // This code will be available via StatusCode(err) function.
    49  func StatusCodeTag(code int) errors.TagValue {
    50  	return errors.TagValue{Key: statusCodeTagKey, Value: code}
    51  }
    52  
    53  // withRetry executes a Google Storage API call, retrying on transient errors.
    54  //
    55  // If request reached GS, but the service replied with an error, the
    56  // corresponding HTTP status code can be extracted from the error via
    57  // StatusCode(err). The error is also tagged as transient based on the code:
    58  // response with HTTP statuses >=500 and 429 are considered transient errors.
    59  //
    60  // If the request never reached GS, StatusCode(err) would return 0 and the error
    61  // will be tagged as transient.
    62  func withRetry(ctx context.Context, call func() error) error {
    63  	return retry.Retry(ctx, transient.Only(retry.Default), func() error {
    64  		err := call()
    65  		if err == nil {
    66  			return nil
    67  		}
    68  		apiErr, _ := err.(*googleapi.Error)
    69  		if apiErr == nil {
    70  			// RestartUploadError errors are fatal and should be passed unannotated.
    71  			if _, ok := err.(*RestartUploadError); ok {
    72  				return err
    73  			}
    74  			return errors.Annotate(err, "failed to call GS").Tag(transient.Tag).Err()
    75  		}
    76  		logging.Infof(ctx, "GS replied with HTTP code %d", apiErr.Code)
    77  		logging.Debugf(ctx, "full response body:\n%s", apiErr.Body)
    78  		ann := errors.Annotate(err, "GS replied with HTTP code %d", apiErr.Code).
    79  			Tag(StatusCodeTag(apiErr.Code))
    80  		// Retry only on 429 and 5xx responses, according to
    81  		// https://cloud.google.com/storage/docs/exponential-backoff.
    82  		if apiErr.Code == 429 || apiErr.Code >= 500 {
    83  			ann.Tag(transient.Tag)
    84  		}
    85  		return ann.Err()
    86  	}, func(err error, d time.Duration) {
    87  		logging.WithError(err).Errorf(ctx, "Transient error when accessing GS. Retrying in %s...", d)
    88  	})
    89  }