sigs.k8s.io/prow@v0.0.0-20240503223140-c5e374dc7eb1/pkg/resultstore/writer/writer.go (about)

     1  /*
     2  Copyright 2023 The Kubernetes Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package writer
    18  
    19  import (
    20  	"context"
    21  	"encoding/base64"
    22  	"fmt"
    23  	"time"
    24  
    25  	"github.com/google/uuid"
    26  	"github.com/sirupsen/logrus"
    27  	"google.golang.org/genproto/googleapis/devtools/resultstore/v2"
    28  	"google.golang.org/grpc"
    29  	"google.golang.org/grpc/codes"
    30  	"google.golang.org/grpc/status"
    31  	"k8s.io/apimachinery/pkg/util/wait"
    32  )
    33  
    34  const (
    35  	// Number of UploadRequest messages per batch recommended by the
    36  	// ResultStore maintainers. This is likely not a factor unless
    37  	// this implementation is changed to upload individual tests.
    38  	batchSize = 100
    39  )
    40  
    41  var (
    42  	// rpcRetryBackoff returns the Backoff for retrying CreateInvocation
    43  	// and UploadBatch requests to ResultStore.
    44  	rpcRetryBackoff = wait.Backoff{
    45  		Duration: 100 * time.Millisecond,
    46  		Factor:   2,
    47  		Cap:      30 * time.Second,
    48  		Steps:    8,
    49  		Jitter:   0.2,
    50  	}
    51  	// rpcRetryDuration returns the time allowed for all retries of a
    52  	// single CreateInvocation or UploadBatch request to ResultStore.
    53  	rpcRetryDuration = 5 * time.Minute
    54  )
    55  
    56  func resumeToken() string {
    57  	// ResultStore resume tokens must be unique and be "web safe
    58  	// Base64 encoded bytes."
    59  	return base64.StdEncoding.EncodeToString([]byte(uuid.New().String()))
    60  }
    61  
// ResultStoreBatchClient is the subset of ResultStore RPC methods the writer
// calls: creating and touching an Invocation, reading its upload metadata,
// and uploading batches of resources.
type ResultStoreBatchClient interface {
	// CreateInvocation is called by New to create the Invocation.
	CreateInvocation(context.Context, *resultstore.CreateInvocationRequest, ...grpc.CallOption) (*resultstore.Invocation, error)
	// GetInvocationUploadMetadata is called when resuming an existing
	// Invocation to obtain the resume token to continue from.
	GetInvocationUploadMetadata(context.Context, *resultstore.GetInvocationUploadMetadataRequest, ...grpc.CallOption) (*resultstore.UploadMetadata, error)
	// TouchInvocation is called by New when the Invocation already exists,
	// to detect whether it has been finalized.
	TouchInvocation(context.Context, *resultstore.TouchInvocationRequest, ...grpc.CallOption) (*resultstore.TouchInvocationResponse, error)
	// UploadBatch is called to upload the buffered UploadRequests.
	UploadBatch(ctx context.Context, in *resultstore.UploadBatchRequest, opts ...grpc.CallOption) (*resultstore.UploadBatchResponse, error)
}
    68  
// writer writes results to ResultStore using the UploadBatch API. Upload
// requests are buffered and sent in batches.
type writer struct {
	// log receives an error entry for every failed RPC attempt.
	log *logrus.Entry
	// client issues the ResultStore RPCs.
	client ResultStoreBatchClient
	// invID is the Invocation ID, used to build resource names.
	invID string
	// authToken is sent as the AuthorizationToken of every request.
	authToken string
	// resumeToken is the ResumeToken sent with the next UploadBatch request.
	resumeToken string
	// updates holds the UploadRequests that have not been uploaded yet.
	updates []*resultstore.UploadRequest
	// finalized is set once a FINALIZE request has been added; any later
	// upload request is rejected.
	finalized bool
}
    79  
    80  // IsPermanentError returns whether the error status code is permanent based on
    81  // the ResultStore implementation, according to the ResultStore maintainers.
    82  // (No external documentation is known.) Permanent errors will never succeed
    83  // and should not be retried. Transient errors should be retried with
    84  // exponential backoff.
    85  func IsPermanentError(err error) bool {
    86  	status, _ := status.FromError(err)
    87  	switch status.Code() {
    88  	case codes.AlreadyExists:
    89  		return true
    90  	case codes.NotFound:
    91  		return true
    92  	case codes.InvalidArgument:
    93  		return true
    94  	case codes.FailedPrecondition:
    95  		return true
    96  	case codes.Unimplemented:
    97  		return true
    98  	case codes.PermissionDenied:
    99  		return true
   100  	}
   101  	return false
   102  }
   103  
   104  // IsAlreadyExistsErr returns whether the error status code is AlreadyExists.
   105  func IsAlreadyExistsErr(err error) bool {
   106  	status, _ := status.FromError(err)
   107  	return status.Code() == codes.AlreadyExists
   108  }
   109  
   110  // New creates Invocation inv in ResultStore and returns a writer to add
   111  // resource protos and finalize the Invocation. If the Invocation already
   112  // exists and is finalized, a permanent error is returned. Otherwise, the
   113  // writer syncs with ResultStore to resume writing. RPCs are retried with
   114  // exponential backoff unless there is a permanent error, which is returned
   115  // immediately. The caller should check whether a returned error is permanent
   116  // using IsPermanentError() and only retry transient errors. The authToken is
   117  // a UUID and must be identical across all calls for the same Invocation.
   118  func New(ctx context.Context, log *logrus.Entry, client ResultStoreBatchClient, inv *resultstore.Invocation, invID, authToken string) (*writer, error) {
   119  	w := &writer{
   120  		log:         log,
   121  		client:      client,
   122  		invID:       invID,
   123  		authToken:   authToken,
   124  		resumeToken: resumeToken(),
   125  		updates:     []*resultstore.UploadRequest{},
   126  	}
   127  	ctx, cancel := context.WithTimeout(ctx, rpcRetryDuration)
   128  	defer cancel()
   129  
   130  	err := w.createInvocation(ctx, inv)
   131  	if err == nil {
   132  		return w, nil
   133  	}
   134  	if !IsAlreadyExistsErr(err) {
   135  		return nil, err
   136  	}
   137  
   138  	if touchErr := w.touchInvocation(ctx); IsPermanentError(touchErr) {
   139  		// Since it was confirmed above that the Invocation exists, a
   140  		// permanent error here indicates the Invocation is finalized.
   141  		return nil, err
   142  	}
   143  
   144  	if err = w.retrieveResumeToken(ctx); err != nil {
   145  		return nil, err
   146  	}
   147  
   148  	log.Info("Resuming upload for unfinalized invocation")
   149  	return w, nil
   150  }
   151  
   152  // onlyPermanentError returns err only if it is permanent. Used to prevent
   153  // retries for RPC errors which will never succeed.
   154  func onlyPermanentError(err error) error {
   155  	if IsPermanentError(err) {
   156  		return err
   157  	}
   158  	return nil
   159  }
   160  
   161  func (w *writer) createInvocation(ctx context.Context, inv *resultstore.Invocation) error {
   162  	return wait.ExponentialBackoffWithContext(ctx, rpcRetryBackoff, func() (bool, error) {
   163  		_, err := w.client.CreateInvocation(ctx, w.createInvocationRequest(inv))
   164  		if err != nil {
   165  			w.log.Errorf("resultstore.CreateInvocation: %v", err)
   166  			return false, onlyPermanentError(err)
   167  		}
   168  		return true, nil
   169  	})
   170  }
   171  
   172  func (w *writer) touchInvocation(ctx context.Context) error {
   173  	return wait.ExponentialBackoffWithContext(ctx, rpcRetryBackoff, func() (bool, error) {
   174  		_, err := w.client.TouchInvocation(ctx, w.touchInvocationRequest())
   175  		if err != nil {
   176  			w.log.Errorf("resultstore.TouchInvocation: %v", err)
   177  			return false, onlyPermanentError(err)
   178  		}
   179  		return true, nil
   180  	})
   181  }
   182  
   183  func (w *writer) retrieveResumeToken(ctx context.Context) error {
   184  	return wait.ExponentialBackoffWithContext(ctx, rpcRetryBackoff, func() (bool, error) {
   185  		meta, err := w.client.GetInvocationUploadMetadata(ctx, w.getInvocationUploadMetadataRequest())
   186  		if err != nil {
   187  			w.log.Errorf("resultstore.GetInvocationUploadMetadata: %v", err)
   188  			return false, onlyPermanentError(err)
   189  		}
   190  		w.resumeToken = meta.ResumeToken
   191  		return true, nil
   192  	})
   193  }
   194  
   195  func (w *writer) WriteConfiguration(ctx context.Context, c *resultstore.Configuration) error {
   196  	return w.addUploadRequest(ctx, createConfigurationUploadRequest(c))
   197  }
   198  
   199  func (w *writer) WriteTarget(ctx context.Context, t *resultstore.Target) error {
   200  	return w.addUploadRequest(ctx, createTargetUploadRequest(t))
   201  }
   202  
   203  func (w *writer) WriteConfiguredTarget(ctx context.Context, ct *resultstore.ConfiguredTarget) error {
   204  	return w.addUploadRequest(ctx, createConfiguredTargetUploadRequest(ct))
   205  }
   206  
   207  func (w *writer) WriteAction(ctx context.Context, a *resultstore.Action) error {
   208  	return w.addUploadRequest(ctx, createActionUploadRequest(a))
   209  }
   210  
   211  func (w *writer) Finalize(ctx context.Context) error {
   212  	return w.addUploadRequest(ctx, w.finalizeRequest())
   213  }
   214  
   215  func (w *writer) createInvocationRequest(inv *resultstore.Invocation) *resultstore.CreateInvocationRequest {
   216  	return &resultstore.CreateInvocationRequest{
   217  		InvocationId:       w.invID,
   218  		Invocation:         inv,
   219  		AuthorizationToken: w.authToken,
   220  		InitialResumeToken: w.resumeToken,
   221  	}
   222  }
   223  
   224  func (w *writer) invocationName() string {
   225  	return fmt.Sprintf("invocations/%s", w.invID)
   226  }
   227  
   228  func (w *writer) touchInvocationRequest() *resultstore.TouchInvocationRequest {
   229  	return &resultstore.TouchInvocationRequest{
   230  		Name:               w.invocationName(),
   231  		AuthorizationToken: w.authToken,
   232  	}
   233  }
   234  
   235  func (w *writer) uploadMetadataName() string {
   236  	return fmt.Sprintf("invocations/%s/uploadMetadata", w.invID)
   237  }
   238  
   239  func (w *writer) getInvocationUploadMetadataRequest() *resultstore.GetInvocationUploadMetadataRequest {
   240  	return &resultstore.GetInvocationUploadMetadataRequest{
   241  		Name:               w.uploadMetadataName(),
   242  		AuthorizationToken: w.authToken,
   243  	}
   244  }
   245  
   246  func (w *writer) addUploadRequest(ctx context.Context, r *resultstore.UploadRequest) error {
   247  	if w.finalized {
   248  		return fmt.Errorf("addUploadRequest after finalized for %v", r)
   249  	}
   250  	if r.UploadOperation == resultstore.UploadRequest_FINALIZE {
   251  		w.finalized = true
   252  	}
   253  	w.updates = append(w.updates, r)
   254  	if !w.finalized && len(w.updates) < batchSize {
   255  		return nil
   256  	}
   257  	return w.flushUpdates(ctx)
   258  }
   259  
   260  func (w *writer) flushUpdates(ctx context.Context) error {
   261  	b := w.uploadBatchRequest(w.updates)
   262  	ctx, cancel := context.WithTimeout(ctx, rpcRetryDuration)
   263  	defer cancel()
   264  	return wait.ExponentialBackoffWithContext(ctx, rpcRetryBackoff, func() (bool, error) {
   265  		if _, err := w.client.UploadBatch(ctx, b); err != nil {
   266  			w.log.Errorf("resultstore.UploadBatch: %v", err)
   267  			if IsPermanentError(err) {
   268  				// End retries by returning error.
   269  				return false, err
   270  			}
   271  			return false, nil
   272  		}
   273  		w.updates = []*resultstore.UploadRequest{}
   274  		return true, nil
   275  	})
   276  }
   277  
   278  func (w *writer) uploadBatchRequest(reqs []*resultstore.UploadRequest) *resultstore.UploadBatchRequest {
   279  	nextToken := resumeToken()
   280  	req := &resultstore.UploadBatchRequest{
   281  		Parent:             w.invocationName(),
   282  		ResumeToken:        w.resumeToken,
   283  		NextResumeToken:    nextToken,
   284  		AuthorizationToken: w.authToken,
   285  		UploadRequests:     reqs,
   286  	}
   287  	w.resumeToken = nextToken
   288  	return req
   289  }
   290  
   291  func (w *writer) finalizeRequest() *resultstore.UploadRequest {
   292  	return &resultstore.UploadRequest{
   293  		UploadOperation: resultstore.UploadRequest_FINALIZE,
   294  		Resource:        &resultstore.UploadRequest_Invocation{},
   295  	}
   296  }
   297  
   298  func createConfigurationUploadRequest(c *resultstore.Configuration) *resultstore.UploadRequest {
   299  	id := &resultstore.UploadRequest_Id{
   300  		ConfigurationId: c.Id.ConfigurationId,
   301  	}
   302  	c.Id = nil
   303  	return &resultstore.UploadRequest{
   304  		Id:              id,
   305  		UploadOperation: resultstore.UploadRequest_CREATE,
   306  		Resource: &resultstore.UploadRequest_Configuration{
   307  			Configuration: c,
   308  		},
   309  	}
   310  }
   311  
   312  func createTargetUploadRequest(t *resultstore.Target) *resultstore.UploadRequest {
   313  	id := &resultstore.UploadRequest_Id{
   314  		TargetId: t.Id.GetTargetId(),
   315  	}
   316  	t.Id = nil
   317  	return &resultstore.UploadRequest{
   318  		Id:              id,
   319  		UploadOperation: resultstore.UploadRequest_CREATE,
   320  		Resource: &resultstore.UploadRequest_Target{
   321  			Target: t,
   322  		},
   323  	}
   324  }
   325  
   326  func createConfiguredTargetUploadRequest(ct *resultstore.ConfiguredTarget) *resultstore.UploadRequest {
   327  	id := &resultstore.UploadRequest_Id{
   328  		TargetId:        ct.Id.GetTargetId(),
   329  		ConfigurationId: ct.Id.GetConfigurationId(),
   330  	}
   331  	ct.Id = nil
   332  	return &resultstore.UploadRequest{
   333  		Id:              id,
   334  		UploadOperation: resultstore.UploadRequest_CREATE,
   335  		Resource: &resultstore.UploadRequest_ConfiguredTarget{
   336  			ConfiguredTarget: ct,
   337  		},
   338  	}
   339  }
   340  
   341  func createActionUploadRequest(a *resultstore.Action) *resultstore.UploadRequest {
   342  	id := &resultstore.UploadRequest_Id{
   343  		TargetId:        a.Id.GetTargetId(),
   344  		ConfigurationId: a.Id.GetConfigurationId(),
   345  		ActionId:        a.Id.GetActionId(),
   346  	}
   347  	a.Id = nil
   348  	return &resultstore.UploadRequest{
   349  		Id:              id,
   350  		UploadOperation: resultstore.UploadRequest_CREATE,
   351  		Resource: &resultstore.UploadRequest_Action{
   352  			Action: a,
   353  		},
   354  	}
   355  }