go.chromium.org/luci@v0.0.0-20240309015107-7cdc2e660f33/resultdb/sink/artifact_uploader.go

// Copyright 2020 The LUCI Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package sink

import (
	"bytes"
	"context"
	"crypto/sha256"
	"encoding/hex"
	"fmt"
	"io"
	"net/http"
	"os"

	"go.chromium.org/luci/common/errors"
	"go.chromium.org/luci/common/retry"
	"go.chromium.org/luci/common/retry/transient"
	"go.chromium.org/luci/common/sync/dispatcher/buffer"

	pb "go.chromium.org/luci/resultdb/proto/v1"
)

// artifactUploader provides functions for uploading artifacts to ResultDB.
type artifactUploader struct {
	// Recorder is a gRPC client used to upload artifacts in batches.
	Recorder pb.RecorderClient

	// StreamClient is an HTTP client used to upload artifacts that are too large
	// to be batched.
	StreamClient *http.Client
	// StreamHost is the host of a ResultDB service instance, to which artifacts
	// are streamed.
	StreamHost string

	// MaxBatchable is the maximum size of an artifact that can be batched.
	MaxBatchable int64
}

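// A minimal construction sketch (hypothetical values; in production the sink
// server wires these fields from its configuration, with authenticated clients):
//
//	uploader := &artifactUploader{
//		Recorder:     recorderClient,        // a pb.RecorderClient, e.g. from a pRPC connection
//		StreamClient: &http.Client{},        // must carry credentials in real use
//		StreamHost:   "results.example.com", // hypothetical ResultDB host
//		MaxBatchable: 2 * 1024 * 1024,       // e.g. 2 MiB; the actual limit is set by the sink
//	}
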
// StreamUpload uploads the artifact contents to StreamHost in a streaming
// manner via an HTTP PUT request.
func (u *artifactUploader) StreamUpload(ctx context.Context, t *uploadTask, updateToken string) error {
	if t.art.GetGcsUri() != "" {
		return errors.Reason("StreamUpload does not support gcsUri upload").Err()
	}

	var body io.ReadSeeker
	if fp := t.art.GetFilePath(); fp == "" {
		body = bytes.NewReader(t.art.GetContents())
	} else {
		fh, err := os.Open(fp)
		if err != nil {
			return err
		}
		defer fh.Close()
		body = fh
	}

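	// The request URL's path is the artifact's ResultDB resource name, e.g.
	// https://<StreamHost>/invocations/<inv-id>/artifacts/<artifact-id>
	// (illustrative; t.artName carries the actual resource name).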
	req, err := http.NewRequestWithContext(
		ctx, "PUT", fmt.Sprintf("https://%s/%s", u.StreamHost, t.artName), body)
	if err != nil {
		return errors.Annotate(err, "newHTTPRequest").Err()
	}

	// Client.Do always closes the Body on exit, whether there was an error or not.
	// It also closes the Body on 3xx responses, which require the request to be
	// re-sent. If req.GetBody != nil, Client.Do calls GetBody to get a new copy of
	// the body and re-sends the request on 3xx responses.
	//
	// http.NewRequestWithContext() returns an HTTP request with
	// - GetBody and ContentLength set, if the given body is a *bytes.Buffer,
	//   *bytes.Reader, or *strings.Reader,
	// - nil GetBody and ContentLength unset, if the given body is an *os.File.
	//
	// Therefore, if the body is an *os.File, the caller is responsible for setting
	// GetBody and ContentLength, as necessary.
	if fh, ok := body.(*os.File); ok {
		// Prevent the file handle from being closed by Client.Do. The file handle,
		// body, will be closed by the deferred call above. With NopCloser(),
		// GetBody() can simply reset the cursor and return the handle without
		// reopening the file.
		req.Body = io.NopCloser(body)
		req.GetBody = func() (io.ReadCloser, error) {
			if _, err := body.Seek(0, io.SeekStart); err != nil {
				return nil, err
			}
			return io.NopCloser(body), nil
		}

		st, err := fh.Stat()
		if err != nil {
			return err
		}
		req.ContentLength = st.Size()
	}

	// Calculate the hash and rewind the position back to the beginning so that
	// the request body can be re-read by StreamClient.Do.
	hash, err := calculateHash(body)
	if err != nil {
		return errors.Annotate(err, "artifact-hash").Err()
	}
	if _, err := body.Seek(0, io.SeekStart); err != nil {
		return err
	}
	req.Header.Add("Content-Hash", hash)
	if t.art.ContentType != "" {
		req.Header.Add("Content-Type", t.art.ContentType)
	}
	req.Header.Add("Update-Token", updateToken)
	return u.sendHTTP(req)
}

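// sendHTTP sends the request via StreamClient, retrying transient failures,
// i.e., 408, 429, and 5xx responses, with the default retry policy.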
func (u *artifactUploader) sendHTTP(req *http.Request) error {
	return retry.Retry(req.Context(), transient.Only(retry.Default), func() error {
		// Client.Do consumes the request body, so restore a fresh copy before each
		// attempt. GetBody is set by http.NewRequestWithContext for in-memory
		// bodies and by StreamUpload for file-backed bodies.
		if req.GetBody != nil {
			body, err := req.GetBody()
			if err != nil {
				return errors.Annotate(err, "failed to reset the request body").Err()
			}
			req.Body = body
		}

		resp, err := u.StreamClient.Do(req)
		if err != nil {
			return errors.Annotate(err, "failed to send HTTP request").Err()
		}
		defer resp.Body.Close()

		code := resp.StatusCode
		// ResultDB returns StatusNoContent on success.
		if code == http.StatusNoContent {
			return nil
		}

		// Tag the error as a transient error, if retriable.
		hErr := errors.Reason("http request failed (%d): %s", code, resp.Status)
		if code == http.StatusRequestTimeout || code == http.StatusTooManyRequests || code >= 500 {
			hErr = hErr.Tag(transient.Tag)
		}
		return hErr.Err()
	}, nil)
}

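// calculateHash reads all of input and returns its SHA-256 digest in the
// "sha256:<hex>" form expected by the Content-Hash header. For example, an
// empty input yields
// "sha256:e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855".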
func calculateHash(input io.Reader) (string, error) {
	hash := sha256.New()
	if _, err := io.Copy(hash, input); err != nil {
		return "", err
	}
	return "sha256:" + hex.EncodeToString(hash.Sum(nil)), nil
}

// newBatchCreateArtifactsRequest returns a BatchCreateArtifactsRequest with
// at most 500 items, capping the sum of the artifact sizes at maxSum.
//
// Returns an error if tasks contains an item with an artifact larger than
// maxSum.
func newBatchCreateArtifactsRequest(maxSum int64, tasks []buffer.BatchItem) (*pb.BatchCreateArtifactsRequest, error) {
	l := len(tasks)
	if l > 500 {
		l = 500
	}

	var sum int64
	reqs := make([]*pb.CreateArtifactRequest, 0, l)
	for i := 0; i < l; i++ {
		ut := tasks[i].Item.(*uploadTask)

		// artifactChannel.schedule() should have sent oversized artifacts to
		// streamChannel instead.
		if ut.size > maxSum {
			return nil, errors.Reason("an artifact is greater than %d", maxSum).Err()
		}
		// If adding this artifact would push the sum over maxSum, stop the iteration.
		if sum+ut.size > maxSum {
			break
		}

		r, err := ut.CreateRequest()
		if err != nil {
			return nil, errors.Annotate(err, "CreateRequest").Err()
		}
		reqs = append(reqs, r)
		sum += ut.size
	}
	return &pb.BatchCreateArtifactsRequest{Requests: reqs}, nil
}

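// BatchUpload uploads the artifacts in b via Recorder.BatchCreateArtifacts,
// splitting b.Data into successive requests that respect both the 500-item
// limit and u.MaxBatchable. If b.Meta carries the request from a previous
// attempt, it is re-sent first.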
func (u *artifactUploader) BatchUpload(ctx context.Context, b *buffer.Batch) error {
	var req *pb.BatchCreateArtifactsRequest
	var err error
	if b.Meta != nil {
		req = b.Meta.(*pb.BatchCreateArtifactsRequest)
		if _, err = u.Recorder.BatchCreateArtifacts(ctx, req); err != nil {
			return err
		}
	}

	// This loop handles the following constraints.
	//
	// 1) A pb.BatchCreateArtifactsRequest can contain at most 500 artifacts.
	// 2) The sum of the artifact content sizes must be <= u.MaxBatchable.
	// 3) The size of the input artifacts varies.
	//
	// It's possible that a buffer.Batch contains 500 large artifacts, e.g. 1 MiB
	// each. To avoid loading the contents of all the artifacts unnecessarily, this
	// loop slices b.Data by 500 items or u.MaxBatchable bytes, and creates a batch
	// request only for the tasks handled in the current iteration.
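	//
	// For example (hypothetical numbers): with u.MaxBatchable = 10 MiB and 500
	// pending 1 MiB artifacts, each iteration batches 10 requests, so the batch
	// drains in 50 RPCs instead of one oversized request.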
	for len(b.Data) > 0 {
		if req, err = newBatchCreateArtifactsRequest(u.MaxBatchable, b.Data); err != nil {
			return errors.Annotate(err, "newBatchCreateArtifactsRequest").Err()
		}

		b.Meta = req
		if _, err := u.Recorder.BatchCreateArtifacts(ctx, req); err != nil {
			return err
		}
		b.Data = b.Data[len(req.Requests):]
	}
	return nil
}