go.chromium.org/luci@v0.0.0-20240309015107-7cdc2e660f33/resultdb/internal/services/recorder/create_artifact.go (about)

     1  // Copyright 2020 The LUCI Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package recorder
    16  
    17  import (
    18  	"context"
    19  	"crypto/sha256"
    20  	"encoding/hex"
    21  	"fmt"
    22  	"hash"
    23  	"io"
    24  	"net/http"
    25  	"strconv"
    26  	"strings"
    27  
    28  	"cloud.google.com/go/spanner"
    29  	"github.com/google/uuid"
    30  	"go.opentelemetry.io/otel/attribute"
    31  	"google.golang.org/genproto/googleapis/bytestream"
    32  	"google.golang.org/grpc/codes"
    33  	"google.golang.org/grpc/status"
    34  
    35  	"go.chromium.org/luci/common/data/rand/mathrand"
    36  	"go.chromium.org/luci/common/errors"
    37  	"go.chromium.org/luci/common/logging"
    38  	"go.chromium.org/luci/common/sync/parallel"
    39  	"go.chromium.org/luci/grpc/appstatus"
    40  	"go.chromium.org/luci/grpc/grpcutil"
    41  	"go.chromium.org/luci/server/router"
    42  	"go.chromium.org/luci/server/span"
    43  
    44  	"go.chromium.org/luci/resultdb/internal/artifactcontent"
    45  	"go.chromium.org/luci/resultdb/internal/artifacts"
    46  	"go.chromium.org/luci/resultdb/internal/invocations"
    47  	"go.chromium.org/luci/resultdb/internal/spanutil"
    48  	"go.chromium.org/luci/resultdb/internal/tracing"
    49  	"go.chromium.org/luci/resultdb/pbutil"
    50  	pb "go.chromium.org/luci/resultdb/proto/v1"
    51  )
    52  
// Names of the HTTP request headers read by the artifact creation handler.
const (
	// artifactContentHashHeaderKey names the header carrying the expected
	// content hash of the request body, in the form "sha256:{hash}".
	artifactContentHashHeaderKey = "Content-Hash"
	// artifactContentSizeHeaderKey names the header carrying the declared size
	// of the request body in bytes.
	artifactContentSizeHeaderKey = "Content-Length"
	// artifactContentTypeHeaderKey names the header whose value is recorded as
	// the artifact's content type.
	artifactContentTypeHeaderKey = "Content-Type"
	// updateTokenHeaderKey names the header carrying the update token of the
	// invocation that the artifact belongs to.
	updateTokenHeaderKey         = "Update-Token"
)
    59  
// artifactCreationHandler can handle artifact creation requests.
//
// Request:
//   - The request URL path MUST be a valid artifact name.
//   - The request body MUST be the artifact contents.
//   - The request MUST include an Update-Token header with the value of
//     invocation's update token.
//   - The request MUST include a Content-Length header. Its value must be
//     between 0 and MaxArtifactContentStreamLength, inclusive.
//   - The request MUST include a Content-Hash header with value "sha256:{hash}"
//     where {hash} is a lower-case hex-encoded SHA256 hash of the artifact
//     contents.
//   - The request SHOULD have a Content-Type header.
type artifactCreationHandler struct {
	// RBEInstance is the full name of the RBE instance used for artifact storage.
	// Format: projects/{project}/instances/{instance}.
	RBEInstance                    string
	// NewCASWriter opens a ByteStream write stream. It is called once per
	// upload, and the request body is forwarded to RBE-CAS over that stream.
	NewCASWriter                   func(context.Context) (bytestream.ByteStream_WriteClient, error)
	// MaxArtifactContentStreamLength is the largest accepted Content-Length
	// value, in bytes. Requests declaring a larger size are rejected with
	// InvalidArgument.
	MaxArtifactContentStreamLength int64
	// bufSize is the size, in bytes, of the buffer used to copy the request
	// body to RBE-CAS. If zero, writeToCAS picks a default of at most 1 MiB.
	bufSize                        int
}
    80  
    81  // Handle implements router.Handler.
    82  func (h *artifactCreationHandler) Handle(c *router.Context) {
    83  	ac := &artifactCreator{artifactCreationHandler: h}
    84  	mw := artifactcontent.NewMetricsWriter(c)
    85  	defer func() {
    86  		mw.Upload(c.Request.Context(), ac.size)
    87  	}()
    88  
    89  	err := ac.handle(c)
    90  	st, ok := appstatus.Get(err)
    91  	switch {
    92  	case ok:
    93  		logging.Warningf(c.Request.Context(), "Responding with %s: %s", st.Code(), err)
    94  		http.Error(c.Writer, st.Message(), grpcutil.CodeStatus(st.Code()))
    95  	case err != nil:
    96  		logging.Errorf(c.Request.Context(), "Internal server error: %s", err)
    97  		http.Error(c.Writer, "Internal server error", http.StatusInternalServerError)
    98  	default:
    99  		c.Writer.WriteHeader(http.StatusNoContent)
   100  	}
   101  }
   102  
// artifactCreator handles one artifact creation request.
//
// Its fields, other than the embedded handler, are populated by parseRequest.
type artifactCreator struct {
	*artifactCreationHandler

	// artifactName is the request URL's escaped path without the leading "/".
	artifactName  string
	// invID, testID, resultID and artifactID are the components parsed out of
	// artifactName.
	invID         invocations.ID
	testID        string
	resultID      string
	artifactID    string
	// localParentID is derived from testID and resultID via artifacts.ParentID
	// and is used as the ParentId column of the Artifacts row.
	localParentID string
	// contentType is the value of the Content-Type header; it may be empty.
	contentType   string

	// hash is the value of the Content-Hash header, validated against
	// artifacts.ContentHashRe. It is stored as is in the RBECASHash column.
	hash string
	// size is the parsed value of the Content-Length header.
	size int64
}
   118  
   119  func (ac *artifactCreator) handle(c *router.Context) error {
   120  	ctx := c.Request.Context()
   121  
   122  	// Parse and validate the request.
   123  	if err := ac.parseRequest(c); err != nil {
   124  		return err
   125  	}
   126  
   127  	// Read and verify the current state.
   128  	switch sameExists, err := ac.verifyStateBeforeWriting(ctx); {
   129  	case err != nil:
   130  		return err
   131  	case sameExists:
   132  		return nil
   133  	}
   134  
   135  	// Read the request body through a digest verifying proxy.
   136  	// This is mandatory because RBE-CAS does not guarantee digest verification in
   137  	// all cases.
   138  	ver := &digestVerifier{
   139  		r:            c.Request.Body,
   140  		expectedHash: artifacts.TrimHashPrefix(ac.hash),
   141  		expectedSize: ac.size,
   142  		actualHash:   sha256.New(),
   143  	}
   144  
   145  	// Forward the request body to RBE-CAS.
   146  	if err := ac.writeToCAS(ctx, ver); err != nil {
   147  		return errors.Annotate(err, "failed to write to CAS").Err()
   148  	}
   149  
   150  	if err := ver.ReadVerify(ctx); err != nil {
   151  		return err
   152  	}
   153  
   154  	// Record the artifact in Spanner.
   155  	var realm string
   156  	_, err := span.ReadWriteTransaction(ctx, func(ctx context.Context) (err error) {
   157  		// Verify the state again.
   158  		var sameExists bool
   159  		realm, sameExists, err = ac.verifyState(ctx)
   160  		switch {
   161  		case err != nil:
   162  			return err
   163  		case sameExists:
   164  			return nil
   165  		}
   166  
   167  		span.BufferWrite(ctx, spanutil.InsertMap("Artifacts", map[string]any{
   168  			"InvocationId": ac.invID,
   169  			"ParentId":     ac.localParentID,
   170  			"ArtifactId":   ac.artifactID,
   171  			"ContentType":  ac.contentType,
   172  			"Size":         ac.size,
   173  			"RBECASHash":   ac.hash,
   174  		}))
   175  		return nil
   176  	})
   177  	if err != nil {
   178  		return err
   179  	}
   180  	spanutil.IncRowCount(ctx, 1, spanutil.Artifacts, spanutil.Inserted, realm)
   181  	return nil
   182  }
   183  
   184  // writeToCAS writes contents in r to RBE-CAS.
   185  // ac.hash and ac.size must match the contents.
   186  func (ac *artifactCreator) writeToCAS(ctx context.Context, r io.Reader) (err error) {
   187  	ctx, overallSpan := tracing.Start(ctx, "resultdb.writeToCAS")
   188  	defer func() { tracing.End(overallSpan, err) }()
   189  
   190  	// Protocol:
   191  	// https://github.com/bazelbuild/remote-apis/blob/7802003e00901b4e740fe0ebec1243c221e02ae2/build/bazel/remote/execution/v2/remote_execution.proto#L193-L205
   192  	// https://github.com/googleapis/googleapis/blob/c8e291e6a4d60771219205b653715d5aeec3e96b/google/bytestream/bytestream.proto#L55
   193  
   194  	w, err := ac.NewCASWriter(ctx)
   195  	if err != nil {
   196  		return errors.Annotate(err, "failed to create a CAS writer").Err()
   197  	}
   198  	defer w.CloseSend()
   199  
   200  	bufSize := ac.bufSize
   201  	if bufSize == 0 {
   202  		bufSize = 1024 * 1024
   203  		if bufSize > int(ac.size) {
   204  			bufSize = int(ac.size)
   205  		}
   206  	}
   207  	buf := make([]byte, bufSize)
   208  
   209  	// Copy data from r to w using buffer buf.
   210  	// Include the resource name only in the first request.
   211  	first := true
   212  	bytesSent := 0
   213  	for {
   214  		_, readSpan := tracing.Start(ctx, "resultdb.readChunk")
   215  		n, err := r.Read(buf)
   216  		if err != nil && err != io.EOF {
   217  			tracing.End(readSpan, err)
   218  			if err != io.ErrUnexpectedEOF {
   219  				return errors.Annotate(err, "failed to read artifact contents").Err()
   220  			}
   221  			return appstatus.BadRequest(errors.Annotate(err, "failed to read artifact contents").Err())
   222  		}
   223  		tracing.End(readSpan, nil, attribute.Int("size", n))
   224  		last := err == io.EOF
   225  
   226  		// Prepare the request.
   227  		// WriteRequest message: https://github.com/googleapis/googleapis/blob/c8e291e6a4d60771219205b653715d5aeec3e96b/google/bytestream/bytestream.proto#L128
   228  		req := &bytestream.WriteRequest{
   229  			Data:        buf[:n],
   230  			FinishWrite: last,
   231  			WriteOffset: int64(bytesSent),
   232  		}
   233  
   234  		// Include the resource name only in the first request.
   235  		if first {
   236  			first = false
   237  			req.ResourceName = ac.genWriteResourceName(ctx)
   238  		}
   239  
   240  		// Send the request.
   241  		_, writeSpan := tracing.Start(ctx, "resultdb.writeChunk",
   242  			attribute.Int("size", n),
   243  		)
   244  		// Do not shadow err! It is checked below again.
   245  		if err = w.Send(req); err != nil && err != io.EOF {
   246  			tracing.End(writeSpan, err)
   247  			return errors.Annotate(err, "failed to write data to RBE-CAS").Err()
   248  		}
   249  		tracing.End(writeSpan, nil)
   250  		bytesSent += n
   251  		if last || err == io.EOF {
   252  			// Either this was the last chunk, or server closed the stream.
   253  			break
   254  		}
   255  	}
   256  
   257  	// Read and interpret the response.
   258  	switch res, err := w.CloseAndRecv(); {
   259  	case status.Code(err) == codes.InvalidArgument:
   260  		logging.Warningf(ctx, "RBE-CAS responded with %s", err)
   261  		return appstatus.Errorf(codes.InvalidArgument, "Content-Hash and/or Content-Length do not match the request body")
   262  	case err != nil:
   263  		return errors.Annotate(err, "failed to read RBE-CAS write response").Err()
   264  	case res.CommittedSize == ac.size:
   265  		return nil
   266  	default:
   267  		return errors.Reason("unexpected blob commit size %d, expected %d", res.CommittedSize, ac.size).Err()
   268  	}
   269  }
   270  
   271  // genWriteResourceName generates a random resource name that can be used
   272  // to write the blob to RBE-CAS.
   273  func (ac *artifactCreator) genWriteResourceName(ctx context.Context) string {
   274  	uuidBytes := make([]byte, 16)
   275  	if _, err := mathrand.Read(ctx, uuidBytes); err != nil {
   276  		panic(err)
   277  	}
   278  	return fmt.Sprintf(
   279  		"%s/uploads/%s/blobs/%s/%d",
   280  		ac.RBEInstance,
   281  		uuid.Must(uuid.FromBytes(uuidBytes)),
   282  		artifacts.TrimHashPrefix(ac.hash),
   283  		ac.size)
   284  }
   285  
   286  // parseRequest populates ac fields based on the HTTP request.
   287  func (ac *artifactCreator) parseRequest(c *router.Context) error {
   288  	// Read the artifact name.
   289  	// We must use EscapedPath(), not Path, to preserve test ID's own encoding.
   290  	ac.artifactName = strings.TrimPrefix(c.Request.URL.EscapedPath(), "/")
   291  
   292  	// Parse and validate the artifact name.
   293  	var invIDString string
   294  	var err error
   295  	invIDString, ac.testID, ac.resultID, ac.artifactID, err = pbutil.ParseArtifactName(ac.artifactName)
   296  	if err != nil {
   297  		return appstatus.Errorf(codes.InvalidArgument, "bad artifact name: %s", err)
   298  	}
   299  	ac.invID = invocations.ID(invIDString)
   300  	ac.localParentID = artifacts.ParentID(ac.testID, ac.resultID)
   301  
   302  	// Parse and validate the hash.
   303  	switch ac.hash = c.Request.Header.Get(artifactContentHashHeaderKey); {
   304  	case ac.hash == "":
   305  		return appstatus.Errorf(codes.InvalidArgument, "%s header is missing", artifactContentHashHeaderKey)
   306  	case !artifacts.ContentHashRe.MatchString(ac.hash):
   307  		return appstatus.Errorf(codes.InvalidArgument, "%s header value does not match %s", artifactContentHashHeaderKey, artifacts.ContentHashRe)
   308  	}
   309  
   310  	// Parse and validate the size.
   311  	sizeHeader := c.Request.Header.Get(artifactContentSizeHeaderKey)
   312  	if sizeHeader == "" {
   313  		return appstatus.Errorf(codes.InvalidArgument, "%s header is missing", artifactContentSizeHeaderKey)
   314  	}
   315  	switch ac.size, err = strconv.ParseInt(sizeHeader, 10, 64); {
   316  	case err != nil:
   317  		return appstatus.Errorf(codes.InvalidArgument, "%s header is malformed: %s", artifactContentSizeHeaderKey, err)
   318  	case ac.size < 0 || ac.size > ac.MaxArtifactContentStreamLength:
   319  		return appstatus.Errorf(codes.InvalidArgument, "%s header must be a value between 0 and %d", artifactContentSizeHeaderKey, ac.MaxArtifactContentStreamLength)
   320  	}
   321  
   322  	// Parse and validate the update token.
   323  	updateToken := c.Request.Header.Get(updateTokenHeaderKey)
   324  	if updateToken == "" {
   325  		return appstatus.Errorf(codes.Unauthenticated, "%s header is missing", updateTokenHeaderKey)
   326  	}
   327  	if err := validateInvocationToken(c.Request.Context(), updateToken, ac.invID); err != nil {
   328  		return appstatus.Errorf(codes.PermissionDenied, "invalid %s header value", updateTokenHeaderKey)
   329  	}
   330  
   331  	ac.contentType = c.Request.Header.Get(artifactContentTypeHeaderKey)
   332  
   333  	return nil
   334  }
   335  
   336  // verifyStateBeforeWriting checks Spanner state in a read-only transaction,
   337  // see verifyState comment.
   338  func (ac *artifactCreator) verifyStateBeforeWriting(ctx context.Context) (sameAlreadyExists bool, err error) {
   339  	ctx, cancel := span.ReadOnlyTransaction(ctx)
   340  	defer cancel()
   341  	_, sameAlreadyExists, err = ac.verifyState(ctx)
   342  	return
   343  }
   344  
   345  // verifyState checks if the Spanner state is compatible with creation of the
   346  // artifact. If an identical artifact already exists, sameAlreadyExists is true.
   347  func (ac *artifactCreator) verifyState(ctx context.Context) (realm string, sameAlreadyExists bool, err error) {
   348  	var (
   349  		invState       pb.Invocation_State
   350  		hash           spanner.NullString
   351  		size           spanner.NullInt64
   352  		artifactExists bool
   353  	)
   354  
   355  	// Read the state concurrently.
   356  	err = parallel.FanOutIn(func(work chan<- func() error) {
   357  		work <- func() (err error) {
   358  			return invocations.ReadColumns(ctx, ac.invID, map[string]any{
   359  				"State": &invState, "Realm": &realm,
   360  			})
   361  		}
   362  
   363  		work <- func() error {
   364  			key := ac.invID.Key(ac.localParentID, ac.artifactID)
   365  			err := spanutil.ReadRow(ctx, "Artifacts", key, map[string]any{
   366  				"RBECASHash": &hash,
   367  				"Size":       &size,
   368  			})
   369  			artifactExists = err == nil
   370  			if spanner.ErrCode(err) == codes.NotFound {
   371  				// This is expected.
   372  				return nil
   373  			}
   374  			return err
   375  		}
   376  	})
   377  
   378  	// Interpret the state.
   379  	switch {
   380  	case err != nil:
   381  		return
   382  
   383  	case invState != pb.Invocation_ACTIVE:
   384  		err = appstatus.Errorf(codes.FailedPrecondition, "%s is not active", ac.invID.Name())
   385  		return
   386  
   387  	case hash.Valid && hash.StringVal == ac.hash && size.Valid && size.Int64 == ac.size:
   388  		// The same artifact already exists.
   389  		sameAlreadyExists = true
   390  		return
   391  
   392  	case artifactExists:
   393  		// A different artifact already exists.
   394  		err = appstatus.Errorf(codes.AlreadyExists, "artifact %q already exists", ac.artifactName)
   395  		return
   396  	}
   397  	return
   398  }
   399  
   400  // digestVerifier is an io.Reader that also verifies the digest.
   401  type digestVerifier struct {
   402  	r            io.Reader
   403  	expectedSize int64
   404  	expectedHash string
   405  
   406  	actualSize int64
   407  	actualHash hash.Hash
   408  }
   409  
   410  func (v *digestVerifier) Read(p []byte) (n int, err error) {
   411  	n, err = v.r.Read(p)
   412  	v.actualSize += int64(n)
   413  	v.actualHash.Write(p[:n])
   414  	return n, err
   415  }
   416  
   417  // ReadVerify reads through the rest of the v.r
   418  // and returns a non-nil error if the content have unexpected hash or size.
   419  // The error may be annotated with appstatus.
   420  func (v *digestVerifier) ReadVerify(ctx context.Context) (err error) {
   421  	_, ts := tracing.Start(ctx, "resultdb.digestVerifier.ReadVerify")
   422  	defer func() { tracing.End(ts, err) }()
   423  
   424  	// Read until the end.
   425  	if _, err := io.Copy(io.Discard, v); err != nil {
   426  		return err
   427  	}
   428  
   429  	// Verify size.
   430  	if v.actualSize != v.expectedSize {
   431  		return appstatus.Errorf(
   432  			codes.InvalidArgument,
   433  			"Content-Length header value %d does not match the length of the request body, %d",
   434  			v.expectedSize,
   435  			v.actualSize,
   436  		)
   437  	}
   438  
   439  	// Verify hash.
   440  	hashFromBody := hex.EncodeToString(v.actualHash.Sum(nil))
   441  	hashFromHeader := v.expectedHash
   442  	if hashFromBody != hashFromHeader {
   443  		return appstatus.Errorf(
   444  			codes.InvalidArgument,
   445  			`Content-Hash header value "%s" does not match the hash of the request body, "%s"`,
   446  			artifacts.AddHashPrefix(hashFromHeader),
   447  			artifacts.AddHashPrefix(hashFromBody),
   448  		)
   449  	}
   450  
   451  	return nil
   452  }