go.chromium.org/luci@v0.0.0-20240309015107-7cdc2e660f33/analysis/internal/clustering/chunkstore/client.go (about)

     1  // Copyright 2022 The LUCI Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package chunkstore
    16  
    17  import (
    18  	"context"
    19  	"crypto/rand"
    20  	"encoding/hex"
    21  	"fmt"
    22  	"io"
    23  	"regexp"
    24  
    25  	"cloud.google.com/go/storage"
    26  	"go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc"
    27  	"google.golang.org/api/option"
    28  	"google.golang.org/grpc"
    29  	"google.golang.org/protobuf/proto"
    30  
    31  	"go.chromium.org/luci/common/errors"
    32  	"go.chromium.org/luci/grpc/grpcmon"
    33  	"go.chromium.org/luci/server/auth"
    34  
    35  	cpb "go.chromium.org/luci/analysis/internal/clustering/proto"
    36  	"go.chromium.org/luci/analysis/pbutil"
    37  )
    38  
// objectRe matches validly formed object IDs: exactly 32 lowercase
// hexadecimal characters, which is the format generateObjectID produces.
// It is anchored at both ends, so no surrounding text is accepted.
var objectRe = regexp.MustCompile(`^[0-9a-f]{32}$`)
    41  
// Client provides methods to store and retrieve chunks of test failures.
// Chunks are kept as objects in a single GCS bucket. Construct a Client
// with NewClient and release it with Close.
type Client struct {
	// client is the GCS client used to access chunks.
	client *storage.Client
	// bucket is the name of the GCS bucket in which chunks are stored.
	bucket string
}
    49  
    50  // NewClient initialises a new chunk storage client, that uses the specified
    51  // GCS bucket as the backing store.
    52  func NewClient(ctx context.Context, bucket string) (*Client, error) {
    53  	// Credentials with Cloud scope.
    54  	creds, err := auth.GetPerRPCCredentials(ctx, auth.AsSelf, auth.WithScopes(auth.CloudOAuthScopes...))
    55  	if err != nil {
    56  		return nil, errors.Annotate(err, "failed to get PerRPCCredentials").Err()
    57  	}
    58  
    59  	// Initialize the client.
    60  	options := []option.ClientOption{
    61  		option.WithGRPCDialOption(grpc.WithPerRPCCredentials(creds)),
    62  		option.WithGRPCDialOption(grpc.WithStatsHandler(&grpcmon.ClientRPCStatsMonitor{})),
    63  		option.WithGRPCDialOption(grpc.WithUnaryInterceptor(otelgrpc.UnaryClientInterceptor())),
    64  		option.WithGRPCDialOption(grpc.WithStreamInterceptor(otelgrpc.StreamClientInterceptor())),
    65  		option.WithScopes(storage.ScopeReadWrite),
    66  	}
    67  	cl, err := storage.NewClient(ctx, options...)
    68  
    69  	if err != nil {
    70  		return nil, errors.Annotate(err, "failed to instantiate Cloud Storage client").Err()
    71  	}
    72  	return &Client{
    73  		client: cl,
    74  		bucket: bucket,
    75  	}, nil
    76  }
    77  
    78  // Close releases resources associated with the client.
    79  func (c *Client) Close() {
    80  	c.client.Close()
    81  }
    82  
    83  // Put saves the given chunk to storage. If successful, it returns
    84  // the randomly-assigned ID of the created object.
    85  func (c *Client) Put(ctx context.Context, project string, content *cpb.Chunk) (objectID string, retErr error) {
    86  	if err := pbutil.ValidateProject(project); err != nil {
    87  		return "", err
    88  	}
    89  	b, err := proto.Marshal(content)
    90  	if err != nil {
    91  		return "", errors.Annotate(err, "marhsalling chunk").Err()
    92  	}
    93  	objID, err := generateObjectID()
    94  	if err != nil {
    95  		return "", err
    96  	}
    97  
    98  	name := FileName(project, objID)
    99  	doesNotExist := storage.Conditions{
   100  		DoesNotExist: true,
   101  	}
   102  	// Only create the file if it does not exist. The risk of collision if
   103  	// ID generation is working correctly is extremely remote so this mostly
   104  	// defensive coding and a failsafe against bad randomness in ID generation.
   105  	obj := c.client.Bucket(c.bucket).Object(name).If(doesNotExist)
   106  	w := obj.NewWriter(ctx)
   107  	defer func() {
   108  		if err := w.Close(); err != nil && retErr == nil {
   109  			retErr = errors.Annotate(err, "closing object writer").Err()
   110  		}
   111  	}()
   112  
   113  	// As the file is small (<8MB), set ChunkSize to object size to avoid
   114  	// excessive memory usage, as per the documentation. Otherwise use
   115  	// the default ChunkSize.
   116  	if len(b) < 8*1024*1024 {
   117  		w.ChunkSize = len(b)
   118  	}
   119  	w.ContentType = "application/x-protobuf"
   120  	_, err = w.Write(b)
   121  	if err != nil {
   122  		return "", errors.Annotate(err, "writing object %q", name).Err()
   123  	}
   124  	return objID, nil
   125  }
   126  
   127  // Get retrieves the chunk with the specified object ID and returns it.
   128  func (c *Client) Get(ctx context.Context, project, objectID string) (chunk *cpb.Chunk, retErr error) {
   129  	if err := pbutil.ValidateProject(project); err != nil {
   130  		return nil, err
   131  	}
   132  	if err := validateObjectID(objectID); err != nil {
   133  		return nil, err
   134  	}
   135  	name := FileName(project, objectID)
   136  	obj := c.client.Bucket(c.bucket).Object(name)
   137  	r, err := obj.NewReader(ctx)
   138  	if err != nil {
   139  		return nil, errors.Annotate(err, "creating reader %q", name).Err()
   140  	}
   141  	defer func() {
   142  		if err := r.Close(); err != nil && retErr == nil {
   143  			retErr = errors.Annotate(err, "closing object reader").Err()
   144  		}
   145  	}()
   146  
   147  	// Allocate a buffer of the correct size and use io.ReadFull instead of
   148  	// io.ReadAll to avoid needlessly reallocating slices.
   149  	b := make([]byte, r.Attrs.Size)
   150  	if _, err := io.ReadFull(r, b); err != nil {
   151  		return nil, errors.Annotate(err, "read object %q", name).Err()
   152  	}
   153  	content := &cpb.Chunk{}
   154  	if err := proto.Unmarshal(b, content); err != nil {
   155  		return nil, errors.Annotate(err, "unmarshal chunk").Err()
   156  	}
   157  	return content, nil
   158  }
   159  
   160  func validateObjectID(id string) error {
   161  	if !objectRe.MatchString(id) {
   162  		return fmt.Errorf("object ID %q is not a valid", id)
   163  	}
   164  	return nil
   165  }
   166  
   167  // generateObjectID returns a random 128-bit object ID, encoded as
   168  // 32 lowercase hexadecimal characters.
   169  func generateObjectID() (string, error) {
   170  	randomBytes := make([]byte, 16)
   171  	_, err := rand.Read(randomBytes)
   172  	if err != nil {
   173  		return "", err
   174  	}
   175  	return hex.EncodeToString(randomBytes), nil
   176  }
   177  
   178  // FileName returns the file path in GCS for the object with the
   179  // given project and objectID. Exposed for testing only.
   180  func FileName(project, objectID string) string {
   181  	return fmt.Sprintf("/projects/%s/chunks/%s.binarypb", project, objectID)
   182  }