go.chromium.org/luci@v0.0.0-20240309015107-7cdc2e660f33/analysis/internal/clustering/chunkstore/client.go (about) 1 // Copyright 2022 The LUCI Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package chunkstore 16 17 import ( 18 "context" 19 "crypto/rand" 20 "encoding/hex" 21 "fmt" 22 "io" 23 "regexp" 24 25 "cloud.google.com/go/storage" 26 "go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc" 27 "google.golang.org/api/option" 28 "google.golang.org/grpc" 29 "google.golang.org/protobuf/proto" 30 31 "go.chromium.org/luci/common/errors" 32 "go.chromium.org/luci/grpc/grpcmon" 33 "go.chromium.org/luci/server/auth" 34 35 cpb "go.chromium.org/luci/analysis/internal/clustering/proto" 36 "go.chromium.org/luci/analysis/pbutil" 37 ) 38 39 // objectRe matches validly formed object IDs. 40 var objectRe = regexp.MustCompile(`^[0-9a-f]{32}$`) 41 42 // Client provides methods to store and retrieve chunks of test failures. 43 type Client struct { 44 // client is the GCS client used to access chunks. 45 client *storage.Client 46 // bucket is the GCS bucket in which chunks are stored. 47 bucket string 48 } 49 50 // NewClient initialises a new chunk storage client, that uses the specified 51 // GCS bucket as the backing store. 52 func NewClient(ctx context.Context, bucket string) (*Client, error) { 53 // Credentials with Cloud scope. 54 creds, err := auth.GetPerRPCCredentials(ctx, auth.AsSelf, auth.WithScopes(auth.CloudOAuthScopes...)) 55 if err != nil { 56 return nil, errors.Annotate(err, "failed to get PerRPCCredentials").Err() 57 } 58 59 // Initialize the client. 60 options := []option.ClientOption{ 61 option.WithGRPCDialOption(grpc.WithPerRPCCredentials(creds)), 62 option.WithGRPCDialOption(grpc.WithStatsHandler(&grpcmon.ClientRPCStatsMonitor{})), 63 option.WithGRPCDialOption(grpc.WithUnaryInterceptor(otelgrpc.UnaryClientInterceptor())), 64 option.WithGRPCDialOption(grpc.WithStreamInterceptor(otelgrpc.StreamClientInterceptor())), 65 option.WithScopes(storage.ScopeReadWrite), 66 } 67 cl, err := storage.NewClient(ctx, options...) 68 69 if err != nil { 70 return nil, errors.Annotate(err, "failed to instantiate Cloud Storage client").Err() 71 } 72 return &Client{ 73 client: cl, 74 bucket: bucket, 75 }, nil 76 } 77 78 // Close releases resources associated with the client. 79 func (c *Client) Close() { 80 c.client.Close() 81 } 82 83 // Put saves the given chunk to storage. If successful, it returns 84 // the randomly-assigned ID of the created object. 85 func (c *Client) Put(ctx context.Context, project string, content *cpb.Chunk) (objectID string, retErr error) { 86 if err := pbutil.ValidateProject(project); err != nil { 87 return "", err 88 } 89 b, err := proto.Marshal(content) 90 if err != nil { 91 return "", errors.Annotate(err, "marhsalling chunk").Err() 92 } 93 objID, err := generateObjectID() 94 if err != nil { 95 return "", err 96 } 97 98 name := FileName(project, objID) 99 doesNotExist := storage.Conditions{ 100 DoesNotExist: true, 101 } 102 // Only create the file if it does not exist. The risk of collision if 103 // ID generation is working correctly is extremely remote so this mostly 104 // defensive coding and a failsafe against bad randomness in ID generation. 105 obj := c.client.Bucket(c.bucket).Object(name).If(doesNotExist) 106 w := obj.NewWriter(ctx) 107 defer func() { 108 if err := w.Close(); err != nil && retErr == nil { 109 retErr = errors.Annotate(err, "closing object writer").Err() 110 } 111 }() 112 113 // As the file is small (<8MB), set ChunkSize to object size to avoid 114 // excessive memory usage, as per the documentation. Otherwise use 115 // the default ChunkSize. 116 if len(b) < 8*1024*1024 { 117 w.ChunkSize = len(b) 118 } 119 w.ContentType = "application/x-protobuf" 120 _, err = w.Write(b) 121 if err != nil { 122 return "", errors.Annotate(err, "writing object %q", name).Err() 123 } 124 return objID, nil 125 } 126 127 // Get retrieves the chunk with the specified object ID and returns it. 128 func (c *Client) Get(ctx context.Context, project, objectID string) (chunk *cpb.Chunk, retErr error) { 129 if err := pbutil.ValidateProject(project); err != nil { 130 return nil, err 131 } 132 if err := validateObjectID(objectID); err != nil { 133 return nil, err 134 } 135 name := FileName(project, objectID) 136 obj := c.client.Bucket(c.bucket).Object(name) 137 r, err := obj.NewReader(ctx) 138 if err != nil { 139 return nil, errors.Annotate(err, "creating reader %q", name).Err() 140 } 141 defer func() { 142 if err := r.Close(); err != nil && retErr == nil { 143 retErr = errors.Annotate(err, "closing object reader").Err() 144 } 145 }() 146 147 // Allocate a buffer of the correct size and use io.ReadFull instead of 148 // io.ReadAll to avoid needlessly reallocating slices. 149 b := make([]byte, r.Attrs.Size) 150 if _, err := io.ReadFull(r, b); err != nil { 151 return nil, errors.Annotate(err, "read object %q", name).Err() 152 } 153 content := &cpb.Chunk{} 154 if err := proto.Unmarshal(b, content); err != nil { 155 return nil, errors.Annotate(err, "unmarshal chunk").Err() 156 } 157 return content, nil 158 } 159 160 func validateObjectID(id string) error { 161 if !objectRe.MatchString(id) { 162 return fmt.Errorf("object ID %q is not a valid", id) 163 } 164 return nil 165 } 166 167 // generateObjectID returns a random 128-bit object ID, encoded as 168 // 32 lowercase hexadecimal characters. 169 func generateObjectID() (string, error) { 170 randomBytes := make([]byte, 16) 171 _, err := rand.Read(randomBytes) 172 if err != nil { 173 return "", err 174 } 175 return hex.EncodeToString(randomBytes), nil 176 } 177 178 // FileName returns the file path in GCS for the object with the 179 // given project and objectID. Exposed for testing only. 180 func FileName(project, objectID string) string { 181 return fmt.Sprintf("/projects/%s/chunks/%s.binarypb", project, objectID) 182 }