github.com/weaviate/weaviate@v1.24.6/modules/backup-gcs/client.go (about) 1 // _ _ 2 // __ _____ __ ___ ___ __ _| |_ ___ 3 // \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \ 4 // \ V V / __/ (_| |\ V /| | (_| | || __/ 5 // \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___| 6 // 7 // Copyright © 2016 - 2024 Weaviate B.V. All rights reserved. 8 // 9 // CONTACT: hello@weaviate.io 10 // 11 12 package modstggcs 13 14 import ( 15 "context" 16 "fmt" 17 "io" 18 "os" 19 "path" 20 "strings" 21 "time" 22 23 "cloud.google.com/go/storage" 24 "github.com/googleapis/gax-go/v2" 25 "github.com/pkg/errors" 26 "github.com/weaviate/weaviate/entities/backup" 27 "github.com/weaviate/weaviate/usecases/monitoring" 28 "golang.org/x/oauth2/google" 29 "google.golang.org/api/option" 30 ) 31 32 type gcsClient struct { 33 client *storage.Client 34 config clientConfig 35 projectID string 36 dataPath string 37 } 38 39 func newClient(ctx context.Context, config *clientConfig, dataPath string) (*gcsClient, error) { 40 options := []option.ClientOption{} 41 useAuth := strings.ToLower(os.Getenv("BACKUP_GCS_USE_AUTH")) != "false" 42 if useAuth { 43 scopes := []string{ 44 "https://www.googleapis.com/auth/devstorage.read_write", 45 } 46 creds, err := google.FindDefaultCredentials(ctx, scopes...) 47 if err != nil { 48 return nil, errors.Wrap(err, "find default credentials") 49 } 50 options = append(options, option.WithCredentials(creds)) 51 } else { 52 options = append(options, option.WithoutAuthentication()) 53 } 54 projectID := os.Getenv("GOOGLE_CLOUD_PROJECT") 55 if len(projectID) == 0 { 56 projectID = os.Getenv("GCLOUD_PROJECT") 57 if len(projectID) == 0 { 58 projectID = os.Getenv("GCP_PROJECT") 59 } 60 } 61 client, err := storage.NewClient(ctx, options...) 62 if err != nil { 63 return nil, errors.Wrap(err, "create client") 64 } 65 66 client.SetRetry(storage.WithBackoff(gax.Backoff{ 67 Initial: 2 * time.Second, // Note: the client uses a jitter internally 68 Max: 60 * time.Second, 69 Multiplier: 3, 70 }), 71 storage.WithPolicy(storage.RetryAlways), 72 ) 73 return &gcsClient{client, *config, projectID, dataPath}, nil 74 } 75 76 func (g *gcsClient) getObject(ctx context.Context, bucket *storage.BucketHandle, 77 backupID, objectName string, 78 ) ([]byte, error) { 79 // Create bucket reader 80 obj := bucket.Object(objectName) 81 reader, err := obj.NewReader(ctx) 82 if err != nil { 83 if errors.Is(err, storage.ErrObjectNotExist) { 84 return nil, err 85 } 86 return nil, errors.Wrapf(err, "new reader: %v", objectName) 87 } 88 // Read file contents 89 content, err := io.ReadAll(reader) 90 if err != nil { 91 return nil, errors.Wrapf(err, "read object: %v", objectName) 92 } 93 94 metric, err := monitoring.GetMetrics().BackupRestoreDataTransferred.GetMetricWithLabelValues(Name, "class") 95 if err == nil { 96 metric.Add(float64(len(content))) 97 } 98 return content, nil 99 } 100 101 func (g *gcsClient) HomeDir(backupID string) string { 102 return "gs://" + path.Join(g.config.Bucket, 103 g.makeObjectName(backupID)) 104 } 105 106 func (g *gcsClient) findBucket(ctx context.Context) (*storage.BucketHandle, error) { 107 bucket := g.client.Bucket(g.config.Bucket) 108 109 if _, err := bucket.Attrs(ctx); err != nil { 110 return nil, err 111 } 112 113 return bucket, nil 114 } 115 116 func (g *gcsClient) makeObjectName(parts ...string) string { 117 base := path.Join(parts...) 118 return path.Join(g.config.BackupPath, base) 119 } 120 121 func (g *gcsClient) GetObject(ctx context.Context, backupID, key string) ([]byte, error) { 122 objectName := g.makeObjectName(backupID, key) 123 124 if err := ctx.Err(); err != nil { 125 return nil, backup.NewErrContextExpired(errors.Wrapf(err, "get object '%s'", objectName)) 126 } 127 128 bucket, err := g.findBucket(ctx) 129 if err != nil { 130 if errors.Is(err, storage.ErrBucketNotExist) { 131 return nil, backup.NewErrNotFound(errors.Wrapf(err, "get object '%s'", objectName)) 132 } 133 return nil, backup.NewErrInternal(errors.Wrapf(err, "get object '%s'", objectName)) 134 } 135 136 contents, err := g.getObject(ctx, bucket, backupID, objectName) 137 if err != nil { 138 if errors.Is(err, storage.ErrObjectNotExist) { 139 return nil, backup.NewErrNotFound(errors.Wrapf(err, "get object '%s'", objectName)) 140 } 141 return nil, backup.NewErrInternal(errors.Wrapf(err, "get object '%s'", objectName)) 142 } 143 144 return contents, nil 145 } 146 147 // PutFile creates an object with contents from file at filePath. 148 func (g *gcsClient) PutFile(ctx context.Context, backupID, key, srcPath string) error { 149 bucket, err := g.findBucket(ctx) 150 if err != nil { 151 return fmt.Errorf("find bucket: %w", err) 152 } 153 154 // open source file 155 filePath := path.Join(g.dataPath, srcPath) 156 file, err := os.Open(filePath) 157 if err != nil { 158 return fmt.Errorf("os.open %q: %w", filePath, err) 159 } 160 defer file.Close() 161 162 // create a new writer 163 object := g.makeObjectName(backupID, key) 164 writer := bucket.Object(object).NewWriter(ctx) 165 writer.ContentType = "application/octet-stream" 166 writer.Metadata = map[string]string{"backup-id": backupID} 167 168 // if we return early make sure writer is closed 169 closeWriter := true 170 defer func() { 171 if closeWriter { 172 writer.Close() 173 } 174 }() 175 176 nBytes, err := io.Copy(writer, file) 177 if err != nil { 178 return fmt.Errorf("io.copy %q %q: %w", object, filePath, err) 179 } 180 closeWriter = false 181 if err := writer.Close(); err != nil { 182 return fmt.Errorf("writer.close %q: %w", filePath, err) 183 } 184 metric, err := monitoring.GetMetrics().BackupStoreDataTransferred.GetMetricWithLabelValues("backup-gcs", "class") 185 if err == nil { 186 metric.Add(float64(nBytes)) 187 } 188 return nil 189 } 190 191 func (g *gcsClient) PutObject(ctx context.Context, backupID, key string, byes []byte) error { 192 bucket, err := g.findBucket(ctx) 193 if err != nil { 194 return errors.Wrap(err, "find bucket") 195 } 196 197 objectName := g.makeObjectName(backupID, key) 198 obj := bucket.Object(objectName) 199 writer := obj.NewWriter(ctx) 200 writer.ContentType = "application/octet-stream" 201 writer.Metadata = map[string]string{ 202 "backup-id": backupID, 203 } 204 if _, err := writer.Write(byes); err != nil { 205 return errors.Wrapf(err, "write file: %v", objectName) 206 } 207 if err := writer.Close(); err != nil { 208 return errors.Wrapf(err, "close writer for file: %v", objectName) 209 } 210 211 metric, err := monitoring.GetMetrics().BackupStoreDataTransferred.GetMetricWithLabelValues("backup-gcs", "class") 212 if err == nil { 213 metric.Add(float64(len(byes))) 214 } 215 216 return nil 217 } 218 219 func (g *gcsClient) Initialize(ctx context.Context, backupID string) error { 220 key := "access-check" 221 222 if err := g.PutObject(ctx, backupID, key, []byte("")); err != nil { 223 return errors.Wrap(err, "failed to access-check gcs backup module") 224 } 225 226 bucket, err := g.findBucket(ctx) 227 if err != nil { 228 return errors.Wrap(err, "find bucket") 229 } 230 231 objectName := g.makeObjectName(backupID, key) 232 if err := bucket.Object(objectName).Delete(ctx); err != nil { 233 return errors.Wrap(err, "failed to remove access-check gcs backup module") 234 } 235 236 return nil 237 } 238 239 // WriteToFile downloads an object and store its content in destPath 240 // The file destPath will be created if it doesn't exit 241 func (g *gcsClient) WriteToFile(ctx context.Context, backupID, key, destPath string) (err error) { 242 bucket, err := g.findBucket(ctx) 243 if err != nil { 244 return fmt.Errorf("find bucket: %w", err) 245 } 246 247 // validate destination path 248 if st, err := os.Stat(destPath); err == nil { 249 if st.IsDir() { 250 return fmt.Errorf("file is a directory") 251 } 252 } else if !os.IsNotExist(err) { 253 return err 254 } 255 256 // create empty file 257 dir := path.Dir(destPath) 258 if err := os.MkdirAll(dir, os.ModePerm); err != nil { 259 return fmt.Errorf("os.mkdir %q: %w", dir, err) 260 } 261 file, err := os.Create(destPath) 262 if err != nil { 263 return fmt.Errorf("os.create %q: %w", destPath, err) 264 } 265 266 // make sure to close and delete in case we return early 267 closeAndRemove := true 268 defer func() { 269 if closeAndRemove { 270 file.Close() 271 os.Remove(destPath) 272 } 273 }() 274 275 // create reader 276 object := g.makeObjectName(backupID, key) 277 rc, err := bucket.Object(object).NewReader(ctx) 278 if err != nil { 279 return fmt.Errorf("find object %q: %w", object, err) 280 } 281 defer rc.Close() 282 283 // transfer content to the file 284 if _, err := io.Copy(file, rc); err != nil { 285 return fmt.Errorf("io.Copy:%q %q: %w", destPath, object, err) 286 } 287 closeAndRemove = false 288 if err = file.Close(); err != nil { 289 return fmt.Errorf("f.Close %q: %w", destPath, err) 290 } 291 292 return nil 293 } 294 295 func (g *gcsClient) Write(ctx context.Context, backupID, key string, r io.ReadCloser) (int64, error) { 296 defer r.Close() 297 298 bucket, err := g.findBucket(ctx) 299 if err != nil { 300 return 0, fmt.Errorf("find bucket: %w", err) 301 } 302 303 // create a new writer 304 path := g.makeObjectName(backupID, key) 305 writer := bucket.Object(path).NewWriter(ctx) 306 writer.ContentType = "application/octet-stream" 307 writer.Metadata = map[string]string{"backup-id": backupID} 308 309 // if we return early make sure writer is closed 310 closeWriter := true 311 defer func() { 312 if closeWriter { 313 writer.Close() 314 } 315 }() 316 317 // copy 318 written, err := io.Copy(writer, r) 319 if err != nil { 320 return 0, fmt.Errorf("io.copy %q: %w", path, err) 321 } 322 closeWriter = false 323 if err := writer.Close(); err != nil { 324 return 0, fmt.Errorf("writer.close %q: %w", path, err) 325 } 326 if metric, err := monitoring.GetMetrics().BackupStoreDataTransferred. 327 GetMetricWithLabelValues(Name, "class"); err == nil { 328 metric.Add(float64(written)) 329 } 330 return written, nil 331 } 332 333 func (g *gcsClient) Read(ctx context.Context, backupID, key string, w io.WriteCloser) (int64, error) { 334 defer w.Close() 335 336 bucket, err := g.findBucket(ctx) 337 if err != nil { 338 err = fmt.Errorf("find bucket: %w", err) 339 if errors.Is(err, storage.ErrObjectNotExist) { 340 err = backup.NewErrNotFound(err) 341 } 342 return 0, err 343 } 344 345 // create reader 346 path := g.makeObjectName(backupID, key) 347 rc, err := bucket.Object(path).NewReader(ctx) 348 if err != nil { 349 err = fmt.Errorf("find object %s: %v", path, err) 350 if errors.Is(err, storage.ErrObjectNotExist) { 351 err = backup.NewErrNotFound(err) 352 } 353 return 0, err 354 } 355 defer rc.Close() 356 357 // copy 358 read, err := io.Copy(w, rc) 359 if err != nil { 360 return read, fmt.Errorf("io.copy %q: %w", path, err) 361 } 362 363 if metric, err := monitoring.GetMetrics().BackupRestoreDataTransferred. 364 GetMetricWithLabelValues(Name, "class"); err == nil { 365 metric.Add(float64(float64(read))) 366 } 367 368 return read, nil 369 } 370 371 func (g *gcsClient) SourceDataPath() string { 372 return g.dataPath 373 }