github.com/weaviate/weaviate@v1.24.6/modules/backup-gcs/client.go (about)

     1  //                           _       _
     2  // __      _____  __ ___   ___  __ _| |_ ___
     3  // \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \
     4  //  \ V  V /  __/ (_| |\ V /| | (_| | ||  __/
     5  //   \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___|
     6  //
     7  //  Copyright © 2016 - 2024 Weaviate B.V. All rights reserved.
     8  //
     9  //  CONTACT: hello@weaviate.io
    10  //
    11  
    12  package modstggcs
    13  
    14  import (
    15  	"context"
    16  	"fmt"
    17  	"io"
    18  	"os"
    19  	"path"
    20  	"strings"
    21  	"time"
    22  
    23  	"cloud.google.com/go/storage"
    24  	"github.com/googleapis/gax-go/v2"
    25  	"github.com/pkg/errors"
    26  	"github.com/weaviate/weaviate/entities/backup"
    27  	"github.com/weaviate/weaviate/usecases/monitoring"
    28  	"golang.org/x/oauth2/google"
    29  	"google.golang.org/api/option"
    30  )
    31  
// gcsClient stores and retrieves backup objects in a single Google Cloud
// Storage bucket. Instances are created by newClient.
type gcsClient struct {
	// client is the GCS handle used for every bucket and object operation.
	client *storage.Client
	// config is a copy of the configuration passed to newClient; its Bucket
	// and BackupPath fields select the bucket and the object name prefix.
	config clientConfig
	// projectID is read from the environment in newClient and may be empty.
	projectID string
	// dataPath is the local directory that PutFile resolves source paths
	// against; it is also returned by SourceDataPath.
	dataPath string
}
    38  
    39  func newClient(ctx context.Context, config *clientConfig, dataPath string) (*gcsClient, error) {
    40  	options := []option.ClientOption{}
    41  	useAuth := strings.ToLower(os.Getenv("BACKUP_GCS_USE_AUTH")) != "false"
    42  	if useAuth {
    43  		scopes := []string{
    44  			"https://www.googleapis.com/auth/devstorage.read_write",
    45  		}
    46  		creds, err := google.FindDefaultCredentials(ctx, scopes...)
    47  		if err != nil {
    48  			return nil, errors.Wrap(err, "find default credentials")
    49  		}
    50  		options = append(options, option.WithCredentials(creds))
    51  	} else {
    52  		options = append(options, option.WithoutAuthentication())
    53  	}
    54  	projectID := os.Getenv("GOOGLE_CLOUD_PROJECT")
    55  	if len(projectID) == 0 {
    56  		projectID = os.Getenv("GCLOUD_PROJECT")
    57  		if len(projectID) == 0 {
    58  			projectID = os.Getenv("GCP_PROJECT")
    59  		}
    60  	}
    61  	client, err := storage.NewClient(ctx, options...)
    62  	if err != nil {
    63  		return nil, errors.Wrap(err, "create client")
    64  	}
    65  
    66  	client.SetRetry(storage.WithBackoff(gax.Backoff{
    67  		Initial:    2 * time.Second, // Note: the client uses a jitter internally
    68  		Max:        60 * time.Second,
    69  		Multiplier: 3,
    70  	}),
    71  		storage.WithPolicy(storage.RetryAlways),
    72  	)
    73  	return &gcsClient{client, *config, projectID, dataPath}, nil
    74  }
    75  
    76  func (g *gcsClient) getObject(ctx context.Context, bucket *storage.BucketHandle,
    77  	backupID, objectName string,
    78  ) ([]byte, error) {
    79  	// Create bucket reader
    80  	obj := bucket.Object(objectName)
    81  	reader, err := obj.NewReader(ctx)
    82  	if err != nil {
    83  		if errors.Is(err, storage.ErrObjectNotExist) {
    84  			return nil, err
    85  		}
    86  		return nil, errors.Wrapf(err, "new reader: %v", objectName)
    87  	}
    88  	// Read file contents
    89  	content, err := io.ReadAll(reader)
    90  	if err != nil {
    91  		return nil, errors.Wrapf(err, "read object: %v", objectName)
    92  	}
    93  
    94  	metric, err := monitoring.GetMetrics().BackupRestoreDataTransferred.GetMetricWithLabelValues(Name, "class")
    95  	if err == nil {
    96  		metric.Add(float64(len(content)))
    97  	}
    98  	return content, nil
    99  }
   100  
   101  func (g *gcsClient) HomeDir(backupID string) string {
   102  	return "gs://" + path.Join(g.config.Bucket,
   103  		g.makeObjectName(backupID))
   104  }
   105  
   106  func (g *gcsClient) findBucket(ctx context.Context) (*storage.BucketHandle, error) {
   107  	bucket := g.client.Bucket(g.config.Bucket)
   108  
   109  	if _, err := bucket.Attrs(ctx); err != nil {
   110  		return nil, err
   111  	}
   112  
   113  	return bucket, nil
   114  }
   115  
   116  func (g *gcsClient) makeObjectName(parts ...string) string {
   117  	base := path.Join(parts...)
   118  	return path.Join(g.config.BackupPath, base)
   119  }
   120  
   121  func (g *gcsClient) GetObject(ctx context.Context, backupID, key string) ([]byte, error) {
   122  	objectName := g.makeObjectName(backupID, key)
   123  
   124  	if err := ctx.Err(); err != nil {
   125  		return nil, backup.NewErrContextExpired(errors.Wrapf(err, "get object '%s'", objectName))
   126  	}
   127  
   128  	bucket, err := g.findBucket(ctx)
   129  	if err != nil {
   130  		if errors.Is(err, storage.ErrBucketNotExist) {
   131  			return nil, backup.NewErrNotFound(errors.Wrapf(err, "get object '%s'", objectName))
   132  		}
   133  		return nil, backup.NewErrInternal(errors.Wrapf(err, "get object '%s'", objectName))
   134  	}
   135  
   136  	contents, err := g.getObject(ctx, bucket, backupID, objectName)
   137  	if err != nil {
   138  		if errors.Is(err, storage.ErrObjectNotExist) {
   139  			return nil, backup.NewErrNotFound(errors.Wrapf(err, "get object '%s'", objectName))
   140  		}
   141  		return nil, backup.NewErrInternal(errors.Wrapf(err, "get object '%s'", objectName))
   142  	}
   143  
   144  	return contents, nil
   145  }
   146  
   147  // PutFile creates an object with contents from file at filePath.
   148  func (g *gcsClient) PutFile(ctx context.Context, backupID, key, srcPath string) error {
   149  	bucket, err := g.findBucket(ctx)
   150  	if err != nil {
   151  		return fmt.Errorf("find bucket: %w", err)
   152  	}
   153  
   154  	// open source file
   155  	filePath := path.Join(g.dataPath, srcPath)
   156  	file, err := os.Open(filePath)
   157  	if err != nil {
   158  		return fmt.Errorf("os.open %q: %w", filePath, err)
   159  	}
   160  	defer file.Close()
   161  
   162  	// create a new writer
   163  	object := g.makeObjectName(backupID, key)
   164  	writer := bucket.Object(object).NewWriter(ctx)
   165  	writer.ContentType = "application/octet-stream"
   166  	writer.Metadata = map[string]string{"backup-id": backupID}
   167  
   168  	// if we return early make sure writer is closed
   169  	closeWriter := true
   170  	defer func() {
   171  		if closeWriter {
   172  			writer.Close()
   173  		}
   174  	}()
   175  
   176  	nBytes, err := io.Copy(writer, file)
   177  	if err != nil {
   178  		return fmt.Errorf("io.copy %q %q: %w", object, filePath, err)
   179  	}
   180  	closeWriter = false
   181  	if err := writer.Close(); err != nil {
   182  		return fmt.Errorf("writer.close %q: %w", filePath, err)
   183  	}
   184  	metric, err := monitoring.GetMetrics().BackupStoreDataTransferred.GetMetricWithLabelValues("backup-gcs", "class")
   185  	if err == nil {
   186  		metric.Add(float64(nBytes))
   187  	}
   188  	return nil
   189  }
   190  
   191  func (g *gcsClient) PutObject(ctx context.Context, backupID, key string, byes []byte) error {
   192  	bucket, err := g.findBucket(ctx)
   193  	if err != nil {
   194  		return errors.Wrap(err, "find bucket")
   195  	}
   196  
   197  	objectName := g.makeObjectName(backupID, key)
   198  	obj := bucket.Object(objectName)
   199  	writer := obj.NewWriter(ctx)
   200  	writer.ContentType = "application/octet-stream"
   201  	writer.Metadata = map[string]string{
   202  		"backup-id": backupID,
   203  	}
   204  	if _, err := writer.Write(byes); err != nil {
   205  		return errors.Wrapf(err, "write file: %v", objectName)
   206  	}
   207  	if err := writer.Close(); err != nil {
   208  		return errors.Wrapf(err, "close writer for file: %v", objectName)
   209  	}
   210  
   211  	metric, err := monitoring.GetMetrics().BackupStoreDataTransferred.GetMetricWithLabelValues("backup-gcs", "class")
   212  	if err == nil {
   213  		metric.Add(float64(len(byes)))
   214  	}
   215  
   216  	return nil
   217  }
   218  
   219  func (g *gcsClient) Initialize(ctx context.Context, backupID string) error {
   220  	key := "access-check"
   221  
   222  	if err := g.PutObject(ctx, backupID, key, []byte("")); err != nil {
   223  		return errors.Wrap(err, "failed to access-check gcs backup module")
   224  	}
   225  
   226  	bucket, err := g.findBucket(ctx)
   227  	if err != nil {
   228  		return errors.Wrap(err, "find bucket")
   229  	}
   230  
   231  	objectName := g.makeObjectName(backupID, key)
   232  	if err := bucket.Object(objectName).Delete(ctx); err != nil {
   233  		return errors.Wrap(err, "failed to remove access-check gcs backup module")
   234  	}
   235  
   236  	return nil
   237  }
   238  
   239  // WriteToFile downloads an object and store its content in destPath
   240  // The file destPath will be created if it doesn't exit
   241  func (g *gcsClient) WriteToFile(ctx context.Context, backupID, key, destPath string) (err error) {
   242  	bucket, err := g.findBucket(ctx)
   243  	if err != nil {
   244  		return fmt.Errorf("find bucket: %w", err)
   245  	}
   246  
   247  	// validate destination path
   248  	if st, err := os.Stat(destPath); err == nil {
   249  		if st.IsDir() {
   250  			return fmt.Errorf("file is a directory")
   251  		}
   252  	} else if !os.IsNotExist(err) {
   253  		return err
   254  	}
   255  
   256  	// create empty file
   257  	dir := path.Dir(destPath)
   258  	if err := os.MkdirAll(dir, os.ModePerm); err != nil {
   259  		return fmt.Errorf("os.mkdir %q: %w", dir, err)
   260  	}
   261  	file, err := os.Create(destPath)
   262  	if err != nil {
   263  		return fmt.Errorf("os.create %q: %w", destPath, err)
   264  	}
   265  
   266  	// make sure to close and delete in case we return early
   267  	closeAndRemove := true
   268  	defer func() {
   269  		if closeAndRemove {
   270  			file.Close()
   271  			os.Remove(destPath)
   272  		}
   273  	}()
   274  
   275  	// create reader
   276  	object := g.makeObjectName(backupID, key)
   277  	rc, err := bucket.Object(object).NewReader(ctx)
   278  	if err != nil {
   279  		return fmt.Errorf("find object %q: %w", object, err)
   280  	}
   281  	defer rc.Close()
   282  
   283  	// transfer content to the file
   284  	if _, err := io.Copy(file, rc); err != nil {
   285  		return fmt.Errorf("io.Copy:%q %q: %w", destPath, object, err)
   286  	}
   287  	closeAndRemove = false
   288  	if err = file.Close(); err != nil {
   289  		return fmt.Errorf("f.Close %q: %w", destPath, err)
   290  	}
   291  
   292  	return nil
   293  }
   294  
   295  func (g *gcsClient) Write(ctx context.Context, backupID, key string, r io.ReadCloser) (int64, error) {
   296  	defer r.Close()
   297  
   298  	bucket, err := g.findBucket(ctx)
   299  	if err != nil {
   300  		return 0, fmt.Errorf("find bucket: %w", err)
   301  	}
   302  
   303  	// create a new writer
   304  	path := g.makeObjectName(backupID, key)
   305  	writer := bucket.Object(path).NewWriter(ctx)
   306  	writer.ContentType = "application/octet-stream"
   307  	writer.Metadata = map[string]string{"backup-id": backupID}
   308  
   309  	// if we return early make sure writer is closed
   310  	closeWriter := true
   311  	defer func() {
   312  		if closeWriter {
   313  			writer.Close()
   314  		}
   315  	}()
   316  
   317  	// copy
   318  	written, err := io.Copy(writer, r)
   319  	if err != nil {
   320  		return 0, fmt.Errorf("io.copy %q: %w", path, err)
   321  	}
   322  	closeWriter = false
   323  	if err := writer.Close(); err != nil {
   324  		return 0, fmt.Errorf("writer.close %q: %w", path, err)
   325  	}
   326  	if metric, err := monitoring.GetMetrics().BackupStoreDataTransferred.
   327  		GetMetricWithLabelValues(Name, "class"); err == nil {
   328  		metric.Add(float64(written))
   329  	}
   330  	return written, nil
   331  }
   332  
   333  func (g *gcsClient) Read(ctx context.Context, backupID, key string, w io.WriteCloser) (int64, error) {
   334  	defer w.Close()
   335  
   336  	bucket, err := g.findBucket(ctx)
   337  	if err != nil {
   338  		err = fmt.Errorf("find bucket: %w", err)
   339  		if errors.Is(err, storage.ErrObjectNotExist) {
   340  			err = backup.NewErrNotFound(err)
   341  		}
   342  		return 0, err
   343  	}
   344  
   345  	// create reader
   346  	path := g.makeObjectName(backupID, key)
   347  	rc, err := bucket.Object(path).NewReader(ctx)
   348  	if err != nil {
   349  		err = fmt.Errorf("find object %s: %v", path, err)
   350  		if errors.Is(err, storage.ErrObjectNotExist) {
   351  			err = backup.NewErrNotFound(err)
   352  		}
   353  		return 0, err
   354  	}
   355  	defer rc.Close()
   356  
   357  	// copy
   358  	read, err := io.Copy(w, rc)
   359  	if err != nil {
   360  		return read, fmt.Errorf("io.copy %q: %w", path, err)
   361  	}
   362  
   363  	if metric, err := monitoring.GetMetrics().BackupRestoreDataTransferred.
   364  		GetMetricWithLabelValues(Name, "class"); err == nil {
   365  		metric.Add(float64(float64(read)))
   366  	}
   367  
   368  	return read, nil
   369  }
   370  
   371  func (g *gcsClient) SourceDataPath() string {
   372  	return g.dataPath
   373  }