github.com/rudderlabs/rudder-go-kit@v0.30.0/filemanager/gcsmanager.go (about)

     1  package filemanager
     2  
     3  import (
     4  	"context"
     5  	"errors"
     6  	"fmt"
     7  	"io"
     8  	"os"
     9  	"path"
    10  	"strings"
    11  	"sync"
    12  	"time"
    13  
    14  	"google.golang.org/api/iterator"
    15  
    16  	"github.com/rudderlabs/rudder-go-kit/googleutil"
    17  	"github.com/rudderlabs/rudder-go-kit/logger"
    18  
    19  	"cloud.google.com/go/storage"
    20  	"google.golang.org/api/option"
    21  )
    22  
    23  type GCSConfig struct {
    24  	Bucket         string
    25  	Prefix         string
    26  	Credentials    string
    27  	EndPoint       *string
    28  	ForcePathStyle *bool
    29  	DisableSSL     *bool
    30  	JSONReads      bool
    31  }
    32  
    33  // NewGCSManager creates a new file manager for Google Cloud Storage
    34  func NewGCSManager(
    35  	config map[string]interface{}, log logger.Logger, defaultTimeout func() time.Duration,
    36  ) (*gcsManager, error) {
    37  	return &gcsManager{
    38  		baseManager: &baseManager{
    39  			logger:         log,
    40  			defaultTimeout: defaultTimeout,
    41  		},
    42  		config: gcsConfig(config),
    43  	}, nil
    44  }
    45  
    46  func (m *gcsManager) ListFilesWithPrefix(ctx context.Context, startAfter, prefix string, maxItems int64) ListSession {
    47  	return &gcsListSession{
    48  		baseListSession: &baseListSession{
    49  			ctx:        ctx,
    50  			startAfter: startAfter,
    51  			prefix:     prefix,
    52  			maxItems:   maxItems,
    53  		},
    54  		manager: m,
    55  	}
    56  }
    57  
    58  func (m *gcsManager) Download(ctx context.Context, output *os.File, key string) error {
    59  	client, err := m.getClient(ctx)
    60  	if err != nil {
    61  		return err
    62  	}
    63  
    64  	ctx, cancel := context.WithTimeout(ctx, m.getTimeout())
    65  	defer cancel()
    66  
    67  	rc, err := client.Bucket(m.config.Bucket).Object(key).NewReader(ctx)
    68  	if err != nil {
    69  		return err
    70  	}
    71  	defer func() { _ = rc.Close() }()
    72  
    73  	_, err = io.Copy(output, rc)
    74  	return err
    75  }
    76  
    77  func (m *gcsManager) Upload(ctx context.Context, file *os.File, prefixes ...string) (UploadedFile, error) {
    78  	fileName := path.Join(m.config.Prefix, path.Join(prefixes...), path.Base(file.Name()))
    79  
    80  	client, err := m.getClient(ctx)
    81  	if err != nil {
    82  		return UploadedFile{}, err
    83  	}
    84  
    85  	ctx, cancel := context.WithTimeout(ctx, m.getTimeout())
    86  	defer cancel()
    87  
    88  	obj := client.Bucket(m.config.Bucket).Object(fileName)
    89  	w := obj.NewWriter(ctx)
    90  	if _, err := io.Copy(w, file); err != nil {
    91  		err = fmt.Errorf("copying file to GCS: %v", err)
    92  		if closeErr := w.Close(); closeErr != nil {
    93  			return UploadedFile{}, fmt.Errorf("closing writer: %q, while: %w", closeErr, err)
    94  		}
    95  
    96  		return UploadedFile{}, err
    97  	}
    98  
    99  	if err := w.Close(); err != nil {
   100  		return UploadedFile{}, fmt.Errorf("closing writer: %w", err)
   101  	}
   102  	attrs := w.Attrs()
   103  
   104  	return UploadedFile{Location: m.objectURL(attrs), ObjectName: fileName}, err
   105  }
   106  
   107  func (m *gcsManager) Delete(ctx context.Context, keys []string) (err error) {
   108  	client, err := m.getClient(ctx)
   109  	if err != nil {
   110  		return err
   111  	}
   112  
   113  	ctx, cancel := context.WithTimeout(ctx, m.getTimeout())
   114  	defer cancel()
   115  
   116  	for _, key := range keys {
   117  		if err := client.Bucket(m.config.Bucket).Object(key).Delete(ctx); err != nil && !errors.Is(err, storage.ErrObjectNotExist) {
   118  			return err
   119  		}
   120  	}
   121  	return
   122  }
   123  
   124  func (m *gcsManager) Prefix() string {
   125  	return m.config.Prefix
   126  }
   127  
   128  func (m *gcsManager) GetObjectNameFromLocation(location string) (string, error) {
   129  	splitStr := strings.Split(location, m.config.Bucket)
   130  	object := strings.TrimLeft(splitStr[len(splitStr)-1], "/")
   131  	return object, nil
   132  }
   133  
   134  func (m *gcsManager) GetDownloadKeyFromFileLocation(location string) string {
   135  	splitStr := strings.Split(location, m.config.Bucket)
   136  	key := strings.TrimLeft(splitStr[len(splitStr)-1], "/")
   137  	return key
   138  }
   139  
   140  func (m *gcsManager) objectURL(objAttrs *storage.ObjectAttrs) string {
   141  	if m.config.EndPoint != nil && *m.config.EndPoint != "" {
   142  		endpoint := strings.TrimSuffix(*m.config.EndPoint, "/")
   143  		return fmt.Sprintf("%s/%s/%s", endpoint, objAttrs.Bucket, objAttrs.Name)
   144  	}
   145  	return fmt.Sprintf("https://storage.googleapis.com/%s/%s", objAttrs.Bucket, objAttrs.Name)
   146  }
   147  
   148  func (m *gcsManager) getClient(ctx context.Context) (*storage.Client, error) {
   149  	m.clientMu.Lock()
   150  	defer m.clientMu.Unlock()
   151  
   152  	if m.client != nil {
   153  		return m.client, nil
   154  	}
   155  
   156  	var options []option.ClientOption
   157  	if m.config.EndPoint != nil && *m.config.EndPoint != "" {
   158  		options = append(options, option.WithEndpoint(*m.config.EndPoint))
   159  	}
   160  	if !googleutil.ShouldSkipCredentialsInit(m.config.Credentials) {
   161  		if err := googleutil.CompatibleGoogleCredentialsJSON([]byte(m.config.Credentials)); err != nil {
   162  			return m.client, err
   163  		}
   164  		options = append(options, option.WithCredentialsJSON([]byte(m.config.Credentials)))
   165  	}
   166  	if m.config.JSONReads {
   167  		options = append(options, storage.WithJSONReads())
   168  	}
   169  
   170  	ctx, cancel := context.WithTimeout(ctx, m.getTimeout())
   171  	defer cancel()
   172  
   173  	var err error
   174  	m.client, err = storage.NewClient(ctx, options...)
   175  	return m.client, err
   176  }
   177  
// gcsManager is the Google Cloud Storage file manager. It embeds baseManager,
// which NewGCSManager populates with the logger and default timeout provider,
// and keeps the parsed GCS configuration alongside a lazily created client.
type gcsManager struct {
	*baseManager
	config *GCSConfig

	// client is created on first use by getClient and reused afterwards;
	// clientMu is held by getClient while it checks and initialises client.
	client   *storage.Client
	clientMu sync.Mutex
}
   185  
   186  func gcsConfig(config map[string]interface{}) *GCSConfig {
   187  	var bucketName, prefix, credentials string
   188  	var endPoint *string
   189  	var forcePathStyle, disableSSL *bool
   190  	var jsonReads bool
   191  
   192  	if config["bucketName"] != nil {
   193  		tmp, ok := config["bucketName"].(string)
   194  		if ok {
   195  			bucketName = tmp
   196  		}
   197  	}
   198  	if config["prefix"] != nil {
   199  		tmp, ok := config["prefix"].(string)
   200  		if ok {
   201  			prefix = tmp
   202  		}
   203  	}
   204  	if config["credentials"] != nil {
   205  		tmp, ok := config["credentials"].(string)
   206  		if ok {
   207  			credentials = tmp
   208  		}
   209  	}
   210  	if config["endPoint"] != nil {
   211  		tmp, ok := config["endPoint"].(string)
   212  		if ok {
   213  			endPoint = &tmp
   214  		}
   215  	}
   216  	if config["forcePathStyle"] != nil {
   217  		tmp, ok := config["forcePathStyle"].(bool)
   218  		if ok {
   219  			forcePathStyle = &tmp
   220  		}
   221  	}
   222  	if config["disableSSL"] != nil {
   223  		tmp, ok := config["disableSSL"].(bool)
   224  		if ok {
   225  			disableSSL = &tmp
   226  		}
   227  	}
   228  	if config["jsonReads"] != nil {
   229  		tmp, ok := config["jsonReads"].(bool)
   230  		if ok {
   231  			jsonReads = tmp
   232  		}
   233  	}
   234  	return &GCSConfig{
   235  		Bucket:         bucketName,
   236  		Prefix:         prefix,
   237  		Credentials:    credentials,
   238  		EndPoint:       endPoint,
   239  		ForcePathStyle: forcePathStyle,
   240  		DisableSSL:     disableSSL,
   241  		JSONReads:      jsonReads,
   242  	}
   243  }
   244  
// gcsListSession is the listing session returned by
// gcsManager.ListFilesWithPrefix. The embedded baseListSession carries the
// context, prefix, start offset and per-call item limit given at creation.
type gcsListSession struct {
	*baseListSession
	manager *gcsManager

	// Iterator is created by the first call to Next and reused by later calls,
	// so consecutive calls continue where the previous one stopped.
	Iterator *storage.ObjectIterator
}
   251  
   252  func (l *gcsListSession) Next() (fileObjects []*FileInfo, err error) {
   253  	manager := l.manager
   254  	maxItems := l.maxItems
   255  	fileObjects = make([]*FileInfo, 0)
   256  
   257  	// Create GCS storage client
   258  	client, err := manager.getClient(l.ctx)
   259  	if err != nil {
   260  		return
   261  	}
   262  
   263  	// Create GCS Bucket handle
   264  	if l.Iterator == nil {
   265  		l.Iterator = client.Bucket(manager.config.Bucket).Objects(l.ctx, &storage.Query{
   266  			Prefix:      l.prefix,
   267  			Delimiter:   "",
   268  			StartOffset: l.startAfter,
   269  		})
   270  	}
   271  	var attrs *storage.ObjectAttrs
   272  	for {
   273  		if maxItems <= 0 {
   274  			break
   275  		}
   276  		attrs, err = l.Iterator.Next()
   277  		if err == iterator.Done || err != nil {
   278  			if err == iterator.Done {
   279  				err = nil
   280  			}
   281  			break
   282  		}
   283  		fileObjects = append(fileObjects, &FileInfo{attrs.Name, attrs.Updated})
   284  		maxItems--
   285  	}
   286  	return
   287  }