github.com/rudderlabs/rudder-go-kit@v0.30.0/filemanager/digitaloceanmanager.go (about)

     1  package filemanager
     2  
     3  import (
     4  	"context"
     5  	"errors"
     6  	"fmt"
     7  	"net/url"
     8  	"os"
     9  	"path"
    10  	"regexp"
    11  	"strings"
    12  	"time"
    13  
    14  	"github.com/aws/aws-sdk-go/aws"
    15  	"github.com/aws/aws-sdk-go/aws/awserr"
    16  	"github.com/aws/aws-sdk-go/aws/credentials"
    17  	"github.com/aws/aws-sdk-go/aws/session"
    18  	"github.com/aws/aws-sdk-go/service/s3"
    19  	"github.com/samber/lo"
    20  
    21  	"github.com/rudderlabs/rudder-go-kit/logger"
    22  
    23  	SpacesManager "github.com/aws/aws-sdk-go/service/s3/s3manager"
    24  )
    25  
// DigitalOceanConfig holds the settings the Digital Ocean Spaces file manager
// works with.
type DigitalOceanConfig struct {
	// Bucket is the name of the bucket every request is issued against.
	Bucket         string
	// Prefix is prepended to the object name of uploaded files.
	Prefix         string
	// EndPoint is the Spaces endpoint handed to the AWS session. When Region
	// is nil the region is derived from it.
	EndPoint       string
	// AccessKeyID and AccessKey form the static credentials of the session.
	AccessKeyID    string
	AccessKey      string
	// Region is optional; nil means "derive the region from EndPoint".
	Region         *string
	// ForcePathStyle is forwarded to the AWS session as S3ForcePathStyle and
	// also decides whether the bucket name is stripped from object locations.
	ForcePathStyle *bool
	// DisableSSL is forwarded to the AWS session unchanged.
	DisableSSL     *bool
}
    36  
    37  // NewDigitalOceanManager creates a new file manager for digital ocean spaces
    38  func NewDigitalOceanManager(config map[string]interface{}, log logger.Logger, defaultTimeout func() time.Duration) (*digitalOceanManager, error) {
    39  	return &digitalOceanManager{
    40  		baseManager: &baseManager{
    41  			logger:         log,
    42  			defaultTimeout: defaultTimeout,
    43  		},
    44  		Config: digitalOceanConfig(config),
    45  	}, nil
    46  }
    47  
    48  func (manager *digitalOceanManager) ListFilesWithPrefix(ctx context.Context, startAfter, prefix string, maxItems int64) ListSession {
    49  	return &digitalOceanListSession{
    50  		baseListSession: &baseListSession{
    51  			ctx:        ctx,
    52  			startAfter: startAfter,
    53  			prefix:     prefix,
    54  			maxItems:   maxItems,
    55  		},
    56  		manager:     manager,
    57  		isTruncated: true,
    58  	}
    59  }
    60  
    61  func (manager *digitalOceanManager) Download(ctx context.Context, output *os.File, key string) error {
    62  	downloadSession, err := manager.getSession()
    63  	if err != nil {
    64  		return fmt.Errorf("error starting Digital Ocean Spaces session: %w", err)
    65  	}
    66  
    67  	ctx, cancel := context.WithTimeout(ctx, manager.getTimeout())
    68  	defer cancel()
    69  
    70  	downloader := SpacesManager.NewDownloader(downloadSession)
    71  	_, err = downloader.DownloadWithContext(ctx, output,
    72  		&s3.GetObjectInput{
    73  			Bucket: aws.String(manager.Config.Bucket),
    74  			Key:    aws.String(key),
    75  		})
    76  
    77  	return err
    78  }
    79  
    80  func (manager *digitalOceanManager) Upload(ctx context.Context, file *os.File, prefixes ...string) (UploadedFile, error) {
    81  	if manager.Config.Bucket == "" {
    82  		return UploadedFile{}, errors.New("no storage bucket configured to uploader")
    83  	}
    84  
    85  	fileName := path.Join(manager.Config.Prefix, path.Join(prefixes...), path.Base(file.Name()))
    86  
    87  	uploadInput := &SpacesManager.UploadInput{
    88  		ACL:    aws.String("bucket-owner-full-control"),
    89  		Bucket: aws.String(manager.Config.Bucket),
    90  		Key:    aws.String(fileName),
    91  		Body:   file,
    92  	}
    93  	uploadSession, err := manager.getSession()
    94  	if err != nil {
    95  		return UploadedFile{}, fmt.Errorf("error starting Digital Ocean Spaces session: %w", err)
    96  	}
    97  	DOmanager := SpacesManager.NewUploader(uploadSession)
    98  
    99  	ctx, cancel := context.WithTimeout(ctx, manager.getTimeout())
   100  	defer cancel()
   101  
   102  	output, err := DOmanager.UploadWithContext(ctx, uploadInput)
   103  	if err != nil {
   104  		if awsError, ok := err.(awserr.Error); ok && awsError.Code() == "MissingRegion" {
   105  			err = fmt.Errorf(fmt.Sprintf(`Bucket '%s' not found.`, manager.Config.Bucket))
   106  		}
   107  		return UploadedFile{}, err
   108  	}
   109  
   110  	return UploadedFile{Location: output.Location, ObjectName: fileName}, err
   111  }
   112  
   113  func (manager *digitalOceanManager) Delete(ctx context.Context, keys []string) error {
   114  	sess, err := manager.getSession()
   115  	if err != nil {
   116  		return fmt.Errorf("error starting Digital Ocean Spaces session: %w", err)
   117  	}
   118  
   119  	objects := make([]*s3.ObjectIdentifier, len(keys))
   120  	for i, key := range keys {
   121  		objects[i] = &s3.ObjectIdentifier{Key: aws.String(key)}
   122  	}
   123  
   124  	svc := s3.New(sess)
   125  
   126  	batchSize := 1000 // max accepted by DeleteObjects API
   127  	chunks := lo.Chunk(objects, batchSize)
   128  	for _, chunk := range chunks {
   129  		input := &s3.DeleteObjectsInput{
   130  			Bucket: aws.String(manager.Config.Bucket),
   131  			Delete: &s3.Delete{
   132  				Objects: chunk,
   133  			},
   134  		}
   135  
   136  		_ctx, cancel := context.WithTimeout(ctx, manager.getTimeout())
   137  		_, err := svc.DeleteObjectsWithContext(_ctx, input)
   138  		if err != nil {
   139  			if aerr, ok := err.(awserr.Error); ok {
   140  				manager.logger.Errorf(`Error while deleting digital ocean spaces objects: %v, error code: %v`, aerr.Error(), aerr.Code())
   141  			} else {
   142  				// Print the error, cast err to awserr.Error to get the Code and
   143  				// Message from an error.
   144  				manager.logger.Errorf(`Error while deleting digital ocean spaces objects: %v`, aerr.Error())
   145  			}
   146  			cancel()
   147  			return err
   148  		}
   149  		cancel()
   150  	}
   151  	return nil
   152  }
   153  
   154  func (manager *digitalOceanManager) Prefix() string {
   155  	return manager.Config.Prefix
   156  }
   157  
   158  func (manager *digitalOceanManager) GetDownloadKeyFromFileLocation(location string) string {
   159  	parsedUrl, err := url.Parse(location)
   160  	if err != nil {
   161  		fmt.Println("error while parsing location url: ", err)
   162  	}
   163  	trimedUrl := strings.TrimLeft(parsedUrl.Path, "/")
   164  	if (manager.Config.ForcePathStyle != nil && *manager.Config.ForcePathStyle) || (!strings.Contains(parsedUrl.Host, manager.Config.Bucket)) {
   165  		return strings.TrimPrefix(trimedUrl, fmt.Sprintf(`%s/`, manager.Config.Bucket))
   166  	}
   167  	return trimedUrl
   168  }
   169  
   170  /*
   171  GetObjectNameFromLocation gets the object name/key name from the object location url
   172  
   173  	https://rudder.sgp1.digitaloceanspaces.com/key - >> key
   174  */
   175  func (manager *digitalOceanManager) GetObjectNameFromLocation(location string) (string, error) {
   176  	parsedURL, err := url.Parse(location)
   177  	if err != nil {
   178  		return "", err
   179  	}
   180  	trimedUrl := strings.TrimLeft(parsedURL.Path, "/")
   181  	if (manager.Config.ForcePathStyle != nil && *manager.Config.ForcePathStyle) || (!strings.Contains(parsedURL.Host, manager.Config.Bucket)) {
   182  		return strings.TrimPrefix(trimedUrl, fmt.Sprintf(`%s/`, manager.Config.Bucket)), nil
   183  	}
   184  	return trimedUrl, nil
   185  }
   186  
   187  func (manager *digitalOceanManager) getSession() (*session.Session, error) {
   188  	var region string
   189  	if manager.Config.Region != nil {
   190  		region = *manager.Config.Region
   191  	} else {
   192  		region = getSpacesLocation(manager.Config.EndPoint)
   193  	}
   194  	return session.NewSession(&aws.Config{
   195  		Region:           aws.String(region),
   196  		Credentials:      credentials.NewStaticCredentials(manager.Config.AccessKeyID, manager.Config.AccessKey, ""),
   197  		Endpoint:         aws.String(manager.Config.EndPoint),
   198  		DisableSSL:       manager.Config.DisableSSL,
   199  		S3ForcePathStyle: manager.Config.ForcePathStyle,
   200  	})
   201  }
   202  
   203  func getSpacesLocation(location string) (region string) {
   204  	r, _ := regexp.Compile(`\.*.*\.digitaloceanspaces\.com`) // skipcq: GO-S1009
   205  	subLocation := r.FindString(location)
   206  	regionTokens := strings.Split(subLocation, ".")
   207  	if len(regionTokens) == 3 {
   208  		region = regionTokens[0]
   209  	}
   210  	return region
   211  }
   212  
// digitalOceanManager is the file manager implementation for Digital Ocean
// Spaces. It talks to Spaces through the AWS S3 SDK.
type digitalOceanManager struct {
	// baseManager carries the logger and the default timeout supplied to
	// NewDigitalOceanManager.
	*baseManager
	// Config holds the bucket, endpoint and credential settings.
	Config *DigitalOceanConfig
}
   217  
   218  func digitalOceanConfig(config map[string]interface{}) *DigitalOceanConfig {
   219  	var bucketName, prefix, endPoint, accessKeyID, accessKey string
   220  	var region *string
   221  	var forcePathStyle, disableSSL *bool
   222  	if config["bucketName"] != nil {
   223  		tmp, ok := config["bucketName"].(string)
   224  		if ok {
   225  			bucketName = tmp
   226  		}
   227  	}
   228  	if config["prefix"] != nil {
   229  		tmp, ok := config["prefix"].(string)
   230  		if ok {
   231  			prefix = tmp
   232  		}
   233  	}
   234  	if config["endPoint"] != nil {
   235  		tmp, ok := config["endPoint"].(string)
   236  		if ok {
   237  			endPoint = tmp
   238  		}
   239  	}
   240  	if config["accessKeyID"] != nil {
   241  		tmp, ok := config["accessKeyID"].(string)
   242  		if ok {
   243  			accessKeyID = tmp
   244  		}
   245  	}
   246  	if config["accessKey"] != nil {
   247  		tmp, ok := config["accessKey"].(string)
   248  		if ok {
   249  			accessKey = tmp
   250  		}
   251  	}
   252  	if config["region"] != nil {
   253  		tmp, ok := config["region"].(string)
   254  		if ok {
   255  			region = &tmp
   256  		}
   257  	}
   258  	if config["forcePathStyle"] != nil {
   259  		tmp, ok := config["forcePathStyle"].(bool)
   260  		if ok {
   261  			forcePathStyle = &tmp
   262  		}
   263  	}
   264  	if config["disableSSL"] != nil {
   265  		tmp, ok := config["disableSSL"].(bool)
   266  		if ok {
   267  			disableSSL = &tmp
   268  		}
   269  	}
   270  	return &DigitalOceanConfig{
   271  		Bucket:         bucketName,
   272  		EndPoint:       endPoint,
   273  		Prefix:         prefix,
   274  		AccessKeyID:    accessKeyID,
   275  		AccessKey:      accessKey,
   276  		Region:         region,
   277  		ForcePathStyle: forcePathStyle,
   278  		DisableSSL:     disableSSL,
   279  	}
   280  }
   281  
// digitalOceanListSession is the paging state of one listing started with
// ListFilesWithPrefix.
type digitalOceanListSession struct {
	// baseListSession carries the context, prefix, startAfter key and page
	// size the listing was started with.
	*baseListSession
	// manager is the file manager that issues the list requests.
	manager *digitalOceanManager

	// continuationToken is the NextContinuationToken of the previous
	// response; when non-nil it is sent with the next request.
	continuationToken *string
	// isTruncated starts out true and is updated from each response. Once it
	// is false, Next returns without listing.
	isTruncated       bool
}
   289  
   290  func (l *digitalOceanListSession) Next() (fileObjects []*FileInfo, err error) {
   291  	manager := l.manager
   292  	if !l.isTruncated {
   293  		manager.logger.Infof("Manager is truncated: %v so returning here", l.isTruncated)
   294  		return
   295  	}
   296  	fileObjects = make([]*FileInfo, 0)
   297  
   298  	sess, err := manager.getSession()
   299  	if err != nil {
   300  		return []*FileInfo{}, fmt.Errorf("error starting Digital Ocean Spaces session: %w", err)
   301  	}
   302  
   303  	// Create S3 service client
   304  	svc := s3.New(sess)
   305  
   306  	ctx, cancel := context.WithTimeout(l.ctx, manager.getTimeout())
   307  	defer cancel()
   308  
   309  	listObjectsV2Input := s3.ListObjectsV2Input{
   310  		Bucket:  aws.String(manager.Config.Bucket),
   311  		Prefix:  aws.String(l.prefix),
   312  		MaxKeys: &l.maxItems,
   313  	}
   314  	// startAfter is to resume a paused task.
   315  	if l.startAfter != "" {
   316  		listObjectsV2Input.StartAfter = aws.String(l.startAfter)
   317  	}
   318  	if l.continuationToken != nil {
   319  		listObjectsV2Input.ContinuationToken = l.continuationToken
   320  	}
   321  
   322  	// Get the list of items
   323  	resp, err := svc.ListObjectsV2WithContext(ctx, &listObjectsV2Input)
   324  	if err != nil {
   325  		manager.logger.Errorf("Error while listing Digital Ocean Spaces objects: %v", err)
   326  		return
   327  	}
   328  	if resp.IsTruncated != nil {
   329  		l.isTruncated = *resp.IsTruncated
   330  	}
   331  	l.continuationToken = resp.NextContinuationToken
   332  	for _, item := range resp.Contents {
   333  		fileObjects = append(fileObjects, &FileInfo{*item.Key, *item.LastModified})
   334  	}
   335  	return
   336  }