go.charczuk.com@v0.0.0-20240327042549-bc490516bd1a/projects/blogctl/pkg/aws/s3/manager.go (about)

     1  /*
     2  
     3  Copyright (c) 2023 - Present. Will Charczuk. All rights reserved.
     4  Use of this source code is governed by a MIT license that can be found in the LICENSE file at the root of the repository.
     5  
     6  */
     7  
     8  package s3
     9  
    10  import (
    11  	"bytes"
    12  	"context"
    13  	"crypto/md5"
    14  	"encoding/hex"
    15  	"fmt"
    16  	"io"
    17  	"log"
    18  	"os"
    19  	"path/filepath"
    20  	"runtime"
    21  	"strings"
    22  	"sync"
    23  
    24  	"github.com/aws/aws-sdk-go/aws/session"
    25  	"github.com/aws/aws-sdk-go/service/s3"
    26  
    27  	"go.charczuk.com/sdk/async"
    28  	"go.charczuk.com/sdk/logutil"
    29  
    30  	"go.charczuk.com/projects/blogctl/pkg/aws"
    31  )
    32  
    33  // New returns a new manager.
    34  func New(cfg aws.Config) *Manager {
    35  	return &Manager{
    36  		Config: cfg,
    37  		Ignores: []string{
    38  			".DS_Store",
    39  			".git",
    40  		},
    41  		Session:     aws.NewSession(cfg),
    42  		Parallelism: runtime.NumCPU(),
    43  	}
    44  }
    45  
// Manager is a helper for uploading files to s3.
//
// Fields:
//   - Log is the logger handed to the logutil helpers for debug/info output.
//   - Ignores lists path suffixes that DiscoverFiles leaves out of its result.
//   - Config is the AWS configuration the manager was created with.
//   - Session is the AWS session used to build an s3 client for each call.
//   - PutObjectDefaults supplies fallback values that Put consults when the
//     file being uploaded leaves the corresponding field empty.
//   - DryRun, when true, makes ProcessFiles log the puts and deletes it would
//     perform without calling Put or Delete.
//   - Parallelism bounds the concurrent work in ProcessFiles; values that are
//     not positive fall back to runtime.NumCPU() (see ParallelismOrDefault).
type Manager struct {
	Log               *log.Logger
	Ignores           []string
	Config            aws.Config
	Session           *session.Session
	PutObjectDefaults File
	DryRun            bool
	Parallelism       int
}
    56  
    57  // ParallelismOrDefault returns the parallelism or a default.
    58  func (m Manager) ParallelismOrDefault() int {
    59  	if m.Parallelism > 0 {
    60  		return m.Parallelism
    61  	}
    62  	return runtime.NumCPU()
    63  }
    64  
    65  // GetKey returns the relative path for a given file.
    66  func (m Manager) GetKey(rootPath, workingPath string) string {
    67  	if !strings.HasPrefix(workingPath, "./") {
    68  		workingPath = "./" + workingPath
    69  	}
    70  	if !strings.HasPrefix(rootPath, "./") {
    71  		rootPath = "./" + rootPath
    72  	}
    73  	return strings.TrimPrefix(workingPath, rootPath)
    74  }
    75  
    76  // SyncDirectory sync's a directory.
    77  // It returns a list of invalidated keys (i.e. keys to update or remove), and an error.
    78  func (m Manager) SyncDirectory(ctx context.Context, directoryPath, bucket string) (invalidations []string, err error) {
    79  	if m.DryRun {
    80  		logutil.Debug(m.Log, "sync directory (dry run): not realizing changes")
    81  	}
    82  	localFiles, err := m.DiscoverFiles(ctx, directoryPath)
    83  	if err != nil {
    84  		return nil, err
    85  	}
    86  	invalidations, err = m.ProcessFiles(ctx, localFiles, directoryPath, bucket)
    87  	return
    88  }
    89  
    90  // DiscoverFiles discovers local files.
    91  func (m Manager) DiscoverFiles(ctx context.Context, directoryPath string) (localFiles []string, err error) {
    92  	err = filepath.Walk(directoryPath, func(currentPath string, fileInfo os.FileInfo, err error) error {
    93  		if err != nil {
    94  			return err
    95  		}
    96  		if currentPath == directoryPath {
    97  			return nil
    98  		}
    99  		for _, ignore := range m.Ignores {
   100  			if strings.HasSuffix(currentPath, ignore) {
   101  				return nil
   102  			}
   103  		}
   104  		if fileInfo.IsDir() {
   105  			return nil
   106  		}
   107  		localFiles = append(localFiles, currentPath)
   108  		return nil
   109  	})
   110  	return
   111  }
   112  
   113  // ProcessFiles processes the files list.
   114  func (m Manager) ProcessFiles(ctx context.Context, localFiles []string, directoryPath, bucket string) (invalidated []string, err error) {
   115  	remoteETags := make(map[string]string)
   116  	localKeys := new(Set)
   117  
   118  	remoteFiles, err := m.List(ctx, bucket)
   119  	if err != nil {
   120  		return nil, err
   121  	}
   122  
   123  	var remoteFileBatch = make([]File, 0, len(remoteFiles))
   124  	for _, remoteFile := range remoteFiles {
   125  		key := remoteFile.Key
   126  		if !strings.HasPrefix(key, "/") {
   127  			key = "/" + key
   128  		}
   129  		logutil.Debugf(m.Log, "%s: remote file has etag %s", key, remoteFile.ETag)
   130  		remoteETags[key] = aws.StripQuotes(remoteFile.ETag)
   131  		remoteFileBatch = append(remoteFileBatch, remoteFile)
   132  	}
   133  
   134  	b := new(async.Batch)
   135  	b.SetLimit(m.ParallelismOrDefault())
   136  
   137  	var processLocalFile = func(file string) func() error {
   138  		return func() error {
   139  			key := m.GetKey(directoryPath, file)
   140  			localKeys.Set(key)
   141  
   142  			var localETag string
   143  			remoteETag, hasRemoteFile := remoteETags[key]
   144  			if hasRemoteFile { // if we need to compare against a remote etag
   145  				logutil.Debugf(m.Log, "%s: generating local file etag for key", key)
   146  				localETag, err = m.GenerateETag(file)
   147  				if err != nil {
   148  					return err
   149  				}
   150  			} else {
   151  				logutil.Debugf(m.Log, "%s: missing remote file etag for key", key)
   152  			}
   153  
   154  			if !hasRemoteFile || remoteETag != localETag {
   155  				if !hasRemoteFile {
   156  					logutil.Debugf(m.Log, "%s: local file is not present on remote", key)
   157  				} else if hasRemoteFile && remoteETag != localETag {
   158  					logutil.Debugf(m.Log, "%s: local file has different etag than remote; %s vs. %v", key, localETag, remoteETag)
   159  				}
   160  
   161  				contentType, err := DetectContentType(file)
   162  				if err != nil {
   163  					return err
   164  				}
   165  
   166  				if !m.DryRun {
   167  					if err := m.Put(ctx, File{
   168  						FilePath:    file,
   169  						Key:         key,
   170  						Bucket:      bucket,
   171  						ContentType: contentType,
   172  					}); err != nil {
   173  						return err
   174  					}
   175  					logutil.Infof(m.Log, "%s: put file to remote", key)
   176  				} else {
   177  					logutil.Infof(m.Log, "%s: (dry run) put file to remote", key)
   178  				}
   179  				if hasRemoteFile {
   180  					logutil.Infof(m.Log, "%s: marking to be invalidated", key)
   181  					invalidated = append(invalidated, key)
   182  				}
   183  			} else {
   184  				logutil.Debugf(m.Log, "%s: skipping (unchanged)", key)
   185  			}
   186  			return nil
   187  		}
   188  	}
   189  	for _, file := range localFiles {
   190  		b.Go(processLocalFile(file))
   191  	}
   192  	if err := b.Wait(); err != nil {
   193  		return nil, err
   194  	}
   195  
   196  	rb := new(async.Batch)
   197  	rb.SetLimit(m.ParallelismOrDefault())
   198  
   199  	var invalidatedSync sync.Mutex
   200  	var invalidateRemoteFile = func(remoteFile File) func() error {
   201  		return func() error {
   202  			key := remoteFile.Key
   203  			if !strings.HasPrefix(key, "/") {
   204  				key = "/" + key
   205  			}
   206  			if !localKeys.Has(key) {
   207  				if !m.DryRun {
   208  					logutil.Infof(m.Log, "%s: removing remote file", remoteFile.Key)
   209  					if err := m.Delete(ctx, bucket, remoteFile.Key); err != nil {
   210  						return err
   211  					}
   212  				} else {
   213  					logutil.Infof(m.Log, "%s: (dry run) removing remote file", remoteFile.Key)
   214  				}
   215  
   216  				invalidatedSync.Lock()
   217  				invalidated = append(invalidated, key)
   218  				invalidatedSync.Unlock()
   219  			} else {
   220  				logutil.Debugf(m.Log, "%s: keeping remote file", remoteFile.Key)
   221  			}
   222  			return nil
   223  		}
   224  	}
   225  	for _, remoteFile := range remoteFileBatch {
   226  		rb.Go(invalidateRemoteFile(remoteFile))
   227  	}
   228  	if err := rb.Wait(); err != nil {
   229  		return nil, err
   230  	}
   231  	return invalidated, nil
   232  }
   233  
   234  // List lists all files in a bucket.
   235  func (m Manager) List(ctx context.Context, bucket string) ([]File, error) {
   236  	remoteFiles, err := s3.New(m.Session).ListObjectsWithContext(ctx, &s3.ListObjectsInput{
   237  		Bucket: &bucket,
   238  	})
   239  	if IsNotFound(err) {
   240  		return nil, nil
   241  	}
   242  	if err != nil {
   243  		return nil, err
   244  	}
   245  
   246  	var files []File
   247  	for _, file := range remoteFiles.Contents {
   248  		files = append(files, File{
   249  			Bucket: bucket,
   250  			Key:    aws.DerefStr(file.Key),
   251  			ETag:   aws.DerefStr(file.ETag),
   252  		})
   253  	}
   254  	return files, nil
   255  }
   256  
   257  // Get fetches a file at a given key
   258  func (m Manager) Get(ctx context.Context, bucket, key string) (file File, contents io.ReadCloser, err error) {
   259  	remoteFile, getErr := s3.New(m.Session).GetObjectWithContext(ctx, &s3.GetObjectInput{
   260  		Bucket: &bucket,
   261  		Key:    &key,
   262  	})
   263  	if IsNotFound(getErr) {
   264  		return
   265  	}
   266  	if getErr != nil {
   267  		err = getErr
   268  		return
   269  	}
   270  
   271  	file = File{
   272  		Bucket:               bucket,
   273  		Key:                  key,
   274  		ContentType:          aws.DerefStr(remoteFile.ContentType),
   275  		ContentDisposition:   aws.DerefStr(remoteFile.ContentDisposition),
   276  		ServerSideEncryption: aws.DerefStr(remoteFile.ServerSideEncryption),
   277  		ETag:                 aws.DerefStr(remoteFile.ETag),
   278  	}
   279  	contents = remoteFile.Body
   280  	return
   281  }
   282  
   283  // GetMeta fetches file metadata at a given key
   284  func (m Manager) GetMeta(ctx context.Context, bucket, key string) (meta File, err error) {
   285  	var contents io.ReadCloser
   286  	meta, contents, err = m.Get(ctx, bucket, key)
   287  	if err != nil {
   288  		return
   289  	}
   290  	if contents != nil {
   291  		defer contents.Close()
   292  	}
   293  	return
   294  }
   295  
   296  // Put uploads a file to s3.
   297  func (m Manager) Put(ctx context.Context, fileInfo File) error {
   298  	var size int64
   299  	var contentType, contentDisposition, acl, serverSideEncryption string
   300  	var contents io.ReadSeeker
   301  
   302  	if fileInfo.FilePath != "" {
   303  		file, err := os.Open(fileInfo.FilePath)
   304  		if err != nil {
   305  			return err
   306  		}
   307  		defer file.Close()
   308  		stats, err := file.Stat()
   309  		if err != nil {
   310  			return err
   311  		}
   312  		contents = file
   313  		size = int64(stats.Size())
   314  	} else if len(fileInfo.Contents) > 0 {
   315  		size = int64(len(fileInfo.Contents))
   316  		contents = bytes.NewReader(fileInfo.Contents)
   317  	} else {
   318  		return fmt.Errorf("invalid put object; must set either the path or the contents")
   319  	}
   320  
   321  	if fileInfo.ContentType != "" {
   322  		contentType = fileInfo.ContentType
   323  	} else if m.PutObjectDefaults.ContentDisposition != "" {
   324  		contentType = m.PutObjectDefaults.ContentType
   325  	}
   326  
   327  	if fileInfo.ContentDisposition != "" {
   328  		contentDisposition = fileInfo.ContentDisposition
   329  	} else if m.PutObjectDefaults.ContentDisposition != "" {
   330  		contentDisposition = m.PutObjectDefaults.ContentDisposition
   331  	}
   332  
   333  	if fileInfo.ACL != "" {
   334  		acl = fileInfo.ACL
   335  	} else if m.PutObjectDefaults.ACL != "" {
   336  		acl = m.PutObjectDefaults.ACL
   337  	}
   338  
   339  	if fileInfo.ServerSideEncryption != "" {
   340  		serverSideEncryption = fileInfo.ServerSideEncryption
   341  	} else if m.PutObjectDefaults.ServerSideEncryption != "" {
   342  		serverSideEncryption = m.PutObjectDefaults.ServerSideEncryption
   343  	}
   344  
   345  	_, err := s3.New(m.Session).PutObject(&s3.PutObjectInput{
   346  		Bucket:               aws.RefStr(fileInfo.Bucket),
   347  		Key:                  aws.RefStr(fileInfo.Key),
   348  		Body:                 contents,
   349  		ContentLength:        &size,
   350  		ContentType:          aws.RefStr(contentType),
   351  		ContentDisposition:   aws.RefStr(contentDisposition),
   352  		ACL:                  aws.RefStr(acl),
   353  		ServerSideEncryption: aws.RefStr(serverSideEncryption),
   354  	})
   355  	return err
   356  }
   357  
   358  // Delete removes an object with a given key.
   359  func (m Manager) Delete(ctx context.Context, bucket, key string) error {
   360  	_, err := s3.New(m.Session).DeleteObject(&s3.DeleteObjectInput{
   361  		Bucket: aws.RefStr(bucket),
   362  		Key:    aws.RefStr(key),
   363  	})
   364  	return err
   365  }
   366  
   367  // GenerateETag generate an etag for a give file by path.
   368  func (m Manager) GenerateETag(path string) (string, error) {
   369  	f, err := os.Open(path)
   370  	if err != nil {
   371  		return "", err
   372  	}
   373  
   374  	hash := md5.New()
   375  	_, err = io.Copy(hash, f)
   376  	if err != nil {
   377  		return "", err
   378  	}
   379  
   380  	return hex.EncodeToString(hash.Sum(nil)), nil
   381  }