go.charczuk.com@v0.0.0-20240327042549-bc490516bd1a/projects/blogctl/pkg/aws/s3/manager.go (about) 1 /* 2 3 Copyright (c) 2023 - Present. Will Charczuk. All rights reserved. 4 Use of this source code is governed by a MIT license that can be found in the LICENSE file at the root of the repository. 5 6 */ 7 8 package s3 9 10 import ( 11 "bytes" 12 "context" 13 "crypto/md5" 14 "encoding/hex" 15 "fmt" 16 "io" 17 "log" 18 "os" 19 "path/filepath" 20 "runtime" 21 "strings" 22 "sync" 23 24 "github.com/aws/aws-sdk-go/aws/session" 25 "github.com/aws/aws-sdk-go/service/s3" 26 27 "go.charczuk.com/sdk/async" 28 "go.charczuk.com/sdk/logutil" 29 30 "go.charczuk.com/projects/blogctl/pkg/aws" 31 ) 32 33 // New returns a new manager. 34 func New(cfg aws.Config) *Manager { 35 return &Manager{ 36 Config: cfg, 37 Ignores: []string{ 38 ".DS_Store", 39 ".git", 40 }, 41 Session: aws.NewSession(cfg), 42 Parallelism: runtime.NumCPU(), 43 } 44 } 45 46 // Manager is a helper for uploading files to s3. 47 type Manager struct { 48 Log *log.Logger 49 Ignores []string 50 Config aws.Config 51 Session *session.Session 52 PutObjectDefaults File 53 DryRun bool 54 Parallelism int 55 } 56 57 // ParallelismOrDefault returns the parallelism or a default. 58 func (m Manager) ParallelismOrDefault() int { 59 if m.Parallelism > 0 { 60 return m.Parallelism 61 } 62 return runtime.NumCPU() 63 } 64 65 // GetKey returns the relative path for a given file. 66 func (m Manager) GetKey(rootPath, workingPath string) string { 67 if !strings.HasPrefix(workingPath, "./") { 68 workingPath = "./" + workingPath 69 } 70 if !strings.HasPrefix(rootPath, "./") { 71 rootPath = "./" + rootPath 72 } 73 return strings.TrimPrefix(workingPath, rootPath) 74 } 75 76 // SyncDirectory sync's a directory. 77 // It returns a list of invalidated keys (i.e. keys to update or remove), and an error. 78 func (m Manager) SyncDirectory(ctx context.Context, directoryPath, bucket string) (invalidations []string, err error) { 79 if m.DryRun { 80 logutil.Debug(m.Log, "sync directory (dry run): not realizing changes") 81 } 82 localFiles, err := m.DiscoverFiles(ctx, directoryPath) 83 if err != nil { 84 return nil, err 85 } 86 invalidations, err = m.ProcessFiles(ctx, localFiles, directoryPath, bucket) 87 return 88 } 89 90 // DiscoverFiles discovers local files. 91 func (m Manager) DiscoverFiles(ctx context.Context, directoryPath string) (localFiles []string, err error) { 92 err = filepath.Walk(directoryPath, func(currentPath string, fileInfo os.FileInfo, err error) error { 93 if err != nil { 94 return err 95 } 96 if currentPath == directoryPath { 97 return nil 98 } 99 for _, ignore := range m.Ignores { 100 if strings.HasSuffix(currentPath, ignore) { 101 return nil 102 } 103 } 104 if fileInfo.IsDir() { 105 return nil 106 } 107 localFiles = append(localFiles, currentPath) 108 return nil 109 }) 110 return 111 } 112 113 // ProcessFiles processes the files list. 114 func (m Manager) ProcessFiles(ctx context.Context, localFiles []string, directoryPath, bucket string) (invalidated []string, err error) { 115 remoteETags := make(map[string]string) 116 localKeys := new(Set) 117 118 remoteFiles, err := m.List(ctx, bucket) 119 if err != nil { 120 return nil, err 121 } 122 123 var remoteFileBatch = make([]File, 0, len(remoteFiles)) 124 for _, remoteFile := range remoteFiles { 125 key := remoteFile.Key 126 if !strings.HasPrefix(key, "/") { 127 key = "/" + key 128 } 129 logutil.Debugf(m.Log, "%s: remote file has etag %s", key, remoteFile.ETag) 130 remoteETags[key] = aws.StripQuotes(remoteFile.ETag) 131 remoteFileBatch = append(remoteFileBatch, remoteFile) 132 } 133 134 b := new(async.Batch) 135 b.SetLimit(m.ParallelismOrDefault()) 136 137 var processLocalFile = func(file string) func() error { 138 return func() error { 139 key := m.GetKey(directoryPath, file) 140 localKeys.Set(key) 141 142 var localETag string 143 remoteETag, hasRemoteFile := remoteETags[key] 144 if hasRemoteFile { // if we need to compare against a remote etag 145 logutil.Debugf(m.Log, "%s: generating local file etag for key", key) 146 localETag, err = m.GenerateETag(file) 147 if err != nil { 148 return err 149 } 150 } else { 151 logutil.Debugf(m.Log, "%s: missing remote file etag for key", key) 152 } 153 154 if !hasRemoteFile || remoteETag != localETag { 155 if !hasRemoteFile { 156 logutil.Debugf(m.Log, "%s: local file is not present on remote", key) 157 } else if hasRemoteFile && remoteETag != localETag { 158 logutil.Debugf(m.Log, "%s: local file has different etag than remote; %s vs. %v", key, localETag, remoteETag) 159 } 160 161 contentType, err := DetectContentType(file) 162 if err != nil { 163 return err 164 } 165 166 if !m.DryRun { 167 if err := m.Put(ctx, File{ 168 FilePath: file, 169 Key: key, 170 Bucket: bucket, 171 ContentType: contentType, 172 }); err != nil { 173 return err 174 } 175 logutil.Infof(m.Log, "%s: put file to remote", key) 176 } else { 177 logutil.Infof(m.Log, "%s: (dry run) put file to remote", key) 178 } 179 if hasRemoteFile { 180 logutil.Infof(m.Log, "%s: marking to be invalidated", key) 181 invalidated = append(invalidated, key) 182 } 183 } else { 184 logutil.Debugf(m.Log, "%s: skipping (unchanged)", key) 185 } 186 return nil 187 } 188 } 189 for _, file := range localFiles { 190 b.Go(processLocalFile(file)) 191 } 192 if err := b.Wait(); err != nil { 193 return nil, err 194 } 195 196 rb := new(async.Batch) 197 rb.SetLimit(m.ParallelismOrDefault()) 198 199 var invalidatedSync sync.Mutex 200 var invalidateRemoteFile = func(remoteFile File) func() error { 201 return func() error { 202 key := remoteFile.Key 203 if !strings.HasPrefix(key, "/") { 204 key = "/" + key 205 } 206 if !localKeys.Has(key) { 207 if !m.DryRun { 208 logutil.Infof(m.Log, "%s: removing remote file", remoteFile.Key) 209 if err := m.Delete(ctx, bucket, remoteFile.Key); err != nil { 210 return err 211 } 212 } else { 213 logutil.Infof(m.Log, "%s: (dry run) removing remote file", remoteFile.Key) 214 } 215 216 invalidatedSync.Lock() 217 invalidated = append(invalidated, key) 218 invalidatedSync.Unlock() 219 } else { 220 logutil.Debugf(m.Log, "%s: keeping remote file", remoteFile.Key) 221 } 222 return nil 223 } 224 } 225 for _, remoteFile := range remoteFileBatch { 226 rb.Go(invalidateRemoteFile(remoteFile)) 227 } 228 if err := rb.Wait(); err != nil { 229 return nil, err 230 } 231 return invalidated, nil 232 } 233 234 // List lists all files in a bucket. 235 func (m Manager) List(ctx context.Context, bucket string) ([]File, error) { 236 remoteFiles, err := s3.New(m.Session).ListObjectsWithContext(ctx, &s3.ListObjectsInput{ 237 Bucket: &bucket, 238 }) 239 if IsNotFound(err) { 240 return nil, nil 241 } 242 if err != nil { 243 return nil, err 244 } 245 246 var files []File 247 for _, file := range remoteFiles.Contents { 248 files = append(files, File{ 249 Bucket: bucket, 250 Key: aws.DerefStr(file.Key), 251 ETag: aws.DerefStr(file.ETag), 252 }) 253 } 254 return files, nil 255 } 256 257 // Get fetches a file at a given key 258 func (m Manager) Get(ctx context.Context, bucket, key string) (file File, contents io.ReadCloser, err error) { 259 remoteFile, getErr := s3.New(m.Session).GetObjectWithContext(ctx, &s3.GetObjectInput{ 260 Bucket: &bucket, 261 Key: &key, 262 }) 263 if IsNotFound(getErr) { 264 return 265 } 266 if getErr != nil { 267 err = getErr 268 return 269 } 270 271 file = File{ 272 Bucket: bucket, 273 Key: key, 274 ContentType: aws.DerefStr(remoteFile.ContentType), 275 ContentDisposition: aws.DerefStr(remoteFile.ContentDisposition), 276 ServerSideEncryption: aws.DerefStr(remoteFile.ServerSideEncryption), 277 ETag: aws.DerefStr(remoteFile.ETag), 278 } 279 contents = remoteFile.Body 280 return 281 } 282 283 // GetMeta fetches file metadata at a given key 284 func (m Manager) GetMeta(ctx context.Context, bucket, key string) (meta File, err error) { 285 var contents io.ReadCloser 286 meta, contents, err = m.Get(ctx, bucket, key) 287 if err != nil { 288 return 289 } 290 if contents != nil { 291 defer contents.Close() 292 } 293 return 294 } 295 296 // Put uploads a file to s3. 297 func (m Manager) Put(ctx context.Context, fileInfo File) error { 298 var size int64 299 var contentType, contentDisposition, acl, serverSideEncryption string 300 var contents io.ReadSeeker 301 302 if fileInfo.FilePath != "" { 303 file, err := os.Open(fileInfo.FilePath) 304 if err != nil { 305 return err 306 } 307 defer file.Close() 308 stats, err := file.Stat() 309 if err != nil { 310 return err 311 } 312 contents = file 313 size = int64(stats.Size()) 314 } else if len(fileInfo.Contents) > 0 { 315 size = int64(len(fileInfo.Contents)) 316 contents = bytes.NewReader(fileInfo.Contents) 317 } else { 318 return fmt.Errorf("invalid put object; must set either the path or the contents") 319 } 320 321 if fileInfo.ContentType != "" { 322 contentType = fileInfo.ContentType 323 } else if m.PutObjectDefaults.ContentDisposition != "" { 324 contentType = m.PutObjectDefaults.ContentType 325 } 326 327 if fileInfo.ContentDisposition != "" { 328 contentDisposition = fileInfo.ContentDisposition 329 } else if m.PutObjectDefaults.ContentDisposition != "" { 330 contentDisposition = m.PutObjectDefaults.ContentDisposition 331 } 332 333 if fileInfo.ACL != "" { 334 acl = fileInfo.ACL 335 } else if m.PutObjectDefaults.ACL != "" { 336 acl = m.PutObjectDefaults.ACL 337 } 338 339 if fileInfo.ServerSideEncryption != "" { 340 serverSideEncryption = fileInfo.ServerSideEncryption 341 } else if m.PutObjectDefaults.ServerSideEncryption != "" { 342 serverSideEncryption = m.PutObjectDefaults.ServerSideEncryption 343 } 344 345 _, err := s3.New(m.Session).PutObject(&s3.PutObjectInput{ 346 Bucket: aws.RefStr(fileInfo.Bucket), 347 Key: aws.RefStr(fileInfo.Key), 348 Body: contents, 349 ContentLength: &size, 350 ContentType: aws.RefStr(contentType), 351 ContentDisposition: aws.RefStr(contentDisposition), 352 ACL: aws.RefStr(acl), 353 ServerSideEncryption: aws.RefStr(serverSideEncryption), 354 }) 355 return err 356 } 357 358 // Delete removes an object with a given key. 359 func (m Manager) Delete(ctx context.Context, bucket, key string) error { 360 _, err := s3.New(m.Session).DeleteObject(&s3.DeleteObjectInput{ 361 Bucket: aws.RefStr(bucket), 362 Key: aws.RefStr(key), 363 }) 364 return err 365 } 366 367 // GenerateETag generate an etag for a give file by path. 368 func (m Manager) GenerateETag(path string) (string, error) { 369 f, err := os.Open(path) 370 if err != nil { 371 return "", err 372 } 373 374 hash := md5.New() 375 _, err = io.Copy(hash, f) 376 if err != nil { 377 return "", err 378 } 379 380 return hex.EncodeToString(hash.Sum(nil)), nil 381 }