github.com/sentienttechnologies/studio-go-runner@v0.0.0-20201118202441-6d21f2ced8ee/internal/runner/storage.go (about) 1 // Copyright 2018-2020 (c) Cognizant Digital Business, Evolutionary AI. All rights reserved. Issued under the Apache 2.0 License. 2 3 package runner 4 5 // This file contains the implementation for the storage sub system that will 6 // be used by the runner to retrieve storage from cloud providers or localized storage 7 8 import ( 9 "context" 10 "fmt" 11 "io" 12 "net/url" 13 "path/filepath" 14 "strings" 15 16 "github.com/go-stack/stack" 17 "github.com/jjeffery/kv" // MIT License 18 ) 19 20 // Storage defines an interface for implementations of a studioml artifact store 21 // 22 type Storage interface { 23 // Fetch will retrieve contents of the named storage object using a prefix treating any items retrieved as individual files 24 // 25 Gather(ctx context.Context, keyPrefix string, outputDir string, tap io.Writer) (warnings []kv.Error, err kv.Error) 26 27 // Fetch will retrieve contents of the named storage object and optionally unpack it into the 28 // user specified output directory 29 // 30 Fetch(ctx context.Context, name string, unpack bool, output string, tap io.Writer) (warnings []kv.Error, err kv.Error) 31 32 // Hoard will take a number of files for upload, deduplication is implemented outside of this interface 33 // 34 Hoard(ctx context.Context, srcDir string, keyPrefix string) (warnings []kv.Error, err kv.Error) 35 36 // Deposit is a directory archive and upload, deduplication is implemented outside of this interface 37 // 38 Deposit(ctx context.Context, src string, dest string) (warnings []kv.Error, err kv.Error) 39 40 // Hash can be used to retrieve the hash of the contents of the file. The hash is 41 // retrieved not computed and so is a lightweight operation common to both S3 and Google Storage. 42 // The hash on some storage platforms is not a plain MD5 but uses multiple hashes from file 43 // segments to increase the speed of hashing and also to reflect the multipart download 44 // processing that was used for the file, for a full explanation please see 45 // https://stackoverflow.com/questions/12186993/what-is-the-algorithm-to-compute-the-amazon-s3-etag-for-a-file-larger-than-5gb 46 // 47 Hash(ctx context.Context, name string) (hash string, err kv.Error) 48 49 Close() 50 } 51 52 // StoreOpts is used to encapsulate a storage implementation with the runner and studioml data needed 53 // 54 type StoreOpts struct { 55 Art *Artifact 56 ProjectID string 57 Group string 58 Creds string // The credentials file name 59 Env map[string]string 60 Validate bool 61 } 62 63 // NewStorage is used to create a receiver for a storage implementation 64 // 65 func NewStorage(ctx context.Context, spec *StoreOpts) (stor Storage, err kv.Error) { 66 67 if spec == nil { 68 return nil, kv.Wrap(err, "empty specification supplied").With("stack", stack.Trace().TrimRuntime()) 69 } 70 71 uri, errGo := url.ParseRequestURI(spec.Art.Qualified) 72 if errGo != nil { 73 return nil, kv.Wrap(errGo).With("stack", stack.Trace().TrimRuntime()) 74 } 75 76 switch uri.Scheme { 77 case "gs": 78 return NewGSstorage(ctx, spec.ProjectID, spec.Creds, spec.Env, spec.Art.Bucket, spec.Validate) 79 case "s3": 80 uriPath := strings.Split(uri.EscapedPath(), "/") 81 if len(spec.Art.Key) == 0 { 82 spec.Art.Key = strings.Join(uriPath[2:], "/") 83 } 84 if len(spec.Art.Bucket) == 0 { 85 spec.Art.Bucket = uriPath[1] 86 } 87 88 if len(uri.Host) == 0 { 89 return nil, kv.NewError("S3/minio endpoint lacks a scheme, or the host name was not specified").With("stack", stack.Trace().TrimRuntime()) 90 } 91 92 useSSL := uri.Scheme == "https" 93 94 return NewS3storage(ctx, spec.ProjectID, spec.Creds, spec.Env, uri.Host, 95 spec.Art.Bucket, spec.Art.Key, spec.Validate, useSSL) 96 97 case "file": 98 return NewLocalStorage() 99 default: 100 return nil, kv.NewError(fmt.Sprintf("unknown, or unsupported URI scheme %s, s3 or gs expected", uri.Scheme)).With("stack", stack.Trace().TrimRuntime()) 101 } 102 } 103 104 // IsTar is used to test the extension to see if the presence of tar can be found 105 // 106 func IsTar(name string) bool { 107 switch { 108 case strings.Contains(name, ".tar."): 109 return true 110 case strings.HasSuffix(name, ".tgz"): 111 return true 112 case strings.HasSuffix(name, ".tar"): 113 return true 114 case strings.HasSuffix(name, ".tar.bzip2"): 115 return true 116 case strings.HasSuffix(name, ".tar.bz2"): 117 return true 118 case strings.HasSuffix(name, ".tbz2"): 119 return true 120 case strings.HasSuffix(name, ".tbz"): 121 return true 122 } 123 return false 124 } 125 126 // MimeFromExt is used to characterize a mime type from a files extension 127 // 128 func MimeFromExt(name string) (fileType string, err kv.Error) { 129 switch filepath.Ext(name) { 130 case ".gzip", ".gz": 131 return "application/x-gzip", nil 132 case ".zip": 133 return "application/zip", nil 134 case ".tgz": // Non standard extension as a result of studioml python code 135 return "application/bzip2", nil 136 case ".tb2", ".tbz", ".tbz2", ".bzip2", ".bz2": // Standard bzip2 extensions 137 return "application/bzip2", nil 138 case ".tar": 139 return "application/tar", nil 140 default: 141 fileType, errGo := DetectFileType(name) 142 if errGo != nil { 143 // Fill in a default value even if there is an error 144 return "application/octet-stream", errGo 145 } 146 return fileType, nil 147 } 148 }