// Copyright 2018-2020 (c) Cognizant Digital Business, Evolutionary AI. All rights reserved. Issued under the Apache 2.0 License.

package runner

// This file contains the implementation of artifacts that exist as a directory containing
// files on a file system or archives on a cloud storage style platform.
//
// artifacts can be watched for changes and transfers between a file system and
// storage platforms based upon their contents changing etc
//
import (
	"context"
	"fmt"
	"io/ioutil"
	"os"
	"path/filepath"
	"strings"
	"sync"

	hasher "github.com/karlmutch/hashstructure"

	"github.com/go-stack/stack"
	"github.com/jjeffery/kv" // MIT License
)

// ArtifactCache is used to encapsulate and store hashes, typically file hashes, and
// prevent duplicated uploads from occurring needlessly
//
type ArtifactCache struct {
	// upHashes maps an upload directory to a hash computed over its entire
	// file tree, allowing unchanged directories to skip re-uploading
	upHashes map[string]uint64
	// Guards upHashes against concurrent access
	sync.Mutex

	// ErrorC can be used by the application layer to receive diagnostic and other
	// information about errors occurring inside the caching tracker and surface
	// them to the logging system
	ErrorC chan kv.Error
}

// NewArtifactCache initializes an hash tracker for artifact related files and
// passes it back to the caller.
The tracking structure can be used to track 41 // files that already been downloaded / uploaded and also includes a channel 42 // that can be used to receive error notifications 43 // 44 func NewArtifactCache() (cache *ArtifactCache) { 45 return &ArtifactCache{ 46 upHashes: map[string]uint64{}, 47 ErrorC: make(chan kv.Error), 48 } 49 } 50 51 // Close will clean up the cache of hashes and close the error reporting channel 52 // associated with the cache tracker 53 // 54 func (cache *ArtifactCache) Close() { 55 56 if cache.ErrorC != nil { 57 defer func() { 58 // Closing a close channel could cause a panic which is 59 // acceptable while tearing down the cache 60 recover() 61 }() 62 63 close(cache.ErrorC) 64 } 65 } 66 67 func readAllHash(dir string) (hash uint64, err kv.Error) { 68 files := []os.FileInfo{} 69 dirs := []string{dir} 70 for { 71 newDirs := []string{} 72 for _, aDir := range dirs { 73 items, errGo := ioutil.ReadDir(aDir) 74 if errGo != nil { 75 return 0, kv.Wrap(errGo).With("hashDir", aDir, "stack", stack.Trace().TrimRuntime()) 76 } 77 for _, info := range items { 78 if info.IsDir() { 79 newDirs = append(newDirs, filepath.Join(aDir, info.Name())) 80 } 81 files = append(files, info) 82 } 83 } 84 dirs = newDirs 85 if len(dirs) == 0 { 86 break 87 } 88 } 89 90 hash, errGo := hasher.Hash(files, nil) 91 if errGo != nil { 92 return 0, kv.Wrap(errGo).With("stack", stack.Trace().TrimRuntime()) 93 } 94 return hash, nil 95 } 96 97 // Hash is used to obtain the hash of an artifact from the backing store implementation 98 // being used by the storage implementation 99 // 100 func (cache *ArtifactCache) Hash(ctx context.Context, art *Artifact, projectId string, group string, cred string, env map[string]string, dir string) (hash string, err kv.Error) { 101 102 kv := kv.With("artifact", fmt.Sprintf("%#v", *art)).With("project", projectId).With("group", group) 103 104 storage, err := NewObjStore( 105 ctx, 106 &StoreOpts{ 107 Art: art, 108 ProjectID: projectId, 109 Group: 
group, 110 Creds: cred, 111 Env: env, 112 Validate: true, 113 }, 114 cache.ErrorC) 115 116 if err != nil { 117 return "", kv.Wrap(err).With("stack", stack.Trace().TrimRuntime()) 118 } 119 120 defer storage.Close() 121 return storage.Hash(ctx, art.Key) 122 } 123 124 // Fetch can be used to retrieve an artifact from a storage layer implementation, while 125 // passing through the lens of a caching filter that prevents unneeded downloads. 126 // 127 func (cache *ArtifactCache) Fetch(ctx context.Context, art *Artifact, projectId string, group string, cred string, env map[string]string, dir string) (warns []kv.Error, err kv.Error) { 128 129 kv := kv.With("artifact", fmt.Sprintf("%#v", *art)).With("project", projectId).With("group", group) 130 131 // Process the qualified URI and use just the path for now 132 dest := filepath.Join(dir, group) 133 if errGo := os.MkdirAll(dest, 0700); errGo != nil { 134 return warns, kv.Wrap(errGo).With("stack", stack.Trace().TrimRuntime()).With("dest", dest) 135 } 136 137 storage, err := NewObjStore( 138 ctx, 139 &StoreOpts{ 140 Art: art, 141 ProjectID: projectId, 142 Group: group, 143 Creds: cred, 144 Env: env, 145 Validate: true, 146 }, 147 cache.ErrorC) 148 149 if err != nil { 150 return warns, kv.Wrap(err).With("stack", stack.Trace().TrimRuntime()) 151 } 152 153 if art.Unpack && !IsTar(art.Key) { 154 return warns, kv.NewError("the unpack flag was set for an unsupported file format (tar gzip/bzip2 only supported)").With("stack", stack.Trace().TrimRuntime()) 155 } 156 157 switch group { 158 case "_metadata": 159 //The following is disabled until we look into how to efficiently do downloads of 160 // experiment related retries rather than downloading an entire hosts worth of activity 161 // warns, err = storage.Gather(ctx, "metadata/", dest) 162 default: 163 warns, err = storage.Fetch(ctx, art.Key, art.Unpack, dest) 164 } 165 storage.Close() 166 167 if err != nil { 168 return warns, kv.Wrap(err) 169 } 170 171 // Immutable artifacts need 
just to be downloaded and nothing else 172 if !art.Mutable && !strings.HasPrefix(art.Qualified, "file://") { 173 return warns, nil 174 } 175 176 if cache == nil { 177 return warns, nil 178 } 179 180 if err = cache.updateHash(dest); err != nil { 181 return warns, kv.Wrap(err) 182 } 183 184 return warns, nil 185 } 186 187 func (cache *ArtifactCache) updateHash(dir string) (err kv.Error) { 188 hash, err := readAllHash(dir) 189 if err != nil { 190 return err 191 } 192 193 // Having obtained the artifact if it is mutable then we add a set of upload area hashes for all files and directories the artifact included 194 cache.Lock() 195 cache.upHashes[dir] = hash 196 cache.Unlock() 197 198 return nil 199 } 200 201 func (cache *ArtifactCache) checkHash(dir string) (isValid bool, err kv.Error) { 202 203 cache.Lock() 204 defer cache.Unlock() 205 206 oldHash, isPresent := cache.upHashes[dir] 207 208 if !isPresent { 209 return false, nil 210 } 211 212 hash, err := readAllHash(dir) 213 if err != nil { 214 return false, err 215 } 216 return oldHash == hash, nil 217 } 218 219 // Local returns the local disk based file name for the artifacts expanded archive files 220 // 221 func (cache *ArtifactCache) Local(group string, dir string, file string) (fn string, err kv.Error) { 222 fn = filepath.Join(dir, group, file) 223 if _, errOs := os.Stat(fn); errOs != nil { 224 return "", kv.Wrap(errOs).With("stack", stack.Trace().TrimRuntime()) 225 } 226 return fn, nil 227 } 228 229 // Restore the artifacts that have been marked mutable and that have changed 230 // 231 func (cache *ArtifactCache) Restore(ctx context.Context, art *Artifact, projectId string, group string, cred string, env map[string]string, dir string) (uploaded bool, warns []kv.Error, err kv.Error) { 232 233 // Immutable artifacts need just to be downloaded and nothing else 234 if !art.Mutable { 235 return false, warns, nil 236 } 237 238 kvDetails := []interface{}{"artifact", fmt.Sprintf("%#v", *art), "project", projectId, 
"group", group, "dir", dir} 239 240 source := filepath.Join(dir, group) 241 isValid, err := cache.checkHash(source) 242 if err != nil { 243 kvDetails = append(kvDetails, "group", group, "stack", stack.Trace().TrimRuntime()) 244 return false, warns, kv.Wrap(err).With(kvDetails...) 245 } 246 if isValid { 247 return false, warns, nil 248 } 249 250 storage, err := NewObjStore( 251 ctx, 252 &StoreOpts{ 253 Art: art, 254 ProjectID: projectId, 255 Creds: cred, 256 Env: env, 257 Validate: true, 258 }, 259 cache.ErrorC) 260 if err != nil { 261 return false, warns, err 262 } 263 defer storage.Close() 264 265 // Check to see if the cache has a hash for the directory that has changed and 266 // needs uploading 267 // 268 269 hash, errHash := readAllHash(dir) 270 271 switch group { 272 case "_metadata": 273 // If no metadata exists, which could be legitimate, dont try and save it 274 // otherwise things will go wrong when walking the directories 275 if _, errGo := os.Stat(source); !os.IsNotExist(errGo) { 276 if warns, err = storage.Hoard(ctx, source, "metadata"); err != nil { 277 return false, warns, err.With("group", group) 278 } 279 } 280 default: 281 if warns, err = storage.Deposit(ctx, source, art.Key); err != nil { 282 return false, warns, err.With("group", group) 283 } 284 } 285 286 if errHash == nil { 287 // Having obtained the artifact if it is mutable then we add a set of upload area hashes for all files and directories the artifact included 288 cache.Lock() 289 cache.upHashes[dir] = hash 290 cache.Unlock() 291 } 292 293 return true, warns, nil 294 }