github.com/sentienttechnologies/studio-go-runner@v0.0.0-20201118202441-6d21f2ced8ee/internal/runner/objectstore.go

// Copyright 2018-2020 (c) Cognizant Digital Business, Evolutionary AI. All rights reserved. Issued under the Apache 2.0 License.

package runner

// This file contains the implementation of storage that can use an internal cache along with the MD5
// hash of a file's contents to avoid downloads that are not needed.

import (
	"bufio"
	"context"
	"fmt"
	"io/ioutil"
	"os"
	"path/filepath"
	"sync"
	"time"

	"github.com/go-stack/stack"
	"github.com/jjeffery/kv" // MIT License

	"github.com/lthibault/jitterbug"

	"github.com/karlmutch/ccache"

	"github.com/karlmutch/go-shortid"

	"github.com/prometheus/client_golang/prometheus"
)

var (
	cacheHits = prometheus.NewCounterVec(
		prometheus.CounterOpts{
			Name: "runner_cache_hits",
			Help: "Number of artifact cache hits.",
		},
		[]string{"host", "hash"},
	)
	cacheMisses = prometheus.NewCounterVec(
		prometheus.CounterOpts{
			Name: "runner_cache_misses",
			Help: "Number of artifact cache misses.",
		},
		[]string{"host", "hash"},
	)

	host = ""
)

func init() {
	host, _ = os.Hostname()
}

type objStore struct {
	store  Storage
	ErrorC chan kv.Error
}

// NewObjStore is used to instantiate an object store for the runner that includes a cache
//
func NewObjStore(ctx context.Context, spec *StoreOpts, errorC chan kv.Error) (oStore *objStore, err kv.Error) {
	store, err := NewStorage(ctx, spec)
	if err != nil {
		return nil, err
	}

	return &objStore{
		store:  store,
		ErrorC: errorC,
	}, nil
}

var (
	backingDir = ""

	cacheMax      int64
	cacheInit     sync.Once
	cacheInitSync sync.Mutex
	cache         *ccache.Cache
)

func groom(backingDir string, removedC chan os.FileInfo, errorC chan kv.Error) {
	if cache == nil {
		return
	}
	cachedFiles, err := ioutil.ReadDir(backingDir)
	if err != nil {

		go func() {
			defer func() {
				recover()
			}()
			select {
			case errorC <- kv.Wrap(err, fmt.Sprintf("cache dir %s refresh failure", backingDir)).With("stack", stack.Trace().TrimRuntime()):
			case <-time.After(time.Second):
				fmt.Printf("%s\n", kv.Wrap(err, fmt.Sprintf("cache dir %s refresh failed", backingDir)).With("stack", stack.Trace().TrimRuntime()))
			}
		}()
		return
	}

	for _, file := range cachedFiles {
		// If the file has expired, or is missing from the in memory cache data structure,
		// and it is not a directory, delete it
		item := cache.Sample(file.Name())
		if item == nil || item.Expired() {
			info, err := os.Stat(filepath.Join(backingDir, file.Name()))
			if err == nil {
				if info.IsDir() {
					continue
				}
				select {
				case removedC <- info:
				case <-time.After(time.Second):
				}
				if err = os.Remove(filepath.Join(backingDir, file.Name())); err != nil {
					select {
					case errorC <- kv.Wrap(err, fmt.Sprintf("cache dir %s remove failed", backingDir)).With("stack", stack.Trace().TrimRuntime()):
					case <-time.After(time.Second):
						fmt.Printf("%s\n", kv.Wrap(err, fmt.Sprintf("cache dir %s remove failed", backingDir)).With("stack", stack.Trace().TrimRuntime()))
					}
				}
			}
		}
	}
}

// groomDir will scan the files on disk and any that are not present in the in memory
// cache, or that have expired, will be reaped
//
func groomDir(ctx context.Context, backingDir string, removedC chan os.FileInfo, errorC chan kv.Error) (triggerC chan struct{}) {
	triggerC = make(chan struct{})

	go func() {
		check := NewTrigger(triggerC, time.Second*30, &jitterbug.Norm{Stdev: time.Second * 3})
		defer check.Stop()

		for {
			select {
			case <-check.C:
				groom(backingDir, removedC, errorC)

			case <-ctx.Done():
				return
			}
		}
	}()

	return triggerC
}
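
// A minimal caller-side sketch of manual grooming (the exact NewTrigger semantics live
// elsewhere in this package, so this is a hedged illustration only): the InitObjStore
// documentation below states that the returned trigger channel can be used to request
// an immediate grooming pass in addition to the periodic jittered timer, for example:
//
//	// triggerC was previously returned by InitObjStore
//	select {
//	case triggerC <- struct{}{}:
//		// an immediate grooming pass has been requested
//	case <-time.After(time.Second):
//		// the groomer was not ready, rely on the periodic pass instead
//	}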

// ClearObjStore can be used by clients to erase the contents of the object store cache
//
func ClearObjStore() (err kv.Error) {
	// The ccache works by having the in memory tracking cache as the record of truth. If we
	// delete the files on disk then when they are fetched they will be invalidated. If they expire
	// then nothing will be done by the groomer
	//
	cachedFiles, errGo := ioutil.ReadDir(backingDir)
	if errGo != nil {
		return kv.Wrap(errGo).With("backingDir", backingDir).With("stack", stack.Trace().TrimRuntime())
	}
	for _, file := range cachedFiles {
		if file.Name()[0] == '.' {
			continue
		}
		info, err := os.Stat(filepath.Join(backingDir, file.Name()))
		if err == nil {
			if info.IsDir() {
				continue
			}
			if err = os.Remove(filepath.Join(backingDir, file.Name())); err != nil {
				return kv.Wrap(err, fmt.Sprintf("cache dir %s remove failed", backingDir)).With("stack", stack.Trace().TrimRuntime())
			}
		}
	}
	return nil
}

// ObjStoreFootPrint can be used to determine what the current footprint of the
// artifact cache is
//
func ObjStoreFootPrint() (max int64) {
	return cacheMax
}
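
// A minimal caller-side initialization sketch (the cache directory, size, and the
// artifact value are hypothetical): the disk cache is enabled once per process with
// InitObjStore, after which individual object stores are created with NewObjStore,
// for example:
//
//	removedC := make(chan os.FileInfo, 1)
//	errorC := make(chan kv.Error, 1)
//	triggerC, err := InitObjStore(ctx, "/tmp/artifact-cache", 10*1024*1024*1024, removedC, errorC)
//	if err != nil {
//		// handle the initialization failure
//	}
//	store, err := NewObjStore(ctx, &StoreOpts{Art: &artifact, Validate: true}, errorC)
//	_ = triggerC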

// InitObjStore sets up the backing store for our object store cache. The size specified
// can be any byte amount.
//
// The triggerC channel is functional when the err value is nil; it can be used to manually
// trigger the disk caching sub system
//
func InitObjStore(ctx context.Context, backing string, size int64, removedC chan os.FileInfo, errorC chan kv.Error) (triggerC chan<- struct{}, err kv.Error) {
	if len(backing) == 0 {
		// If we don't have a backing store, don't start the cache
		return nil, kv.NewError("empty cache directory name").With("stack", stack.Trace().TrimRuntime())
	}

	// Also make sure that the specified directory actually exists
	if stat, errGo := os.Stat(backing); errGo != nil || !stat.IsDir() {
		if errGo != nil {
			return nil, kv.Wrap(errGo, "cache directory does not exist").With("backing", backing).With("stack", stack.Trace().TrimRuntime())
		}
		return nil, kv.NewError("cache name specified is not a directory").With("backing", backing).With("stack", stack.Trace().TrimRuntime())
	}

	// Now load a list of the files in the cache directory which further checks
	// our ability to use the storage
	//
	cachedFiles, errGo := ioutil.ReadDir(backing)
	if errGo != nil {
		return nil, kv.Wrap(errGo, "cache directory not readable").With("backing", backing).With("stack", stack.Trace().TrimRuntime())
	}

	// Finally try to create and delete a working file
	id, errGo := shortid.Generate()
	if errGo != nil {
		return nil, kv.Wrap(errGo, "cache directory not writable").With("backing", backing).With("stack", stack.Trace().TrimRuntime())
	}
	tmpFile := filepath.Join(backing, id)

	errGo = ioutil.WriteFile(tmpFile, []byte{0}, 0600)
	if errGo != nil {
		return nil, kv.Wrap(errGo, "cache directory not writable").With("backing", backing).With("stack", stack.Trace().TrimRuntime())
	}
	os.Remove(tmpFile)

	// When the cache init is called we only want one caller at a time through, and the
	// initializer should only complete successfully once; retries after failures are permitted.
	//
	cacheInitSync.Lock()
	defer cacheInitSync.Unlock()

	if cache != nil {
		return nil, kv.NewError("cache is already initialized").With("stack", stack.Trace().TrimRuntime())
	}

	// Register the monitoring items for measurement purposes by external parties,
	// these are only activated if the caching is being used
	if errGo = prometheus.Register(cacheHits); errGo != nil {
		select {
		case errorC <- kv.Wrap(errGo).With("stack", stack.Trace().TrimRuntime()):
		default:
		}
	}
	if errGo = prometheus.Register(cacheMisses); errGo != nil {
		select {
		case errorC <- kv.Wrap(errGo).With("stack", stack.Trace().TrimRuntime()):
		default:
		}
	}

	select {
	case errorC <- kv.NewError("cache enabled").With("stack", stack.Trace().TrimRuntime()):
	default:
	}

	// Store the backing store directory for the cache
	backingDir = backing
	cacheMax = size

	// The backing store might have partial downloads inside it. We should clear those, ignoring any errors,
	// and then re-create the partial download directory
	partialDir := filepath.Join(backingDir, ".partial")
	os.RemoveAll(partialDir)

	if errGo = os.MkdirAll(partialDir, 0700); errGo != nil {
		return nil, kv.Wrap(errGo, "unable to create the partial downloads dir ", partialDir).With("stack", stack.Trace().TrimRuntime())
	}

	// Size the cache appropriately, and track items that are in use through to their being released,
	// which prevents items that are being read from being groomed away and then fresh copies of the
	// same data reappearing
	cache = ccache.New(ccache.Configure().MaxSize(size).GetsPerPromote(1).ItemsToPrune(1))

	// Now populate the lookaside cache with the files found in the cache directory and their sizes
	for i, file := range cachedFiles {
		if file.IsDir() {
			continue
		}
		if file.Name()[0] != '.' {
			cache.Fetch(file.Name(), time.Hour*48,
				func() (interface{}, error) {
					return cachedFiles[i], nil
				})
		}
	}

	// Now start the directory groomer
	cacheInit.Do(func() {
		triggerC = groomDir(ctx, backingDir, removedC, errorC)
	})

	return triggerC, nil
}

// CacheProbe can be used to test the validity of the cache for a previously cached item.
//
func CacheProbe(key string) bool {
	item := cache.Get(key)
	return item != nil && !item.Expired()
}

// Hash will return the hash of a stored file or other blob. This method can be used
// by a caching layer or by a client to obtain the unique content based identity of the
// resource being stored.
//
func (s *objStore) Hash(ctx context.Context, name string) (hash string, err kv.Error) {
	return s.store.Hash(ctx, name)
}

// Gather is used to retrieve files prefixed with a specific key. It is used to retrieve the individual files
// associated with a previous Hoard operation
//
func (s *objStore) Gather(ctx context.Context, keyPrefix string, outputDir string) (warnings []kv.Error, err kv.Error) {
	// Retrieve individual files, without using the cache; the tap is set to nil
	return s.store.Gather(ctx, keyPrefix, outputDir, nil)
}
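
// The Fetch implementation below coordinates concurrent fetches of the same artifact
// through the on disk .partial directory: the first goroutine to create the partial
// file with the O_CREATE|O_EXCL flags becomes the downloader, every other goroutine
// polls until the completed file is renamed into the cache directory. A simplified
// sketch of that election step (error handling elided):
//
//	f, errGo := os.OpenFile(partial, os.O_CREATE|os.O_EXCL|os.O_WRONLY, 0600)
//	if errGo != nil {
//		// another goroutine won the election, wait and re-check the cache
//	} else {
//		// we are the downloader, stream into f and then rename into the cache
//	}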

// Fetch is used by clients to retrieve resources from a concrete storage system. This function will
// invoke storage system logic that may retrieve resources from a cache.
//
func (s *objStore) Fetch(ctx context.Context, name string, unpack bool, output string) (warns []kv.Error, err kv.Error) {
	// Check for the meta data, the MD5 hash, from the upstream and then examine our cache for a match
	hash, err := s.store.Hash(ctx, name)
	if err != nil {
		return warns, err
	}

	// If there is no cache simply download the file, and so we supply a nil
	// for the tap
	if len(backingDir) == 0 {
		cacheMisses.With(prometheus.Labels{"host": host, "hash": hash}).Inc()
		return s.store.Fetch(ctx, name, unpack, output, nil)
	}

	// Triggers the LRU to elevate the item being retrieved
	if len(hash) != 0 {
		if item := cache.Get(hash); item != nil {
			if !item.Expired() {
				item.Extend(48 * time.Hour)
			}
		}
	}

	startTime := time.Now()

	// Define a time period on which we repeat checking for the presence of a partial
	// download that is for the artifact we are waiting for and before we recheck for
	// the continued presence of the artifact
	waitOnPartial := 33 * time.Second

	// If there is caching we should loop until we have a good file in the cache, and,
	// if appropriate based on the contents of the partial download directory, either do
	// the download ourselves or wait for it to happen, respecting the notion that only
	// one of the waiters should be downloading actively
	//
	downloader := false

	// Loop termination conditions include a timeout and successful completion
	// of the download
	for {
		// Examine the local file cache and use the file from there if present
		localName := filepath.Join(backingDir, hash)
		if _, errGo := os.Stat(localName); errGo == nil {
			spec := StoreOpts{
				Art: &Artifact{
					Qualified: fmt.Sprintf("file:///%s", localName),
				},
				Validate: true,
			}
			localFS, err := NewStorage(ctx, &spec)
			if err != nil {
				return warns, err
			}
			// Because the file is already in the cache we don't supply a tap here
			w, err := localFS.Fetch(ctx, localName, unpack, output, nil)
			if err == nil {
				cacheHits.With(prometheus.Labels{"host": host, "hash": hash}).Inc()
				return warns, nil
			}

			// Drops through to allow for a fresh download, after saving the errors
			// as warnings for the caller so that caching failures can be observed
			// and diagnosed
			for _, warn := range w {
				warns = append(warns, warn)
			}
			warns = append(warns, err)
		}
		cacheMisses.With(prometheus.Labels{"host": host, "hash": hash}).Inc()

		if ctx.Err() != nil {
			if downloader {
				return warns, kv.NewError("downloading artifact terminated").With("stack", stack.Trace().TrimRuntime()).With("file", name)
			}
			return warns, kv.NewError("waiting for artifact terminated").With("stack", stack.Trace().TrimRuntime()).With("file", name)
		}
		downloader = false

		// Look for partial downloads; if a downloader is found then wait for the file to appear
		// inside the main directory
		//
		partial := filepath.Join(backingDir, ".partial", hash)
		if _, errGo := os.Stat(partial); errGo == nil {
			select {
			case <-ctx.Done():
				return warns, err
			case <-time.After(waitOnPartial):
				warn := kv.NewError("pending").With("since", time.Since(startTime).String(), "partial", partial, "file", name, "stack", stack.Trace().TrimRuntime())
				warns = append(warns, warn)
			}
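			// Re-enter the loop; by now the completed download may have been renamed
			// into the cache directory by the active downloader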
			continue
		}

		// If there is no partial file yet try to create a partial file with
		// the exclusive and create flags set, which avoids two threads
		// creating the file on top of each other
		//
		file, errGo := os.OpenFile(partial, os.O_CREATE|os.O_EXCL|os.O_WRONLY, 0600)
		if errGo != nil {
			select {
			case s.ErrorC <- kv.Wrap(errGo, "file open failure").With("stack", stack.Trace().TrimRuntime()).With("file", partial):
			case <-ctx.Done():
				return warns, err
			default:
			}
			select {
			case <-ctx.Done():
				return warns, err
			case <-time.After(waitOnPartial):
				warn := kv.Wrap(errGo).With("since", time.Since(startTime).String(), "partial", partial, "file", name, "stack", stack.Trace().TrimRuntime())
				warns = append(warns, warn)
			}
			continue
		}
		downloader = true

		tapWriter := bufio.NewWriter(file)

		// Having gained the file to download into, call the fetch method and supply the
		// io.Writer tap to the concrete downloader
		//
		w, err := s.store.Fetch(ctx, name, unpack, output, tapWriter)

		tapWriter.Flush()
		file.Close()

		// Save warnings from intermediate components, even if there are no
		// unrecoverable errors
		for _, warn := range w {
			warns = append(warns, warn)
		}

		if err == nil {
			info, errGo := os.Stat(partial)
			if errGo == nil {
				cache.Fetch(info.Name(), time.Hour*48,
					func() (interface{}, error) {
						return info, nil
					})
			} else {
				select {
				case <-ctx.Done():
					return warns, err
				case s.ErrorC <- kv.Wrap(errGo, "file cache failure").With("stack", stack.Trace().TrimRuntime()).With("partial", partial).With("file", localName):
				default:
				}
			}
			// Move the downloaded file from .partial into our base cache directory.
			// Handling the file from the application's perspective has already been done
			// by the Fetch, so if the rename fails there is nothing we can do about it;
			// simply continue as the application will have the data anyway
			if errGo = os.Rename(partial, localName); errGo != nil {
				select {
				case s.ErrorC <- kv.Wrap(errGo, "file rename failure").With("stack", stack.Trace().TrimRuntime()).With("partial", partial).With("file", localName):
				default:
				}
			}

			return warns, nil
		}
		select {
		case s.ErrorC <- err:
		default:
		}
		// If we had a working file get rid of it; leaving it in place would
		// block further download attempts
		if errGo = os.Remove(partial); errGo != nil {
			warn := kv.Wrap(errGo).With("since", time.Since(startTime).String(), "partial", partial, "file", name, "stack", stack.Trace().TrimRuntime())
			warns = append(warns, warn)
		}

		select {
		case <-ctx.Done():
			return warns, err
		case <-time.After(waitOnPartial):
			warn := kv.NewError("reattempting").With("since", time.Since(startTime).String(), "partial", partial, "file", name, "stack", stack.Trace().TrimRuntime())
			warns = append(warns, warn)
		}
	} // End of for {}
	// unreachable
}
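
// A minimal caller-side sketch for Fetch (the artifact name and output directory are
// hypothetical): warnings are advisory and only the error value indicates failure,
// for example:
//
//	warns, err := store.Fetch(ctx, "workspace.tar", true, "/tmp/experiment")
//	for _, warn := range warns {
//		fmt.Println("warning:", warn)
//	}
//	if err != nil {
//		// the artifact could not be retrieved
//	}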

// Hoard is used to place a directory of individual files into the storage resource provided
// by a specific storage implementation.
//
func (s *objStore) Hoard(ctx context.Context, srcDir string, destPrefix string) (warns []kv.Error, err kv.Error) {
	// Place the items directly into the backing store; the cache is not used for uploads
	return s.store.Hoard(ctx, srcDir, destPrefix)
}

// Deposit is used to place a file or other storage resource into the storage provided
// by a specific storage implementation.
//
func (s *objStore) Deposit(ctx context.Context, src string, dest string) (warns []kv.Error, err kv.Error) {
	// Place the item directly into the backing store; the cache is not used for uploads
	return s.store.Deposit(ctx, src, dest)
}

// Close is used to clean up any resources allocated to the storage by calling the implementation's Close
// method.
//
func (s *objStore) Close() {
	s.store.Close()
}
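
// A minimal upload sketch (paths hypothetical): results are pushed back to the backing
// store with Deposit and the store is released with Close, for example:
//
//	warns, err := store.Deposit(ctx, "/tmp/experiment/output.tar", "results/output.tar")
//	_ = warns
//	if err != nil {
//		// handle the upload failure
//	}
//	store.Close()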