github.com/sentienttechnologies/studio-go-runner@v0.0.0-20201118202441-6d21f2ced8ee/internal/runner/artifacts.go

// Copyright 2018-2020 (c) Cognizant Digital Business, Evolutionary AI. All rights reserved. Issued under the Apache 2.0 License.

package runner

// This file contains the implementation of artifacts that exist as a directory of
// files on a file system, or as archives on a cloud storage style platform.
//
// Artifacts can be watched for changes, and transferred between a file system and
// storage platforms when their contents change.
//
import (
	"context"
	"fmt"
	"io/ioutil"
	"os"
	"path/filepath"
	"strings"
	"sync"

	hasher "github.com/karlmutch/hashstructure"

	"github.com/go-stack/stack"
	"github.com/jjeffery/kv" // MIT License
)

// ArtifactCache is used to encapsulate and store hashes, typically file hashes, and
// prevent needless duplicate uploads from occurring
//
type ArtifactCache struct {
	upHashes map[string]uint64
	sync.Mutex

	// ErrorC can be used by the application layer to receive diagnostic and other
	// information about errors occurring inside the caching tracker and to surface
	// these errors to the logging system
	ErrorC chan kv.Error
}

// NewArtifactCache initializes a hash tracker for artifact related files and
// passes it back to the caller.  The tracking structure can be used to track
// files that have already been downloaded / uploaded and also includes a channel
// that can be used to receive error notifications
//
func NewArtifactCache() (cache *ArtifactCache) {
	return &ArtifactCache{
		upHashes: map[string]uint64{},
		ErrorC:   make(chan kv.Error),
	}
}

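// What follows is a hypothetical usage sketch, not part of the original
// implementation.  It shows how a caller might create the cache, drain its
// unbuffered error channel so that background reporting never blocks, and shut
// the cache down when finished.  The logging call is an assumption made purely
// for illustration.
func exampleArtifactCacheLifecycle() {
	cache := NewArtifactCache()

	// Drain diagnostics produced by the caching tracker and surface them to
	// whatever logging facility the application layer uses.
	go func() {
		for err := range cache.ErrorC {
			fmt.Println("artifact cache:", err)
		}
	}()

	// ... use cache.Fetch, cache.Restore and friends here ...

	// Close releases the error channel, which also terminates the goroutine above.
	cache.Close()
}
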
// Close will clean up the cache of hashes and close the error reporting channel
// associated with the cache tracker
//
func (cache *ArtifactCache) Close() {

	if cache.ErrorC != nil {
		defer func() {
			// Closing an already closed channel would cause a panic which is
			// acceptable while tearing down the cache
			recover()
		}()

		close(cache.ErrorC)
	}
}

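// readAllHash walks the directory tree rooted at dir, gathering the os.FileInfo
// entries for every file and sub directory it encounters, and returns a single
// hash computed across that collection.  Only directory metadata is examined,
// file contents are never read, which keeps change detection inexpensive for
// large artifact trees.
//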
func readAllHash(dir string) (hash uint64, err kv.Error) {
	files := []os.FileInfo{}
	dirs := []string{dir}
	for {
		newDirs := []string{}
		for _, aDir := range dirs {
			items, errGo := ioutil.ReadDir(aDir)
			if errGo != nil {
				return 0, kv.Wrap(errGo).With("hashDir", aDir, "stack", stack.Trace().TrimRuntime())
			}
			for _, info := range items {
				if info.IsDir() {
					newDirs = append(newDirs, filepath.Join(aDir, info.Name()))
				}
				files = append(files, info)
			}
		}
		dirs = newDirs
		if len(dirs) == 0 {
			break
		}
	}

	hash, errGo := hasher.Hash(files, nil)
	if errGo != nil {
		return 0, kv.Wrap(errGo).With("stack", stack.Trace().TrimRuntime())
	}
	return hash, nil
}

// Hash is used to obtain the hash of an artifact from the backing store
// being used by the storage implementation
//
func (cache *ArtifactCache) Hash(ctx context.Context, art *Artifact, projectId string, group string, cred string, env map[string]string, dir string) (hash string, err kv.Error) {

	kv := kv.With("artifact", fmt.Sprintf("%#v", *art)).With("project", projectId).With("group", group)

	storage, err := NewObjStore(
		ctx,
		&StoreOpts{
			Art:       art,
			ProjectID: projectId,
			Group:     group,
			Creds:     cred,
			Env:       env,
			Validate:  true,
		},
		cache.ErrorC)

	if err != nil {
		return "", kv.Wrap(err).With("stack", stack.Trace().TrimRuntime())
	}

	defer storage.Close()
	return storage.Hash(ctx, art.Key)
}

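// What follows is a hypothetical sketch, not part of the original implementation,
// showing one way a caller might use Hash: compare the remote hash of an artifact
// against a previously recorded value so that an unchanged artifact is not fetched
// again.  The lastKnown parameter is an assumption, standing in for whatever
// bookkeeping the caller maintains.
func exampleSkipUnchangedDownload(ctx context.Context, cache *ArtifactCache, art *Artifact, projectId string, group string, cred string, env map[string]string, dir string, lastKnown string) (unchanged bool, err kv.Error) {
	remote, err := cache.Hash(ctx, art, projectId, group, cred, env, dir)
	if err != nil {
		return false, err
	}

	// When the remote hash matches the recorded one the artifact has not changed
	// and the download can be skipped entirely.
	return remote == lastKnown, nil
}
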
// Fetch can be used to retrieve an artifact from a storage layer implementation, while
// passing through the lens of a caching filter that prevents unneeded downloads.
//
func (cache *ArtifactCache) Fetch(ctx context.Context, art *Artifact, projectId string, group string, cred string, env map[string]string, dir string) (warns []kv.Error, err kv.Error) {

	kv := kv.With("artifact", fmt.Sprintf("%#v", *art)).With("project", projectId).With("group", group)

	// Process the qualified URI and use just the path for now
	dest := filepath.Join(dir, group)
	if errGo := os.MkdirAll(dest, 0700); errGo != nil {
		return warns, kv.Wrap(errGo).With("stack", stack.Trace().TrimRuntime()).With("dest", dest)
	}

	storage, err := NewObjStore(
		ctx,
		&StoreOpts{
			Art:       art,
			ProjectID: projectId,
			Group:     group,
			Creds:     cred,
			Env:       env,
			Validate:  true,
		},
		cache.ErrorC)

	if err != nil {
		return warns, kv.Wrap(err).With("stack", stack.Trace().TrimRuntime())
	}

	if art.Unpack && !IsTar(art.Key) {
		return warns, kv.NewError("the unpack flag was set for an unsupported file format (tar gzip/bzip2 only supported)").With("stack", stack.Trace().TrimRuntime())
	}

	switch group {
	case "_metadata":
		// The following is disabled until we look into how to efficiently download
		// experiment related retries rather than an entire host's worth of activity
		// warns, err = storage.Gather(ctx, "metadata/", dest)
	default:
		warns, err = storage.Fetch(ctx, art.Key, art.Unpack, dest)
	}
	storage.Close()

	if err != nil {
		return warns, kv.Wrap(err)
	}

	// Immutable artifacts just need to be downloaded, nothing more is tracked for them
	if !art.Mutable && !strings.HasPrefix(art.Qualified, "file://") {
		return warns, nil
	}

	if cache == nil {
		return warns, nil
	}

	if err = cache.updateHash(dest); err != nil {
		return warns, kv.Wrap(err)
	}

	return warns, nil
}

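// What follows is a hypothetical usage sketch, not part of the original
// implementation: fetch an artifact into a per group sub directory of a freshly
// created workspace and log any warnings the storage layer produced.  The
// temporary workspace layout and the logging calls are assumptions made purely
// for illustration.
func exampleFetchArtifact(ctx context.Context, cache *ArtifactCache, art *Artifact, projectId string, group string, cred string, env map[string]string) (workDir string, err kv.Error) {
	workDir, errGo := ioutil.TempDir("", "experiment")
	if errGo != nil {
		return "", kv.Wrap(errGo)
	}

	// Fetch places the artifact under <workDir>/<group> and, for mutable artifacts,
	// records a hash of the downloaded content so Restore can later detect changes.
	warns, err := cache.Fetch(ctx, art, projectId, group, cred, env, workDir)
	for _, warn := range warns {
		fmt.Println("fetch warning:", warn)
	}
	return workDir, err
}
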
func (cache *ArtifactCache) updateHash(dir string) (err kv.Error) {
	hash, err := readAllHash(dir)
	if err != nil {
		return err
	}

	// Having obtained the artifact, if it is mutable we record a hash of the upload
	// area covering all files and directories the artifact included
	cache.Lock()
	cache.upHashes[dir] = hash
	cache.Unlock()

	return nil
}

func (cache *ArtifactCache) checkHash(dir string) (isValid bool, err kv.Error) {

	cache.Lock()
	defer cache.Unlock()

	oldHash, isPresent := cache.upHashes[dir]

	if !isPresent {
		return false, nil
	}

	hash, err := readAllHash(dir)
	if err != nil {
		return false, err
	}
	return oldHash == hash, nil
}

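// What follows is a hypothetical sketch, not part of the original implementation:
// it illustrates how the unexported hash helpers cooperate to detect local
// modifications.  The caller records the directory state after a download and
// later asks whether an upload is warranted.
func exampleDetectLocalChanges(cache *ArtifactCache, artifactDir string) (needsUpload bool, err kv.Error) {
	// Record the state of the artifact directory as it was downloaded.
	if err = cache.updateHash(artifactDir); err != nil {
		return false, err
	}

	// ... the experiment runs and may write into artifactDir ...

	// checkHash reports true only while the directory still matches the recorded
	// hash, so a false result means the content should be uploaded.
	unchanged, err := cache.checkHash(artifactDir)
	if err != nil {
		return false, err
	}
	return !unchanged, nil
}
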
// Local returns the local disk based file name for one of the artifact's expanded archive files
//
func (cache *ArtifactCache) Local(group string, dir string, file string) (fn string, err kv.Error) {
	fn = filepath.Join(dir, group, file)
	if _, errOs := os.Stat(fn); errOs != nil {
		return "", kv.Wrap(errOs).With("stack", stack.Trace().TrimRuntime())
	}
	return fn, nil
}

// Restore uploads artifacts that have been marked mutable and whose contents have changed
//
func (cache *ArtifactCache) Restore(ctx context.Context, art *Artifact, projectId string, group string, cred string, env map[string]string, dir string) (uploaded bool, warns []kv.Error, err kv.Error) {

	// Immutable artifacts only ever need to be downloaded, they are never uploaded back
	if !art.Mutable {
		return false, warns, nil
	}

	kvDetails := []interface{}{"artifact", fmt.Sprintf("%#v", *art), "project", projectId, "group", group, "dir", dir}

	source := filepath.Join(dir, group)
	isValid, err := cache.checkHash(source)
	if err != nil {
		kvDetails = append(kvDetails, "group", group, "stack", stack.Trace().TrimRuntime())
		return false, warns, kv.Wrap(err).With(kvDetails...)
	}
	if isValid {
		return false, warns, nil
	}

	storage, err := NewObjStore(
		ctx,
		&StoreOpts{
			Art:       art,
			ProjectID: projectId,
			Creds:     cred,
			Env:       env,
			Validate:  true,
		},
		cache.ErrorC)
	if err != nil {
		return false, warns, err
	}
	defer storage.Close()

	// Compute a hash for the artifact directory so that, once the upload succeeds,
	// the cache can record the state that was sent and skip identical uploads later
	//

	hash, errHash := readAllHash(source)

	switch group {
	case "_metadata":
		// If no metadata exists, which could be legitimate, don't try to save it,
		// otherwise things will go wrong when walking the directories
		if _, errGo := os.Stat(source); !os.IsNotExist(errGo) {
			if warns, err = storage.Hoard(ctx, source, "metadata"); err != nil {
				return false, warns, err.With("group", group)
			}
		}
	default:
		if warns, err = storage.Deposit(ctx, source, art.Key); err != nil {
			return false, warns, err.With("group", group)
		}
	}

	if errHash == nil {
		// Record the hash of the upload area, covering all files and directories the
		// artifact included, so that the next Restore can skip an unchanged artifact
		cache.Lock()
		cache.upHashes[source] = hash
		cache.Unlock()
	}

	return true, warns, nil
}
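
// What follows is a hypothetical usage sketch, not part of the original
// implementation: once an experiment finishes, walk its artifacts keyed by group
// name and upload those that are mutable and whose local content changed.  The
// artifacts map and the logging call are assumptions made purely for illustration.
func exampleRestoreChanged(ctx context.Context, cache *ArtifactCache, artifacts map[string]*Artifact, projectId string, cred string, env map[string]string, workDir string) (warns []kv.Error, err kv.Error) {
	for group, art := range artifacts {
		uploaded, moreWarns, errRestore := cache.Restore(ctx, art, projectId, group, cred, env, workDir)
		warns = append(warns, moreWarns...)
		if errRestore != nil {
			return warns, errRestore
		}
		if uploaded {
			fmt.Println("uploaded mutable artifact for group", group)
		}
	}
	return warns, nil
}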