github.com/sentienttechnologies/studio-go-runner@v0.0.0-20201118202441-6d21f2ced8ee/internal/runner/storage.go (about)

     1  // Copyright 2018-2020 (c) Cognizant Digital Business, Evolutionary AI. All rights reserved. Issued under the Apache 2.0 License.
     2  
     3  package runner
     4  
     5  // This file contains the implementation for the storage sub system that will
     6  // be used by the runner to retrieve storage from cloud providers or localized storage
     7  
     8  import (
     9  	"context"
    10  	"fmt"
    11  	"io"
    12  	"net/url"
    13  	"path/filepath"
    14  	"strings"
    15  
    16  	"github.com/go-stack/stack"
    17  	"github.com/jjeffery/kv" // MIT License
    18  )
    19  
// Storage defines an interface for implementations of a studioml artifact store.
// Every operation, other than Close, accepts a context as its first parameter and
// reports failures using a kv.Error. The operations that transfer data also
// return a slice of warnings in addition to the error.
//
type Storage interface {
	// Gather will retrieve contents of the named storage objects using a prefix treating any items retrieved as individual files
	//
	// NOTE(review): outputDir is presumably the local directory that receives the files, and
	// tap presumably an additional writer that observes the retrieved data, confirm
	// against the implementations of this interface.
	//
	Gather(ctx context.Context, keyPrefix string, outputDir string, tap io.Writer) (warnings []kv.Error, err kv.Error)

	// Fetch will retrieve contents of the named storage object and optionally unpack it into the
	// user specified output directory
	//
	// NOTE(review): the role of tap is not visible in this file, confirm against the
	// implementations of this interface.
	//
	Fetch(ctx context.Context, name string, unpack bool, output string, tap io.Writer) (warnings []kv.Error, err kv.Error)

	// Hoard will take a number of files for upload, deduplication is implemented outside of this interface
	//
	// NOTE(review): presumably the files are taken from srcDir and uploaded using keyPrefix
	// as the leading portion of their keys, confirm against the implementations.
	//
	Hoard(ctx context.Context, srcDir string, keyPrefix string) (warnings []kv.Error, err kv.Error)

	// Deposit is a directory archive and upload, deduplication is implemented outside of this interface
	//
	Deposit(ctx context.Context, src string, dest string) (warnings []kv.Error, err kv.Error)

	// Hash can be used to retrieve the hash of the contents of the file.  The hash is
	// retrieved not computed and so is a lightweight operation common to both S3 and Google Storage.
	// The hash on some storage platforms is not a plain MD5 but uses multiple hashes from file
	// segments to increase the speed of hashing and also to reflect the multipart download
	// processing that was used for the file, for a full explanation please see
	// https://stackoverflow.com/questions/12186993/what-is-the-algorithm-to-compute-the-amazon-s3-etag-for-a-file-larger-than-5gb
	//
	Hash(ctx context.Context, name string) (hash string, err kv.Error)

	// Close is called when the storage is no longer needed.
	//
	// NOTE(review): presumably this releases any resources held by the implementation, confirm
	// against the implementations of this interface.
	//
	Close()
}
    51  
// StoreOpts is used to encapsulate a storage implementation with the runner and studioml data needed.
// It is the specification that NewStorage uses to select, and construct, a Storage implementation.
//
type StoreOpts struct {
	// Art is the artifact the storage is being created for.  NewStorage parses Art.Qualified
	// as a URI to select the implementation, and for the s3 scheme will fill in any empty
	// Art.Bucket and Art.Key values from the path of that URI.
	Art *Artifact
	// ProjectID is passed unchanged to the gs and s3 storage constructors
	ProjectID string
	// Group is not referenced by NewStorage within this file
	Group     string
	Creds     string // The credentials file name
	// Env is passed unchanged to the gs and s3 storage constructors
	Env map[string]string
	// Validate is passed unchanged to the gs and s3 storage constructors
	Validate bool
}
    62  
    63  // NewStorage is used to create a receiver for a storage implementation
    64  //
    65  func NewStorage(ctx context.Context, spec *StoreOpts) (stor Storage, err kv.Error) {
    66  
    67  	if spec == nil {
    68  		return nil, kv.Wrap(err, "empty specification supplied").With("stack", stack.Trace().TrimRuntime())
    69  	}
    70  
    71  	uri, errGo := url.ParseRequestURI(spec.Art.Qualified)
    72  	if errGo != nil {
    73  		return nil, kv.Wrap(errGo).With("stack", stack.Trace().TrimRuntime())
    74  	}
    75  
    76  	switch uri.Scheme {
    77  	case "gs":
    78  		return NewGSstorage(ctx, spec.ProjectID, spec.Creds, spec.Env, spec.Art.Bucket, spec.Validate)
    79  	case "s3":
    80  		uriPath := strings.Split(uri.EscapedPath(), "/")
    81  		if len(spec.Art.Key) == 0 {
    82  			spec.Art.Key = strings.Join(uriPath[2:], "/")
    83  		}
    84  		if len(spec.Art.Bucket) == 0 {
    85  			spec.Art.Bucket = uriPath[1]
    86  		}
    87  
    88  		if len(uri.Host) == 0 {
    89  			return nil, kv.NewError("S3/minio endpoint lacks a scheme, or the host name was not specified").With("stack", stack.Trace().TrimRuntime())
    90  		}
    91  
    92  		useSSL := uri.Scheme == "https"
    93  
    94  		return NewS3storage(ctx, spec.ProjectID, spec.Creds, spec.Env, uri.Host,
    95  			spec.Art.Bucket, spec.Art.Key, spec.Validate, useSSL)
    96  
    97  	case "file":
    98  		return NewLocalStorage()
    99  	default:
   100  		return nil, kv.NewError(fmt.Sprintf("unknown, or unsupported URI scheme %s, s3 or gs expected", uri.Scheme)).With("stack", stack.Trace().TrimRuntime())
   101  	}
   102  }
   103  
   104  // IsTar is used to test the extension to see if the presence of tar can be found
   105  //
   106  func IsTar(name string) bool {
   107  	switch {
   108  	case strings.Contains(name, ".tar."):
   109  		return true
   110  	case strings.HasSuffix(name, ".tgz"):
   111  		return true
   112  	case strings.HasSuffix(name, ".tar"):
   113  		return true
   114  	case strings.HasSuffix(name, ".tar.bzip2"):
   115  		return true
   116  	case strings.HasSuffix(name, ".tar.bz2"):
   117  		return true
   118  	case strings.HasSuffix(name, ".tbz2"):
   119  		return true
   120  	case strings.HasSuffix(name, ".tbz"):
   121  		return true
   122  	}
   123  	return false
   124  }
   125  
   126  // MimeFromExt is used to characterize a mime type from a files extension
   127  //
   128  func MimeFromExt(name string) (fileType string, err kv.Error) {
   129  	switch filepath.Ext(name) {
   130  	case ".gzip", ".gz":
   131  		return "application/x-gzip", nil
   132  	case ".zip":
   133  		return "application/zip", nil
   134  	case ".tgz": // Non standard extension as a result of studioml python code
   135  		return "application/bzip2", nil
   136  	case ".tb2", ".tbz", ".tbz2", ".bzip2", ".bz2": // Standard bzip2 extensions
   137  		return "application/bzip2", nil
   138  	case ".tar":
   139  		return "application/tar", nil
   140  	default:
   141  		fileType, errGo := DetectFileType(name)
   142  		if errGo != nil {
   143  			// Fill in a default value even if there is an error
   144  			return "application/octet-stream", errGo
   145  		}
   146  		return fileType, nil
   147  	}
   148  }