github.com/sentienttechnologies/studio-go-runner@v0.0.0-20201118202441-6d21f2ced8ee/internal/runner/gs.go

// Copyright 2018-2020 (c) Cognizant Digital Business, Evolutionary AI. All rights reserved. Issued under the Apache 2.0 License.

package runner

// This file contains the implementation of the storage subsystem that the
// runner uses to retrieve and store data with cloud providers or local storage
import (
	"archive/tar"
	"bufio"
	"compress/bzip2"
	"compress/gzip"
	"context"
	"encoding/hex"
	"fmt"
	"io"
	"io/ioutil"
	"os"
	"path/filepath"

	"cloud.google.com/go/storage"
	"google.golang.org/api/iterator"
	"google.golang.org/api/option"

	bzip2w "github.com/dsnet/compress/bzip2"

	"github.com/go-stack/stack"

	"github.com/jjeffery/kv" // MIT License
)

type gsStorage struct {
	project string
	bucket  string
	client  *storage.Client
}

// NewGSstorage will initialize a receiver that operates with the Google Cloud Storage platform
//
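// A minimal usage sketch within this package (the project, credentials path, and
// bucket names are illustrative assumptions, not values defined here):
//
//	store, err := NewGSstorage(ctx, "example-project", "/secrets/gcs.json", nil, "example-bucket", true)
//	if err != nil {
//		return err
//	}
//	defer store.Close()
//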
func NewGSstorage(ctx context.Context, projectID string, creds string, env map[string]string, bucket string, validate bool) (s *gsStorage, err kv.Error) {

	s = &gsStorage{
		project: projectID,
		bucket:  bucket,
	}

	client, errGo := storage.NewClient(ctx, option.WithCredentialsFile(creds))
	if errGo != nil {
		return nil, kv.Wrap(errGo).With("stack", stack.Trace().TrimRuntime())
	}
	s.client = client

	if validate {
		// Validate the bucket during construction to give an early warning of issues
		buckets := s.client.Buckets(ctx, projectID)
		for {
			attrs, errGo := buckets.Next()
			if errGo == iterator.Done {
				return nil, kv.NewError("bucket not found").With("stack", stack.Trace().TrimRuntime()).With("project", projectID).With("bucket", bucket)
			}
			if errGo != nil {
				return nil, kv.Wrap(errGo).With("stack", stack.Trace().TrimRuntime())
			}
			if attrs.Name == bucket {
				break
			}
		}
	}

	return s, nil
}

// Close, in the context of the Google Cloud Storage implementation, terminates the
// client connection to the Google servers
//
func (s *gsStorage) Close() {
	s.client.Close()
}

// Hash returns an MD5 of the contents of the file that can be used by caching and other functions
// to track storage changes etc
//
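// A brief sketch of how the hash might be consulted (the key name is an
// illustrative assumption):
//
//	md5Hex, err := store.Hash(ctx, "experiments/output.tar.gz")
//	if err != nil {
//		return err
//	}
//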
func (s *gsStorage) Hash(ctx context.Context, name string) (hash string, err kv.Error) {

	attrs, errGo := s.client.Bucket(s.bucket).Object(name).Attrs(ctx)
	if errGo != nil {
		return "", kv.Wrap(errGo).With("stack", stack.Trace().TrimRuntime())
	}
	return hex.EncodeToString(attrs.MD5), nil
}

// Gather is used to retrieve files prefixed with a specific key. It is used to retrieve the individual files
// associated with a previous Hoard operation
//
func (s *gsStorage) Gather(ctx context.Context, keyPrefix string, outputDir string, tap io.Writer) (warnings []kv.Error, err kv.Error) {
	return warnings, kv.NewError("unimplemented").With("stack", stack.Trace().TrimRuntime())
}

// Fetch is used to retrieve a file from a well known Google Cloud Storage bucket and either
// copy it directly into a directory, or unpack the file into the same directory.
//
// Calling this function with output not being a valid directory will result in an error
// being returned.
//
// The tap can be used to make a side copy of the content that is being read.
//
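// A minimal usage sketch (the object key and output directory names are
// illustrative assumptions):
//
//	// Retrieve a gzip compressed tar archive and unpack it into ./workspace
//	warns, err := store.Fetch(ctx, "experiments/artifact.tar.gz", true, "./workspace", nil)
//	for _, warn := range warns {
//		fmt.Println("warning", warn.Error())
//	}
//	if err != nil {
//		return err
//	}
//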
func (s *gsStorage) Fetch(ctx context.Context, name string, unpack bool, output string, tap io.Writer) (warns []kv.Error, err kv.Error) {

	// Shadow the kv package name with a key value list so that errors raised
	// below automatically carry the output directory and object name
	kv := kv.With("output", output).With("name", name)

	// Make sure output is an existing directory
	info, errGo := os.Stat(output)
	if errGo != nil {
		return warns, kv.Wrap(errGo).With("stack", stack.Trace().TrimRuntime())
	}
	if !info.IsDir() {
		errGo = fmt.Errorf("%s is not a directory", output)
		return warns, kv.Wrap(errGo).With("stack", stack.Trace().TrimRuntime())
	}

	fileType, w := MimeFromExt(name)
	if w != nil {
		warns = append(warns, w)
	}

	obj, errGo := s.client.Bucket(s.bucket).Object(name).NewReader(ctx)
	if errGo != nil {
		return warns, kv.Wrap(errGo).With("stack", stack.Trace().TrimRuntime())
	}
	defer obj.Close()

	// If the unpack flag is set then use a tar decompressor and unpacker,
	// the output location having already been validated as an existing directory
	if unpack {

		var inReader io.ReadCloser

		switch fileType {
		case "application/x-gzip", "application/zip":
			if tap != nil {
				// Create a stack of readers that first tees off any data read to a tap,
				// the tap being able to send data to things like caches etc.
				//
				// Second in the stack of readers after the tap is a gzip decompression reader
				inReader, errGo = gzip.NewReader(io.TeeReader(obj, tap))
			} else {
				inReader, errGo = gzip.NewReader(obj)
			}
		case "application/bzip2", "application/octet-stream":
			if tap != nil {
				// Create a stack of readers that first tees off any data read to a tap,
				// the tap being able to send data to things like caches etc.
				//
				// Second in the stack of readers after the tap is a bzip2 decompression reader
				inReader = ioutil.NopCloser(bzip2.NewReader(io.TeeReader(obj, tap)))
			} else {
				inReader = ioutil.NopCloser(bzip2.NewReader(obj))
			}
		default:
			if tap != nil {
				// Create a stack of readers that first tees off any data read to a tap,
				// the tap being able to send data to things like caches etc.
				//
				// No decompression is performed for unrecognized content types
				inReader = ioutil.NopCloser(io.TeeReader(obj, tap))
			} else {
				inReader = ioutil.NopCloser(obj)
			}
		}
		if errGo != nil {
			return warns, kv.Wrap(errGo).With("stack", stack.Trace().TrimRuntime())
		}
		defer inReader.Close()

		tarReader := tar.NewReader(inReader)

		for {
			header, errGo := tarReader.Next()
			if errGo == io.EOF {
				break
			} else if errGo != nil {
				return warns, kv.Wrap(errGo).With("stack", stack.Trace().TrimRuntime())
			}

			path := filepath.Join(output, header.Name)
			info := header.FileInfo()
			if info.IsDir() {
				if errGo = os.MkdirAll(path, info.Mode()); errGo != nil {
					return warns, kv.Wrap(errGo).With("stack", stack.Trace().TrimRuntime())
				}
				continue
			}

			file, errGo := os.OpenFile(path, os.O_CREATE|os.O_TRUNC|os.O_WRONLY, info.Mode())
			if errGo != nil {
				return warns, kv.Wrap(errGo).With("stack", stack.Trace().TrimRuntime())
			}

			_, errGo = io.Copy(file, tarReader)
			file.Close()
			if errGo != nil {
				return warns, kv.Wrap(errGo).With("stack", stack.Trace().TrimRuntime())
			}
		}
	} else {
		errGo := os.MkdirAll(output, 0700)
		if errGo != nil {
			return warns, kv.Wrap(errGo).With("stack", stack.Trace().TrimRuntime()).With("output", output)
		}
		path := filepath.Join(output, filepath.Base(name))
		f, errGo := os.Create(path)
		if errGo != nil {
			return warns, kv.Wrap(errGo).With("stack", stack.Trace().TrimRuntime())
		}
		defer f.Close()

		outf := bufio.NewWriter(f)
		if _, errGo = io.Copy(outf, obj); errGo != nil {
			return warns, kv.Wrap(errGo).With("stack", stack.Trace().TrimRuntime())
		}
		outf.Flush()
	}
	return warns, nil
}

// Hoard is used to upload the contents of a directory to the storage server as individual files rather than a single
// archive
//
func (s *gsStorage) Hoard(ctx context.Context, src string, dest string) (warnings []kv.Error, err kv.Error) {
	return warnings, kv.NewError("unimplemented").With("stack", stack.Trace().TrimRuntime())
}

// Deposit is used to upload directories as compressed artifacts to the Google Cloud Storage
// bucket for an experiment
//
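// A minimal usage sketch (the source directory and destination key are
// illustrative assumptions):
//
//	warns, err := store.Deposit(ctx, "./workspace/output", "experiments/output.tar.gz")
//	if err != nil {
//		return err
//	}
//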
func (s *gsStorage) Deposit(ctx context.Context, src string, dest string) (warns []kv.Error, err kv.Error) {

	if !IsTar(dest) {
		return warns, kv.NewError("uploads must be tar, or tar compressed files").With("stack", stack.Trace().TrimRuntime()).With("key", dest)
	}

	// The deferred Close on the bucket writer finalizes the upload
	obj := s.client.Bucket(s.bucket).Object(dest).NewWriter(ctx)
	defer obj.Close()

	files, err := NewTarWriter(src)
	if err != nil {
		return warns, err
	}

	if !files.HasFiles() {
		return warns, nil
	}

	var outw io.Writer

	typ, w := MimeFromExt(dest)
	if w != nil {
		warns = append(warns, w)
	}

	switch typ {
	case "application/tar", "application/octet-stream":
		// Buffer the writes, flushing the buffer before the bucket writer is closed
		outB := bufio.NewWriter(obj)
		defer outB.Flush()
		outw = outB
	case "application/bzip2":
		outZ, errGo := bzip2w.NewWriter(obj, &bzip2w.WriterConfig{Level: 6})
		if errGo != nil {
			return warns, kv.Wrap(errGo).With("stack", stack.Trace().TrimRuntime())
		}
		defer outZ.Close()
		outw = outZ
	case "application/x-gzip":
		outZ := gzip.NewWriter(obj)
		defer outZ.Close()
		outw = outZ
	case "application/zip":
		return warns, kv.NewError("only tar archives are supported").With("stack", stack.Trace().TrimRuntime()).With("key", dest)
	default:
		return warns, kv.NewError("unrecognized upload compression").With("stack", stack.Trace().TrimRuntime()).With("key", dest)
	}

	tw := tar.NewWriter(outw)
	defer tw.Close()

	if err = files.Write(tw); err != nil {
		return warns, err.(kv.Error)
	}
	return warns, nil
}