github.com/sentienttechnologies/studio-go-runner@v0.0.0-20201118202441-6d21f2ced8ee/internal/runner/gs.go (about) 1 // Copyright 2018-2020 (c) Cognizant Digital Business, Evolutionary AI. All rights reserved. Issued under the Apache 2.0 License. 2 3 package runner 4 5 // This file contains the implementation for the storage sub system that will 6 // be used by the runner to retrieve storage from cloud providers or localized storage 7 import ( 8 "archive/tar" 9 "bufio" 10 "compress/bzip2" 11 "compress/gzip" 12 "context" 13 "encoding/hex" 14 "fmt" 15 "io" 16 "io/ioutil" 17 "os" 18 "path/filepath" 19 20 "cloud.google.com/go/storage" 21 "google.golang.org/api/iterator" 22 "google.golang.org/api/option" 23 24 bzip2w "github.com/dsnet/compress/bzip2" 25 26 "github.com/go-stack/stack" 27 28 "github.com/jjeffery/kv" // MIT License 29 ) 30 31 type gsStorage struct { 32 project string 33 bucket string 34 client *storage.Client 35 } 36 37 // NewGSstorage will initialize a receiver that operates with the google cloud storage platform 38 // 39 func NewGSstorage(ctx context.Context, projectID string, creds string, env map[string]string, bucket string, validate bool) (s *gsStorage, err kv.Error) { 40 41 s = &gsStorage{ 42 project: projectID, 43 bucket: bucket, 44 } 45 46 client, errGo := storage.NewClient(ctx, option.WithCredentialsFile(creds)) 47 if errGo != nil { 48 return nil, kv.Wrap(errGo).With("stack", stack.Trace().TrimRuntime()) 49 } 50 s.client = client 51 52 if validate { 53 // Validate the bucket during the NewBucket to give an early warning of issues 54 buckets := s.client.Buckets(ctx, projectID) 55 for { 56 attrs, errGo := buckets.Next() 57 if errGo == iterator.Done { 58 return nil, kv.NewError("bucket not found").With("stack", stack.Trace().TrimRuntime()).With("project", projectID).With("bucket", bucket) 59 } 60 if errGo != nil { 61 return nil, kv.Wrap(errGo).With("stack", stack.Trace().TrimRuntime()) 62 } 63 if attrs.Name == bucket { 64 break 65 } 66 } 67 } 68 69 return s, nil 70 } 71 72 // Close in the context of the google cloud storage implementation terminate the 73 // client connect to the google server 74 // 75 func (s *gsStorage) Close() { 76 s.client.Close() 77 } 78 79 // Hash returns an MD5 of the contents of the file that can be used by caching and other functions 80 // to track storage changes etc 81 // 82 func (s *gsStorage) Hash(ctx context.Context, name string) (hash string, err kv.Error) { 83 84 attrs, errGo := s.client.Bucket(s.bucket).Object(name).Attrs(ctx) 85 if errGo != nil { 86 return "", kv.Wrap(errGo).With("stack", stack.Trace().TrimRuntime()) 87 } 88 return hex.EncodeToString(attrs.MD5), nil 89 } 90 91 // Gather is used to retrieve files prefixed with a specific key. It is used to retrieve the individual files 92 // associated with a previous Hoard operation 93 // 94 func (s *gsStorage) Gather(ctx context.Context, keyPrefix string, outputDir string, tap io.Writer) (warnings []kv.Error, err kv.Error) { 95 return warnings, kv.NewError("unimplemented").With("stack", stack.Trace().TrimRuntime()) 96 } 97 98 // Fetch is used to retrieve a file from a well known google storage bucket and either 99 // copy it directly into a directory, or unpack the file into the same directory. 100 // 101 // Calling this function with output not being a valid directory will result in an error 102 // being returned. 103 // 104 // The tap can be used to make a side copy of the content that is being read. 105 // 106 func (s *gsStorage) Fetch(ctx context.Context, name string, unpack bool, output string, tap io.Writer) (warns []kv.Error, err kv.Error) { 107 108 kv := kv.With("output", output).With("name", name) 109 110 // Make sure output is an existing directory 111 info, errGo := os.Stat(output) 112 if errGo != nil { 113 return warns, kv.Wrap(errGo).With("stack", stack.Trace().TrimRuntime()) 114 } 115 if !info.IsDir() { 116 errGo = fmt.Errorf("%s is not a directory", output) 117 return warns, kv.Wrap(errGo).With("stack", stack.Trace().TrimRuntime()) 118 } 119 120 fileType, w := MimeFromExt(name) 121 if w != nil { 122 warns = append(warns, w) 123 } 124 125 obj, errGo := s.client.Bucket(s.bucket).Object(name).NewReader(ctx) 126 if errGo != nil { 127 return warns, kv.Wrap(errGo).With("stack", stack.Trace().TrimRuntime()) 128 } 129 defer obj.Close() 130 131 // If the unpack flag is set then use a tar decompressor and unpacker 132 // but first make sure the output location is an existing directory 133 if unpack { 134 135 var inReader io.ReadCloser 136 137 switch fileType { 138 case "application/x-gzip", "application/zip": 139 if tap != nil { 140 // Create a stack of reader that first tee off any data read to a tap 141 // the tap being able to send data to things like caches etc 142 // 143 // Second in the stack of readers after the TAP is a decompression reader 144 inReader, errGo = gzip.NewReader(io.TeeReader(obj, tap)) 145 } else { 146 inReader, errGo = gzip.NewReader(obj) 147 } 148 case "application/bzip2", "application/octet-stream": 149 if tap != nil { 150 // Create a stack of reader that first tee off any data read to a tap 151 // the tap being able to send data to things like caches etc 152 // 153 // Second in the stack of readers after the TAP is a decompression reader 154 inReader = ioutil.NopCloser(bzip2.NewReader(io.TeeReader(obj, tap))) 155 } else { 156 inReader = ioutil.NopCloser(bzip2.NewReader(obj)) 157 } 158 default: 159 if tap != nil { 160 // Create a stack of reader that first tee off any data read to a tap 161 // the tap being able to send data to things like caches etc 162 // 163 // Second in the stack of readers after the TAP is a decompression reader 164 inReader = ioutil.NopCloser(io.TeeReader(obj, tap)) 165 } else { 166 inReader = ioutil.NopCloser(obj) 167 } 168 } 169 if errGo != nil { 170 return warns, kv.Wrap(errGo).With("stack", stack.Trace().TrimRuntime()) 171 } 172 defer inReader.Close() 173 174 tarReader := tar.NewReader(inReader) 175 176 for { 177 header, errGo := tarReader.Next() 178 if errGo == io.EOF { 179 break 180 } else if errGo != nil { 181 return warns, kv.Wrap(errGo).With("stack", stack.Trace().TrimRuntime()) 182 } 183 184 path := filepath.Join(output, header.Name) 185 info := header.FileInfo() 186 if info.IsDir() { 187 if errGo = os.MkdirAll(path, info.Mode()); errGo != nil { 188 return warns, kv.Wrap(errGo).With("stack", stack.Trace().TrimRuntime()) 189 } 190 continue 191 } 192 193 file, errGo := os.OpenFile(path, os.O_CREATE|os.O_TRUNC|os.O_WRONLY, info.Mode()) 194 if errGo != nil { 195 return warns, kv.Wrap(errGo).With("stack", stack.Trace().TrimRuntime()) 196 } 197 198 _, errGo = io.Copy(file, tarReader) 199 file.Close() 200 if errGo != nil { 201 return warns, kv.Wrap(errGo).With("stack", stack.Trace().TrimRuntime()) 202 } 203 } 204 } else { 205 errGo := os.MkdirAll(output, 0700) 206 if errGo != nil { 207 return warns, kv.Wrap(errGo).With("stack", stack.Trace().TrimRuntime()).With("output", output) 208 } 209 path := filepath.Join(output, filepath.Base(name)) 210 f, errGo := os.Create(path) 211 if errGo != nil { 212 return warns, kv.Wrap(errGo).With("stack", stack.Trace().TrimRuntime()) 213 } 214 defer f.Close() 215 216 outf := bufio.NewWriter(f) 217 if _, errGo = io.Copy(outf, obj); errGo != nil { 218 return warns, kv.Wrap(errGo).With("stack", stack.Trace().TrimRuntime()) 219 } 220 outf.Flush() 221 } 222 return warns, nil 223 } 224 225 // Hoard is used to upload the contents of a directory to the storage server as individual files rather than a single 226 // archive 227 // 228 func (s *gsStorage) Hoard(ctx context.Context, src string, dest string) (warnings []kv.Error, err kv.Error) { 229 return warnings, kv.NewError("unimplemented").With("stack", stack.Trace().TrimRuntime()) 230 } 231 232 // Deposit directories as compressed artifacts to the firebase storage for an 233 // experiment 234 // 235 func (s *gsStorage) Deposit(ctx context.Context, src string, dest string) (warns []kv.Error, err kv.Error) { 236 237 if !IsTar(dest) { 238 return warns, kv.NewError("uploads must be tar, or tar compressed files").With("stack", stack.Trace().TrimRuntime()).With("key", dest) 239 } 240 241 obj := s.client.Bucket(s.bucket).Object(dest).NewWriter(ctx) 242 defer obj.Close() 243 244 files, err := NewTarWriter(src) 245 if err != nil { 246 return warns, err 247 } 248 249 if !files.HasFiles() { 250 return warns, nil 251 } 252 253 var outw io.Writer 254 255 typ, w := MimeFromExt(dest) 256 warns = append(warns, w) 257 258 switch typ { 259 case "application/tar", "application/octet-stream": 260 outw = bufio.NewWriter(obj) 261 case "application/bzip2": 262 outZ, errGo := bzip2w.NewWriter(obj, &bzip2w.WriterConfig{Level: 6}) 263 if err != nil { 264 return warns, kv.Wrap(errGo).With("stack", stack.Trace().TrimRuntime()) 265 } 266 defer outZ.Close() 267 outw = outZ 268 case "application/x-gzip": 269 outZ := gzip.NewWriter(obj) 270 defer outZ.Close() 271 outw = outZ 272 case "application/zip": 273 return warns, kv.NewError("only tar archives are supported").With("stack", stack.Trace().TrimRuntime()).With("key", dest) 274 default: 275 return warns, kv.NewError("unrecognized upload compression").With("stack", stack.Trace().TrimRuntime()).With("key", dest) 276 } 277 278 tw := tar.NewWriter(outw) 279 defer tw.Close() 280 281 if err = files.Write(tw); err != nil { 282 return warns, err.(kv.Error) 283 } 284 return warns, nil 285 }