github.com/zppinho/prow@v0.0.0-20240510014325-1738badeb017/pkg/pod-utils/gcs/upload.go

/*
Copyright 2017 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package gcs

import (
	"compress/gzip"
	"context"
	"fmt"
	"io"
	"mime"
	"net/http"
	"net/url"
	"os"
	"path/filepath"
	"strings"
	"sync"
	"time"

	"github.com/sirupsen/logrus"
	"golang.org/x/sync/semaphore"
	utilerrors "k8s.io/apimachinery/pkg/util/errors"
	"k8s.io/apimachinery/pkg/util/sets"
	utilpointer "k8s.io/utils/pointer"

	pkgio "sigs.k8s.io/prow/pkg/io"
	"sigs.k8s.io/prow/pkg/io/providers"
)

// UploadFunc knows how to upload into an object.
type UploadFunc func(writer dataWriter) error

// ReaderFunc opens the data source for an upload. It is invoked once per
// upload attempt, so retries re-read the data from the beginning.
type ReaderFunc func() (io.ReadCloser, error)

// destToWriter maps a destination path to the dataWriter that will receive
// its content.
type destToWriter func(dest string) dataWriter

const retryCount = 4

// Upload uploads all the data in the uploadTargets map to blob storage in parallel.
// The map is keyed on blob storage path under the bucket.
// Files with an extension in the compressFileTypes list will be compressed prior to uploading.
func Upload(ctx context.Context, bucket, gcsCredentialsFile, s3CredentialsFile string, compressFileTypes []string, uploadTargets map[string]UploadFunc) error {
	parsedBucket, err := url.Parse(bucket)
	if err != nil {
		return fmt.Errorf("cannot parse bucket name %s: %w", bucket, err)
	}
	if parsedBucket.Scheme == "" {
		parsedBucket.Scheme = providers.GS
	}

	opener, err := pkgio.NewOpener(ctx, gcsCredentialsFile, s3CredentialsFile)
	if err != nil {
		return fmt.Errorf("new opener: %w", err)
	}
	dtw := func(dest string) dataWriter {
		compressFileType := shouldCompressFileType(dest, sets.New[string](compressFileTypes...))
		return &openerObjectWriter{Opener: opener, Context: ctx, Bucket: parsedBucket.String(), Dest: dest, compressFileType: compressFileType}
	}
	return upload(dtw, uploadTargets)
}

// shouldCompressFileType reports whether dest is eligible for compression:
// already-gzipped extensions are never compressed, and "*" matches every
// other extension.
func shouldCompressFileType(dest string, compressFileTypes sets.Set[string]) bool {
	ext := strings.TrimPrefix(filepath.Ext(dest), ".")
	if ext == "gz" || ext == "gzip" {
		return false
	}
	return compressFileTypes.Has("*") || compressFileTypes.Has(ext)
}
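// exampleUpload is a usage sketch added for illustration; it is not part of
// the original file, and the bucket, paths, and file types are hypothetical
// placeholders. It shows how Upload pairs destination paths (relative to the
// bucket) with UploadFuncs; entries are uploaded in parallel with retries.
func exampleUpload(ctx context.Context) error {
	targets := map[string]UploadFunc{
		// Stream a file from disk.
		"artifacts/build-log.txt": FileUpload("/tmp/build-log.txt"),
		// Stream in-memory data; the ReaderFunc is re-invoked on each retry.
		"started.json": DataUpload(func() (io.ReadCloser, error) {
			return io.NopCloser(strings.NewReader(`{"timestamp":1257894000}`)), nil
		}),
	}
	// "txt" files whose first chunk exceeds 1 KiB are gzip-compressed on upload.
	return Upload(ctx, "gs://example-prow-bucket", "", "", []string{"txt"}, targets)
}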
// LocalExport copies all of the data in the uploadTargets map to local files in parallel. The map
// is keyed on file path under the exportDir.
func LocalExport(ctx context.Context, exportDir string, uploadTargets map[string]UploadFunc) error {
	opener, err := pkgio.NewOpener(ctx, "", "")
	if err != nil {
		return fmt.Errorf("new opener: %w", err)
	}
	dtw := func(dest string) dataWriter {
		return &openerObjectWriter{Opener: opener, Context: ctx, Bucket: exportDir, Dest: dest}
	}
	return upload(dtw, uploadTargets)
}

func upload(dtw destToWriter, uploadTargets map[string]UploadFunc) error {
	errCh := make(chan error, len(uploadTargets))
	group := &sync.WaitGroup{}
	// At most four uploads run concurrently.
	sem := semaphore.NewWeighted(4)
	group.Add(len(uploadTargets))
	for dest, upload := range uploadTargets {
		writer := dtw(dest)
		log := logrus.WithField("dest", writer.fullUploadPath())
		log.Info("Queued for upload")
		go func(f UploadFunc, writer dataWriter, log *logrus.Entry) {
			defer group.Done()

			var err error

			for retryIndex := 1; retryIndex <= retryCount; retryIndex++ {
				err = func() error {
					if acquireErr := sem.Acquire(context.Background(), 1); acquireErr != nil {
						return acquireErr
					}
					defer sem.Release(1)
					if retryIndex > 1 {
						log.WithField("retry_attempt", retryIndex).Debugf("Retrying upload")
					}
					return f(writer)
				}()

				if err == nil {
					break
				}
				if retryIndex < retryCount {
					// Quadratic backoff: wait 1s, 4s, then 9s between attempts.
					time.Sleep(time.Duration(retryIndex*retryIndex) * time.Second)
				}
			}

			if err != nil {
				errCh <- err
				log.WithError(err).Info("Failed upload")
			} else {
				log.Info("Finished upload")
			}
		}(upload, writer, log)
	}
	group.Wait()
	close(errCh)
	if len(errCh) != 0 {
		var uploadErrors []error
		for err := range errCh {
			uploadErrors = append(uploadErrors, err)
		}
		return fmt.Errorf("encountered errors during upload: %v", uploadErrors)
	}
	return nil
}

// FileUpload returns an UploadFunc which copies all
// data from the file on disk to the GCS object.
func FileUpload(file string) UploadFunc {
	return FileUploadWithOptions(file, pkgio.WriterOptions{})
}

// FileUploadWithOptions returns an UploadFunc which copies all data
// from the file on disk into the GCS object and also sets the provided
// attributes on the object.
func FileUploadWithOptions(file string, opts pkgio.WriterOptions) UploadFunc {
	return func(writer dataWriter) error {
		if fi, err := os.Stat(file); err == nil {
			// Size the upload buffer to the file, capped at 25 MiB.
			opts.BufferSize = utilpointer.Int64(fi.Size())
			if *opts.BufferSize > 25*1024*1024 {
				*opts.BufferSize = 25 * 1024 * 1024
			}
		}

		newReader := func() (io.ReadCloser, error) {
			reader, err := os.Open(file)
			if err != nil {
				return nil, err
			}
			return reader, nil
		}

		uploadErr := DataUploadWithOptions(newReader, opts)(writer)
		if uploadErr != nil {
			uploadErr = fmt.Errorf("upload error: %w", uploadErr)
		}
		return uploadErr
	}
}

// DataUpload returns an UploadFunc which copies all
// data from src reader into GCS.
func DataUpload(newReader ReaderFunc) UploadFunc {
	return DataUploadWithOptions(newReader, pkgio.WriterOptions{})
}
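// exampleFileUploadWithMetadata is an illustrative sketch, not part of the
// original file: it attaches object metadata to a file upload through
// pkgio.WriterOptions. The path and metadata values are hypothetical.
func exampleFileUploadWithMetadata() UploadFunc {
	return FileUploadWithOptions("/tmp/junit_01.xml", pkgio.WriterOptions{
		Metadata: map[string]string{"prow-job": "example-job"},
	})
}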
// DataUploadWithMetadata returns an UploadFunc which copies all
// data from src reader into GCS and also sets the provided metadata
// fields onto the object.
func DataUploadWithMetadata(newReader ReaderFunc, metadata map[string]string) UploadFunc {
	return DataUploadWithOptions(newReader, pkgio.WriterOptions{Metadata: metadata})
}

// DataUploadWithOptions returns an UploadFunc which copies all data
// from src reader into GCS and also sets the provided attributes on
// the object.
func DataUploadWithOptions(newReader ReaderFunc, attrs pkgio.WriterOptions) UploadFunc {
	return func(writer dataWriter) (e error) {
		errors := make([]error, 0, 4)
		defer func() {
			if err := writer.Close(); err != nil {
				errors = append(errors, fmt.Errorf("writer close error: %w", err))
			}
			// The named return is assigned here so that close errors are
			// included even on early returns.
			e = utilerrors.NewAggregate(errors)
		}()

		writer.ApplyWriterOptions(attrs)

		reader, err := newReader()
		if err != nil {
			errors = append(errors, fmt.Errorf("reader new error: %w", err))
			return e
		}
		defer func() {
			if err := reader.Close(); err != nil {
				errors = append(errors, fmt.Errorf("reader close error: %w", err))
			}
		}()

		if _, err := io.Copy(writer, reader); err != nil {
			errors = append(errors, fmt.Errorf("copy error: %w", err))
		}

		return e
	}
}

// dataWriter is the destination of an UploadFunc: a WriteCloser that also
// reports its full upload path and accepts writer options.
type dataWriter interface {
	io.WriteCloser
	fullUploadPath() string
	ApplyWriterOptions(opts pkgio.WriterOptions)
}

type openerObjectWriter struct {
	pkgio.Opener
	Context          context.Context
	Bucket           string
	Dest             string
	compressFileType bool
	opts             []pkgio.WriterOptions
	writer           pkgio.Writer
	closers          []pkgio.Closer
}

// Write lazily creates the underlying storage writer on the first call so
// that the compression decision can be made from the first chunk of data.
func (w *openerObjectWriter) Write(p []byte) (n int, err error) {
	if w.writer == nil {
		// Only compress eligible file types when the first chunk is larger
		// than 1 KiB and is not already gzip-encoded.
		largerThanOneKB := len(p) > 1024
		shouldCompressFile := w.compressFileType && largerThanOneKB && http.DetectContentType(p) != "application/x-gzip"
		if shouldCompressFile {
			path := w.fullUploadPath()
			ext := filepath.Ext(path)
			mediaType := mime.TypeByExtension(ext)
			if mediaType == "" {
				mediaType = "text/plain; charset=utf-8"
			}
			ce := "gzip"
			w.opts = append(w.opts, pkgio.WriterOptions{
				ContentType:     &mediaType,
				ContentEncoding: &ce,
			})
		}
		var storageWriter pkgio.WriteCloser
		storageWriter, err = w.Opener.Writer(w.Context, w.fullUploadPath(), w.opts...)
		if err != nil {
			return 0, err
		}
		if shouldCompressFile {
			zipWriter := gzip.NewWriter(storageWriter)
			w.writer = zipWriter
			w.closers = append(w.closers, zipWriter)
		} else {
			w.writer = storageWriter
		}
		// The storage closer needs to be last in the list to close in the correct order.
		w.closers = append(w.closers, storageWriter)
	}
	return w.writer.Write(p)
}

func (w *openerObjectWriter) Close() error {
	if w.writer == nil {
		// Always create a writer even if Write() was never called,
		// otherwise empty files are never created, because Write() is
		// never called for them.
		if _, err := w.Write([]byte("")); err != nil {
			return err
		}
	}

	var errs []error
	for _, closer := range w.closers {
		if err := closer.Close(); err != nil {
			errs = append(errs, err)
		}
	}
	w.closers = nil
	w.writer = nil
	return utilerrors.NewAggregate(errs)
}

func (w *openerObjectWriter) ApplyWriterOptions(opts pkgio.WriterOptions) {
	w.opts = append(w.opts, opts)
}

func (w *openerObjectWriter) fullUploadPath() string {
	return fmt.Sprintf("%s/%s", w.Bucket, w.Dest)
}
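// exampleLocalExport is an illustrative sketch, not part of the original
// file: LocalExport accepts the same targets map as Upload but writes each
// entry under a local directory instead of a bucket. The directory path is a
// hypothetical placeholder.
func exampleLocalExport(ctx context.Context) error {
	targets := map[string]UploadFunc{
		"finished.json": DataUpload(func() (io.ReadCloser, error) {
			return io.NopCloser(strings.NewReader(`{"result":"SUCCESS"}`)), nil
		}),
	}
	return LocalExport(ctx, "/tmp/prow-export", targets)
}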