github.com/zppinho/prow@v0.0.0-20240510014325-1738badeb017/pkg/pod-utils/gcs/upload.go

/*
Copyright 2017 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package gcs

import (
	"compress/gzip"
	"context"
	"fmt"
	"io"
	"mime"
	"net/http"
	"net/url"
	"os"
	"path/filepath"
	"strings"
	"sync"
	"time"

	"github.com/sirupsen/logrus"
	"golang.org/x/sync/semaphore"
	utilerrors "k8s.io/apimachinery/pkg/util/errors"
	"k8s.io/apimachinery/pkg/util/sets"
	utilpointer "k8s.io/utils/pointer"

	pkgio "sigs.k8s.io/prow/pkg/io"
	"sigs.k8s.io/prow/pkg/io/providers"
)

// UploadFunc knows how to upload data into an object via the given dataWriter.
type UploadFunc func(writer dataWriter) error

// ReaderFunc returns a fresh io.ReadCloser for the data to upload; it is
// called once per upload attempt so that retries re-read from the start.
type ReaderFunc func() (io.ReadCloser, error)

// destToWriter maps a destination path to a dataWriter for that destination.
type destToWriter func(dest string) dataWriter

// retryCount is the maximum number of attempts made for each upload.
const retryCount = 4

// Upload uploads all the data in the uploadTargets map to blob storage in parallel.
// The map is keyed on blob storage path under the bucket.
// Files with an extension in the compressFileTypes list are gzip-compressed prior to uploading.
func Upload(ctx context.Context, bucket, gcsCredentialsFile, s3CredentialsFile string, compressFileTypes []string, uploadTargets map[string]UploadFunc) error {
	parsedBucket, err := url.Parse(bucket)
	if err != nil {
		return fmt.Errorf("cannot parse bucket name %s: %w", bucket, err)
	}
	if parsedBucket.Scheme == "" {
		parsedBucket.Scheme = providers.GS
	}

	opener, err := pkgio.NewOpener(ctx, gcsCredentialsFile, s3CredentialsFile)
	if err != nil {
		return fmt.Errorf("new opener: %w", err)
	}
	dtw := func(dest string) dataWriter {
		compressFileType := shouldCompressFileType(dest, sets.New[string](compressFileTypes...))
		return &openerObjectWriter{Opener: opener, Context: ctx, Bucket: parsedBucket.String(), Dest: dest, compressFileType: compressFileType}
	}
	return upload(dtw, uploadTargets)
}
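
// Example (illustrative sketch, not part of the original file): a typical
// call that uploads two local files to a bucket and gzip-compresses JSON
// artifacts. The bucket name and file paths are hypothetical.
func exampleUpload(ctx context.Context) error {
	targets := map[string]UploadFunc{
		"artifacts/build-log.txt": FileUpload("/tmp/build-log.txt"), // hypothetical paths
		"artifacts/metadata.json": FileUpload("/tmp/metadata.json"),
	}
	// Empty credential paths fall back to the opener's default credentials.
	return Upload(ctx, "gs://my-prow-artifacts", "", "", []string{"json"}, targets)
}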

// shouldCompressFileType reports whether the file at dest should be gzipped
// before upload: files that are already gzipped (gz or gzip extensions) are
// never recompressed, and "*" in compressFileTypes matches any other extension.
func shouldCompressFileType(dest string, compressFileTypes sets.Set[string]) bool {
	ext := strings.TrimPrefix(filepath.Ext(dest), ".")
	if ext == "gz" || ext == "gzip" {
		return false
	}
	return compressFileTypes.Has("*") || compressFileTypes.Has(ext)
}
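
// For illustration (hypothetical values): with types := sets.New[string]("txt", "json"),
//
//	shouldCompressFileType("build-log.txt", types) // true
//	shouldCompressFileType("image.png", types)     // false: extension not listed
//	shouldCompressFileType("log.tar.gz", types)    // false: already gzipped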

// LocalExport copies all of the data in the uploadTargets map to local files in parallel. The map
// is keyed on file path under the exportDir.
func LocalExport(ctx context.Context, exportDir string, uploadTargets map[string]UploadFunc) error {
	opener, err := pkgio.NewOpener(ctx, "", "")
	if err != nil {
		return fmt.Errorf("new opener: %w", err)
	}
	dtw := func(dest string) dataWriter {
		return &openerObjectWriter{Opener: opener, Context: ctx, Bucket: exportDir, Dest: dest}
	}
	return upload(dtw, uploadTargets)
}

// upload writes every target through the writer produced by dtw, retrying
// failed uploads with quadratic backoff (1s, 4s, 9s between attempts).
func upload(dtw destToWriter, uploadTargets map[string]UploadFunc) error {
	errCh := make(chan error, len(uploadTargets))
	group := &sync.WaitGroup{}
	// Allow at most four uploads to run concurrently.
	sem := semaphore.NewWeighted(4)
	group.Add(len(uploadTargets))
	for dest, upload := range uploadTargets {
		writer := dtw(dest)
		log := logrus.WithField("dest", writer.fullUploadPath())
		log.Info("Queued for upload")
		go func(f UploadFunc, writer dataWriter, log *logrus.Entry) {
			defer group.Done()

			var err error

			for retryIndex := 1; retryIndex <= retryCount; retryIndex++ {
				err = func() error {
					// Acquire cannot be interrupted here since the context never expires.
					if err := sem.Acquire(context.Background(), 1); err != nil {
						return err
					}
					defer sem.Release(1)
					if retryIndex > 1 {
						log.WithField("retry_attempt", retryIndex).Debug("Retrying upload")
					}
					return f(writer)
				}()

				if err == nil {
					break
				}
				if retryIndex < retryCount {
					time.Sleep(time.Duration(retryIndex*retryIndex) * time.Second)
				}
			}

			if err != nil {
				errCh <- err
				log.Info("Failed upload")
			} else {
				log.Info("Finished upload")
			}
		}(upload, writer, log)
	}
	group.Wait()
	close(errCh)
	if len(errCh) != 0 {
		var uploadErrors []error
		for err := range errCh {
			uploadErrors = append(uploadErrors, err)
		}
		return fmt.Errorf("encountered errors during upload: %v", uploadErrors)
	}
	return nil
}

// FileUpload returns an UploadFunc which copies all
// data from the file on disk to the GCS object.
func FileUpload(file string) UploadFunc {
	return FileUploadWithOptions(file, pkgio.WriterOptions{})
}

// FileUploadWithOptions returns an UploadFunc which copies all data
// from the file on disk into the GCS object and also sets the provided
// attributes on the object.
func FileUploadWithOptions(file string, opts pkgio.WriterOptions) UploadFunc {
	return func(writer dataWriter) error {
		if fi, err := os.Stat(file); err == nil {
			// Size the upload buffer to the file, capped at 25 MiB to bound
			// memory use for large files.
			opts.BufferSize = utilpointer.Int64(fi.Size())
			if *opts.BufferSize > 25*1024*1024 {
				*opts.BufferSize = 25 * 1024 * 1024
			}
		}

		// Open the file anew on each attempt so retries read from the beginning.
		newReader := func() (io.ReadCloser, error) {
			reader, err := os.Open(file)
			if err != nil {
				return nil, err
			}
			return reader, nil
		}

		uploadErr := DataUploadWithOptions(newReader, opts)(writer)
		if uploadErr != nil {
			uploadErr = fmt.Errorf("upload error: %w", uploadErr)
		}
		return uploadErr
	}
}
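
// Example (illustrative sketch, not part of the original file): forcing a
// content type when uploading a file; the path and media type are hypothetical.
func exampleFileUploadWithOptions() UploadFunc {
	contentType := "application/xml"
	return FileUploadWithOptions("/tmp/junit.xml", pkgio.WriterOptions{
		ContentType: &contentType,
	})
}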

// DataUpload returns an UploadFunc which copies all data from the reader
// produced by newReader into GCS.
func DataUpload(newReader ReaderFunc) UploadFunc {
	return DataUploadWithOptions(newReader, pkgio.WriterOptions{})
}

// DataUploadWithMetadata returns an UploadFunc which copies all data from the
// reader produced by newReader into GCS and also sets the provided metadata
// fields onto the object.
func DataUploadWithMetadata(newReader ReaderFunc, metadata map[string]string) UploadFunc {
	return DataUploadWithOptions(newReader, pkgio.WriterOptions{Metadata: metadata})
}
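
// Example (illustrative sketch, not part of the original file): uploading an
// in-memory payload with hypothetical metadata. The ReaderFunc returns a
// fresh reader on each call so retried attempts re-read from the start.
func exampleDataUploadWithMetadata() UploadFunc {
	newReader := func() (io.ReadCloser, error) {
		return io.NopCloser(strings.NewReader(`{"result":"SUCCESS"}`)), nil
	}
	return DataUploadWithMetadata(newReader, map[string]string{"finished": "true"})
}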

// DataUploadWithOptions returns an UploadFunc which copies all data from the
// reader produced by newReader into GCS and also sets the provided attributes
// on the object.
func DataUploadWithOptions(newReader ReaderFunc, attrs pkgio.WriterOptions) UploadFunc {
	return func(writer dataWriter) (e error) {
		errors := make([]error, 0, 4)
		// The deferred close sets the named return e so that writer close
		// errors are aggregated with any reader or copy errors.
		defer func() {
			if err := writer.Close(); err != nil {
				errors = append(errors, fmt.Errorf("writer close error: %w", err))
			}
			e = utilerrors.NewAggregate(errors)
		}()

		writer.ApplyWriterOptions(attrs)

		reader, err := newReader()
		if err != nil {
			errors = append(errors, fmt.Errorf("reader new error: %w", err))
			return e
		}
		defer func() {
			if err := reader.Close(); err != nil {
				errors = append(errors, fmt.Errorf("reader close error: %w", err))
			}
		}()

		if _, err := io.Copy(writer, reader); err != nil {
			errors = append(errors, fmt.Errorf("copy error: %w", err))
		}

		return e
	}
}

// dataWriter is an io.WriteCloser bound to a destination that can have
// storage writer options applied before the first write.
type dataWriter interface {
	io.WriteCloser
	fullUploadPath() string
	ApplyWriterOptions(opts pkgio.WriterOptions)
}

// openerObjectWriter lazily opens a writer to Bucket/Dest on the first Write,
// optionally wrapping it in a gzip writer when the content should be compressed.
type openerObjectWriter struct {
	pkgio.Opener
	Context          context.Context
	Bucket           string
	Dest             string
	compressFileType bool
	opts             []pkgio.WriterOptions
	writer           pkgio.Writer
	closers          []pkgio.Closer
}

func (w *openerObjectWriter) Write(p []byte) (n int, err error) {
	if w.writer == nil {
		// Decide on the first write whether to gzip: only compress eligible
		// file types larger than 1KB whose content is not already gzip data
		// (DetectContentType sniffs at most the first 512 bytes of p).
		largerThanOneKB := len(p) > 1024
		shouldCompressFile := w.compressFileType && largerThanOneKB && http.DetectContentType(p) != "application/x-gzip"
		if shouldCompressFile {
			path := w.fullUploadPath()
			ext := filepath.Ext(path)
			mediaType := mime.TypeByExtension(ext)
			if mediaType == "" {
				mediaType = "text/plain; charset=utf-8"
			}
			ce := "gzip"
			w.opts = append(w.opts, pkgio.WriterOptions{
				ContentType:     &mediaType,
				ContentEncoding: &ce,
			})
		}
		var storageWriter pkgio.WriteCloser
		storageWriter, err = w.Opener.Writer(w.Context, w.fullUploadPath(), w.opts...)
		if err != nil {
			return 0, err
		}
		if shouldCompressFile {
			zipWriter := gzip.NewWriter(storageWriter)
			w.writer = zipWriter
			w.closers = append(w.closers, zipWriter)
		} else {
			w.writer = storageWriter
		}
		// The storage closer needs to be last in the list to close in the correct order.
		w.closers = append(w.closers, storageWriter)
	}
	return w.writer.Write(p)
}

func (w *openerObjectWriter) Close() error {
	if w.writer == nil {
		// Always create a writer, even if Write() was never called; otherwise
		// empty files would never be created, because Write() is never called
		// for them.
		if _, err := w.Write([]byte("")); err != nil {
			return err
		}
	}

	var errs []error
	for _, closer := range w.closers {
		if err := closer.Close(); err != nil {
			errs = append(errs, err)
		}
	}
	w.closers = nil
	w.writer = nil
	return utilerrors.NewAggregate(errs)
}

func (w *openerObjectWriter) ApplyWriterOptions(opts pkgio.WriterOptions) {
	w.opts = append(w.opts, opts)
}

func (w *openerObjectWriter) fullUploadPath() string {
	return fmt.Sprintf("%s/%s", w.Bucket, w.Dest)
}
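
// For illustration (hypothetical values): with Bucket "gs://my-bucket" and
// Dest "artifacts/build-log.txt", fullUploadPath returns
// "gs://my-bucket/artifacts/build-log.txt".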