sigs.k8s.io/prow@v0.0.0-20240503223140-c5e374dc7eb1/pkg/spyglass/storageartifact_fetcher.go (about)

     1  /*
     2  Copyright 2018 The Kubernetes Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package spyglass
    18  
    19  import (
    20  	"context"
    21  	"errors"
    22  	"fmt"
    23  	"io"
    24  	"math/rand"
    25  	"net/url"
    26  	"path"
    27  	"strings"
    28  	"time"
    29  
    30  	"github.com/sirupsen/logrus"
    31  
    32  	"sigs.k8s.io/prow/pkg/config"
    33  	pkgio "sigs.k8s.io/prow/pkg/io"
    34  	"sigs.k8s.io/prow/pkg/spyglass/api"
    35  )
    36  
    37  var (
    38  	// ErrCannotParseSource is returned by newStorageJobSource when an incorrectly formatted source string is passed
    39  	ErrCannotParseSource = errors.New("could not create job source from provided source")
    40  )
    41  
    42  // StorageArtifactFetcher contains information used for fetching artifacts from GCS
    43  type StorageArtifactFetcher struct {
    44  	opener        pkgio.Opener
    45  	cfg           config.Getter
    46  	useCookieAuth bool
    47  }
    48  
    49  // storageJobSource is a location in GCS where Prow job-specific artifacts are stored. This implementation assumes
    50  // Prow's native GCS upload format (treating GCS keys as a directory structure), and is not
    51  // intended to support arbitrary GCS bucket upload formats.
    52  type storageJobSource struct {
    53  	source     string
    54  	linkPrefix string
    55  	bucket     string
    56  	jobPrefix  string
    57  	jobName    string
    58  	buildID    string
    59  }
    60  
    61  // NewStorageArtifactFetcher creates a new ArtifactFetcher with a real GCS Client
    62  func NewStorageArtifactFetcher(opener pkgio.Opener, cfg config.Getter, useCookieAuth bool) *StorageArtifactFetcher {
    63  	return &StorageArtifactFetcher{
    64  		opener:        opener,
    65  		cfg:           cfg,
    66  		useCookieAuth: useCookieAuth,
    67  	}
    68  }
    69  
    70  // parseStorageURL parses and validates the storage path.
    71  // If no scheme is given we assume Google Cloud Storage ("gs"). For example:
    72  // * test-bucket/logs/sig-flexing/example-ci-run/403 or
    73  // * gs://test-bucket/logs/sig-flexing/example-ci-run/403
    74  func (af *StorageArtifactFetcher) parseStorageURL(storagePath string) (*url.URL, error) {
    75  	if !strings.Contains(storagePath, "://") {
    76  		storagePath = "gs://" + storagePath
    77  	}
    78  	storageURL, err := url.Parse(storagePath)
    79  	if err != nil {
    80  		return nil, ErrCannotParseSource
    81  	}
    82  	if err := af.cfg().ValidateStorageBucket(storageURL.Host); err != nil {
    83  		return nil, err
    84  	}
    85  	return storageURL, nil
    86  }
    87  
    88  func fieldsForJob(src *storageJobSource) logrus.Fields {
    89  	return logrus.Fields{
    90  		"jobPrefix": src.jobPath(),
    91  	}
    92  }
    93  
    94  // newStorageJobSource creates a new storageJobSource from a given storage URL.
    95  // If no scheme is given we assume Google Cloud Storage ("gs"). For example:
    96  // * test-bucket/logs/sig-flexing/example-ci-run/403 or
    97  // * gs://test-bucket/logs/sig-flexing/example-ci-run/403
    98  func (af *StorageArtifactFetcher) newStorageJobSource(storagePath string) (*storageJobSource, error) {
    99  	storageURL, err := af.parseStorageURL(storagePath)
   100  	if err != nil {
   101  		return &storageJobSource{}, err
   102  	}
   103  	var object string
   104  	if storageURL.Path == "" {
   105  		object = storageURL.Path
   106  	} else {
   107  		object = storageURL.Path[1:]
   108  	}
   109  
   110  	tokens := strings.FieldsFunc(object, func(c rune) bool { return c == '/' })
   111  	if len(tokens) < 2 {
   112  		return &storageJobSource{}, ErrCannotParseSource
   113  	}
   114  	buildID := tokens[len(tokens)-1]
   115  	name := tokens[len(tokens)-2]
   116  	potentialAlias := storageURL.Host
   117  	if bucket, exists := af.cfg().Deck.Spyglass.BucketAliases[potentialAlias]; exists {
   118  		storageURL.Host = bucket
   119  	}
   120  	return &storageJobSource{
   121  		source:     storageURL.String(),
   122  		linkPrefix: storageURL.Scheme + "://",
   123  		bucket:     storageURL.Host,
   124  		jobPrefix:  path.Clean(object) + "/",
   125  		jobName:    name,
   126  		buildID:    buildID,
   127  	}, nil
   128  }
   129  
   130  // Artifacts lists all artifacts available for the given job source
   131  // If no scheme is given we assume GS, e.g.:
   132  // * test-bucket/logs/sig-flexing/example-ci-run/403 or
   133  // * gs://test-bucket/logs/sig-flexing/example-ci-run/403
   134  func (af *StorageArtifactFetcher) artifacts(ctx context.Context, key string) ([]string, error) {
   135  	src, err := af.newStorageJobSource(key)
   136  	if err != nil {
   137  		return nil, fmt.Errorf("Failed to get GCS job source from %s: %w", key, err)
   138  	}
   139  
   140  	listStart := time.Now()
   141  	_, prefix := extractBucketPrefixPair(src.jobPath())
   142  	artifacts := []string{}
   143  
   144  	it, err := af.opener.Iterator(ctx, src.source, "")
   145  	if err != nil {
   146  		return artifacts, err
   147  	}
   148  
   149  	wait := []time.Duration{16, 32, 64, 128, 256, 256, 512, 512}
   150  	for i := 0; ; {
   151  		oAttrs, err := it.Next(ctx)
   152  		if err == io.EOF {
   153  			break
   154  		}
   155  		if err != nil {
   156  			if err == context.Canceled {
   157  				return nil, err
   158  			}
   159  			logrus.WithFields(fieldsForJob(src)).WithError(err).Error("Error accessing GCS artifact.")
   160  			if i >= len(wait) {
   161  				return artifacts, fmt.Errorf("timed out: error accessing GCS artifact: %w", err)
   162  			}
   163  			time.Sleep((wait[i] + time.Duration(rand.Intn(10))) * time.Millisecond)
   164  			i++
   165  			continue
   166  		}
   167  		artifacts = append(artifacts, strings.TrimPrefix(oAttrs.Name, prefix))
   168  		i = 0
   169  	}
   170  	logrus.WithField("duration", time.Since(listStart).String()).Infof("Listed %d artifacts.", len(artifacts))
   171  	return artifacts, nil
   172  }
   173  
   174  func (af *StorageArtifactFetcher) signURL(ctx context.Context, key string) (string, error) {
   175  	return af.opener.SignedURL(ctx, key, pkgio.SignedURLOptions{
   176  		UseGSCookieAuth: af.useCookieAuth,
   177  	})
   178  }
   179  
   180  type storageArtifactHandle struct {
   181  	pkgio.Opener
   182  	Name string
   183  }
   184  
   185  func (h *storageArtifactHandle) NewReader(ctx context.Context) (io.ReadCloser, error) {
   186  	return h.Opener.Reader(ctx, h.Name)
   187  }
   188  
   189  func (h *storageArtifactHandle) NewRangeReader(ctx context.Context, offset, length int64) (io.ReadCloser, error) {
   190  	return h.Opener.RangeReader(ctx, h.Name, offset, length)
   191  }
   192  
   193  func (h *storageArtifactHandle) Attrs(ctx context.Context) (pkgio.Attributes, error) {
   194  	return h.Opener.Attributes(ctx, h.Name)
   195  }
   196  
   197  func (h *storageArtifactHandle) UpdateAttrs(ctx context.Context, attrs pkgio.ObjectAttrsToUpdate) (*pkgio.Attributes, error) {
   198  	return h.UpdateAtributes(ctx, h.Name, attrs)
   199  }
   200  
   201  // Artifact constructs a GCS artifact from the given GCS bucket and key. Uses the golang GCS library
   202  // to get read handles. If the artifactName is not a valid key in the bucket a handle will still be
   203  // constructed and returned, but all read operations will fail (dictated by behavior of golang GCS lib).
   204  // If no scheme is given we assume GS, e.g.:
   205  // * test-bucket/logs/sig-flexing/example-ci-run/403 or
   206  // * gs://test-bucket/logs/sig-flexing/example-ci-run/403
   207  func (af *StorageArtifactFetcher) Artifact(ctx context.Context, key string, artifactName string, sizeLimit int64) (api.Artifact, error) {
   208  	src, err := af.newStorageJobSource(key)
   209  	if err != nil {
   210  		return nil, fmt.Errorf("failed to get GCS job source from %s: %w", key, err)
   211  	}
   212  
   213  	_, prefix := extractBucketPrefixPair(src.jobPath())
   214  	objName := path.Join(prefix, artifactName)
   215  	obj := &storageArtifactHandle{Opener: af.opener, Name: fmt.Sprintf("%s%s/%s", src.linkPrefix, src.bucket, objName)}
   216  	signedURL, err := af.signURL(ctx, fmt.Sprintf("%s%s/%s", src.linkPrefix, src.bucket, objName))
   217  	if err != nil {
   218  		return nil, err
   219  	}
   220  	return NewStorageArtifact(context.Background(), obj, signedURL, artifactName, sizeLimit), nil
   221  }
   222  
   223  func extractBucketPrefixPair(storagePath string) (string, string) {
   224  	split := strings.SplitN(storagePath, "/", 2)
   225  	return split[0], split[1]
   226  }
   227  
   228  // JobPath gets the prefix to all artifacts in GCS in the job
   229  func (src *storageJobSource) jobPath() string {
   230  	return fmt.Sprintf("%s/%s", src.bucket, src.jobPrefix)
   231  }