github.com/munnerz/test-infra@v0.0.0-20190108210205-ce3d181dc989/prow/spyglass/gcsartifact_fetcher.go (about)

     1  /*
     2  Copyright 2018 The Kubernetes Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package spyglass
    18  
    19  import (
    20  	"context"
    21  	"errors"
    22  	"fmt"
    23  	"io"
    24  	"math/rand"
    25  	"net/url"
    26  	"path"
    27  	"strings"
    28  	"time"
    29  
    30  	"cloud.google.com/go/storage"
    31  	"github.com/sirupsen/logrus"
    32  	"google.golang.org/api/iterator"
    33  
    34  	"k8s.io/test-infra/prow/spyglass/lenses"
    35  	"k8s.io/test-infra/testgrid/util/gcs"
    36  )
    37  
    38  const (
    39  	httpScheme  = "http"
    40  	httpsScheme = "https"
    41  )
    42  
    43  var (
    44  	// ErrCannotParseSource is returned by newGCSJobSource when an incorrectly formatted source string is passed
    45  	ErrCannotParseSource = errors.New("could not create job source from provided source")
    46  )
    47  
// GCSArtifactFetcher contains information used for fetching artifacts from GCS.
// Construct one with NewGCSArtifactFetcher.
type GCSArtifactFetcher struct {
	// client is the GCS client used to obtain bucket and object handles.
	client *storage.Client
}
    52  
// gcsJobSource is a location in GCS where Prow job-specific artifacts are stored. This implementation assumes
// Prow's native GCS upload format (treating GCS keys as a directory structure), and is not
// intended to support arbitrary GCS bucket upload formats.
type gcsJobSource struct {
	// source is the unmodified string the job source was parsed from.
	source string
	// linkPrefix is the scheme prefix used for canonical links ("gs://" when built by newGCSJobSource).
	linkPrefix string
	// bucket is the name of the GCS bucket holding the job's artifacts.
	bucket string
	// jobPrefix is the cleaned object path of the job within the bucket, always ending in "/".
	jobPrefix string
	// jobName is the second-to-last segment of the object path.
	jobName string
	// buildID is the last segment of the object path.
	buildID string
}
    64  
    65  // NewGCSArtifactFetcher creates a new ArtifactFetcher with a real GCS Client
    66  func NewGCSArtifactFetcher(c *storage.Client) *GCSArtifactFetcher {
    67  	return &GCSArtifactFetcher{
    68  		client: c,
    69  	}
    70  }
    71  
    72  func fieldsForJob(src *gcsJobSource) logrus.Fields {
    73  	return logrus.Fields{
    74  		"jobPrefix": src.jobPath(),
    75  	}
    76  }
    77  
    78  // newGCSJobSource creates a new gcsJobSource from a given bucket and jobPrefix
    79  func newGCSJobSource(src string) (*gcsJobSource, error) {
    80  	gcsURL, err := url.Parse(fmt.Sprintf("gs://%s", src))
    81  	if err != nil {
    82  		return &gcsJobSource{}, ErrCannotParseSource
    83  	}
    84  	gcsPath := &gcs.Path{}
    85  	err = gcsPath.SetURL(gcsURL)
    86  	if err != nil {
    87  		return &gcsJobSource{}, ErrCannotParseSource
    88  	}
    89  
    90  	tokens := strings.FieldsFunc(gcsPath.Object(), func(c rune) bool { return c == '/' })
    91  	if len(tokens) < 2 {
    92  		return &gcsJobSource{}, ErrCannotParseSource
    93  	}
    94  	buildID := tokens[len(tokens)-1]
    95  	name := tokens[len(tokens)-2]
    96  	return &gcsJobSource{
    97  		source:     src,
    98  		linkPrefix: "gs://",
    99  		bucket:     gcsPath.Bucket(),
   100  		jobPrefix:  path.Clean(gcsPath.Object()) + "/",
   101  		jobName:    name,
   102  		buildID:    buildID,
   103  	}, nil
   104  }
   105  
   106  // Artifacts lists all artifacts available for the given job source
   107  func (af *GCSArtifactFetcher) artifacts(key string) ([]string, error) {
   108  	src, err := newGCSJobSource(key)
   109  	if err != nil {
   110  		return nil, fmt.Errorf("Failed to get GCS job source from %s: %v", key, err)
   111  	}
   112  
   113  	listStart := time.Now()
   114  	bucketName, prefix := extractBucketPrefixPair(src.jobPath())
   115  	artifacts := []string{}
   116  	bkt := af.client.Bucket(bucketName)
   117  	q := storage.Query{
   118  		Prefix:   prefix,
   119  		Versions: false,
   120  	}
   121  	objIter := bkt.Objects(context.Background(), &q)
   122  	wait := []time.Duration{16, 32, 64, 128, 256, 256, 512, 512}
   123  	for i := 0; ; {
   124  		oAttrs, err := objIter.Next()
   125  		if err == iterator.Done {
   126  			break
   127  		}
   128  		if err != nil {
   129  			logrus.WithFields(fieldsForJob(src)).WithError(err).Error("Error accessing GCS artifact.")
   130  			if i >= len(wait) {
   131  				return artifacts, fmt.Errorf("timed out: error accessing GCS artifact: %v", err)
   132  			}
   133  			time.Sleep((wait[i] + time.Duration(rand.Intn(10))) * time.Millisecond)
   134  			i++
   135  			continue
   136  		}
   137  		artifacts = append(artifacts, strings.TrimPrefix(oAttrs.Name, prefix))
   138  		i = 0
   139  	}
   140  	listElapsed := time.Since(listStart)
   141  	logrus.WithField("duration", listElapsed).Infof("Listed %d artifacts.", len(artifacts))
   142  	return artifacts, nil
   143  }
   144  
// gcsArtifactHandle embeds a *storage.ObjectHandle and redeclares its reader
// constructors (NewReader, NewRangeReader) with io.ReadCloser return types.
// NOTE(review): presumably this lets the handle satisfy an interface expected
// by NewGCSArtifact, which is defined outside this file — confirm.
type gcsArtifactHandle struct {
	*storage.ObjectHandle
}
   148  
// NewReader opens a reader over the object by delegating to the embedded
// ObjectHandle, returning the result as an io.ReadCloser.
func (h *gcsArtifactHandle) NewReader(ctx context.Context) (io.ReadCloser, error) {
	// The explicit ObjectHandle selector is required: h.NewReader would call this method again.
	return h.ObjectHandle.NewReader(ctx)
}
   152  
// NewRangeReader opens a reader over part of the object by passing offset and
// length to the embedded ObjectHandle, returning the result as an io.ReadCloser.
func (h *gcsArtifactHandle) NewRangeReader(ctx context.Context, offset, length int64) (io.ReadCloser, error) {
	// The explicit ObjectHandle selector is required: h.NewRangeReader would call this method again.
	return h.ObjectHandle.NewRangeReader(ctx, offset, length)
}
   156  
   157  // Artifact constructs a GCS artifact from the given GCS bucket and key. Uses the golang GCS library
   158  // to get read handles. If the artifactName is not a valid key in the bucket a handle will still be
   159  // constructed and returned, but all read operations will fail (dictated by behavior of golang GCS lib).
   160  func (af *GCSArtifactFetcher) artifact(key string, artifactName string, sizeLimit int64) (lenses.Artifact, error) {
   161  	src, err := newGCSJobSource(key)
   162  	if err != nil {
   163  		return nil, fmt.Errorf("Failed to get GCS job source from %s: %v", key, err)
   164  	}
   165  
   166  	bucketName, prefix := extractBucketPrefixPair(src.jobPath())
   167  	bkt := af.client.Bucket(bucketName)
   168  	obj := &gcsArtifactHandle{bkt.Object(path.Join(prefix, artifactName))}
   169  	artifactLink := &url.URL{
   170  		Scheme: httpsScheme,
   171  		Host:   "storage.googleapis.com",
   172  		Path:   path.Join(src.jobPath(), artifactName),
   173  	}
   174  	return NewGCSArtifact(context.Background(), obj, artifactLink.String(), artifactName, sizeLimit), nil
   175  }
   176  
   177  func extractBucketPrefixPair(gcsPath string) (string, string) {
   178  	split := strings.SplitN(gcsPath, "/", 2)
   179  	return split[0], split[1]
   180  }
   181  
   182  // CanonicalLink gets a link to the location of job-specific artifacts in GCS
   183  func (src *gcsJobSource) canonicalLink() string {
   184  	return path.Join(src.linkPrefix, src.bucket, src.jobPrefix)
   185  }
   186  
   187  // JobPath gets the prefix to all artifacts in GCS in the job
   188  func (src *gcsJobSource) jobPath() string {
   189  	return fmt.Sprintf("%s/%s", src.bucket, src.jobPrefix)
   190  }