github.com/munnerz/test-infra@v0.0.0-20190108210205-ce3d181dc989/prow/spyglass/gcsartifact_fetcher.go (about) 1 /* 2 Copyright 2018 The Kubernetes Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package spyglass 18 19 import ( 20 "context" 21 "errors" 22 "fmt" 23 "io" 24 "math/rand" 25 "net/url" 26 "path" 27 "strings" 28 "time" 29 30 "cloud.google.com/go/storage" 31 "github.com/sirupsen/logrus" 32 "google.golang.org/api/iterator" 33 34 "k8s.io/test-infra/prow/spyglass/lenses" 35 "k8s.io/test-infra/testgrid/util/gcs" 36 ) 37 38 const ( 39 httpScheme = "http" 40 httpsScheme = "https" 41 ) 42 43 var ( 44 // ErrCannotParseSource is returned by newGCSJobSource when an incorrectly formatted source string is passed 45 ErrCannotParseSource = errors.New("could not create job source from provided source") 46 ) 47 48 // GCSArtifactFetcher contains information used for fetching artifacts from GCS 49 type GCSArtifactFetcher struct { 50 client *storage.Client 51 } 52 53 // gcsJobSource is a location in GCS where Prow job-specific artifacts are stored. This implementation assumes 54 // Prow's native GCS upload format (treating GCS keys as a directory structure), and is not 55 // intended to support arbitrary GCS bucket upload formats. 56 type gcsJobSource struct { 57 source string 58 linkPrefix string 59 bucket string 60 jobPrefix string 61 jobName string 62 buildID string 63 } 64 65 // NewGCSArtifactFetcher creates a new ArtifactFetcher with a real GCS Client 66 func NewGCSArtifactFetcher(c *storage.Client) *GCSArtifactFetcher { 67 return &GCSArtifactFetcher{ 68 client: c, 69 } 70 } 71 72 func fieldsForJob(src *gcsJobSource) logrus.Fields { 73 return logrus.Fields{ 74 "jobPrefix": src.jobPath(), 75 } 76 } 77 78 // newGCSJobSource creates a new gcsJobSource from a given bucket and jobPrefix 79 func newGCSJobSource(src string) (*gcsJobSource, error) { 80 gcsURL, err := url.Parse(fmt.Sprintf("gs://%s", src)) 81 if err != nil { 82 return &gcsJobSource{}, ErrCannotParseSource 83 } 84 gcsPath := &gcs.Path{} 85 err = gcsPath.SetURL(gcsURL) 86 if err != nil { 87 return &gcsJobSource{}, ErrCannotParseSource 88 } 89 90 tokens := strings.FieldsFunc(gcsPath.Object(), func(c rune) bool { return c == '/' }) 91 if len(tokens) < 2 { 92 return &gcsJobSource{}, ErrCannotParseSource 93 } 94 buildID := tokens[len(tokens)-1] 95 name := tokens[len(tokens)-2] 96 return &gcsJobSource{ 97 source: src, 98 linkPrefix: "gs://", 99 bucket: gcsPath.Bucket(), 100 jobPrefix: path.Clean(gcsPath.Object()) + "/", 101 jobName: name, 102 buildID: buildID, 103 }, nil 104 } 105 106 // Artifacts lists all artifacts available for the given job source 107 func (af *GCSArtifactFetcher) artifacts(key string) ([]string, error) { 108 src, err := newGCSJobSource(key) 109 if err != nil { 110 return nil, fmt.Errorf("Failed to get GCS job source from %s: %v", key, err) 111 } 112 113 listStart := time.Now() 114 bucketName, prefix := extractBucketPrefixPair(src.jobPath()) 115 artifacts := []string{} 116 bkt := af.client.Bucket(bucketName) 117 q := storage.Query{ 118 Prefix: prefix, 119 Versions: false, 120 } 121 objIter := bkt.Objects(context.Background(), &q) 122 wait := []time.Duration{16, 32, 64, 128, 256, 256, 512, 512} 123 for i := 0; ; { 124 oAttrs, err := objIter.Next() 125 if err == iterator.Done { 126 break 127 } 128 if err != nil { 129 logrus.WithFields(fieldsForJob(src)).WithError(err).Error("Error accessing GCS artifact.") 130 if i >= len(wait) { 131 return artifacts, fmt.Errorf("timed out: error accessing GCS artifact: %v", err) 132 } 133 time.Sleep((wait[i] + time.Duration(rand.Intn(10))) * time.Millisecond) 134 i++ 135 continue 136 } 137 artifacts = append(artifacts, strings.TrimPrefix(oAttrs.Name, prefix)) 138 i = 0 139 } 140 listElapsed := time.Since(listStart) 141 logrus.WithField("duration", listElapsed).Infof("Listed %d artifacts.", len(artifacts)) 142 return artifacts, nil 143 } 144 145 type gcsArtifactHandle struct { 146 *storage.ObjectHandle 147 } 148 149 func (h *gcsArtifactHandle) NewReader(ctx context.Context) (io.ReadCloser, error) { 150 return h.ObjectHandle.NewReader(ctx) 151 } 152 153 func (h *gcsArtifactHandle) NewRangeReader(ctx context.Context, offset, length int64) (io.ReadCloser, error) { 154 return h.ObjectHandle.NewRangeReader(ctx, offset, length) 155 } 156 157 // Artifact constructs a GCS artifact from the given GCS bucket and key. Uses the golang GCS library 158 // to get read handles. If the artifactName is not a valid key in the bucket a handle will still be 159 // constructed and returned, but all read operations will fail (dictated by behavior of golang GCS lib). 160 func (af *GCSArtifactFetcher) artifact(key string, artifactName string, sizeLimit int64) (lenses.Artifact, error) { 161 src, err := newGCSJobSource(key) 162 if err != nil { 163 return nil, fmt.Errorf("Failed to get GCS job source from %s: %v", key, err) 164 } 165 166 bucketName, prefix := extractBucketPrefixPair(src.jobPath()) 167 bkt := af.client.Bucket(bucketName) 168 obj := &gcsArtifactHandle{bkt.Object(path.Join(prefix, artifactName))} 169 artifactLink := &url.URL{ 170 Scheme: httpsScheme, 171 Host: "storage.googleapis.com", 172 Path: path.Join(src.jobPath(), artifactName), 173 } 174 return NewGCSArtifact(context.Background(), obj, artifactLink.String(), artifactName, sizeLimit), nil 175 } 176 177 func extractBucketPrefixPair(gcsPath string) (string, string) { 178 split := strings.SplitN(gcsPath, "/", 2) 179 return split[0], split[1] 180 } 181 182 // CanonicalLink gets a link to the location of job-specific artifacts in GCS 183 func (src *gcsJobSource) canonicalLink() string { 184 return path.Join(src.linkPrefix, src.bucket, src.jobPrefix) 185 } 186 187 // JobPath gets the prefix to all artifacts in GCS in the job 188 func (src *gcsJobSource) jobPath() string { 189 return fmt.Sprintf("%s/%s", src.bucket, src.jobPrefix) 190 }