sigs.k8s.io/prow@v0.0.0-20240503223140-c5e374dc7eb1/pkg/spyglass/storageartifact_fetcher.go

/*
Copyright 2018 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package spyglass

import (
	"context"
	"errors"
	"fmt"
	"io"
	"math/rand"
	"net/url"
	"path"
	"strings"
	"time"

	"github.com/sirupsen/logrus"

	"sigs.k8s.io/prow/pkg/config"
	pkgio "sigs.k8s.io/prow/pkg/io"
	"sigs.k8s.io/prow/pkg/spyglass/api"
)

var (
	// ErrCannotParseSource is returned by newStorageJobSource when an incorrectly formatted source string is passed.
	ErrCannotParseSource = errors.New("could not create job source from provided source")
)

// StorageArtifactFetcher contains information used for fetching artifacts from GCS.
type StorageArtifactFetcher struct {
	opener        pkgio.Opener
	cfg           config.Getter
	useCookieAuth bool
}

// storageJobSource is a location in GCS where Prow job-specific artifacts are stored. This implementation assumes
// Prow's native GCS upload format (treating GCS keys as a directory structure), and is not
// intended to support arbitrary GCS bucket upload formats.
type storageJobSource struct {
	source     string
	linkPrefix string
	bucket     string
	jobPrefix  string
	jobName    string
	buildID    string
}

// NewStorageArtifactFetcher creates a new ArtifactFetcher with a real GCS client.
func NewStorageArtifactFetcher(opener pkgio.Opener, cfg config.Getter, useCookieAuth bool) *StorageArtifactFetcher {
	return &StorageArtifactFetcher{
		opener:        opener,
		cfg:           cfg,
		useCookieAuth: useCookieAuth,
	}
}

// parseStorageURL parses and validates the storage path.
// If no scheme is given we assume Google Cloud Storage ("gs"). For example:
// * test-bucket/logs/sig-flexing/example-ci-run/403 or
// * gs://test-bucket/logs/sig-flexing/example-ci-run/403
func (af *StorageArtifactFetcher) parseStorageURL(storagePath string) (*url.URL, error) {
	if !strings.Contains(storagePath, "://") {
		storagePath = "gs://" + storagePath
	}
	storageURL, err := url.Parse(storagePath)
	if err != nil {
		return nil, ErrCannotParseSource
	}
	if err := af.cfg().ValidateStorageBucket(storageURL.Host); err != nil {
		return nil, err
	}
	return storageURL, nil
}

func fieldsForJob(src *storageJobSource) logrus.Fields {
	return logrus.Fields{
		"jobPrefix": src.jobPath(),
	}
}

// newStorageJobSource creates a new storageJobSource from a given storage URL.
// If no scheme is given we assume Google Cloud Storage ("gs"). For example:
// * test-bucket/logs/sig-flexing/example-ci-run/403 or
// * gs://test-bucket/logs/sig-flexing/example-ci-run/403
func (af *StorageArtifactFetcher) newStorageJobSource(storagePath string) (*storageJobSource, error) {
	storageURL, err := af.parseStorageURL(storagePath)
	if err != nil {
		return &storageJobSource{}, err
	}
	var object string
	if storageURL.Path == "" {
		object = storageURL.Path
	} else {
		object = storageURL.Path[1:]
	}

	tokens := strings.FieldsFunc(object, func(c rune) bool { return c == '/' })
	if len(tokens) < 2 {
		return &storageJobSource{}, ErrCannotParseSource
	}
	buildID := tokens[len(tokens)-1]
	name := tokens[len(tokens)-2]
	potentialAlias := storageURL.Host
	if bucket, exists := af.cfg().Deck.Spyglass.BucketAliases[potentialAlias]; exists {
		storageURL.Host = bucket
	}
	return &storageJobSource{
		source:     storageURL.String(),
		linkPrefix: storageURL.Scheme + "://",
		bucket:     storageURL.Host,
		jobPrefix:  path.Clean(object) + "/",
		jobName:    name,
		buildID:    buildID,
	}, nil
}

// artifacts lists all artifacts available for the given job source.
// If no scheme is given we assume GS, e.g.:
// * test-bucket/logs/sig-flexing/example-ci-run/403 or
// * gs://test-bucket/logs/sig-flexing/example-ci-run/403
func (af *StorageArtifactFetcher) artifacts(ctx context.Context, key string) ([]string, error) {
	src, err := af.newStorageJobSource(key)
	if err != nil {
		return nil, fmt.Errorf("Failed to get GCS job source from %s: %w", key, err)
	}

	listStart := time.Now()
	_, prefix := extractBucketPrefixPair(src.jobPath())
	artifacts := []string{}

	it, err := af.opener.Iterator(ctx, src.source, "")
	if err != nil {
		return artifacts, err
	}

	wait := []time.Duration{16, 32, 64, 128, 256, 256, 512, 512}
	for i := 0; ; {
		oAttrs, err := it.Next(ctx)
		if err == io.EOF {
			break
		}
		if err != nil {
			if err == context.Canceled {
				return nil, err
			}
			logrus.WithFields(fieldsForJob(src)).WithError(err).Error("Error accessing GCS artifact.")
			if i >= len(wait) {
				return artifacts, fmt.Errorf("timed out: error accessing GCS artifact: %w", err)
			}
			time.Sleep((wait[i] + time.Duration(rand.Intn(10))) * time.Millisecond)
			i++
			continue
		}
		artifacts = append(artifacts, strings.TrimPrefix(oAttrs.Name, prefix))
		i = 0
	}
	logrus.WithField("duration", time.Since(listStart).String()).Infof("Listed %d artifacts.", len(artifacts))
	return artifacts, nil
}

func (af *StorageArtifactFetcher) signURL(ctx context.Context, key string) (string, error) {
	return af.opener.SignedURL(ctx, key, pkgio.SignedURLOptions{
		UseGSCookieAuth: af.useCookieAuth,
	})
}

type storageArtifactHandle struct {
	pkgio.Opener
	Name string
}

func (h *storageArtifactHandle) NewReader(ctx context.Context) (io.ReadCloser, error) {
	return h.Opener.Reader(ctx, h.Name)
}

func (h *storageArtifactHandle) NewRangeReader(ctx context.Context, offset, length int64) (io.ReadCloser, error) {
	return h.Opener.RangeReader(ctx, h.Name, offset, length)
}

func (h *storageArtifactHandle) Attrs(ctx context.Context) (pkgio.Attributes, error) {
	return h.Opener.Attributes(ctx, h.Name)
}

func (h *storageArtifactHandle) UpdateAttrs(ctx context.Context, attrs pkgio.ObjectAttrsToUpdate) (*pkgio.Attributes, error) {
	return h.UpdateAtributes(ctx, h.Name, attrs)
}

// Artifact constructs a GCS artifact from the given GCS bucket and key. Uses the golang GCS library
// to get read handles. If the artifactName is not a valid key in the bucket, a handle will still be
// constructed and returned, but all read operations will fail (dictated by behavior of golang GCS lib).
// If no scheme is given we assume GS, e.g.:
// * test-bucket/logs/sig-flexing/example-ci-run/403 or
// * gs://test-bucket/logs/sig-flexing/example-ci-run/403
func (af *StorageArtifactFetcher) Artifact(ctx context.Context, key string, artifactName string, sizeLimit int64) (api.Artifact, error) {
	src, err := af.newStorageJobSource(key)
	if err != nil {
		return nil, fmt.Errorf("failed to get GCS job source from %s: %w", key, err)
	}

	_, prefix := extractBucketPrefixPair(src.jobPath())
	objName := path.Join(prefix, artifactName)
	obj := &storageArtifactHandle{Opener: af.opener, Name: fmt.Sprintf("%s%s/%s", src.linkPrefix, src.bucket, objName)}
	signedURL, err := af.signURL(ctx, fmt.Sprintf("%s%s/%s", src.linkPrefix, src.bucket, objName))
	if err != nil {
		return nil, err
	}
	return NewStorageArtifact(context.Background(), obj, signedURL, artifactName, sizeLimit), nil
}

func extractBucketPrefixPair(storagePath string) (string, string) {
	split := strings.SplitN(storagePath, "/", 2)
	return split[0], split[1]
}

// jobPath gets the prefix under which all of the job's artifacts are stored in GCS.
func (src *storageJobSource) jobPath() string {
	return fmt.Sprintf("%s/%s", src.bucket, src.jobPrefix)
}
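
// The sketch below is illustrative only and is not part of the upstream file:
// it shows one plausible way to exercise StorageArtifactFetcher from within
// the spyglass package, listing a job's artifacts and fetching a handle to one
// of them. The function name exampleFetch, the sample bucket path, and the
// 100 MiB size limit are assumptions made for this example; the real wiring is
// done by Deck's Spyglass setup.
func exampleFetch(ctx context.Context, opener pkgio.Opener, cfg config.Getter) error {
	// useCookieAuth=false: signed URLs are requested without GCS cookie auth.
	af := NewStorageArtifactFetcher(opener, cfg, false)

	// A job source key; when no scheme is given, "gs://" is assumed.
	key := "gs://test-bucket/logs/sig-flexing/example-ci-run/403"

	// List every artifact name stored under the job's prefix.
	names, err := af.artifacts(ctx, key)
	if err != nil {
		return err
	}

	// Fetch a handle to the first artifact, capping reads at 100 MiB.
	if len(names) > 0 {
		if _, err := af.Artifact(ctx, key, names[0], 100*1024*1024); err != nil {
			return err
		}
	}
	return nil
}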