/*
Copyright 2019 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package io

import (
	"context"
	"encoding/json"
	"errors"
	"fmt"
	"io"
	"net/http"
	"net/url"
	"os"
	"path"
	"strings"
	"sync"
	"time"

	utilerrors "k8s.io/apimachinery/pkg/util/errors"

	"cloud.google.com/go/storage"
	"github.com/sirupsen/logrus"
	"gocloud.dev/blob"
	"gocloud.dev/gcerrors"
	"google.golang.org/api/googleapi"
	"google.golang.org/api/option"

	"github.com/GoogleCloudPlatform/testgrid/util/gcs" // TODO(fejta): move this logic here

	"sigs.k8s.io/prow/pkg/io/providers"
)

const (
	// httpsScheme is the scheme used when constructing browsable GCS links in SignedURL.
	httpsScheme = "https"
)

// storageClient abstracts the subset of *storage.Client used by opener, so
// tests can substitute a fake GCS client (see NewGCSOpener).
type storageClient interface {
	Bucket(name string) *storage.BucketHandle
}

// Aliases to types in the standard library
type (
	ReadCloser  = io.ReadCloser
	WriteCloser = io.WriteCloser
	Writer      = io.Writer
	Closer      = io.Closer
)

// Attributes is a provider-agnostic view of a stored object's metadata,
// populated from either storage.ObjectAttrs (GCS) or blob.Attributes (gocloud).
type Attributes struct {
	// ContentEncoding specifies the encoding used for the blob's content, if any.
	// https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Content-Encoding
	ContentEncoding string
	// ContentType is the MIME type of the blob, if any.
	// https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Content-Type
	ContentType string
	// ContentDisposition specifies whether the blob content is expected to be displayed inline or as an attachment.
	// https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Content-Disposition
	ContentDisposition string
	// ContentLanguage specifies the language used in the blob's content, if any.
	// https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Content-Language
	ContentLanguage string
	// Size is the size of the blob's content in bytes.
	Size int64
	// Metadata includes user-metadata associated with the file
	Metadata map[string]string
}

// ObjectAttrsToUpdate names the object attributes that UpdateAtributes may change.
// A nil ContentEncoding leaves the stored encoding untouched.
type ObjectAttrsToUpdate struct {
	ContentEncoding *string
	Metadata        map[string]string
}

// Opener has methods to read and write paths
// NOTE(review): "UpdateAtributes" is misspelled but is part of the public
// interface, so it cannot be renamed without breaking implementers/callers.
type Opener interface {
	Reader(ctx context.Context, path string) (ReadCloser, error)
	RangeReader(ctx context.Context, path string, offset, length int64) (io.ReadCloser, error)
	Writer(ctx context.Context, path string, opts ...WriterOptions) (WriteCloser, error)
	Attributes(ctx context.Context, path string) (Attributes, error)
	SignedURL(ctx context.Context, path string, opts SignedURLOptions) (string, error)
	Iterator(ctx context.Context, prefix, delimiter string) (ObjectIterator, error)
	UpdateAtributes(context.Context, string, ObjectAttrsToUpdate) (*Attributes, error)
}

// opener is the default Opener implementation, dispatching on the path's
// scheme to GCS, local files, or gocloud (S3 and friends).
type opener struct {
	gcsCredentialsFile string
	gcsClient          storageClient
	s3Credentials      []byte
	// cachedBuckets caches opened gocloud buckets per bucket name;
	// guarded by cachedBucketsMutex.
	cachedBuckets      map[string]*blob.Bucket
	cachedBucketsMutex sync.Mutex
}
110 // For more details about the possible content of the credentialsFile see prow/io/providers.GetBucket 111 func NewOpener(ctx context.Context, gcsCredentialsFile, s3CredentialsFile string) (Opener, error) { 112 gcsClient, err := createGCSClient(ctx, gcsCredentialsFile) 113 if err != nil { 114 return nil, err 115 } 116 var s3Credentials []byte 117 if s3CredentialsFile != "" { 118 s3Credentials, err = os.ReadFile(s3CredentialsFile) 119 if err != nil { 120 return nil, err 121 } 122 } 123 return &opener{ 124 gcsClient: gcsClient, 125 gcsCredentialsFile: gcsCredentialsFile, 126 s3Credentials: s3Credentials, 127 cachedBuckets: map[string]*blob.Bucket{}, 128 }, nil 129 } 130 131 // NewGCSOpener can be used for testing against a fakeGCSClient 132 func NewGCSOpener(gcsClient *storage.Client) Opener { 133 return &opener{ 134 gcsClient: gcsClient, 135 cachedBuckets: map[string]*blob.Bucket{}, 136 } 137 } 138 139 func createGCSClient(ctx context.Context, gcsCredentialsFile string) (storageClient, error) { 140 // if gcsCredentialsFile is set, we have to be able to create storage.Client withCredentialsFile 141 if gcsCredentialsFile != "" { 142 return storage.NewClient(ctx, option.WithCredentialsFile(gcsCredentialsFile)) 143 } 144 145 // if gcsCredentialsFile is unset, first try to use the default credentials 146 gcsClient, err := storage.NewClient(ctx) 147 if err == nil { 148 return gcsClient, nil 149 } 150 logrus.WithError(err).Debug("Cannot load application default gcp credentials, falling back to anonymous client") 151 152 // if default credentials don't work, use an anonymous client, this should always work 153 return storage.NewClient(ctx, option.WithoutAuthentication()) 154 } 155 156 // ErrNotFoundTest can be used for unit tests to simulate NotFound errors. 157 // This is required because gocloud doesn't expose its errors. 
158 var ErrNotFoundTest = fmt.Errorf("not found error which should only be used in tests") 159 160 // IsNotExist will return true if the error shows that the object does not exist. 161 func IsNotExist(err error) bool { 162 if os.IsNotExist(err) { 163 return true 164 } 165 if errors.Is(err, ErrNotFoundTest) { 166 return true 167 } 168 if errors.Is(err, os.ErrNotExist) { 169 return true 170 } 171 if errors.Is(err, storage.ErrObjectNotExist) { 172 return true 173 } 174 return gcerrors.Code(err) == gcerrors.NotFound 175 } 176 177 // LogClose will attempt a close an log any error 178 func LogClose(c io.Closer) { 179 if err := c.Close(); err != nil { 180 logrus.WithError(err).Error("Failed to close") 181 } 182 } 183 184 func (o *opener) openGCS(path string) (*storage.ObjectHandle, error) { 185 if !strings.HasPrefix(path, providers.GS+"://") { 186 return nil, nil 187 } 188 if o.gcsClient == nil { 189 return nil, errors.New("no gcs client configured") 190 } 191 var p gcs.Path 192 if err := p.Set(path); err != nil { 193 return nil, err 194 } 195 if p.Object() == "" { 196 return nil, errors.New("object name is empty") 197 } 198 return o.gcsClient.Bucket(p.Bucket()).Object(p.Object()), nil 199 } 200 201 // getBucket opens a bucket 202 // The storageProvider is discovered based on the given path. 203 // The buckets are cached per bucket name. 
So we don't open a bucket multiple times in the same process 204 func (o *opener) getBucket(ctx context.Context, path string) (*blob.Bucket, string, error) { 205 _, bucketName, relativePath, err := providers.ParseStoragePath(path) 206 if err != nil { 207 return nil, "", fmt.Errorf("could not get bucket: %w", err) 208 } 209 210 o.cachedBucketsMutex.Lock() 211 defer o.cachedBucketsMutex.Unlock() 212 if bucket, ok := o.cachedBuckets[bucketName]; ok { 213 return bucket, relativePath, nil 214 } 215 216 bucket, err := providers.GetBucket(ctx, o.s3Credentials, path) 217 if err != nil { 218 return nil, "", err 219 } 220 o.cachedBuckets[bucketName] = bucket 221 return bucket, relativePath, nil 222 } 223 224 // Reader will open the path for reading, returning an IsNotExist() error when missing 225 func (o *opener) Reader(ctx context.Context, path string) (io.ReadCloser, error) { 226 if strings.HasPrefix(path, providers.GS+"://") { 227 g, err := o.openGCS(path) 228 if err != nil { 229 return nil, fmt.Errorf("bad gcs path: %w", err) 230 } 231 return g.NewReader(ctx) 232 } 233 if strings.HasPrefix(path, "/") { 234 return os.Open(path) 235 } 236 237 bucket, relativePath, err := o.getBucket(ctx, path) 238 if err != nil { 239 return nil, err 240 } 241 reader, err := bucket.NewReader(ctx, relativePath, nil) 242 if err != nil { 243 return nil, err 244 } 245 return reader, nil 246 } 247 248 func (o *opener) RangeReader(ctx context.Context, path string, offset, length int64) (io.ReadCloser, error) { 249 if strings.HasPrefix(path, providers.GS+"://") { 250 g, err := o.openGCS(path) 251 if err != nil { 252 return nil, fmt.Errorf("bad gcs path: %w", err) 253 } 254 return g.NewRangeReader(ctx, offset, length) 255 } 256 257 bucket, relativePath, err := o.getBucket(ctx, path) 258 if err != nil { 259 return nil, err 260 } 261 reader, err := bucket.NewRangeReader(ctx, relativePath, offset, length, nil) 262 if err != nil { 263 return nil, err 264 } 265 return reader, nil 266 } 267 268 var 
PreconditionFailedObjectAlreadyExists = fmt.Errorf("object already exists") 269 270 // Writer returns a writer that overwrites the path. 271 func (o *opener) Writer(ctx context.Context, p string, opts ...WriterOptions) (io.WriteCloser, error) { 272 options := &WriterOptions{} 273 for _, opt := range opts { 274 opt.Apply(options) 275 } 276 if strings.HasPrefix(p, providers.GS+"://") { 277 g, err := o.openGCS(p) 278 if err != nil { 279 return nil, fmt.Errorf("bad gcs path: %w", err) 280 } 281 if options.PreconditionDoesNotExist != nil && *options.PreconditionDoesNotExist { 282 g = g.If(storage.Conditions{DoesNotExist: true}) 283 } 284 285 writer := g.NewWriter(ctx) 286 options.apply(writer, nil) 287 return writer, nil 288 } 289 if strings.HasPrefix(p, "/") || strings.HasPrefix(p, providers.File+"://") { 290 p := strings.TrimPrefix(p, providers.File+"://") 291 // create parent dir if doesn't exist 292 dir := path.Dir(p) 293 if err := os.MkdirAll(dir, 0755); err != nil { 294 return nil, fmt.Errorf("create directory %q: %w", dir, err) 295 } 296 return os.OpenFile(p, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0666) 297 } 298 299 bucket, relativePath, err := o.getBucket(ctx, p) 300 if err != nil { 301 return nil, err 302 } 303 var wOpts blob.WriterOptions 304 options.apply(nil, &wOpts) 305 306 if options.PreconditionDoesNotExist != nil && *options.PreconditionDoesNotExist { 307 wOpts.BeforeWrite = func(asFunc func(interface{}) bool) error { 308 _, err := o.Reader(ctx, p) 309 if err != nil { 310 // we got an error, but not object not exists 311 if !IsNotExist(err) { 312 return err 313 } 314 // Precondition fulfilled, return nil 315 return nil 316 } 317 // Precondition failed, we got no err because object already exists 318 return PreconditionFailedObjectAlreadyExists 319 } 320 } 321 322 writer, err := bucket.NewWriter(ctx, relativePath, &wOpts) 323 if err != nil { 324 return nil, err 325 } 326 return writer, nil 327 } 328 329 func (o *opener) Attributes(ctx context.Context, path 
string) (Attributes, error) { 330 if strings.HasPrefix(path, providers.GS+"://") { 331 g, err := o.openGCS(path) 332 if err != nil { 333 return Attributes{}, fmt.Errorf("bad gcs path: %w", err) 334 } 335 attr, err := g.Attrs(ctx) 336 if err != nil { 337 return Attributes{}, err 338 } 339 return Attributes{ 340 ContentEncoding: attr.ContentEncoding, 341 ContentType: attr.ContentType, 342 ContentDisposition: attr.ContentDisposition, 343 ContentLanguage: attr.ContentLanguage, 344 Size: attr.Size, 345 Metadata: attr.Metadata, 346 }, nil 347 } 348 349 bucket, relativePath, err := o.getBucket(ctx, path) 350 if err != nil { 351 return Attributes{}, err 352 } 353 354 attr, err := bucket.Attributes(ctx, relativePath) 355 if err != nil { 356 return Attributes{}, err 357 } 358 return Attributes{ 359 ContentEncoding: attr.ContentEncoding, 360 ContentType: attr.ContentType, 361 ContentDisposition: attr.ContentDisposition, 362 ContentLanguage: attr.ContentLanguage, 363 Size: attr.Size, 364 Metadata: attr.Metadata, 365 }, nil 366 } 367 368 func (o *opener) UpdateAtributes(ctx context.Context, path string, attrs ObjectAttrsToUpdate) (*Attributes, error) { 369 if !strings.HasPrefix(path, providers.GS+"://") { 370 return nil, fmt.Errorf("unsupported provider: %q", path) 371 } 372 373 g, err := o.openGCS(path) 374 if err != nil { 375 return nil, fmt.Errorf("open: %w", err) 376 } 377 up := storage.ObjectAttrsToUpdate{ 378 Metadata: attrs.Metadata, 379 } 380 if attrs.ContentEncoding != nil { 381 up.ContentEncoding = *attrs.ContentEncoding 382 } 383 oa, err := g.Update(ctx, up) 384 if err != nil { 385 return nil, fmt.Errorf("update: %w", err) 386 } 387 return &Attributes{ 388 ContentEncoding: oa.ContentEncoding, 389 Size: oa.Size, 390 Metadata: oa.Metadata, 391 }, nil 392 } 393 394 const ( 395 GSAnonHost = "storage.googleapis.com" 396 GSCookieHost = "storage.cloud.google.com" 397 ) 398 399 func (o *opener) SignedURL(ctx context.Context, p string, opts SignedURLOptions) (string, error) { 
400 _, bucketName, relativePath, err := providers.ParseStoragePath(p) 401 if err != nil { 402 return "", fmt.Errorf("could not get bucket: %w", err) 403 } 404 if strings.HasPrefix(p, providers.GS+"://") { 405 // We specifically want to use cookie auth, see: 406 // https://cloud.google.com/storage/docs/access-control/cookie-based-authentication 407 if opts.UseGSCookieAuth { 408 artifactLink := &url.URL{ 409 Scheme: httpsScheme, 410 Host: GSCookieHost, 411 Path: path.Join(bucketName, relativePath), 412 } 413 return artifactLink.String(), nil 414 } 415 416 // If we're anonymous we can just return a plain URL. 417 if o.gcsCredentialsFile == "" { 418 artifactLink := &url.URL{ 419 Scheme: httpsScheme, 420 Host: GSAnonHost, 421 Path: path.Join(bucketName, relativePath), 422 } 423 return artifactLink.String(), nil 424 } 425 426 // TODO(fejta): do not require the json file https://github.com/kubernetes/test-infra/issues/16489 427 // As far as I can tell, there is no sane way to get these values other than just 428 // reading them out of the JSON file ourselves. 
429 f, err := os.Open(o.gcsCredentialsFile) 430 if err != nil { 431 return "", err 432 } 433 defer f.Close() 434 auth := struct { 435 Type string `json:"type"` 436 PrivateKey string `json:"private_key"` 437 ClientEmail string `json:"client_email"` 438 }{} 439 if err := json.NewDecoder(f).Decode(&auth); err != nil { 440 return "", err 441 } 442 if auth.Type != "service_account" { 443 return "", fmt.Errorf("only service_account GCS auth is supported, got %q", auth.Type) 444 } 445 return storage.SignedURL(bucketName, relativePath, &storage.SignedURLOptions{ 446 Method: "GET", 447 Expires: time.Now().Add(10 * time.Minute), 448 GoogleAccessID: auth.ClientEmail, 449 PrivateKey: []byte(auth.PrivateKey), 450 }) 451 } 452 453 bucket, relativePath, err := o.getBucket(ctx, p) 454 if err != nil { 455 return "", err 456 } 457 return bucket.SignedURL(ctx, relativePath, &blob.SignedURLOptions{ 458 Method: "GET", 459 Expiry: 10 * time.Minute, 460 }) 461 } 462 463 func (o *opener) Iterator(ctx context.Context, prefix, delimiter string) (ObjectIterator, error) { 464 storageProvider, bucketName, relativePath, err := providers.ParseStoragePath(prefix) 465 if err != nil { 466 return nil, fmt.Errorf("could not get bucket: %w", err) 467 } 468 469 if storageProvider == providers.GS { 470 if o.gcsClient == nil { 471 return nil, errors.New("no gcs client configured") 472 } 473 bkt := o.gcsClient.Bucket(bucketName) 474 query := &storage.Query{ 475 Prefix: relativePath, 476 Delimiter: delimiter, 477 Versions: false, 478 } 479 if delimiter == "" { 480 // query.SetAttrSelection cannot be used in directory-like mode (when delimiter != ""). 
481 if err := query.SetAttrSelection([]string{"Name"}); err != nil { 482 return nil, err 483 } 484 } 485 return gcsObjectIterator{ 486 Iterator: bkt.Objects(ctx, query), 487 }, nil 488 } 489 490 bucket, relativePath, err := o.getBucket(ctx, prefix) 491 if err != nil { 492 return nil, err 493 } 494 // listing a directory requires the "/" suffix except if we try to list the bucket's root directory 495 if relativePath != "" && !strings.HasSuffix(relativePath, "/") { 496 relativePath += "/" 497 } 498 return openerObjectIterator{ 499 Iterator: bucket.List(&blob.ListOptions{ 500 Prefix: relativePath, 501 Delimiter: delimiter, 502 }), 503 }, nil 504 } 505 506 func ReadContent(ctx context.Context, logger *logrus.Entry, opener Opener, path string) ([]byte, error) { 507 log := logger.WithFields(logrus.Fields{"path": path}) 508 log.Debug("Reading") 509 r, err := opener.Reader(ctx, path) 510 if err != nil { 511 return nil, err 512 } 513 defer r.Close() 514 return io.ReadAll(r) 515 } 516 517 func WriteContent(ctx context.Context, logger *logrus.Entry, opener Opener, path string, content []byte, opts ...WriterOptions) error { 518 log := logger.WithFields(logrus.Fields{"path": path, "write-options": opts}) 519 log.Debug("Uploading") 520 w, err := opener.Writer(ctx, path, opts...) 521 if err != nil { 522 return err 523 } 524 _, err = w.Write(content) 525 var writeErr error 526 if isErrUnexpected(err) { 527 writeErr = err 528 log.WithError(err).Warn("Uploading info to storage failed (write)") 529 } 530 err = w.Close() 531 var closeErr error 532 if isErrUnexpected(err) { 533 closeErr = err 534 log.WithError(err).Warn("Uploading info to storage failed (close)") 535 } 536 return utilerrors.NewAggregate([]error{writeErr, closeErr}) 537 } 538 539 func isErrUnexpected(err error) bool { 540 if err == nil { 541 return false 542 } 543 // Precondition Failed is expected and we can silently ignore it. 
544 if e, ok := err.(*googleapi.Error); ok { 545 if e.Code == http.StatusPreconditionFailed { 546 return false 547 } 548 } 549 // Precondition file already exists is expected 550 if errors.Is(err, PreconditionFailedObjectAlreadyExists) { 551 return false 552 } 553 554 return true 555 }