github.com/cornelk/go-cloud@v0.17.1/blob/gcsblob/gcsblob.go (about) 1 // Copyright 2018 The Go Cloud Development Kit Authors 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // https://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 // Package gcsblob provides a blob implementation that uses GCS. Use OpenBucket 16 // to construct a *blob.Bucket. 17 // 18 // URLs 19 // 20 // For blob.OpenBucket, gcsblob registers for the scheme "gs". 21 // The default URL opener will create a connection using default 22 // credentials from the environment, as described in 23 // https://cloud.google.com/docs/authentication/production. 24 // To customize the URL opener, or for more details on the URL format, 25 // see URLOpener. 26 // See https://github.com/cornelk/go-cloud/concepts/urls/ for background information. 27 // 28 // Escaping 29 // 30 // Go CDK supports all UTF-8 strings; to make this work with services lacking 31 // full UTF-8 support, strings must be escaped (during writes) and unescaped 32 // (during reads). The following escapes are performed for gcsblob: 33 // - Blob keys: ASCII characters 10 and 13 are escaped to "__0x<hex>__". 34 // Additionally, the "/" in "../" is escaped in the same way. 
35 // 36 // As 37 // 38 // gcsblob exposes the following types for As: 39 // - Bucket: *storage.Client 40 // - Error: *googleapi.Error 41 // - ListObject: storage.ObjectAttrs 42 // - ListOptions.BeforeList: *storage.Query 43 // - Reader: *storage.Reader 44 // - ReaderOptions.BeforeRead: **storage.ObjectHandle, *storage.Reader 45 // - Attributes: storage.ObjectAttrs 46 // - CopyOptions.BeforeCopy: *CopyObjectHandles, *storage.Copier 47 // - WriterOptions.BeforeWrite: **storage.ObjectHandle, *storage.Writer 48 package gcsblob // import "github.com/cornelk/go-cloud/blob/gcsblob" 49 50 import ( 51 "context" 52 "errors" 53 "fmt" 54 "io" 55 "io/ioutil" 56 "net/http" 57 "net/url" 58 "sort" 59 "strings" 60 "sync" 61 "time" 62 63 "cloud.google.com/go/storage" 64 "github.com/cornelk/go-cloud/blob" 65 "github.com/cornelk/go-cloud/blob/driver" 66 "github.com/cornelk/go-cloud/gcerrors" 67 "github.com/cornelk/go-cloud/gcp" 68 "github.com/cornelk/go-cloud/internal/escape" 69 "github.com/cornelk/go-cloud/internal/useragent" 70 "github.com/google/wire" 71 "google.golang.org/api/googleapi" 72 "google.golang.org/api/iterator" 73 "google.golang.org/api/option" 74 ) 75 76 const defaultPageSize = 1000 77 78 func init() { 79 blob.DefaultURLMux().RegisterBucket(Scheme, new(lazyCredsOpener)) 80 } 81 82 // Set holds Wire providers for this package. 83 var Set = wire.NewSet( 84 wire.Struct(new(URLOpener), "Client"), 85 ) 86 87 // lazyCredsOpener obtains Application Default Credentials on the first call 88 // lazyCredsOpener obtains Application Default Credentials on the first call 89 // to OpenBucketURL. 
90 type lazyCredsOpener struct { 91 init sync.Once 92 opener *URLOpener 93 err error 94 } 95 96 func (o *lazyCredsOpener) OpenBucketURL(ctx context.Context, u *url.URL) (*blob.Bucket, error) { 97 o.init.Do(func() { 98 creds, err := gcp.DefaultCredentials(ctx) 99 if err != nil { 100 o.err = err 101 return 102 } 103 client, err := gcp.NewHTTPClient(gcp.DefaultTransport(), creds.TokenSource) 104 if err != nil { 105 o.err = err 106 return 107 } 108 o.opener = &URLOpener{Client: client} 109 }) 110 if o.err != nil { 111 return nil, fmt.Errorf("open bucket %v: %v", u, o.err) 112 } 113 return o.opener.OpenBucketURL(ctx, u) 114 } 115 116 // Scheme is the URL scheme gcsblob registers its URLOpener under on 117 // blob.DefaultMux. 118 const Scheme = "gs" 119 120 // URLOpener opens GCS URLs like "gs://mybucket". 121 // 122 // The URL host is used as the bucket name. 123 // 124 // The following query parameters are supported: 125 // 126 // - access_id: sets Options.GoogleAccessID 127 // - private_key_path: path to read for Options.PrivateKey 128 type URLOpener struct { 129 // Client must be set to a non-nil HTTP client authenticated with 130 // Cloud Storage scope or equivalent. 131 Client *gcp.HTTPClient 132 133 // Options specifies the default options to pass to OpenBucket. 134 Options Options 135 } 136 137 // OpenBucketURL opens the GCS bucket with the same name as the URL's host. 
138 func (o *URLOpener) OpenBucketURL(ctx context.Context, u *url.URL) (*blob.Bucket, error) { 139 opts, err := o.forParams(ctx, u.Query()) 140 if err != nil { 141 return nil, fmt.Errorf("open bucket %v: %v", u, err) 142 } 143 return OpenBucket(ctx, o.Client, u.Host, opts) 144 } 145 146 func (o *URLOpener) forParams(ctx context.Context, q url.Values) (*Options, error) { 147 for k := range q { 148 if k != "access_id" && k != "private_key_path" { 149 return nil, fmt.Errorf("invalid query parameter %q", k) 150 } 151 } 152 opts := new(Options) 153 *opts = o.Options 154 if accessID := q.Get("access_id"); accessID != "" { 155 opts.GoogleAccessID = accessID 156 } 157 if keyPath := q.Get("private_key_path"); keyPath != "" { 158 pk, err := ioutil.ReadFile(keyPath) 159 if err != nil { 160 return nil, err 161 } 162 opts.PrivateKey = pk 163 } 164 return opts, nil 165 } 166 167 // Options sets options for constructing a *blob.Bucket backed by GCS. 168 type Options struct { 169 // GoogleAccessID represents the authorizer for SignedURL. 170 // Required to use SignedURL. 171 // See https://godoc.org/cloud.google.com/go/storage#SignedURLOptions. 172 GoogleAccessID string 173 174 // PrivateKey is the Google service account private key. 175 // Exactly one of PrivateKey or SignBytes must be non-nil to use SignedURL. 176 // See https://godoc.org/cloud.google.com/go/storage#SignedURLOptions. 177 PrivateKey []byte 178 179 // SignBytes is a function for implementing custom signing. 180 // Exactly one of PrivateKey or SignBytes must be non-nil to use SignedURL. 181 // See https://godoc.org/cloud.google.com/go/storage#SignedURLOptions. 182 SignBytes func([]byte) ([]byte, error) 183 } 184 185 // openBucket returns a GCS Bucket that communicates using the given HTTP client. 
186 func openBucket(ctx context.Context, client *gcp.HTTPClient, bucketName string, opts *Options) (*bucket, error) { 187 if client == nil { 188 return nil, errors.New("gcsblob.OpenBucket: client is required") 189 } 190 if bucketName == "" { 191 return nil, errors.New("gcsblob.OpenBucket: bucketName is required") 192 } 193 // We wrap the provided http.Client to add a Go CDK User-Agent. 194 c, err := storage.NewClient(ctx, option.WithHTTPClient(useragent.HTTPClient(&client.Client, "blob"))) 195 if err != nil { 196 return nil, err 197 } 198 if opts == nil { 199 opts = &Options{} 200 } 201 return &bucket{name: bucketName, client: c, opts: opts}, nil 202 } 203 204 // OpenBucket returns a *blob.Bucket backed by an existing GCS bucket. See the 205 // package documentation for an example. 206 func OpenBucket(ctx context.Context, client *gcp.HTTPClient, bucketName string, opts *Options) (*blob.Bucket, error) { 207 drv, err := openBucket(ctx, client, bucketName, opts) 208 if err != nil { 209 return nil, err 210 } 211 return blob.NewBucket(drv), nil 212 } 213 214 // bucket represents a GCS bucket, which handles read, write and delete operations 215 // on objects within it. 216 type bucket struct { 217 name string 218 client *storage.Client 219 opts *Options 220 } 221 222 var emptyBody = ioutil.NopCloser(strings.NewReader("")) 223 224 // reader reads a GCS object. It implements driver.Reader. 225 type reader struct { 226 body io.ReadCloser 227 attrs driver.ReaderAttributes 228 raw *storage.Reader 229 } 230 231 func (r *reader) Read(p []byte) (int, error) { 232 return r.body.Read(p) 233 } 234 235 // Close closes the reader itself. It must be called when done reading. 
236 func (r *reader) Close() error { 237 return r.body.Close() 238 } 239 240 func (r *reader) Attributes() *driver.ReaderAttributes { 241 return &r.attrs 242 } 243 244 func (r *reader) As(i interface{}) bool { 245 p, ok := i.(**storage.Reader) 246 if !ok { 247 return false 248 } 249 *p = r.raw 250 return true 251 } 252 253 func (b *bucket) ErrorCode(err error) gcerrors.ErrorCode { 254 if err == storage.ErrObjectNotExist { 255 return gcerrors.NotFound 256 } 257 if gerr, ok := err.(*googleapi.Error); ok { 258 switch gerr.Code { 259 case http.StatusNotFound: 260 return gcerrors.NotFound 261 case http.StatusPreconditionFailed: 262 return gcerrors.FailedPrecondition 263 } 264 } 265 return gcerrors.Unknown 266 } 267 268 func (b *bucket) Close() error { 269 return nil 270 } 271 272 // ListPaged implements driver.ListPaged. 273 func (b *bucket) ListPaged(ctx context.Context, opts *driver.ListOptions) (*driver.ListPage, error) { 274 bkt := b.client.Bucket(b.name) 275 query := &storage.Query{ 276 Prefix: escapeKey(opts.Prefix), 277 Delimiter: escapeKey(opts.Delimiter), 278 } 279 if opts.BeforeList != nil { 280 asFunc := func(i interface{}) bool { 281 p, ok := i.(**storage.Query) 282 if !ok { 283 return false 284 } 285 *p = query 286 return true 287 } 288 if err := opts.BeforeList(asFunc); err != nil { 289 return nil, err 290 } 291 } 292 pageSize := opts.PageSize 293 if pageSize == 0 { 294 pageSize = defaultPageSize 295 } 296 iter := bkt.Objects(ctx, query) 297 pager := iterator.NewPager(iter, pageSize, string(opts.PageToken)) 298 var objects []*storage.ObjectAttrs 299 nextPageToken, err := pager.NextPage(&objects) 300 if err != nil { 301 return nil, err 302 } 303 page := driver.ListPage{NextPageToken: []byte(nextPageToken)} 304 if len(objects) > 0 { 305 page.Objects = make([]*driver.ListObject, len(objects)) 306 for i, obj := range objects { 307 asFunc := func(i interface{}) bool { 308 p, ok := i.(*storage.ObjectAttrs) 309 if !ok { 310 return false 311 } 312 *p = *obj 313 
return true 314 } 315 if obj.Prefix == "" { 316 // Regular blob. 317 page.Objects[i] = &driver.ListObject{ 318 Key: unescapeKey(obj.Name), 319 ModTime: obj.Updated, 320 Size: obj.Size, 321 MD5: obj.MD5, 322 AsFunc: asFunc, 323 } 324 } else { 325 // "Directory". 326 page.Objects[i] = &driver.ListObject{ 327 Key: unescapeKey(obj.Prefix), 328 IsDir: true, 329 AsFunc: asFunc, 330 } 331 } 332 } 333 // GCS always returns "directories" at the end; sort them. 334 sort.Slice(page.Objects, func(i, j int) bool { 335 return page.Objects[i].Key < page.Objects[j].Key 336 }) 337 } 338 return &page, nil 339 } 340 341 // As implements driver.As. 342 func (b *bucket) As(i interface{}) bool { 343 p, ok := i.(**storage.Client) 344 if !ok { 345 return false 346 } 347 *p = b.client 348 return true 349 } 350 351 // As implements driver.ErrorAs. 352 func (b *bucket) ErrorAs(err error, i interface{}) bool { 353 switch v := err.(type) { 354 case *googleapi.Error: 355 if p, ok := i.(**googleapi.Error); ok { 356 *p = v 357 return true 358 } 359 } 360 return false 361 } 362 363 // Attributes implements driver.Attributes. 364 func (b *bucket) Attributes(ctx context.Context, key string) (*driver.Attributes, error) { 365 key = escapeKey(key) 366 bkt := b.client.Bucket(b.name) 367 obj := bkt.Object(key) 368 attrs, err := obj.Attrs(ctx) 369 if err != nil { 370 return nil, err 371 } 372 return &driver.Attributes{ 373 CacheControl: attrs.CacheControl, 374 ContentDisposition: attrs.ContentDisposition, 375 ContentEncoding: attrs.ContentEncoding, 376 ContentLanguage: attrs.ContentLanguage, 377 ContentType: attrs.ContentType, 378 Metadata: attrs.Metadata, 379 ModTime: attrs.Updated, 380 Size: attrs.Size, 381 MD5: attrs.MD5, 382 AsFunc: func(i interface{}) bool { 383 p, ok := i.(*storage.ObjectAttrs) 384 if !ok { 385 return false 386 } 387 *p = *attrs 388 return true 389 }, 390 }, nil 391 } 392 393 // NewRangeReader implements driver.NewRangeReader. 
394 func (b *bucket) NewRangeReader(ctx context.Context, key string, offset, length int64, opts *driver.ReaderOptions) (driver.Reader, error) { 395 key = escapeKey(key) 396 bkt := b.client.Bucket(b.name) 397 obj := bkt.Object(key) 398 399 // Add an extra level of indirection so that BeforeRead can replace obj 400 // if needed. For example, ObjectHandle.If returns a new ObjectHandle. 401 // Also, make the Reader lazily in case this replacement happens. 402 objp := &obj 403 makeReader := func() (*storage.Reader, error) { 404 return (*objp).NewRangeReader(ctx, offset, length) 405 } 406 407 var r *storage.Reader 408 var rerr error 409 madeReader := false 410 if opts.BeforeRead != nil { 411 asFunc := func(i interface{}) bool { 412 if p, ok := i.(***storage.ObjectHandle); ok && !madeReader { 413 *p = objp 414 return true 415 } 416 if p, ok := i.(**storage.Reader); ok { 417 if !madeReader { 418 r, rerr = makeReader() 419 madeReader = true 420 } 421 *p = r 422 return true 423 } 424 return false 425 } 426 if err := opts.BeforeRead(asFunc); err != nil { 427 return nil, err 428 } 429 } 430 if !madeReader { 431 r, rerr = makeReader() 432 } 433 if rerr != nil { 434 return nil, rerr 435 } 436 modTime, _ := r.LastModified() 437 return &reader{ 438 body: r, 439 attrs: driver.ReaderAttributes{ 440 ContentType: r.ContentType(), 441 ModTime: modTime, 442 Size: r.Size(), 443 }, 444 raw: r, 445 }, nil 446 } 447 448 // escapeKey does all required escaping for UTF-8 strings to work with GCS. 449 func escapeKey(key string) string { 450 return escape.HexEscape(key, func(r []rune, i int) bool { 451 switch { 452 // GCS doesn't handle these characters (determined via experimentation). 453 case r[i] == 10 || r[i] == 13: 454 return true 455 // For "../", escape the trailing slash. 456 case i > 1 && r[i] == '/' && r[i-1] == '.' && r[i-2] == '.': 457 return true 458 } 459 return false 460 }) 461 } 462 463 // unescapeKey reverses escapeKey. 
464 func unescapeKey(key string) string { 465 return escape.HexUnescape(key) 466 } 467 468 // NewTypedWriter implements driver.NewTypedWriter. 469 func (b *bucket) NewTypedWriter(ctx context.Context, key string, contentType string, opts *driver.WriterOptions) (driver.Writer, error) { 470 key = escapeKey(key) 471 bkt := b.client.Bucket(b.name) 472 obj := bkt.Object(key) 473 474 // Add an extra level of indirection so that BeforeWrite can replace obj 475 // if needed. For example, ObjectHandle.If returns a new ObjectHandle. 476 // Also, make the Writer lazily in case this replacement happens. 477 objp := &obj 478 makeWriter := func() *storage.Writer { 479 w := (*objp).NewWriter(ctx) 480 w.CacheControl = opts.CacheControl 481 w.ContentDisposition = opts.ContentDisposition 482 w.ContentEncoding = opts.ContentEncoding 483 w.ContentLanguage = opts.ContentLanguage 484 w.ContentType = contentType 485 w.ChunkSize = bufferSize(opts.BufferSize) 486 w.Metadata = opts.Metadata 487 w.MD5 = opts.ContentMD5 488 return w 489 } 490 491 var w *storage.Writer 492 if opts.BeforeWrite != nil { 493 asFunc := func(i interface{}) bool { 494 if p, ok := i.(***storage.ObjectHandle); ok && w == nil { 495 *p = objp 496 return true 497 } 498 if p, ok := i.(**storage.Writer); ok { 499 if w == nil { 500 w = makeWriter() 501 } 502 *p = w 503 return true 504 } 505 return false 506 } 507 if err := opts.BeforeWrite(asFunc); err != nil { 508 return nil, err 509 } 510 } 511 if w == nil { 512 w = makeWriter() 513 } 514 return w, nil 515 } 516 517 // CopyObjectHandles holds the ObjectHandles for the destination and source 518 // of a Copy. It is used by the BeforeCopy As hook. 519 type CopyObjectHandles struct { 520 Dst, Src *storage.ObjectHandle 521 } 522 523 // Copy implements driver.Copy. 
524 func (b *bucket) Copy(ctx context.Context, dstKey, srcKey string, opts *driver.CopyOptions) error { 525 dstKey = escapeKey(dstKey) 526 srcKey = escapeKey(srcKey) 527 bkt := b.client.Bucket(b.name) 528 529 // Add an extra level of indirection so that BeforeCopy can replace the 530 // dst or src ObjectHandles if needed. 531 // Also, make the Copier lazily in case this replacement happens. 532 handles := CopyObjectHandles{ 533 Dst: bkt.Object(dstKey), 534 Src: bkt.Object(srcKey), 535 } 536 makeCopier := func() *storage.Copier { 537 return handles.Dst.CopierFrom(handles.Src) 538 } 539 540 var copier *storage.Copier 541 if opts.BeforeCopy != nil { 542 asFunc := func(i interface{}) bool { 543 if p, ok := i.(**CopyObjectHandles); ok && copier == nil { 544 *p = &handles 545 return true 546 } 547 if p, ok := i.(**storage.Copier); ok { 548 if copier == nil { 549 copier = makeCopier() 550 } 551 *p = copier 552 return true 553 } 554 return false 555 } 556 if err := opts.BeforeCopy(asFunc); err != nil { 557 return err 558 } 559 } 560 if copier == nil { 561 copier = makeCopier() 562 } 563 _, err := copier.Run(ctx) 564 return err 565 } 566 567 // Delete implements driver.Delete. 
568 func (b *bucket) Delete(ctx context.Context, key string) error { 569 key = escapeKey(key) 570 bkt := b.client.Bucket(b.name) 571 obj := bkt.Object(key) 572 return obj.Delete(ctx) 573 } 574 575 func (b *bucket) SignedURL(ctx context.Context, key string, dopts *driver.SignedURLOptions) (string, error) { 576 if b.opts.GoogleAccessID == "" || (b.opts.PrivateKey == nil && b.opts.SignBytes == nil) { 577 return "", errors.New("to use SignedURL, you must call OpenBucket with a valid Options.GoogleAccessID and exactly one of Options.PrivateKey or Options.SignBytes") 578 } 579 key = escapeKey(key) 580 opts := &storage.SignedURLOptions{ 581 Expires: time.Now().Add(dopts.Expiry), 582 Method: dopts.Method, 583 GoogleAccessID: b.opts.GoogleAccessID, 584 PrivateKey: b.opts.PrivateKey, 585 SignBytes: b.opts.SignBytes, 586 } 587 return storage.SignedURL(b.name, key, opts) 588 } 589 590 func bufferSize(size int) int { 591 if size == 0 { 592 return googleapi.DefaultUploadChunkSize 593 } else if size > 0 { 594 return size 595 } 596 return 0 // disable buffering 597 }