github.com/matrixorigin/matrixone@v1.2.0/pkg/fileservice/aws_sdk_v2.go (about) 1 // Copyright 2023 Matrix Origin 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package fileservice 16 17 import ( 18 "context" 19 "crypto/tls" 20 "crypto/x509" 21 "errors" 22 "fmt" 23 "io" 24 "net" 25 stdhttp "net/http" 26 "os" 27 gotrace "runtime/trace" 28 "strings" 29 "time" 30 31 "github.com/aws/aws-sdk-go-v2/aws" 32 "github.com/aws/aws-sdk-go-v2/aws/retry" 33 "github.com/aws/aws-sdk-go-v2/aws/transport/http" 34 "github.com/aws/aws-sdk-go-v2/config" 35 "github.com/aws/aws-sdk-go-v2/credentials" 36 "github.com/aws/aws-sdk-go-v2/credentials/stscreds" 37 "github.com/aws/aws-sdk-go-v2/service/s3" 38 "github.com/aws/aws-sdk-go-v2/service/s3/types" 39 "github.com/aws/aws-sdk-go-v2/service/sts" 40 "github.com/matrixorigin/matrixone/pkg/common/moerr" 41 "github.com/matrixorigin/matrixone/pkg/logutil" 42 "github.com/matrixorigin/matrixone/pkg/perfcounter" 43 "github.com/matrixorigin/matrixone/pkg/util/trace" 44 "go.uber.org/zap" 45 ) 46 47 type AwsSDKv2 struct { 48 name string 49 bucket string 50 client *s3.Client 51 perfCounterSets []*perfcounter.CounterSet 52 listMaxKeys int32 53 } 54 55 func NewAwsSDKv2( 56 ctx context.Context, 57 args ObjectStorageArguments, 58 perfCounterSets []*perfcounter.CounterSet, 59 ) (*AwsSDKv2, error) { 60 61 if err := args.validate(); err != nil { 62 return nil, err 63 } 64 65 ctx, cancel := context.WithTimeout(ctx, time.Minute) 66 defer cancel() 67 68 // http client 69 dialer := &net.Dialer{ 70 KeepAlive: 5 * time.Second, 71 } 72 transport := &stdhttp.Transport{ 73 Proxy: stdhttp.ProxyFromEnvironment, 74 DialContext: dialer.DialContext, 75 MaxIdleConns: 100, 76 IdleConnTimeout: 180 * time.Second, 77 MaxIdleConnsPerHost: 100, 78 MaxConnsPerHost: 1000, 79 TLSHandshakeTimeout: 3 * time.Second, 80 ExpectContinueTimeout: 1 * time.Second, 81 ForceAttemptHTTP2: true, 82 } 83 if len(args.CertFiles) > 0 { 84 // custom certs 85 pool, err := x509.SystemCertPool() 86 if err != nil { 87 panic(err) 88 } 89 for _, path := range args.CertFiles { 90 content, err := os.ReadFile(path) 91 if err != nil { 92 logutil.Info("load cert file error", 93 zap.Any("err", err), 94 ) 95 // ignore 96 continue 97 } 98 logutil.Info("file service: load cert file", 99 zap.Any("path", path), 100 ) 101 pool.AppendCertsFromPEM(content) 102 } 103 tlsConfig := &tls.Config{ 104 InsecureSkipVerify: true, 105 RootCAs: pool, 106 } 107 transport.TLSClientConfig = tlsConfig 108 } 109 httpClient := &stdhttp.Client{ 110 Transport: transport, 111 } 112 113 // options for loading configs 114 loadConfigOptions := []func(*config.LoadOptions) error{ 115 config.WithLogger(logutil.GetS3Logger()), 116 config.WithClientLogMode( 117 aws.LogSigning | 118 aws.LogRetries | 119 aws.LogRequest | 120 aws.LogResponse | 121 aws.LogDeprecatedUsage | 122 aws.LogRequestEventMessage | 123 aws.LogResponseEventMessage, 124 ), 125 config.WithHTTPClient(httpClient), 126 } 127 128 // shared config profile 129 if args.SharedConfigProfile != "" { 130 loadConfigOptions = append(loadConfigOptions, 131 config.WithSharedConfigProfile(args.SharedConfigProfile), 132 ) 133 } 134 135 credentialProvider, err := args.credentialsProviderForAwsSDKv2(ctx) 136 if err != nil { 137 return nil, err 138 } 139 140 // validate 141 if credentialProvider != nil { 142 _, err := credentialProvider.Retrieve(ctx) 143 if err != nil { 144 return nil, err 145 } 146 } 147 148 // load configs 149 if credentialProvider != nil { 150 loadConfigOptions = append(loadConfigOptions, 151 config.WithCredentialsProvider( 152 credentialProvider, 153 ), 154 ) 155 } 156 config, err := config.LoadDefaultConfig(ctx, loadConfigOptions...) 157 if err != nil { 158 return nil, err 159 } 160 161 // options for s3 client 162 s3Options := []func(*s3.Options){ 163 func(opts *s3.Options) { 164 165 opts.Retryer = retry.NewStandard(func(o *retry.StandardOptions) { 166 o.MaxAttempts = maxRetryAttemps 167 o.RateLimiter = noOpRateLimit{} 168 }) 169 170 }, 171 } 172 173 // credential provider for s3 client 174 if credentialProvider != nil { 175 s3Options = append(s3Options, 176 func(opt *s3.Options) { 177 opt.Credentials = credentialProvider 178 }, 179 ) 180 } 181 182 // endpoint for s3 client 183 if args.Endpoint != "" { 184 if args.IsMinio { 185 // special handling for MinIO 186 s3Options = append(s3Options, 187 s3.WithEndpointResolver( 188 s3.EndpointResolverFunc( 189 func( 190 region string, 191 _ s3.EndpointResolverOptions, 192 ) ( 193 ep aws.Endpoint, 194 err error, 195 ) { 196 ep.URL = args.Endpoint 197 ep.Source = aws.EndpointSourceCustom 198 ep.HostnameImmutable = true 199 ep.SigningRegion = region 200 return 201 }, 202 ), 203 ), 204 ) 205 } else { 206 s3Options = append(s3Options, 207 s3.WithEndpointResolver( 208 s3.EndpointResolverFromURL(args.Endpoint), 209 ), 210 ) 211 } 212 } 213 214 // region for s3 client 215 if args.Region != "" { 216 s3Options = append(s3Options, 217 func(opt *s3.Options) { 218 opt.Region = args.Region 219 }, 220 ) 221 } 222 223 // new s3 client 224 client := s3.NewFromConfig( 225 config, 226 s3Options..., 227 ) 228 229 logutil.Info("new object storage", 230 zap.Any("sdk", "aws v2"), 231 zap.Any("arguments", args), 232 ) 233 234 if !args.NoBucketValidation { 235 // head bucket to validate 236 _, err = client.HeadBucket(ctx, &s3.HeadBucketInput{ 237 Bucket: ptrTo(args.Bucket), 238 }) 239 if err != nil { 240 return nil, moerr.NewInternalErrorNoCtx("bad s3 config: %v", err) 241 } 242 } 243 244 return &AwsSDKv2{ 245 name: args.Name, 246 bucket: args.Bucket, 247 client: client, 248 perfCounterSets: perfCounterSets, 249 }, nil 250 251 } 252 253 var _ ObjectStorage = new(AwsSDKv2) 254 255 func (a *AwsSDKv2) List( 256 ctx context.Context, 257 prefix string, 258 fn func(bool, string, int64) (bool, error), 259 ) error { 260 261 select { 262 case <-ctx.Done(): 263 return ctx.Err() 264 default: 265 } 266 267 var marker *string 268 269 loop1: 270 for { 271 output, err := a.listObjects( 272 ctx, 273 &s3.ListObjectsInput{ 274 Bucket: ptrTo(a.bucket), 275 Delimiter: ptrTo("/"), 276 Prefix: ptrTo(prefix), 277 Marker: marker, 278 MaxKeys: a.listMaxKeys, 279 }, 280 ) 281 if err != nil { 282 return err 283 } 284 285 for _, obj := range output.Contents { 286 more, err := fn(false, *obj.Key, obj.Size) 287 if err != nil { 288 return err 289 } 290 if !more { 291 break loop1 292 } 293 } 294 295 for _, prefix := range output.CommonPrefixes { 296 more, err := fn(true, *prefix.Prefix, 0) 297 if err != nil { 298 return err 299 } 300 if !more { 301 break loop1 302 } 303 } 304 305 if !output.IsTruncated { 306 break 307 } 308 marker = output.NextMarker 309 } 310 311 return nil 312 } 313 314 func (a *AwsSDKv2) Stat( 315 ctx context.Context, 316 key string, 317 ) ( 318 size int64, 319 err error, 320 ) { 321 322 select { 323 case <-ctx.Done(): 324 err = ctx.Err() 325 return 326 default: 327 } 328 329 output, err := a.headObject( 330 ctx, 331 &s3.HeadObjectInput{ 332 Bucket: ptrTo(a.bucket), 333 Key: ptrTo(key), 334 }, 335 ) 336 if err != nil { 337 var httpError *http.ResponseError 338 if errors.As(err, &httpError) { 339 if httpError.Response.StatusCode == 404 { 340 err = moerr.NewFileNotFound(ctx, key) 341 return 342 } 343 } 344 return 345 } 346 347 size = output.ContentLength 348 349 return 350 } 351 352 func (a *AwsSDKv2) Exists( 353 ctx context.Context, 354 key string, 355 ) ( 356 bool, 357 error, 358 ) { 359 output, err := a.headObject( 360 ctx, 361 &s3.HeadObjectInput{ 362 Bucket: ptrTo(a.bucket), 363 Key: ptrTo(key), 364 }, 365 ) 366 if err != nil { 367 var httpError *http.ResponseError 368 if errors.As(err, &httpError) { 369 if httpError.Response.StatusCode == 404 { 370 return false, nil 371 } 372 } 373 return false, err 374 } 375 return output != nil, nil 376 } 377 378 func (a *AwsSDKv2) Write( 379 ctx context.Context, 380 key string, 381 r io.Reader, 382 size int64, 383 expire *time.Time, 384 ) ( 385 err error, 386 ) { 387 388 _, err = a.putObject( 389 ctx, 390 &s3.PutObjectInput{ 391 Bucket: ptrTo(a.bucket), 392 Key: ptrTo(key), 393 Body: r, 394 ContentLength: size, 395 Expires: expire, 396 }, 397 ) 398 if err != nil { 399 return err 400 } 401 402 return 403 } 404 405 func (a *AwsSDKv2) Read( 406 ctx context.Context, 407 key string, 408 min *int64, 409 max *int64, 410 ) ( 411 r io.ReadCloser, 412 err error, 413 ) { 414 415 if max == nil { 416 // read to end 417 r, err := a.getObject( 418 ctx, 419 min, 420 nil, 421 &s3.GetObjectInput{ 422 Bucket: ptrTo(a.bucket), 423 Key: ptrTo(key), 424 }, 425 ) 426 err = a.mapError(err, key) 427 if err != nil { 428 return nil, err 429 } 430 return r, nil 431 } 432 433 r, err = a.getObject( 434 ctx, 435 min, 436 max, 437 &s3.GetObjectInput{ 438 Bucket: ptrTo(a.bucket), 439 Key: ptrTo(key), 440 }, 441 ) 442 err = a.mapError(err, key) 443 if err != nil { 444 return nil, err 445 } 446 return &readCloser{ 447 r: io.LimitReader(r, int64(*max-*min)), 448 closeFunc: r.Close, 449 }, nil 450 } 451 452 func (a *AwsSDKv2) Delete( 453 ctx context.Context, 454 keys ...string, 455 ) ( 456 err error, 457 ) { 458 459 select { 460 case <-ctx.Done(): 461 return ctx.Err() 462 default: 463 } 464 465 if len(keys) == 0 { 466 return nil 467 } 468 if len(keys) == 1 { 469 return a.deleteSingle(ctx, keys[0]) 470 } 471 472 objs := make([]types.ObjectIdentifier, 0, 1000) 473 for _, key := range keys { 474 objs = append(objs, types.ObjectIdentifier{Key: ptrTo(key)}) 475 if len(objs) == 1000 { 476 if err := a.deleteMultiObj(ctx, objs); err != nil { 477 return err 478 } 479 objs = objs[:0] 480 } 481 } 482 if err := a.deleteMultiObj(ctx, objs); err != nil { 483 return err 484 } 485 return nil 486 } 487 488 func (a *AwsSDKv2) deleteSingle(ctx context.Context, key string) error { 489 ctx, span := trace.Start(ctx, "AwsSDKv2.deleteSingle") 490 defer span.End() 491 _, err := a.deleteObject( 492 ctx, 493 &s3.DeleteObjectInput{ 494 Bucket: ptrTo(a.bucket), 495 Key: ptrTo(key), 496 }, 497 ) 498 if err != nil { 499 return err 500 } 501 502 return nil 503 } 504 505 func (a *AwsSDKv2) deleteMultiObj(ctx context.Context, objs []types.ObjectIdentifier) error { 506 ctx, span := trace.Start(ctx, "AwsSDKv2.deleteMultiObj") 507 defer span.End() 508 output, err := a.deleteObjects(ctx, &s3.DeleteObjectsInput{ 509 Bucket: ptrTo(a.bucket), 510 Delete: &types.Delete{ 511 Objects: objs, 512 // In quiet mode the response includes only keys where the delete action encountered an error. 513 Quiet: true, 514 }, 515 }) 516 // delete api failed 517 if err != nil { 518 return err 519 } 520 // delete api success, but with delete file failed. 521 message := strings.Builder{} 522 if len(output.Errors) > 0 { 523 for _, Error := range output.Errors { 524 if *Error.Code == (*types.NoSuchKey)(nil).ErrorCode() { 525 continue 526 } 527 message.WriteString(fmt.Sprintf("%s: %s, %s;", *Error.Key, *Error.Code, *Error.Message)) 528 } 529 } 530 if message.Len() > 0 { 531 return moerr.NewInternalErrorNoCtx("S3 Delete failed: %s", message.String()) 532 } 533 return nil 534 } 535 536 func (a *AwsSDKv2) listObjects(ctx context.Context, params *s3.ListObjectsInput, optFns ...func(*s3.Options)) (*s3.ListObjectsOutput, error) { 537 ctx, task := gotrace.NewTask(ctx, "AwsSDKv2.listObjects") 538 defer task.End() 539 perfcounter.Update(ctx, func(counter *perfcounter.CounterSet) { 540 counter.FileService.S3.List.Add(1) 541 }, a.perfCounterSets...) 542 return DoWithRetry( 543 "s3 list objects", 544 func() (*s3.ListObjectsOutput, error) { 545 return a.client.ListObjects(ctx, params, optFns...) 546 }, 547 maxRetryAttemps, 548 IsRetryableError, 549 ) 550 } 551 552 func (a *AwsSDKv2) headObject(ctx context.Context, params *s3.HeadObjectInput, optFns ...func(*s3.Options)) (*s3.HeadObjectOutput, error) { 553 ctx, task := gotrace.NewTask(ctx, "AwsSDKv2.headObject") 554 defer task.End() 555 perfcounter.Update(ctx, func(counter *perfcounter.CounterSet) { 556 counter.FileService.S3.Head.Add(1) 557 }, a.perfCounterSets...) 558 return DoWithRetry( 559 "s3 head object", 560 func() (*s3.HeadObjectOutput, error) { 561 return a.client.HeadObject(ctx, params, optFns...) 562 }, 563 maxRetryAttemps, 564 IsRetryableError, 565 ) 566 } 567 568 func (a *AwsSDKv2) putObject(ctx context.Context, params *s3.PutObjectInput, optFns ...func(*s3.Options)) (*s3.PutObjectOutput, error) { 569 ctx, task := gotrace.NewTask(ctx, "AwsSDKv2.putObject") 570 defer task.End() 571 perfcounter.Update(ctx, func(counter *perfcounter.CounterSet) { 572 counter.FileService.S3.Put.Add(1) 573 }, a.perfCounterSets...) 574 // not retryable because Reader may be half consumed 575 return a.client.PutObject(ctx, params, optFns...) 576 } 577 578 func (a *AwsSDKv2) getObject(ctx context.Context, min *int64, max *int64, params *s3.GetObjectInput, optFns ...func(*s3.Options)) (io.ReadCloser, error) { 579 ctx, task := gotrace.NewTask(ctx, "AwsSDKv2.getObject") 580 defer task.End() 581 perfcounter.Update(ctx, func(counter *perfcounter.CounterSet) { 582 counter.FileService.S3.Get.Add(1) 583 }, a.perfCounterSets...) 584 r, err := newRetryableReader( 585 func(offset int64) (io.ReadCloser, error) { 586 var rang string 587 if max != nil { 588 rang = fmt.Sprintf("bytes=%d-%d", offset, *max) 589 } else { 590 rang = fmt.Sprintf("bytes=%d-", offset) 591 } 592 params.Range = &rang 593 output, err := DoWithRetry( 594 "s3 get object", 595 func() (*s3.GetObjectOutput, error) { 596 return a.client.GetObject(ctx, params, optFns...) 597 }, 598 maxRetryAttemps, 599 IsRetryableError, 600 ) 601 if err != nil { 602 return nil, err 603 } 604 return output.Body, nil 605 }, 606 *min, 607 IsRetryableError, 608 ) 609 if err != nil { 610 return nil, err 611 } 612 return r, nil 613 } 614 615 func (a *AwsSDKv2) deleteObject(ctx context.Context, params *s3.DeleteObjectInput, optFns ...func(*s3.Options)) (*s3.DeleteObjectOutput, error) { 616 ctx, task := gotrace.NewTask(ctx, "AwsSDKv2.deleteObject") 617 defer task.End() 618 perfcounter.Update(ctx, func(counter *perfcounter.CounterSet) { 619 counter.FileService.S3.Delete.Add(1) 620 }, a.perfCounterSets...) 621 return DoWithRetry( 622 "s3 delete object", 623 func() (*s3.DeleteObjectOutput, error) { 624 return a.client.DeleteObject(ctx, params, optFns...) 625 }, 626 maxRetryAttemps, 627 IsRetryableError, 628 ) 629 } 630 631 func (a *AwsSDKv2) deleteObjects(ctx context.Context, params *s3.DeleteObjectsInput, optFns ...func(*s3.Options)) (*s3.DeleteObjectsOutput, error) { 632 ctx, task := gotrace.NewTask(ctx, "AwsSDKv2.deleteObjects") 633 defer task.End() 634 perfcounter.Update(ctx, func(counter *perfcounter.CounterSet) { 635 counter.FileService.S3.DeleteMulti.Add(1) 636 }, a.perfCounterSets...) 637 return DoWithRetry( 638 "s3 delete objects", 639 func() (*s3.DeleteObjectsOutput, error) { 640 return a.client.DeleteObjects(ctx, params, optFns...) 641 }, 642 maxRetryAttemps, 643 IsRetryableError, 644 ) 645 } 646 647 func (a *AwsSDKv2) mapError(err error, path string) error { 648 if err == nil { 649 return nil 650 } 651 var httpError *http.ResponseError 652 if errors.As(err, &httpError) { 653 if httpError.Response.StatusCode == 404 { 654 return moerr.NewFileNotFoundNoCtx(path) 655 } 656 } 657 return err 658 } 659 660 // from https://github.com/aws/aws-sdk-go-v2/issues/543 661 type noOpRateLimit struct{} 662 663 func (noOpRateLimit) AddTokens(uint) error { return nil } 664 func (noOpRateLimit) GetToken(context.Context, uint) (func() error, error) { 665 return noOpToken, nil 666 } 667 func noOpToken() error { return nil } 668 669 func (o ObjectStorageArguments) credentialsProviderForAwsSDKv2( 670 ctx context.Context, 671 ) ( 672 ret aws.CredentialsProvider, 673 err error, 674 ) { 675 676 // cache 677 defer func() { 678 if ret != nil { 679 ret = aws.NewCredentialsCache(ret) 680 } 681 }() 682 683 defer func() { 684 // handle assume role 685 if o.RoleARN == "" { 686 return 687 } 688 689 logutil.Info("setting assume role provider") 690 // load default options 691 awsConfig, err := config.LoadDefaultConfig(ctx) 692 if err != nil { 693 panic(err) 694 } 695 if ret != nil { 696 logutil.Info("using upstream credential provider for assume role", 697 zap.Any("type", fmt.Sprintf("%T", ret)), 698 ) 699 awsConfig.Credentials = ret 700 } 701 702 stsSvc := sts.NewFromConfig(awsConfig, func(options *sts.Options) { 703 if o.Region == "" { 704 options.Region = "ap-northeast-1" 705 } else { 706 options.Region = o.Region 707 } 708 }) 709 provider := stscreds.NewAssumeRoleProvider( 710 stsSvc, 711 o.RoleARN, 712 func(opts *stscreds.AssumeRoleOptions) { 713 if o.ExternalID != "" { 714 opts.ExternalID = &o.ExternalID 715 } 716 }, 717 ) 718 _, err = provider.Retrieve(ctx) 719 if err != nil { 720 // not good 721 logutil.Info("bad assume role provider", 722 zap.Any("err", err), 723 ) 724 return 725 } 726 727 // set to assume role provider 728 ret = provider 729 }() 730 731 // static credential 732 if o.KeyID != "" && o.KeySecret != "" { 733 // static 734 logutil.Info("static credential") 735 return credentials.NewStaticCredentialsProvider(o.KeyID, o.KeySecret, o.SessionToken), nil 736 } 737 738 if !o.shouldLoadDefaultCredentials() { 739 return nil, moerr.NewInvalidInputNoCtx( 740 "no valid credentials", 741 ) 742 } 743 744 return 745 }