github.com/matrixorigin/matrixone@v1.2.0/pkg/fileservice/aws_sdk_v1.go (about)

     1  // Copyright 2023 Matrix Origin
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package fileservice
    16  
    17  import (
    18  	"context"
    19  	"crypto/tls"
    20  	"crypto/x509"
    21  	"errors"
    22  	"fmt"
    23  	"io"
    24  	"net"
    25  	stdhttp "net/http"
    26  	"os"
    27  	gotrace "runtime/trace"
    28  	"strings"
    29  	"time"
    30  
    31  	"github.com/aws/aws-sdk-go/aws"
    32  	"github.com/aws/aws-sdk-go/aws/awserr"
    33  	"github.com/aws/aws-sdk-go/aws/credentials"
    34  	"github.com/aws/aws-sdk-go/aws/session"
    35  	"github.com/aws/aws-sdk-go/service/s3"
    36  	"github.com/matrixorigin/matrixone/pkg/common/moerr"
    37  	"github.com/matrixorigin/matrixone/pkg/logutil"
    38  	"github.com/matrixorigin/matrixone/pkg/perfcounter"
    39  	"github.com/matrixorigin/matrixone/pkg/util/trace"
    40  	"go.uber.org/zap"
    41  )
    42  
    43  type AwsSDKv1 struct {
    44  	name            string
    45  	bucket          string
    46  	client          *s3.S3
    47  	perfCounterSets []*perfcounter.CounterSet
    48  	listMaxKeys     int64
    49  }
    50  
    51  func NewAwsSDKv1(
    52  	ctx context.Context,
    53  	args ObjectStorageArguments,
    54  	perfCounterSets []*perfcounter.CounterSet,
    55  ) (*AwsSDKv1, error) {
    56  
    57  	if err := args.validate(); err != nil {
    58  		return nil, err
    59  	}
    60  
    61  	// configs
    62  	config := new(aws.Config)
    63  	if args.Endpoint != "" {
    64  		config.Endpoint = &args.Endpoint
    65  	}
    66  	if args.Region != "" {
    67  		config.Region = &args.Region
    68  	}
    69  
    70  	// for 天翼云
    71  	// from https://gitee.com/ctyun-xstore/ctyun-xstore-sdk-demo/blob/master/xos-go-demo/s3demo.go
    72  	if strings.Contains(args.Endpoint, "ctyunapi.cn") {
    73  		config.S3ForcePathStyle = aws.Bool(true)
    74  		config.DisableSSL = aws.Bool(true)
    75  	}
    76  
    77  	// http client
    78  	dialer := &net.Dialer{
    79  		KeepAlive: 5 * time.Second,
    80  	}
    81  	transport := &stdhttp.Transport{
    82  		Proxy:                 stdhttp.ProxyFromEnvironment,
    83  		DialContext:           dialer.DialContext,
    84  		MaxIdleConns:          100,
    85  		IdleConnTimeout:       180 * time.Second,
    86  		MaxIdleConnsPerHost:   100,
    87  		MaxConnsPerHost:       1000,
    88  		TLSHandshakeTimeout:   3 * time.Second,
    89  		ExpectContinueTimeout: 1 * time.Second,
    90  		ForceAttemptHTTP2:     true,
    91  	}
    92  	if len(args.CertFiles) > 0 {
    93  		// custom certs
    94  		pool, err := x509.SystemCertPool()
    95  		if err != nil {
    96  			panic(err)
    97  		}
    98  		for _, path := range args.CertFiles {
    99  			content, err := os.ReadFile(path)
   100  			if err != nil {
   101  				logutil.Info("load cert file error",
   102  					zap.Any("err", err),
   103  				)
   104  				// ignore
   105  				continue
   106  			}
   107  			logutil.Info("file service: load cert file",
   108  				zap.Any("path", path),
   109  			)
   110  			pool.AppendCertsFromPEM(content)
   111  		}
   112  		tlsConfig := &tls.Config{
   113  			InsecureSkipVerify: true,
   114  			RootCAs:            pool,
   115  		}
   116  		transport.TLSClientConfig = tlsConfig
   117  	}
   118  	httpClient := &stdhttp.Client{
   119  		Transport: transport,
   120  	}
   121  	config.HTTPClient = httpClient
   122  
   123  	// credentials
   124  	if args.KeyID != "" && args.KeySecret != "" {
   125  		config.Credentials = credentials.NewStaticCredentials(
   126  			args.KeyID,
   127  			args.KeySecret,
   128  			args.SessionToken,
   129  		)
   130  	}
   131  
   132  	sess, err := session.NewSession(config)
   133  	if err != nil {
   134  		return nil, err
   135  	}
   136  
   137  	client := s3.New(sess, config)
   138  
   139  	logutil.Info("new object storage",
   140  		zap.Any("sdk", "aws v1"),
   141  		zap.Any("arguments", args),
   142  	)
   143  
   144  	if !args.NoBucketValidation {
   145  		// head bucket to validate
   146  		_, err = client.HeadBucket(&s3.HeadBucketInput{
   147  			Bucket: ptrTo(args.Bucket),
   148  		})
   149  		if err != nil {
   150  			return nil, moerr.NewInternalErrorNoCtx("bad s3 config: %v", err)
   151  		}
   152  	}
   153  
   154  	return &AwsSDKv1{
   155  		name:            args.Name,
   156  		bucket:          args.Bucket,
   157  		client:          client,
   158  		perfCounterSets: perfCounterSets,
   159  	}, nil
   160  
   161  }
   162  
   163  var _ ObjectStorage = new(AwsSDKv1)
   164  
   165  func (a *AwsSDKv1) List(
   166  	ctx context.Context,
   167  	prefix string,
   168  	fn func(bool, string, int64) (bool, error),
   169  ) error {
   170  
   171  	select {
   172  	case <-ctx.Done():
   173  		return ctx.Err()
   174  	default:
   175  	}
   176  
   177  	var cont *string
   178  
   179  loop1:
   180  	for {
   181  		output, err := a.listObjects(
   182  			ctx,
   183  			&s3.ListObjectsV2Input{
   184  				Bucket:            ptrTo(a.bucket),
   185  				Delimiter:         ptrTo("/"),
   186  				Prefix:            ptrTo(prefix),
   187  				ContinuationToken: cont,
   188  				MaxKeys:           ptrTo(a.listMaxKeys),
   189  			},
   190  		)
   191  		if err != nil {
   192  			return err
   193  		}
   194  
   195  		for _, obj := range output.Contents {
   196  			more, err := fn(false, *obj.Key, *obj.Size)
   197  			if err != nil {
   198  				return err
   199  			}
   200  			if !more {
   201  				break loop1
   202  			}
   203  		}
   204  
   205  		for _, prefix := range output.CommonPrefixes {
   206  			more, err := fn(true, *prefix.Prefix, 0)
   207  			if err != nil {
   208  				return err
   209  			}
   210  			if !more {
   211  				break loop1
   212  			}
   213  		}
   214  
   215  		if !*output.IsTruncated {
   216  			break
   217  		}
   218  		cont = output.ContinuationToken
   219  	}
   220  
   221  	return nil
   222  }
   223  
   224  func (a *AwsSDKv1) Stat(
   225  	ctx context.Context,
   226  	key string,
   227  ) (
   228  	size int64,
   229  	err error,
   230  ) {
   231  
   232  	select {
   233  	case <-ctx.Done():
   234  		err = ctx.Err()
   235  		return
   236  	default:
   237  	}
   238  
   239  	output, err := a.headObject(
   240  		ctx,
   241  		&s3.HeadObjectInput{
   242  			Bucket: ptrTo(a.bucket),
   243  			Key:    ptrTo(key),
   244  		},
   245  	)
   246  	if err != nil {
   247  		return
   248  	}
   249  
   250  	size = *output.ContentLength
   251  
   252  	return
   253  }
   254  
   255  func (a *AwsSDKv1) Exists(
   256  	ctx context.Context,
   257  	key string,
   258  ) (
   259  	bool,
   260  	error,
   261  ) {
   262  	output, err := a.headObject(
   263  		ctx,
   264  		&s3.HeadObjectInput{
   265  			Bucket: ptrTo(a.bucket),
   266  			Key:    ptrTo(key),
   267  		},
   268  	)
   269  	if err != nil {
   270  		if a.is404(err) {
   271  			return false, nil
   272  		}
   273  		return false, err
   274  	}
   275  	return output != nil, nil
   276  }
   277  
   278  func (a *AwsSDKv1) Write(
   279  	ctx context.Context,
   280  	key string,
   281  	r io.Reader,
   282  	size int64,
   283  	expire *time.Time,
   284  ) (
   285  	err error,
   286  ) {
   287  
   288  	_, err = a.putObject(
   289  		ctx,
   290  		&s3.PutObjectInput{
   291  			Bucket:        ptrTo(a.bucket),
   292  			Key:           ptrTo(key),
   293  			Body:          r.(io.ReadSeeker), //TODO
   294  			ContentLength: ptrTo(size),
   295  			Expires:       expire,
   296  		},
   297  	)
   298  	if err != nil {
   299  		return err
   300  	}
   301  
   302  	return
   303  }
   304  
   305  func (a *AwsSDKv1) Read(
   306  	ctx context.Context,
   307  	key string,
   308  	min *int64,
   309  	max *int64,
   310  ) (
   311  	r io.ReadCloser,
   312  	err error,
   313  ) {
   314  
   315  	if max == nil {
   316  		// read to end
   317  		r, err := a.getObject(
   318  			ctx,
   319  			min,
   320  			nil,
   321  			&s3.GetObjectInput{
   322  				Bucket: ptrTo(a.bucket),
   323  				Key:    ptrTo(key),
   324  			},
   325  		)
   326  		if err != nil {
   327  			return nil, err
   328  		}
   329  		return r, nil
   330  	}
   331  
   332  	r, err = a.getObject(
   333  		ctx,
   334  		min,
   335  		max,
   336  		&s3.GetObjectInput{
   337  			Bucket: ptrTo(a.bucket),
   338  			Key:    ptrTo(key),
   339  		},
   340  	)
   341  	if err != nil {
   342  		return nil, err
   343  	}
   344  	return &readCloser{
   345  		r:         io.LimitReader(r, int64(*max-*min)),
   346  		closeFunc: r.Close,
   347  	}, nil
   348  }
   349  
   350  func (a *AwsSDKv1) Delete(
   351  	ctx context.Context,
   352  	keys ...string,
   353  ) (
   354  	err error,
   355  ) {
   356  
   357  	select {
   358  	case <-ctx.Done():
   359  		return ctx.Err()
   360  	default:
   361  	}
   362  
   363  	if len(keys) == 0 {
   364  		return nil
   365  	}
   366  	if len(keys) == 1 {
   367  		return a.deleteSingle(ctx, keys[0])
   368  	}
   369  
   370  	objs := make([]*s3.ObjectIdentifier, 0, 1000)
   371  	for _, key := range keys {
   372  		objs = append(objs, &s3.ObjectIdentifier{Key: ptrTo(key)})
   373  		if len(objs) == 1000 {
   374  			if err := a.deleteMultiObj(ctx, objs); err != nil {
   375  				return err
   376  			}
   377  			objs = objs[:0]
   378  		}
   379  	}
   380  	if err := a.deleteMultiObj(ctx, objs); err != nil {
   381  		return err
   382  	}
   383  	return nil
   384  }
   385  
   386  func (a *AwsSDKv1) deleteSingle(ctx context.Context, key string) error {
   387  	ctx, span := trace.Start(ctx, "AwsSDKv1.deleteSingle")
   388  	defer span.End()
   389  	_, err := a.deleteObject(
   390  		ctx,
   391  		&s3.DeleteObjectInput{
   392  			Bucket: ptrTo(a.bucket),
   393  			Key:    ptrTo(key),
   394  		},
   395  	)
   396  	if err != nil {
   397  		return err
   398  	}
   399  
   400  	return nil
   401  }
   402  
   403  func (a *AwsSDKv1) deleteMultiObj(ctx context.Context, objs []*s3.ObjectIdentifier) error {
   404  	ctx, span := trace.Start(ctx, "AwsSDKv1.deleteMultiObj")
   405  	defer span.End()
   406  	output, err := a.deleteObjects(ctx, &s3.DeleteObjectsInput{
   407  		Bucket: ptrTo(a.bucket),
   408  		Delete: &s3.Delete{
   409  			Objects: objs,
   410  			// In quiet mode the response includes only keys where the delete action encountered an error.
   411  			Quiet: ptrTo(true),
   412  		},
   413  	})
   414  	// delete api failed
   415  	if err != nil {
   416  		return err
   417  	}
   418  	// delete api success, but with delete file failed.
   419  	message := strings.Builder{}
   420  	if len(output.Errors) > 0 {
   421  		for _, Error := range output.Errors {
   422  			if *Error.Code == s3.ErrCodeNoSuchKey {
   423  				continue
   424  			}
   425  			message.WriteString(fmt.Sprintf("%s: %s, %s;", *Error.Key, *Error.Code, *Error.Message))
   426  		}
   427  	}
   428  	if message.Len() > 0 {
   429  		return moerr.NewInternalErrorNoCtx("S3 Delete failed: %s", message.String())
   430  	}
   431  	return nil
   432  }
   433  
   434  func (a *AwsSDKv1) listObjects(ctx context.Context, params *s3.ListObjectsV2Input) (*s3.ListObjectsV2Output, error) {
   435  	ctx, task := gotrace.NewTask(ctx, "AwsSDKv1.listObjects")
   436  	defer task.End()
   437  	perfcounter.Update(ctx, func(counter *perfcounter.CounterSet) {
   438  		counter.FileService.S3.List.Add(1)
   439  	}, a.perfCounterSets...)
   440  	return DoWithRetry(
   441  		"s3 list objects",
   442  		func() (*s3.ListObjectsV2Output, error) {
   443  			return a.client.ListObjectsV2(params)
   444  		},
   445  		maxRetryAttemps,
   446  		IsRetryableError,
   447  	)
   448  }
   449  
   450  func (a *AwsSDKv1) headObject(ctx context.Context, params *s3.HeadObjectInput) (*s3.HeadObjectOutput, error) {
   451  	ctx, task := gotrace.NewTask(ctx, "AwsSDKv1.headObject")
   452  	defer task.End()
   453  	perfcounter.Update(ctx, func(counter *perfcounter.CounterSet) {
   454  		counter.FileService.S3.Head.Add(1)
   455  	}, a.perfCounterSets...)
   456  	return DoWithRetry(
   457  		"s3 head object",
   458  		func() (*s3.HeadObjectOutput, error) {
   459  			return a.client.HeadObject(params)
   460  		},
   461  		maxRetryAttemps,
   462  		IsRetryableError,
   463  	)
   464  }
   465  
   466  func (a *AwsSDKv1) putObject(ctx context.Context, params *s3.PutObjectInput) (*s3.PutObjectOutput, error) {
   467  	ctx, task := gotrace.NewTask(ctx, "AwsSDKv1.putObject")
   468  	defer task.End()
   469  	perfcounter.Update(ctx, func(counter *perfcounter.CounterSet) {
   470  		counter.FileService.S3.Put.Add(1)
   471  	}, a.perfCounterSets...)
   472  	// not retryable because Reader may be half consumed
   473  	return a.client.PutObject(params)
   474  }
   475  
   476  func (a *AwsSDKv1) getObject(ctx context.Context, min *int64, max *int64, params *s3.GetObjectInput) (io.ReadCloser, error) {
   477  	ctx, task := gotrace.NewTask(ctx, "AwsSDKv1.getObject")
   478  	defer task.End()
   479  	perfcounter.Update(ctx, func(counter *perfcounter.CounterSet) {
   480  		counter.FileService.S3.Get.Add(1)
   481  	}, a.perfCounterSets...)
   482  	r, err := newRetryableReader(
   483  		func(offset int64) (io.ReadCloser, error) {
   484  			var rang string
   485  			if max != nil {
   486  				rang = fmt.Sprintf("bytes=%d-%d", offset, *max)
   487  			} else {
   488  				rang = fmt.Sprintf("bytes=%d-", offset)
   489  			}
   490  			params.Range = &rang
   491  			output, err := DoWithRetry(
   492  				"s3 get object",
   493  				func() (*s3.GetObjectOutput, error) {
   494  					return a.client.GetObject(params)
   495  				},
   496  				maxRetryAttemps,
   497  				IsRetryableError,
   498  			)
   499  			if err != nil {
   500  				return nil, err
   501  			}
   502  			return output.Body, nil
   503  		},
   504  		*min,
   505  		IsRetryableError,
   506  	)
   507  	if err != nil {
   508  		return nil, err
   509  	}
   510  	return r, nil
   511  }
   512  
   513  func (a *AwsSDKv1) deleteObject(ctx context.Context, params *s3.DeleteObjectInput) (*s3.DeleteObjectOutput, error) {
   514  	ctx, task := gotrace.NewTask(ctx, "AwsSDKv1.deleteObject")
   515  	defer task.End()
   516  	perfcounter.Update(ctx, func(counter *perfcounter.CounterSet) {
   517  		counter.FileService.S3.Delete.Add(1)
   518  	}, a.perfCounterSets...)
   519  	return DoWithRetry(
   520  		"s3 delete object",
   521  		func() (*s3.DeleteObjectOutput, error) {
   522  			return a.client.DeleteObject(params)
   523  		},
   524  		maxRetryAttemps,
   525  		IsRetryableError,
   526  	)
   527  }
   528  
   529  func (a *AwsSDKv1) deleteObjects(ctx context.Context, params *s3.DeleteObjectsInput) (*s3.DeleteObjectsOutput, error) {
   530  	ctx, task := gotrace.NewTask(ctx, "AwsSDKv1.deleteObjects")
   531  	defer task.End()
   532  	perfcounter.Update(ctx, func(counter *perfcounter.CounterSet) {
   533  		counter.FileService.S3.DeleteMulti.Add(1)
   534  	}, a.perfCounterSets...)
   535  	return DoWithRetry(
   536  		"s3 delete objects",
   537  		func() (*s3.DeleteObjectsOutput, error) {
   538  			return a.client.DeleteObjects(params)
   539  		},
   540  		maxRetryAttemps,
   541  		IsRetryableError,
   542  	)
   543  }
   544  
   545  func (a *AwsSDKv1) is404(err error) bool {
   546  	if err == nil {
   547  		return false
   548  	}
   549  	var awsErr awserr.Error
   550  	if !errors.As(err, &awsErr) {
   551  		return false
   552  	}
   553  	return awsErr.Code() == "NotFound"
   554  }