go.temporal.io/server@v1.23.0/common/archiver/s3store/history_archiver.go (about)

     1  // The MIT License
     2  //
     3  // Copyright (c) 2020 Temporal Technologies Inc.  All rights reserved.
     4  //
     5  // Copyright (c) 2020 Uber Technologies, Inc.
     6  //
     7  // Permission is hereby granted, free of charge, to any person obtaining a copy
     8  // of this software and associated documentation files (the "Software"), to deal
     9  // in the Software without restriction, including without limitation the rights
    10  // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
    11  // copies of the Software, and to permit persons to whom the Software is
    12  // furnished to do so, subject to the following conditions:
    13  //
    14  // The above copyright notice and this permission notice shall be included in
    15  // all copies or substantial portions of the Software.
    16  //
    17  // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    18  // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    19  // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
    20  // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    21  // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
    22  // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
    23  // THE SOFTWARE.
    24  
// The S3 history archiver archives workflow histories to Amazon S3.
    26  
    27  package s3store
    28  
    29  import (
    30  	"context"
    31  	"encoding/binary"
    32  	"errors"
    33  	"strconv"
    34  	"strings"
    35  	"time"
    36  
    37  	"github.com/aws/aws-sdk-go/aws"
    38  	"github.com/aws/aws-sdk-go/aws/awserr"
    39  	"github.com/aws/aws-sdk-go/aws/request"
    40  	"github.com/aws/aws-sdk-go/aws/session"
    41  	"github.com/aws/aws-sdk-go/service/s3"
    42  	"github.com/aws/aws-sdk-go/service/s3/s3iface"
    43  	"go.temporal.io/api/serviceerror"
    44  
    45  	archiverspb "go.temporal.io/server/api/archiver/v1"
    46  	"go.temporal.io/server/common"
    47  	"go.temporal.io/server/common/archiver"
    48  	"go.temporal.io/server/common/codec"
    49  	"go.temporal.io/server/common/config"
    50  	"go.temporal.io/server/common/log"
    51  	"go.temporal.io/server/common/log/tag"
    52  	"go.temporal.io/server/common/metrics"
    53  	"go.temporal.io/server/common/persistence"
    54  )
    55  
const (
	// URIScheme is the scheme for the s3 implementation
	URIScheme = "s3"
	// errEncodeHistory is the fail-reason tag logged by Archive when encoding a
	// history blob fails.
	errEncodeHistory = "failed to encode history batches"
	// errWriteKey is the fail-reason tag logged by Archive when checking for or
	// uploading a history key fails.
	errWriteKey = "failed to write history to s3"
	// defaultBlobstoreTimeout is not referenced in this part of the file;
	// presumably the default deadline applied to S3 calls — TODO confirm against its users.
	defaultBlobstoreTimeout = time.Minute
	// targetHistoryBlobSize is the target blob size handed to the history iterator.
	targetHistoryBlobSize = 2 * 1024 * 1024 // 2MB
)
    64  
var (
	// errNoBucketSpecified is not referenced in this part of the file;
	// presumably returned by URI validation — TODO confirm.
	errNoBucketSpecified = errors.New("no bucket specified")
	// errBucketNotExists is the message reported when S3 answers with NoSuchBucket.
	errBucketNotExists = errors.New("requested bucket does not exist")
	// errEmptyAwsRegion is returned by newHistoryArchiver when the configured region is empty.
	errEmptyAwsRegion = errors.New("empty aws region")
)
    70  
type (
	// historyArchiver archives and retrieves workflow histories through an S3 client.
	historyArchiver struct {
		// container supplies the logger, metrics handler and execution manager
		// used by Archive.
		container *archiver.HistoryBootstrapContainer
		// s3cli is the S3 client used for all bucket operations.
		s3cli s3iface.S3API
		// only set in test code
		historyIterator archiver.HistoryIterator
	}

	// getHistoryToken is the pagination token used by Get; it identifies the
	// next history blob key to download.
	getHistoryToken struct {
		// CloseFailoverVersion selects which archived version of the run is read.
		CloseFailoverVersion int64
		// BatchIdx is the index of the next blob to download.
		BatchIdx int
	}

	// uploadProgress tracks how far Archive has got so that an interrupted
	// archival can be resumed through the ProgressManager.
	uploadProgress struct {
		// BatchIdx is the index of the next blob to upload.
		BatchIdx int
		// IteratorState is the state reported by the history iterator after the
		// most recently processed blob.
		IteratorState []byte
		// uploadedSize accumulates the size of blobs actually uploaded.
		// NOTE(review): unexported, so it may not survive progress
		// serialization — confirm against the ProgressManager implementation.
		uploadedSize int64
		// historySize accumulates the size of every processed blob, including
		// blobs that already existed in the bucket.
		historySize int64
	}
)
    91  
    92  // NewHistoryArchiver creates a new archiver.HistoryArchiver based on s3
    93  func NewHistoryArchiver(
    94  	container *archiver.HistoryBootstrapContainer,
    95  	config *config.S3Archiver,
    96  ) (archiver.HistoryArchiver, error) {
    97  	return newHistoryArchiver(container, config, nil)
    98  }
    99  
   100  func newHistoryArchiver(
   101  	container *archiver.HistoryBootstrapContainer,
   102  	config *config.S3Archiver,
   103  	historyIterator archiver.HistoryIterator,
   104  ) (*historyArchiver, error) {
   105  	if len(config.Region) == 0 {
   106  		return nil, errEmptyAwsRegion
   107  	}
   108  	s3Config := &aws.Config{
   109  		Endpoint:         config.Endpoint,
   110  		Region:           aws.String(config.Region),
   111  		S3ForcePathStyle: aws.Bool(config.S3ForcePathStyle),
   112  		LogLevel:         (*aws.LogLevelType)(&config.LogLevel),
   113  	}
   114  	sess, err := session.NewSession(s3Config)
   115  	if err != nil {
   116  		return nil, err
   117  	}
   118  
   119  	return &historyArchiver{
   120  		container:       container,
   121  		s3cli:           s3.New(sess),
   122  		historyIterator: historyIterator,
   123  	}, nil
   124  }
// Archive reads the workflow history described by request blob by blob,
// encodes each blob and stores it in the bucket named by URI, one S3 key per
// blob index.
//
// Blobs whose key already exists are not uploaded again but still count
// toward the recorded history size. After every blob the iterator state is
// saved (best effort) through the ProgressManager from opts, if one is set.
//
// It returns nil on success, and also when the history is reported as not
// found while iterating (treated as a duplicate archival). On failure the
// deferred handler records a transient or non-retryable error metric; for
// non-retryable errors the returned error is replaced by
// featureCatalog.NonRetryableError() when that hook is set.
func (h *historyArchiver) Archive(
	ctx context.Context,
	URI archiver.URI,
	request *archiver.ArchiveHistoryRequest,
	opts ...archiver.ArchiveOption,
) (err error) {
	handler := h.container.MetricsHandler.WithTags(metrics.OperationTag(metrics.HistoryArchiverScope), metrics.NamespaceTag(request.Namespace))
	featureCatalog := archiver.GetFeatureCatalog(opts...)
	startTime := time.Now().UTC()
	// The deferred closure inspects (and may rewrite) the named result err, so
	// every return below must go through it.
	defer func() {
		handler.Timer(metrics.ServiceLatency.Name()).Record(time.Since(startTime))
		if err != nil {
			if common.IsPersistenceTransientError(err) || isRetryableError(err) {
				handler.Counter(metrics.HistoryArchiverArchiveTransientErrorCount.Name()).Record(1)
			} else {
				handler.Counter(metrics.HistoryArchiverArchiveNonRetryableErrorCount.Name()).Record(1)
				if featureCatalog.NonRetryableError != nil {
					err = featureCatalog.NonRetryableError()
				}
			}
		}
	}()

	logger := archiver.TagLoggerWithArchiveHistoryRequestAndURI(h.container.Logger, request, URI.String())

	if err := SoftValidateURI(URI); err != nil {
		logger.Error(archiver.ArchiveNonRetryableErrorMsg, tag.ArchivalArchiveFailReason(archiver.ErrReasonInvalidURI), tag.Error(err))
		return err
	}

	if err := archiver.ValidateHistoryArchiveRequest(request); err != nil {
		logger.Error(archiver.ArchiveNonRetryableErrorMsg, tag.ArchivalArchiveFailReason(archiver.ErrReasonInvalidArchiveRequest), tag.Error(err))
		return err
	}

	// progress is filled in by loadHistoryIterator when saved progress exists,
	// otherwise it starts from the zero value (batch 0).
	var progress uploadProgress
	historyIterator := h.historyIterator
	if historyIterator == nil { // will only be set by testing code
		historyIterator = loadHistoryIterator(ctx, request, h.container.ExecutionManager, featureCatalog, &progress)
	}
	for historyIterator.HasNext() {
		historyBlob, err := historyIterator.Next(ctx)
		if err != nil {
			if _, isNotFound := err.(*serviceerror.NotFound); isNotFound {
				// workflow history no longer exists, may due to duplicated archival signal
				// this may happen even in the middle of iterating history as two archival signals
				// can be processed concurrently.
				logger.Info(archiver.ArchiveSkippedInfoMsg)
				handler.Counter(metrics.HistoryArchiverDuplicateArchivalsCount.Name()).Record(1)
				return nil
			}

			logger := log.With(logger, tag.ArchivalArchiveFailReason(archiver.ErrReasonReadHistory), tag.Error(err))
			if common.IsPersistenceTransientError(err) {
				logger.Error(archiver.ArchiveTransientErrorMsg)
			} else {
				logger.Error(archiver.ArchiveNonRetryableErrorMsg)
			}
			return err
		}

		if historyMutated(request, historyBlob.Body, historyBlob.Header.IsLast) {
			logger.Error(archiver.ArchiveNonRetryableErrorMsg, tag.ArchivalArchiveFailReason(archiver.ErrReasonHistoryMutated))
			return archiver.ErrHistoryMutated
		}

		encoder := codec.NewJSONPBEncoder()
		encodedHistoryBlob, err := encoder.Encode(historyBlob)
		if err != nil {
			logger.Error(archiver.ArchiveNonRetryableErrorMsg, tag.ArchivalArchiveFailReason(errEncodeHistory), tag.Error(err))
			return err
		}
		// The key is derived from the current batch index, so a resumed
		// archival addresses the same keys as the interrupted one.
		key := constructHistoryKey(URI.Path(), request.NamespaceID, request.WorkflowID, request.RunID, request.CloseFailoverVersion, progress.BatchIdx)

		exists, err := KeyExists(ctx, h.s3cli, URI, key)
		if err != nil {
			if isRetryableError(err) {
				logger.Error(archiver.ArchiveTransientErrorMsg, tag.ArchivalArchiveFailReason(errWriteKey), tag.Error(err))
			} else {
				logger.Error(archiver.ArchiveNonRetryableErrorMsg, tag.ArchivalArchiveFailReason(errWriteKey), tag.Error(err))
			}
			return err
		}
		blobSize := int64(binary.Size(encodedHistoryBlob))
		if exists {
			// Already stored: only count it, do not upload again.
			handler.Counter(metrics.HistoryArchiverBlobExistsCount.Name()).Record(1)
		} else {
			if err := Upload(ctx, h.s3cli, URI, key, encodedHistoryBlob); err != nil {
				if isRetryableError(err) {
					logger.Error(archiver.ArchiveTransientErrorMsg, tag.ArchivalArchiveFailReason(errWriteKey), tag.Error(err))
				} else {
					logger.Error(archiver.ArchiveNonRetryableErrorMsg, tag.ArchivalArchiveFailReason(errWriteKey), tag.Error(err))
				}
				return err
			}
			progress.uploadedSize += blobSize
			handler.Histogram(metrics.HistoryArchiverBlobSize.Name(), metrics.HistoryArchiverBlobSize.Unit()).Record(blobSize)
		}

		// historySize counts every blob, whether uploaded now or found existing.
		progress.historySize += blobSize
		progress.BatchIdx = progress.BatchIdx + 1
		saveHistoryIteratorState(ctx, featureCatalog, historyIterator, &progress)
	}

	handler.Histogram(metrics.HistoryArchiverTotalUploadSize.Name(), metrics.HistoryArchiverTotalUploadSize.Unit()).Record(progress.uploadedSize)
	handler.Histogram(metrics.HistoryArchiverHistorySize.Name(), metrics.HistoryArchiverHistorySize.Unit()).Record(progress.historySize)
	handler.Counter(metrics.HistoryArchiverArchiveSuccessCount.Name()).Record(1)
	return nil
}
   234  
   235  func loadHistoryIterator(ctx context.Context, request *archiver.ArchiveHistoryRequest, executionManager persistence.ExecutionManager, featureCatalog *archiver.ArchiveFeatureCatalog, progress *uploadProgress) (historyIterator archiver.HistoryIterator) {
   236  	if featureCatalog.ProgressManager != nil {
   237  		if featureCatalog.ProgressManager.HasProgress(ctx) {
   238  			err := featureCatalog.ProgressManager.LoadProgress(ctx, progress)
   239  			if err == nil {
   240  				historyIterator, err := archiver.NewHistoryIteratorFromState(request, executionManager, targetHistoryBlobSize, progress.IteratorState)
   241  				if err == nil {
   242  					return historyIterator
   243  				}
   244  			}
   245  			progress.IteratorState = nil
   246  			progress.BatchIdx = 0
   247  			progress.historySize = 0
   248  			progress.uploadedSize = 0
   249  		}
   250  	}
   251  	return archiver.NewHistoryIterator(request, executionManager, targetHistoryBlobSize)
   252  }
   253  
   254  func saveHistoryIteratorState(ctx context.Context, featureCatalog *archiver.ArchiveFeatureCatalog, historyIterator archiver.HistoryIterator, progress *uploadProgress) {
   255  	// Saving history state is a best effort operation. Ignore errors and continue
   256  	if featureCatalog.ProgressManager != nil {
   257  		state, err := historyIterator.GetState()
   258  		if err != nil {
   259  			return
   260  		}
   261  		progress.IteratorState = state
   262  		err = featureCatalog.ProgressManager.RecordProgress(ctx, progress)
   263  		if err != nil {
   264  			return
   265  		}
   266  	}
   267  }
   268  
   269  func (h *historyArchiver) Get(
   270  	ctx context.Context,
   271  	URI archiver.URI,
   272  	request *archiver.GetHistoryRequest,
   273  ) (*archiver.GetHistoryResponse, error) {
   274  	if err := SoftValidateURI(URI); err != nil {
   275  		return nil, serviceerror.NewInvalidArgument(archiver.ErrInvalidURI.Error())
   276  	}
   277  
   278  	if err := archiver.ValidateGetRequest(request); err != nil {
   279  		return nil, serviceerror.NewInvalidArgument(archiver.ErrInvalidGetHistoryRequest.Error())
   280  	}
   281  
   282  	var err error
   283  	var token *getHistoryToken
   284  	if request.NextPageToken != nil {
   285  		token, err = deserializeGetHistoryToken(request.NextPageToken)
   286  		if err != nil {
   287  			return nil, serviceerror.NewInvalidArgument(archiver.ErrNextPageTokenCorrupted.Error())
   288  		}
   289  	} else if request.CloseFailoverVersion != nil {
   290  		token = &getHistoryToken{
   291  			CloseFailoverVersion: *request.CloseFailoverVersion,
   292  		}
   293  	} else {
   294  		highestVersion, err := h.getHighestVersion(ctx, URI, request)
   295  		if err != nil {
   296  			if err == archiver.ErrHistoryNotExist {
   297  				return nil, serviceerror.NewNotFound(err.Error())
   298  			}
   299  			return nil, serviceerror.NewInvalidArgument(err.Error())
   300  		}
   301  		token = &getHistoryToken{
   302  			CloseFailoverVersion: *highestVersion,
   303  		}
   304  	}
   305  	encoder := codec.NewJSONPBEncoder()
   306  	response := &archiver.GetHistoryResponse{}
   307  	numOfEvents := 0
   308  	isTruncated := false
   309  	for {
   310  		if numOfEvents >= request.PageSize {
   311  			isTruncated = true
   312  			break
   313  		}
   314  		key := constructHistoryKey(URI.Path(), request.NamespaceID, request.WorkflowID, request.RunID, token.CloseFailoverVersion, token.BatchIdx)
   315  
   316  		encodedRecord, err := Download(ctx, h.s3cli, URI, key)
   317  		if err != nil {
   318  			if isRetryableError(err) {
   319  				return nil, serviceerror.NewUnavailable(err.Error())
   320  			}
   321  			switch err.(type) {
   322  			case *serviceerror.InvalidArgument, *serviceerror.Unavailable, *serviceerror.NotFound:
   323  				return nil, err
   324  			default:
   325  				return nil, serviceerror.NewInternal(err.Error())
   326  			}
   327  		}
   328  
   329  		historyBlob := archiverspb.HistoryBlob{}
   330  		err = encoder.Decode(encodedRecord, &historyBlob)
   331  		if err != nil {
   332  			return nil, serviceerror.NewInternal(err.Error())
   333  		}
   334  
   335  		for _, batch := range historyBlob.Body {
   336  			response.HistoryBatches = append(response.HistoryBatches, batch)
   337  			numOfEvents += len(batch.Events)
   338  		}
   339  
   340  		if historyBlob.Header.IsLast {
   341  			break
   342  		}
   343  		token.BatchIdx++
   344  	}
   345  
   346  	if isTruncated {
   347  		nextToken, err := SerializeToken(token)
   348  		if err != nil {
   349  			return nil, serviceerror.NewInternal(err.Error())
   350  		}
   351  		response.NextPageToken = nextToken
   352  	}
   353  
   354  	return response, nil
   355  }
   356  
   357  func (h *historyArchiver) ValidateURI(URI archiver.URI) error {
   358  	err := SoftValidateURI(URI)
   359  	if err != nil {
   360  		return err
   361  	}
   362  	return BucketExists(context.TODO(), h.s3cli, URI)
   363  }
   364  
   365  func (h *historyArchiver) getHighestVersion(ctx context.Context, URI archiver.URI, request *archiver.GetHistoryRequest) (*int64, error) {
   366  	ctx, cancel := ensureContextTimeout(ctx)
   367  	defer cancel()
   368  	var prefix = constructHistoryKeyPrefix(URI.Path(), request.NamespaceID, request.WorkflowID, request.RunID) + "/"
   369  	results, err := h.s3cli.ListObjectsV2WithContext(ctx, &s3.ListObjectsV2Input{
   370  		Bucket:    aws.String(URI.Hostname()),
   371  		Prefix:    aws.String(prefix),
   372  		Delimiter: aws.String("/"),
   373  	})
   374  	if err != nil {
   375  		if aerr, ok := err.(awserr.Error); ok && aerr.Code() == s3.ErrCodeNoSuchBucket {
   376  			return nil, serviceerror.NewInvalidArgument(errBucketNotExists.Error())
   377  		}
   378  		return nil, err
   379  	}
   380  	var highestVersion *int64
   381  
   382  	for _, v := range results.CommonPrefixes {
   383  		var version int64
   384  		version, err = strconv.ParseInt(strings.Replace(strings.Replace(*v.Prefix, prefix, "", 1), "/", "", 1), 10, 64)
   385  		if err != nil {
   386  			continue
   387  		}
   388  		if highestVersion == nil || version > *highestVersion {
   389  			highestVersion = &version
   390  		}
   391  	}
   392  	if highestVersion == nil {
   393  		return nil, archiver.ErrHistoryNotExist
   394  	}
   395  	return highestVersion, nil
   396  }
   397  
   398  func isRetryableError(err error) bool {
   399  	if err == nil {
   400  		return false
   401  	}
   402  	if aerr, ok := err.(awserr.Error); ok {
   403  		return isStatusCodeRetryable(aerr) || request.IsErrorRetryable(aerr) || request.IsErrorThrottle(aerr)
   404  	}
   405  	return false
   406  }
   407  
   408  func isStatusCodeRetryable(err error) bool {
   409  	if aerr, ok := err.(awserr.Error); ok {
   410  		if rerr, ok := err.(awserr.RequestFailure); ok {
   411  			if rerr.StatusCode() == 429 {
   412  				return true
   413  			}
   414  			if rerr.StatusCode() >= 500 && rerr.StatusCode() != 501 {
   415  				return true
   416  			}
   417  		}
   418  		return isStatusCodeRetryable(aerr.OrigErr())
   419  	}
   420  	return false
   421  }