go.temporal.io/server@v1.23.0/common/archiver/gcloud/history_archiver.go (about)

     1  // The MIT License
     2  //
     3  // Copyright (c) 2020 Temporal Technologies Inc.  All rights reserved.
     4  //
     5  // Copyright (c) 2020 Uber Technologies, Inc.
     6  //
     7  // Permission is hereby granted, free of charge, to any person obtaining a copy
     8  // of this software and associated documentation files (the "Software"), to deal
     9  // in the Software without restriction, including without limitation the rights
    10  // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
    11  // copies of the Software, and to permit persons to whom the Software is
    12  // furnished to do so, subject to the following conditions:
    13  //
    14  // The above copyright notice and this permission notice shall be included in
    15  // all copies or substantial portions of the Software.
    16  //
    17  // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    18  // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    19  // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
    20  // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    21  // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
    22  // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
    23  // THE SOFTWARE.
    24  
    25  package gcloud
    26  
    27  import (
    28  	"context"
    29  	"encoding/binary"
    30  	"errors"
    31  	"path/filepath"
    32  	"time"
    33  
    34  	historypb "go.temporal.io/api/history/v1"
    35  	"go.temporal.io/api/serviceerror"
    36  
    37  	"go.temporal.io/server/common"
    38  	"go.temporal.io/server/common/archiver"
    39  	"go.temporal.io/server/common/archiver/gcloud/connector"
    40  	"go.temporal.io/server/common/codec"
    41  	"go.temporal.io/server/common/config"
    42  	"go.temporal.io/server/common/log"
    43  	"go.temporal.io/server/common/log/tag"
    44  	"go.temporal.io/server/common/metrics"
    45  	"go.temporal.io/server/common/persistence"
    46  )
    47  
    48  var (
    49  	errUploadNonRetryable = errors.New("upload non-retryable error")
    50  )
    51  
    52  const (
    53  	// URIScheme is the scheme for the gcloud storage implementation
    54  	URIScheme = "gs"
    55  
    56  	targetHistoryBlobSize = 2 * 1024 * 1024 // 2MB
    57  	errEncodeHistory      = "failed to encode history batches"
    58  	errBucketHistory      = "failed to get google storage bucket handle"
    59  	errWriteFile          = "failed to write history to google storage"
    60  )
    61  
    62  type historyArchiver struct {
    63  	container     *archiver.HistoryBootstrapContainer
    64  	gcloudStorage connector.Client
    65  
    66  	// only set in test code
    67  	historyIterator archiver.HistoryIterator
    68  }
    69  
    70  type progress struct {
    71  	CurrentPageNumber int
    72  	IteratorState     []byte
    73  }
    74  
    75  type getHistoryToken struct {
    76  	CloseFailoverVersion int64
    77  	HighestPart          int
    78  	CurrentPart          int
    79  	BatchIdxOffset       int
    80  }
    81  
    82  // NewHistoryArchiver creates a new gcloud storage HistoryArchiver
    83  func NewHistoryArchiver(
    84  	container *archiver.HistoryBootstrapContainer,
    85  	config *config.GstorageArchiver,
    86  ) (archiver.HistoryArchiver, error) {
    87  	storage, err := connector.NewClient(context.Background(), config)
    88  	if err == nil {
    89  		return newHistoryArchiver(container, nil, storage), nil
    90  	}
    91  	return nil, err
    92  }
    93  
    94  func newHistoryArchiver(container *archiver.HistoryBootstrapContainer, historyIterator archiver.HistoryIterator, storage connector.Client) archiver.HistoryArchiver {
    95  	return &historyArchiver{
    96  		container:       container,
    97  		gcloudStorage:   storage,
    98  		historyIterator: historyIterator,
    99  	}
   100  }
   101  
   102  // Archive is used to archive a workflow history. When the context expires the method should stop trying to archive.
   103  // Implementors are free to archive however they want, including implementing retries of sub-operations. The URI defines
   104  // the resource that histories should be archived into. The implementor gets to determine how to interpret the URI.
   105  // The Archive method may or may not be automatically retried by the caller. The ArchiveOptions are used
   106  // to interact with these retries including giving the implementor the ability to cancel retries and record progress
   107  // between retry attempts.
   108  // This method will be invoked after a workflow passes its retention period.
   109  func (h *historyArchiver) Archive(ctx context.Context, URI archiver.URI, request *archiver.ArchiveHistoryRequest, opts ...archiver.ArchiveOption) (err error) {
   110  	handler := h.container.MetricsHandler.WithTags(metrics.OperationTag(metrics.HistoryArchiverScope), metrics.NamespaceTag(request.Namespace))
   111  	featureCatalog := archiver.GetFeatureCatalog(opts...)
   112  	startTime := time.Now().UTC()
   113  	defer func() {
   114  		metrics.ServiceLatency.With(handler).Record(time.Since(startTime))
   115  		if err != nil {
   116  
   117  			if err.Error() != errUploadNonRetryable.Error() {
   118  				handler.Counter(metrics.HistoryArchiverArchiveTransientErrorCount.Name()).Record(1)
   119  				return
   120  			}
   121  
   122  			handler.Counter(metrics.HistoryArchiverArchiveNonRetryableErrorCount.Name()).Record(1)
   123  			if featureCatalog.NonRetryableError != nil {
   124  				err = featureCatalog.NonRetryableError()
   125  			}
   126  
   127  		}
   128  	}()
   129  
   130  	logger := archiver.TagLoggerWithArchiveHistoryRequestAndURI(h.container.Logger, request, URI.String())
   131  
   132  	if err := h.ValidateURI(URI); err != nil {
   133  		logger.Error(archiver.ArchiveNonRetryableErrorMsg, tag.ArchivalArchiveFailReason(archiver.ErrReasonInvalidURI), tag.Error(err))
   134  		return errUploadNonRetryable
   135  	}
   136  
   137  	if err := archiver.ValidateHistoryArchiveRequest(request); err != nil {
   138  		logger.Error(archiver.ArchiveNonRetryableErrorMsg, tag.ArchivalArchiveFailReason(archiver.ErrReasonInvalidArchiveRequest), tag.Error(err))
   139  		return errUploadNonRetryable
   140  	}
   141  
   142  	var totalUploadSize int64
   143  	historyIterator := h.historyIterator
   144  	var progress progress
   145  	if historyIterator == nil { // will only be set by testing code
   146  		historyIterator, _ = loadHistoryIterator(ctx, request, h.container.ExecutionManager, featureCatalog, &progress)
   147  	}
   148  
   149  	encoder := codec.NewJSONPBEncoder()
   150  
   151  	for historyIterator.HasNext() {
   152  		part := progress.CurrentPageNumber
   153  		historyBlob, err := historyIterator.Next(ctx)
   154  		if err != nil {
   155  			if _, isNotFound := err.(*serviceerror.NotFound); isNotFound {
   156  				// workflow history no longer exists, may due to duplicated archival signal
   157  				// this may happen even in the middle of iterating history as two archival signals
   158  				// can be processed concurrently.
   159  				logger.Info(archiver.ArchiveSkippedInfoMsg)
   160  				handler.Counter(metrics.HistoryArchiverDuplicateArchivalsCount.Name()).Record(1)
   161  				return nil
   162  			}
   163  
   164  			logger = log.With(logger, tag.ArchivalArchiveFailReason(archiver.ErrReasonReadHistory), tag.Error(err))
   165  			if !common.IsPersistenceTransientError(err) {
   166  				logger.Error(archiver.ArchiveNonRetryableErrorMsg)
   167  				return errUploadNonRetryable
   168  			}
   169  			logger.Error(archiver.ArchiveTransientErrorMsg)
   170  			return err
   171  		}
   172  
   173  		if historyMutated(request, historyBlob.Body, historyBlob.Header.IsLast) {
   174  			logger.Error(archiver.ArchiveNonRetryableErrorMsg, tag.ArchivalArchiveFailReason(archiver.ErrReasonHistoryMutated))
   175  			return archiver.ErrHistoryMutated
   176  		}
   177  
   178  		encodedHistoryPart, err := encoder.EncodeHistories(historyBlob.Body)
   179  		if err != nil {
   180  			logger.Error(archiver.ArchiveNonRetryableErrorMsg, tag.ArchivalArchiveFailReason(errEncodeHistory), tag.Error(err))
   181  			return errUploadNonRetryable
   182  		}
   183  
   184  		filename := constructHistoryFilenameMultipart(request.NamespaceID, request.WorkflowID, request.RunID, request.CloseFailoverVersion, part)
   185  		if exist, _ := h.gcloudStorage.Exist(ctx, URI, filename); !exist {
   186  			if err := h.gcloudStorage.Upload(ctx, URI, filename, encodedHistoryPart); err != nil {
   187  				logger.Error(archiver.ArchiveTransientErrorMsg, tag.ArchivalArchiveFailReason(errWriteFile), tag.Error(err))
   188  				handler.Counter(metrics.HistoryArchiverArchiveTransientErrorCount.Name()).Record(1)
   189  				return err
   190  			}
   191  
   192  			totalUploadSize = totalUploadSize + int64(binary.Size(encodedHistoryPart))
   193  		}
   194  
   195  		if err := saveHistoryIteratorState(ctx, featureCatalog, historyIterator, part, &progress); err != nil {
   196  			return err
   197  		}
   198  	}
   199  
   200  	handler.Counter(metrics.HistoryArchiverTotalUploadSize.Name()).Record(totalUploadSize)
   201  	handler.Counter(metrics.HistoryArchiverHistorySize.Name()).Record(totalUploadSize)
   202  	handler.Counter(metrics.HistoryArchiverArchiveSuccessCount.Name()).Record(1)
   203  	return
   204  }
   205  
   206  // Get is used to access an archived history. When context expires method should stop trying to fetch history.
   207  // The URI identifies the resource from which history should be accessed and it is up to the implementor to interpret this URI.
   208  // This method should thrift errors - see filestore as an example.
   209  func (h *historyArchiver) Get(ctx context.Context, URI archiver.URI, request *archiver.GetHistoryRequest) (*archiver.GetHistoryResponse, error) {
   210  
   211  	err := h.ValidateURI(URI)
   212  	if err != nil {
   213  		return nil, serviceerror.NewInvalidArgument(archiver.ErrInvalidURI.Error())
   214  	}
   215  
   216  	if err := archiver.ValidateGetRequest(request); err != nil {
   217  		return nil, serviceerror.NewInvalidArgument(archiver.ErrInvalidGetHistoryRequest.Error())
   218  	}
   219  
   220  	var token *getHistoryToken
   221  	if request.NextPageToken != nil {
   222  		token, err = deserializeGetHistoryToken(request.NextPageToken)
   223  		if err != nil {
   224  			return nil, serviceerror.NewInvalidArgument(archiver.ErrNextPageTokenCorrupted.Error())
   225  		}
   226  	} else {
   227  		highestVersion, historyhighestPart, historyCurrentPart, err := h.getHighestVersion(ctx, URI, request)
   228  		if err != nil {
   229  			return nil, serviceerror.NewUnavailable(err.Error())
   230  		}
   231  		if highestVersion == nil {
   232  			return nil, serviceerror.NewNotFound(archiver.ErrHistoryNotExist.Error())
   233  		}
   234  		token = &getHistoryToken{
   235  			CloseFailoverVersion: *highestVersion,
   236  			HighestPart:          *historyhighestPart,
   237  			CurrentPart:          *historyCurrentPart,
   238  			BatchIdxOffset:       0,
   239  		}
   240  	}
   241  
   242  	response := &archiver.GetHistoryResponse{}
   243  	response.HistoryBatches = []*historypb.History{}
   244  	numOfEvents := 0
   245  	encoder := codec.NewJSONPBEncoder()
   246  
   247  outer:
   248  	for token.CurrentPart <= token.HighestPart {
   249  
   250  		filename := constructHistoryFilenameMultipart(request.NamespaceID, request.WorkflowID, request.RunID, token.CloseFailoverVersion, token.CurrentPart)
   251  		encodedHistoryBatches, err := h.gcloudStorage.Get(ctx, URI, filename)
   252  		if err != nil {
   253  			return nil, serviceerror.NewUnavailable(err.Error())
   254  		}
   255  		if encodedHistoryBatches == nil {
   256  			return nil, serviceerror.NewInternal("Fail retrieving history file: " + URI.String() + "/" + filename)
   257  		}
   258  
   259  		batches, err := encoder.DecodeHistories(encodedHistoryBatches)
   260  		if err != nil {
   261  			return nil, serviceerror.NewInternal(err.Error())
   262  		}
   263  		// trim the batches in the beginning based on token.BatchIdxOffset
   264  		batches = batches[token.BatchIdxOffset:]
   265  
   266  		for idx, batch := range batches {
   267  			response.HistoryBatches = append(response.HistoryBatches, batch)
   268  			token.BatchIdxOffset++
   269  			numOfEvents += len(batch.Events)
   270  
   271  			if numOfEvents >= request.PageSize {
   272  				if idx == len(batches)-1 {
   273  					// handle the edge case where page size is meeted after adding the last batch
   274  					token.BatchIdxOffset = 0
   275  					token.CurrentPart++
   276  				}
   277  				break outer
   278  			}
   279  		}
   280  
   281  		// reset the offset to 0 as we will read a new page
   282  		token.BatchIdxOffset = 0
   283  		token.CurrentPart++
   284  
   285  	}
   286  
   287  	if token.CurrentPart <= token.HighestPart {
   288  		nextToken, err := serializeToken(token)
   289  		if err != nil {
   290  			return nil, serviceerror.NewInternal(err.Error())
   291  		}
   292  		response.NextPageToken = nextToken
   293  	}
   294  
   295  	return response, nil
   296  }
   297  
   298  // ValidateURI is used to define what a valid URI for an implementation is.
   299  func (h *historyArchiver) ValidateURI(URI archiver.URI) (err error) {
   300  
   301  	if err = h.validateURI(URI); err == nil {
   302  		_, err = h.gcloudStorage.Exist(context.Background(), URI, "")
   303  	}
   304  
   305  	return
   306  }
   307  
   308  func (h *historyArchiver) validateURI(URI archiver.URI) (err error) {
   309  	if URI.Scheme() != URIScheme {
   310  		return archiver.ErrURISchemeMismatch
   311  	}
   312  
   313  	if URI.Path() == "" || URI.Hostname() == "" {
   314  		return archiver.ErrInvalidURI
   315  	}
   316  
   317  	return
   318  }
   319  
   320  func historyMutated(request *archiver.ArchiveHistoryRequest, historyBatches []*historypb.History, isLast bool) bool {
   321  	lastBatch := historyBatches[len(historyBatches)-1].Events
   322  	lastEvent := lastBatch[len(lastBatch)-1]
   323  	lastFailoverVersion := lastEvent.GetVersion()
   324  	if lastFailoverVersion > request.CloseFailoverVersion {
   325  		return true
   326  	}
   327  
   328  	if !isLast {
   329  		return false
   330  	}
   331  	lastEventID := lastEvent.GetEventId()
   332  	return lastFailoverVersion != request.CloseFailoverVersion || lastEventID+1 != request.NextEventID
   333  }
   334  
   335  func (h *historyArchiver) getHighestVersion(ctx context.Context, URI archiver.URI, request *archiver.GetHistoryRequest) (*int64, *int, *int, error) {
   336  
   337  	filenames, err := h.gcloudStorage.Query(ctx, URI, constructHistoryFilenamePrefix(request.NamespaceID, request.WorkflowID, request.RunID))
   338  
   339  	if err != nil {
   340  		return nil, nil, nil, err
   341  	}
   342  
   343  	var highestVersion *int64
   344  	var highestVersionPart *int
   345  	var lowestVersionPart *int
   346  
   347  	for _, filename := range filenames {
   348  		version, partVersionID, err := extractCloseFailoverVersion(filepath.Base(filename))
   349  		if err != nil || (request.CloseFailoverVersion != nil && version != *request.CloseFailoverVersion) {
   350  			continue
   351  		}
   352  
   353  		if highestVersion == nil || version > *highestVersion {
   354  			highestVersion = &version
   355  			highestVersionPart = new(int)
   356  			lowestVersionPart = new(int)
   357  		}
   358  
   359  		if *highestVersion == version {
   360  			if highestVersionPart == nil || partVersionID > *highestVersionPart {
   361  				highestVersionPart = &partVersionID
   362  			}
   363  
   364  			if lowestVersionPart == nil || partVersionID < *lowestVersionPart {
   365  				lowestVersionPart = &partVersionID
   366  			}
   367  		}
   368  
   369  	}
   370  
   371  	return highestVersion, highestVersionPart, lowestVersionPart, nil
   372  }
   373  
   374  func loadHistoryIterator(ctx context.Context, request *archiver.ArchiveHistoryRequest, executionManager persistence.ExecutionManager, featureCatalog *archiver.ArchiveFeatureCatalog, progress *progress) (historyIterator archiver.HistoryIterator, err error) {
   375  
   376  	defer func() {
   377  		if err != nil || historyIterator == nil {
   378  			historyIterator, err = archiver.NewHistoryIteratorFromState(request, executionManager, targetHistoryBlobSize, nil)
   379  		}
   380  	}()
   381  
   382  	if featureCatalog.ProgressManager != nil {
   383  		if featureCatalog.ProgressManager.HasProgress(ctx) {
   384  			err = featureCatalog.ProgressManager.LoadProgress(ctx, &progress)
   385  			if err == nil {
   386  				historyIterator, err = archiver.NewHistoryIteratorFromState(request, executionManager, targetHistoryBlobSize, progress.IteratorState)
   387  			}
   388  		}
   389  
   390  	}
   391  	return
   392  }
   393  
   394  func saveHistoryIteratorState(ctx context.Context, featureCatalog *archiver.ArchiveFeatureCatalog, historyIterator archiver.HistoryIterator, currentPartNum int, progress *progress) (err error) {
   395  	var state []byte
   396  	if featureCatalog.ProgressManager != nil {
   397  		state, err = historyIterator.GetState()
   398  		if err == nil {
   399  			progress.CurrentPageNumber = currentPartNum + 1
   400  			progress.IteratorState = state
   401  
   402  			err = featureCatalog.ProgressManager.RecordProgress(ctx, progress)
   403  		}
   404  	}
   405  
   406  	return err
   407  }