// The MIT License
//
// Copyright (c) 2020 Temporal Technologies Inc.  All rights reserved.
//
// Copyright (c) 2020 Uber Technologies, Inc.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.

24 25 package gcloud 26 27 import ( 28 "context" 29 "encoding/binary" 30 "errors" 31 "path/filepath" 32 "time" 33 34 historypb "go.temporal.io/api/history/v1" 35 "go.temporal.io/api/serviceerror" 36 37 "go.temporal.io/server/common" 38 "go.temporal.io/server/common/archiver" 39 "go.temporal.io/server/common/archiver/gcloud/connector" 40 "go.temporal.io/server/common/codec" 41 "go.temporal.io/server/common/config" 42 "go.temporal.io/server/common/log" 43 "go.temporal.io/server/common/log/tag" 44 "go.temporal.io/server/common/metrics" 45 "go.temporal.io/server/common/persistence" 46 ) 47 48 var ( 49 errUploadNonRetryable = errors.New("upload non-retryable error") 50 ) 51 52 const ( 53 // URIScheme is the scheme for the gcloud storage implementation 54 URIScheme = "gs" 55 56 targetHistoryBlobSize = 2 * 1024 * 1024 // 2MB 57 errEncodeHistory = "failed to encode history batches" 58 errBucketHistory = "failed to get google storage bucket handle" 59 errWriteFile = "failed to write history to google storage" 60 ) 61 62 type historyArchiver struct { 63 container *archiver.HistoryBootstrapContainer 64 gcloudStorage connector.Client 65 66 // only set in test code 67 historyIterator archiver.HistoryIterator 68 } 69 70 type progress struct { 71 CurrentPageNumber int 72 IteratorState []byte 73 } 74 75 type getHistoryToken struct { 76 CloseFailoverVersion int64 77 HighestPart int 78 CurrentPart int 79 BatchIdxOffset int 80 } 81 82 // NewHistoryArchiver creates a new gcloud storage HistoryArchiver 83 func NewHistoryArchiver( 84 container *archiver.HistoryBootstrapContainer, 85 config *config.GstorageArchiver, 86 ) (archiver.HistoryArchiver, error) { 87 storage, err := connector.NewClient(context.Background(), config) 88 if err == nil { 89 return newHistoryArchiver(container, nil, storage), nil 90 } 91 return nil, err 92 } 93 94 func newHistoryArchiver(container *archiver.HistoryBootstrapContainer, historyIterator archiver.HistoryIterator, storage connector.Client) 
archiver.HistoryArchiver { 95 return &historyArchiver{ 96 container: container, 97 gcloudStorage: storage, 98 historyIterator: historyIterator, 99 } 100 } 101 102 // Archive is used to archive a workflow history. When the context expires the method should stop trying to archive. 103 // Implementors are free to archive however they want, including implementing retries of sub-operations. The URI defines 104 // the resource that histories should be archived into. The implementor gets to determine how to interpret the URI. 105 // The Archive method may or may not be automatically retried by the caller. The ArchiveOptions are used 106 // to interact with these retries including giving the implementor the ability to cancel retries and record progress 107 // between retry attempts. 108 // This method will be invoked after a workflow passes its retention period. 109 func (h *historyArchiver) Archive(ctx context.Context, URI archiver.URI, request *archiver.ArchiveHistoryRequest, opts ...archiver.ArchiveOption) (err error) { 110 handler := h.container.MetricsHandler.WithTags(metrics.OperationTag(metrics.HistoryArchiverScope), metrics.NamespaceTag(request.Namespace)) 111 featureCatalog := archiver.GetFeatureCatalog(opts...) 
112 startTime := time.Now().UTC() 113 defer func() { 114 metrics.ServiceLatency.With(handler).Record(time.Since(startTime)) 115 if err != nil { 116 117 if err.Error() != errUploadNonRetryable.Error() { 118 handler.Counter(metrics.HistoryArchiverArchiveTransientErrorCount.Name()).Record(1) 119 return 120 } 121 122 handler.Counter(metrics.HistoryArchiverArchiveNonRetryableErrorCount.Name()).Record(1) 123 if featureCatalog.NonRetryableError != nil { 124 err = featureCatalog.NonRetryableError() 125 } 126 127 } 128 }() 129 130 logger := archiver.TagLoggerWithArchiveHistoryRequestAndURI(h.container.Logger, request, URI.String()) 131 132 if err := h.ValidateURI(URI); err != nil { 133 logger.Error(archiver.ArchiveNonRetryableErrorMsg, tag.ArchivalArchiveFailReason(archiver.ErrReasonInvalidURI), tag.Error(err)) 134 return errUploadNonRetryable 135 } 136 137 if err := archiver.ValidateHistoryArchiveRequest(request); err != nil { 138 logger.Error(archiver.ArchiveNonRetryableErrorMsg, tag.ArchivalArchiveFailReason(archiver.ErrReasonInvalidArchiveRequest), tag.Error(err)) 139 return errUploadNonRetryable 140 } 141 142 var totalUploadSize int64 143 historyIterator := h.historyIterator 144 var progress progress 145 if historyIterator == nil { // will only be set by testing code 146 historyIterator, _ = loadHistoryIterator(ctx, request, h.container.ExecutionManager, featureCatalog, &progress) 147 } 148 149 encoder := codec.NewJSONPBEncoder() 150 151 for historyIterator.HasNext() { 152 part := progress.CurrentPageNumber 153 historyBlob, err := historyIterator.Next(ctx) 154 if err != nil { 155 if _, isNotFound := err.(*serviceerror.NotFound); isNotFound { 156 // workflow history no longer exists, may due to duplicated archival signal 157 // this may happen even in the middle of iterating history as two archival signals 158 // can be processed concurrently. 
159 logger.Info(archiver.ArchiveSkippedInfoMsg) 160 handler.Counter(metrics.HistoryArchiverDuplicateArchivalsCount.Name()).Record(1) 161 return nil 162 } 163 164 logger = log.With(logger, tag.ArchivalArchiveFailReason(archiver.ErrReasonReadHistory), tag.Error(err)) 165 if !common.IsPersistenceTransientError(err) { 166 logger.Error(archiver.ArchiveNonRetryableErrorMsg) 167 return errUploadNonRetryable 168 } 169 logger.Error(archiver.ArchiveTransientErrorMsg) 170 return err 171 } 172 173 if historyMutated(request, historyBlob.Body, historyBlob.Header.IsLast) { 174 logger.Error(archiver.ArchiveNonRetryableErrorMsg, tag.ArchivalArchiveFailReason(archiver.ErrReasonHistoryMutated)) 175 return archiver.ErrHistoryMutated 176 } 177 178 encodedHistoryPart, err := encoder.EncodeHistories(historyBlob.Body) 179 if err != nil { 180 logger.Error(archiver.ArchiveNonRetryableErrorMsg, tag.ArchivalArchiveFailReason(errEncodeHistory), tag.Error(err)) 181 return errUploadNonRetryable 182 } 183 184 filename := constructHistoryFilenameMultipart(request.NamespaceID, request.WorkflowID, request.RunID, request.CloseFailoverVersion, part) 185 if exist, _ := h.gcloudStorage.Exist(ctx, URI, filename); !exist { 186 if err := h.gcloudStorage.Upload(ctx, URI, filename, encodedHistoryPart); err != nil { 187 logger.Error(archiver.ArchiveTransientErrorMsg, tag.ArchivalArchiveFailReason(errWriteFile), tag.Error(err)) 188 handler.Counter(metrics.HistoryArchiverArchiveTransientErrorCount.Name()).Record(1) 189 return err 190 } 191 192 totalUploadSize = totalUploadSize + int64(binary.Size(encodedHistoryPart)) 193 } 194 195 if err := saveHistoryIteratorState(ctx, featureCatalog, historyIterator, part, &progress); err != nil { 196 return err 197 } 198 } 199 200 handler.Counter(metrics.HistoryArchiverTotalUploadSize.Name()).Record(totalUploadSize) 201 handler.Counter(metrics.HistoryArchiverHistorySize.Name()).Record(totalUploadSize) 202 
handler.Counter(metrics.HistoryArchiverArchiveSuccessCount.Name()).Record(1) 203 return 204 } 205 206 // Get is used to access an archived history. When context expires method should stop trying to fetch history. 207 // The URI identifies the resource from which history should be accessed and it is up to the implementor to interpret this URI. 208 // This method should thrift errors - see filestore as an example. 209 func (h *historyArchiver) Get(ctx context.Context, URI archiver.URI, request *archiver.GetHistoryRequest) (*archiver.GetHistoryResponse, error) { 210 211 err := h.ValidateURI(URI) 212 if err != nil { 213 return nil, serviceerror.NewInvalidArgument(archiver.ErrInvalidURI.Error()) 214 } 215 216 if err := archiver.ValidateGetRequest(request); err != nil { 217 return nil, serviceerror.NewInvalidArgument(archiver.ErrInvalidGetHistoryRequest.Error()) 218 } 219 220 var token *getHistoryToken 221 if request.NextPageToken != nil { 222 token, err = deserializeGetHistoryToken(request.NextPageToken) 223 if err != nil { 224 return nil, serviceerror.NewInvalidArgument(archiver.ErrNextPageTokenCorrupted.Error()) 225 } 226 } else { 227 highestVersion, historyhighestPart, historyCurrentPart, err := h.getHighestVersion(ctx, URI, request) 228 if err != nil { 229 return nil, serviceerror.NewUnavailable(err.Error()) 230 } 231 if highestVersion == nil { 232 return nil, serviceerror.NewNotFound(archiver.ErrHistoryNotExist.Error()) 233 } 234 token = &getHistoryToken{ 235 CloseFailoverVersion: *highestVersion, 236 HighestPart: *historyhighestPart, 237 CurrentPart: *historyCurrentPart, 238 BatchIdxOffset: 0, 239 } 240 } 241 242 response := &archiver.GetHistoryResponse{} 243 response.HistoryBatches = []*historypb.History{} 244 numOfEvents := 0 245 encoder := codec.NewJSONPBEncoder() 246 247 outer: 248 for token.CurrentPart <= token.HighestPart { 249 250 filename := constructHistoryFilenameMultipart(request.NamespaceID, request.WorkflowID, request.RunID, 
token.CloseFailoverVersion, token.CurrentPart) 251 encodedHistoryBatches, err := h.gcloudStorage.Get(ctx, URI, filename) 252 if err != nil { 253 return nil, serviceerror.NewUnavailable(err.Error()) 254 } 255 if encodedHistoryBatches == nil { 256 return nil, serviceerror.NewInternal("Fail retrieving history file: " + URI.String() + "/" + filename) 257 } 258 259 batches, err := encoder.DecodeHistories(encodedHistoryBatches) 260 if err != nil { 261 return nil, serviceerror.NewInternal(err.Error()) 262 } 263 // trim the batches in the beginning based on token.BatchIdxOffset 264 batches = batches[token.BatchIdxOffset:] 265 266 for idx, batch := range batches { 267 response.HistoryBatches = append(response.HistoryBatches, batch) 268 token.BatchIdxOffset++ 269 numOfEvents += len(batch.Events) 270 271 if numOfEvents >= request.PageSize { 272 if idx == len(batches)-1 { 273 // handle the edge case where page size is meeted after adding the last batch 274 token.BatchIdxOffset = 0 275 token.CurrentPart++ 276 } 277 break outer 278 } 279 } 280 281 // reset the offset to 0 as we will read a new page 282 token.BatchIdxOffset = 0 283 token.CurrentPart++ 284 285 } 286 287 if token.CurrentPart <= token.HighestPart { 288 nextToken, err := serializeToken(token) 289 if err != nil { 290 return nil, serviceerror.NewInternal(err.Error()) 291 } 292 response.NextPageToken = nextToken 293 } 294 295 return response, nil 296 } 297 298 // ValidateURI is used to define what a valid URI for an implementation is. 
299 func (h *historyArchiver) ValidateURI(URI archiver.URI) (err error) { 300 301 if err = h.validateURI(URI); err == nil { 302 _, err = h.gcloudStorage.Exist(context.Background(), URI, "") 303 } 304 305 return 306 } 307 308 func (h *historyArchiver) validateURI(URI archiver.URI) (err error) { 309 if URI.Scheme() != URIScheme { 310 return archiver.ErrURISchemeMismatch 311 } 312 313 if URI.Path() == "" || URI.Hostname() == "" { 314 return archiver.ErrInvalidURI 315 } 316 317 return 318 } 319 320 func historyMutated(request *archiver.ArchiveHistoryRequest, historyBatches []*historypb.History, isLast bool) bool { 321 lastBatch := historyBatches[len(historyBatches)-1].Events 322 lastEvent := lastBatch[len(lastBatch)-1] 323 lastFailoverVersion := lastEvent.GetVersion() 324 if lastFailoverVersion > request.CloseFailoverVersion { 325 return true 326 } 327 328 if !isLast { 329 return false 330 } 331 lastEventID := lastEvent.GetEventId() 332 return lastFailoverVersion != request.CloseFailoverVersion || lastEventID+1 != request.NextEventID 333 } 334 335 func (h *historyArchiver) getHighestVersion(ctx context.Context, URI archiver.URI, request *archiver.GetHistoryRequest) (*int64, *int, *int, error) { 336 337 filenames, err := h.gcloudStorage.Query(ctx, URI, constructHistoryFilenamePrefix(request.NamespaceID, request.WorkflowID, request.RunID)) 338 339 if err != nil { 340 return nil, nil, nil, err 341 } 342 343 var highestVersion *int64 344 var highestVersionPart *int 345 var lowestVersionPart *int 346 347 for _, filename := range filenames { 348 version, partVersionID, err := extractCloseFailoverVersion(filepath.Base(filename)) 349 if err != nil || (request.CloseFailoverVersion != nil && version != *request.CloseFailoverVersion) { 350 continue 351 } 352 353 if highestVersion == nil || version > *highestVersion { 354 highestVersion = &version 355 highestVersionPart = new(int) 356 lowestVersionPart = new(int) 357 } 358 359 if *highestVersion == version { 360 if 
highestVersionPart == nil || partVersionID > *highestVersionPart { 361 highestVersionPart = &partVersionID 362 } 363 364 if lowestVersionPart == nil || partVersionID < *lowestVersionPart { 365 lowestVersionPart = &partVersionID 366 } 367 } 368 369 } 370 371 return highestVersion, highestVersionPart, lowestVersionPart, nil 372 } 373 374 func loadHistoryIterator(ctx context.Context, request *archiver.ArchiveHistoryRequest, executionManager persistence.ExecutionManager, featureCatalog *archiver.ArchiveFeatureCatalog, progress *progress) (historyIterator archiver.HistoryIterator, err error) { 375 376 defer func() { 377 if err != nil || historyIterator == nil { 378 historyIterator, err = archiver.NewHistoryIteratorFromState(request, executionManager, targetHistoryBlobSize, nil) 379 } 380 }() 381 382 if featureCatalog.ProgressManager != nil { 383 if featureCatalog.ProgressManager.HasProgress(ctx) { 384 err = featureCatalog.ProgressManager.LoadProgress(ctx, &progress) 385 if err == nil { 386 historyIterator, err = archiver.NewHistoryIteratorFromState(request, executionManager, targetHistoryBlobSize, progress.IteratorState) 387 } 388 } 389 390 } 391 return 392 } 393 394 func saveHistoryIteratorState(ctx context.Context, featureCatalog *archiver.ArchiveFeatureCatalog, historyIterator archiver.HistoryIterator, currentPartNum int, progress *progress) (err error) { 395 var state []byte 396 if featureCatalog.ProgressManager != nil { 397 state, err = historyIterator.GetState() 398 if err == nil { 399 progress.CurrentPageNumber = currentPartNum + 1 400 progress.IteratorState = state 401 402 err = featureCatalog.ProgressManager.RecordProgress(ctx, progress) 403 } 404 } 405 406 return err 407 }