// Source: go.temporal.io/server@v1.23.0/common/archiver/s3store/history_archiver.go

// The MIT License
//
// Copyright (c) 2020 Temporal Technologies Inc.  All rights reserved.
//
// Copyright (c) 2020 Uber Technologies, Inc.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
24 25 // S3 History Archiver will archive workflow histories to amazon s3 26 27 package s3store 28 29 import ( 30 "context" 31 "encoding/binary" 32 "errors" 33 "strconv" 34 "strings" 35 "time" 36 37 "github.com/aws/aws-sdk-go/aws" 38 "github.com/aws/aws-sdk-go/aws/awserr" 39 "github.com/aws/aws-sdk-go/aws/request" 40 "github.com/aws/aws-sdk-go/aws/session" 41 "github.com/aws/aws-sdk-go/service/s3" 42 "github.com/aws/aws-sdk-go/service/s3/s3iface" 43 "go.temporal.io/api/serviceerror" 44 45 archiverspb "go.temporal.io/server/api/archiver/v1" 46 "go.temporal.io/server/common" 47 "go.temporal.io/server/common/archiver" 48 "go.temporal.io/server/common/codec" 49 "go.temporal.io/server/common/config" 50 "go.temporal.io/server/common/log" 51 "go.temporal.io/server/common/log/tag" 52 "go.temporal.io/server/common/metrics" 53 "go.temporal.io/server/common/persistence" 54 ) 55 56 const ( 57 // URIScheme is the scheme for the s3 implementation 58 URIScheme = "s3" 59 errEncodeHistory = "failed to encode history batches" 60 errWriteKey = "failed to write history to s3" 61 defaultBlobstoreTimeout = time.Minute 62 targetHistoryBlobSize = 2 * 1024 * 1024 // 2MB 63 ) 64 65 var ( 66 errNoBucketSpecified = errors.New("no bucket specified") 67 errBucketNotExists = errors.New("requested bucket does not exist") 68 errEmptyAwsRegion = errors.New("empty aws region") 69 ) 70 71 type ( 72 historyArchiver struct { 73 container *archiver.HistoryBootstrapContainer 74 s3cli s3iface.S3API 75 // only set in test code 76 historyIterator archiver.HistoryIterator 77 } 78 79 getHistoryToken struct { 80 CloseFailoverVersion int64 81 BatchIdx int 82 } 83 84 uploadProgress struct { 85 BatchIdx int 86 IteratorState []byte 87 uploadedSize int64 88 historySize int64 89 } 90 ) 91 92 // NewHistoryArchiver creates a new archiver.HistoryArchiver based on s3 93 func NewHistoryArchiver( 94 container *archiver.HistoryBootstrapContainer, 95 config *config.S3Archiver, 96 ) (archiver.HistoryArchiver, error) { 97 
return newHistoryArchiver(container, config, nil) 98 } 99 100 func newHistoryArchiver( 101 container *archiver.HistoryBootstrapContainer, 102 config *config.S3Archiver, 103 historyIterator archiver.HistoryIterator, 104 ) (*historyArchiver, error) { 105 if len(config.Region) == 0 { 106 return nil, errEmptyAwsRegion 107 } 108 s3Config := &aws.Config{ 109 Endpoint: config.Endpoint, 110 Region: aws.String(config.Region), 111 S3ForcePathStyle: aws.Bool(config.S3ForcePathStyle), 112 LogLevel: (*aws.LogLevelType)(&config.LogLevel), 113 } 114 sess, err := session.NewSession(s3Config) 115 if err != nil { 116 return nil, err 117 } 118 119 return &historyArchiver{ 120 container: container, 121 s3cli: s3.New(sess), 122 historyIterator: historyIterator, 123 }, nil 124 } 125 func (h *historyArchiver) Archive( 126 ctx context.Context, 127 URI archiver.URI, 128 request *archiver.ArchiveHistoryRequest, 129 opts ...archiver.ArchiveOption, 130 ) (err error) { 131 handler := h.container.MetricsHandler.WithTags(metrics.OperationTag(metrics.HistoryArchiverScope), metrics.NamespaceTag(request.Namespace)) 132 featureCatalog := archiver.GetFeatureCatalog(opts...) 
133 startTime := time.Now().UTC() 134 defer func() { 135 handler.Timer(metrics.ServiceLatency.Name()).Record(time.Since(startTime)) 136 if err != nil { 137 if common.IsPersistenceTransientError(err) || isRetryableError(err) { 138 handler.Counter(metrics.HistoryArchiverArchiveTransientErrorCount.Name()).Record(1) 139 } else { 140 handler.Counter(metrics.HistoryArchiverArchiveNonRetryableErrorCount.Name()).Record(1) 141 if featureCatalog.NonRetryableError != nil { 142 err = featureCatalog.NonRetryableError() 143 } 144 } 145 } 146 }() 147 148 logger := archiver.TagLoggerWithArchiveHistoryRequestAndURI(h.container.Logger, request, URI.String()) 149 150 if err := SoftValidateURI(URI); err != nil { 151 logger.Error(archiver.ArchiveNonRetryableErrorMsg, tag.ArchivalArchiveFailReason(archiver.ErrReasonInvalidURI), tag.Error(err)) 152 return err 153 } 154 155 if err := archiver.ValidateHistoryArchiveRequest(request); err != nil { 156 logger.Error(archiver.ArchiveNonRetryableErrorMsg, tag.ArchivalArchiveFailReason(archiver.ErrReasonInvalidArchiveRequest), tag.Error(err)) 157 return err 158 } 159 160 var progress uploadProgress 161 historyIterator := h.historyIterator 162 if historyIterator == nil { // will only be set by testing code 163 historyIterator = loadHistoryIterator(ctx, request, h.container.ExecutionManager, featureCatalog, &progress) 164 } 165 for historyIterator.HasNext() { 166 historyBlob, err := historyIterator.Next(ctx) 167 if err != nil { 168 if _, isNotFound := err.(*serviceerror.NotFound); isNotFound { 169 // workflow history no longer exists, may due to duplicated archival signal 170 // this may happen even in the middle of iterating history as two archival signals 171 // can be processed concurrently. 
172 logger.Info(archiver.ArchiveSkippedInfoMsg) 173 handler.Counter(metrics.HistoryArchiverDuplicateArchivalsCount.Name()).Record(1) 174 return nil 175 } 176 177 logger := log.With(logger, tag.ArchivalArchiveFailReason(archiver.ErrReasonReadHistory), tag.Error(err)) 178 if common.IsPersistenceTransientError(err) { 179 logger.Error(archiver.ArchiveTransientErrorMsg) 180 } else { 181 logger.Error(archiver.ArchiveNonRetryableErrorMsg) 182 } 183 return err 184 } 185 186 if historyMutated(request, historyBlob.Body, historyBlob.Header.IsLast) { 187 logger.Error(archiver.ArchiveNonRetryableErrorMsg, tag.ArchivalArchiveFailReason(archiver.ErrReasonHistoryMutated)) 188 return archiver.ErrHistoryMutated 189 } 190 191 encoder := codec.NewJSONPBEncoder() 192 encodedHistoryBlob, err := encoder.Encode(historyBlob) 193 if err != nil { 194 logger.Error(archiver.ArchiveNonRetryableErrorMsg, tag.ArchivalArchiveFailReason(errEncodeHistory), tag.Error(err)) 195 return err 196 } 197 key := constructHistoryKey(URI.Path(), request.NamespaceID, request.WorkflowID, request.RunID, request.CloseFailoverVersion, progress.BatchIdx) 198 199 exists, err := KeyExists(ctx, h.s3cli, URI, key) 200 if err != nil { 201 if isRetryableError(err) { 202 logger.Error(archiver.ArchiveTransientErrorMsg, tag.ArchivalArchiveFailReason(errWriteKey), tag.Error(err)) 203 } else { 204 logger.Error(archiver.ArchiveNonRetryableErrorMsg, tag.ArchivalArchiveFailReason(errWriteKey), tag.Error(err)) 205 } 206 return err 207 } 208 blobSize := int64(binary.Size(encodedHistoryBlob)) 209 if exists { 210 handler.Counter(metrics.HistoryArchiverBlobExistsCount.Name()).Record(1) 211 } else { 212 if err := Upload(ctx, h.s3cli, URI, key, encodedHistoryBlob); err != nil { 213 if isRetryableError(err) { 214 logger.Error(archiver.ArchiveTransientErrorMsg, tag.ArchivalArchiveFailReason(errWriteKey), tag.Error(err)) 215 } else { 216 logger.Error(archiver.ArchiveNonRetryableErrorMsg, tag.ArchivalArchiveFailReason(errWriteKey), 
tag.Error(err)) 217 } 218 return err 219 } 220 progress.uploadedSize += blobSize 221 handler.Histogram(metrics.HistoryArchiverBlobSize.Name(), metrics.HistoryArchiverBlobSize.Unit()).Record(blobSize) 222 } 223 224 progress.historySize += blobSize 225 progress.BatchIdx = progress.BatchIdx + 1 226 saveHistoryIteratorState(ctx, featureCatalog, historyIterator, &progress) 227 } 228 229 handler.Histogram(metrics.HistoryArchiverTotalUploadSize.Name(), metrics.HistoryArchiverTotalUploadSize.Unit()).Record(progress.uploadedSize) 230 handler.Histogram(metrics.HistoryArchiverHistorySize.Name(), metrics.HistoryArchiverHistorySize.Unit()).Record(progress.historySize) 231 handler.Counter(metrics.HistoryArchiverArchiveSuccessCount.Name()).Record(1) 232 return nil 233 } 234 235 func loadHistoryIterator(ctx context.Context, request *archiver.ArchiveHistoryRequest, executionManager persistence.ExecutionManager, featureCatalog *archiver.ArchiveFeatureCatalog, progress *uploadProgress) (historyIterator archiver.HistoryIterator) { 236 if featureCatalog.ProgressManager != nil { 237 if featureCatalog.ProgressManager.HasProgress(ctx) { 238 err := featureCatalog.ProgressManager.LoadProgress(ctx, progress) 239 if err == nil { 240 historyIterator, err := archiver.NewHistoryIteratorFromState(request, executionManager, targetHistoryBlobSize, progress.IteratorState) 241 if err == nil { 242 return historyIterator 243 } 244 } 245 progress.IteratorState = nil 246 progress.BatchIdx = 0 247 progress.historySize = 0 248 progress.uploadedSize = 0 249 } 250 } 251 return archiver.NewHistoryIterator(request, executionManager, targetHistoryBlobSize) 252 } 253 254 func saveHistoryIteratorState(ctx context.Context, featureCatalog *archiver.ArchiveFeatureCatalog, historyIterator archiver.HistoryIterator, progress *uploadProgress) { 255 // Saving history state is a best effort operation. 
Ignore errors and continue 256 if featureCatalog.ProgressManager != nil { 257 state, err := historyIterator.GetState() 258 if err != nil { 259 return 260 } 261 progress.IteratorState = state 262 err = featureCatalog.ProgressManager.RecordProgress(ctx, progress) 263 if err != nil { 264 return 265 } 266 } 267 } 268 269 func (h *historyArchiver) Get( 270 ctx context.Context, 271 URI archiver.URI, 272 request *archiver.GetHistoryRequest, 273 ) (*archiver.GetHistoryResponse, error) { 274 if err := SoftValidateURI(URI); err != nil { 275 return nil, serviceerror.NewInvalidArgument(archiver.ErrInvalidURI.Error()) 276 } 277 278 if err := archiver.ValidateGetRequest(request); err != nil { 279 return nil, serviceerror.NewInvalidArgument(archiver.ErrInvalidGetHistoryRequest.Error()) 280 } 281 282 var err error 283 var token *getHistoryToken 284 if request.NextPageToken != nil { 285 token, err = deserializeGetHistoryToken(request.NextPageToken) 286 if err != nil { 287 return nil, serviceerror.NewInvalidArgument(archiver.ErrNextPageTokenCorrupted.Error()) 288 } 289 } else if request.CloseFailoverVersion != nil { 290 token = &getHistoryToken{ 291 CloseFailoverVersion: *request.CloseFailoverVersion, 292 } 293 } else { 294 highestVersion, err := h.getHighestVersion(ctx, URI, request) 295 if err != nil { 296 if err == archiver.ErrHistoryNotExist { 297 return nil, serviceerror.NewNotFound(err.Error()) 298 } 299 return nil, serviceerror.NewInvalidArgument(err.Error()) 300 } 301 token = &getHistoryToken{ 302 CloseFailoverVersion: *highestVersion, 303 } 304 } 305 encoder := codec.NewJSONPBEncoder() 306 response := &archiver.GetHistoryResponse{} 307 numOfEvents := 0 308 isTruncated := false 309 for { 310 if numOfEvents >= request.PageSize { 311 isTruncated = true 312 break 313 } 314 key := constructHistoryKey(URI.Path(), request.NamespaceID, request.WorkflowID, request.RunID, token.CloseFailoverVersion, token.BatchIdx) 315 316 encodedRecord, err := Download(ctx, h.s3cli, URI, key) 317 if 
err != nil { 318 if isRetryableError(err) { 319 return nil, serviceerror.NewUnavailable(err.Error()) 320 } 321 switch err.(type) { 322 case *serviceerror.InvalidArgument, *serviceerror.Unavailable, *serviceerror.NotFound: 323 return nil, err 324 default: 325 return nil, serviceerror.NewInternal(err.Error()) 326 } 327 } 328 329 historyBlob := archiverspb.HistoryBlob{} 330 err = encoder.Decode(encodedRecord, &historyBlob) 331 if err != nil { 332 return nil, serviceerror.NewInternal(err.Error()) 333 } 334 335 for _, batch := range historyBlob.Body { 336 response.HistoryBatches = append(response.HistoryBatches, batch) 337 numOfEvents += len(batch.Events) 338 } 339 340 if historyBlob.Header.IsLast { 341 break 342 } 343 token.BatchIdx++ 344 } 345 346 if isTruncated { 347 nextToken, err := SerializeToken(token) 348 if err != nil { 349 return nil, serviceerror.NewInternal(err.Error()) 350 } 351 response.NextPageToken = nextToken 352 } 353 354 return response, nil 355 } 356 357 func (h *historyArchiver) ValidateURI(URI archiver.URI) error { 358 err := SoftValidateURI(URI) 359 if err != nil { 360 return err 361 } 362 return BucketExists(context.TODO(), h.s3cli, URI) 363 } 364 365 func (h *historyArchiver) getHighestVersion(ctx context.Context, URI archiver.URI, request *archiver.GetHistoryRequest) (*int64, error) { 366 ctx, cancel := ensureContextTimeout(ctx) 367 defer cancel() 368 var prefix = constructHistoryKeyPrefix(URI.Path(), request.NamespaceID, request.WorkflowID, request.RunID) + "/" 369 results, err := h.s3cli.ListObjectsV2WithContext(ctx, &s3.ListObjectsV2Input{ 370 Bucket: aws.String(URI.Hostname()), 371 Prefix: aws.String(prefix), 372 Delimiter: aws.String("/"), 373 }) 374 if err != nil { 375 if aerr, ok := err.(awserr.Error); ok && aerr.Code() == s3.ErrCodeNoSuchBucket { 376 return nil, serviceerror.NewInvalidArgument(errBucketNotExists.Error()) 377 } 378 return nil, err 379 } 380 var highestVersion *int64 381 382 for _, v := range results.CommonPrefixes { 383 
var version int64 384 version, err = strconv.ParseInt(strings.Replace(strings.Replace(*v.Prefix, prefix, "", 1), "/", "", 1), 10, 64) 385 if err != nil { 386 continue 387 } 388 if highestVersion == nil || version > *highestVersion { 389 highestVersion = &version 390 } 391 } 392 if highestVersion == nil { 393 return nil, archiver.ErrHistoryNotExist 394 } 395 return highestVersion, nil 396 } 397 398 func isRetryableError(err error) bool { 399 if err == nil { 400 return false 401 } 402 if aerr, ok := err.(awserr.Error); ok { 403 return isStatusCodeRetryable(aerr) || request.IsErrorRetryable(aerr) || request.IsErrorThrottle(aerr) 404 } 405 return false 406 } 407 408 func isStatusCodeRetryable(err error) bool { 409 if aerr, ok := err.(awserr.Error); ok { 410 if rerr, ok := err.(awserr.RequestFailure); ok { 411 if rerr.StatusCode() == 429 { 412 return true 413 } 414 if rerr.StatusCode() >= 500 && rerr.StatusCode() != 501 { 415 return true 416 } 417 } 418 return isStatusCodeRetryable(aerr.OrigErr()) 419 } 420 return false 421 }