// Source: go.temporal.io/server@v1.23.0/common/archiver/s3store/visibility_archiver.go
//
// The MIT License
//
// Copyright (c) 2020 Temporal Technologies Inc. All rights reserved.
//
// Copyright (c) 2020 Uber Technologies, Inc.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
24 25 package s3store 26 27 import ( 28 "context" 29 "strings" 30 "time" 31 32 "github.com/aws/aws-sdk-go/aws" 33 "github.com/aws/aws-sdk-go/aws/session" 34 "github.com/aws/aws-sdk-go/service/s3" 35 "github.com/aws/aws-sdk-go/service/s3/s3iface" 36 "go.temporal.io/api/serviceerror" 37 workflowpb "go.temporal.io/api/workflow/v1" 38 39 "go.temporal.io/server/common/searchattribute" 40 41 archiverspb "go.temporal.io/server/api/archiver/v1" 42 "go.temporal.io/server/common/archiver" 43 "go.temporal.io/server/common/config" 44 "go.temporal.io/server/common/log/tag" 45 "go.temporal.io/server/common/metrics" 46 "go.temporal.io/server/common/primitives/timestamp" 47 ) 48 49 type ( 50 visibilityArchiver struct { 51 container *archiver.VisibilityBootstrapContainer 52 s3cli s3iface.S3API 53 queryParser QueryParser 54 } 55 56 queryVisibilityRequest struct { 57 namespaceID string 58 pageSize int 59 nextPageToken []byte 60 parsedQuery *parsedQuery 61 } 62 63 indexToArchive struct { 64 primaryIndex string 65 primaryIndexValue string 66 secondaryIndex string 67 secondaryIndexTimestamp time.Time 68 } 69 ) 70 71 const ( 72 errEncodeVisibilityRecord = "failed to encode visibility record" 73 secondaryIndexKeyStartTimeout = "startTimeout" 74 secondaryIndexKeyCloseTimeout = "closeTimeout" 75 primaryIndexKeyWorkflowTypeName = "workflowTypeName" 76 primaryIndexKeyWorkflowID = "workflowID" 77 ) 78 79 // NewVisibilityArchiver creates a new archiver.VisibilityArchiver based on s3 80 func NewVisibilityArchiver( 81 container *archiver.VisibilityBootstrapContainer, 82 config *config.S3Archiver, 83 ) (archiver.VisibilityArchiver, error) { 84 return newVisibilityArchiver(container, config) 85 } 86 87 func newVisibilityArchiver( 88 container *archiver.VisibilityBootstrapContainer, 89 config *config.S3Archiver) (*visibilityArchiver, error) { 90 s3Config := &aws.Config{ 91 Endpoint: config.Endpoint, 92 Region: aws.String(config.Region), 93 S3ForcePathStyle: aws.Bool(config.S3ForcePathStyle), 94 
LogLevel: (*aws.LogLevelType)(&config.LogLevel), 95 } 96 sess, err := session.NewSession(s3Config) 97 if err != nil { 98 return nil, err 99 } 100 return &visibilityArchiver{ 101 container: container, 102 s3cli: s3.New(sess), 103 queryParser: NewQueryParser(), 104 }, nil 105 } 106 107 func (v *visibilityArchiver) Archive( 108 ctx context.Context, 109 URI archiver.URI, 110 request *archiverspb.VisibilityRecord, 111 opts ...archiver.ArchiveOption, 112 ) (err error) { 113 handler := v.container.MetricsHandler.WithTags(metrics.OperationTag(metrics.VisibilityArchiverScope), metrics.NamespaceTag(request.Namespace)) 114 featureCatalog := archiver.GetFeatureCatalog(opts...) 115 startTime := time.Now().UTC() 116 logger := archiver.TagLoggerWithArchiveVisibilityRequestAndURI(v.container.Logger, request, URI.String()) 117 archiveFailReason := "" 118 defer func() { 119 handler.Timer(metrics.ServiceLatency.Name()).Record(time.Since(startTime)) 120 if err != nil { 121 if isRetryableError(err) { 122 handler.Counter(metrics.VisibilityArchiverArchiveTransientErrorCount.Name()).Record(1) 123 logger.Error(archiver.ArchiveTransientErrorMsg, tag.ArchivalArchiveFailReason(archiveFailReason), tag.Error(err)) 124 } else { 125 handler.Counter(metrics.VisibilityArchiverArchiveNonRetryableErrorCount.Name()).Record(1) 126 logger.Error(archiver.ArchiveNonRetryableErrorMsg, tag.ArchivalArchiveFailReason(archiveFailReason), tag.Error(err)) 127 if featureCatalog.NonRetryableError != nil { 128 err = featureCatalog.NonRetryableError() 129 } 130 } 131 } 132 }() 133 134 if err := SoftValidateURI(URI); err != nil { 135 archiveFailReason = archiver.ErrReasonInvalidURI 136 return err 137 } 138 139 if err := archiver.ValidateVisibilityArchivalRequest(request); err != nil { 140 archiveFailReason = archiver.ErrReasonInvalidArchiveRequest 141 return err 142 } 143 144 encodedVisibilityRecord, err := Encode(request) 145 if err != nil { 146 archiveFailReason = errEncodeVisibilityRecord 147 return err 148 } 149 
indexes := createIndexesToArchive(request) 150 // Upload archive to all indexes 151 for _, element := range indexes { 152 key := constructTimestampIndex(URI.Path(), request.GetNamespaceId(), element.primaryIndex, element.primaryIndexValue, element.secondaryIndex, element.secondaryIndexTimestamp, request.GetRunId()) 153 if err := Upload(ctx, v.s3cli, URI, key, encodedVisibilityRecord); err != nil { 154 archiveFailReason = errWriteKey 155 return err 156 } 157 } 158 handler.Counter(metrics.VisibilityArchiveSuccessCount.Name()).Record(1) 159 return nil 160 } 161 162 func createIndexesToArchive(request *archiverspb.VisibilityRecord) []indexToArchive { 163 return []indexToArchive{ 164 {primaryIndexKeyWorkflowTypeName, request.WorkflowTypeName, secondaryIndexKeyCloseTimeout, timestamp.TimeValue(request.CloseTime)}, 165 {primaryIndexKeyWorkflowTypeName, request.WorkflowTypeName, secondaryIndexKeyStartTimeout, timestamp.TimeValue(request.StartTime)}, 166 {primaryIndexKeyWorkflowID, request.GetWorkflowId(), secondaryIndexKeyCloseTimeout, timestamp.TimeValue(request.CloseTime)}, 167 {primaryIndexKeyWorkflowID, request.GetWorkflowId(), secondaryIndexKeyStartTimeout, timestamp.TimeValue(request.StartTime)}, 168 } 169 } 170 171 func (v *visibilityArchiver) Query( 172 ctx context.Context, 173 URI archiver.URI, 174 request *archiver.QueryVisibilityRequest, 175 saTypeMap searchattribute.NameTypeMap, 176 ) (*archiver.QueryVisibilityResponse, error) { 177 178 if err := SoftValidateURI(URI); err != nil { 179 return nil, serviceerror.NewInvalidArgument(archiver.ErrInvalidURI.Error()) 180 } 181 182 if err := archiver.ValidateQueryRequest(request); err != nil { 183 return nil, serviceerror.NewInvalidArgument(archiver.ErrInvalidQueryVisibilityRequest.Error()) 184 } 185 186 if strings.TrimSpace(request.Query) == "" { 187 return v.queryAll(ctx, URI, request, saTypeMap) 188 } 189 190 parsedQuery, err := v.queryParser.Parse(request.Query) 191 if err != nil { 192 return nil, 
serviceerror.NewInvalidArgument(err.Error()) 193 } 194 195 return v.query( 196 ctx, 197 URI, 198 &queryVisibilityRequest{ 199 namespaceID: request.NamespaceID, 200 pageSize: request.PageSize, 201 nextPageToken: request.NextPageToken, 202 parsedQuery: parsedQuery, 203 }, 204 saTypeMap, 205 ) 206 } 207 208 // queryAll returns all workflow executions in the archive. 209 func (v *visibilityArchiver) queryAll( 210 ctx context.Context, 211 uri archiver.URI, 212 request *archiver.QueryVisibilityRequest, 213 saTypeMap searchattribute.NameTypeMap, 214 ) (*archiver.QueryVisibilityResponse, error) { 215 // remaining is the number of workflow executions left to return before we reach pageSize. 216 remaining := request.PageSize 217 nextPageToken := request.NextPageToken 218 var executions []*workflowpb.WorkflowExecutionInfo 219 // We need to loop because the number of workflow executions returned by each call to query may be fewer than 220 // pageSize. This is because we may have to skip some workflow executions after querying S3 (client-side filtering) 221 // because there are 2 entries in S3 for each workflow execution indexed by workflowTypeName (one for closeTimeout 222 // and one for startTimeout), and we only want to return one entry per workflow execution. See 223 // createIndexesToArchive for a list of all indexes. 224 for { 225 searchPrefix := constructVisibilitySearchPrefix(uri.Path(), request.NamespaceID) 226 // We suffix searchPrefix with workflowTypeName because the data in S3 is duplicated across combinations of 2 227 // different primary indices (workflowID and workflowTypeName) and 2 different secondary indices (closeTimeout 228 // and startTimeout). We only want to return one entry per workflow execution, but the full path to the S3 key 229 // is <primaryIndexKey>/<primaryIndexValue>/<secondaryIndexKey>/<secondaryIndexValue>/<runID>, and we don't have 230 // the primaryIndexValue when we make the call to query, so we can only specify the primaryIndexKey. 
231 searchPrefix += "/" + primaryIndexKeyWorkflowTypeName 232 // The pageSize we supply here is actually the maximum number of keys to fetch from S3. For each execution, 233 // there should be 2 keys in S3 for this prefix, so you might think that we should multiply the pageSize by 2. 234 // However, if we do that, we may end up returning more than pageSize workflow executions to the end user of 235 // this API. This is because we aren't guaranteed that both keys for a given workflow execution will be returned 236 // in the same call. For example, if the user supplies a pageSize of 1, and we specify a maximum number of keys 237 // of 2 to S3, we may get back entries from S3 for 2 different workflow executions. You might think that we can 238 // just truncate this result to 1 workflow execution, but then the nextPageToken would be incorrect. So, we may 239 // need to make multiple calls to S3 to get the correct number of workflow executions, which will probably make 240 // this API call slower. 241 res, err := v.queryPrefix(ctx, uri, &queryVisibilityRequest{ 242 namespaceID: request.NamespaceID, 243 pageSize: remaining, 244 nextPageToken: nextPageToken, 245 parsedQuery: &parsedQuery{}, 246 }, saTypeMap, searchPrefix, func(key string) bool { 247 // We only want to return entries for the closeTimeout secondary index, which will always be of the form: 248 // .../closeTimeout/<closeTimeout>/<runID>, so we split the key on "/" and check that the third-to-last 249 // element is "closeTimeout". 250 elements := strings.Split(key, "/") 251 return len(elements) >= 3 && elements[len(elements)-3] == secondaryIndexKeyCloseTimeout 252 }) 253 if err != nil { 254 return nil, err 255 } 256 nextPageToken = res.NextPageToken 257 executions = append(executions, res.Executions...) 
258 remaining -= len(res.Executions) 259 if len(nextPageToken) == 0 || remaining <= 0 { 260 break 261 } 262 } 263 return &archiver.QueryVisibilityResponse{ 264 Executions: executions, 265 NextPageToken: nextPageToken, 266 }, nil 267 } 268 269 func (v *visibilityArchiver) query( 270 ctx context.Context, 271 URI archiver.URI, 272 request *queryVisibilityRequest, 273 saTypeMap searchattribute.NameTypeMap, 274 ) (*archiver.QueryVisibilityResponse, error) { 275 primaryIndex := primaryIndexKeyWorkflowTypeName 276 primaryIndexValue := request.parsedQuery.workflowTypeName 277 if request.parsedQuery.workflowID != nil { 278 primaryIndex = primaryIndexKeyWorkflowID 279 primaryIndexValue = request.parsedQuery.workflowID 280 } 281 282 prefix := constructIndexedVisibilitySearchPrefix( 283 URI.Path(), 284 request.namespaceID, 285 primaryIndex, 286 *primaryIndexValue, 287 secondaryIndexKeyCloseTimeout, 288 ) + "/" 289 if request.parsedQuery.closeTime != nil { 290 prefix = constructTimeBasedSearchKey( 291 URI.Path(), 292 request.namespaceID, 293 primaryIndex, 294 *primaryIndexValue, 295 secondaryIndexKeyCloseTimeout, 296 *request.parsedQuery.closeTime, 297 *request.parsedQuery.searchPrecision, 298 ) 299 } 300 if request.parsedQuery.startTime != nil { 301 prefix = constructTimeBasedSearchKey( 302 URI.Path(), 303 request.namespaceID, 304 primaryIndex, 305 *primaryIndexValue, 306 secondaryIndexKeyStartTimeout, 307 *request.parsedQuery.startTime, 308 *request.parsedQuery.searchPrecision, 309 ) 310 } 311 312 return v.queryPrefix(ctx, URI, request, saTypeMap, prefix, nil) 313 } 314 315 // queryPrefix returns all workflow executions in the archive that match the given prefix. The keyFilter function is an 316 // optional filter that can be used to further filter the results. If keyFilter returns false for a given key, that key 317 // will be skipped, and the object will not be downloaded from S3 or included in the results. 
318 func (v *visibilityArchiver) queryPrefix( 319 ctx context.Context, 320 uri archiver.URI, 321 request *queryVisibilityRequest, 322 saTypeMap searchattribute.NameTypeMap, 323 prefix string, 324 keyFilter func(key string) bool, 325 ) (*archiver.QueryVisibilityResponse, error) { 326 ctx, cancel := ensureContextTimeout(ctx) 327 defer cancel() 328 329 var token *string 330 331 if request.nextPageToken != nil { 332 token = deserializeQueryVisibilityToken(request.nextPageToken) 333 } 334 results, err := v.s3cli.ListObjectsV2WithContext(ctx, &s3.ListObjectsV2Input{ 335 Bucket: aws.String(uri.Hostname()), 336 Prefix: aws.String(prefix), 337 MaxKeys: aws.Int64(int64(request.pageSize)), 338 ContinuationToken: token, 339 }) 340 if err != nil { 341 if isRetryableError(err) { 342 return nil, serviceerror.NewUnavailable(err.Error()) 343 } 344 return nil, serviceerror.NewInvalidArgument(err.Error()) 345 } 346 if len(results.Contents) == 0 { 347 return &archiver.QueryVisibilityResponse{}, nil 348 } 349 350 response := &archiver.QueryVisibilityResponse{} 351 if *results.IsTruncated { 352 response.NextPageToken = serializeQueryVisibilityToken(*results.NextContinuationToken) 353 } 354 for _, item := range results.Contents { 355 if keyFilter != nil && !keyFilter(*item.Key) { 356 continue 357 } 358 359 encodedRecord, err := Download(ctx, v.s3cli, uri, *item.Key) 360 if err != nil { 361 return nil, serviceerror.NewUnavailable(err.Error()) 362 } 363 364 record, err := decodeVisibilityRecord(encodedRecord) 365 if err != nil { 366 return nil, serviceerror.NewInternal(err.Error()) 367 } 368 executionInfo, err := convertToExecutionInfo(record, saTypeMap) 369 if err != nil { 370 return nil, serviceerror.NewInternal(err.Error()) 371 } 372 response.Executions = append(response.Executions, executionInfo) 373 } 374 return response, nil 375 } 376 377 func (v *visibilityArchiver) ValidateURI(URI archiver.URI) error { 378 err := SoftValidateURI(URI) 379 if err != nil { 380 return err 381 } 382 
return BucketExists(context.TODO(), v.s3cli, URI) 383 }