go.temporal.io/server@v1.23.0/common/archiver/s3store/visibility_archiver.go (about)

     1  // The MIT License
     2  //
     3  // Copyright (c) 2020 Temporal Technologies Inc.  All rights reserved.
     4  //
     5  // Copyright (c) 2020 Uber Technologies, Inc.
     6  //
     7  // Permission is hereby granted, free of charge, to any person obtaining a copy
     8  // of this software and associated documentation files (the "Software"), to deal
     9  // in the Software without restriction, including without limitation the rights
    10  // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
    11  // copies of the Software, and to permit persons to whom the Software is
    12  // furnished to do so, subject to the following conditions:
    13  //
    14  // The above copyright notice and this permission notice shall be included in
    15  // all copies or substantial portions of the Software.
    16  //
    17  // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    18  // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    19  // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
    20  // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    21  // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
    22  // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
    23  // THE SOFTWARE.
    24  
    25  package s3store
    26  
    27  import (
    28  	"context"
    29  	"strings"
    30  	"time"
    31  
    32  	"github.com/aws/aws-sdk-go/aws"
    33  	"github.com/aws/aws-sdk-go/aws/session"
    34  	"github.com/aws/aws-sdk-go/service/s3"
    35  	"github.com/aws/aws-sdk-go/service/s3/s3iface"
    36  	"go.temporal.io/api/serviceerror"
    37  	workflowpb "go.temporal.io/api/workflow/v1"
    38  
    39  	"go.temporal.io/server/common/searchattribute"
    40  
    41  	archiverspb "go.temporal.io/server/api/archiver/v1"
    42  	"go.temporal.io/server/common/archiver"
    43  	"go.temporal.io/server/common/config"
    44  	"go.temporal.io/server/common/log/tag"
    45  	"go.temporal.io/server/common/metrics"
    46  	"go.temporal.io/server/common/primitives/timestamp"
    47  )
    48  
    49  type (
    50  	visibilityArchiver struct {
    51  		container   *archiver.VisibilityBootstrapContainer
    52  		s3cli       s3iface.S3API
    53  		queryParser QueryParser
    54  	}
    55  
    56  	queryVisibilityRequest struct {
    57  		namespaceID   string
    58  		pageSize      int
    59  		nextPageToken []byte
    60  		parsedQuery   *parsedQuery
    61  	}
    62  
    63  	indexToArchive struct {
    64  		primaryIndex            string
    65  		primaryIndexValue       string
    66  		secondaryIndex          string
    67  		secondaryIndexTimestamp time.Time
    68  	}
    69  )
    70  
    71  const (
    72  	errEncodeVisibilityRecord       = "failed to encode visibility record"
    73  	secondaryIndexKeyStartTimeout   = "startTimeout"
    74  	secondaryIndexKeyCloseTimeout   = "closeTimeout"
    75  	primaryIndexKeyWorkflowTypeName = "workflowTypeName"
    76  	primaryIndexKeyWorkflowID       = "workflowID"
    77  )
    78  
    79  // NewVisibilityArchiver creates a new archiver.VisibilityArchiver based on s3
    80  func NewVisibilityArchiver(
    81  	container *archiver.VisibilityBootstrapContainer,
    82  	config *config.S3Archiver,
    83  ) (archiver.VisibilityArchiver, error) {
    84  	return newVisibilityArchiver(container, config)
    85  }
    86  
    87  func newVisibilityArchiver(
    88  	container *archiver.VisibilityBootstrapContainer,
    89  	config *config.S3Archiver) (*visibilityArchiver, error) {
    90  	s3Config := &aws.Config{
    91  		Endpoint:         config.Endpoint,
    92  		Region:           aws.String(config.Region),
    93  		S3ForcePathStyle: aws.Bool(config.S3ForcePathStyle),
    94  		LogLevel:         (*aws.LogLevelType)(&config.LogLevel),
    95  	}
    96  	sess, err := session.NewSession(s3Config)
    97  	if err != nil {
    98  		return nil, err
    99  	}
   100  	return &visibilityArchiver{
   101  		container:   container,
   102  		s3cli:       s3.New(sess),
   103  		queryParser: NewQueryParser(),
   104  	}, nil
   105  }
   106  
   107  func (v *visibilityArchiver) Archive(
   108  	ctx context.Context,
   109  	URI archiver.URI,
   110  	request *archiverspb.VisibilityRecord,
   111  	opts ...archiver.ArchiveOption,
   112  ) (err error) {
   113  	handler := v.container.MetricsHandler.WithTags(metrics.OperationTag(metrics.VisibilityArchiverScope), metrics.NamespaceTag(request.Namespace))
   114  	featureCatalog := archiver.GetFeatureCatalog(opts...)
   115  	startTime := time.Now().UTC()
   116  	logger := archiver.TagLoggerWithArchiveVisibilityRequestAndURI(v.container.Logger, request, URI.String())
   117  	archiveFailReason := ""
   118  	defer func() {
   119  		handler.Timer(metrics.ServiceLatency.Name()).Record(time.Since(startTime))
   120  		if err != nil {
   121  			if isRetryableError(err) {
   122  				handler.Counter(metrics.VisibilityArchiverArchiveTransientErrorCount.Name()).Record(1)
   123  				logger.Error(archiver.ArchiveTransientErrorMsg, tag.ArchivalArchiveFailReason(archiveFailReason), tag.Error(err))
   124  			} else {
   125  				handler.Counter(metrics.VisibilityArchiverArchiveNonRetryableErrorCount.Name()).Record(1)
   126  				logger.Error(archiver.ArchiveNonRetryableErrorMsg, tag.ArchivalArchiveFailReason(archiveFailReason), tag.Error(err))
   127  				if featureCatalog.NonRetryableError != nil {
   128  					err = featureCatalog.NonRetryableError()
   129  				}
   130  			}
   131  		}
   132  	}()
   133  
   134  	if err := SoftValidateURI(URI); err != nil {
   135  		archiveFailReason = archiver.ErrReasonInvalidURI
   136  		return err
   137  	}
   138  
   139  	if err := archiver.ValidateVisibilityArchivalRequest(request); err != nil {
   140  		archiveFailReason = archiver.ErrReasonInvalidArchiveRequest
   141  		return err
   142  	}
   143  
   144  	encodedVisibilityRecord, err := Encode(request)
   145  	if err != nil {
   146  		archiveFailReason = errEncodeVisibilityRecord
   147  		return err
   148  	}
   149  	indexes := createIndexesToArchive(request)
   150  	// Upload archive to all indexes
   151  	for _, element := range indexes {
   152  		key := constructTimestampIndex(URI.Path(), request.GetNamespaceId(), element.primaryIndex, element.primaryIndexValue, element.secondaryIndex, element.secondaryIndexTimestamp, request.GetRunId())
   153  		if err := Upload(ctx, v.s3cli, URI, key, encodedVisibilityRecord); err != nil {
   154  			archiveFailReason = errWriteKey
   155  			return err
   156  		}
   157  	}
   158  	handler.Counter(metrics.VisibilityArchiveSuccessCount.Name()).Record(1)
   159  	return nil
   160  }
   161  
   162  func createIndexesToArchive(request *archiverspb.VisibilityRecord) []indexToArchive {
   163  	return []indexToArchive{
   164  		{primaryIndexKeyWorkflowTypeName, request.WorkflowTypeName, secondaryIndexKeyCloseTimeout, timestamp.TimeValue(request.CloseTime)},
   165  		{primaryIndexKeyWorkflowTypeName, request.WorkflowTypeName, secondaryIndexKeyStartTimeout, timestamp.TimeValue(request.StartTime)},
   166  		{primaryIndexKeyWorkflowID, request.GetWorkflowId(), secondaryIndexKeyCloseTimeout, timestamp.TimeValue(request.CloseTime)},
   167  		{primaryIndexKeyWorkflowID, request.GetWorkflowId(), secondaryIndexKeyStartTimeout, timestamp.TimeValue(request.StartTime)},
   168  	}
   169  }
   170  
   171  func (v *visibilityArchiver) Query(
   172  	ctx context.Context,
   173  	URI archiver.URI,
   174  	request *archiver.QueryVisibilityRequest,
   175  	saTypeMap searchattribute.NameTypeMap,
   176  ) (*archiver.QueryVisibilityResponse, error) {
   177  
   178  	if err := SoftValidateURI(URI); err != nil {
   179  		return nil, serviceerror.NewInvalidArgument(archiver.ErrInvalidURI.Error())
   180  	}
   181  
   182  	if err := archiver.ValidateQueryRequest(request); err != nil {
   183  		return nil, serviceerror.NewInvalidArgument(archiver.ErrInvalidQueryVisibilityRequest.Error())
   184  	}
   185  
   186  	if strings.TrimSpace(request.Query) == "" {
   187  		return v.queryAll(ctx, URI, request, saTypeMap)
   188  	}
   189  
   190  	parsedQuery, err := v.queryParser.Parse(request.Query)
   191  	if err != nil {
   192  		return nil, serviceerror.NewInvalidArgument(err.Error())
   193  	}
   194  
   195  	return v.query(
   196  		ctx,
   197  		URI,
   198  		&queryVisibilityRequest{
   199  			namespaceID:   request.NamespaceID,
   200  			pageSize:      request.PageSize,
   201  			nextPageToken: request.NextPageToken,
   202  			parsedQuery:   parsedQuery,
   203  		},
   204  		saTypeMap,
   205  	)
   206  }
   207  
   208  // queryAll returns all workflow executions in the archive.
   209  func (v *visibilityArchiver) queryAll(
   210  	ctx context.Context,
   211  	uri archiver.URI,
   212  	request *archiver.QueryVisibilityRequest,
   213  	saTypeMap searchattribute.NameTypeMap,
   214  ) (*archiver.QueryVisibilityResponse, error) {
   215  	// remaining is the number of workflow executions left to return before we reach pageSize.
   216  	remaining := request.PageSize
   217  	nextPageToken := request.NextPageToken
   218  	var executions []*workflowpb.WorkflowExecutionInfo
   219  	// We need to loop because the number of workflow executions returned by each call to query may be fewer than
   220  	// pageSize. This is because we may have to skip some workflow executions after querying S3 (client-side filtering)
   221  	// because there are 2 entries in S3 for each workflow execution indexed by workflowTypeName (one for closeTimeout
   222  	// and one for startTimeout), and we only want to return one entry per workflow execution. See
   223  	// createIndexesToArchive for a list of all indexes.
   224  	for {
   225  		searchPrefix := constructVisibilitySearchPrefix(uri.Path(), request.NamespaceID)
   226  		// We suffix searchPrefix with workflowTypeName because the data in S3 is duplicated across combinations of 2
   227  		// different primary indices (workflowID and workflowTypeName) and 2 different secondary indices (closeTimeout
   228  		// and startTimeout). We only want to return one entry per workflow execution, but the full path to the S3 key
   229  		// is <primaryIndexKey>/<primaryIndexValue>/<secondaryIndexKey>/<secondaryIndexValue>/<runID>, and we don't have
   230  		// the primaryIndexValue when we make the call to query, so we can only specify the primaryIndexKey.
   231  		searchPrefix += "/" + primaryIndexKeyWorkflowTypeName
   232  		// The pageSize we supply here is actually the maximum number of keys to fetch from S3. For each execution,
   233  		// there should be 2 keys in S3 for this prefix, so you might think that we should multiply the pageSize by 2.
   234  		// However, if we do that, we may end up returning more than pageSize workflow executions to the end user of
   235  		// this API. This is because we aren't guaranteed that both keys for a given workflow execution will be returned
   236  		// in the same call. For example, if the user supplies a pageSize of 1, and we specify a maximum number of keys
   237  		// of 2 to S3, we may get back entries from S3 for 2 different workflow executions. You might think that we can
   238  		// just truncate this result to 1 workflow execution, but then the nextPageToken would be incorrect. So, we may
   239  		// need to make multiple calls to S3 to get the correct number of workflow executions, which will probably make
   240  		// this API call slower.
   241  		res, err := v.queryPrefix(ctx, uri, &queryVisibilityRequest{
   242  			namespaceID:   request.NamespaceID,
   243  			pageSize:      remaining,
   244  			nextPageToken: nextPageToken,
   245  			parsedQuery:   &parsedQuery{},
   246  		}, saTypeMap, searchPrefix, func(key string) bool {
   247  			// We only want to return entries for the closeTimeout secondary index, which will always be of the form:
   248  			// .../closeTimeout/<closeTimeout>/<runID>, so we split the key on "/" and check that the third-to-last
   249  			// element is "closeTimeout".
   250  			elements := strings.Split(key, "/")
   251  			return len(elements) >= 3 && elements[len(elements)-3] == secondaryIndexKeyCloseTimeout
   252  		})
   253  		if err != nil {
   254  			return nil, err
   255  		}
   256  		nextPageToken = res.NextPageToken
   257  		executions = append(executions, res.Executions...)
   258  		remaining -= len(res.Executions)
   259  		if len(nextPageToken) == 0 || remaining <= 0 {
   260  			break
   261  		}
   262  	}
   263  	return &archiver.QueryVisibilityResponse{
   264  		Executions:    executions,
   265  		NextPageToken: nextPageToken,
   266  	}, nil
   267  }
   268  
   269  func (v *visibilityArchiver) query(
   270  	ctx context.Context,
   271  	URI archiver.URI,
   272  	request *queryVisibilityRequest,
   273  	saTypeMap searchattribute.NameTypeMap,
   274  ) (*archiver.QueryVisibilityResponse, error) {
   275  	primaryIndex := primaryIndexKeyWorkflowTypeName
   276  	primaryIndexValue := request.parsedQuery.workflowTypeName
   277  	if request.parsedQuery.workflowID != nil {
   278  		primaryIndex = primaryIndexKeyWorkflowID
   279  		primaryIndexValue = request.parsedQuery.workflowID
   280  	}
   281  
   282  	prefix := constructIndexedVisibilitySearchPrefix(
   283  		URI.Path(),
   284  		request.namespaceID,
   285  		primaryIndex,
   286  		*primaryIndexValue,
   287  		secondaryIndexKeyCloseTimeout,
   288  	) + "/"
   289  	if request.parsedQuery.closeTime != nil {
   290  		prefix = constructTimeBasedSearchKey(
   291  			URI.Path(),
   292  			request.namespaceID,
   293  			primaryIndex,
   294  			*primaryIndexValue,
   295  			secondaryIndexKeyCloseTimeout,
   296  			*request.parsedQuery.closeTime,
   297  			*request.parsedQuery.searchPrecision,
   298  		)
   299  	}
   300  	if request.parsedQuery.startTime != nil {
   301  		prefix = constructTimeBasedSearchKey(
   302  			URI.Path(),
   303  			request.namespaceID,
   304  			primaryIndex,
   305  			*primaryIndexValue,
   306  			secondaryIndexKeyStartTimeout,
   307  			*request.parsedQuery.startTime,
   308  			*request.parsedQuery.searchPrecision,
   309  		)
   310  	}
   311  
   312  	return v.queryPrefix(ctx, URI, request, saTypeMap, prefix, nil)
   313  }
   314  
   315  // queryPrefix returns all workflow executions in the archive that match the given prefix. The keyFilter function is an
   316  // optional filter that can be used to further filter the results. If keyFilter returns false for a given key, that key
   317  // will be skipped, and the object will not be downloaded from S3 or included in the results.
   318  func (v *visibilityArchiver) queryPrefix(
   319  	ctx context.Context,
   320  	uri archiver.URI,
   321  	request *queryVisibilityRequest,
   322  	saTypeMap searchattribute.NameTypeMap,
   323  	prefix string,
   324  	keyFilter func(key string) bool,
   325  ) (*archiver.QueryVisibilityResponse, error) {
   326  	ctx, cancel := ensureContextTimeout(ctx)
   327  	defer cancel()
   328  
   329  	var token *string
   330  
   331  	if request.nextPageToken != nil {
   332  		token = deserializeQueryVisibilityToken(request.nextPageToken)
   333  	}
   334  	results, err := v.s3cli.ListObjectsV2WithContext(ctx, &s3.ListObjectsV2Input{
   335  		Bucket:            aws.String(uri.Hostname()),
   336  		Prefix:            aws.String(prefix),
   337  		MaxKeys:           aws.Int64(int64(request.pageSize)),
   338  		ContinuationToken: token,
   339  	})
   340  	if err != nil {
   341  		if isRetryableError(err) {
   342  			return nil, serviceerror.NewUnavailable(err.Error())
   343  		}
   344  		return nil, serviceerror.NewInvalidArgument(err.Error())
   345  	}
   346  	if len(results.Contents) == 0 {
   347  		return &archiver.QueryVisibilityResponse{}, nil
   348  	}
   349  
   350  	response := &archiver.QueryVisibilityResponse{}
   351  	if *results.IsTruncated {
   352  		response.NextPageToken = serializeQueryVisibilityToken(*results.NextContinuationToken)
   353  	}
   354  	for _, item := range results.Contents {
   355  		if keyFilter != nil && !keyFilter(*item.Key) {
   356  			continue
   357  		}
   358  
   359  		encodedRecord, err := Download(ctx, v.s3cli, uri, *item.Key)
   360  		if err != nil {
   361  			return nil, serviceerror.NewUnavailable(err.Error())
   362  		}
   363  
   364  		record, err := decodeVisibilityRecord(encodedRecord)
   365  		if err != nil {
   366  			return nil, serviceerror.NewInternal(err.Error())
   367  		}
   368  		executionInfo, err := convertToExecutionInfo(record, saTypeMap)
   369  		if err != nil {
   370  			return nil, serviceerror.NewInternal(err.Error())
   371  		}
   372  		response.Executions = append(response.Executions, executionInfo)
   373  	}
   374  	return response, nil
   375  }
   376  
   377  func (v *visibilityArchiver) ValidateURI(URI archiver.URI) error {
   378  	err := SoftValidateURI(URI)
   379  	if err != nil {
   380  		return err
   381  	}
   382  	return BucketExists(context.TODO(), v.s3cli, URI)
   383  }