go.temporal.io/server@v1.23.0/common/persistence/history_manager.go (about)

     1  // The MIT License
     2  //
     3  // Copyright (c) 2020 Temporal Technologies Inc.  All rights reserved.
     4  //
     5  // Copyright (c) 2020 Uber Technologies, Inc.
     6  //
     7  // Permission is hereby granted, free of charge, to any person obtaining a copy
     8  // of this software and associated documentation files (the "Software"), to deal
     9  // in the Software without restriction, including without limitation the rights
    10  // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
    11  // copies of the Software, and to permit persons to whom the Software is
    12  // furnished to do so, subject to the following conditions:
    13  //
    14  // The above copyright notice and this permission notice shall be included in
    15  // all copies or substantial portions of the Software.
    16  //
    17  // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    18  // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    19  // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
    20  // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    21  // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
    22  // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
    23  // THE SOFTWARE.
    24  
    25  package persistence
    26  
    27  import (
    28  	"context"
    29  	"fmt"
    30  
    31  	"github.com/pborman/uuid"
    32  	commonpb "go.temporal.io/api/common/v1"
    33  	enumspb "go.temporal.io/api/enums/v1"
    34  	historypb "go.temporal.io/api/history/v1"
    35  	"go.temporal.io/api/serviceerror"
    36  
    37  	persistencespb "go.temporal.io/server/api/persistence/v1"
    38  	"go.temporal.io/server/common"
    39  	"go.temporal.io/server/common/log/tag"
    40  	"go.temporal.io/server/common/primitives/timestamp"
    41  )
    42  
const (
	// defaultLastNodeID is the node ID immediately preceding the first event ID.
	defaultLastNodeID = common.FirstEventID - 1
	// defaultLastTransactionID is the transaction ID handed to deserializeToken
	// as the default by the forward readers in this file.
	defaultLastTransactionID = int64(0)

	// TrimHistoryBranch will only dump metadata, relatively cheap
	trimHistoryBranchPageSize = 1000
)

// Compile-time assertion that *executionManagerImpl satisfies ExecutionManager.
var _ ExecutionManager = (*executionManagerImpl)(nil)
    52  
    53  // ForkHistoryBranch forks a new branch from a old branch
    54  func (m *executionManagerImpl) ForkHistoryBranch(
    55  	ctx context.Context,
    56  	request *ForkHistoryBranchRequest,
    57  ) (*ForkHistoryBranchResponse, error) {
    58  
    59  	if request.ForkNodeID <= 1 {
    60  		return nil, &InvalidPersistenceRequestError{
    61  			Msg: "ForkNodeID must be > 1",
    62  		}
    63  	}
    64  
    65  	forkBranch, err := m.GetHistoryBranchUtil().ParseHistoryBranchInfo(request.ForkBranchToken)
    66  	if err != nil {
    67  		return nil, err
    68  	}
    69  
    70  	newAncestors := make([]*persistencespb.HistoryBranchRange, 0, len(forkBranch.Ancestors)+1)
    71  
    72  	beginNodeID := GetBeginNodeID(forkBranch)
    73  	if beginNodeID >= request.ForkNodeID {
    74  		// this is the case that new branch's ancestors doesn't include the forking branch
    75  		for _, br := range forkBranch.Ancestors {
    76  			if br.GetEndNodeId() >= request.ForkNodeID {
    77  				newAncestors = append(newAncestors, &persistencespb.HistoryBranchRange{
    78  					BranchId:    br.GetBranchId(),
    79  					BeginNodeId: br.GetBeginNodeId(),
    80  					EndNodeId:   request.ForkNodeID,
    81  				})
    82  				break
    83  			} else {
    84  				newAncestors = append(newAncestors, br)
    85  			}
    86  		}
    87  	} else {
    88  		// this is the case the new branch will inherit all ancestors from forking branch
    89  		newAncestors = forkBranch.Ancestors
    90  		newAncestors = append(newAncestors, &persistencespb.HistoryBranchRange{
    91  			BranchId:    forkBranch.GetBranchId(),
    92  			BeginNodeId: beginNodeID,
    93  			EndNodeId:   request.ForkNodeID,
    94  		})
    95  	}
    96  	newBranchInfo := &persistencespb.HistoryBranch{
    97  		TreeId:    forkBranch.TreeId,
    98  		BranchId:  uuid.New(),
    99  		Ancestors: newAncestors,
   100  	}
   101  
   102  	// The above newBranchInfo is a lossy construction of the forked branch token from the original opaque branch token.
   103  	// It only initializes with the fields it understands, which may inadvertently discard other misc fields. The
   104  	// following is the replacement logic to correctly apply the updated fields into the original opaque branch token.
   105  	newBranchToken, err := m.GetHistoryBranchUtil().UpdateHistoryBranchInfo(request.ForkBranchToken, newBranchInfo)
   106  	if err != nil {
   107  		return nil, err
   108  	}
   109  
   110  	treeInfo := &persistencespb.HistoryTreeInfo{
   111  		BranchToken: newBranchToken,
   112  		BranchInfo:  newBranchInfo,
   113  		ForkTime:    timestamp.TimeNowPtrUtc(),
   114  		Info:        request.Info,
   115  	}
   116  
   117  	treeInfoBlob, err := m.serializer.HistoryTreeInfoToBlob(treeInfo, enumspb.ENCODING_TYPE_PROTO3)
   118  	if err != nil {
   119  		return nil, err
   120  	}
   121  
   122  	req := &InternalForkHistoryBranchRequest{
   123  		ForkBranchToken: request.ForkBranchToken,
   124  		ForkBranchInfo:  forkBranch,
   125  		TreeInfo:        treeInfoBlob,
   126  		ForkNodeID:      request.ForkNodeID,
   127  		NewBranchID:     newBranchInfo.BranchId,
   128  		Info:            request.Info,
   129  		ShardID:         request.ShardID,
   130  	}
   131  
   132  	err = m.persistence.ForkHistoryBranch(ctx, req)
   133  	if err != nil {
   134  		return nil, err
   135  	}
   136  
   137  	return &ForkHistoryBranchResponse{
   138  		NewBranchToken: newBranchToken,
   139  	}, nil
   140  }
   141  
   142  // DeleteHistoryBranch removes a branch
   143  func (m *executionManagerImpl) DeleteHistoryBranch(
   144  	ctx context.Context,
   145  	request *DeleteHistoryBranchRequest,
   146  ) error {
   147  
   148  	branch, err := m.GetHistoryBranchUtil().ParseHistoryBranchInfo(request.BranchToken)
   149  	if err != nil {
   150  		return err
   151  	}
   152  
   153  	// We need to delete the target branch and its ancestors if they are not referenced by any other branches.
   154  	// However, it is possible that part of the target branch (or its ancestors) is used as ancestors by other branch.
   155  	// We need to avoid deleting those referenced parts. This is similar to reference count in garbage collection.
   156  	brsToDelete := branch.Ancestors
   157  	brsToDelete = append(brsToDelete, &persistencespb.HistoryBranchRange{
   158  		BranchId:    branch.GetBranchId(),
   159  		BeginNodeId: GetBeginNodeID(branch),
   160  	})
   161  
   162  	// Get the entire history tree, so we know if any part of the target branch is referenced by other branches.
   163  	historyTreeResp, err := m.GetHistoryTree(ctx, &GetHistoryTreeRequest{
   164  		TreeID:  branch.TreeId,
   165  		ShardID: request.ShardID,
   166  	})
   167  	if err != nil {
   168  		return err
   169  	}
   170  
   171  	// usedBranches record branches referenced by others
   172  	usedBranches := map[string]int64{}
   173  	for _, branchInfo := range historyTreeResp.BranchInfos {
   174  		if branchInfo.BranchId == branch.BranchId {
   175  			// skip the target branch
   176  			continue
   177  		}
   178  		usedBranches[branchInfo.BranchId] = common.LastEventID
   179  		for _, ancestor := range branchInfo.Ancestors {
   180  			if curr, ok := usedBranches[ancestor.GetBranchId()]; !ok || curr < ancestor.GetEndNodeId() {
   181  				usedBranches[ancestor.GetBranchId()] = ancestor.GetEndNodeId()
   182  			}
   183  		}
   184  	}
   185  
   186  	var deleteRanges []InternalDeleteHistoryBranchRange
   187  	// for each branch range to delete, we iterate from bottom up, and stop when the range is also used by others
   188  findDeleteRanges:
   189  	for i := len(brsToDelete) - 1; i >= 0; i-- {
   190  		br := brsToDelete[i]
   191  		if maxEndNode, ok := usedBranches[br.GetBranchId()]; ok {
   192  			// branch is used by others, we can only delete from the maxEndNode
   193  			if maxEndNode != common.LastEventID {
   194  				deleteRanges = append(deleteRanges, InternalDeleteHistoryBranchRange{
   195  					BranchId:    br.BranchId,
   196  					BeginNodeId: maxEndNode,
   197  				})
   198  			}
   199  			// all ancestors are also used, no need to go up further,
   200  			break findDeleteRanges
   201  		} else {
   202  			// No other branch is using this range, we can delete all of it
   203  			deleteRanges = append(deleteRanges, InternalDeleteHistoryBranchRange{
   204  				BranchId:    br.BranchId,
   205  				BeginNodeId: br.BeginNodeId,
   206  			})
   207  		}
   208  	}
   209  
   210  	req := &InternalDeleteHistoryBranchRequest{
   211  		BranchToken:  request.BranchToken,
   212  		BranchInfo:   branch,
   213  		ShardID:      request.ShardID,
   214  		BranchRanges: deleteRanges,
   215  	}
   216  	return m.persistence.DeleteHistoryBranch(ctx, req)
   217  }
   218  
// TrimHistoryBranch trims a branch.
//
// It pages through node metadata only (readRawHistoryBranch is called with
// metadataOnly set to true), using minNodeID = common.FirstEventID and
// maxNodeID = request.NodeID + 1, across the branch's ancestors and the branch
// itself. Every node read is indexed by its transaction ID, and the index is
// passed to validateNodeChainAndTrim together with request.NodeID and
// request.TransactionID. Each node that helper returns is then removed with
// DeleteHistoryNodes.
//
// If validateNodeChainAndTrim returns an error, it is logged at debug level
// and an empty response with a nil error is returned, so nothing is deleted.
// Errors from parsing the token, paging, or deleting are wrapped and returned.
func (m *executionManagerImpl) TrimHistoryBranch(
	ctx context.Context,
	request *TrimHistoryBranchRequest,
) (*TrimHistoryBranchResponse, error) {

	shardID := request.ShardID
	minNodeID := common.FirstEventID
	maxNodeID := request.NodeID + 1
	pageSize := trimHistoryBranchPageSize

	branch, err := m.GetHistoryBranchUtil().ParseHistoryBranchInfo(request.BranchToken)
	if err != nil {
		return nil, fmt.Errorf("unable to parse history branch info: %w", err)
	}
	treeID := branch.TreeId
	branchID := branch.BranchId
	branchAncestors := branch.Ancestors

	// merge tree ID & branch ID into branch ancestors so the processing logic is simple
	// The branch's own range begins where its last ancestor ends, or at the
	// first event ID when there are no ancestors.
	beginNodeID := common.FirstEventID
	if len(branch.Ancestors) > 0 {
		beginNodeID = branch.Ancestors[len(branch.Ancestors)-1].GetEndNodeId()
	}
	branchAncestors = append(branchAncestors, &persistencespb.HistoryBranchRange{
		BranchId:    branchID,
		BeginNodeId: beginNodeID,
		EndNodeId:   maxNodeID,
	})

	var pageToken []byte
	// Keyed by transaction ID; a later node with the same transaction ID
	// replaces an earlier entry.
	transactionIDToNode := map[int64]historyNodeMetadata{}
	// Do-while loop: the body always runs once, then repeats while the
	// serialized page token is non-empty.
	for doContinue := true; doContinue; doContinue = len(pageToken) > 0 {
		token, err := m.deserializeToken(pageToken, minNodeID-1, defaultLastTransactionID)
		if err != nil {
			return nil, fmt.Errorf("unable to deserialize token: %w", err)
		}

		nodes, token, err := m.readRawHistoryBranch(
			ctx,
			request.BranchToken,
			shardID,
			branchAncestors,
			minNodeID,
			maxNodeID,
			token,
			pageSize,
			true,
		)
		if err != nil {
			return nil, fmt.Errorf("unable to read raw history branch: %w", err)
		}

		// Shadows the outer branchID: this is the ID of the range the page was
		// read from, which may be an ancestor rather than the branch itself.
		branchID := branchAncestors[token.CurrentRangeIndex].BranchId
		for _, node := range nodes {
			transactionIDToNode[node.TransactionID] = historyNodeMetadata{
				branchInfo: &persistencespb.HistoryBranch{
					TreeId:   treeID,
					BranchId: branchID,
					// only the ranges that precede the current one
					Ancestors: branchAncestors[0:token.CurrentRangeIndex],
				},
				nodeID:            node.NodeID,
				transactionID:     node.TransactionID,
				prevTransactionID: node.PrevTransactionID,
			}
		}

		// Round-trip the token so the loop condition can tell whether another
		// page is pending.
		pageToken, err = m.serializeToken(token, false)
		if err != nil {
			return nil, fmt.Errorf("unable to serialize token: %w", err)
		}
	}

	nodesToTrim, err := validateNodeChainAndTrim(
		request.NodeID,
		request.TransactionID,
		transactionIDToNode,
	)
	if err != nil {
		// Deliberately not surfaced to the caller: the trim is skipped.
		m.logger.Debug("unable to trim history branch due to existing history node not fully onboarded", tag.Error(err))
		return &TrimHistoryBranchResponse{}, nil
	}

	for _, node := range nodesToTrim {
		if err := m.persistence.DeleteHistoryNodes(ctx, &InternalDeleteHistoryNodesRequest{
			BranchToken:   request.BranchToken,
			ShardID:       shardID,
			BranchInfo:    node.branchInfo,
			NodeID:        node.nodeID,
			TransactionID: node.transactionID,
		}); err != nil {
			return nil, fmt.Errorf("unable to delete history nodes: %w", err)
		}
	}

	return &TrimHistoryBranchResponse{}, nil
}
   316  
   317  // GetHistoryTree returns all branch information of a tree
   318  func (m *executionManagerImpl) GetHistoryTree(
   319  	ctx context.Context,
   320  	request *GetHistoryTreeRequest,
   321  ) (*GetHistoryTreeResponse, error) {
   322  
   323  	resp, err := m.persistence.GetHistoryTree(ctx, request)
   324  	if err != nil {
   325  		return nil, err
   326  	}
   327  	branchInfos := make([]*persistencespb.HistoryBranch, 0, len(resp.TreeInfos))
   328  	for _, blob := range resp.TreeInfos {
   329  		treeInfo, err := m.serializer.HistoryTreeInfoFromBlob(blob)
   330  		if err != nil {
   331  			return nil, err
   332  		}
   333  		branchInfos = append(branchInfos, treeInfo.BranchInfo)
   334  	}
   335  	return &GetHistoryTreeResponse{BranchInfos: branchInfos}, nil
   336  }
   337  
   338  func (m *executionManagerImpl) serializeAppendHistoryNodesRequest(
   339  	ctx context.Context,
   340  	request *AppendHistoryNodesRequest,
   341  ) (*InternalAppendHistoryNodesRequest, error) {
   342  	branch, err := m.GetHistoryBranchUtil().ParseHistoryBranchInfo(request.BranchToken)
   343  	if err != nil {
   344  		return nil, err
   345  	}
   346  
   347  	if len(request.Events) == 0 {
   348  		return nil, &InvalidPersistenceRequestError{
   349  			Msg: "events to be appended cannot be empty",
   350  		}
   351  	}
   352  	sortAncestors(branch.Ancestors)
   353  
   354  	version := request.Events[0].Version
   355  	nodeID := request.Events[0].EventId
   356  	lastID := nodeID - 1
   357  
   358  	if nodeID <= 0 {
   359  		return nil, &InvalidPersistenceRequestError{
   360  			Msg: "eventID cannot be less than 1",
   361  		}
   362  	}
   363  	for _, e := range request.Events {
   364  		if e.Version != version {
   365  			return nil, &InvalidPersistenceRequestError{
   366  				Msg: "event version must be the same inside a batch",
   367  			}
   368  		}
   369  		if e.EventId != lastID+1 {
   370  			return nil, &InvalidPersistenceRequestError{
   371  				Msg: "event ID must be continous",
   372  			}
   373  		}
   374  		lastID++
   375  	}
   376  
   377  	// nodeID will be the first eventID
   378  	blob, err := m.serializer.SerializeEvents(request.Events, enumspb.ENCODING_TYPE_PROTO3)
   379  	if err != nil {
   380  		return nil, err
   381  	}
   382  	size := len(blob.Data)
   383  	sizeLimit := m.transactionSizeLimit()
   384  	if size > sizeLimit {
   385  		return nil, &TransactionSizeLimitError{
   386  			Msg: fmt.Sprintf("transaction size of %v bytes exceeds limit of %v bytes", size, sizeLimit),
   387  		}
   388  	}
   389  
   390  	req := &InternalAppendHistoryNodesRequest{
   391  		BranchToken: request.BranchToken,
   392  		IsNewBranch: request.IsNewBranch,
   393  		Info:        request.Info,
   394  		BranchInfo:  branch,
   395  		Node: InternalHistoryNode{
   396  			NodeID:            nodeID,
   397  			Events:            blob,
   398  			PrevTransactionID: request.PrevTransactionID,
   399  			TransactionID:     request.TransactionID,
   400  		},
   401  		ShardID: request.ShardID,
   402  	}
   403  
   404  	if req.IsNewBranch {
   405  		// TreeInfo is only needed for new branch
   406  		treeInfoBlob, err := m.serializer.HistoryTreeInfoToBlob(&persistencespb.HistoryTreeInfo{
   407  			BranchToken: request.BranchToken, // NOTE: this is redundant but double-writing until 1 minor release later
   408  			BranchInfo:  branch,
   409  			ForkTime:    timestamp.TimeNowPtrUtc(),
   410  			Info:        request.Info,
   411  		}, enumspb.ENCODING_TYPE_PROTO3)
   412  		if err != nil {
   413  			return nil, err
   414  		}
   415  		req.TreeInfo = treeInfoBlob
   416  	}
   417  
   418  	if nodeID < GetBeginNodeID(branch) {
   419  		return nil, &InvalidPersistenceRequestError{
   420  			Msg: "cannot append to ancestors' nodes",
   421  		}
   422  	}
   423  
   424  	return req, nil
   425  }
   426  
   427  func (m *executionManagerImpl) serializeAppendRawHistoryNodesRequest(
   428  	ctx context.Context,
   429  	request *AppendRawHistoryNodesRequest,
   430  ) (*InternalAppendHistoryNodesRequest, error) {
   431  	branch, err := m.GetHistoryBranchUtil().ParseHistoryBranchInfo(request.BranchToken)
   432  	if err != nil {
   433  		return nil, err
   434  	}
   435  
   436  	if len(request.History.Data) == 0 {
   437  		return nil, &InvalidPersistenceRequestError{
   438  			Msg: "events to be appended cannot be empty",
   439  		}
   440  	}
   441  	sortAncestors(branch.Ancestors)
   442  
   443  	nodeID := request.NodeID
   444  	if nodeID <= 0 {
   445  		return nil, &InvalidPersistenceRequestError{
   446  			Msg: "eventID cannot be less than 1",
   447  		}
   448  	}
   449  	// nodeID will be the first eventID
   450  	size := len(request.History.Data)
   451  	sizeLimit := m.transactionSizeLimit()
   452  	if size > sizeLimit {
   453  		return nil, &TransactionSizeLimitError{
   454  			Msg: fmt.Sprintf("transaction size of %v bytes exceeds limit of %v bytes", size, sizeLimit),
   455  		}
   456  	}
   457  
   458  	req := &InternalAppendHistoryNodesRequest{
   459  		BranchToken: request.BranchToken,
   460  		IsNewBranch: request.IsNewBranch,
   461  		Info:        request.Info,
   462  		BranchInfo:  branch,
   463  		Node: InternalHistoryNode{
   464  			NodeID:            nodeID,
   465  			Events:            request.History,
   466  			PrevTransactionID: request.PrevTransactionID,
   467  			TransactionID:     request.TransactionID,
   468  		},
   469  		ShardID: request.ShardID,
   470  	}
   471  
   472  	if req.IsNewBranch {
   473  		// TreeInfo is only needed for new branch
   474  		treeInfoBlob, err := m.serializer.HistoryTreeInfoToBlob(&persistencespb.HistoryTreeInfo{
   475  			BranchToken: request.BranchToken, // NOTE: this is redundant but double-writing until 1 minor release later
   476  			BranchInfo:  branch,
   477  			ForkTime:    timestamp.TimeNowPtrUtc(),
   478  			Info:        request.Info,
   479  		}, enumspb.ENCODING_TYPE_PROTO3)
   480  		if err != nil {
   481  			return nil, err
   482  		}
   483  		req.TreeInfo = treeInfoBlob
   484  	}
   485  
   486  	if nodeID < GetBeginNodeID(branch) {
   487  		return nil, &InvalidPersistenceRequestError{
   488  			Msg: "cannot append to ancestors' nodes",
   489  		}
   490  	}
   491  
   492  	return req, nil
   493  }
   494  
   495  // AppendHistoryNodes add a node to history node table
   496  func (m *executionManagerImpl) AppendHistoryNodes(
   497  	ctx context.Context,
   498  	request *AppendHistoryNodesRequest,
   499  ) (*AppendHistoryNodesResponse, error) {
   500  
   501  	req, err := m.serializeAppendHistoryNodesRequest(ctx, request)
   502  
   503  	if err != nil {
   504  		return nil, err
   505  	}
   506  
   507  	err = m.persistence.AppendHistoryNodes(ctx, req)
   508  
   509  	return &AppendHistoryNodesResponse{
   510  		Size: len(req.Node.Events.Data),
   511  	}, err
   512  }
   513  
   514  // AppendRawHistoryNodes add raw history nodes to history node table
   515  func (m *executionManagerImpl) AppendRawHistoryNodes(
   516  	ctx context.Context,
   517  	request *AppendRawHistoryNodesRequest,
   518  ) (*AppendHistoryNodesResponse, error) {
   519  
   520  	req, err := m.serializeAppendRawHistoryNodesRequest(ctx, request)
   521  	if err != nil {
   522  		return nil, err
   523  	}
   524  
   525  	err = m.persistence.AppendHistoryNodes(ctx, req)
   526  	return &AppendHistoryNodesResponse{
   527  		Size: len(request.History.Data),
   528  	}, err
   529  }
   530  
   531  // ReadHistoryBranchByBatch returns history node data for a branch by batch
   532  // Pagination is implemented here, the actual minNodeID passing to persistence layer is calculated along with token's LastNodeID
   533  func (m *executionManagerImpl) ReadHistoryBranchByBatch(
   534  	ctx context.Context,
   535  	request *ReadHistoryBranchRequest,
   536  ) (*ReadHistoryBranchByBatchResponse, error) {
   537  
   538  	resp := &ReadHistoryBranchByBatchResponse{}
   539  	var err error
   540  	_, resp.History, resp.TransactionIDs, resp.NextPageToken, resp.Size, err = m.readHistoryBranch(ctx, true, request)
   541  	return resp, err
   542  }
   543  
   544  // ReadHistoryBranch returns history node data for a branch
   545  // Pagination is implemented here, the actual minNodeID passing to persistence layer is calculated along with token's LastNodeID
   546  func (m *executionManagerImpl) ReadHistoryBranch(
   547  	ctx context.Context,
   548  	request *ReadHistoryBranchRequest,
   549  ) (*ReadHistoryBranchResponse, error) {
   550  
   551  	resp := &ReadHistoryBranchResponse{}
   552  	var err error
   553  	resp.HistoryEvents, _, _, resp.NextPageToken, resp.Size, err = m.readHistoryBranch(ctx, false, request)
   554  	return resp, err
   555  }
   556  
   557  // ReadRawHistoryBranch returns raw history binary data for a branch
   558  // Pagination is implemented here, the actual minNodeID passing to persistence layer is calculated along with token's LastNodeID
   559  // NOTE: this API should only be used by 3+DC
   560  func (m *executionManagerImpl) ReadRawHistoryBranch(
   561  	ctx context.Context,
   562  	request *ReadHistoryBranchRequest,
   563  ) (*ReadRawHistoryBranchResponse, error) {
   564  
   565  	dataBlobs, _, nodeIDs, token, dataSize, err := m.readRawHistoryBranchAndFilter(ctx, request)
   566  	if err != nil {
   567  		return nil, err
   568  	}
   569  
   570  	nextPageToken, err := m.serializeToken(token, false)
   571  	if err != nil {
   572  		return nil, err
   573  	}
   574  
   575  	return &ReadRawHistoryBranchResponse{
   576  		HistoryEventBlobs: dataBlobs,
   577  		NodeIDs:           nodeIDs,
   578  		NextPageToken:     nextPageToken,
   579  		Size:              dataSize,
   580  	}, nil
   581  }
   582  
   583  // ReadHistoryBranchReverse returns history node data for a branch
   584  // Pagination is implemented here, the actual minNodeID passing to persistence layer is calculated along with token's LastNodeID
   585  func (m *executionManagerImpl) ReadHistoryBranchReverse(
   586  	ctx context.Context,
   587  	request *ReadHistoryBranchReverseRequest,
   588  ) (*ReadHistoryBranchReverseResponse, error) {
   589  	resp := &ReadHistoryBranchReverseResponse{}
   590  	var err error
   591  	resp.HistoryEvents, _, resp.NextPageToken, resp.Size, err = m.readHistoryBranchReverse(ctx, request)
   592  	return resp, err
   593  }
   594  
   595  func (m *executionManagerImpl) GetAllHistoryTreeBranches(
   596  	ctx context.Context,
   597  	request *GetAllHistoryTreeBranchesRequest,
   598  ) (*GetAllHistoryTreeBranchesResponse, error) {
   599  	resp, err := m.persistence.GetAllHistoryTreeBranches(ctx, request)
   600  	if err != nil {
   601  		return nil, err
   602  	}
   603  	branches := make([]HistoryBranchDetail, 0, len(resp.Branches))
   604  	for _, branch := range resp.Branches {
   605  		treeInfo, err := m.serializer.HistoryTreeInfoFromBlob(NewDataBlob(branch.Data, branch.Encoding))
   606  		if err != nil {
   607  			return nil, err
   608  		}
   609  		branchDetail := HistoryBranchDetail{
   610  			BranchInfo: treeInfo.BranchInfo,
   611  			ForkTime:   treeInfo.ForkTime,
   612  			Info:       treeInfo.Info,
   613  		}
   614  		branches = append(branches, branchDetail)
   615  	}
   616  
   617  	return &GetAllHistoryTreeBranchesResponse{
   618  		NextPageToken: resp.NextPageToken,
   619  		Branches:      branches,
   620  	}, nil
   621  }
   622  
   623  func (m *executionManagerImpl) readRawHistoryBranch(
   624  	ctx context.Context,
   625  	branchToken []byte,
   626  	shardID int32,
   627  	branchAncestors []*persistencespb.HistoryBranchRange,
   628  	minNodeID int64,
   629  	maxNodeID int64,
   630  	token *historyPagingToken,
   631  	pageSize int,
   632  	metadataOnly bool,
   633  ) ([]InternalHistoryNode, *historyPagingToken, error) {
   634  
   635  	if token.CurrentRangeIndex == notStartedIndex {
   636  		for idx, br := range branchAncestors {
   637  			// this range won't contain any nodes needed
   638  			if minNodeID >= br.GetEndNodeId() {
   639  				continue
   640  			}
   641  			// similarly, the ranges and the rest won't contain any nodes needed,
   642  			if maxNodeID <= br.GetBeginNodeId() {
   643  				break
   644  			}
   645  
   646  			if token.CurrentRangeIndex == notStartedIndex {
   647  				token.CurrentRangeIndex = idx
   648  			}
   649  			token.FinalRangeIndex = idx
   650  		}
   651  
   652  		if token.CurrentRangeIndex == notStartedIndex {
   653  			return nil, nil, serviceerror.NewDataLoss("branchRange is corrupted")
   654  		}
   655  	}
   656  
   657  	currentBranch := branchAncestors[token.CurrentRangeIndex]
   658  	// minNodeID remains the same, since caller can read from the middle
   659  	// maxNodeID need to be shortened since this branch can contain additional history nodes
   660  	if currentBranch.GetEndNodeId() < maxNodeID {
   661  		maxNodeID = currentBranch.GetEndNodeId()
   662  	}
   663  	branchID := currentBranch.GetBranchId()
   664  	resp, err := m.persistence.ReadHistoryBranch(ctx, &InternalReadHistoryBranchRequest{
   665  		BranchToken:   branchToken,
   666  		ShardID:       shardID,
   667  		BranchID:      branchID,
   668  		MinNodeID:     minNodeID,
   669  		MaxNodeID:     maxNodeID,
   670  		NextPageToken: token.StoreToken,
   671  		PageSize:      pageSize,
   672  		MetadataOnly:  metadataOnly,
   673  	})
   674  	if err != nil {
   675  		return nil, nil, err
   676  	}
   677  	token.StoreToken = resp.NextPageToken
   678  	return resp.Nodes, token, nil
   679  }
   680  
   681  func (m *executionManagerImpl) readRawHistoryBranchReverse(
   682  	ctx context.Context,
   683  	branchToken []byte,
   684  	shardID int32,
   685  	treeID string,
   686  	branchAncestors []*persistencespb.HistoryBranchRange,
   687  	minNodeID int64,
   688  	maxNodeID int64,
   689  	token *historyPagingToken,
   690  	pageSize int,
   691  	metadataOnly bool,
   692  ) ([]InternalHistoryNode, *historyPagingToken, error) {
   693  	if token.CurrentRangeIndex == notStartedIndex {
   694  		for i := range branchAncestors {
   695  			idx := len(branchAncestors) - 1 - i
   696  			br := branchAncestors[idx]
   697  			// Skip branches that don't have relevant nodes
   698  			if maxNodeID <= br.GetBeginNodeId() {
   699  				continue
   700  			}
   701  			if minNodeID >= br.GetEndNodeId() {
   702  				break
   703  			}
   704  
   705  			if token.CurrentRangeIndex == notStartedIndex {
   706  				token.CurrentRangeIndex = idx
   707  			}
   708  			token.FinalRangeIndex = idx
   709  		}
   710  
   711  		if token.CurrentRangeIndex == notStartedIndex {
   712  			return nil, nil, serviceerror.NewDataLoss("branchRange is corrupted")
   713  		}
   714  	}
   715  
   716  	currentBranch := branchAncestors[token.CurrentRangeIndex]
   717  	// minNodeID remains the same, since caller can read from the middle
   718  	// maxNodeID need to be shortened since this branch can contain additional history nodes
   719  	if currentBranch.GetEndNodeId() < maxNodeID {
   720  		maxNodeID = currentBranch.GetEndNodeId()
   721  	}
   722  	branchID := currentBranch.GetBranchId()
   723  
   724  	resp, err := m.persistence.ReadHistoryBranch(ctx, &InternalReadHistoryBranchRequest{
   725  		BranchToken:   branchToken,
   726  		ShardID:       shardID,
   727  		BranchID:      branchID,
   728  		MinNodeID:     minNodeID,
   729  		MaxNodeID:     maxNodeID,
   730  		NextPageToken: token.StoreToken,
   731  		PageSize:      pageSize,
   732  		MetadataOnly:  metadataOnly,
   733  		ReverseOrder:  true,
   734  	})
   735  	if err != nil {
   736  		return nil, nil, err
   737  	}
   738  	token.StoreToken = resp.NextPageToken
   739  	return resp.Nodes, token, nil
   740  }
   741  
   742  func (m *executionManagerImpl) readRawHistoryBranchAndFilter(
   743  	ctx context.Context,
   744  	request *ReadHistoryBranchRequest,
   745  ) ([]*commonpb.DataBlob, []int64, []int64, *historyPagingToken, int, error) {
   746  
   747  	shardID := request.ShardID
   748  	branchToken := request.BranchToken
   749  	minNodeID := request.MinEventID
   750  	maxNodeID := request.MaxEventID
   751  
   752  	branch, err := m.GetHistoryBranchUtil().ParseHistoryBranchInfo(branchToken)
   753  	if err != nil {
   754  		return nil, nil, nil, nil, 0, err
   755  	}
   756  	branchID := branch.BranchId
   757  	branchAncestors := branch.Ancestors
   758  
   759  	// merge tree ID & branch ID into branch ancestors so the processing logic is simple
   760  	beginNodeID := common.FirstEventID
   761  	if len(branch.Ancestors) > 0 {
   762  		beginNodeID = branch.Ancestors[len(branch.Ancestors)-1].GetEndNodeId()
   763  	}
   764  	branchAncestors = append(branchAncestors, &persistencespb.HistoryBranchRange{
   765  		BranchId:    branchID,
   766  		BeginNodeId: beginNodeID,
   767  		EndNodeId:   maxNodeID,
   768  	})
   769  
   770  	token, err := m.deserializeToken(
   771  		request.NextPageToken,
   772  		request.MinEventID-1,
   773  		defaultLastTransactionID,
   774  	)
   775  	if err != nil {
   776  		return nil, nil, nil, nil, 0, err
   777  	}
   778  
   779  	nodes, token, err := m.readRawHistoryBranch(
   780  		ctx,
   781  		branchToken,
   782  		shardID,
   783  		branchAncestors,
   784  		minNodeID,
   785  		maxNodeID,
   786  		token,
   787  		request.PageSize,
   788  		false,
   789  	)
   790  	if err != nil {
   791  		return nil, nil, nil, nil, 0, err
   792  	}
   793  	if len(nodes) == 0 && len(request.NextPageToken) == 0 {
   794  		return nil, nil, nil, nil, 0, serviceerror.NewNotFound("Workflow execution history not found.")
   795  	}
   796  
   797  	nodes, err = m.filterHistoryNodes(
   798  		token.LastNodeID,
   799  		token.LastTransactionID,
   800  		nodes,
   801  	)
   802  	if err != nil {
   803  		return nil, nil, nil, nil, 0, err
   804  	}
   805  
   806  	var dataBlobs []*commonpb.DataBlob
   807  	transactionIDs := make([]int64, 0, len(nodes))
   808  	nodeIDs := make([]int64, 0, len(nodes))
   809  	dataSize := 0
   810  	if len(nodes) > 0 {
   811  		dataBlobs = make([]*commonpb.DataBlob, len(nodes))
   812  		for index, node := range nodes {
   813  			dataBlobs[index] = node.Events
   814  			dataSize += len(node.Events.Data)
   815  			transactionIDs = append(transactionIDs, node.TransactionID)
   816  			nodeIDs = append(nodeIDs, node.NodeID)
   817  		}
   818  		lastNode := nodes[len(nodes)-1]
   819  		token.LastNodeID = lastNode.NodeID
   820  		token.LastTransactionID = lastNode.TransactionID
   821  	}
   822  
   823  	return dataBlobs, transactionIDs, nodeIDs, token, dataSize, nil
   824  }
   825  
   826  func (m *executionManagerImpl) readRawHistoryBranchReverseAndFilter(
   827  	ctx context.Context,
   828  	request *ReadHistoryBranchReverseRequest,
   829  ) ([]*commonpb.DataBlob, []int64, *historyPagingToken, int, error) {
   830  
   831  	shardID := request.ShardID
   832  	branchToken := request.BranchToken
   833  	minNodeID := common.FirstEventID
   834  	maxNodeID := request.MaxEventID
   835  	if maxNodeID == common.EmptyEventID {
   836  		maxNodeID = common.EndEventID
   837  	} else {
   838  		maxNodeID++ // downstream code is exclusive on maxNodeID
   839  	}
   840  
   841  	branch, err := m.GetHistoryBranchUtil().ParseHistoryBranchInfo(branchToken)
   842  	if err != nil {
   843  		return nil, nil, nil, 0, err
   844  	}
   845  	treeID := branch.TreeId
   846  	branchID := branch.BranchId
   847  	branchAncestors := branch.Ancestors
   848  
   849  	// merge tree ID & branch ID into branch ancestors so the processing logic is simple
   850  	beginNodeID := common.FirstEventID
   851  	if len(branch.Ancestors) > 0 {
   852  		beginNodeID = branch.Ancestors[len(branch.Ancestors)-1].GetEndNodeId()
   853  	}
   854  	branchAncestors = append(branchAncestors, &persistencespb.HistoryBranchRange{
   855  		BranchId:    branchID,
   856  		BeginNodeId: beginNodeID,
   857  		EndNodeId:   maxNodeID,
   858  	})
   859  
   860  	token, err := m.deserializeToken(
   861  		request.NextPageToken,
   862  		request.MaxEventID,
   863  		request.LastFirstTransactionID,
   864  	)
   865  	if err != nil {
   866  		return nil, nil, nil, 0, err
   867  	}
   868  
   869  	nodes, token, err := m.readRawHistoryBranchReverse(
   870  		ctx,
   871  		branchToken,
   872  		shardID,
   873  		treeID,
   874  		branchAncestors,
   875  		minNodeID,
   876  		maxNodeID,
   877  		token,
   878  		request.PageSize,
   879  		false,
   880  	)
   881  	if err != nil {
   882  		return nil, nil, nil, 0, err
   883  	}
   884  	if len(nodes) == 0 && len(request.NextPageToken) == 0 {
   885  		return nil, nil, nil, 0, serviceerror.NewNotFound("Workflow execution history not found.")
   886  	}
   887  
   888  	nodes, err = m.filterHistoryNodesReverse(
   889  		token.LastNodeID,
   890  		token.LastTransactionID,
   891  		nodes,
   892  	)
   893  	if err != nil {
   894  		return nil, nil, nil, 0, err
   895  	}
   896  
   897  	var dataBlobs []*commonpb.DataBlob
   898  	transactionIDs := make([]int64, 0, len(nodes))
   899  	dataSize := 0
   900  	if len(nodes) > 0 {
   901  		dataBlobs = make([]*commonpb.DataBlob, len(nodes))
   902  		for index, node := range nodes {
   903  			dataBlobs[index] = node.Events
   904  			dataSize += len(node.Events.Data)
   905  			transactionIDs = append(transactionIDs, node.TransactionID)
   906  		}
   907  		lastNode := nodes[len(nodes)-1]
   908  		token.LastNodeID = lastNode.NodeID
   909  		token.LastTransactionID = lastNode.PrevTransactionID
   910  	}
   911  
   912  	return dataBlobs, transactionIDs, token, dataSize, nil
   913  }
   914  
   915  func (m *executionManagerImpl) readHistoryBranch(
   916  	ctx context.Context,
   917  	byBatch bool,
   918  	request *ReadHistoryBranchRequest,
   919  ) ([]*historypb.HistoryEvent, []*historypb.History, []int64, []byte, int, error) {
   920  
   921  	dataBlobs, transactionIDs, _, token, dataSize, err := m.readRawHistoryBranchAndFilter(ctx, request)
   922  	if err != nil {
   923  		return nil, nil, nil, nil, 0, err
   924  	}
   925  
   926  	historyEvents := make([]*historypb.HistoryEvent, 0, request.PageSize)
   927  	historyEventBatches := make([]*historypb.History, 0, request.PageSize)
   928  
   929  	for _, batch := range dataBlobs {
   930  		events, err := m.serializer.DeserializeEvents(batch)
   931  		if err != nil {
   932  			return nil, nil, nil, nil, dataSize, err
   933  		}
   934  		if len(events) == 0 {
   935  			m.logger.Error("Empty events in a batch")
   936  			return nil, nil, nil, nil, dataSize, serviceerror.NewDataLoss("corrupted history event batch, empty events")
   937  		}
   938  
   939  		firstEvent := events[0]           // first
   940  		eventCount := len(events)         // length
   941  		lastEvent := events[eventCount-1] // last
   942  
   943  		if firstEvent.GetVersion() != lastEvent.GetVersion() || firstEvent.GetEventId()+int64(eventCount-1) != lastEvent.GetEventId() {
   944  			// in a single batch, version should be the same, and ID should be contiguous
   945  			m.logger.Error("Corrupted event batch",
   946  				tag.FirstEventVersion(firstEvent.GetVersion()), tag.WorkflowFirstEventID(firstEvent.GetEventId()),
   947  				tag.LastEventVersion(lastEvent.GetVersion()), tag.WorkflowNextEventID(lastEvent.GetEventId()),
   948  				tag.Counter(eventCount))
   949  			return historyEvents, historyEventBatches, transactionIDs, nil, dataSize, serviceerror.NewDataLoss("corrupted history event batch, wrong version and IDs")
   950  		}
   951  		if firstEvent.GetEventId() != token.LastEventID+1 {
   952  			m.logger.Error("Corrupted non-contiguous event batch",
   953  				tag.WorkflowFirstEventID(firstEvent.GetEventId()),
   954  				tag.WorkflowNextEventID(lastEvent.GetEventId()),
   955  				tag.TokenLastEventID(token.LastEventID),
   956  				tag.Counter(eventCount))
   957  			return historyEvents, historyEventBatches, transactionIDs, nil, dataSize, serviceerror.NewDataLoss("corrupted history event batch, eventID is not contiguous")
   958  		}
   959  
   960  		if byBatch {
   961  			historyEventBatches = append(historyEventBatches, &historypb.History{Events: events})
   962  		} else {
   963  			historyEvents = append(historyEvents, events...)
   964  		}
   965  		token.LastEventID = lastEvent.GetEventId()
   966  	}
   967  
   968  	nextPageToken, err := m.serializeToken(token, false)
   969  	if err != nil {
   970  		return nil, nil, nil, nil, 0, err
   971  	}
   972  	return historyEvents, historyEventBatches, transactionIDs, nextPageToken, dataSize, nil
   973  }
   974  
   975  func (m *executionManagerImpl) readHistoryBranchReverse(
   976  	ctx context.Context,
   977  	request *ReadHistoryBranchReverseRequest,
   978  ) ([]*historypb.HistoryEvent, []int64, []byte, int, error) {
   979  
   980  	dataBlobs, transactionIDs, token, dataSize, err := m.readRawHistoryBranchReverseAndFilter(ctx, request)
   981  	if err != nil {
   982  		return nil, nil, nil, 0, err
   983  	}
   984  
   985  	historyEvents := make([]*historypb.HistoryEvent, 0, request.PageSize)
   986  
   987  	for _, batch := range dataBlobs {
   988  		events, err := m.serializer.DeserializeEvents(batch)
   989  		if err != nil {
   990  			return nil, nil, nil, dataSize, err
   991  		}
   992  		if len(events) == 0 {
   993  			m.logger.Error("Empty events in a batch")
   994  			return nil, nil, nil, dataSize, serviceerror.NewDataLoss("corrupted history event batch, empty events")
   995  		}
   996  
   997  		firstEvent := events[0]           // first
   998  		eventCount := len(events)         // length
   999  		lastEvent := events[eventCount-1] // last
  1000  
  1001  		if firstEvent.GetVersion() != lastEvent.GetVersion() || firstEvent.GetEventId()+int64(eventCount-1) != lastEvent.GetEventId() {
  1002  			// in a single batch, version should be the same, and ID should be contiguous
  1003  			m.logger.Error("Corrupted event batch",
  1004  				tag.FirstEventVersion(firstEvent.GetVersion()), tag.WorkflowFirstEventID(firstEvent.GetEventId()),
  1005  				tag.LastEventVersion(lastEvent.GetVersion()), tag.WorkflowNextEventID(lastEvent.GetEventId()),
  1006  				tag.Counter(eventCount))
  1007  			return historyEvents, transactionIDs, nil, dataSize, serviceerror.NewDataLoss("corrupted history event batch, wrong version and IDs")
  1008  		}
  1009  		if (token.LastEventID != common.EmptyEventID) && (lastEvent.GetEventId() != token.LastEventID-1) {
  1010  			m.logger.Error("Corrupted non-contiguous event batch",
  1011  				tag.WorkflowFirstEventID(firstEvent.GetEventId()),
  1012  				tag.WorkflowNextEventID(lastEvent.GetEventId()),
  1013  				tag.TokenLastEventID(token.LastEventID),
  1014  				tag.Counter(eventCount))
  1015  			return historyEvents, transactionIDs, nil, dataSize, serviceerror.NewDataLoss("corrupted history event batch, eventID is not contiguous")
  1016  		}
  1017  
  1018  		events = m.reverseSlice(events)
  1019  
  1020  		historyEvents = append(historyEvents, events...)
  1021  		token.LastEventID = firstEvent.GetEventId()
  1022  	}
  1023  
  1024  	nextPageToken, err := m.serializeToken(token, true)
  1025  	if err != nil {
  1026  		return nil, nil, nil, 0, err
  1027  	}
  1028  	return historyEvents, transactionIDs, nextPageToken, dataSize, nil
  1029  }
  1030  
  1031  func (m *executionManagerImpl) reverseSlice(events []*historypb.HistoryEvent) []*historypb.HistoryEvent {
  1032  	for i, j := 0, len(events)-1; i < j; i, j = i+1, j-1 {
  1033  		events[i], events[j] = events[j], events[i]
  1034  	}
  1035  	return events
  1036  }
  1037  
  1038  func (m *executionManagerImpl) filterHistoryNodes(
  1039  	lastNodeID int64,
  1040  	lastTransactionID int64,
  1041  	nodes []InternalHistoryNode,
  1042  ) ([]InternalHistoryNode, error) {
  1043  	var result []InternalHistoryNode
  1044  	for _, node := range nodes {
  1045  		// assuming that business logic layer is correct and transaction ID only increase
  1046  		// thus, valid event batch will come with increasing transaction ID
  1047  
  1048  		// event batches with smaller node ID
  1049  		//  -> should not be possible since records are already sorted
  1050  		// event batches with same node ID
  1051  		//  -> batch with higher transaction ID is valid
  1052  		// event batches with larger node ID
  1053  		//  -> batch with lower transaction ID is invalid (happens before)
  1054  		//  -> batch with higher transaction ID is valid
  1055  		if node.TransactionID < lastTransactionID {
  1056  			continue
  1057  		}
  1058  
  1059  		switch {
  1060  		case node.NodeID < lastNodeID:
  1061  			return nil, serviceerror.NewDataLoss("corrupted data, nodeID cannot decrease")
  1062  		case node.NodeID == lastNodeID:
  1063  			return nil, serviceerror.NewDataLoss("corrupted data, same nodeID must have smaller txnID")
  1064  		default: // row.NodeID > lastNodeID:
  1065  			// NOTE: when row.nodeID > lastNodeID, we expect the one with largest txnID comes first
  1066  			lastTransactionID = node.TransactionID
  1067  			lastNodeID = node.NodeID
  1068  			result = append(result, node)
  1069  		}
  1070  	}
  1071  	return result, nil
  1072  }
  1073  
  1074  func (m *executionManagerImpl) filterHistoryNodesReverse(
  1075  	lastNodeID int64,
  1076  	lastTransactionID int64,
  1077  	nodes []InternalHistoryNode,
  1078  ) ([]InternalHistoryNode, error) {
  1079  	var result []InternalHistoryNode
  1080  	for _, node := range nodes {
  1081  		if lastNodeID == defaultLastNodeID {
  1082  			lastNodeID = node.NodeID
  1083  		}
  1084  		if node.TransactionID != lastTransactionID {
  1085  			continue
  1086  		}
  1087  
  1088  		switch {
  1089  		case node.NodeID > lastNodeID:
  1090  			return nil, serviceerror.NewDataLoss("corrupted data, nodeID cannot decrease")
  1091  		default:
  1092  			lastTransactionID = node.PrevTransactionID
  1093  			lastNodeID = node.NodeID
  1094  			result = append(result, node)
  1095  		}
  1096  	}
  1097  	return result, nil
  1098  }
  1099  
  1100  func (m *executionManagerImpl) deserializeToken(
  1101  	token []byte,
  1102  	defaultLastEventID int64,
  1103  	lastTransactionId int64,
  1104  ) (*historyPagingToken, error) {
  1105  
  1106  	return m.pagingTokenSerializer.Deserialize(
  1107  		token,
  1108  		defaultLastEventID,
  1109  		defaultLastNodeID,
  1110  		lastTransactionId,
  1111  	)
  1112  }
  1113  
  1114  func (m *executionManagerImpl) serializeToken(
  1115  	pagingToken *historyPagingToken,
  1116  	reverseOrder bool,
  1117  ) ([]byte, error) {
  1118  
  1119  	if len(pagingToken.StoreToken) == 0 {
  1120  		if pagingToken.CurrentRangeIndex == pagingToken.FinalRangeIndex {
  1121  			// this means that we have reached the final page of final branchRange
  1122  			return nil, nil
  1123  		}
  1124  
  1125  		if reverseOrder {
  1126  			pagingToken.CurrentRangeIndex--
  1127  
  1128  		} else {
  1129  			pagingToken.CurrentRangeIndex++
  1130  		}
  1131  		return m.pagingTokenSerializer.Serialize(pagingToken)
  1132  	}
  1133  
  1134  	return m.pagingTokenSerializer.Serialize(pagingToken)
  1135  }