go.temporal.io/server@v1.23.0/common/persistence/persistence-tests/history_v2_persistence.go (about)

     1  // The MIT License
     2  //
     3  // Copyright (c) 2020 Temporal Technologies Inc.  All rights reserved.
     4  //
     5  // Copyright (c) 2020 Uber Technologies, Inc.
     6  //
     7  // Permission is hereby granted, free of charge, to any person obtaining a copy
     8  // of this software and associated documentation files (the "Software"), to deal
     9  // in the Software without restriction, including without limitation the rights
    10  // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
    11  // copies of the Software, and to permit persons to whom the Software is
    12  // furnished to do so, subject to the following conditions:
    13  //
    14  // The above copyright notice and this permission notice shall be included in
    15  // all copies or substantial portions of the Software.
    16  //
    17  // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    18  // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    19  // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
    20  // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    21  // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
    22  // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
    23  // THE SOFTWARE.
    24  
    25  package persistencetests
    26  
    27  import (
    28  	"context"
    29  	"math/rand"
    30  	"sync"
    31  	"sync/atomic"
    32  	"time"
    33  
    34  	"github.com/pborman/uuid"
    35  	"github.com/stretchr/testify/require"
    36  	historypb "go.temporal.io/api/history/v1"
    37  	"go.temporal.io/api/serviceerror"
    38  	"google.golang.org/protobuf/types/known/timestamppb"
    39  
    40  	persistencespb "go.temporal.io/server/api/persistence/v1"
    41  	"go.temporal.io/server/common/backoff"
    42  	"go.temporal.io/server/common/debug"
    43  	p "go.temporal.io/server/common/persistence"
    44  	"go.temporal.io/server/common/testing/protorequire"
    45  )
    46  
    47  type (
    48  	// HistoryV2PersistenceSuite contains history persistence tests
    49  	HistoryV2PersistenceSuite struct {
    50  		// suite.Suite
    51  		*TestBase
    52  		// override suite.Suite.Assertions with require.Assertions; this means that s.NotNil(nil) will stop the test,
    53  		// not merely log an error
    54  		*require.Assertions
    55  		protorequire.ProtoAssertions
    56  
    57  		ctx    context.Context
    58  		cancel context.CancelFunc
    59  	}
    60  )
    61  
// testForkRunID is the Info value sent with every ForkHistoryBranch request
// issued by the fork helper in this file.
const testForkRunID = "11220000-0000-f000-f000-000000000000"
    63  
    64  var (
    65  	historyTestRetryPolicy = backoff.NewExponentialRetryPolicy(time.Millisecond * 50).
    66  		WithMaximumInterval(time.Second * 3).
    67  		WithExpirationInterval(time.Second * 30)
    68  )
    69  
    70  func isConditionFail(err error) bool {
    71  	switch err.(type) {
    72  	case *p.ConditionFailedError:
    73  		return true
    74  	default:
    75  		return false
    76  	}
    77  }
    78  
// SetupSuite implementation.
// It currently performs no work.
func (s *HistoryV2PersistenceSuite) SetupSuite() {
}
    82  
// TearDownSuite implementation.
// It delegates to TearDownWorkflowStore, which is defined outside this file.
func (s *HistoryV2PersistenceSuite) TearDownSuite() {
	s.TearDownWorkflowStore()
}
    87  
    88  // SetupTest implementation
    89  func (s *HistoryV2PersistenceSuite) SetupTest() {
    90  	// Have to define our overridden assertions in the test setup. If we did it earlier, s.T() will return nil
    91  	s.Assertions = require.New(s.T())
    92  	s.ProtoAssertions = protorequire.New(s.T())
    93  
    94  	s.ctx, s.cancel = context.WithTimeout(context.Background(), 30*time.Second*debug.TimeoutMultiplier)
    95  }
    96  
// TearDownTest implementation.
// It cancels the context that SetupTest created for the test.
func (s *HistoryV2PersistenceSuite) TearDownTest() {
	s.cancel()
}
   101  
   102  // TestGenUUIDs testing  uuid.New() can generate unique UUID
   103  func (s *HistoryV2PersistenceSuite) TestGenUUIDs() {
   104  	wg := sync.WaitGroup{}
   105  	m := sync.Map{}
   106  	concurrency := 1000
   107  	for i := 0; i < concurrency; i++ {
   108  		wg.Add(1)
   109  		go func() {
   110  			defer wg.Done()
   111  			u := uuid.New()
   112  			m.Store(u, true)
   113  		}()
   114  	}
   115  	wg.Wait()
   116  	cnt := 0
   117  	m.Range(func(k, v interface{}) bool {
   118  		cnt++
   119  		return true
   120  	})
   121  	s.Equal(concurrency, cnt)
   122  }
   123  
   124  // TestScanAllTrees test
   125  func (s *HistoryV2PersistenceSuite) TestScanAllTrees() {
   126  	resp, err := s.ExecutionManager.GetAllHistoryTreeBranches(s.ctx, &p.GetAllHistoryTreeBranchesRequest{
   127  		PageSize: 1,
   128  	})
   129  	s.Nil(err)
   130  	s.Equal(0, len(resp.Branches), "some trees were leaked in other tests")
   131  
   132  	trees := map[string]bool{}
   133  	totalTrees := 1002
   134  	pgSize := 100
   135  
   136  	for i := 0; i < totalTrees; i++ {
   137  		treeID := uuid.NewRandom().String()
   138  		bi, err := s.newHistoryBranch(treeID)
   139  		s.Nil(err)
   140  
   141  		events := s.genRandomEvents([]int64{1, 2, 3}, 1)
   142  		err = s.appendNewBranchAndFirstNode(bi, events, 1, "branchInfo")
   143  		s.Nil(err)
   144  		trees[string(treeID)] = true
   145  	}
   146  
   147  	var pgToken []byte
   148  	for {
   149  		resp, err := s.ExecutionManager.GetAllHistoryTreeBranches(s.ctx, &p.GetAllHistoryTreeBranchesRequest{
   150  			PageSize:      pgSize,
   151  			NextPageToken: pgToken,
   152  		})
   153  		s.Nil(err)
   154  		for _, br := range resp.Branches {
   155  			uuidTreeId := br.BranchInfo.TreeId
   156  			if trees[uuidTreeId] {
   157  				delete(trees, uuidTreeId)
   158  
   159  				s.True(br.ForkTime.AsTime().UnixNano() > 0)
   160  				s.True(len(br.BranchInfo.BranchId) > 0)
   161  				s.Equal("branchInfo", br.Info)
   162  			} else {
   163  				s.Fail("treeID not found", br.BranchInfo.TreeId)
   164  			}
   165  		}
   166  
   167  		if len(resp.NextPageToken) == 0 {
   168  			break
   169  		}
   170  		pgToken = resp.NextPageToken
   171  	}
   172  
   173  	s.Equal(0, len(trees))
   174  }
   175  
// TestReadBranchByPagination builds a branch containing valid and stale event
// batches, forks it, and then reads the forked branch page by page, checking
// the number of events on each page and that the concatenation of all pages
// equals the expected history. It finishes by checking that a first read whose
// MinEventID falls in the middle of a batch fails with NotFound, and by
// deleting both branches.
func (s *HistoryV2PersistenceSuite) TestReadBranchByPagination() {
	treeID := uuid.NewRandom().String()
	bi, err := s.newHistoryBranch(treeID)
	s.Nil(err)

	// historyW accumulates the events that are expected to be read back.
	historyW := &historypb.History{}
	events := s.genRandomEvents([]int64{1, 2, 3}, 0)
	err = s.appendNewBranchAndFirstNode(bi, events, 1, "branchInfo")
	s.Nil(err)
	historyW.Events = events

	events = s.genRandomEvents([]int64{4}, 0)
	err = s.appendNewNode(bi, events, 2)
	s.Nil(err)
	historyW.Events = append(historyW.Events, events...)

	events = s.genRandomEvents([]int64{5, 6, 7, 8}, 4)
	err = s.appendNewNode(bi, events, 6)
	s.Nil(err)
	historyW.Events = append(historyW.Events, events...)

	// stale event batch
	events = s.genRandomEvents([]int64{6, 7, 8}, 1)
	err = s.appendNewNode(bi, events, 3)
	s.Nil(err)
	// stale event batch
	events = s.genRandomEvents([]int64{6, 7, 8}, 2)
	err = s.appendNewNode(bi, events, 4)
	s.Nil(err)
	// stale event batch
	events = s.genRandomEvents([]int64{6, 7, 8}, 3)
	err = s.appendNewNode(bi, events, 5)
	s.Nil(err)

	events = s.genRandomEvents([]int64{9}, 4)
	err = s.appendNewNode(bi, events, 7)
	s.Nil(err)
	historyW.Events = append(historyW.Events, events...)

	// Start to read from middle, should not return error, but the first batch should be ignored by application layer
	req := &p.ReadHistoryBranchRequest{
		BranchToken:   bi,
		MinEventID:    6,
		MaxEventID:    10,
		PageSize:      4,
		NextPageToken: nil,
		ShardID:       s.ShardInfo.GetShardId(),
	}
	// first page
	resp, err := s.ExecutionManager.ReadHistoryBranch(s.ctx, req)
	s.Nil(err)
	s.Equal(4, len(resp.HistoryEvents))
	s.Equal(int64(6), resp.HistoryEvents[0].GetEventId())

	events = s.genRandomEvents([]int64{10}, 4)
	err = s.appendNewNode(bi, events, 8)
	s.Nil(err)
	historyW.Events = append(historyW.Events, events...)

	events = s.genRandomEvents([]int64{11}, 4)
	err = s.appendNewNode(bi, events, 9)
	s.Nil(err)
	historyW.Events = append(historyW.Events, events...)

	events = s.genRandomEvents([]int64{12}, 4)
	err = s.appendNewNode(bi, events, 10)
	s.Nil(err)
	historyW.Events = append(historyW.Events, events...)

	events = s.genRandomEvents([]int64{13, 14, 15}, 4)
	err = s.appendNewNode(bi, events, 11)
	s.Nil(err)
	// we don't append this batch because we will fork from 13
	// historyW.Events = append(historyW.Events, events...)

	// fork from here
	bi2, err := s.fork(bi, 13)
	s.Nil(err)

	events = s.genRandomEvents([]int64{13}, 4)
	err = s.appendNewNode(bi2, events, 12)
	s.Nil(err)
	historyW.Events = append(historyW.Events, events...)

	events = s.genRandomEvents([]int64{14}, 4)
	err = s.appendNewNode(bi2, events, 13)
	s.Nil(err)
	historyW.Events = append(historyW.Events, events...)

	events = s.genRandomEvents([]int64{15, 16, 17}, 4)
	err = s.appendNewNode(bi2, events, 14)
	s.Nil(err)
	historyW.Events = append(historyW.Events, events...)

	events = s.genRandomEvents([]int64{18, 19, 20}, 4)
	err = s.appendNewNode(bi2, events, 15)
	s.Nil(err)
	historyW.Events = append(historyW.Events, events...)

	// read branch to verify
	// historyR accumulates the events actually returned, page by page.
	historyR := &historypb.History{}

	req = &p.ReadHistoryBranchRequest{
		BranchToken:   bi2,
		MinEventID:    1,
		MaxEventID:    21,
		PageSize:      3,
		NextPageToken: nil,
		ShardID:       s.ShardInfo.GetShardId(),
	}

	// first page
	resp, err = s.ExecutionManager.ReadHistoryBranch(s.ctx, req)
	s.Nil(err)

	s.Equal(8, len(resp.HistoryEvents))
	historyR.Events = append(historyR.Events, resp.HistoryEvents...)
	req.NextPageToken = resp.NextPageToken

	// this page is all stale batches
	// due to difference in Cassandra / MySQL pagination
	// the stale event batch may get returned
	resp, err = s.ExecutionManager.ReadHistoryBranch(s.ctx, req)
	s.Nil(err)
	historyR.Events = append(historyR.Events, resp.HistoryEvents...)
	req.NextPageToken = resp.NextPageToken
	if len(resp.HistoryEvents) == 0 {
		// second page
		resp, err = s.ExecutionManager.ReadHistoryBranch(s.ctx, req)
		s.Nil(err)
		s.Equal(3, len(resp.HistoryEvents))
		historyR.Events = append(historyR.Events, resp.HistoryEvents...)
		req.NextPageToken = resp.NextPageToken
	} else if len(resp.HistoryEvents) == 3 {
		// no op
	} else {
		s.Fail("should either return 0 (Cassandra) or 3 (MySQL) events")
	}

	// 3rd page, since we fork from nodeID=13, we can only see one batch of 12 here
	resp, err = s.ExecutionManager.ReadHistoryBranch(s.ctx, req)
	s.Nil(err)
	s.Equal(1, len(resp.HistoryEvents))
	historyR.Events = append(historyR.Events, resp.HistoryEvents...)
	req.NextPageToken = resp.NextPageToken

	// 4th page, 13~17
	resp, err = s.ExecutionManager.ReadHistoryBranch(s.ctx, req)
	s.Nil(err)
	s.Equal(5, len(resp.HistoryEvents))
	historyR.Events = append(historyR.Events, resp.HistoryEvents...)
	req.NextPageToken = resp.NextPageToken

	// last page: one batch of 18-20
	// We have only one page left and the page size is set to one. In this case,
	// persistence may or may not return a nextPageToken.
	// If it does return a token, we need to ensure that if the token returned is used
	// to get history again, no error and history events should be returned.
	req.PageSize = 1
	resp, err = s.ExecutionManager.ReadHistoryBranch(s.ctx, req)
	s.Nil(err)
	s.Equal(3, len(resp.HistoryEvents))
	historyR.Events = append(historyR.Events, resp.HistoryEvents...)
	req.NextPageToken = resp.NextPageToken
	if len(resp.NextPageToken) != 0 {
		resp, err = s.ExecutionManager.ReadHistoryBranch(s.ctx, req)
		s.Nil(err)
		s.Equal(0, len(resp.HistoryEvents))
	}

	// Everything read back must match what was written, and the most recent
	// response must not carry a further page token.
	s.ProtoEqual(historyW, historyR)
	s.Equal(0, len(resp.NextPageToken))

	// MinEventID is in the middle of the last batch and this is the first request (NextPageToken
	// is empty), the call should return an error.
	req.MinEventID = 19
	req.NextPageToken = nil
	_, err = s.ExecutionManager.ReadHistoryBranch(s.ctx, req)
	s.IsType(&serviceerror.NotFound{}, err)

	// Clean up: delete the fork first, then the original branch, and check
	// that the tree has no branches left.
	err = s.deleteHistoryBranch(bi2)
	s.Nil(err)
	err = s.deleteHistoryBranch(bi)
	s.Nil(err)
	branches := s.descTree(treeID)
	s.Equal(0, len(branches))
}
   364  
// TestConcurrentlyCreateAndAppendBranches runs two phases, each in
// `concurrency` goroutines. The first phase creates a new branch per goroutine
// and appends nodes to it; the second phase overrides nodes on those branches
// with different transaction IDs and checks what is readable after each
// override. All branches are deleted at the end.
//
// NOTE(review): concurrency is 1, so each phase runs a single goroutine --
// confirm whether a larger value was intended.
func (s *HistoryV2PersistenceSuite) TestConcurrentlyCreateAndAppendBranches() {
	treeID := uuid.NewRandom().String()
	wg := sync.WaitGroup{}
	concurrency := 1
	// m maps the goroutine index to the branch token it created.
	m := &sync.Map{}

	// test create new branch along with appending new nodes
	for i := 0; i < concurrency; i++ {
		wg.Add(1)
		go func(idx int) {
			defer wg.Done()
			bi, err := s.newHistoryBranch(treeID)
			s.Nil(err)
			// historyW accumulates the events expected to be read back.
			historyW := &historypb.History{}
			m.Store(idx, bi)

			events := s.genRandomEvents([]int64{1, 2, 3}, 1)
			err = s.appendNewBranchAndFirstNode(bi, events, 1, "branchInfo")
			s.Nil(err)
			historyW.Events = events

			events = s.genRandomEvents([]int64{4}, 1)
			err = s.appendNewNode(bi, events, 2)
			s.Nil(err)
			historyW.Events = append(historyW.Events, events...)

			events = s.genRandomEvents([]int64{5, 6, 7, 8}, 1)
			err = s.appendNewNode(bi, events, 3)
			s.Nil(err)
			historyW.Events = append(historyW.Events, events...)

			events = s.genRandomEvents([]int64{9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20}, 1)
			err = s.appendNewNode(bi, events, 4000)
			s.Nil(err)
			historyW.Events = append(historyW.Events, events...)

			// read branch to verify
			historyR := &historypb.History{}
			events = s.read(bi, 1, 21)
			s.Equal(20, len(events))
			historyR.Events = events

			s.ProtoEqual(historyW, historyR)
		}(i)
	}

	wg.Wait()
	branches := s.descTree(treeID)
	s.Equal(concurrency, len(branches))

	wg = sync.WaitGroup{}
	// test appending nodes(override and new nodes) on each branch concurrently
	for i := 0; i < concurrency; i++ {
		wg.Add(1)
		go func(idx int) {
			defer wg.Done()

			branch := s.getBranchByKey(m, idx)

			// override with smaller txn_id
			events := s.genRandomEvents([]int64{5}, 1)
			err := s.appendNewNode(branch, events, 0)
			s.Nil(err)
			// it shouldn't change anything
			events = s.read(branch, 1, 25)
			s.Equal(20, len(events))

			// override with greatest txn_id
			events = s.genRandomEvents([]int64{5}, 1)
			err = s.appendNewNode(branch, events, 3000)
			s.Nil(err)

			// read to verify override success, at this point history is corrupted, missing 6/7/8, so we should only see 5 events
			events = s.read(branch, 1, 6)
			s.Equal(5, len(events))
			// Reading past the hole must fail with a DataLoss error.
			_, err = s.readWithError(branch, 1, 25)
			_, ok := err.(*serviceerror.DataLoss)
			s.Equal(true, ok)

			// override with even larger txn_id and same version
			events = s.genRandomEvents([]int64{5, 6}, 1)
			err = s.appendNewNode(branch, events, 3001)
			s.Nil(err)

			// read to verify override success, at this point history is corrupted, missing 7/8, so we should only see 6 events
			events = s.read(branch, 1, 7)
			s.Equal(6, len(events))
			_, err = s.readWithError(branch, 1, 25)
			_, ok = err.(*serviceerror.DataLoss)
			s.Equal(true, ok)

			// override more with larger txn_id, this would fix the corrupted hole so that we can get 20 events again
			events = s.genRandomEvents([]int64{7, 8}, 1)
			err = s.appendNewNode(branch, events, 3002)
			s.Nil(err)

			// read to verify override
			events = s.read(branch, 1, 25)
			s.Equal(20, len(events))
			events = s.genRandomEvents([]int64{9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23}, 1)
			err = s.appendNewNode(branch, events, 4001)
			s.Nil(err)
			events = s.read(branch, 1, 25)
			s.Equal(23, len(events))
		}(i)
	}

	wg.Wait()
	// Finally lets clean up all branches
	m.Range(func(k, v interface{}) bool {
		br := v.([]byte)
		// delete old branches along with create new branches
		err := s.deleteHistoryBranch(br)
		s.Nil(err)
		return true
	})

	branches = s.descTree(treeID)
	s.Equal(0, len(branches))
}
   486  
   487  // TestConcurrentlyForkAndAppendBranches test
   488  func (s *HistoryV2PersistenceSuite) TestConcurrentlyForkAndAppendBranches() {
   489  	treeID := uuid.NewRandom().String()
   490  	wg := sync.WaitGroup{}
   491  	concurrency := 10
   492  	masterBr, err := s.newHistoryBranch(treeID)
   493  	s.Nil(err)
   494  	branches := s.descTree(treeID)
   495  	s.Equal(0, len(branches))
   496  
   497  	// append first batch to master branch
   498  	eids := []int64{}
   499  	for i := int64(1); i <= int64(concurrency)+1; i++ {
   500  		eids = append(eids, i)
   501  	}
   502  	events := s.genRandomEvents(eids, 1)
   503  	err = s.appendNewBranchAndFirstNode(masterBr, events[0:1], 1, "masterbr")
   504  	s.Nil(err)
   505  
   506  	readEvents := s.read(masterBr, 1, int64(concurrency)+2)
   507  	s.Nil(err)
   508  	s.Equal(1, len(readEvents))
   509  
   510  	branches = s.descTree(treeID)
   511  	s.Equal(1, len(branches))
   512  	mbrID := branches[0].BranchId
   513  
   514  	txn := int64(1)
   515  	getTxnLock := sync.Mutex{}
   516  	reserveTxn := func(count int) int64 {
   517  		getTxnLock.Lock()
   518  		defer getTxnLock.Unlock()
   519  
   520  		ret := txn
   521  		txn += int64(count)
   522  		return ret
   523  	}
   524  
   525  	err = s.appendOneByOne(masterBr, events[1:], reserveTxn(len(events[1:])))
   526  	s.Nil(err)
   527  	events = s.read(masterBr, 1, int64(concurrency)+2)
   528  	s.Nil(err)
   529  	s.Equal((concurrency)+1, len(events))
   530  
   531  	level1ID := new(sync.Map)
   532  	level1Br := new(sync.Map)
   533  	// test forking from master branch and append nodes
   534  	for i := 0; i < concurrency; i++ {
   535  		wg.Add(1)
   536  		go func(idx int) {
   537  			defer wg.Done()
   538  
   539  			forkNodeID := rand.Int63n(int64(concurrency)) + 2
   540  			level1ID.Store(idx, forkNodeID)
   541  
   542  			bi, err := s.fork(masterBr, forkNodeID)
   543  			s.Nil(err)
   544  			level1Br.Store(idx, bi)
   545  
   546  			// cannot append to ancestors
   547  			events := s.genRandomEvents([]int64{forkNodeID - 1}, 1)
   548  			err = s.appendNewNode(bi, events, reserveTxn(1))
   549  			_, ok := err.(*p.InvalidPersistenceRequestError)
   550  			s.Equal(true, ok)
   551  
   552  			// append second batch to first level
   553  			eids := make([]int64, 0)
   554  			for i := forkNodeID; i <= int64(concurrency)*2+1; i++ {
   555  				eids = append(eids, i)
   556  			}
   557  			events = s.genRandomEvents(eids, 1)
   558  
   559  			err = s.appendNewNode(bi, events[0:1], reserveTxn(1))
   560  			s.Nil(err)
   561  
   562  			err = s.appendOneByOne(bi, events[1:], reserveTxn(len(events[1:])))
   563  			s.Nil(err)
   564  
   565  			events = s.read(bi, 1, int64(concurrency)*2+2)
   566  			s.Nil(err)
   567  			s.Equal((concurrency)*2+1, len(events))
   568  
   569  			if idx == 0 {
   570  				err = s.deleteHistoryBranch(bi)
   571  				s.Nil(err)
   572  			}
   573  
   574  		}(i)
   575  	}
   576  
   577  	wg.Wait()
   578  	branches = s.descTree(treeID)
   579  	s.Equal(concurrency, len(branches))
   580  	forkOnLevel1 := int32(0)
   581  	level2Br := new(sync.Map)
   582  	wg = sync.WaitGroup{}
   583  
   584  	// test forking for second level of branch
   585  	for i := 1; i < concurrency; i++ {
   586  		wg.Add(1)
   587  		go func(idx int) {
   588  			defer wg.Done()
   589  
   590  			// Event we fork from level1 branch, it is possible that the new branch will fork from master branch
   591  			forkNodeID := rand.Int63n(int64(concurrency)*2) + 2
   592  			forkBr := s.getBranchByKey(level1Br, idx)
   593  			lastForkNodeID := s.getIDByKey(level1ID, idx)
   594  
   595  			if forkNodeID > lastForkNodeID {
   596  				atomic.AddInt32(&forkOnLevel1, int32(1))
   597  			}
   598  
   599  			bi, err := s.fork(forkBr, forkNodeID)
   600  			s.Nil(err)
   601  			level2Br.Store(idx, bi)
   602  
   603  			// append second batch to second level
   604  			eids := make([]int64, 0)
   605  			for i := forkNodeID; i <= int64(concurrency)*3+1; i++ {
   606  				eids = append(eids, i)
   607  			}
   608  			events := s.genRandomEvents(eids, 1)
   609  			err = s.appendNewNode(bi, events[0:1], reserveTxn(1))
   610  			s.Nil(err)
   611  			err = s.appendOneByOne(bi, events[1:], reserveTxn(len(events[1:])))
   612  			s.Nil(err)
   613  			events = s.read(bi, 1, int64(concurrency)*3+2)
   614  			s.Nil(err)
   615  			s.Equal((concurrency)*3+1, len(events))
   616  
   617  			// try override last event
   618  			events = s.genRandomEvents([]int64{int64(concurrency)*3 + 1}, 1)
   619  			err = s.appendNewNode(bi, events, reserveTxn(1))
   620  			s.Nil(err)
   621  			events = s.read(bi, 1, int64(concurrency)*3+2)
   622  			s.Nil(err)
   623  			s.Equal((concurrency)*3+1, len(events))
   624  
   625  			// test fork and newBranch concurrently
   626  			bi, err = s.newHistoryBranch(treeID)
   627  			s.Nil(err)
   628  			level2Br.Store(concurrency+idx, bi)
   629  
   630  			events = s.genRandomEvents([]int64{1}, 1)
   631  			err = s.appendNewBranchAndFirstNode(bi, events, reserveTxn(1), "newbr")
   632  			s.Nil(err)
   633  
   634  		}(i)
   635  	}
   636  
   637  	wg.Wait()
   638  	branches = s.descTree(treeID)
   639  	s.Equal(concurrency*3-2, len(branches))
   640  	actualForkOnLevel1 := int32(0)
   641  	masterCnt := 0
   642  	for _, b := range branches {
   643  		if len(b.Ancestors) == 2 {
   644  			actualForkOnLevel1++
   645  		} else if len(b.Ancestors) == 0 {
   646  			masterCnt++
   647  		} else {
   648  			s.Equal(1, len(b.Ancestors))
   649  			s.Equal(mbrID, b.Ancestors[0].GetBranchId())
   650  		}
   651  	}
   652  	s.Equal(forkOnLevel1, actualForkOnLevel1)
   653  	s.Equal(concurrency, masterCnt)
   654  
   655  	// Finally lets clean up all branches
   656  	level1Br.Range(func(k, v interface{}) bool {
   657  		br := v.([]byte)
   658  		// delete old branches along with create new branches
   659  		err := s.deleteHistoryBranch(br)
   660  		s.Nil(err)
   661  
   662  		return true
   663  	})
   664  	level2Br.Range(func(k, v interface{}) bool {
   665  		br := v.([]byte)
   666  		// delete old branches along with create new branches
   667  		err := s.deleteHistoryBranch(br)
   668  		s.Nil(err)
   669  
   670  		return true
   671  	})
   672  	err = s.deleteHistoryBranch(masterBr)
   673  	s.Nil(err)
   674  
   675  	branches = s.descTree(treeID)
   676  	s.Equal(0, len(branches))
   677  
   678  }
   679  
   680  func (s *HistoryV2PersistenceSuite) getBranchByKey(m *sync.Map, k int) []byte {
   681  	v, ok := m.Load(k)
   682  	s.Equal(true, ok)
   683  	br := v.([]byte)
   684  	return br
   685  }
   686  
   687  func (s *HistoryV2PersistenceSuite) getIDByKey(m *sync.Map, k int) int64 {
   688  	v, ok := m.Load(k)
   689  	s.Equal(true, ok)
   690  	id := v.(int64)
   691  	return id
   692  }
   693  
   694  func (s *HistoryV2PersistenceSuite) genRandomEvents(eventIDs []int64, version int64) []*historypb.HistoryEvent {
   695  	var events []*historypb.HistoryEvent
   696  
   697  	now := time.Date(2020, 8, 22, 0, 0, 0, 0, time.UTC)
   698  	for _, eid := range eventIDs {
   699  		e := &historypb.HistoryEvent{EventId: eid, Version: version, EventTime: timestamppb.New(now)}
   700  		events = append(events, e)
   701  	}
   702  
   703  	return events
   704  }
   705  
   706  // persistence helper
   707  func (s *HistoryV2PersistenceSuite) newHistoryBranch(treeID string) ([]byte, error) {
   708  	return s.ExecutionManager.GetHistoryBranchUtil().NewHistoryBranch(
   709  		uuid.New(),
   710  		uuid.New(),
   711  		uuid.New(),
   712  		treeID,
   713  		nil,
   714  		[]*persistencespb.HistoryBranchRange{},
   715  		0,
   716  		0,
   717  		0,
   718  	)
   719  }
   720  
   721  // persistence helper
   722  func (s *HistoryV2PersistenceSuite) deleteHistoryBranch(branch []byte) error {
   723  
   724  	op := func() error {
   725  		return s.ExecutionManager.DeleteHistoryBranch(s.ctx, &p.DeleteHistoryBranchRequest{
   726  			BranchToken: branch,
   727  			ShardID:     s.ShardInfo.GetShardId(),
   728  		})
   729  	}
   730  
   731  	return backoff.ThrottleRetry(op, historyTestRetryPolicy, isConditionFail)
   732  }
   733  
   734  // persistence helper
   735  func (s *HistoryV2PersistenceSuite) descTree(treeID string) []*persistencespb.HistoryBranch {
   736  	resp, err := s.ExecutionManager.GetHistoryTree(s.ctx, &p.GetHistoryTreeRequest{
   737  		TreeID:  treeID,
   738  		ShardID: s.ShardInfo.GetShardId(),
   739  	})
   740  	s.Nil(err)
   741  	return resp.BranchInfos
   742  }
   743  
   744  // persistence helper
   745  func (s *HistoryV2PersistenceSuite) read(branch []byte, minID, maxID int64) []*historypb.HistoryEvent {
   746  	res, err := s.readWithError(branch, minID, maxID)
   747  	s.Nil(err)
   748  	return res
   749  }
   750  
   751  func (s *HistoryV2PersistenceSuite) readWithError(branch []byte, minID, maxID int64) ([]*historypb.HistoryEvent, error) {
   752  
   753  	// use small page size to enforce pagination
   754  	randPageSize := 2
   755  	res := make([]*historypb.HistoryEvent, 0)
   756  	token := []byte{}
   757  	for {
   758  		resp, err := s.ExecutionManager.ReadHistoryBranch(s.ctx, &p.ReadHistoryBranchRequest{
   759  			BranchToken:   branch,
   760  			MinEventID:    minID,
   761  			MaxEventID:    maxID,
   762  			PageSize:      randPageSize,
   763  			NextPageToken: token,
   764  			ShardID:       s.ShardInfo.GetShardId(),
   765  		})
   766  		if err != nil {
   767  			return nil, err
   768  		}
   769  		if len(resp.HistoryEvents) > 0 {
   770  			s.True(resp.Size > 0)
   771  		}
   772  		res = append(res, resp.HistoryEvents...)
   773  		token = resp.NextPageToken
   774  		if len(token) == 0 {
   775  			break
   776  		}
   777  	}
   778  
   779  	return res, nil
   780  }
   781  
   782  func (s *HistoryV2PersistenceSuite) appendOneByOne(branch []byte, events []*historypb.HistoryEvent, txnID int64) error {
   783  	for index, e := range events {
   784  		err := s.append(branch, []*historypb.HistoryEvent{e}, txnID+int64(index), false, "")
   785  		if err != nil {
   786  			return err
   787  		}
   788  	}
   789  	return nil
   790  }
   791  
// appendNewNode appends events as one batch with transaction ID txnID. It
// calls append with isNewBranch set to false and an empty branch info string.
func (s *HistoryV2PersistenceSuite) appendNewNode(branch []byte, events []*historypb.HistoryEvent, txnID int64) error {
	return s.append(branch, events, txnID, false, "")
}
   795  
// appendNewBranchAndFirstNode appends events as one batch with transaction ID
// txnID. It calls append with isNewBranch set to true and passes branchInfo as
// the branch info string.
func (s *HistoryV2PersistenceSuite) appendNewBranchAndFirstNode(branch []byte, events []*historypb.HistoryEvent, txnID int64, branchInfo string) error {
	return s.append(branch, events, txnID, true, branchInfo)
}
   799  
   800  // persistence helper
   801  func (s *HistoryV2PersistenceSuite) append(branch []byte, events []*historypb.HistoryEvent, txnID int64, isNewBranch bool, branchInfo string) error {
   802  
   803  	var resp *p.AppendHistoryNodesResponse
   804  
   805  	op := func() error {
   806  		var err error
   807  		resp, err = s.ExecutionManager.AppendHistoryNodes(s.ctx, &p.AppendHistoryNodesRequest{
   808  			IsNewBranch:   isNewBranch,
   809  			Info:          branchInfo,
   810  			BranchToken:   branch,
   811  			Events:        events,
   812  			TransactionID: txnID,
   813  			ShardID:       s.ShardInfo.GetShardId(),
   814  		})
   815  		return err
   816  	}
   817  
   818  	err := backoff.ThrottleRetry(op, historyTestRetryPolicy, isConditionFail)
   819  	if err != nil {
   820  		return err
   821  	}
   822  	s.True(resp.Size > 0)
   823  
   824  	return err
   825  }
   826  
   827  // persistence helper
   828  func (s *HistoryV2PersistenceSuite) fork(forkBranch []byte, forkNodeID int64) ([]byte, error) {
   829  
   830  	bi := []byte{}
   831  
   832  	op := func() error {
   833  		var err error
   834  		resp, err := s.ExecutionManager.ForkHistoryBranch(s.ctx, &p.ForkHistoryBranchRequest{
   835  			ForkBranchToken: forkBranch,
   836  			ForkNodeID:      forkNodeID,
   837  			Info:            testForkRunID,
   838  			ShardID:         s.ShardInfo.GetShardId(),
   839  			NamespaceID:     uuid.New(),
   840  		})
   841  		if resp != nil {
   842  			bi = resp.NewBranchToken
   843  		}
   844  		return err
   845  	}
   846  
   847  	err := backoff.ThrottleRetry(op, historyTestRetryPolicy, isConditionFail)
   848  	return bi, err
   849  }