github.com/m3db/m3@v1.5.1-0.20231129193456-75a402aa583b/src/dbnode/persist/fs/retriever_test.go

// Copyright (c) 2016 Uber Technologies, Inc.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.

package fs

import (
	stdctx "context"
	"errors"
	"fmt"
	"io/ioutil"
	"math/rand"
	"os"
	"path/filepath"
	"strconv"
	"sync"
	"testing"
	"time"

	"github.com/uber-go/tally"

	"github.com/m3db/m3/src/cluster/shard"
	"github.com/m3db/m3/src/dbnode/digest"
	"github.com/m3db/m3/src/dbnode/namespace"
	"github.com/m3db/m3/src/dbnode/persist"
	"github.com/m3db/m3/src/dbnode/retention"
	"github.com/m3db/m3/src/dbnode/sharding"
	"github.com/m3db/m3/src/dbnode/storage/block"
	"github.com/m3db/m3/src/dbnode/storage/index/convert"
	"github.com/m3db/m3/src/dbnode/ts"
	"github.com/m3db/m3/src/dbnode/x/xio"
	"github.com/m3db/m3/src/x/checked"
	"github.com/m3db/m3/src/x/context"
	"github.com/m3db/m3/src/x/ident"
	"github.com/m3db/m3/src/x/instrument"
	"github.com/m3db/m3/src/x/pool"
	xsync "github.com/m3db/m3/src/x/sync"
	xtime "github.com/m3db/m3/src/x/time"

	"github.com/fortytw2/leaktest"
	"github.com/golang/mock/gomock"
	"github.com/stretchr/testify/assert"
	"github.com/stretchr/testify/require"
)

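// testBlockRetrieverOptions bundles the inputs needed to construct a block
// retriever under test: the retriever and filesystem options, the shards the
// retriever should own, and an optional override for the seeker manager
// constructor.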
type testBlockRetrieverOptions struct {
	retrieverOpts  BlockRetrieverOptions
	fsOpts         Options
	newSeekerMgrFn newSeekerMgrFn
	shards         []uint32
}

type testCleanupFn func()

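// newOpenTestBlockRetriever constructs a block retriever from the supplied
// options, creates the namespace data directory, opens the retriever for the
// given shards, and returns it along with a cleanup function that closes it.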
func newOpenTestBlockRetriever(
	t *testing.T,
	md namespace.Metadata,
	opts testBlockRetrieverOptions,
) (*blockRetriever, testCleanupFn) {
	require.NotNil(t, opts.retrieverOpts)
	require.NotNil(t, opts.fsOpts)

	r, err := NewBlockRetriever(opts.retrieverOpts, opts.fsOpts)
	require.NoError(t, err)

	retriever := r.(*blockRetriever)
	if opts.newSeekerMgrFn != nil {
		retriever.newSeekerMgrFn = opts.newSeekerMgrFn
	}

	shardSet, err := sharding.NewShardSet(
		sharding.NewShards(opts.shards, shard.Available),
		sharding.DefaultHashFn(1),
	)
	require.NoError(t, err)

	nsPath := NamespaceDataDirPath(opts.fsOpts.FilePathPrefix(), testNs1ID)
	require.NoError(t, os.MkdirAll(nsPath, opts.fsOpts.NewDirectoryMode()))
	require.NoError(t, retriever.Open(md, shardSet))

	return retriever, func() {
		require.NoError(t, retriever.Close())
	}
}

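// newOpenTestWriter opens a data fileset writer for the given shard, block
// start, and volume, and returns it along with a cleanup function that closes
// the writer.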
func newOpenTestWriter(
	t *testing.T,
	fsOpts Options,
	shard uint32,
	start xtime.UnixNano,
	volume int,
) (DataFileSetWriter, testCleanupFn) {
	w := newTestWriter(t, fsOpts.FilePathPrefix())
	writerOpts := DataWriterOpenOptions{
		BlockSize: testBlockSize,
		Identifier: FileSetFileIdentifier{
			Namespace:   testNs1ID,
			Shard:       shard,
			BlockStart:  start,
			VolumeIndex: volume,
		},
	}
	require.NoError(t, w.Open(writerOpts))
	return w, func() {
		require.NoError(t, w.Close())
	}
}

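// streamResult captures a single Stream() call so the returned block reader
// can be validated after the concurrent reads complete.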
type streamResult struct {
	ctx        context.Context
	shard      uint32
	id         string
	blockStart xtime.UnixNano
	stream     xio.BlockReader
	canceled   bool
}

// TestBlockRetrieverHighConcurrentSeeks tests the retriever with highly
// concurrent seeks, but without caching the shard indices. This means that the
// seekers will be opened lazily by calls to ConcurrentIDBloomFilter() in the
// SeekerManager.
func TestBlockRetrieverHighConcurrentSeeks(t *testing.T) {
	testBlockRetrieverHighConcurrentSeeks(t, false)
}

// TestBlockRetrieverHighConcurrentSeeksCacheShardIndices tests the retriever
// with highly concurrent seeks and caches the shard indices at the beginning.
// This means that the seekers will be opened all at once up front, so by the
// time ConcurrentIDBloomFilter() is called, the seekers will already be open.
func TestBlockRetrieverHighConcurrentSeeksCacheShardIndices(t *testing.T) {
	testBlockRetrieverHighConcurrentSeeks(t, true)
}

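// seekerTrackCloses wraps a DataFileSetSeeker and invokes trackCloseFn every
// time the seeker is closed so tests can count closes.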
type seekerTrackCloses struct {
	DataFileSetSeeker

	trackCloseFn func()
}

func (s seekerTrackCloses) Close() error {
	s.trackCloseFn()
	return s.DataFileSetSeeker.Close()
}

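// testBlockRetrieverHighConcurrentSeeks drives the block retriever with many
// concurrent Stream() calls while block lease updates, randomly failing seeker
// opens, and request cancellations happen in the background. It then verifies
// the returned data, the tags observed by the OnRetrieve callback, and that
// every tracked seeker open is matched by a close.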
func testBlockRetrieverHighConcurrentSeeks(t *testing.T, shouldCacheShardIndices bool) {
	ctrl := gomock.NewController(t)
	defer ctrl.Finish()
	defer leaktest.CheckTimeout(t, 2*time.Minute)()

	dir, err := ioutil.TempDir("", "testdb")
	require.NoError(t, err)
	defer os.RemoveAll(dir)

	// Setup data generation.
	var (
		nsMeta   = testNs1Metadata(t)
		ropts    = nsMeta.Options().RetentionOptions()
		nsCtx    = namespace.NewContextFrom(nsMeta)
		now      = xtime.Now().Truncate(ropts.BlockSize())
		min, max = now.Add(-6 * ropts.BlockSize()), now.Add(-ropts.BlockSize())

		shards         = []uint32{0, 1, 2}
		idsPerShard    = 16
		shardIDs       = make(map[uint32][]ident.ID)
		shardIDStrings = make(map[uint32][]string)
		dataBytesPerID = 32
		// Shard -> ID -> Blockstart -> Data
		shardData   = make(map[uint32]map[string]map[xtime.UnixNano]checked.Bytes)
		blockStarts []xtime.UnixNano
		volumes     = []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9}
	)
	for st := min; !st.After(max); st = st.Add(ropts.BlockSize()) {
		blockStarts = append(blockStarts, st)
	}

	// Setup retriever.
	var (
		filePathPrefix             = filepath.Join(dir, "")
		fsOpts                     = testDefaultOpts.SetFilePathPrefix(filePathPrefix)
		fetchConcurrency           = 4
		seekConcurrency            = 4 * fetchConcurrency
		updateOpenLeaseConcurrency = 4
		// NB(r): Try to make sure same req structs are reused frequently
		// to surface any race issues that might occur with pooling.
		poolOpts = pool.NewObjectPoolOptions().
				SetSize(fetchConcurrency / 2)
	)
	segReaderPool := xio.NewSegmentReaderPool(poolOpts)
	segReaderPool.Init()

	retrieveRequestPool := NewRetrieveRequestPool(segReaderPool, poolOpts)
	retrieveRequestPool.Init()

	opts := testBlockRetrieverOptions{
		retrieverOpts: defaultTestBlockRetrieverOptions.
			SetFetchConcurrency(fetchConcurrency).
			SetRetrieveRequestPool(retrieveRequestPool),
		fsOpts: fsOpts,
		shards: shards,
	}

	retriever, cleanup := newOpenTestBlockRetriever(t, testNs1Metadata(t), opts)
	defer cleanup()

	// Setup the open seeker function to fail sometimes to exercise that code path.
	var (
		seekerMgr                 = retriever.seekerMgr.(*seekerManager)
		existingNewOpenSeekerFn   = seekerMgr.newOpenSeekerFn
		seekerStatsLock           sync.Mutex
		numNonTerminalVolumeOpens int
		numSeekerCloses           int
	)
	newNewOpenSeekerFn := func(shard uint32, blockStart xtime.UnixNano, volume int) (DataFileSetSeeker, error) {
		// Artificially slow down how long it takes to open a seeker to exercise the logic where
		// multiple goroutines are trying to open seekers for the same shard/blockStart and need
		// to wait for the others to complete.
		time.Sleep(5 * time.Millisecond)
		// 10% chance for this to fail so that error paths get exercised as well.
		if val := rand.Intn(100); val >= 90 {
			return nil, errors.New("some-error")
		}
		seeker, err := existingNewOpenSeekerFn(shard, blockStart, volume)
		if err != nil {
			return nil, err
		}

		if volume != volumes[len(volumes)-1] {
			// Only track the open if it's not for the last volume, which helps us determine
			// later whether the correct number of seekers were closed.
			seekerStatsLock.Lock()
			numNonTerminalVolumeOpens++
			seekerStatsLock.Unlock()
		}
		return &seekerTrackCloses{
			DataFileSetSeeker: seeker,
			trackCloseFn: func() {
				seekerStatsLock.Lock()
				numSeekerCloses++
				seekerStatsLock.Unlock()
			},
		}, nil
	}
	seekerMgr.newOpenSeekerFn = newNewOpenSeekerFn

	// Setup the block lease manager to return errors sometimes to exercise that code path.
	mockBlockLeaseManager := block.NewMockLeaseManager(ctrl)
	mockBlockLeaseManager.EXPECT().RegisterLeaser(gomock.Any()).AnyTimes()
	mockBlockLeaseManager.EXPECT().UnregisterLeaser(gomock.Any()).AnyTimes()
	mockBlockLeaseManager.EXPECT().OpenLatestLease(gomock.Any(), gomock.Any()).DoAndReturn(func(_ block.Leaser, _ block.LeaseDescriptor) (block.LeaseState, error) {
		// 10% chance for this to fail so that error paths get exercised as well.
		if val := rand.Intn(100); val >= 90 {
			return block.LeaseState{}, errors.New("some-error")
		}

		return block.LeaseState{Volume: 0}, nil
	}).AnyTimes()
	seekerMgr.blockRetrieverOpts = seekerMgr.blockRetrieverOpts.
		SetBlockLeaseManager(mockBlockLeaseManager)

	// Generate data.
	for _, shard := range shards {
		shardIDs[shard] = make([]ident.ID, 0, idsPerShard)
		shardData[shard] = make(map[string]map[xtime.UnixNano]checked.Bytes, idsPerShard)
		for _, blockStart := range blockStarts {
			for _, volume := range volumes {
				w, closer := newOpenTestWriter(t, fsOpts, shard, blockStart, volume)
				for i := 0; i < idsPerShard; i++ {
					idString := fmt.Sprintf("foo.%d", i)
					shardIDStrings[shard] = append(shardIDStrings[shard], idString)

					id := ident.StringID(idString)
					shardIDs[shard] = append(shardIDs[shard], id)
					if _, ok := shardData[shard][idString]; !ok {
						shardData[shard][idString] = make(map[xtime.UnixNano]checked.Bytes, len(blockStarts))
					}

					// Always write the same data for each series regardless of volume to make asserting on
					// Stream() responses simpler. Each volume gets a unique tag so we can verify that leases
					// are being upgraded by checking the tags.
					blockStartNanos := blockStart
					data, ok := shardData[shard][idString][blockStartNanos]
					if !ok {
						data = checked.NewBytes(nil, nil)
						data.IncRef()
						for j := 0; j < dataBytesPerID; j++ {
							data.Append(byte(rand.Int63n(256)))
						}
						shardData[shard][idString][blockStartNanos] = data
					}

					tags := testTagsFromIDAndVolume(id.String(), volume)
					metadata := persist.NewMetadataFromIDAndTags(id, tags,
						persist.MetadataOptions{})
					err := w.Write(metadata, data, digest.Checksum(data.Bytes()))
					require.NoError(t, err)
				}
				closer()
			}
		}
	}

	if shouldCacheShardIndices {
		retriever.CacheShardIndices(shards)
	}

	var (
		startWg, readyWg sync.WaitGroup
		seeksPerID       = 24
		seeksEach        = len(shards) * idsPerShard * seeksPerID
	)

	// Set up a fake onRetrieve callback so we can verify its behavior.
	var (
		retrievedIDs      = map[string]ident.Tags{}
		retrievedIDsMutex = sync.Mutex{}
		bytesPool         = pool.NewCheckedBytesPool(nil, nil, func(s []pool.Bucket) pool.BytesPool {
			return pool.NewBytesPool(s, nil)
		})
		idPool = ident.NewPool(bytesPool, ident.PoolOptions{})
	)
	bytesPool.Init()

	onRetrieve := block.OnRetrieveBlockFn(func(id ident.ID, tagsIter ident.TagIterator,
		startTime xtime.UnixNano, segment ts.Segment, nsCtx namespace.Context) {
		// TagsFromTagsIter requires a series ID to try and share bytes so we just pass
		// an empty string because we don't care about efficiency.
		tags, err := convert.TagsFromTagsIter(ident.StringID(""), tagsIter, idPool)
		require.NoError(t, err)

		retrievedIDsMutex.Lock()
		retrievedIDs[id.String()] = tags
		retrievedIDsMutex.Unlock()
	})

	// Setup concurrent seeks.
	var enqueueWg sync.WaitGroup
	startWg.Add(1)
	for i := 0; i < seekConcurrency; i++ {
		i := i
		readyWg.Add(1)
		enqueueWg.Add(1)
		go func() {
			defer enqueueWg.Done()
			readyWg.Done()
			startWg.Wait()

			shardOffset := i
			idOffset := i % seekConcurrency / 4
			results := make([]streamResult, 0, len(blockStarts))
			compare := ts.Segment{}
			for j := 0; j < seeksEach; j++ {
				shard := uint32((j + shardOffset) % len(shards))
				idIdx := uint32((j + idOffset) % len(shardIDs[shard]))
				id := shardIDs[shard][idIdx]
				idString := shardIDStrings[shard][idIdx]

				for k := 0; k < len(blockStarts); k++ {
					var (
						stream   xio.BlockReader
						err      error
						canceled bool
					)
					ctx := context.NewBackground()
					// Simulate a caller canceling the request.
					if i == 1 {
						stdCtx, cancel := stdctx.WithCancel(ctx.GoContext())
						ctx.SetGoContext(stdCtx)
						cancel()
						canceled = true
					}
					for {
						// Run in a loop since the open seeker function is configured to randomly fail
						// sometimes.
						stream, err = retriever.Stream(ctx, shard, id, blockStarts[k], onRetrieve, nsCtx)
						if err == nil {
							break
						}
					}

					results = append(results, streamResult{
						ctx:        ctx,
						shard:      shard,
						id:         idString,
						blockStart: blockStarts[k],
						stream:     stream,
						canceled:   canceled,
					})
				}

				for _, r := range results {
					compare.Head = shardData[r.shard][r.id][r.blockStart]

					// If the stream is empty, assert that the expected result is also nil.
					if r.stream.IsEmpty() {
						require.Nil(t, compare.Head)
						continue
					}

					seg, err := r.stream.Segment()
					if r.canceled {
						require.Error(t, err)
					} else {
						if err != nil {
							fmt.Printf("\nstream seg err: %v\n", err)
							fmt.Printf("id: %s\n", r.id)
							fmt.Printf("shard: %d\n", r.shard)
							fmt.Printf("start: %v\n", r.blockStart.String())
						}

						require.NoError(t, err)
						require.True(
							t,
							seg.Equal(&compare),
							fmt.Sprintf(
								"data mismatch for series %s, returned data: %v, expected: %v",
								r.id,
								string(seg.Head.Bytes()),
								string(compare.Head.Bytes())))
					}
					r.ctx.Close()
				}
				results = results[:0]
			}
		}()
	}

	// Wait for all routines to be ready.
	readyWg.Wait()
	// Allow all the goroutines to begin.
	startWg.Done()

	// Setup concurrent block lease updates.
	workers := xsync.NewWorkerPool(updateOpenLeaseConcurrency)
	workers.Init()
	// Iterate volume -> shard -> blockStart to stripe across as many shard/blockStart
	// combinations as quickly as possible, improving the chance of triggering the code path
	// where UpdateOpenLease is the first time a set of seekers is opened for a
	// shard/blockStart combination.
	for _, volume := range volumes {
		for _, shard := range shards {
			for _, blockStart := range blockStarts {
				enqueueWg.Add(1)
				var (
					// Capture vars for async goroutine.
					volume     = volume
					shard      = shard
					blockStart = blockStart
				)
				workers.Go(func() {
					defer enqueueWg.Done()
					leaser := retriever.seekerMgr.(block.Leaser)

					for {
						// Run in a loop since the open seeker function is configured to randomly fail
						// sometimes.
						_, err := leaser.UpdateOpenLease(block.LeaseDescriptor{
							Namespace:  nsMeta.ID(),
							Shard:      shard,
							BlockStart: blockStart,
						}, block.LeaseState{Volume: volume})
						// Ignore errOutOfOrderUpdateOpenLease because the goroutines in this test are not
						// coordinated and thus may try to call UpdateOpenLease() with out-of-order volumes.
						// That's fine for the purposes of this test since the goal here is to make sure there
						// are no race conditions and that the SeekerManager ends up in the correct state when
						// the test is complete.
						if err == nil || err == errOutOfOrderUpdateOpenLease {
							break
						}
					}
				})
			}
		}
	}

	// Wait until done.
	enqueueWg.Wait()

	seekerStatsLock.Lock()
	// Don't multiply by fetchConcurrency because the tracking doesn't take concurrent
	// clones into account.
	require.Equal(t, numNonTerminalVolumeOpens, numSeekerCloses)
	seekerStatsLock.Unlock()

	// Verify the onRetrieve callback was called properly for everything.
	for _, shard := range shardIDStrings {
		for _, id := range shard {
			retrievedIDsMutex.Lock()
			tags, ok := retrievedIDs[id]
			retrievedIDsMutex.Unlock()
			require.True(t, ok, fmt.Sprintf("expected %s to be retrieved, but it was not", id))

			// Strip the volume tag because these reads were performed while concurrent block lease
			// updates were happening, so it's not deterministic which volume tag they'll have at
			// this point.
			tags = stripVolumeTag(tags)
			expectedTags := stripVolumeTag(testTagsFromIDAndVolume(id, 0))
			require.True(
				t,
				tags.Equal(expectedTags),
				fmt.Sprintf("expectedNumTags=%d, actualNumTags=%d", len(expectedTags.Values()), len(tags.Values())))
		}
	}

	// Now that all the block lease updates have completed, all reads from this point should return tags with the
	// highest volume number.
	ctx := context.NewBackground()
	for _, shard := range shards {
		for _, blockStart := range blockStarts {
			for _, idString := range shardIDStrings[shard] {
				id := ident.StringID(idString)
				for {
					// Run in a loop since the open seeker function is configured to randomly fail
					// sometimes.
					ctx.Reset()
					_, err := retriever.Stream(ctx, shard, id, blockStart, onRetrieve, nsCtx)
					ctx.BlockingClose()
					if err == nil {
						break
					}
				}
			}
		}
	}

	for _, shard := range shardIDStrings {
		for _, id := range shard {
			retrievedIDsMutex.Lock()
			tags, ok := retrievedIDs[id]
			retrievedIDsMutex.Unlock()
			require.True(t, ok, fmt.Sprintf("expected %s to be retrieved, but it was not", id))
			tagsSlice := tags.Values()

			// Highest volume is expected.
			expectedVolumeTag := strconv.Itoa(volumes[len(volumes)-1])
			// Volume tag is last.
			volumeTag := tagsSlice[len(tagsSlice)-1].Value.String()
			require.Equal(t, expectedVolumeTag, volumeTag)
		}
	}
}

// TestBlockRetrieverIDDoesNotExist verifies the behavior of the Stream() method
// on the retriever in the case where the requested ID does not exist. In that
// case, Stream() should return an empty segment.
func TestBlockRetrieverIDDoesNotExist(t *testing.T) {
	scope := tally.NewTestScope("test", nil)

	// Make sure reader/writer are looking at the same test directory.
	dir, err := ioutil.TempDir("", "testdb")
	require.NoError(t, err)
	defer os.RemoveAll(dir)
	filePathPrefix := filepath.Join(dir, "")

	// Setup constants and config.
	fsOpts := testDefaultOpts.SetFilePathPrefix(filePathPrefix)
	nsMeta := testNs1Metadata(t)
	rOpts := nsMeta.Options().RetentionOptions()
	nsCtx := namespace.NewContextFrom(nsMeta)
	shard := uint32(0)
	blockStart := xtime.Now().Truncate(rOpts.BlockSize())

	// Setup the reader.
	opts := testBlockRetrieverOptions{
		retrieverOpts: defaultTestBlockRetrieverOptions,
		fsOpts:        fsOpts.SetInstrumentOptions(instrument.NewOptions().SetMetricsScope(scope)),
		shards:        []uint32{shard},
	}
	retriever, cleanup := newOpenTestBlockRetriever(t, testNs1Metadata(t), opts)
	defer cleanup()

	// Write out a test file.
	w, closer := newOpenTestWriter(t, fsOpts, shard, blockStart, 0)
	data := checked.NewBytes([]byte("Hello world!"), nil)
	data.IncRef()
	defer data.DecRef()
	metadata := persist.NewMetadataFromIDAndTags(ident.StringID("exists"), ident.Tags{},
		persist.MetadataOptions{})
	err = w.Write(metadata, data, digest.Checksum(data.Bytes()))
	assert.NoError(t, err)
	closer()

	ctx := context.NewBackground()
	defer ctx.Close()
	segmentReader, err := retriever.Stream(ctx, shard,
		ident.StringID("not-exists"), blockStart, nil, nsCtx)
	assert.NoError(t, err)

	assert.True(t, segmentReader.IsEmpty())

	// Check that the bloom filter miss metric was incremented.
	snapshot := scope.Snapshot()
	seriesRead := snapshot.Counters()["test.retriever.series-bloom-filter-misses+"]
	require.Equal(t, int64(1), seriesRead.Value())
}

// TestBlockRetrieverOnlyCreatesTagItersIfTagsExists verifies that the block retriever
// only creates a tag iterator in the OnRetrieve pathway if the series has tags.
func TestBlockRetrieverOnlyCreatesTagItersIfTagsExists(t *testing.T) {
	// Make sure reader/writer are looking at the same test directory.
	dir, err := ioutil.TempDir("", "testdb")
	require.NoError(t, err)
	defer os.RemoveAll(dir)
	filePathPrefix := filepath.Join(dir, "")

	// Setup constants and config.
	fsOpts := testDefaultOpts.SetFilePathPrefix(filePathPrefix)
	rOpts := testNs1Metadata(t).Options().RetentionOptions()
	nsCtx := namespace.NewContextFrom(testNs1Metadata(t))
	shard := uint32(0)
	blockStart := xtime.Now().Truncate(rOpts.BlockSize())

	// Setup the reader.
	opts := testBlockRetrieverOptions{
		retrieverOpts: defaultTestBlockRetrieverOptions,
		fsOpts:        fsOpts,
		shards:        []uint32{shard},
	}
	retriever, cleanup := newOpenTestBlockRetriever(t, testNs1Metadata(t), opts)
	defer cleanup()

	// Write out a test file.
	var (
		w, closer = newOpenTestWriter(t, fsOpts, shard, blockStart, 0)
		tag       = ident.Tag{
			Name:  ident.StringID("name"),
			Value: ident.StringID("value"),
		}
		tags = ident.NewTags(tag)
	)
	for _, write := range []struct {
		id   string
		tags ident.Tags
	}{
		{
			id:   "no-tags",
			tags: ident.Tags{},
		},
		{
			id:   "tags",
			tags: tags,
		},
	} {
		data := checked.NewBytes([]byte("Hello world!"), nil)
		data.IncRef()
		defer data.DecRef()

		metadata := persist.NewMetadataFromIDAndTags(ident.StringID(write.id), write.tags,
			persist.MetadataOptions{})
		err = w.Write(metadata, data, digest.Checksum(data.Bytes()))
		require.NoError(t, err)
	}
	closer()

	// Stream both series and verify the tag iterators passed to the OnRetrieve callback.
	ctx := context.NewBackground()
	defer ctx.Close()

	_, err = retriever.Stream(ctx, shard,
		ident.StringID("no-tags"), blockStart, block.OnRetrieveBlockFn(func(
			id ident.ID,
			tagsIter ident.TagIterator,
			startTime xtime.UnixNano,
			segment ts.Segment,
			nsCtx namespace.Context,
		) {
			require.Equal(t, ident.EmptyTagIterator, tagsIter)
			for tagsIter.Next() {
			}
			require.NoError(t, tagsIter.Err())
		}), nsCtx)
	require.NoError(t, err)

	_, err = retriever.Stream(ctx, shard,
		ident.StringID("tags"), blockStart, block.OnRetrieveBlockFn(func(
			id ident.ID,
			tagsIter ident.TagIterator,
			startTime xtime.UnixNano,
			segment ts.Segment,
			nsCtx namespace.Context,
		) {
			for tagsIter.Next() {
				currTag := tagsIter.Current()
				require.True(t, tag.Equal(currTag))
			}
			require.NoError(t, tagsIter.Err())
		}), nsCtx)
	require.NoError(t, err)
}

// TestBlockRetrieverDoesNotInvokeOnRetrieveWithGlobalFlag verifies that the block retriever
// does not invoke the OnRetrieve callback if the global CacheBlocksOnRetrieve is not enabled.
func TestBlockRetrieverDoesNotInvokeOnRetrieveWithGlobalFlag(t *testing.T) {
	testBlockRetrieverOnRetrieve(t, false, true)
}

// TestBlockRetrieverDoesNotInvokeOnRetrieveWithNamespaceFlag verifies that the block retriever
// does not invoke the OnRetrieve callback if the namespace-specific CacheBlocksOnRetrieve is not enabled.
func TestBlockRetrieverDoesNotInvokeOnRetrieveWithNamespaceFlag(t *testing.T) {
	testBlockRetrieverOnRetrieve(t, true, false)
}

func TestBlockRetrieverDoesNotInvokeOnRetrieve(t *testing.T) {
	testBlockRetrieverOnRetrieve(t, false, false)
}

func TestBlockRetrieverDoesInvokeOnRetrieve(t *testing.T) {
	testBlockRetrieverOnRetrieve(t, true, true)
}

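// testBlockRetrieverOnRetrieve writes a single series and streams it back,
// verifying that the OnRetrieve callback fires only when both the global and
// the namespace-level CacheBlocksOnRetrieve flags are enabled.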
func testBlockRetrieverOnRetrieve(t *testing.T, globalFlag bool, nsFlag bool) {
	// Make sure reader/writer are looking at the same test directory.
	dir, err := ioutil.TempDir("", "testdb")
	require.NoError(t, err)
	defer os.RemoveAll(dir)
	filePathPrefix := filepath.Join(dir, "")

	// Setup constants and config.
	md, err := namespace.NewMetadata(testNs1ID, namespace.NewOptions().
		SetCacheBlocksOnRetrieve(nsFlag).
		SetRetentionOptions(retention.NewOptions().SetBlockSize(testBlockSize)).
		SetIndexOptions(namespace.NewIndexOptions().SetEnabled(true).SetBlockSize(testBlockSize)))
	require.NoError(t, err)

	fsOpts := testDefaultOpts.SetFilePathPrefix(filePathPrefix)
	rOpts := md.Options().RetentionOptions()
	nsCtx := namespace.NewContextFrom(md)
	shard := uint32(0)
	blockStart := xtime.Now().Truncate(rOpts.BlockSize())

	// Setup the reader.
	opts := testBlockRetrieverOptions{
		retrieverOpts: defaultTestBlockRetrieverOptions.SetCacheBlocksOnRetrieve(globalFlag),
		fsOpts:        fsOpts,
		shards:        []uint32{shard},
	}
	retriever, cleanup := newOpenTestBlockRetriever(t, md, opts)
	defer cleanup()

	// Write out a test file.
	var (
		w, closer = newOpenTestWriter(t, fsOpts, shard, blockStart, 0)
		tag       = ident.Tag{
			Name:  ident.StringID("name"),
			Value: ident.StringID("value"),
		}
		tags = ident.NewTags(tag)
		id   = "foo"
	)
	data := checked.NewBytes([]byte("Hello world!"), nil)
	data.IncRef()
	defer data.DecRef()

	metadata := persist.NewMetadataFromIDAndTags(ident.StringID(id), tags,
		persist.MetadataOptions{})
	err = w.Write(metadata, data, digest.Checksum(data.Bytes()))
	require.NoError(t, err)
	closer()

	// Stream the series back and check whether the OnRetrieve callback is invoked.
	ctx := context.NewBackground()
	defer ctx.Close()

	onRetrieveCalled := false
	retrieveFn := block.OnRetrieveBlockFn(func(
		id ident.ID,
		tagsIter ident.TagIterator,
		startTime xtime.UnixNano,
		segment ts.Segment,
		nsCtx namespace.Context,
	) {
		onRetrieveCalled = true
	})

	segmentReader, err := retriever.Stream(ctx, shard,
		ident.StringID("foo"), blockStart, retrieveFn, nsCtx)
	require.NoError(t, err)

	_, err = segmentReader.Segment()
	require.NoError(t, err)

	if globalFlag && nsFlag {
		require.True(t, onRetrieveCalled)
	} else {
		require.False(t, onRetrieveCalled)
	}
}

// TestBlockRetrieverHandlesSeekIndexEntryErrors verifies the behavior of the Stream() method
// on the retriever in the case where the SeekIndexEntry function returns an
// error.
func TestBlockRetrieverHandlesSeekIndexEntryErrors(t *testing.T) {
	ctrl := gomock.NewController(t)
	defer ctrl.Finish()

	mockSeeker := NewMockConcurrentDataFileSetSeeker(ctrl)
	mockSeeker.EXPECT().SeekIndexEntry(gomock.Any(), gomock.Any()).Return(IndexEntry{}, errSeekErr)

	testBlockRetrieverHandlesSeekErrors(t, ctrl, mockSeeker)
}

// TestBlockRetrieverHandlesSeekByIndexEntryErrors verifies the behavior of the Stream() method
// on the retriever in the case where the SeekByIndexEntry function returns an
// error.
func TestBlockRetrieverHandlesSeekByIndexEntryErrors(t *testing.T) {
	ctrl := gomock.NewController(t)
	defer ctrl.Finish()

	mockSeeker := NewMockConcurrentDataFileSetSeeker(ctrl)
	mockSeeker.EXPECT().SeekIndexEntry(gomock.Any(), gomock.Any()).Return(IndexEntry{}, nil)
	mockSeeker.EXPECT().SeekByIndexEntry(gomock.Any(), gomock.Any()).Return(nil, errSeekErr)

	testBlockRetrieverHandlesSeekErrors(t, ctrl, mockSeeker)
}

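// errSeekErr is the sentinel error returned by the mock seekers in the tests above.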
var errSeekErr = errors.New("some-error")

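// testBlockRetrieverHandlesSeekErrors streams a series through a retriever whose
// seeker manager is mocked to hand out the provided seeker, and asserts that the
// seek error is surfaced from Segment() with no data returned.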
func testBlockRetrieverHandlesSeekErrors(t *testing.T, ctrl *gomock.Controller, mockSeeker ConcurrentDataFileSetSeeker) {
	// Make sure reader/writer are looking at the same test directory.
	dir, err := ioutil.TempDir("", "testdb")
	require.NoError(t, err)
	defer os.RemoveAll(dir)
	filePathPrefix := filepath.Join(dir, "")

	// Setup constants and config.
	var (
		fsOpts     = testDefaultOpts.SetFilePathPrefix(filePathPrefix)
		rOpts      = testNs1Metadata(t).Options().RetentionOptions()
		nsCtx      = namespace.NewContextFrom(testNs1Metadata(t))
		shard      = uint32(0)
		blockStart = xtime.Now().Truncate(rOpts.BlockSize())
	)

	mockSeekerManager := NewMockDataFileSetSeekerManager(ctrl)
	mockSeekerManager.EXPECT().Open(gomock.Any(), gomock.Any()).Return(nil)
	mockSeekerManager.EXPECT().Test(gomock.Any(), gomock.Any(), gomock.Any()).Return(true, nil)
	mockSeekerManager.EXPECT().Borrow(gomock.Any(), gomock.Any()).Return(mockSeeker, nil)
	mockSeekerManager.EXPECT().Return(gomock.Any(), gomock.Any(), gomock.Any()).Return(nil)
	mockSeekerManager.EXPECT().Close().Return(nil)

	newSeekerMgr := func(
		bytesPool pool.CheckedBytesPool,
		opts Options,
		blockRetrieverOpts BlockRetrieverOptions,
	) DataFileSetSeekerManager {
		return mockSeekerManager
	}

	// Setup the reader.
	opts := testBlockRetrieverOptions{
		retrieverOpts:  defaultTestBlockRetrieverOptions,
		fsOpts:         fsOpts,
		newSeekerMgrFn: newSeekerMgr,
		shards:         []uint32{shard},
	}
	retriever, cleanup := newOpenTestBlockRetriever(t, testNs1Metadata(t), opts)
	defer cleanup()

	// Make sure we return the correct error.
	ctx := context.NewBackground()
	defer ctx.Close()
	segmentReader, err := retriever.Stream(ctx, shard,
		ident.StringID("not-exists"), blockStart, nil, nsCtx)
	require.NoError(t, err)

	segment, err := segmentReader.Segment()
	assert.Equal(t, errSeekErr, err)
	assert.Equal(t, nil, segment.Head)
	assert.Equal(t, nil, segment.Tail)
}

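// testTagsFromIDAndVolume generates a deterministic set of tags for a series,
// with the volume encoded as the final tag so tests can tell which volume a
// read was served from.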
func testTagsFromIDAndVolume(seriesID string, volume int) ident.Tags {
	tags := []ident.Tag{}
	for j := 0; j < 5; j++ {
		tags = append(tags, ident.StringTag(
			fmt.Sprintf("%s.tag.%d.name", seriesID, j),
			fmt.Sprintf("%s.tag.%d.value", seriesID, j),
		))
	}
	tags = append(tags, ident.StringTag("volume", strconv.Itoa(volume)))
	return ident.NewTags(tags...)
}

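// stripVolumeTag drops the trailing volume tag appended by
// testTagsFromIDAndVolume so tag sets can be compared across volumes.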
func stripVolumeTag(tags ident.Tags) ident.Tags {
	tagsSlice := tags.Values()
	tagsSlice = tagsSlice[:len(tagsSlice)-1]
	return ident.NewTags(tagsSlice...)
}