github.com/m3db/m3@v1.5.0/src/dbnode/persist/fs/merger_test.go

// Copyright (c) 2019 Uber Technologies, Inc.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.

package fs

import (
	"io"
	"os"
	"path/filepath"
	"testing"
	"time"

	"github.com/golang/mock/gomock"
	"github.com/stretchr/testify/assert"
	"github.com/stretchr/testify/require"

	"github.com/m3db/m3/src/dbnode/digest"
	"github.com/m3db/m3/src/dbnode/encoding"
	"github.com/m3db/m3/src/dbnode/encoding/m3tsz"
	"github.com/m3db/m3/src/dbnode/namespace"
	"github.com/m3db/m3/src/dbnode/persist"
	"github.com/m3db/m3/src/dbnode/storage/block"
	"github.com/m3db/m3/src/dbnode/ts"
	"github.com/m3db/m3/src/dbnode/x/xio"
	"github.com/m3db/m3/src/m3ninx/doc"
	"github.com/m3db/m3/src/x/checked"
	"github.com/m3db/m3/src/x/context"
	"github.com/m3db/m3/src/x/ident"
	"github.com/m3db/m3/src/x/pool"
	xtime "github.com/m3db/m3/src/x/time"
)

const (
	blockSize = time.Hour
)

var (
	srPool        xio.SegmentReaderPool
	multiIterPool encoding.MultiReaderIteratorPool
	identPool     ident.Pool
	encoderPool   encoding.EncoderPool
	contextPool   context.Pool
	bytesPool     pool.CheckedBytesPool

	startTime = xtime.Now().Truncate(blockSize)

	id0 = ident.StringID("id0")
	id1 = ident.StringID("id1")
	id2 = ident.StringID("id2")
	id3 = ident.StringID("id3")
	id4 = ident.StringID("id4")
	id5 = ident.StringID("id5")
)

// init initializes resources _except_ the fsReader, which should be
// configured on a per-test basis with NewMockDataFileSetReader.
func init() {
	poolOpts := pool.NewObjectPoolOptions().SetSize(1)
	srPool = xio.NewSegmentReaderPool(poolOpts)
	srPool.Init()
	multiIterPool = encoding.NewMultiReaderIteratorPool(poolOpts)
	multiIterPool.Init(m3tsz.DefaultReaderIteratorAllocFn(encoding.NewOptions()))
	// Assign the package-level bytesPool (no ":=", which would shadow it and
	// leave the package-level pool nil) so tests such as TestCleanup can use it.
	bytesPool = pool.NewCheckedBytesPool(nil, poolOpts, func(s []pool.Bucket) pool.BytesPool {
		return pool.NewBytesPool(s, poolOpts)
	})
	bytesPool.Init()
	identPool = ident.NewPool(bytesPool, ident.PoolOptions{})
	encoderPool = encoding.NewEncoderPool(poolOpts)
	encoderPool.Init(func() encoding.Encoder {
		return m3tsz.NewEncoder(startTime, nil, true, encoding.NewOptions())
	})
	contextPool = context.NewPool(context.NewOptions().
		SetContextPoolOptions(poolOpts).
		SetFinalizerPoolOptions(poolOpts))
}

func TestMergeWithIntersection(t *testing.T) {
	// This test scenario is when there is an overlap in series data between
	// disk and the merge target.
	// id0-id3 are on disk, while the merge target has id1-id5.
	// Both have id1, but they don't have datapoints with overlapping
	// timestamps.
	// Both have id2, and some datapoints have overlapping timestamps.
	// Both have id3, and all datapoints have overlapping timestamps.
	diskData := newCheckedBytesByIDMap(newCheckedBytesByIDMapOptions{})
	diskData.Set(id0, datapointsToCheckedBytes(t, []ts.Datapoint{
		{TimestampNanos: startTime.Add(0 * time.Second), Value: 0},
		{TimestampNanos: startTime.Add(1 * time.Second), Value: 1},
		{TimestampNanos: startTime.Add(2 * time.Second), Value: 2},
	}))
	diskData.Set(id1, datapointsToCheckedBytes(t, []ts.Datapoint{
		{TimestampNanos: startTime.Add(2 * time.Second), Value: 2},
		{TimestampNanos: startTime.Add(3 * time.Second), Value: 3},
		{TimestampNanos: startTime.Add(6 * time.Second), Value: 4},
		{TimestampNanos: startTime.Add(7 * time.Second), Value: 5},
		{TimestampNanos: startTime.Add(9 * time.Second), Value: 6},
	}))
	diskData.Set(id2, datapointsToCheckedBytes(t, []ts.Datapoint{
		{TimestampNanos: startTime.Add(1 * time.Second), Value: 7},
		{TimestampNanos: startTime.Add(3 * time.Second), Value: 8},
		{TimestampNanos: startTime.Add(5 * time.Second), Value: 9},
		{TimestampNanos: startTime.Add(6 * time.Second), Value: 10},
		{TimestampNanos: startTime.Add(7 * time.Second), Value: 11},
		{TimestampNanos: startTime.Add(10 * time.Second), Value: 12},
	}))
	diskData.Set(id3, datapointsToCheckedBytes(t, []ts.Datapoint{
		{TimestampNanos: startTime.Add(2 * time.Second), Value: 13},
		{TimestampNanos: startTime.Add(4 * time.Second), Value: 14},
		{TimestampNanos: startTime.Add(8 * time.Second), Value: 15},
	}))

	mergeTargetData := newCheckedBytesByIDMap(newCheckedBytesByIDMapOptions{})
	mergeTargetData.Set(id1, datapointsToCheckedBytes(t, []ts.Datapoint{
		{TimestampNanos: startTime.Add(4 * time.Second), Value: 16},
		{TimestampNanos: startTime.Add(5 * time.Second), Value: 17},
		{TimestampNanos: startTime.Add(8 * time.Second), Value: 18},
	}))
	mergeTargetData.Set(id2, datapointsToCheckedBytes(t, []ts.Datapoint{
		{TimestampNanos: startTime.Add(3 * time.Second), Value: 19},
		{TimestampNanos: startTime.Add(6 * time.Second), Value: 20},
		{TimestampNanos: startTime.Add(7 * time.Second), Value: 21},
		{TimestampNanos: startTime.Add(9 * time.Second), Value: 22},
		{TimestampNanos: startTime.Add(10 * time.Second), Value: 23},
		{TimestampNanos: startTime.Add(13 * time.Second), Value: 24},
		{TimestampNanos: startTime.Add(16 * time.Second), Value: 25},
	}))
	mergeTargetData.Set(id3, datapointsToCheckedBytes(t, []ts.Datapoint{
		{TimestampNanos: startTime.Add(2 * time.Second), Value: 26},
		{TimestampNanos: startTime.Add(4 * time.Second), Value: 27},
		{TimestampNanos: startTime.Add(8 * time.Second), Value: 28},
	}))
	mergeTargetData.Set(id4, datapointsToCheckedBytes(t, []ts.Datapoint{
		{TimestampNanos: startTime.Add(8 * time.Second), Value: 29},
	}))
	mergeTargetData.Set(id5, datapointsToCheckedBytes(t, []ts.Datapoint{
		{TimestampNanos: startTime.Add(3 * time.Second), Value: 30},
		{TimestampNanos: startTime.Add(7 * time.Second), Value: 31},
		{TimestampNanos: startTime.Add(12 * time.Second), Value: 32},
		{TimestampNanos: startTime.Add(15 * time.Second), Value: 34},
	}))

	expected := newCheckedBytesByIDMap(newCheckedBytesByIDMapOptions{})
	expected.Set(id0, datapointsToCheckedBytes(t, []ts.Datapoint{
		{TimestampNanos: startTime.Add(0 * time.Second), Value: 0},
		{TimestampNanos: startTime.Add(1 * time.Second), Value: 1},
		{TimestampNanos: startTime.Add(2 * time.Second), Value: 2},
	}))
	expected.Set(id1, datapointsToCheckedBytes(t, []ts.Datapoint{
		{TimestampNanos: startTime.Add(2 * time.Second), Value: 2},
		{TimestampNanos: startTime.Add(3 * time.Second), Value: 3},
		{TimestampNanos: startTime.Add(4 * time.Second), Value: 16},
		{TimestampNanos: startTime.Add(5 * time.Second), Value: 17},
		{TimestampNanos: startTime.Add(6 * time.Second), Value: 4},
		{TimestampNanos: startTime.Add(7 * time.Second), Value: 5},
		{TimestampNanos: startTime.Add(8 * time.Second), Value: 18},
		{TimestampNanos: startTime.Add(9 * time.Second), Value: 6},
	}))
	expected.Set(id2, datapointsToCheckedBytes(t, []ts.Datapoint{
		{TimestampNanos: startTime.Add(1 * time.Second), Value: 7},
		{TimestampNanos: startTime.Add(3 * time.Second), Value: 19},
		{TimestampNanos: startTime.Add(5 * time.Second), Value: 9},
		{TimestampNanos: startTime.Add(6 * time.Second), Value: 20},
		{TimestampNanos: startTime.Add(7 * time.Second), Value: 21},
		{TimestampNanos: startTime.Add(9 * time.Second), Value: 22},
		{TimestampNanos: startTime.Add(10 * time.Second), Value: 23},
		{TimestampNanos: startTime.Add(13 * time.Second), Value: 24},
		{TimestampNanos: startTime.Add(16 * time.Second), Value: 25},
	}))
	expected.Set(id3, datapointsToCheckedBytes(t, []ts.Datapoint{
		{TimestampNanos: startTime.Add(2 * time.Second), Value: 26},
		{TimestampNanos: startTime.Add(4 * time.Second), Value: 27},
		{TimestampNanos: startTime.Add(8 * time.Second), Value: 28},
	}))
	expected.Set(id4, datapointsToCheckedBytes(t, []ts.Datapoint{
		{TimestampNanos: startTime.Add(8 * time.Second), Value: 29},
	}))
	expected.Set(id5, datapointsToCheckedBytes(t, []ts.Datapoint{
		{TimestampNanos: startTime.Add(3 * time.Second), Value: 30},
		{TimestampNanos: startTime.Add(7 * time.Second), Value: 31},
		{TimestampNanos: startTime.Add(12 * time.Second), Value: 32},
		{TimestampNanos: startTime.Add(15 * time.Second), Value: 34},
	}))

	testMergeWith(t, diskData, mergeTargetData, expected)
}

func TestMergeWithFullIntersection(t *testing.T) {
	// This test scenario is when the merge target contains exactly the same
	// series and timestamps as the data on disk, so the merged values should
	// all come from the merge target.
	diskData := newCheckedBytesByIDMap(newCheckedBytesByIDMapOptions{})
	diskData.Set(id0, datapointsToCheckedBytes(t, []ts.Datapoint{
		{TimestampNanos: startTime.Add(0 * time.Second), Value: 0},
		{TimestampNanos: startTime.Add(1 * time.Second), Value: 1},
		{TimestampNanos: startTime.Add(2 * time.Second), Value: 2},
	}))
	diskData.Set(id1, datapointsToCheckedBytes(t, []ts.Datapoint{
		{TimestampNanos: startTime.Add(2 * time.Second), Value: 2},
		{TimestampNanos: startTime.Add(3 * time.Second), Value: 3},
		{TimestampNanos: startTime.Add(6 * time.Second), Value: 4},
		{TimestampNanos: startTime.Add(7 * time.Second), Value: 5},
		{TimestampNanos: startTime.Add(9 * time.Second), Value: 6},
	}))

	mergeTargetData := newCheckedBytesByIDMap(newCheckedBytesByIDMapOptions{})
	mergeTargetData.Set(id0, datapointsToCheckedBytes(t, []ts.Datapoint{
		{TimestampNanos: startTime.Add(0 * time.Second), Value: 7},
		{TimestampNanos: startTime.Add(1 * time.Second), Value: 8},
		{TimestampNanos: startTime.Add(2 * time.Second), Value: 9},
	}))
	mergeTargetData.Set(id1, datapointsToCheckedBytes(t, []ts.Datapoint{
		{TimestampNanos: startTime.Add(2 * time.Second), Value: 10},
		{TimestampNanos: startTime.Add(3 * time.Second), Value: 11},
		{TimestampNanos: startTime.Add(6 * time.Second), Value: 12},
		{TimestampNanos: startTime.Add(7 * time.Second), Value: 13},
		{TimestampNanos: startTime.Add(9 * time.Second), Value: 14},
	}))

	expected := newCheckedBytesByIDMap(newCheckedBytesByIDMapOptions{})
	expected.Set(id0, datapointsToCheckedBytes(t, []ts.Datapoint{
		{TimestampNanos: startTime.Add(0 * time.Second), Value: 7},
		{TimestampNanos: startTime.Add(1 * time.Second), Value: 8},
		{TimestampNanos: startTime.Add(2 * time.Second), Value: 9},
	}))
	expected.Set(id1, datapointsToCheckedBytes(t, []ts.Datapoint{
		{TimestampNanos: startTime.Add(2 * time.Second), Value: 10},
		{TimestampNanos: startTime.Add(3 * time.Second), Value: 11},
		{TimestampNanos: startTime.Add(6 * time.Second), Value: 12},
		{TimestampNanos: startTime.Add(7 * time.Second), Value: 13},
		{TimestampNanos: startTime.Add(9 * time.Second), Value: 14},
	}))

	testMergeWith(t, diskData, mergeTargetData, expected)
}

func TestMergeWithNoIntersection(t *testing.T) {
	// This test scenario is when there is no overlap between disk data and
	// merge target data (series from one source do not exist in the other).
	diskData := newCheckedBytesByIDMap(newCheckedBytesByIDMapOptions{})
	diskData.Set(id0, datapointsToCheckedBytes(t, []ts.Datapoint{
		{TimestampNanos: startTime.Add(0 * time.Second), Value: 0},
		{TimestampNanos: startTime.Add(1 * time.Second), Value: 1},
		{TimestampNanos: startTime.Add(2 * time.Second), Value: 2},
	}))
	diskData.Set(id1, datapointsToCheckedBytes(t, []ts.Datapoint{
		{TimestampNanos: startTime.Add(2 * time.Second), Value: 2},
		{TimestampNanos: startTime.Add(3 * time.Second), Value: 3},
		{TimestampNanos: startTime.Add(6 * time.Second), Value: 4},
		{TimestampNanos: startTime.Add(7 * time.Second), Value: 5},
		{TimestampNanos: startTime.Add(9 * time.Second), Value: 6},
	}))
	diskData.Set(id2, datapointsToCheckedBytes(t, []ts.Datapoint{
		{TimestampNanos: startTime.Add(1 * time.Second), Value: 7},
		{TimestampNanos: startTime.Add(3 * time.Second), Value: 8},
		{TimestampNanos: startTime.Add(5 * time.Second), Value: 9},
		{TimestampNanos: startTime.Add(6 * time.Second), Value: 10},
		{TimestampNanos: startTime.Add(7 * time.Second), Value: 11},
		{TimestampNanos: startTime.Add(10 * time.Second), Value: 12},
	}))

	mergeTargetData := newCheckedBytesByIDMap(newCheckedBytesByIDMapOptions{})
	mergeTargetData.Set(id3, datapointsToCheckedBytes(t, []ts.Datapoint{
		{TimestampNanos: startTime.Add(2 * time.Second), Value: 26},
		{TimestampNanos: startTime.Add(4 * time.Second), Value: 27},
		{TimestampNanos: startTime.Add(8 * time.Second), Value: 28},
	}))
	mergeTargetData.Set(id4, datapointsToCheckedBytes(t, []ts.Datapoint{
		{TimestampNanos: startTime.Add(8 * time.Second), Value: 29},
	}))
	mergeTargetData.Set(id5, datapointsToCheckedBytes(t, []ts.Datapoint{
		{TimestampNanos: startTime.Add(3 * time.Second), Value: 30},
		{TimestampNanos: startTime.Add(7 * time.Second), Value: 31},
		{TimestampNanos: startTime.Add(12 * time.Second), Value: 32},
		{TimestampNanos: startTime.Add(15 * time.Second), Value: 34},
	}))

	expected := newCheckedBytesByIDMap(newCheckedBytesByIDMapOptions{})
	expected.Set(id0, datapointsToCheckedBytes(t, []ts.Datapoint{
		{TimestampNanos: startTime.Add(0 * time.Second), Value: 0},
		{TimestampNanos: startTime.Add(1 * time.Second), Value: 1},
		{TimestampNanos: startTime.Add(2 * time.Second), Value: 2},
	}))
	expected.Set(id1, datapointsToCheckedBytes(t, []ts.Datapoint{
		{TimestampNanos: startTime.Add(2 * time.Second), Value: 2},
		{TimestampNanos: startTime.Add(3 * time.Second), Value: 3},
		{TimestampNanos: startTime.Add(6 * time.Second), Value: 4},
		{TimestampNanos: startTime.Add(7 * time.Second), Value: 5},
		{TimestampNanos: startTime.Add(9 * time.Second), Value: 6},
	}))
	expected.Set(id2, datapointsToCheckedBytes(t, []ts.Datapoint{
		{TimestampNanos: startTime.Add(1 * time.Second), Value: 7},
		{TimestampNanos: startTime.Add(3 * time.Second), Value: 8},
		{TimestampNanos: startTime.Add(5 * time.Second), Value: 9},
		{TimestampNanos: startTime.Add(6 * time.Second), Value: 10},
		{TimestampNanos: startTime.Add(7 * time.Second), Value: 11},
		{TimestampNanos: startTime.Add(10 * time.Second), Value: 12},
	}))
	expected.Set(id3, datapointsToCheckedBytes(t, []ts.Datapoint{
		{TimestampNanos: startTime.Add(2 * time.Second), Value: 26},
		{TimestampNanos: startTime.Add(4 * time.Second), Value: 27},
		{TimestampNanos: startTime.Add(8 * time.Second), Value: 28},
	}))
	expected.Set(id4, datapointsToCheckedBytes(t, []ts.Datapoint{
		{TimestampNanos: startTime.Add(8 * time.Second), Value: 29},
	}))
	expected.Set(id5, datapointsToCheckedBytes(t, []ts.Datapoint{
		{TimestampNanos: startTime.Add(3 * time.Second), Value: 30},
		{TimestampNanos: startTime.Add(7 * time.Second), Value: 31},
		{TimestampNanos: startTime.Add(12 * time.Second), Value: 32},
		{TimestampNanos: startTime.Add(15 * time.Second), Value: 34},
	}))

	testMergeWith(t, diskData, mergeTargetData, expected)
}

func TestMergeWithNoMergeTargetData(t *testing.T) {
	// This test scenario is when there is no data in the merge target.
	diskData := newCheckedBytesByIDMap(newCheckedBytesByIDMapOptions{})
	diskData.Set(id0, datapointsToCheckedBytes(t, []ts.Datapoint{
		{TimestampNanos: startTime.Add(0 * time.Second), Value: 0},
		{TimestampNanos: startTime.Add(1 * time.Second), Value: 1},
		{TimestampNanos: startTime.Add(2 * time.Second), Value: 2},
	}))
	diskData.Set(id1, datapointsToCheckedBytes(t, []ts.Datapoint{
		{TimestampNanos: startTime.Add(2 * time.Second), Value: 2},
		{TimestampNanos: startTime.Add(3 * time.Second), Value: 3},
		{TimestampNanos: startTime.Add(6 * time.Second), Value: 4},
		{TimestampNanos: startTime.Add(7 * time.Second), Value: 5},
		{TimestampNanos: startTime.Add(9 * time.Second), Value: 6},
	}))
	diskData.Set(id2, datapointsToCheckedBytes(t, []ts.Datapoint{
		{TimestampNanos: startTime.Add(1 * time.Second), Value: 7},
		{TimestampNanos: startTime.Add(3 * time.Second), Value: 8},
		{TimestampNanos: startTime.Add(5 * time.Second), Value: 9},
		{TimestampNanos: startTime.Add(6 * time.Second), Value: 10},
		{TimestampNanos: startTime.Add(7 * time.Second), Value: 11},
		{TimestampNanos: startTime.Add(10 * time.Second), Value: 12},
	}))

	mergeTargetData := newCheckedBytesByIDMap(newCheckedBytesByIDMapOptions{})

	expected := newCheckedBytesByIDMap(newCheckedBytesByIDMapOptions{})
	expected.Set(id0, datapointsToCheckedBytes(t, []ts.Datapoint{
		{TimestampNanos: startTime.Add(0 * time.Second), Value: 0},
		{TimestampNanos: startTime.Add(1 * time.Second), Value: 1},
		{TimestampNanos: startTime.Add(2 * time.Second), Value: 2},
	}))
	expected.Set(id1, datapointsToCheckedBytes(t, []ts.Datapoint{
		{TimestampNanos: startTime.Add(2 * time.Second), Value: 2},
		{TimestampNanos: startTime.Add(3 * time.Second), Value: 3},
		{TimestampNanos: startTime.Add(6 * time.Second), Value: 4},
		{TimestampNanos: startTime.Add(7 * time.Second), Value: 5},
		{TimestampNanos: startTime.Add(9 * time.Second), Value: 6},
	}))
	expected.Set(id2, datapointsToCheckedBytes(t, []ts.Datapoint{
		{TimestampNanos: startTime.Add(1 * time.Second), Value: 7},
		{TimestampNanos: startTime.Add(3 * time.Second), Value: 8},
		{TimestampNanos: startTime.Add(5 * time.Second), Value: 9},
		{TimestampNanos: startTime.Add(6 * time.Second), Value: 10},
		{TimestampNanos: startTime.Add(7 * time.Second), Value: 11},
		{TimestampNanos: startTime.Add(10 * time.Second), Value: 12},
	}))

	testMergeWith(t, diskData, mergeTargetData, expected)
}

func TestMergeWithNoDiskData(t *testing.T) {
	// This test scenario is when there is no data on disk.
	diskData := newCheckedBytesByIDMap(newCheckedBytesByIDMapOptions{})

	mergeTargetData := newCheckedBytesByIDMap(newCheckedBytesByIDMapOptions{})
	mergeTargetData.Set(id3, datapointsToCheckedBytes(t, []ts.Datapoint{
		{TimestampNanos: startTime.Add(2 * time.Second), Value: 26},
		{TimestampNanos: startTime.Add(4 * time.Second), Value: 27},
		{TimestampNanos: startTime.Add(8 * time.Second), Value: 28},
	}))
	mergeTargetData.Set(id4, datapointsToCheckedBytes(t, []ts.Datapoint{
		{TimestampNanos: startTime.Add(8 * time.Second), Value: 29},
	}))
	mergeTargetData.Set(id5, datapointsToCheckedBytes(t, []ts.Datapoint{
		{TimestampNanos: startTime.Add(3 * time.Second), Value: 30},
		{TimestampNanos: startTime.Add(7 * time.Second), Value: 31},
		{TimestampNanos: startTime.Add(12 * time.Second), Value: 32},
		{TimestampNanos: startTime.Add(15 * time.Second), Value: 34},
	}))

	expected := newCheckedBytesByIDMap(newCheckedBytesByIDMapOptions{})
	expected.Set(id3, datapointsToCheckedBytes(t, []ts.Datapoint{
		{TimestampNanos: startTime.Add(2 * time.Second), Value: 26},
		{TimestampNanos: startTime.Add(4 * time.Second), Value: 27},
		{TimestampNanos: startTime.Add(8 * time.Second), Value: 28},
	}))
	expected.Set(id4, datapointsToCheckedBytes(t, []ts.Datapoint{
		{TimestampNanos: startTime.Add(8 * time.Second), Value: 29},
	}))
	expected.Set(id5, datapointsToCheckedBytes(t, []ts.Datapoint{
		{TimestampNanos: startTime.Add(3 * time.Second), Value: 30},
		{TimestampNanos: startTime.Add(7 * time.Second), Value: 31},
		{TimestampNanos: startTime.Add(12 * time.Second), Value: 32},
		{TimestampNanos: startTime.Add(15 * time.Second), Value: 34},
	}))

	testMergeWith(t, diskData, mergeTargetData, expected)
}

func TestMergeWithNoData(t *testing.T) {
	// This test scenario is when there is no data on disk or in the merge
	// target.
	diskData := newCheckedBytesByIDMap(newCheckedBytesByIDMapOptions{})

	mergeTargetData := newCheckedBytesByIDMap(newCheckedBytesByIDMapOptions{})

	expected := newCheckedBytesByIDMap(newCheckedBytesByIDMapOptions{})

	testMergeWith(t, diskData, mergeTargetData, expected)
}

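// TestCleanup verifies that MergeAndCleanup writes the next volume of a
// fileset and removes the previous one.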
func TestCleanup(t *testing.T) {
	dir := createTempDir(t)
	filePathPrefix := filepath.Join(dir, "")
	defer os.RemoveAll(dir)

	// Write fileset to disk.
	fsOpts := NewOptions().
		SetFilePathPrefix(filePathPrefix)

	md, err := namespace.NewMetadata(ident.StringID("foo"), namespace.NewOptions())
	require.NoError(t, err)

	blockStart := xtime.Now()
	var shard uint32 = 1
	fsID := FileSetFileIdentifier{
		Namespace:   md.ID(),
		Shard:       shard,
		BlockStart:  blockStart,
		VolumeIndex: 0,
	}
	writeFilesetToDisk(t, fsID, fsOpts)

	// Verify fileset exists.
	exists, err := DataFileSetExists(filePathPrefix, md.ID(), shard, blockStart, 0)
	require.NoError(t, err)
	require.True(t, exists)

	// Initialize merger.
	reader, err := NewReader(bytesPool, fsOpts)
	require.NoError(t, err)

	merger := NewMerger(reader, 0, srPool, multiIterPool, identPool, encoderPool, contextPool,
		filePathPrefix, namespace.NewOptions())

	// Run merger.
	pm, err := NewPersistManager(fsOpts)
	require.NoError(t, err)

	preparer, err := pm.StartFlushPersist()
	require.NoError(t, err)

	err = merger.MergeAndCleanup(fsID, NewNoopMergeWith(), fsID.VolumeIndex+1, preparer,
		namespace.NewContextFrom(md), &persist.NoOpColdFlushNamespace{}, false)
	require.NoError(t, err)

	// Verify old fileset is gone and the new one is present.
	exists, err = DataFileSetExists(filePathPrefix, md.ID(), shard, blockStart, 0)
	require.NoError(t, err)
	require.False(t, exists)

	exists, err = DataFileSetExists(filePathPrefix, md.ID(), shard, blockStart, 1)
	require.NoError(t, err)
	require.True(t, exists)
}

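// TestCleanupOnceBootstrapped verifies that MergeAndCleanup errors when the
// node is already bootstrapped.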
func TestCleanupOnceBootstrapped(t *testing.T) {
	ctrl := gomock.NewController(t)
	defer ctrl.Finish()

	preparer := persist.NewMockFlushPreparer(ctrl)
	md, err := namespace.NewMetadata(ident.StringID("foo"), namespace.NewOptions())
	require.NoError(t, err)

	m := merger{}
	err = m.MergeAndCleanup(FileSetFileIdentifier{}, NewNoopMergeWith(), 1, preparer,
		namespace.NewContextFrom(md), &persist.NoOpColdFlushNamespace{}, true)
	require.Error(t, err)
}

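// writeFilesetToDisk writes a minimal fileset (a single series with a tiny
// payload) so that merge/cleanup tests have a real volume on disk to work
// against.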
func writeFilesetToDisk(t *testing.T, fsID FileSetFileIdentifier, fsOpts Options) {
	w, err := NewWriter(fsOpts)
	require.NoError(t, err)

	writerOpts := DataWriterOpenOptions{
		Identifier: fsID,
		BlockSize:  2 * time.Hour,
	}
	err = w.Open(writerOpts)
	require.NoError(t, err)

	entry := []byte{1, 2, 3}

	chkdBytes := checked.NewBytes(entry, nil)
	chkdBytes.IncRef()
	metadata := persist.NewMetadataFromIDAndTags(ident.StringID("foo"),
		ident.Tags{}, persist.MetadataOptions{})
	err = w.Write(metadata, chkdBytes, digest.Checksum(entry))
	require.NoError(t, err)

	err = w.Close()
	require.NoError(t, err)
}

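// testMergeWith runs a Merge with mocked disk data and a mocked merge target,
// then asserts that exactly the expected series and datapoints were
// persisted.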
func testMergeWith(
	t *testing.T,
	diskData *checkedBytesMap,
	mergeTargetData *checkedBytesMap,
	expectedData *checkedBytesMap,
) {
	ctrl := gomock.NewController(t)
	defer ctrl.Finish()
	reader := mockReaderFromData(ctrl, diskData)

	var persisted []persistedData
	var deferClosed bool
	preparer := persist.NewMockFlushPreparer(ctrl)
	preparer.EXPECT().PrepareData(gomock.Any()).Return(
		persist.PreparedDataPersist{
			Persist: func(metadata persist.Metadata, segment ts.Segment, checksum uint32) error {
				persisted = append(persisted, persistedData{
					metadata: metadata,
					// NB(bodu): Once data is persisted the `ts.Segment` gets finalized,
					// so reading from it afterwards would violate the read-after-free
					// invariant. So we `Clone` the segment here.
					segment: segment.Clone(nil),
				})
				return nil
			},
			DeferClose: func() (persist.DataCloser, error) {
				return func() error {
					require.False(t, deferClosed)
					deferClosed = true
					return nil
				}, nil
			},
		}, nil)
	nsCtx := namespace.Context{}

	nsOpts := namespace.NewOptions()
	merger := NewMerger(reader, 0, srPool, multiIterPool,
		identPool, encoderPool, contextPool, NewOptions().FilePathPrefix(), nsOpts)
	fsID := FileSetFileIdentifier{
		Namespace:  ident.StringID("test-ns"),
		Shard:      uint32(8),
		BlockStart: startTime,
	}
	mergeWith := mockMergeWithFromData(t, ctrl, diskData, mergeTargetData)
	closer, err := merger.Merge(fsID, mergeWith, 1, preparer, nsCtx, &persist.NoOpColdFlushNamespace{})
	require.NoError(t, err)
	require.False(t, deferClosed)
	require.NoError(t, closer())
	require.True(t, deferClosed)

	assertPersistedAsExpected(t, persisted, expectedData)
}

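// assertPersistedAsExpected decodes every persisted segment and checks it
// against the expected datapoints, series by series.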
func assertPersistedAsExpected(
	t *testing.T,
	persisted []persistedData,
	expectedData *checkedBytesMap,
) {
	// Assert same number of expected series IDs.
	require.Equal(t, expectedData.Len(), len(persisted))

	for _, actualData := range persisted {
		id := actualData.metadata.BytesID()
		data, exists := expectedData.Get(ident.StringID(string(id)))
		require.True(t, exists)
		seg := ts.NewSegment(data, nil, 0, ts.FinalizeHead)

		expectedDPs := datapointsFromSegment(t, seg)
		actualDPs := datapointsFromSegment(t, actualData.segment)
		// Assert same number of datapoints for this series.
		require.Equal(t, len(expectedDPs), len(actualDPs))
		for i := range expectedDPs {
			// Check each datapoint matches what's expected.
			assert.Equal(t, expectedDPs[i], actualDPs[i])
		}
	}
}

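// datapointsToCheckedBytes encodes the given datapoints with a pooled M3TSZ
// encoder and returns the encoded stream as checked bytes.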
func datapointsToCheckedBytes(t *testing.T, dps []ts.Datapoint) checked.Bytes {
	encoder := encoderPool.Get()
	defer encoder.Close()
	for _, dp := range dps {
		require.NoError(t, encoder.Encode(dp, xtime.Second, nil))
	}

	ctx := context.NewBackground()
	defer ctx.Close()

	r, ok := encoder.Stream(ctx)
	require.True(t, ok)
	bytes, err := xio.ToBytes(r)
	require.Equal(t, io.EOF, err)

	copied := append([]byte(nil), bytes...)
	cb := checked.NewBytes(copied, nil)
	return cb
}

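// mockReaderFromData builds a MockDataFileSetReader that returns each series
// in diskData once, in iteration order, followed by io.EOF.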
func mockReaderFromData(
	ctrl *gomock.Controller,
	diskData *checkedBytesMap,
) *MockDataFileSetReader {
	reader := NewMockDataFileSetReader(ctrl)
	reader.EXPECT().Open(gomock.Any()).Return(nil)
	reader.EXPECT().Close().Return(nil)
	tagIter := ident.NewTagsIterator(ident.NewTags(ident.StringTag("tag-key0", "tag-val0")))
	fakeChecksum := uint32(42)

	var inOrderCalls []*gomock.Call
	for _, val := range diskData.Iter() {
		id := val.Key()
		data := val.Value()
		inOrderCalls = append(inOrderCalls,
			reader.EXPECT().Read().Return(id, tagIter, data, fakeChecksum, nil))
	}
	// Make sure to return io.EOF at the end.
	inOrderCalls = append(inOrderCalls,
		reader.EXPECT().Read().Return(nil, nil, nil, uint32(0), io.EOF))
	gomock.InOrder(inOrderCalls...)

	return reader
}

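// mockMergeWithFromData builds a MockMergeWith that serves mergeTargetData:
// Read returns merge target data for IDs that are also on disk, and
// ForEachRemaining replays the IDs that exist only in the merge target.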
func mockMergeWithFromData(
	t *testing.T,
	ctrl *gomock.Controller,
	diskData *checkedBytesMap,
	mergeTargetData *checkedBytesMap,
) *MockMergeWith {
	mergeWith := NewMockMergeWith(ctrl)

	// Get the series IDs in the merge target that do not exist in the disk
	// data. This logic is not tested here because it should be covered by
	// tests of the MergeWith implementation.
	var remaining []ident.ID

	// Expect mergeWith.Read for all datapoints exactly once. Go through all
	// data on disk, then go through the remaining items from the merge target.
	for _, val := range diskData.Iter() {
		id := val.Key()

		if mergeTargetData.Contains(id) {
			data, ok := mergeTargetData.Get(id)
			require.True(t, ok)
			segReader := srPool.Get()
			br := []xio.BlockReader{blockReaderFromData(data, segReader, startTime, blockSize)}
			mergeWith.EXPECT().Read(gomock.Any(), id, gomock.Any(), gomock.Any()).
				Return(br, true, nil)
		} else {
			mergeWith.EXPECT().Read(gomock.Any(), id, gomock.Any(), gomock.Any()).
				Return(nil, false, nil)
		}
	}
	for _, val := range mergeTargetData.Iter() {
		id := val.Key()
		if !diskData.Contains(id) {
			// Capture remaining items so that we can call the ForEachRemaining
			// fn on them later.
			remaining = append(remaining, id)
		}
	}

	mergeWith.EXPECT().
		ForEachRemaining(gomock.Any(), startTime, gomock.Any(), gomock.Any()).
		Return(nil).
		Do(func(ctx context.Context, blockStart xtime.UnixNano, fn ForEachRemainingFn, nsCtx namespace.Context) {
			for _, id := range remaining {
				data, ok := mergeTargetData.Get(id)
				if ok {
					segReader := srPool.Get()
					br := block.FetchBlockResult{
						Start:  startTime,
						Blocks: []xio.BlockReader{blockReaderFromData(data, segReader, startTime, blockSize)},
					}
					err := fn(doc.Metadata{ID: id.Bytes()}, br)
					require.NoError(t, err)
				}
			}
		})

	return mergeWith
}

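// persistedData captures a single (metadata, segment) pair handed to the
// mocked Persist function.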
type persistedData struct {
	metadata persist.Metadata
	segment  ts.Segment
}

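// datapointsFromSegment decodes a segment back into datapoints using the
// shared segment reader and multi-reader iterator pools.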
func datapointsFromSegment(t *testing.T, seg ts.Segment) []ts.Datapoint {
	segReader := srPool.Get()
	segReader.Reset(seg)
	iter := multiIterPool.Get()
	iter.Reset([]xio.SegmentReader{segReader}, startTime, blockSize, nil)
	defer iter.Close()

	var dps []ts.Datapoint
	for iter.Next() {
		dp, _, _ := iter.Current()
		dps = append(dps, dp)
	}
	require.NoError(t, iter.Err())

	return dps
}

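// blockReaderFromData wraps encoded bytes in a block reader spanning
// [startTime, startTime+blockSize).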
func blockReaderFromData(
	data checked.Bytes,
	segReader xio.SegmentReader,
	startTime xtime.UnixNano,
	blockSize time.Duration,
) xio.BlockReader {
	seg := ts.NewSegment(data, nil, 0, ts.FinalizeHead)
	segReader.Reset(seg)
	return xio.BlockReader{
		SegmentReader: segReader,
		Start:         startTime,
		BlockSize:     blockSize,
	}
}