github.com/m3db/m3@v1.5.1-0.20231129193456-75a402aa583b/src/dbnode/persist/fs/read_test.go (about)

     1  // Copyright (c) 2016 Uber Technologies, Inc.
     2  //
     3  // Permission is hereby granted, free of charge, to any person obtaining a copy
     4  // of this software and associated documentation files (the "Software"), to deal
     5  // in the Software without restriction, including without limitation the rights
     6  // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
     7  // copies of the Software, and to permit persons to whom the Software is
     8  // furnished to do so, subject to the following conditions:
     9  //
    10  // The above copyright notice and this permission notice shall be included in
    11  // all copies or substantial portions of the Software.
    12  //
    13  // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    14  // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    15  // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
    16  // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    17  // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
    18  // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
    19  // THE SOFTWARE.
    20  
    21  package fs
    22  
    23  import (
    24  	"fmt"
    25  	"io"
    26  	"io/ioutil"
    27  	"os"
    28  	"path/filepath"
    29  	"strings"
    30  	"testing"
    31  	"time"
    32  
    33  	"github.com/m3db/m3/src/dbnode/digest"
    34  	"github.com/m3db/m3/src/dbnode/persist"
    35  	"github.com/m3db/m3/src/dbnode/persist/fs/msgpack"
    36  	"github.com/m3db/m3/src/dbnode/persist/schema"
    37  	"github.com/m3db/m3/src/x/checked"
    38  	"github.com/m3db/m3/src/x/ident"
    39  	"github.com/m3db/m3/src/x/mmap"
    40  	"github.com/m3db/m3/src/x/pool"
    41  	"github.com/m3db/m3/src/x/serialize"
    42  	xtime "github.com/m3db/m3/src/x/time"
    43  
    44  	"github.com/golang/mock/gomock"
    45  	"github.com/stretchr/testify/assert"
    46  	"github.com/stretchr/testify/require"
    47  )
    48  
const (
	// testReaderBufferSize is the buffer size newTestReader applies to both
	// the info reader and the data reader.
	testReaderBufferSize = 10
	// testWriterBufferSize is not referenced in the visible part of this
	// file; presumably it is consumed by the writer test helpers.
	testWriterBufferSize = 10
)
    53  
var (
	// testWriterStart is the block start used by several tests in this file
	// for both the writer and the matching reader identifier. It is
	// evaluated once, when the package's variables are initialized.
	testWriterStart    = xtime.Now()
	// testBlockSize is the block size placed in the DataWriterOpenOptions
	// built by these tests.
	testBlockSize      = 2 * time.Hour
	testDefaultOpts    = NewOptions() // To avoid allocing pools each test exec
	// testBytesPool and testTagDecoderPool are assigned and initialized in
	// this file's init function.
	testBytesPool      pool.CheckedBytesPool
	testTagDecoderPool serialize.TagDecoderPool
)
    61  
    62  // NB(r): This is kind of brittle, but basically msgpack expects a buffered
    63  // reader, but we can't use a buffered reader because we need to know where
    64  // the position of the decoder is when we need to grab bytes without copying.
    65  //
    66  // This var declaration by it successfully compiling means it implements the
    67  // `bufReader` interface in msgpack decoder library (unless it changes...)
    68  // in which case this needs to be updated.
    69  //
    70  // By it implementing the interface the msgpack decoder actually uses
    71  // the reader directly without creating a buffered reader to wrap it.
    72  // This way we can know actually where its position is and can correctly
    73  // take a valid bytes ref address when reading bytes without copying.
    74  //
    75  // We're attempting to address this by making it less brittle but the author
    76  // is not currently supportive of merging the changes:
    77  // https://github.com/vmihailenco/msgpack/pull/155
    78  var _ = msgpackBufReader(newReaderDecoderStream())
    79  
    80  type msgpackBufReader interface {
    81  	Read([]byte) (int, error)
    82  	ReadByte() (byte, error)
    83  	UnreadByte() error
    84  }
    85  
    86  func init() {
    87  	testBytesPool = pool.NewCheckedBytesPool([]pool.Bucket{{
    88  		Capacity: 1024,
    89  		Count:    10,
    90  	}}, nil, func(s []pool.Bucket) pool.BytesPool {
    91  		return pool.NewBytesPool(s, nil)
    92  	})
    93  	testBytesPool.Init()
    94  	testTagDecoderPool = serialize.NewTagDecoderPool(
    95  		serialize.NewTagDecoderOptions(serialize.TagDecoderOptionsConfig{}),
    96  		pool.NewObjectPoolOptions())
    97  	testTagDecoderPool.Init()
    98  }
    99  
   100  func newTestReader(t *testing.T, filePathPrefix string) DataFileSetReader {
   101  	reader, err := NewReader(testBytesPool, testDefaultOpts.
   102  		SetFilePathPrefix(filePathPrefix).
   103  		SetInfoReaderBufferSize(testReaderBufferSize).
   104  		SetDataReaderBufferSize(testReaderBufferSize))
   105  	require.NoError(t, err)
   106  	return reader
   107  }
   108  
   109  func bytesRefd(data []byte) checked.Bytes {
   110  	bytes := checked.NewBytes(data, nil)
   111  	bytes.IncRef()
   112  	return bytes
   113  }
   114  
   115  func TestReadEmptyIndexUnreadData(t *testing.T) {
   116  	dir, err := ioutil.TempDir("", "testdb")
   117  	if err != nil {
   118  		t.Fatal(err)
   119  	}
   120  	filePathPrefix := filepath.Join(dir, "")
   121  	defer os.RemoveAll(dir)
   122  
   123  	w := newTestWriter(t, filePathPrefix)
   124  	writerOpts := DataWriterOpenOptions{
   125  		BlockSize: testBlockSize,
   126  		Identifier: FileSetFileIdentifier{
   127  			Namespace:  testNs1ID,
   128  			Shard:      0,
   129  			BlockStart: testWriterStart,
   130  		},
   131  	}
   132  	err = w.Open(writerOpts)
   133  	assert.NoError(t, err)
   134  	assert.NoError(t, w.Close())
   135  
   136  	r := newTestReader(t, filePathPrefix)
   137  	rOpenOpts := DataReaderOpenOptions{
   138  		Identifier: FileSetFileIdentifier{
   139  			Namespace:  testNs1ID,
   140  			Shard:      0,
   141  			BlockStart: testWriterStart,
   142  		},
   143  	}
   144  	err = r.Open(rOpenOpts)
   145  	assert.NoError(t, err)
   146  
   147  	_, _, _, _, err = r.Read()
   148  	assert.Equal(t, io.EOF, err)
   149  
   150  	assert.NoError(t, r.Close())
   151  }
   152  
   153  func TestReadDataError(t *testing.T) {
   154  	ctrl := gomock.NewController(t)
   155  	defer ctrl.Finish()
   156  
   157  	dir, err := ioutil.TempDir("", "testdb")
   158  	if err != nil {
   159  		t.Fatal(err)
   160  	}
   161  	filePathPrefix := filepath.Join(dir, "")
   162  	defer os.RemoveAll(dir)
   163  
   164  	w := newTestWriter(t, filePathPrefix)
   165  	writerOpts := DataWriterOpenOptions{
   166  		BlockSize: testBlockSize,
   167  		Identifier: FileSetFileIdentifier{
   168  			Namespace:  testNs1ID,
   169  			Shard:      0,
   170  			BlockStart: testWriterStart,
   171  		},
   172  	}
   173  	metadata := persist.NewMetadataFromIDAndTags(
   174  		ident.StringID("foo"),
   175  		ident.Tags{},
   176  		persist.MetadataOptions{})
   177  	err = w.Open(writerOpts)
   178  	require.NoError(t, err)
   179  	require.NoError(t, w.Write(metadata,
   180  		bytesRefd([]byte{1, 2, 3}),
   181  		digest.Checksum([]byte{1, 2, 3})))
   182  	require.NoError(t, w.Close())
   183  
   184  	r := newTestReader(t, filePathPrefix)
   185  	rOpenOpts := DataReaderOpenOptions{
   186  		Identifier: FileSetFileIdentifier{
   187  			Namespace:  testNs1ID,
   188  			Shard:      0,
   189  			BlockStart: testWriterStart,
   190  		},
   191  	}
   192  	err = r.Open(rOpenOpts)
   193  	assert.NoError(t, err)
   194  
   195  	// Close out the dataFd and use a mock to expect an error on next read
   196  	reader := r.(*reader)
   197  	require.NoError(t, mmap.Munmap(reader.dataMmap))
   198  	require.NoError(t, reader.dataFd.Close())
   199  
   200  	mockReader := digest.NewMockReaderWithDigest(ctrl)
   201  	mockReader.EXPECT().Read(gomock.Any()).Return(0, fmt.Errorf("an error"))
   202  	reader.dataReader = mockReader
   203  
   204  	_, _, _, _, err = r.Read()
   205  	assert.Error(t, err)
   206  
   207  	// Cleanly close
   208  	require.NoError(t, mmap.Munmap(reader.indexMmap))
   209  	require.NoError(t, reader.indexFd.Close())
   210  }
   211  
   212  func TestReadDataUnexpectedSize(t *testing.T) {
   213  	dir, err := ioutil.TempDir("", "testdb")
   214  	if err != nil {
   215  		t.Fatal(err)
   216  	}
   217  	filePathPrefix := filepath.Join(dir, "")
   218  	defer os.RemoveAll(dir)
   219  
   220  	w := newTestWriter(t, filePathPrefix)
   221  	writerOpts := DataWriterOpenOptions{
   222  		BlockSize: testBlockSize,
   223  		Identifier: FileSetFileIdentifier{
   224  			Namespace:  testNs1ID,
   225  			Shard:      0,
   226  			BlockStart: testWriterStart,
   227  		},
   228  	}
   229  	metadata := persist.NewMetadataFromIDAndTags(
   230  		ident.StringID("foo"),
   231  		ident.Tags{},
   232  		persist.MetadataOptions{})
   233  	err = w.Open(writerOpts)
   234  	assert.NoError(t, err)
   235  	dataFile := w.(*writer).dataFdWithDigest.Fd().Name()
   236  
   237  	assert.NoError(t, w.Write(metadata,
   238  		bytesRefd([]byte{1, 2, 3}),
   239  		digest.Checksum([]byte{1, 2, 3})))
   240  	assert.NoError(t, w.Close())
   241  
   242  	// Truncate one bye
   243  	assert.NoError(t, os.Truncate(dataFile, 1))
   244  
   245  	r := newTestReader(t, filePathPrefix)
   246  	rOpenOpts := DataReaderOpenOptions{
   247  		Identifier: FileSetFileIdentifier{
   248  			Namespace:  testNs1ID,
   249  			Shard:      0,
   250  			BlockStart: testWriterStart,
   251  		},
   252  	}
   253  	err = r.Open(rOpenOpts)
   254  	assert.NoError(t, err)
   255  
   256  	_, _, _, _, err = r.Read()
   257  	assert.Error(t, err)
   258  	assert.Equal(t, errReadNotExpectedSize, err)
   259  
   260  	assert.NoError(t, r.Close())
   261  }
   262  
   263  func TestReadNoCheckpointFile(t *testing.T) {
   264  	filePathPrefix := createTempDir(t)
   265  	defer os.RemoveAll(filePathPrefix)
   266  
   267  	w := newTestWriter(t, filePathPrefix)
   268  	shard := uint32(0)
   269  	writerOpts := DataWriterOpenOptions{
   270  		BlockSize: testBlockSize,
   271  		Identifier: FileSetFileIdentifier{
   272  			Namespace:  testNs1ID,
   273  			Shard:      shard,
   274  			BlockStart: testWriterStart,
   275  		},
   276  	}
   277  	err := w.Open(writerOpts)
   278  	assert.NoError(t, err)
   279  	assert.NoError(t, w.Close())
   280  
   281  	var (
   282  		shardDir       = ShardDataDirPath(filePathPrefix, testNs1ID, shard)
   283  		checkpointFile = dataFilesetPathFromTimeAndIndex(shardDir, testWriterStart, 0, CheckpointFileSuffix, false)
   284  	)
   285  	exists, err := CompleteCheckpointFileExists(checkpointFile)
   286  	require.NoError(t, err)
   287  	require.True(t, exists)
   288  	os.Remove(checkpointFile)
   289  
   290  	r := newTestReader(t, filePathPrefix)
   291  	rOpenOpts := DataReaderOpenOptions{
   292  		Identifier: FileSetFileIdentifier{
   293  			Namespace:  testNs1ID,
   294  			Shard:      shard,
   295  			BlockStart: testWriterStart,
   296  		},
   297  	}
   298  	err = r.Open(rOpenOpts)
   299  	require.Equal(t, ErrCheckpointFileNotFound, err)
   300  }
   301  
   302  func testReadOpen(t *testing.T, fileData map[string][]byte) {
   303  	filePathPrefix := createTempDir(t)
   304  	defer os.RemoveAll(filePathPrefix)
   305  
   306  	shard := uint32(0)
   307  	start := xtime.FromSeconds(1000)
   308  	shardDir := ShardDataDirPath(filePathPrefix, testNs1ID, shard)
   309  
   310  	w := newTestWriter(t, filePathPrefix)
   311  	writerOpts := DataWriterOpenOptions{
   312  		BlockSize: testBlockSize,
   313  		Identifier: FileSetFileIdentifier{
   314  			Namespace:  testNs1ID,
   315  			Shard:      shard,
   316  			BlockStart: start,
   317  		},
   318  	}
   319  	metadata := persist.NewMetadataFromIDAndTags(
   320  		ident.StringID("foo"),
   321  		ident.Tags{},
   322  		persist.MetadataOptions{})
   323  	assert.NoError(t, w.Open(writerOpts))
   324  
   325  	assert.NoError(t, w.Write(metadata,
   326  		bytesRefd([]byte{0x1}),
   327  		digest.Checksum([]byte{0x1})))
   328  	assert.NoError(t, w.Close())
   329  
   330  	for suffix, data := range fileData {
   331  		digestFile := dataFilesetPathFromTimeAndIndex(shardDir, start, 0, suffix, false)
   332  		fd, err := os.OpenFile(digestFile, os.O_WRONLY|os.O_TRUNC, os.FileMode(0666))
   333  		require.NoError(t, err)
   334  		_, err = fd.Write(data)
   335  		require.NoError(t, err)
   336  		fd.Close()
   337  	}
   338  
   339  	r := newTestReader(t, filePathPrefix)
   340  	rOpenOpts := DataReaderOpenOptions{
   341  		Identifier: FileSetFileIdentifier{
   342  			Namespace:  testNs1ID,
   343  			Shard:      shard,
   344  			BlockStart: xtime.FromSeconds(1000),
   345  		},
   346  	}
   347  	require.Error(t, r.Open(rOpenOpts))
   348  }
   349  
   350  func TestReadOpenDigestOfDigestMismatch(t *testing.T) {
   351  	testReadOpen(
   352  		t,
   353  		map[string][]byte{
   354  			InfoFileSuffix:       {0x1},
   355  			indexFileSuffix:      {0x2},
   356  			dataFileSuffix:       {0x3},
   357  			DigestFileSuffix:     {0x2, 0x0, 0x2, 0x0, 0x3, 0x0, 0x3, 0x0, 0x4, 0x0, 0x4, 0x0},
   358  			CheckpointFileSuffix: {0x12, 0x0, 0x7a, 0x0},
   359  		},
   360  	)
   361  }
   362  
   363  func TestReadOpenInfoDigestMismatch(t *testing.T) {
   364  	testReadOpen(
   365  		t,
   366  		map[string][]byte{
   367  			InfoFileSuffix:       {0xa},
   368  			indexFileSuffix:      {0x2},
   369  			dataFileSuffix:       {0x3},
   370  			DigestFileSuffix:     {0x2, 0x0, 0x2, 0x0, 0x3, 0x0, 0x3, 0x0, 0x4, 0x0, 0x4, 0x0},
   371  			CheckpointFileSuffix: {0x13, 0x0, 0x7a, 0x0},
   372  		},
   373  	)
   374  }
   375  
   376  func TestReadOpenIndexDigestMismatch(t *testing.T) {
   377  	// Write the correct info digest
   378  	enc := msgpack.NewEncoder()
   379  	require.NoError(t, enc.EncodeIndexInfo(schema.IndexInfo{}))
   380  	b := enc.Bytes()
   381  
   382  	// Write the wrong index digest
   383  	buf := digest.NewBuffer()
   384  	buf.WriteDigest(digest.Checksum(b))
   385  	digestOfDigest := append(buf, make([]byte, 8)...)
   386  	buf.WriteDigest(digest.Checksum(digestOfDigest))
   387  
   388  	testReadOpen(
   389  		t,
   390  		map[string][]byte{
   391  			InfoFileSuffix:       b,
   392  			indexFileSuffix:      {0xa},
   393  			dataFileSuffix:       {0x3},
   394  			DigestFileSuffix:     digestOfDigest,
   395  			CheckpointFileSuffix: buf,
   396  		},
   397  	)
   398  }
   399  
   400  func TestReadValidate(t *testing.T) {
   401  	filePathPrefix := createTempDir(t)
   402  	defer os.RemoveAll(filePathPrefix)
   403  
   404  	shard := uint32(0)
   405  	start := xtime.FromSeconds(1000)
   406  	w := newTestWriter(t, filePathPrefix)
   407  	writerOpts := DataWriterOpenOptions{
   408  		BlockSize: testBlockSize,
   409  		Identifier: FileSetFileIdentifier{
   410  			Namespace:  testNs1ID,
   411  			Shard:      shard,
   412  			BlockStart: start,
   413  		},
   414  	}
   415  	metadata := persist.NewMetadataFromIDAndTags(
   416  		ident.StringID("foo"),
   417  		ident.Tags{},
   418  		persist.MetadataOptions{})
   419  	require.NoError(t, w.Open(writerOpts))
   420  
   421  	assert.NoError(t, w.Write(metadata,
   422  		bytesRefd([]byte{0x1}),
   423  		digest.Checksum([]byte{0x1})))
   424  	require.NoError(t, w.Close())
   425  
   426  	r := newTestReader(t, filePathPrefix)
   427  	rOpenOpts := DataReaderOpenOptions{
   428  		Identifier: FileSetFileIdentifier{
   429  			Namespace:  testNs1ID,
   430  			Shard:      shard,
   431  			BlockStart: start,
   432  		},
   433  	}
   434  	require.NoError(t, r.Open(rOpenOpts))
   435  	_, _, _, _, err := r.Read()
   436  	require.NoError(t, err)
   437  
   438  	// Mutate expected data checksum to simulate data corruption
   439  	reader := r.(*reader)
   440  	reader.expectedDataDigest = 0
   441  	require.Error(t, r.Validate())
   442  
   443  	require.NoError(t, r.Close())
   444  }
   445  
   446  func reads(buf dataFileSetReaderDecoderStream, m int) string {
   447  	var b [1000]byte
   448  	if int(buf.Remaining()) > len(b) {
   449  		panic(fmt.Errorf("cannot read all"))
   450  	}
   451  
   452  	nb := 0
   453  	for {
   454  		n, err := buf.Read(b[nb : nb+m])
   455  		nb += n
   456  		if err == io.EOF {
   457  			break
   458  		}
   459  	}
   460  	return string(b[0:nb])
   461  }
   462  
   463  func TestDecoderStream(t *testing.T) {
   464  	var texts [31]string
   465  	str := ""
   466  	all := ""
   467  	for i := 0; i < len(texts)-1; i++ {
   468  		texts[i] = str + "\n"
   469  		all += texts[i]
   470  		str += string(rune(i%26 + 'a'))
   471  	}
   472  	texts[len(texts)-1] = all
   473  
   474  	buf := newReaderDecoderStream()
   475  	for i := 0; i < len(texts); i++ {
   476  		text := texts[i]
   477  		for j := 1; j <= 8; j++ {
   478  			buf.Reset([]byte(text))
   479  			s := reads(buf, j)
   480  			if s != text {
   481  				t.Errorf("m=%d want=%q got=%q", j, text, s)
   482  			}
   483  		}
   484  	}
   485  }
   486  
   487  func TestDecoderStreamSkip(t *testing.T) {
   488  	d := []byte{1, 2, 3, 4, 5}
   489  	expectedDigest := digest.Checksum(d)
   490  	buf := newReaderDecoderStream()
   491  	buf.Reset(d)
   492  	assert.Equal(t, int64(5), buf.Remaining())
   493  	assert.NoError(t, buf.Skip(3))
   494  	assert.Equal(t, int64(2), buf.Remaining())
   495  
   496  	p := make([]byte, 2)
   497  	n, err := buf.Read(p)
   498  	assert.Equal(t, 2, n)
   499  	assert.NoError(t, err)
   500  	assert.Equal(t, []byte{4, 5}, p)
   501  
   502  	assert.NoError(t, buf.reader().Validate(expectedDigest))
   503  }
   504  
   505  func TestDecoderStreamUnreadByte(t *testing.T) {
   506  	segments := []string{"Hello, ", "world"}
   507  	got := ""
   508  	want := strings.Join(segments, "")
   509  	r := newReaderDecoderStream()
   510  	r.Reset([]byte(want))
   511  	// Normal execution.
   512  	for {
   513  		b1, err := r.ReadByte()
   514  		if err != nil {
   515  			if err != io.EOF {
   516  				t.Error("unexpected error on ReadByte:", err)
   517  			}
   518  			break
   519  		}
   520  		got += string(b1)
   521  		// Put it back and read it again.
   522  		if err = r.UnreadByte(); err != nil {
   523  			t.Fatal("unexpected error on UnreadByte:", err)
   524  		}
   525  		b2, err := r.ReadByte()
   526  		if err != nil {
   527  			t.Fatal("unexpected error reading after unreading:", err)
   528  		}
   529  		if b1 != b2 {
   530  			t.Fatalf("incorrect byte after unread: got %q, want %q", b1, b2)
   531  		}
   532  	}
   533  	if got != want {
   534  		t.Errorf("got %q, want %q", got, want)
   535  	}
   536  }
   537  
   538  func TestDecoderStreamUnreadByteMultiple(t *testing.T) {
   539  	segments := []string{"Hello, ", "world"}
   540  	data := []byte(strings.Join(segments, ""))
   541  	for n := 0; n <= len(data); n++ {
   542  		r := newReaderDecoderStream()
   543  		r.Reset(data)
   544  		// Read n bytes.
   545  		for i := 0; i < n; i++ {
   546  			b, err := r.ReadByte()
   547  			if err != nil {
   548  				t.Fatalf("n = %d: unexpected error on ReadByte: %v", n, err)
   549  			}
   550  			if b != data[i] {
   551  				t.Fatalf("n = %d: incorrect byte returned from ReadByte: got %q, want %q", n, b, data[i])
   552  			}
   553  		}
   554  		// Unread one byte if there is one.
   555  		if n > 0 {
   556  			remaining := r.Remaining()
   557  			if expect := int64(len(data) - n); remaining != expect {
   558  				t.Errorf("n = %d: unexpected remaining before UnreadByte: got %d, want %d", n, remaining, expect)
   559  			}
   560  			if err := r.UnreadByte(); err != nil {
   561  				t.Errorf("n = %d: unexpected error on UnreadByte: %v", n, err)
   562  			}
   563  			remaining = r.Remaining()
   564  			if expect := int64(len(data) - n + 1); remaining != expect {
   565  				t.Errorf("n = %d: unexpected remaining after UnreadByte: got %d, want %d", n, remaining, expect)
   566  			}
   567  		}
   568  		// Test that we cannot unread any further.
   569  		if err := r.UnreadByte(); err == nil {
   570  			t.Errorf("n = %d: expected error on UnreadByte", n)
   571  		}
   572  		// Test that it can be read back with Read.
   573  		if n > 0 {
   574  			var c [1]byte
   575  			_, err := r.Read(c[:])
   576  			if err != nil {
   577  				t.Errorf("n = %d: unexpected error on Read after UnreadByte: %v", n, err)
   578  			}
   579  			if c[0] != data[n-1] {
   580  				t.Errorf("n = %d: unexpected error on Read after UnreadByte: %v != %v", n, c[0], data[n-1])
   581  			}
   582  		}
   583  	}
   584  }