github.com/dolthub/dolt/go@v0.40.5-0.20240520175717-68db7794bea6/store/nbs/journal_writer_test.go

github.com/dolthub/dolt/go@v0.40.5-0.20240520175717-68db7794bea6/store/nbs/journal_writer_test.go (about)

     1  // Copyright 2022 Dolthub, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package nbs
    16  
    17  import (
    18  	"context"
    19  	"encoding/base32"
    20  	"math/rand"
    21  	"os"
    22  	"path/filepath"
    23  	"testing"
    24  
    25  	"github.com/stretchr/testify/assert"
    26  	"github.com/stretchr/testify/require"
    27  
    28  	"github.com/dolthub/dolt/go/store/chunks"
    29  	"github.com/dolthub/dolt/go/store/hash"
    30  )
    31  
    32  func TestJournalWriterReadWrite(t *testing.T) {
    33  	type opKind byte
    34  
    35  	type operation struct {
    36  		kind   opKind
    37  		buf    []byte
    38  		readAt int64
    39  	}
    40  
    41  	const (
    42  		readOp opKind = iota
    43  		writeOp
    44  		flushOp
    45  	)
    46  
    47  	tests := []struct {
    48  		name string
    49  		size int
    50  		ops  []operation
    51  	}{
    52  		{
    53  			name: "smoke test",
    54  			size: 16,
    55  		},
    56  		{
    57  			name: "write to empty file",
    58  			size: 16,
    59  			ops: []operation{
    60  				{kind: writeOp, buf: []byte("lorem")},
    61  				{kind: writeOp, buf: []byte("ipsum")},
    62  			},
    63  		},
    64  		{
    65  			name: "read from non-empty file",
    66  			size: 16,
    67  			ops: []operation{
    68  				{kind: writeOp, buf: []byte("loremipsum")},
    69  				{kind: flushOp},
    70  				{kind: readOp, buf: []byte("lorem"), readAt: 0},
    71  				{kind: readOp, buf: []byte("ipsum"), readAt: 5},
    72  				{kind: readOp, buf: []byte("loremipsum"), readAt: 0},
    73  			},
    74  		},
    75  		{
    76  			name: "read new writes",
    77  			size: 16,
    78  			ops: []operation{
    79  				{kind: writeOp, buf: []byte("lorem")},
    80  				{kind: readOp, buf: []byte("lorem"), readAt: 0},
    81  				{kind: writeOp, buf: []byte("ipsum")},
    82  				{kind: readOp, buf: []byte("lorem"), readAt: 0},
    83  				{kind: readOp, buf: []byte("ipsum"), readAt: 5},
    84  			},
    85  		},
    86  		{
    87  			name: "read flushed writes",
    88  			size: 16,
    89  			ops: []operation{
    90  				{kind: writeOp, buf: []byte("lorem")},
    91  				{kind: flushOp},
    92  				{kind: readOp, buf: []byte("lorem"), readAt: 0},
    93  				{kind: writeOp, buf: []byte("ipsum")},
    94  				{kind: readOp, buf: []byte("ipsum"), readAt: 5},
    95  				{kind: readOp, buf: []byte("lorem"), readAt: 0},
    96  				{kind: flushOp},
    97  			},
    98  		},
    99  		{
   100  			name: "read partially flushed writes",
   101  			size: 16,
   102  			ops: []operation{
   103  				{kind: writeOp, buf: []byte("lorem")},
   104  				{kind: flushOp},
   105  				{kind: writeOp, buf: []byte("ipsum")},
   106  				{kind: readOp, buf: []byte("loremipsum"), readAt: 0},
   107  			},
   108  		},
   109  		{
   110  			name: "successive writes trigger buffer flush ",
   111  			size: 16,
   112  			ops: []operation{
   113  				{kind: writeOp, buf: []byte("lorem")},
   114  				{kind: readOp, buf: []byte("lorem"), readAt: 0},
   115  				{kind: writeOp, buf: []byte("ipsum")},
   116  				{kind: readOp, buf: []byte("ipsum"), readAt: 5},
   117  				{kind: writeOp, buf: []byte("dolor")},
   118  				{kind: readOp, buf: []byte("dolor"), readAt: 10},
   119  				{kind: writeOp, buf: []byte("sit")}, // triggers a flush
   120  				{kind: readOp, buf: []byte("sit"), readAt: 15},
   121  				{kind: readOp, buf: []byte("loremipsumdolorsit"), readAt: 0},
   122  				{kind: writeOp, buf: []byte("amet")},
   123  				{kind: readOp, buf: []byte("amet"), readAt: 18},
   124  				{kind: readOp, buf: []byte("loremipsumdolorsitamet"), readAt: 0},
   125  			},
   126  		},
   127  		{
   128  			name: "flush empty buffer",
   129  			size: 16,
   130  			ops: []operation{
   131  				{kind: writeOp, buf: []byte("loremipsum")},
   132  				{kind: flushOp},
   133  			},
   134  		},
   135  		{
   136  			name: "double flush write",
   137  			size: 16,
   138  			ops: []operation{
   139  				{kind: writeOp, buf: []byte("loremipsum")},
   140  				{kind: flushOp},
   141  				{kind: writeOp, buf: []byte("dolor")},
   142  				{kind: flushOp},
   143  				{kind: flushOp},
   144  			},
   145  		},
   146  	}
   147  	for _, test := range tests {
   148  		t.Run(test.name, func(t *testing.T) {
   149  			path := newTestFilePath(t)
   150  			j := newTestJournalWriter(t, path)
   151  			// set specific buffer size
   152  			j.buf = make([]byte, 0, test.size)
   153  
   154  			var off int64
   155  			var err error
   156  			for i, op := range test.ops {
   157  				switch op.kind {
   158  				case readOp:
   159  					act := make([]byte, len(op.buf))
   160  					n, err := j.readAt(act, op.readAt)
   161  					assert.NoError(t, err, "operation %d errored", i)
   162  					assert.Equal(t, len(op.buf), n, "operation %d failed", i)
   163  					assert.Equal(t, op.buf, act, "operation %d failed", i)
   164  				case writeOp:
   165  					var p []byte
   166  					p, err = j.getBytes(len(op.buf))
   167  					require.NoError(t, err, "operation %d errored", i)
   168  					n := copy(p, op.buf)
   169  					assert.Equal(t, len(op.buf), n, "operation %d failed", i)
   170  					off += int64(n)
   171  				case flushOp:
   172  					err = j.flush()
   173  					assert.NoError(t, err, "operation %d errored", i)
   174  				default:
   175  					t.Fatal("unknown opKind")
   176  				}
   177  				assert.Equal(t, off, j.offset())
   178  			}
   179  		})
   180  	}
   181  }
   182  
   183  func newTestJournalWriter(t *testing.T, path string) *journalWriter {
   184  	ctx := context.Background()
   185  	j, err := createJournalWriter(ctx, path)
   186  	require.NoError(t, err)
   187  	require.NotNil(t, j)
   188  	_, err = j.bootstrapJournal(ctx, nil)
   189  	require.NoError(t, err)
   190  	return j
   191  }
   192  
   193  func TestJournalWriterWriteCompressedChunk(t *testing.T) {
   194  	path := newTestFilePath(t)
   195  	j := newTestJournalWriter(t, path)
   196  	data := randomCompressedChunks(1024)
   197  	for a, cc := range data {
   198  		err := j.writeCompressedChunk(cc)
   199  		require.NoError(t, err)
   200  		r, _ := j.ranges.get(a)
   201  		validateLookup(t, j, r, cc)
   202  	}
   203  	validateAllLookups(t, j, data)
   204  }
   205  
   206  func TestJournalWriterBootstrap(t *testing.T) {
   207  	ctx := context.Background()
   208  	path := newTestFilePath(t)
   209  	j := newTestJournalWriter(t, path)
   210  	data := randomCompressedChunks(1024)
   211  	var last hash.Hash
   212  	for _, cc := range data {
   213  		err := j.writeCompressedChunk(cc)
   214  		require.NoError(t, err)
   215  		last = cc.Hash()
   216  	}
   217  	require.NoError(t, j.commitRootHash(last))
   218  	require.NoError(t, j.Close())
   219  
   220  	j, _, err := openJournalWriter(ctx, path)
   221  	require.NoError(t, err)
   222  	reflogBuffer := newReflogRingBuffer(10)
   223  	last, err = j.bootstrapJournal(ctx, reflogBuffer)
   224  	require.NoError(t, err)
   225  	assertExpectedIterationOrder(t, reflogBuffer, []string{last.String()})
   226  
   227  	validateAllLookups(t, j, data)
   228  
   229  	source := journalChunkSource{journal: j}
   230  	for a, cc := range data {
   231  		buf, err := source.get(ctx, a, nil)
   232  		require.NoError(t, err)
   233  		ch, err := cc.ToChunk()
   234  		require.NoError(t, err)
   235  		assert.Equal(t, ch.Data(), buf)
   236  	}
   237  }
   238  
   239  func validateAllLookups(t *testing.T, j *journalWriter, data map[hash.Hash]CompressedChunk) {
   240  	// move |data| to addr16-keyed map
   241  	prefixMap := make(map[addr16]CompressedChunk, len(data))
   242  	var prefix addr16
   243  	for a, cc := range data {
   244  		copy(prefix[:], a[:])
   245  		prefixMap[prefix] = cc
   246  	}
   247  	iterRangeIndex(j.ranges, func(a addr16, r Range) (stop bool) {
   248  		validateLookup(t, j, r, prefixMap[a])
   249  		return
   250  	})
   251  }
   252  
   253  func iterRangeIndex(idx rangeIndex, cb func(addr16, Range) (stop bool)) {
   254  	idx.novel.Iter(func(a hash.Hash, r Range) (stop bool) {
   255  		return cb(toAddr16(a), r)
   256  	})
   257  	idx.cached.Iter(cb)
   258  }
   259  
   260  func validateLookup(t *testing.T, j *journalWriter, r Range, cc CompressedChunk) {
   261  	buf := make([]byte, r.Length)
   262  	_, err := j.readAt(buf, int64(r.Offset))
   263  	require.NoError(t, err)
   264  	act, err := NewCompressedChunk(cc.H, buf)
   265  	assert.NoError(t, err)
   266  	assert.Equal(t, cc.FullCompressedChunk, act.FullCompressedChunk)
   267  }
   268  
   269  func TestJournalWriterSyncClose(t *testing.T) {
   270  	path := newTestFilePath(t)
   271  	j := newTestJournalWriter(t, path)
   272  	p := []byte("sit")
   273  	buf, err := j.getBytes(len(p))
   274  	require.NoError(t, err)
   275  	copy(buf, p)
   276  	j.flush()
   277  	assert.Equal(t, 0, len(j.buf))
   278  	assert.Equal(t, 3, int(j.off))
   279  }
   280  
   281  func newTestFilePath(t *testing.T) string {
   282  	path, err := os.MkdirTemp("", "")
   283  	require.NoError(t, err)
   284  	return filepath.Join(path, "journal.log")
   285  }
   286  
   287  func TestJournalIndexBootstrap(t *testing.T) {
   288  	// potentially indexed region of a journal
   289  	type epoch struct {
   290  		records map[hash.Hash]CompressedChunk
   291  		last    hash.Hash
   292  	}
   293  
   294  	makeEpoch := func() (e epoch) {
   295  		e.records = randomCompressedChunks(8)
   296  		for h := range e.records {
   297  			e.last = hash.Hash(h)
   298  			break
   299  		}
   300  		return
   301  	}
   302  
   303  	tests := []struct {
   304  		name   string
   305  		epochs []epoch
   306  		novel  epoch
   307  	}{
   308  		{
   309  			name:   "smoke test",
   310  			epochs: []epoch{makeEpoch()},
   311  		},
   312  		{
   313  			name:   "non-indexed journal",
   314  			epochs: nil,
   315  			novel:  makeEpoch(),
   316  		},
   317  		{
   318  			name:   "partially indexed journal",
   319  			epochs: []epoch{makeEpoch()},
   320  			novel:  makeEpoch(),
   321  		},
   322  		{
   323  			name: "multiple index records",
   324  			epochs: []epoch{
   325  				makeEpoch(),
   326  				makeEpoch(),
   327  				makeEpoch(),
   328  			},
   329  			novel: makeEpoch(),
   330  		},
   331  	}
   332  
   333  	for _, test := range tests {
   334  		t.Run(test.name, func(t *testing.T) {
   335  			ctx := context.Background()
   336  			path := newTestFilePath(t)
   337  			j := newTestJournalWriter(t, path)
   338  			// setup
   339  			var recordCnt int
   340  			epochs := append(test.epochs, test.novel)
   341  			for i, e := range epochs {
   342  				for _, cc := range e.records {
   343  					recordCnt++
   344  					assert.NoError(t, j.writeCompressedChunk(cc))
   345  					if rand.Int()%10 == 0 { // periodic commits
   346  						assert.NoError(t, j.commitRootHash(cc.H))
   347  					}
   348  				}
   349  				o := j.offset()                             // precommit offset
   350  				assert.NoError(t, j.commitRootHash(e.last)) // commit |e.last|
   351  				if i == len(epochs) {
   352  					break // don't index |test.novel|
   353  				}
   354  				assert.NoError(t, j.flushIndexRecord(e.last, o)) // write index record
   355  			}
   356  			err := j.Close()
   357  			require.NoError(t, err)
   358  
   359  			validateJournal := func(p string, expected []epoch) {
   360  				journal, ok, err := openJournalWriter(ctx, p)
   361  				require.NoError(t, err)
   362  				require.True(t, ok)
   363  				// bootstrap journal and validate chunk records
   364  				last, err := journal.bootstrapJournal(ctx, nil)
   365  				assert.NoError(t, err)
   366  				for _, e := range expected {
   367  					var act CompressedChunk
   368  					for a, exp := range e.records {
   369  						act, err = journal.getCompressedChunk(a)
   370  						assert.NoError(t, err)
   371  						assert.Equal(t, exp, act)
   372  					}
   373  				}
   374  				assert.Equal(t, expected[len(expected)-1].last, last)
   375  				assert.NoError(t, journal.Close())
   376  			}
   377  
   378  			idxPath := filepath.Join(filepath.Dir(path), journalIndexFileName)
   379  
   380  			before, err := os.Stat(idxPath)
   381  			require.NoError(t, err)
   382  
   383  			lookupSize := int64(recordCnt * (1 + lookupSz))
   384  			metaSize := int64(len(epochs)) * (1 + lookupMetaSz)
   385  			assert.Equal(t, lookupSize+metaSize, before.Size())
   386  
   387  			// bootstrap journal using index
   388  			validateJournal(path, epochs)
   389  			// assert journal index unchanged
   390  			info, err := os.Stat(idxPath)
   391  			require.NoError(t, err)
   392  			assert.Equal(t, before.Size(), info.Size())
   393  
   394  			// bootstrap journal with corrupted index
   395  			corruptJournalIndex(t, idxPath)
   396  			jnl, ok, err := openJournalWriter(ctx, idxPath)
   397  			require.NoError(t, err)
   398  			require.True(t, ok)
   399  			_, err = jnl.bootstrapJournal(ctx, nil)
   400  			assert.Error(t, err)
   401  		})
   402  	}
   403  }
   404  
   405  var encoding = base32.NewEncoding("0123456789abcdefghijklmnopqrstuv")
   406  
   407  // encode returns the base32 encoding in the Dolt alphabet.
   408  func encode(data []byte) string {
   409  	return encoding.EncodeToString(data)
   410  }
   411  
   412  func randomCompressedChunks(cnt int) (compressed map[hash.Hash]CompressedChunk) {
   413  	compressed = make(map[hash.Hash]CompressedChunk)
   414  	var buf []byte
   415  	for i := 0; i < cnt; i++ {
   416  		k := rand.Intn(51) + 50
   417  		if k >= len(buf) {
   418  			buf = make([]byte, 64*1024)
   419  			rand.Read(buf)
   420  		}
   421  		c := chunks.NewChunk(buf[:k])
   422  		buf = buf[k:]
   423  		compressed[c.Hash()] = ChunkToCompressedChunk(c)
   424  	}
   425  	return
   426  }
   427  
   428  func corruptJournalIndex(t *testing.T, path string) {
   429  	f, err := os.OpenFile(path, os.O_RDWR, 0666)
   430  	require.NoError(t, err)
   431  	info, err := f.Stat()
   432  	require.NoError(t, err)
   433  	buf := make([]byte, 64)
   434  	rand.Read(buf)
   435  	_, err = f.WriteAt(buf, info.Size()/2)
   436  	require.NoError(t, err)
   437  }
   438  
   439  func TestRangeIndex(t *testing.T) {
   440  	data := randomCompressedChunks(1024)
   441  	idx := newRangeIndex()
   442  	for _, c := range data {
   443  		idx.put(c.Hash(), Range{})
   444  	}
   445  	for _, c := range data {
   446  		_, ok := idx.get(c.Hash())
   447  		assert.True(t, ok)
   448  	}
   449  	assert.Equal(t, len(data), idx.novelCount())
   450  	assert.Equal(t, len(data), int(idx.count()))
   451  	idx = idx.flatten()
   452  	assert.Equal(t, 0, idx.novelCount())
   453  	assert.Equal(t, len(data), int(idx.count()))
   454  }