// Copyright 2011 The LevelDB-Go and Pebble Authors. All rights reserved. Use
// of this source code is governed by a BSD-style license that can be found in
// the LICENSE file.

package record

import (
	"bytes"
	"encoding/binary"
	"fmt"
	"io"
	"math"
	"strings"
	"testing"
	"time"

	"github.com/cockroachdb/errors"
	"github.com/cockroachdb/pebble/internal/base"
	"github.com/prometheus/client_golang/prometheus"
	"github.com/stretchr/testify/require"
	"golang.org/x/exp/rand"
)

func short(s string) string {
	if len(s) < 64 {
		return s
	}
	return fmt.Sprintf("%s...(skipping %d bytes)...%s", s[:20], len(s)-40, s[len(s)-20:])
}

// big returns a string of length n, composed of repetitions of partial.
func big(partial string, n int) string {
	return strings.Repeat(partial, n/len(partial)+1)[:n]
}

type recordWriter interface {
	WriteRecord([]byte) (int64, error)
	Close() error
}

func testGeneratorWriter(
	t *testing.T, reset func(), gen func() (string, bool), newWriter func(io.Writer) recordWriter,
) {
	buf := new(bytes.Buffer)

	reset()
	w := newWriter(buf)
	for {
		s, ok := gen()
		if !ok {
			break
		}
		if _, err := w.WriteRecord([]byte(s)); err != nil {
			t.Fatalf("Write: %v", err)
		}
	}
	if err := w.Close(); err != nil {
		t.Fatalf("Close: %v", err)
	}
	reset()
	r := NewReader(buf, 0 /* logNum */)
	for {
		s, ok := gen()
		if !ok {
			break
		}
		rr, err := r.Next()
		if err != nil {
			t.Fatalf("reader.Next: %v", err)
		}
		x, err := io.ReadAll(rr)
		if err != nil {
			t.Fatalf("ReadAll: %v", err)
		}
		if string(x) != s {
			t.Fatalf("got %q, want %q", short(string(x)), short(s))
		}
	}
	if _, err := r.Next(); err != io.EOF {
		t.Fatalf("got %v, want %v", err, io.EOF)
	}
}

func testGenerator(t *testing.T, reset func(), gen func() (string, bool)) {
	t.Run("Writer", func(t *testing.T) {
		testGeneratorWriter(t, reset, gen, func(w io.Writer) recordWriter {
			return NewWriter(w)
		})
	})

	t.Run("LogWriter", func(t *testing.T) {
		testGeneratorWriter(t, reset, gen, func(w io.Writer) recordWriter {
			return NewLogWriter(w, 0 /* logNum */, LogWriterConfig{
				WALFsyncLatency: prometheus.NewHistogram(prometheus.HistogramOpts{})})
		})
	})
}

func testLiterals(t *testing.T, s []string) {
	var i int
	reset := func() {
		i = 0
	}
	gen := func() (string, bool) {
		if i == len(s) {
			return "", false
		}
		i++
		return s[i-1], true
	}
	testGenerator(t, reset, gen)
}

func TestMany(t *testing.T) {
	const n = 1e5
	var i int
	reset := func() {
		i = 0
	}
	gen := func() (string, bool) {
		if i == n {
			return "", false
		}
		i++
		return fmt.Sprintf("%d.", i-1), true
	}
	testGenerator(t, reset, gen)
}

func TestRandom(t *testing.T) {
	const n = 1e2
	var (
		i int
		r *rand.Rand
	)
	reset := func() {
		i, r = 0, rand.New(rand.NewSource(0))
	}
	gen := func() (string, bool) {
		if i == n {
			return "", false
		}
		i++
		return strings.Repeat(string(uint8(i)), r.Intn(2*blockSize+16)), true
	}
	testGenerator(t, reset, gen)
}

func TestBasic(t *testing.T) {
	testLiterals(t, []string{
		strings.Repeat("a", 1000),
		strings.Repeat("b", 97270),
		strings.Repeat("c", 8000),
	})
}

func TestBoundary(t *testing.T) {
	for i := blockSize - 16; i < blockSize+16; i++ {
		s0 := big("abcd", i)
		for j := blockSize - 16; j < blockSize+16; j++ {
			s1 := big("ABCDE", j)
			testLiterals(t, []string{s0, s1})
			testLiterals(t, []string{s0, "", s1})
			testLiterals(t, []string{s0, "x", s1})
		}
	}
}

func TestFlush(t *testing.T) {
	buf := new(bytes.Buffer)
	w := NewWriter(buf)
	// Write a couple of records. Everything should still be held in the
	// record.Writer buffer, so buf.Len() should be 0.
	w0, _ := w.Next()
	w0.Write([]byte("0"))
	w1, _ := w.Next()
	w1.Write([]byte("11"))
	if got, want := buf.Len(), 0; got != want {
		t.Fatalf("buffer length #0: got %d want %d", got, want)
	}
	// Flush the record.Writer buffer, which should yield 17 bytes.
	// 17 = 2*7 + 1 + 2: two 7-byte headers plus the 1- and 2-byte payloads.
	require.NoError(t, w.Flush())
	if got, want := buf.Len(), 17; got != want {
		t.Fatalf("buffer length #1: got %d want %d", got, want)
	}
	// Do another write, one that isn't large enough to complete the block.
	// The write should not have flowed through to buf.
	w2, _ := w.Next()
	w2.Write(bytes.Repeat([]byte("2"), 10000))
	if got, want := buf.Len(), 17; got != want {
		t.Fatalf("buffer length #2: got %d want %d", got, want)
	}
	// Flushing should get us up to 10024 bytes written.
	// 10024 = 17 + 7 + 10000.
	require.NoError(t, w.Flush())
	if got, want := buf.Len(), 10024; got != want {
		t.Fatalf("buffer length #3: got %d want %d", got, want)
	}
	// Do a bigger write, one that completes the current block.
	// We should now have 32768 bytes (a complete block), without
	// an explicit flush.
	w3, _ := w.Next()
	w3.Write(bytes.Repeat([]byte("3"), 40000))
	if got, want := buf.Len(), 32768; got != want {
		t.Fatalf("buffer length #4: got %d want %d", got, want)
	}
	// Flushing should get us up to 50038 bytes written.
	// 50038 = 10024 + 2*7 + 40000. There are two headers because
	// the one record was split into two chunks.
	require.NoError(t, w.Flush())
	if got, want := buf.Len(), 50038; got != want {
		t.Fatalf("buffer length #5: got %d want %d", got, want)
	}
	// Check that reading those records gives the right lengths.
	r := NewReader(buf, 0 /* logNum */)
	wants := []int64{1, 2, 10000, 40000}
	for i, want := range wants {
		rr, _ := r.Next()
		n, err := io.Copy(io.Discard, rr)
		if err != nil {
			t.Fatalf("read #%d: %v", i, err)
		}
		if n != want {
			t.Fatalf("read #%d: got %d bytes want %d", i, n, want)
		}
	}
}
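
// The arithmetic above hinges on the 7-byte legacy chunk header that Writer
// prepends to every chunk. The following is a minimal sketch of that
// invariant, not part of the original suite: an empty record (which
// TestBoundary already exercises via empty-string literals) should flush as a
// single header with no payload bytes.
func TestFlushEmptyRecord(t *testing.T) {
	buf := new(bytes.Buffer)
	w := NewWriter(buf)
	if _, err := w.WriteRecord(nil); err != nil {
		t.Fatalf("WriteRecord: %v", err)
	}
	require.NoError(t, w.Flush())
	// One FULL chunk: a 7-byte header and zero payload bytes.
	if got, want := buf.Len(), legacyHeaderSize; got != want {
		t.Fatalf("buffer length: got %d want %d", got, want)
	}
	require.NoError(t, w.Close())
}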

func TestNonExhaustiveRead(t *testing.T) {
	const n = 100
	buf := new(bytes.Buffer)
	p := make([]byte, 10)
	rnd := rand.New(rand.NewSource(1))

	w := NewWriter(buf)
	for i := 0; i < n; i++ {
		length := len(p) + rnd.Intn(3*blockSize)
		s := string(uint8(i)) + "123456789abcdefgh"
		_, _ = w.WriteRecord([]byte(big(s, length)))
	}
	if err := w.Close(); err != nil {
		t.Fatalf("Close: %v", err)
	}

	r := NewReader(buf, 0 /* logNum */)
	for i := 0; i < n; i++ {
		rr, _ := r.Next()
		_, err := io.ReadFull(rr, p)
		if err != nil {
			t.Fatalf("ReadFull: %v", err)
		}
		want := string(uint8(i)) + "123456789"
		if got := string(p); got != want {
			t.Fatalf("read #%d: got %q want %q", i, got, want)
		}
	}
}

func TestStaleReader(t *testing.T) {
	buf := new(bytes.Buffer)

	w := NewWriter(buf)
	_, err := w.WriteRecord([]byte("0"))
	require.NoError(t, err)

	_, err = w.WriteRecord([]byte("11"))
	require.NoError(t, err)

	require.NoError(t, w.Close())

	r := NewReader(buf, 0 /* logNum */)
	r0, err := r.Next()
	require.NoError(t, err)

	r1, err := r.Next()
	require.NoError(t, err)

	p := make([]byte, 1)
	if _, err := r0.Read(p); err == nil || !strings.Contains(err.Error(), "stale") {
		t.Fatalf("stale read #0: unexpected error: %v", err)
	}
	if _, err := r1.Read(p); err != nil {
		t.Fatalf("fresh read #1: got %v want nil error", err)
	}
	if p[0] != '1' {
		t.Fatalf("fresh read #1: byte contents: got '%c' want '1'", p[0])
	}
}

type testRecords struct {
	records [][]byte // The raw value of each record.
	offsets []int64  // The offset of each record within buf, derived from writer.LastRecordOffset.
	buf     []byte   // The serialized form of all the records.
}

// makeTestRecords generates test records of the specified lengths.
// The first record consists of repeating 0x00 bytes, the next record of
// 0x01 bytes, and so forth, looping back to 0x00 after 0xff.
func makeTestRecords(recordLengths ...int) (*testRecords, error) {
	ret := &testRecords{}
	ret.records = make([][]byte, len(recordLengths))
	ret.offsets = make([]int64, len(recordLengths))
	for i, n := range recordLengths {
		ret.records[i] = bytes.Repeat([]byte{byte(i)}, n)
	}

	buf := new(bytes.Buffer)
	w := NewWriter(buf)
	for i, rec := range ret.records {
		wRec, err := w.Next()
		if err != nil {
			return nil, err
		}

		// Alternate between one big write and many small writes.
		cSize := 8
		if i&1 == 0 {
			cSize = len(rec)
		}
		for ; len(rec) > cSize; rec = rec[cSize:] {
			if _, err := wRec.Write(rec[:cSize]); err != nil {
				return nil, err
			}
		}
		if _, err := wRec.Write(rec); err != nil {
			return nil, err
		}

		ret.offsets[i], err = w.LastRecordOffset()
		if err != nil {
			return nil, err
		}
	}

	if err := w.Close(); err != nil {
		return nil, err
	}

	ret.buf = buf.Bytes()
	return ret, nil
}

// corruptBlock corrupts the checksum of the chunk that starts at the given
// block, permuting the first checksum byte and zeroing the next three.
// Block numbering is 0-based.
func corruptBlock(buf []byte, blockNum int) {
	// Ensure we always permute at least 1 byte of the checksum.
	if buf[blockSize*blockNum] == 0x00 {
		buf[blockSize*blockNum] = 0xff
	} else {
		buf[blockSize*blockNum] = 0x00
	}

	buf[blockSize*blockNum+1] = 0x00
	buf[blockSize*blockNum+2] = 0x00
	buf[blockSize*blockNum+3] = 0x00
}
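
// For reference, corruptBlock relies on the legacy chunk layout, in which the
// checksum occupies the first four bytes of each chunk. A sketch of that
// layout (legacyHeaderSize is 7 bytes):
//
//	+----------------+--------------+------------+---------+
//	| checksum (4 B) | length (2 B) | type (1 B) | payload |
//	+----------------+--------------+------------+---------+
//
// Overwriting bytes 0-3 of a block therefore guarantees a checksum mismatch
// for that block's first chunk, which the reader surfaces as ErrInvalidChunk.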

func TestRecoverNoOp(t *testing.T) {
	recs, err := makeTestRecords(
		blockSize-legacyHeaderSize,
		blockSize-legacyHeaderSize,
		blockSize-legacyHeaderSize,
	)
	if err != nil {
		t.Fatalf("makeTestRecords: %v", err)
	}

	r := NewReader(bytes.NewReader(recs.buf), 0 /* logNum */)
	_, err = r.Next()
	if err != nil || r.err != nil {
		t.Fatalf("reader.Next: %v reader.err: %v", err, r.err)
	}

	seq, begin, end, n := r.seq, r.begin, r.end, r.n

	// Should be a no-op since r.err == nil.
	r.recover()

	// r.err was nil, so nothing should have changed.
	if seq != r.seq || begin != r.begin || end != r.end || n != r.n {
		t.Fatal("reader.recover was not a no-op when no error existed")
	}
}

func TestBasicRecover(t *testing.T) {
	recs, err := makeTestRecords(
		blockSize-legacyHeaderSize,
		blockSize-legacyHeaderSize,
		blockSize-legacyHeaderSize,
	)
	if err != nil {
		t.Fatalf("makeTestRecords: %v", err)
	}

	// Corrupt the checksum of the second record r1 in our file.
	corruptBlock(recs.buf, 1)

	underlyingReader := bytes.NewReader(recs.buf)
	r := NewReader(underlyingReader, 0 /* logNum */)

	// The first record r0 should be read just fine.
	r0, err := r.Next()
	if err != nil {
		t.Fatalf("Next: %v", err)
	}
	r0Data, err := io.ReadAll(r0)
	if err != nil {
		t.Fatalf("ReadAll: %v", err)
	}
	if !bytes.Equal(r0Data, recs.records[0]) {
		t.Fatal("Unexpected output in r0's data")
	}

	// The next record should have a checksum mismatch.
	_, err = r.Next()
	if err == nil {
		t.Fatal("Expected an error while reading a corrupted record")
	}
	if err != ErrInvalidChunk {
		t.Fatalf("Unexpected error returned: %v", err)
	}

	// Recover from that checksum mismatch.
	r.recover()
	currentOffset, err := underlyingReader.Seek(0, io.SeekCurrent)
	if err != nil {
		t.Fatalf("current offset: %v", err)
	}
	if currentOffset != blockSize*2 {
		t.Fatalf("current offset: got %d, want %d", currentOffset, blockSize*2)
	}

	// The third record r2 should be read just fine.
	r2, err := r.Next()
	if err != nil {
		t.Fatalf("Next: %v", err)
	}
	r2Data, err := io.ReadAll(r2)
	if err != nil {
		t.Fatalf("ReadAll: %v", err)
	}
	if !bytes.Equal(r2Data, recs.records[2]) {
		t.Fatal("Unexpected output in r2's data")
	}
}

func TestRecoverSingleBlock(t *testing.T) {
	// The first record will be blockSize * 3 bytes long. Since each chunk
	// carries a 7-byte header, the first record spills over into 4 blocks.
	recs, err := makeTestRecords(
		blockSize*3,
		blockSize-legacyHeaderSize,
		blockSize/2,
	)
	if err != nil {
		t.Fatalf("makeTestRecords: %v", err)
	}

	// Corrupt the checksum for the portion of the first record that exists in
	// the 4th block.
	corruptBlock(recs.buf, 3)

	// The first record should fail, but only when we read deeper beyond the
	// first block.
	r := NewReader(bytes.NewReader(recs.buf), 0 /* logNum */)
	r0, err := r.Next()
	if err != nil {
		t.Fatalf("Next: %v", err)
	}

	// Reading deeper should yield a checksum mismatch.
	_, err = io.ReadAll(r0)
	if err == nil {
		t.Fatal("Expected a checksum mismatch error, got nil")
	}
	if err != ErrInvalidChunk {
		t.Fatalf("Unexpected error returned: %v", err)
	}

	// Recover from that checksum mismatch.
	r.recover()

	// All of the data in the second record r1 is lost because the first record
	// r0 shared a partial block with it. The second record also overlapped
	// into the block with the third record r2. Recovery should jump to that
	// block, skip over the end of the second record, and start parsing the
	// third record.
	r2, err := r.Next()
	if err != nil {
		t.Fatalf("Next: %v", err)
	}
	r2Data, _ := io.ReadAll(r2)
	if !bytes.Equal(r2Data, recs.records[2]) {
		t.Fatal("Unexpected output in r2's data")
	}
}

func TestRecoverMultipleBlocks(t *testing.T) {
	recs, err := makeTestRecords(
		// The first record will consume 3 entire blocks but a fraction of the 4th.
		blockSize*3,
		// The second record will completely fill the remainder of the 4th block.
		3*(blockSize-legacyHeaderSize)-2*blockSize-2*legacyHeaderSize,
		// Consume the entirety of the 5th block.
		blockSize-legacyHeaderSize,
		// Consume the entirety of the 6th block.
		blockSize-legacyHeaderSize,
		// Consume roughly half of the 7th block.
		blockSize/2,
	)
	if err != nil {
		t.Fatalf("makeTestRecords: %v", err)
	}

	// Corrupt the checksum for the portion of the first record that exists in the 4th block.
	corruptBlock(recs.buf, 3)

	// Now corrupt the two blocks in a row that correspond to recs.records[2:4].
	corruptBlock(recs.buf, 4)
	corruptBlock(recs.buf, 5)

	// The first record should fail, but only when we read deeper beyond the first block.
	r := NewReader(bytes.NewReader(recs.buf), 0 /* logNum */)
	r0, err := r.Next()
	if err != nil {
		t.Fatalf("Next: %v", err)
	}

	// Reading deeper should yield a checksum mismatch.
	_, err = io.ReadAll(r0)
	if err == nil {
		t.Fatal("Expected a checksum mismatch error, got nil")
	}
	if err != ErrInvalidChunk {
		t.Fatalf("Unexpected error returned: %v", err)
	}

	// Recover from that checksum mismatch.
	r.recover()

	// All of the data in the second record is lost because the first
	// record shared a partial block with it. The following two records
	// have corrupted checksums as well, so the call above to r.recover
	// should leave r.Next() positioned at the 5th record.
	r4, err := r.Next()
	if err != nil {
		t.Fatalf("Next: %v", err)
	}

	r4Data, _ := io.ReadAll(r4)
	if !bytes.Equal(r4Data, recs.records[4]) {
		t.Fatal("Unexpected output in r4's data")
	}
}

// verifyLastBlockRecover reads each record from recs, expecting the last
// record to be corrupted. It then calls recover and verifies that io.EOF
// is returned.
func verifyLastBlockRecover(recs *testRecords) error {
	r := NewReader(bytes.NewReader(recs.buf), 0 /* logNum */)
	// Loop to one element past the number of records to verify EOF.
	for i := 0; i < len(recs.records)+1; i++ {
		_, err := r.Next()
		switch i {
		case len(recs.records) - 1:
			if err == nil {
				return errors.New("Expected a checksum mismatch error, got nil")
			}
			r.recover()
		case len(recs.records):
			if err != io.EOF {
				return errors.Errorf("Expected io.EOF, got %v", err)
			}
		default:
			if err != nil {
				return errors.Errorf("Next: %v", err)
			}
		}
	}
	return nil
}

func TestRecoverLastPartialBlock(t *testing.T) {
	recs, err := makeTestRecords(
		// The first record will consume 3 entire blocks but a fraction of the 4th.
		blockSize*3,
		// The second record will completely fill the remainder of the 4th block.
		3*(blockSize-legacyHeaderSize)-2*blockSize-2*legacyHeaderSize,
		// Consume roughly half of the 5th block.
		blockSize/2,
	)
	if err != nil {
		t.Fatalf("makeTestRecords: %v", err)
	}

	// Corrupt the 5th block.
	corruptBlock(recs.buf, 4)

	// Verify recovery works when the last block is corrupted.
	if err := verifyLastBlockRecover(recs); err != nil {
		t.Fatalf("verifyLastBlockRecover: %v", err)
	}
}

func TestRecoverLastCompleteBlock(t *testing.T) {
	recs, err := makeTestRecords(
		// The first record will consume 3 entire blocks but a fraction of the 4th.
		blockSize*3,
		// The second record will completely fill the remainder of the 4th block.
		3*(blockSize-legacyHeaderSize)-2*blockSize-2*legacyHeaderSize,
		// Consume the entire 5th block.
		blockSize-legacyHeaderSize,
	)
	if err != nil {
		t.Fatalf("makeTestRecords: %v", err)
	}

	// Corrupt the 5th block.
	corruptBlock(recs.buf, 4)

	// Verify recovery works when the last block is corrupted.
	if err := verifyLastBlockRecover(recs); err != nil {
		t.Fatalf("verifyLastBlockRecover: %v", err)
	}
}

func TestReaderOffset(t *testing.T) {
	recs, err := makeTestRecords(
		blockSize*2,
		400,
		500,
		600,
		700,
		800,
		9000,
		1000,
	)
	if err != nil {
		t.Fatalf("makeTestRecords: %v", err)
	}

	// Before each call to Next, the reader's offset should match the offset
	// the writer recorded for the corresponding record.
	r := NewReader(bytes.NewReader(recs.buf), 0 /* logNum */)
	for i, offset := range recs.offsets {
		if offset != r.Offset() {
			t.Fatalf("%d: expected offset %d, but found %d", i, offset, r.Offset())
		}
		rec, err := r.Next()
		if err != nil {
			t.Fatalf("Next: %v", err)
		}
		if _, err = io.ReadAll(rec); err != nil {
			t.Fatalf("ReadAll: %v", err)
		}
	}
}

func TestSeekRecord(t *testing.T) {
	recs, err := makeTestRecords(
		// The first record will consume 3 entire blocks but a fraction of the 4th.
		blockSize*3,
		// The second record will completely fill the remainder of the 4th block.
		3*(blockSize-legacyHeaderSize)-2*blockSize-2*legacyHeaderSize,
		// Consume the entirety of the 5th block.
		blockSize-legacyHeaderSize,
		// Consume the entirety of the 6th block.
		blockSize-legacyHeaderSize,
		// Consume roughly half of the 7th block.
		blockSize/2,
	)
	if err != nil {
		t.Fatalf("makeTestRecords: %v", err)
	}

	r := NewReader(bytes.NewReader(recs.buf), 0 /* logNum */)
	// Seek to a valid block offset, but within a multiblock record. This
	// should cause the next call to Next after seekRecord to return the next
	// valid FIRST/FULL chunk of the subsequent record.
	err = r.seekRecord(blockSize)
	if err != nil {
		t.Fatalf("SeekRecord: %v", err)
	}
	rec, err := r.Next()
	if err != nil {
		t.Fatalf("Next: %v", err)
	}
	rData, _ := io.ReadAll(rec)
	if !bytes.Equal(rData, recs.records[1]) {
		t.Fatalf("Unexpected output in record 1's data, got %v want %v", rData, recs.records[1])
	}

	// Seek 3 bytes into the second block, which is still in the middle of the
	// first record, but not at a valid chunk boundary. This should result in
	// an error upon calling r.Next.
	err = r.seekRecord(blockSize + 3)
	if err != nil {
		t.Fatalf("SeekRecord: %v", err)
	}
	if _, err = r.Next(); err == nil {
		t.Fatalf("Expected an error seeking to an invalid chunk boundary")
	}
	r.recover()

	// Seek to the fifth block and verify that all subsequent records can be
	// read as expected.
	err = r.seekRecord(blockSize * 4)
	if err != nil {
		t.Fatalf("SeekRecord: %v", err)
	}

	check := func(i int) {
		for ; i < len(recs.records); i++ {
			rec, err := r.Next()
			if err != nil {
				t.Fatalf("Next: %v", err)
			}

			rData, _ := io.ReadAll(rec)
			if !bytes.Equal(rData, recs.records[i]) {
				t.Fatalf("Unexpected output in record #%d's data, got %v want %v", i, rData, recs.records[i])
			}
		}
	}
	check(2)

	// Seek back to the fourth block, and read and verify all subsequent records.
	err = r.seekRecord(blockSize * 3)
	if err != nil {
		t.Fatalf("SeekRecord: %v", err)
	}
	check(1)

	// Now seek past the end of the file and verify that it causes an error.
	err = r.seekRecord(1 << 20)
	if err == nil {
		t.Fatalf("Seek past the end of a file didn't cause an error")
	}
	if err != io.ErrUnexpectedEOF {
		t.Fatalf("Seeking past EOF raised unexpected error: %v", err)
	}
	r.recover() // Verify recovery works.

	// Validate that the expected records are returned after seeking back to a
	// valid offset.
	err = r.seekRecord(blockSize * 4)
	if err != nil {
		t.Fatalf("SeekRecord: %v", err)
	}
	check(2)
}

func TestLastRecordOffset(t *testing.T) {
	recs, err := makeTestRecords(
		// The first record will consume 3 entire blocks but a fraction of the 4th.
		blockSize*3,
		// The second record will completely fill the remainder of the 4th block.
		3*(blockSize-legacyHeaderSize)-2*blockSize-2*legacyHeaderSize,
		// Consume the entirety of the 5th block.
		blockSize-legacyHeaderSize,
		// Consume the entirety of the 6th block.
		blockSize-legacyHeaderSize,
		// Consume roughly half of the 7th block.
		blockSize/2,
	)
	if err != nil {
		t.Fatalf("makeTestRecords: %v", err)
	}

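	// A sketch of where these offsets come from (with blockSize = 32768 and
	// 7-byte legacy chunk headers): record 0 starts at 0 and fills three
	// blocks plus a 28-byte chunk (7-byte header + 21-byte payload tail) of
	// the 4th, so record 1 starts at 3*blockSize + 28 = 98332. Record 1
	// exactly fills the rest of the 4th block, so records 2, 3, and 4 start
	// on the block boundaries 4*blockSize = 131072, 5*blockSize = 163840,
	// and 6*blockSize = 196608.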
	wants := []int64{0, 98332, 131072, 163840, 196608}
	for i, got := range recs.offsets {
		if want := wants[i]; got != want {
			t.Errorf("record #%d: got %d, want %d", i, got, want)
		}
	}
}

func TestNoLastRecordOffset(t *testing.T) {
	buf := new(bytes.Buffer)
	w := NewWriter(buf)
	defer w.Close()

	if _, err := w.LastRecordOffset(); err != ErrNoLastRecord {
		t.Fatalf("Expected ErrNoLastRecord, got: %v", err)
	}

	require.NoError(t, w.Flush())

	if _, err := w.LastRecordOffset(); err != ErrNoLastRecord {
		t.Fatalf("LastRecordOffset: got: %v, want ErrNoLastRecord", err)
	}

	_, err := w.WriteRecord([]byte("testrecord"))
	require.NoError(t, err)

	if off, err := w.LastRecordOffset(); err != nil {
		t.Fatalf("LastRecordOffset: %v", err)
	} else if off != 0 {
		t.Fatalf("LastRecordOffset: got %d, want 0", off)
	}
}

func TestInvalidLogNum(t *testing.T) {
	var buf bytes.Buffer
	w := NewLogWriter(&buf, 1, LogWriterConfig{
		WALFsyncLatency: prometheus.NewHistogram(prometheus.HistogramOpts{})})
	for i := 0; i < 10; i++ {
		s := fmt.Sprintf("%04d\n", i)
		_, err := w.WriteRecord([]byte(s))
		require.NoError(t, err)
	}
	require.NoError(t, w.Close())

	{
		r := NewReader(bytes.NewReader(buf.Bytes()), 1)
		for i := 0; i < 10; i++ {
			rr, err := r.Next()
			require.NoError(t, err)

			x, err := io.ReadAll(rr)
			require.NoError(t, err)

			s := fmt.Sprintf("%04d\n", i)
			if s != string(x) {
				t.Fatalf("expected %s, but found %s", s, x)
			}
		}
		if _, err := r.Next(); err != io.EOF {
			t.Fatalf("expected EOF, but found %s", err)
		}
	}

	{
		r := NewReader(bytes.NewReader(buf.Bytes()), 2)
		if _, err := r.Next(); err != io.EOF {
			t.Fatalf("expected %s, but found %s\n", io.EOF, err)
		}
	}
}

func TestSize(t *testing.T) {
	var buf bytes.Buffer
	zeroes := make([]byte, 8<<10)
	w := NewWriter(&buf)
	for i := 0; i < 100; i++ {
		n := rand.Intn(len(zeroes))
		_, err := w.WriteRecord(zeroes[:n])
		require.NoError(t, err)
		require.NoError(t, w.Flush())
		if buf.Len() != int(w.Size()) {
			t.Fatalf("expected %d, but found %d", buf.Len(), w.Size())
		}
	}
	require.NoError(t, w.Close())
}

// limitedWriter passes through the first limit calls to Write and silently
// discards (while still reporting success for) any writes after that,
// simulating a WAL whose tail never made it to disk.
type limitedWriter struct {
	io.Writer
	limit int
}

func (w *limitedWriter) Write(p []byte) (n int, err error) {
	w.limit--
	if w.limit < 0 {
		return len(p), nil
	}
	return w.Writer.Write(p)
}

func TestRecycleLog(t *testing.T) {
	const min = 16
	const max = 4096

	rnd := rand.New(rand.NewSource(uint64(time.Now().UnixNano())))
	randBlock := func() []byte {
		data := make([]byte, rand.Intn(max-min)+min)
		tmp := data
		for len(tmp) >= 8 {
			binary.LittleEndian.PutUint64(tmp, rand.Uint64())
			tmp = tmp[8:]
		}
		r := rand.Uint64()
		for i := 0; i < len(tmp); i++ {
			tmp[i] = byte(r)
			r >>= 8
		}
		return data
	}

	// Recycle a log file 100 times, writing a random number of records filled
	// with random data.
	backing := make([]byte, 1<<20)
	for i := 1; i <= 100; i++ {
		blocks := rnd.Intn(100)
		limitedBuf := &limitedWriter{
			Writer: bytes.NewBuffer(backing[:0]),
			limit:  blocks,
		}

		w := NewLogWriter(limitedBuf, base.DiskFileNum(i), LogWriterConfig{
			WALFsyncLatency: prometheus.NewHistogram(prometheus.HistogramOpts{})})
		sizes := make([]int, 10+rnd.Intn(100))
		for j := range sizes {
			data := randBlock()
			if _, err := w.WriteRecord(data); err != nil {
				t.Fatalf("%d/%d: %v", i, j, err)
			}
			sizes[j] = len(data)
		}
		if err := w.Close(); err != nil {
			t.Fatalf("%d: %v", i, err)
		}

		r := NewReader(bytes.NewReader(backing), base.DiskFileNum(i))
		for j := range sizes {
			rr, err := r.Next()
			if err != nil {
				// If we limited output, then an EOF, zeroed, or invalid chunk is expected.
				if limitedBuf.limit < 0 && (err == io.EOF || err == ErrZeroedChunk || err == ErrInvalidChunk) {
					break
				}
				t.Fatalf("%d/%d: %v", i, j, err)
			}
			x, err := io.ReadAll(rr)
			if err != nil {
				// If we limited output, then an EOF, zeroed, or invalid chunk is expected.
				if limitedBuf.limit < 0 && (err == io.EOF || err == ErrZeroedChunk || err == ErrInvalidChunk) {
					break
				}
				t.Fatalf("%d/%d: %v", i, j, err)
			}
			if sizes[j] != len(x) {
				t.Fatalf("%d/%d: expected record %d, but found %d", i, j, sizes[j], len(x))
			}
		}
		if _, err := r.Next(); err != io.EOF && err != ErrZeroedChunk && err != ErrInvalidChunk {
			t.Fatalf("%d: expected EOF, but found %v", i, err)
		}
	}
}

func TestTruncatedLog(t *testing.T) {
	backing := make([]byte, 2*blockSize)
	w := NewLogWriter(bytes.NewBuffer(backing[:0]), base.DiskFileNum(1), LogWriterConfig{
		WALFsyncLatency: prometheus.NewHistogram(prometheus.HistogramOpts{})})
	// Write a record that spans 2 blocks.
	_, err := w.WriteRecord(bytes.Repeat([]byte("s"), blockSize+100))
	require.NoError(t, err)
	require.NoError(t, w.Close())
	// Create a reader for only the first block.
	r := NewReader(bytes.NewReader(backing[:blockSize]), base.DiskFileNum(1))
	rr, err := r.Next()
	require.NoError(t, err)
	_, err = io.ReadAll(rr)
	require.EqualValues(t, err, io.ErrUnexpectedEOF)
}

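// The recycling tests below rely on the recyclable chunk format emitted by
// LogWriter, which extends the 7-byte legacy header with the 32-bit log
// number, for an 11-byte header in total. A sketch of the layout (the exact
// field order is informative; see this package's format definition for the
// authoritative one):
//
//	checksum (4 B) | length (2 B) | type (1 B) | log number (4 B) | payload
//
// The log number lets a reader distinguish chunks written by the current
// incarnation of a recycled log file from stale chunks left over from a
// previous one.
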
func TestRecycleLogWithPartialBlock(t *testing.T) {
	backing := make([]byte, 27)
	w := NewLogWriter(bytes.NewBuffer(backing[:0]), base.DiskFileNum(1), LogWriterConfig{
		WALFsyncLatency: prometheus.NewHistogram(prometheus.HistogramOpts{})})
	// Writes a chunk with an 11-byte header + 5-byte payload.
	_, err := w.WriteRecord([]byte("aaaaa"))
	require.NoError(t, err)
	// Close will write an 11-byte EOF chunk.
	require.NoError(t, w.Close())

	w = NewLogWriter(bytes.NewBuffer(backing[:0]), base.DiskFileNum(2), LogWriterConfig{
		WALFsyncLatency: prometheus.NewHistogram(prometheus.HistogramOpts{})})
	// Writes a chunk with an 11-byte header + 1-byte payload.
	_, err = w.WriteRecord([]byte("a"))
	require.NoError(t, err)
	// Close will write an 11-byte EOF chunk.
	require.NoError(t, w.Close())

	r := NewReader(bytes.NewReader(backing), base.DiskFileNum(2))
	_, err = r.Next()
	require.NoError(t, err)
	// 4 bytes are left over from the first incarnation, which is not enough
	// for even a legacy header, so the reader should report EOF.
	if _, err = r.Next(); err != io.EOF {
		t.Fatalf("unexpected error: %v", err)
	}
}

func TestRecycleLogNumberOverflow(t *testing.T) {
	// We truncate log numbers to 32 bits when writing to the WAL. Test log
	// recycling at the wraparound point, ensuring that EOF chunks are
	// interpreted correctly.

	backing := make([]byte, 27)
	w := NewLogWriter(bytes.NewBuffer(backing[:0]), base.DiskFileNum(math.MaxUint32), LogWriterConfig{
		WALFsyncLatency: prometheus.NewHistogram(prometheus.HistogramOpts{})})
	// Writes a chunk with an 11-byte header + 5-byte payload.
	_, err := w.WriteRecord([]byte("aaaaa"))
	require.NoError(t, err)
	// Close will write an 11-byte EOF chunk.
	require.NoError(t, w.Close())

	w = NewLogWriter(bytes.NewBuffer(backing[:0]), base.DiskFileNum(math.MaxUint32+1), LogWriterConfig{
		WALFsyncLatency: prometheus.NewHistogram(prometheus.HistogramOpts{})})
	// Writes a chunk with an 11-byte header + 1-byte payload.
	_, err = w.WriteRecord([]byte("a"))
	require.NoError(t, err)
	// Close will write an 11-byte EOF chunk.
	require.NoError(t, w.Close())

	r := NewReader(bytes.NewReader(backing), base.DiskFileNum(math.MaxUint32+1))
	_, err = r.Next()
	require.NoError(t, err)
	// 4 bytes are left over from the first incarnation, which is not enough
	// for even a legacy header, so the reader should report EOF.
	if _, err = r.Next(); err != io.EOF {
		t.Fatalf("unexpected error: %v", err)
	}
}

func TestRecycleLogWithPartialRecord(t *testing.T) {
	const recordSize = (blockSize * 3) / 2

	// Write a record that is larger than the log block size.
	backing1 := make([]byte, 2*blockSize)
	w := NewLogWriter(bytes.NewBuffer(backing1[:0]), base.DiskFileNum(1), LogWriterConfig{
		WALFsyncLatency: prometheus.NewHistogram(prometheus.HistogramOpts{})})
	_, err := w.WriteRecord(bytes.Repeat([]byte("a"), recordSize))
	require.NoError(t, err)
	require.NoError(t, w.Close())

	// Write another record that is larger than the block size to a new
	// incarnation of the WAL.
	backing2 := make([]byte, 2*blockSize)
	w = NewLogWriter(bytes.NewBuffer(backing2[:0]), base.DiskFileNum(2), LogWriterConfig{
		WALFsyncLatency: prometheus.NewHistogram(prometheus.HistogramOpts{})})
	_, err = w.WriteRecord(bytes.Repeat([]byte("b"), recordSize))
	require.NoError(t, err)
	require.NoError(t, w.Close())

	// Copy the second block from the first WAL into the second block of the
	// second WAL. This produces a scenario where it appears we crashed after
	// writing the first block of the second WAL, but before writing the
	// second block.
	copy(backing2[blockSize:], backing1[blockSize:])

	// Verify that we can't read a partial record from the second WAL: the
	// continuation chunk in the second block carries the previous
	// incarnation's log number, so the reader reports ErrInvalidChunk.
	r := NewReader(bytes.NewReader(backing2), base.DiskFileNum(2))
	rr, err := r.Next()
	require.NoError(t, err)

	_, err = io.ReadAll(rr)
	require.Equal(t, err, ErrInvalidChunk)
}

func BenchmarkRecordWrite(b *testing.B) {
	for _, size := range []int{8, 16, 32, 64, 256, 1028, 4096, 65_536} {
		b.Run(fmt.Sprintf("size=%d", size), func(b *testing.B) {
			w := NewLogWriter(io.Discard, 0 /* logNum */, LogWriterConfig{
				WALFsyncLatency: prometheus.NewHistogram(prometheus.HistogramOpts{})})
			defer w.Close()
			buf := make([]byte, size)

			b.SetBytes(int64(len(buf)))
			b.ResetTimer()
			for i := 0; i < b.N; i++ {
				if _, err := w.WriteRecord(buf); err != nil {
					b.Fatal(err)
				}
			}
			b.StopTimer()
		})
	}
}
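
// BenchmarkRecordWrite measures LogWriter throughput across a range of record
// sizes. As a usage sketch (assuming a standard Go toolchain and that the
// working directory is the repository root), the benchmarks alone can be run
// with:
//
//	go test -run '^$' -bench BenchmarkRecordWrite ./record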