github.com/dolthub/dolt/go@v0.40.5-0.20240520175717-68db7794bea6/store/datas/pull/pull_table_file_writer_test.go

// Copyright 2024 Dolthub, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package pull

import (
	"context"
	"crypto/rand"
	"errors"
	"io"
	"sync/atomic"
	"testing"
	"time"

	"github.com/stretchr/testify/assert"

	"github.com/dolthub/dolt/go/store/chunks"
	"github.com/dolthub/dolt/go/store/nbs"
)

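// TestPullTableFileWriter exercises PullTableFileWriter against small fake
// DestStore implementations. It covers batching chunks into table files,
// concurrent uploads, error propagation from WriteTableFile and
// AddTableFilesToManifest, and the upload statistics reported by GetStats.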
func TestPullTableFileWriter(t *testing.T) {
	t.Run("Empty", func(t *testing.T) {
		var s noopTableFileDestStore
		wr := NewPullTableFileWriter(context.Background(), PullTableFileWriterConfig{
			ConcurrentUploads:    1,
			ChunksPerFile:        8,
			MaximumBufferedFiles: 1,
			TempDir:              t.TempDir(),
			DestStore:            &s,
		})
		assert.NoError(t, wr.Close())
		assert.Equal(t, s.writeCalled.Load(), uint32(0))
		assert.Equal(t, s.addCalled, 0)
	})

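	// Added chunks are batched into table files: with ChunksPerFile=8, 32
	// chunks produce 4 uploaded files, while a large ChunksPerFile leaves a
	// single partial file that is still uploaded on Close.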
	t.Run("AddSomeChunks", func(t *testing.T) {
		t.Run("FinishOnFullWriter", func(t *testing.T) {
			var s noopTableFileDestStore
			wr := NewPullTableFileWriter(context.Background(), PullTableFileWriterConfig{
				ConcurrentUploads:    1,
				ChunksPerFile:        8,
				MaximumBufferedFiles: 1,
				TempDir:              t.TempDir(),
				DestStore:            &s,
			})

			for i := 0; i < 32; i++ {
				bs := make([]byte, 1024)
				_, err := rand.Read(bs)
				assert.NoError(t, err)
				chk := chunks.NewChunk(bs)
				cChk := nbs.ChunkToCompressedChunk(chk)
				err = wr.AddCompressedChunk(context.Background(), cChk)
				assert.NoError(t, err)
			}

			assert.NoError(t, wr.Close())
			assert.Equal(t, s.writeCalled.Load(), uint32(4))
			assert.Equal(t, s.addCalled, 1)
			assert.Len(t, s.manifest, 4)
		})

		t.Run("FinishOnPartialFile", func(t *testing.T) {
			var s noopTableFileDestStore
			wr := NewPullTableFileWriter(context.Background(), PullTableFileWriterConfig{
				ConcurrentUploads:    1,
				ChunksPerFile:        1024,
				MaximumBufferedFiles: 1,
				TempDir:              t.TempDir(),
				DestStore:            &s,
			})

			for i := 0; i < 32; i++ {
				bs := make([]byte, 1024)
				_, err := rand.Read(bs)
				assert.NoError(t, err)
				chk := chunks.NewChunk(bs)
				cChk := nbs.ChunkToCompressedChunk(chk)
				err = wr.AddCompressedChunk(context.Background(), cChk)
				assert.NoError(t, err)
			}

			assert.NoError(t, wr.Close())
			assert.Equal(t, s.writeCalled.Load(), uint32(1))
			assert.Equal(t, s.addCalled, 1)
			assert.Len(t, s.manifest, 1)
		})
	})

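	// With a 50ms delay per WriteTableFile and 32 concurrent uploads, the 32
	// resulting files (8*32 chunks at 8 chunks per file) should finish in well
	// under a second only if uploads actually overlap.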
	t.Run("ConcurrentUpload", func(t *testing.T) {
		var s noopTableFileDestStore
		s.writeDelay = 50 * time.Millisecond
		wr := NewPullTableFileWriter(context.Background(), PullTableFileWriterConfig{
			ConcurrentUploads:    32,
			ChunksPerFile:        8,
			MaximumBufferedFiles: 1,
			TempDir:              t.TempDir(),
			DestStore:            &s,
		})

		start := time.Now()

		for i := 0; i < 8*32; i++ {
			bs := make([]byte, 1024)
			_, err := rand.Read(bs)
			assert.NoError(t, err)
			chk := chunks.NewChunk(bs)
			cChk := nbs.ChunkToCompressedChunk(chk)
			err = wr.AddCompressedChunk(context.Background(), cChk)
			assert.NoError(t, err)
		}

		assert.NoError(t, wr.Close())
		assert.Equal(t, s.writeCalled.Load(), uint32(32))
		assert.Equal(t, s.addCalled, 1)
		assert.Len(t, s.manifest, 32)
		assert.True(t, time.Since(start) < time.Second)
	})

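	// When WriteTableFile fails, the error should surface from Close and,
	// eventually, from AddCompressedChunk itself, and the manifest is never
	// updated.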
	t.Run("ErrorOnUpload", func(t *testing.T) {
		t.Run("ErrAtClose", func(t *testing.T) {
			var s errTableFileDestStore
			wr := NewPullTableFileWriter(context.Background(), PullTableFileWriterConfig{
				ConcurrentUploads:    1,
				ChunksPerFile:        8,
				MaximumBufferedFiles: 0,
				TempDir:              t.TempDir(),
				DestStore:            &s,
			})

			for i := 0; i < 8; i++ {
				bs := make([]byte, 1024)
				_, err := rand.Read(bs)
				assert.NoError(t, err)
				chk := chunks.NewChunk(bs)
				cChk := nbs.ChunkToCompressedChunk(chk)
				err = wr.AddCompressedChunk(context.Background(), cChk)
				assert.NoError(t, err)
			}

			assert.EqualError(t, wr.Close(), "this dest store throws an error")
			assert.Equal(t, s.addCalled, 0)
		})

		t.Run("ErrAtAdd", func(t *testing.T) {
			var s errTableFileDestStore
			wr := NewPullTableFileWriter(context.Background(), PullTableFileWriterConfig{
				ConcurrentUploads:    1,
				ChunksPerFile:        8,
				MaximumBufferedFiles: 0,
				TempDir:              t.TempDir(),
				DestStore:            &s,
			})

			for i := 0; i < 8; i++ {
				bs := make([]byte, 1024)
				_, err := rand.Read(bs)
				assert.NoError(t, err)
				chk := chunks.NewChunk(bs)
				cChk := nbs.ChunkToCompressedChunk(chk)
				err = wr.AddCompressedChunk(context.Background(), cChk)
				assert.NoError(t, err)
			}

			// We should eventually see the upload error from AddCompressedChunk
			for i := 0; i < 1024; i++ {
				bs := make([]byte, 1024)
				_, err := rand.Read(bs)
				assert.NoError(t, err)
				chk := chunks.NewChunk(bs)
				cChk := nbs.ChunkToCompressedChunk(chk)
				err = wr.AddCompressedChunk(context.Background(), cChk)
				if err != nil {
					assert.EqualError(t, err, "this dest store throws an error")
					assert.EqualError(t, wr.Close(), "this dest store throws an error")
					assert.Equal(t, s.addCalled, 0)
					return
				}
			}

			t.Errorf("Did not see an error from AddCompressedChunk after concurrent upload failed")
		})
	})

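	// When only AddTableFilesToManifest fails, Close should still attempt the
	// manifest update once and report the error.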
	t.Run("ErrorOnAdd", func(t *testing.T) {
		var s errTableFileDestStore
		s.onAdd = true
		wr := NewPullTableFileWriter(context.Background(), PullTableFileWriterConfig{
			ConcurrentUploads:    1,
			ChunksPerFile:        8,
			MaximumBufferedFiles: 0,
			TempDir:              t.TempDir(),
			DestStore:            &s,
		})

		for i := 0; i < 8; i++ {
			bs := make([]byte, 1024)
			_, err := rand.Read(bs)
			assert.NoError(t, err)
			chk := chunks.NewChunk(bs)
			cChk := nbs.ChunkToCompressedChunk(chk)
			err = wr.AddCompressedChunk(context.Background(), cChk)
			assert.NoError(t, err)
		}

		assert.EqualError(t, wr.Close(), "this dest store throws an error")
		assert.Equal(t, s.addCalled, 1)
	})

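	// GetStats should report buffered bytes before an upload completes and
	// matching finished bytes once the gated WriteTableFile is allowed to run.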
	t.Run("SimpleStats", func(t *testing.T) {
		s := testDataTableFileDestStore{
			atWriteTableFile:   make(chan struct{}),
			doWriteTableFile:   make(chan struct{}),
			doneWriteTableFile: make(chan struct{}),
		}
		wr := NewPullTableFileWriter(context.Background(), PullTableFileWriterConfig{
			ConcurrentUploads:    1,
			ChunksPerFile:        8,
			MaximumBufferedFiles: 0,
			TempDir:              t.TempDir(),
			DestStore:            &s,
		})

		for i := 0; i < 8; i++ {
			bs := make([]byte, 1024)
			_, err := rand.Read(bs)
			assert.NoError(t, err)
			chk := chunks.NewChunk(bs)
			cChk := nbs.ChunkToCompressedChunk(chk)
			err = wr.AddCompressedChunk(context.Background(), cChk)
			assert.NoError(t, err)
		}

		<-s.atWriteTableFile

		wrStats := wr.GetStats()
		assert.Equal(t, wrStats.FinishedSendBytes, uint64(0))
		assert.Greater(t, wrStats.BufferedSendBytes, uint64(8*1024))

		close(s.doWriteTableFile)
		<-s.doneWriteTableFile

		wrStats = wr.GetStats()
		assert.Greater(t, wrStats.FinishedSendBytes, uint64(8*1024))
		assert.Equal(t, wrStats.FinishedSendBytes, wrStats.BufferedSendBytes)

		assert.NoError(t, wr.Close())
	})

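	// With ConcurrentUploads=4 and a gated DestStore, four WriteTableFile
	// calls should be in flight at the same time.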
	t.Run("UploadsAreParallel", func(t *testing.T) {
		s := testDataTableFileDestStore{
			atWriteTableFile:   make(chan struct{}),
			doWriteTableFile:   make(chan struct{}),
			doneWriteTableFile: make(chan struct{}),
		}
		wr := NewPullTableFileWriter(context.Background(), PullTableFileWriterConfig{
			ConcurrentUploads:    4,
			ChunksPerFile:        8,
			MaximumBufferedFiles: 0,
			TempDir:              t.TempDir(),
			DestStore:            &s,
		})

		for i := 0; i < 32; i++ {
			bs := make([]byte, 1024)
			_, err := rand.Read(bs)
			assert.NoError(t, err)
			chk := chunks.NewChunk(bs)
			cChk := nbs.ChunkToCompressedChunk(chk)
			err = wr.AddCompressedChunk(context.Background(), cChk)
			assert.NoError(t, err)
		}

		for i := 0; i < 4; i++ {
			<-s.atWriteTableFile
		}

		close(s.doWriteTableFile)

		for i := 0; i < 4; i++ {
			<-s.doneWriteTableFile
		}

		assert.NoError(t, wr.Close())
	})
}

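// noopTableFileDestStore is a DestStore that discards uploaded table file
// data, optionally sleeping writeDelay per write. It records how many times
// WriteTableFile and AddTableFilesToManifest were called and keeps the last
// manifest map it was given.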
type noopTableFileDestStore struct {
	writeDelay  time.Duration
	writeCalled atomic.Uint32
	addCalled   int
	manifest    map[string]int
}

func (s *noopTableFileDestStore) WriteTableFile(ctx context.Context, id string, numChunks int, contentHash []byte, getRd func() (io.ReadCloser, uint64, error)) error {
	if s.writeDelay > 0 {
		time.Sleep(s.writeDelay)
	}
	s.writeCalled.Add(1)
	rd, _, _ := getRd()
	if rd != nil {
		rd.Close()
	}
	return nil
}

func (s *noopTableFileDestStore) AddTableFilesToManifest(ctx context.Context, fileIdToNumChunks map[string]int) error {
	s.addCalled += 1
	s.manifest = fileIdToNumChunks
	return nil
}

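// testDataTableFileDestStore gates WriteTableFile on channels so tests can
// observe uploads in flight: each call signals atWriteTableFile, blocks until
// doWriteTableFile is closed, reads the table file contents, and then signals
// doneWriteTableFile. Manifest updates are a no-op.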
type testDataTableFileDestStore struct {
	atWriteTableFile   chan struct{}
	doWriteTableFile   chan struct{}
	doneWriteTableFile chan struct{}
}

func (s *testDataTableFileDestStore) WriteTableFile(ctx context.Context, id string, numChunks int, contentHash []byte, getRd func() (io.ReadCloser, uint64, error)) error {
	s.atWriteTableFile <- struct{}{}
	<-s.doWriteTableFile
	defer func() {
		s.doneWriteTableFile <- struct{}{}
	}()
	rd, _, err := getRd()
	if err != nil {
		return err
	}
	defer rd.Close()
	_, err = io.ReadAll(rd)
	if err != nil {
		return err
	}
	return nil
}

func (s *testDataTableFileDestStore) AddTableFilesToManifest(context.Context, map[string]int) error {
	return nil
}

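// errTableFileDestStore fails WriteTableFile by default; when onAdd is set it
// instead succeeds on writes and fails AddTableFilesToManifest. It counts how
// many times the manifest update was attempted.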
type errTableFileDestStore struct {
	onAdd     bool
	addCalled int
}

func (s *errTableFileDestStore) WriteTableFile(ctx context.Context, id string, numChunks int, contentHash []byte, getRd func() (io.ReadCloser, uint64, error)) error {
	rd, _, _ := getRd()
	if rd != nil {
		rd.Close()
	}
	if s.onAdd {
		return nil
	}
	return errors.New("this dest store throws an error")
}

func (s *errTableFileDestStore) AddTableFilesToManifest(ctx context.Context, fileIdToNumChunks map[string]int) error {
	s.addCalled += 1
	if s.onAdd {
		return errors.New("this dest store throws an error")
	}
	return nil
}