github.com/hasnat/dolt/go@v0.0.0-20210628190320-9eb5d843fbb7/store/nbs/aws_table_persister_test.go

// Copyright 2019 Dolthub, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// This file incorporates work covered by the following copyright and
// permission notice:
//
// Copyright 2016 Attic Labs, Inc. All rights reserved.
// Licensed under the Apache License, version 2.0:
// http://www.apache.org/licenses/LICENSE-2.0

package nbs

import (
	"context"
	"io"
	"math/rand"
	"sync"
	"testing"

	"github.com/aws/aws-sdk-go/aws"
	"github.com/aws/aws-sdk-go/aws/request"
	"github.com/aws/aws-sdk-go/service/s3"
	"github.com/stretchr/testify/assert"
	"github.com/stretchr/testify/require"

	"github.com/dolthub/dolt/go/store/util/sizecache"
)

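// parseIndexF wraps parseTableIndex in the func([]byte) (tableIndex, error) form that the
// awsTablePersister instances below take for their parseIndex field.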
var parseIndexF = func(bs []byte) (tableIndex, error) {
	return parseTableIndex(bs)
}

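// TestAWSTablePersisterPersist exercises Persist against fake S3 and DynamoDB backends: multipart
// and single-part uploads, table caching, persisting when no chunks are novel, and aborted uploads.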
func TestAWSTablePersisterPersist(t *testing.T) {
	calcPartSize := func(rdr chunkReader, maxPartNum uint64) uint64 {
		return maxTableSize(uint64(mustUint32(rdr.count())), mustUint64(rdr.uncompressedLen())) / maxPartNum
	}

	mt := newMemTable(testMemTableSize)
	for _, c := range testChunks {
		assert.True(t, mt.addChunk(computeAddr(c), c))
	}

	t.Run("PersistToS3", func(t *testing.T) {
		testIt := func(t *testing.T, ns string) {
			t.Run("InMultipleParts", func(t *testing.T) {
				assert := assert.New(t)
				s3svc, ddb := makeFakeS3(t), makeFakeDTS(makeFakeDDB(t), nil)
				ic := newIndexCache(1024)
				limits := awsLimits{partTarget: calcPartSize(mt, 3)}
				s3p := awsTablePersister{s3: s3svc, bucket: "bucket", ddb: ddb, limits: limits, indexCache: ic, ns: ns, parseIndex: parseIndexF}

				src, err := s3p.Persist(context.Background(), mt, nil, &Stats{})
				require.NoError(t, err)
				assert.NotNil(ic.get(mustAddr(src.hash())))

				if assert.True(mustUint32(src.count()) > 0) {
					if r, err := s3svc.readerForTableWithNamespace(ns, mustAddr(src.hash())); assert.NotNil(r) && assert.NoError(err) {
						assertChunksInReader(testChunks, r, assert)
					}
				}
			})

			t.Run("CacheTable", func(t *testing.T) {
				s3svc, ddb := makeFakeS3(t), makeFakeDTS(makeFakeDDB(t), nil)
				limits := awsLimits{partTarget: calcPartSize(mt, 3)}
				tc := &waitOnStoreTableCache{readers: map[addr]io.ReaderAt{}}
				s3p := awsTablePersister{s3: s3svc, bucket: "bucket", ddb: ddb, limits: limits, tc: tc, ns: ns, parseIndex: parseIndexF}

				// Persist and wait until tc.store() has completed
				tc.storeWG.Add(1)
				src, err := s3p.Persist(context.Background(), mt, nil, &Stats{})
				require.NoError(t, err)
				tc.storeWG.Wait()

				// Now, open the table that should have been cached by the above Persist() and read out all the chunks. All the reads should be serviced from tc.
				rdr, err := s3p.Open(context.Background(), mustAddr(src.hash()), mustUint32(src.count()), &Stats{})
				require.NoError(t, err)
				baseline := s3svc.getCount
				ch := make(chan extractRecord)
				go func() {
					defer close(ch)
					err := rdr.extract(context.Background(), ch)
					// Use assert rather than require here: FailNow must not be called from a
					// goroutine other than the one running the test.
					assert.NoError(t, err)
				}()
				for range ch {
				}
				assert.Zero(t, s3svc.getCount-baseline)
			})

			t.Run("InSinglePart", func(t *testing.T) {
				assert := assert.New(t)

				s3svc, ddb := makeFakeS3(t), makeFakeDTS(makeFakeDDB(t), nil)
				limits := awsLimits{partTarget: calcPartSize(mt, 1)}
				s3p := awsTablePersister{s3: s3svc, bucket: "bucket", ddb: ddb, limits: limits, ns: ns, parseIndex: parseIndexF}

				src, err := s3p.Persist(context.Background(), mt, nil, &Stats{})
				require.NoError(t, err)
				if assert.True(mustUint32(src.count()) > 0) {
					if r, err := s3svc.readerForTableWithNamespace(ns, mustAddr(src.hash())); assert.NotNil(r) && assert.NoError(err) {
						assertChunksInReader(testChunks, r, assert)
					}
				}
			})

			t.Run("NoNewChunks", func(t *testing.T) {
				assert := assert.New(t)

				mt := newMemTable(testMemTableSize)
				existingTable := newMemTable(testMemTableSize)

				for _, c := range testChunks {
					assert.True(mt.addChunk(computeAddr(c), c))
					assert.True(existingTable.addChunk(computeAddr(c), c))
				}

				s3svc, ddb := makeFakeS3(t), makeFakeDTS(makeFakeDDB(t), nil)
				limits := awsLimits{partTarget: 1 << 10}
				s3p := awsTablePersister{s3: s3svc, bucket: "bucket", ddb: ddb, limits: limits, ns: ns, parseIndex: parseIndexF}

				src, err := s3p.Persist(context.Background(), mt, existingTable, &Stats{})
				require.NoError(t, err)
				assert.True(mustUint32(src.count()) == 0)

				_, present := s3svc.data[mustAddr(src.hash()).String()]
				assert.False(present)
			})

			t.Run("Abort", func(t *testing.T) {
				assert := assert.New(t)

				s3svc := &failingFakeS3{makeFakeS3(t), sync.Mutex{}, 1}
				ddb := makeFakeDTS(makeFakeDDB(t), nil)
				limits := awsLimits{partTarget: calcPartSize(mt, 4)}
				s3p := awsTablePersister{s3: s3svc, bucket: "bucket", ddb: ddb, limits: limits, ns: ns, parseIndex: parseIndexF}

				_, err := s3p.Persist(context.Background(), mt, nil, &Stats{})
				assert.Error(err)
			})
		}
		t.Run("WithoutNamespace", func(t *testing.T) {
			testIt(t, "")
		})
		t.Run("WithNamespace", func(t *testing.T) {
			testIt(t, "a-namespace-here")
		})
	})

	t.Run("PersistToDynamo", func(t *testing.T) {
		t.Run("Success", func(t *testing.T) {
			t.SkipNow()
			assert := assert.New(t)

			ddb := makeFakeDDB(t)
			s3svc, dts := makeFakeS3(t), makeFakeDTS(ddb, nil)
			limits := awsLimits{itemMax: maxDynamoItemSize, chunkMax: 2 * mustUint32(mt.count())}
			s3p := awsTablePersister{s3: s3svc, bucket: "bucket", ddb: dts, limits: limits, ns: "", parseIndex: parseIndexF}

			src, err := s3p.Persist(context.Background(), mt, nil, &Stats{})
			require.NoError(t, err)
			if assert.True(mustUint32(src.count()) > 0) {
				if r, err := ddb.readerForTable(mustAddr(src.hash())); assert.NotNil(r) && assert.NoError(err) {
					assertChunksInReader(testChunks, r, assert)
				}
			}
		})

		t.Run("CacheOnOpen", func(t *testing.T) {
			t.SkipNow()
			assert := assert.New(t)

			tc := sizecache.New(maxDynamoItemSize)
			ddb := makeFakeDDB(t)
			s3svc, dts := makeFakeS3(t), makeFakeDTS(ddb, tc)
			limits := awsLimits{itemMax: maxDynamoItemSize, chunkMax: 2 * mustUint32(mt.count())}

			s3p := awsTablePersister{s3: s3svc, bucket: "bucket", ddb: dts, limits: limits, ns: "", parseIndex: parseIndexF}

			tableData, name, err := buildTable(testChunks)
			require.NoError(t, err)
			ddb.putData(fmtTableName(name), tableData)

			src, err := s3p.Open(context.Background(), name, uint32(len(testChunks)), &Stats{})
			require.NoError(t, err)
			if assert.True(mustUint32(src.count()) > 0) {
				if r, err := ddb.readerForTable(mustAddr(src.hash())); assert.NotNil(r) && assert.NoError(err) {
					assertChunksInReader(testChunks, r, assert)
				}
				if data, present := tc.Get(name); assert.True(present) {
					assert.Equal(tableData, data.([]byte))
				}
			}
		})

		t.Run("FailTooManyChunks", func(t *testing.T) {
			t.SkipNow()
			assert := assert.New(t)

			ddb := makeFakeDDB(t)
			s3svc, dts := makeFakeS3(t), makeFakeDTS(ddb, nil)
			limits := awsLimits{itemMax: maxDynamoItemSize, chunkMax: 1, partTarget: calcPartSize(mt, 1)}
			s3p := awsTablePersister{s3: s3svc, bucket: "bucket", ddb: dts, limits: limits, ns: "", parseIndex: parseIndexF}

			src, err := s3p.Persist(context.Background(), mt, nil, &Stats{})
			require.NoError(t, err)
			if assert.True(mustUint32(src.count()) > 0) {
				if r, err := ddb.readerForTable(mustAddr(src.hash())); assert.Nil(r) && assert.NoError(err) {
					if r, err := s3svc.readerForTable(mustAddr(src.hash())); assert.NotNil(r) && assert.NoError(err) {
						assertChunksInReader(testChunks, r, assert)
					}
				}
			}
		})

		t.Run("FailItemTooBig", func(t *testing.T) {
			t.SkipNow()
			assert := assert.New(t)

			ddb := makeFakeDDB(t)
			s3svc, dts := makeFakeS3(t), makeFakeDTS(ddb, nil)
			limits := awsLimits{itemMax: 0, chunkMax: 2 * mustUint32(mt.count()), partTarget: calcPartSize(mt, 1)}
			s3p := awsTablePersister{s3: s3svc, bucket: "bucket", ddb: dts, limits: limits, ns: "", parseIndex: parseIndexF}

			src, err := s3p.Persist(context.Background(), mt, nil, &Stats{})
			require.NoError(t, err)
			if assert.True(mustUint32(src.count()) > 0) {
				if r, err := ddb.readerForTable(mustAddr(src.hash())); assert.Nil(r) && assert.NoError(err) {
					if r, err := s3svc.readerForTable(mustAddr(src.hash())); assert.NotNil(r) && assert.NoError(err) {
						assertChunksInReader(testChunks, r, assert)
					}
				}
			}
		})
	})
}
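
// makeFakeDTS wraps a fake DynamoDB service in a ddbTableStore using the fixed table name "table"
// and the supplied size cache.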
func makeFakeDTS(ddb ddbsvc, tc *sizecache.SizeCache) *ddbTableStore {
	return &ddbTableStore{ddb, "table", nil, tc}
}

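// waitOnStoreTableCache is a test double for the persister's table cache; storeWG lets a test
// block until store() has been called.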
type waitOnStoreTableCache struct {
	readers map[addr]io.ReaderAt
	mu      sync.RWMutex
	storeWG sync.WaitGroup
}

func (mtc *waitOnStoreTableCache) checkout(h addr) (io.ReaderAt, error) {
	mtc.mu.RLock()
	defer mtc.mu.RUnlock()
	return mtc.readers[h], nil
}

func (mtc *waitOnStoreTableCache) checkin(h addr) error {
	return nil
}

func (mtc *waitOnStoreTableCache) store(h addr, data io.Reader, size uint64) error {
	defer mtc.storeWG.Done()
	mtc.mu.Lock()
	defer mtc.mu.Unlock()
	mtc.readers[h] = data.(io.ReaderAt)
	return nil
}

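// failingFakeS3 succeeds for the first numSuccesses UploadPart calls and then returns a
// MalformedXML error, exercising the abort path of a multipart upload.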
type failingFakeS3 struct {
	*fakeS3
	mu           sync.Mutex
	numSuccesses int
}

func (m *failingFakeS3) UploadPartWithContext(ctx aws.Context, input *s3.UploadPartInput, opts ...request.Option) (*s3.UploadPartOutput, error) {
	m.mu.Lock()
	defer m.mu.Unlock()
	if m.numSuccesses > 0 {
		m.numSuccesses--
		return m.fakeS3.UploadPartWithContext(ctx, input)
	}
	return nil, mockAWSError("MalformedXML")
}

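// TestAWSTablePersisterDividePlan checks that dividePlan turns sources with enough chunk data into
// upload-copy parts bounded by [minPartSize, maxPartSize] and routes sources below minPartSize to
// manual copies.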
func TestAWSTablePersisterDividePlan(t *testing.T) {
	assert := assert.New(t)
	minPartSize, maxPartSize := uint64(16), uint64(32)
	tooSmall := bytesToChunkSource(t, []byte("a"))
	justRight := bytesToChunkSource(t, []byte("123456789"), []byte("abcdefghi"))
	bigUns := [][]byte{make([]byte, maxPartSize-1), make([]byte, maxPartSize-1)}
	for _, b := range bigUns {
		rand.Read(b)
	}
	tooBig := bytesToChunkSource(t, bigUns...)

	sources := chunkSources{justRight, tooBig, tooSmall}
	plan, err := planConjoin(sources, &Stats{})
	require.NoError(t, err)
	copies, manuals, _, err := dividePlan(context.Background(), plan, minPartSize, maxPartSize)
	require.NoError(t, err)

	perTableDataSize := map[string]int64{}
	for _, c := range copies {
		assert.True(minPartSize <= uint64(c.srcLen))
		assert.True(uint64(c.srcLen) <= maxPartSize)
		totalSize := perTableDataSize[c.name]
		totalSize += c.srcLen
		perTableDataSize[c.name] = totalSize
	}
	assert.Len(perTableDataSize, 2)
	assert.Contains(perTableDataSize, mustAddr(justRight.hash()).String())
	assert.Contains(perTableDataSize, mustAddr(tooBig.hash()).String())
	ti, err := justRight.index()
	require.NoError(t, err)
	assert.EqualValues(calcChunkDataLen(ti), perTableDataSize[mustAddr(justRight.hash()).String()])
	ti, err = tooBig.index()
	require.NoError(t, err)
	assert.EqualValues(calcChunkDataLen(ti), perTableDataSize[mustAddr(tooBig.hash()).String()])

	assert.Len(manuals, 1)
	ti, err = tooSmall.index()
	require.NoError(t, err)
	assert.EqualValues(calcChunkDataLen(ti), manuals[0].dstEnd-manuals[0].dstStart)
}

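// TestAWSTablePersisterCalcPartSizes verifies that splitOnMaxSize cuts a data length into part
// sizes that each fall within the min/max bounds and that sum back to the original length.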
func TestAWSTablePersisterCalcPartSizes(t *testing.T) {
	assert := assert.New(t)
	min, max := uint64(8*1<<10), uint64(1+(16*1<<10))

	testPartSizes := func(dataLen uint64) {
		lengths := splitOnMaxSize(dataLen, max)
		var sum int64
		for _, l := range lengths {
			assert.True(uint64(l) >= min)
			assert.True(uint64(l) <= max)
			sum += l
		}
		assert.EqualValues(dataLen, sum)
	}

	testPartSizes(1 << 20)
	testPartSizes(max + 1)
	testPartSizes(10*max - 1)
	testPartSizes(max + max/2)
}

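// TestAWSTablePersisterConjoinAll covers ConjoinAll over sources too small to upload-copy
// individually, sources larger than the maximum part size, and mixes of the two.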
func TestAWSTablePersisterConjoinAll(t *testing.T) {
	targetPartSize := uint64(1024)
	minPartSize, maxPartSize := targetPartSize, 5*targetPartSize
	maxItemSize, maxChunkCount := int(targetPartSize/2), uint32(4)

	ic := newIndexCache(1024)
	rl := make(chan struct{}, 8)
	defer close(rl)

	newPersister := func(s3svc s3svc, ddb *ddbTableStore) awsTablePersister {
		return awsTablePersister{
			s3svc,
			"bucket",
			rl,
			nil,
			ddb,
			awsLimits{targetPartSize, minPartSize, maxPartSize, maxItemSize, maxChunkCount},
			ic,
			"",
			parseIndexF,
		}
	}

	var smallChunks [][]byte
	rnd := rand.New(rand.NewSource(0))
	for smallChunkTotal := uint64(0); smallChunkTotal <= uint64(minPartSize); {
		small := make([]byte, minPartSize/5)
		rnd.Read(small)
		src := bytesToChunkSource(t, small)
		smallChunks = append(smallChunks, small)
		ti, err := src.index()
		require.NoError(t, err)
		smallChunkTotal += calcChunkDataLen(ti)
	}

	t.Run("Small", func(t *testing.T) {
		makeSources := func(s3p awsTablePersister, chunks [][]byte) (sources chunkSources) {
			for i := 0; i < len(chunks); i++ {
				mt := newMemTable(uint64(2 * targetPartSize))
				mt.addChunk(computeAddr(chunks[i]), chunks[i])
				cs, err := s3p.Persist(context.Background(), mt, nil, &Stats{})
				require.NoError(t, err)
				sources = append(sources, cs)
			}
			return
		}

		t.Run("TotalUnderMinSize", func(t *testing.T) {
			assert := assert.New(t)
			s3svc, ddb := makeFakeS3(t), makeFakeDTS(makeFakeDDB(t), nil)
			s3p := newPersister(s3svc, ddb)

			chunks := smallChunks[:len(smallChunks)-1]
			sources := makeSources(s3p, chunks)
			src, err := s3p.ConjoinAll(context.Background(), sources, &Stats{})
			require.NoError(t, err)
			assert.NotNil(ic.get(mustAddr(src.hash())))

			if assert.True(mustUint32(src.count()) > 0) {
				if r, err := s3svc.readerForTable(mustAddr(src.hash())); assert.NotNil(r) && assert.NoError(err) {
					assertChunksInReader(chunks, r, assert)
				}
			}
		})

		t.Run("TotalOverMinSize", func(t *testing.T) {
			assert := assert.New(t)
			s3svc, ddb := makeFakeS3(t), makeFakeDTS(makeFakeDDB(t), nil)
			s3p := newPersister(s3svc, ddb)

			sources := makeSources(s3p, smallChunks)
			src, err := s3p.ConjoinAll(context.Background(), sources, &Stats{})
			require.NoError(t, err)
			assert.NotNil(ic.get(mustAddr(src.hash())))

			if assert.True(mustUint32(src.count()) > 0) {
				if r, err := s3svc.readerForTable(mustAddr(src.hash())); assert.NotNil(r) && assert.NoError(err) {
					assertChunksInReader(smallChunks, r, assert)
				}
			}
		})
	})

	bigUns1 := [][]byte{make([]byte, maxPartSize-1), make([]byte, maxPartSize-1)}
	bigUns2 := [][]byte{make([]byte, maxPartSize-1), make([]byte, maxPartSize-1)}
	for _, bu := range [][][]byte{bigUns1, bigUns2} {
		for _, b := range bu {
			rand.Read(b)
		}
	}

	t.Run("AllOverMax", func(t *testing.T) {
		assert := assert.New(t)
		s3svc, ddb := makeFakeS3(t), makeFakeDTS(makeFakeDDB(t), nil)
		s3p := newPersister(s3svc, ddb)

		// Make 2 chunk sources that each have >maxPartSize chunk data
		sources := make(chunkSources, 2)
		for i, bu := range [][][]byte{bigUns1, bigUns2} {
			mt := newMemTable(uint64(2 * maxPartSize))
			for _, b := range bu {
				mt.addChunk(computeAddr(b), b)
			}

			var err error
			sources[i], err = s3p.Persist(context.Background(), mt, nil, &Stats{})
			require.NoError(t, err)
		}
		src, err := s3p.ConjoinAll(context.Background(), sources, &Stats{})
		require.NoError(t, err)
		assert.NotNil(ic.get(mustAddr(src.hash())))

		if assert.True(mustUint32(src.count()) > 0) {
			if r, err := s3svc.readerForTable(mustAddr(src.hash())); assert.NotNil(r) && assert.NoError(err) {
				assertChunksInReader(bigUns1, r, assert)
				assertChunksInReader(bigUns2, r, assert)
			}
		}
	})

	t.Run("SomeOverMax", func(t *testing.T) {
		assert := assert.New(t)
		s3svc, ddb := makeFakeS3(t), makeFakeDTS(makeFakeDDB(t), nil)
		s3p := newPersister(s3svc, ddb)

		// Add one chunk source that has >maxPartSize data
		mtb := newMemTable(uint64(2 * maxPartSize))
		for _, b := range bigUns1 {
			mtb.addChunk(computeAddr(b), b)
		}

		// Follow up with a chunk source where minPartSize < data size < maxPartSize
		medChunks := make([][]byte, 2)
		mt := newMemTable(uint64(2 * maxPartSize))
		for i := range medChunks {
			medChunks[i] = make([]byte, minPartSize+1)
			rand.Read(medChunks[i])
			mt.addChunk(computeAddr(medChunks[i]), medChunks[i])
		}
		cs1, err := s3p.Persist(context.Background(), mt, nil, &Stats{})
		require.NoError(t, err)
		cs2, err := s3p.Persist(context.Background(), mtb, nil, &Stats{})
		require.NoError(t, err)
		sources := chunkSources{cs1, cs2}

		src, err := s3p.ConjoinAll(context.Background(), sources, &Stats{})
		require.NoError(t, err)
		assert.NotNil(ic.get(mustAddr(src.hash())))

		if assert.True(mustUint32(src.count()) > 0) {
			if r, err := s3svc.readerForTable(mustAddr(src.hash())); assert.NotNil(r) && assert.NoError(err) {
				assertChunksInReader(bigUns1, r, assert)
				assertChunksInReader(medChunks, r, assert)
			}
		}
	})

	t.Run("Mix", func(t *testing.T) {
		assert := assert.New(t)
		s3svc, ddb := makeFakeS3(t), makeFakeDTS(makeFakeDDB(t), nil)
		s3p := newPersister(s3svc, ddb)

		// Start with small tables. Since total > minPartSize, will require more than one part to upload.
		sources := make(chunkSources, len(smallChunks))
		for i := 0; i < len(smallChunks); i++ {
			mt := newMemTable(uint64(2 * targetPartSize))
			mt.addChunk(computeAddr(smallChunks[i]), smallChunks[i])
			var err error
			sources[i], err = s3p.Persist(context.Background(), mt, nil, &Stats{})
			require.NoError(t, err)
		}

		// Now, add a table with big chunks that will require more than one upload copy part.
		mt := newMemTable(uint64(2 * maxPartSize))
		for _, b := range bigUns1 {
			mt.addChunk(computeAddr(b), b)
		}

		var err error
		cs, err := s3p.Persist(context.Background(), mt, nil, &Stats{})
		require.NoError(t, err)
		sources = append(sources, cs)

		// Last, some tables that should be directly upload-copyable
		medChunks := make([][]byte, 2)
		mt = newMemTable(uint64(2 * maxPartSize))
		for i := range medChunks {
			medChunks[i] = make([]byte, minPartSize+1)
			rand.Read(medChunks[i])
			mt.addChunk(computeAddr(medChunks[i]), medChunks[i])
		}

		cs, err = s3p.Persist(context.Background(), mt, nil, &Stats{})
		require.NoError(t, err)
		sources = append(sources, cs)

		src, err := s3p.ConjoinAll(context.Background(), sources, &Stats{})
		require.NoError(t, err)
		assert.NotNil(ic.get(mustAddr(src.hash())))

		if assert.True(mustUint32(src.count()) > 0) {
			if r, err := s3svc.readerForTable(mustAddr(src.hash())); assert.NotNil(r) && assert.NoError(err) {
				assertChunksInReader(smallChunks, r, assert)
				assertChunksInReader(bigUns1, r, assert)
				assertChunksInReader(medChunks, r, assert)
			}
		}
	})
}

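// bytesToChunkSource writes the given byte slices as chunks into an in-memory table file and
// returns a chunkSource that reads from those bytes.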
func bytesToChunkSource(t *testing.T, bs ...[]byte) chunkSource {
	sum := 0
	for _, b := range bs {
		sum += len(b)
	}
	maxSize := maxTableSize(uint64(len(bs)), uint64(sum))
	buff := make([]byte, maxSize)
	tw := newTableWriter(buff, nil)
	for _, b := range bs {
		tw.addChunk(computeAddr(b), b)
	}
	tableSize, name, err := tw.finish()
	require.NoError(t, err)
	data := buff[:tableSize]
	ti, err := parseTableIndex(data)
	require.NoError(t, err)
	rdr := newTableReader(ti, tableReaderAtFromBytes(data), fileBlockSize)
	return chunkSourceAdapter{rdr, name}
}
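
// persistTestChunksExample is an illustrative sketch, not called by the tests above: it shows the
// minimal wiring, borrowed from TestAWSTablePersisterPersist and using the same fakes and field
// names, needed to push a memtable of testChunks through awsTablePersister. The bucket name and
// partTarget are arbitrary test values; a real persister would be constructed with genuine S3 and
// DynamoDB clients instead of the fakes.
func persistTestChunksExample(t *testing.T) (chunkSource, error) {
	mt := newMemTable(testMemTableSize)
	for _, c := range testChunks {
		mt.addChunk(computeAddr(c), c)
	}

	s3svc, ddb := makeFakeS3(t), makeFakeDTS(makeFakeDDB(t), nil)
	s3p := awsTablePersister{
		s3:         s3svc,
		bucket:     "bucket",
		ddb:        ddb,
		limits:     awsLimits{partTarget: 1 << 10},
		indexCache: newIndexCache(1024),
		ns:         "",
		parseIndex: parseIndexF,
	}

	// Persist uploads the memtable as a table file (split into parts when it exceeds partTarget)
	// and returns a chunkSource for reading it back.
	return s3p.Persist(context.Background(), mt, nil, &Stats{})
}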