github.com/dolthub/dolt/go@v0.40.5-0.20240520175717-68db7794bea6/store/blobstore/blobstore_test.go (about)

     1  // Copyright 2019 Dolthub, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package blobstore
    16  
    17  import (
    18  	"bytes"
    19  	"context"
    20  	"encoding/binary"
    21  	"fmt"
    22  	"hash/maphash"
    23  	"log"
    24  	"math/rand"
    25  	"os"
    26  	"reflect"
    27  	"runtime"
    28  	"strconv"
    29  	"testing"
    30  
    31  	"cloud.google.com/go/storage"
    32  	"github.com/google/uuid"
    33  	"github.com/oracle/oci-go-sdk/v65/common"
    34  	"github.com/oracle/oci-go-sdk/v65/objectstorage"
    35  	"github.com/stretchr/testify/assert"
    36  	"github.com/stretchr/testify/require"
    37  )
    38  
    39  const (
    40  	key        = "test"
    41  	rmwRetries = 5
    42  )
    43  
    44  var (
    45  	ctx           context.Context
    46  	gcsBucket     *storage.BucketHandle
    47  	testGCSBucket string
    48  	osProvider    common.ConfigurationProvider
    49  	osClient      objectstorage.ObjectStorageClient
    50  	testOCIBucket string
    51  )
    52  
    53  const envTestGSBucket = "TEST_GCS_BUCKET"
    54  const envTestOCIBucket = "TEST_OCI_BUCKET"
    55  
    56  func init() {
    57  	testGCSBucket = os.Getenv(envTestGSBucket)
    58  	if testGCSBucket != "" {
    59  		ctx = context.Background()
    60  		gcs, err := storage.NewClient(ctx)
    61  
    62  		if err != nil {
    63  			panic("Could not create GCSBlobstore")
    64  		}
    65  
    66  		gcsBucket = gcs.Bucket(testGCSBucket)
    67  	}
    68  	testOCIBucket = os.Getenv(envTestOCIBucket)
    69  	if testOCIBucket != "" {
    70  		osProvider = common.DefaultConfigProvider()
    71  
    72  		client, err := objectstorage.NewObjectStorageClientWithConfigurationProvider(osProvider)
    73  		if err != nil {
    74  			panic("Could not create OCIBlobstore")
    75  		}
    76  
    77  		osClient = client
    78  	}
    79  }
    80  
    81  type BlobstoreTest struct {
    82  	bsType         string
    83  	bs             Blobstore
    84  	rmwConcurrency int
    85  	rmwIterations  int
    86  }
    87  
    88  func appendOCITest(tests []BlobstoreTest) []BlobstoreTest {
    89  	if testOCIBucket != "" {
    90  		ociTest := BlobstoreTest{"oci", &OCIBlobstore{osProvider, osClient, testOCIBucket, "", uuid.New().String() + "/", 2}, 4, 4}
    91  		tests = append(tests, ociTest)
    92  	}
    93  
    94  	return tests
    95  }
    96  
    97  func appendGCSTest(tests []BlobstoreTest) []BlobstoreTest {
    98  	if testGCSBucket != "" {
    99  		gcsTest := BlobstoreTest{"gcs", &GCSBlobstore{gcsBucket, testGCSBucket, uuid.New().String() + "/"}, 4, 4}
   100  		tests = append(tests, gcsTest)
   101  	}
   102  
   103  	return tests
   104  }
   105  
   106  func appendLocalTest(tests []BlobstoreTest) []BlobstoreTest {
   107  	dir, err := os.MkdirTemp("", uuid.New().String())
   108  
   109  	if err != nil {
   110  		panic("Could not create temp dir")
   111  	}
   112  
   113  	return append(tests, BlobstoreTest{"local", NewLocalBlobstore(dir), 10, 20})
   114  }
   115  
   116  func newBlobStoreTests() []BlobstoreTest {
   117  	var tests []BlobstoreTest
   118  	tests = append(tests, BlobstoreTest{"inmem", NewInMemoryBlobstore(""), 10, 20})
   119  	tests = appendLocalTest(tests)
   120  	tests = appendGCSTest(tests)
   121  	tests = appendOCITest(tests)
   122  
   123  	return tests
   124  }
   125  
   126  func randBytes(size int) []byte {
   127  	bytes := make([]byte, size)
   128  	rand.Read(bytes)
   129  
   130  	return bytes
   131  }
   132  
   133  func testPutAndGetBack(t *testing.T, bs Blobstore) {
   134  	testData := randBytes(32)
   135  	ver, err := PutBytes(context.Background(), bs, key, testData)
   136  
   137  	if err != nil {
   138  		t.Errorf("Put failed %v.", err)
   139  	}
   140  
   141  	retrieved, retVer, err := GetBytes(context.Background(), bs, key, BlobRange{})
   142  
   143  	if err != nil {
   144  		t.Errorf("Get failed: %v.", err)
   145  	}
   146  
   147  	if ver != retVer {
   148  		t.Errorf("Version doesn't match. Expected: %s Actual: %s.", ver, retVer)
   149  	}
   150  
   151  	if !reflect.DeepEqual(retrieved, testData) {
   152  		t.Errorf("Data mismatch.")
   153  	}
   154  }
   155  
   156  func TestPutAndGetBack(t *testing.T) {
   157  	for _, bsTest := range newBlobStoreTests() {
   158  		t.Run(bsTest.bsType, func(t *testing.T) {
   159  			testPutAndGetBack(t, bsTest.bs)
   160  		})
   161  	}
   162  }
   163  
   164  func testGetMissing(t *testing.T, bs Blobstore) {
   165  	_, _, err := GetBytes(context.Background(), bs, key, BlobRange{})
   166  
   167  	if err == nil || !IsNotFoundError(err) {
   168  		t.Errorf("Key should be missing.")
   169  	}
   170  }
   171  
   172  func TestGetMissing(t *testing.T) {
   173  	for _, bsTest := range newBlobStoreTests() {
   174  		t.Run(bsTest.bsType, func(t *testing.T) {
   175  			testGetMissing(t, bsTest.bs)
   176  		})
   177  	}
   178  }
   179  
   180  // CheckAndPutBytes is a utility method calls bs.CheckAndPut by wrapping the supplied []byte
   181  // in an io.Reader
   182  func CheckAndPutBytes(ctx context.Context, bs Blobstore, expectedVersion, key string, data []byte) (string, error) {
   183  	reader := bytes.NewReader(data)
   184  	return bs.CheckAndPut(ctx, expectedVersion, key, int64(len(data)), reader)
   185  }
   186  
   187  func testCheckAndPutError(t *testing.T, bs Blobstore) {
   188  	testData := randBytes(32)
   189  	badVersion := "bad" //has to be valid hex
   190  	_, err := CheckAndPutBytes(context.Background(), bs, badVersion, key, testData)
   191  
   192  	if err == nil {
   193  		t.Errorf("Key should be missing.")
   194  		return
   195  	} else if !IsCheckAndPutError(err) {
   196  		t.Errorf("Should have failed due to version mismatch.")
   197  		return
   198  	}
   199  
   200  	cpe, ok := err.(CheckAndPutError)
   201  
   202  	if !ok {
   203  		t.Errorf("Error is not of the expected type")
   204  	} else if cpe.Key != key || cpe.ExpectedVersion != badVersion {
   205  		t.Errorf("CheckAndPutError does not have expected values - " + cpe.Error())
   206  	}
   207  }
   208  
   209  func TestCheckAndPutError(t *testing.T) {
   210  	for _, bsTest := range newBlobStoreTests() {
   211  		t.Run(bsTest.bsType, func(t *testing.T) {
   212  			testCheckAndPutError(t, bsTest.bs)
   213  		})
   214  	}
   215  }
   216  
   217  func testCheckAndPut(t *testing.T, bs Blobstore) {
   218  	ver, err := CheckAndPutBytes(context.Background(), bs, "", key, randBytes(32))
   219  
   220  	if err != nil {
   221  		t.Errorf("Failed CheckAndPut.")
   222  	}
   223  
   224  	newVer, err := CheckAndPutBytes(context.Background(), bs, ver, key, randBytes(32))
   225  
   226  	if err != nil {
   227  		t.Errorf("Failed CheckAndPut.")
   228  	}
   229  
   230  	_, err = CheckAndPutBytes(context.Background(), bs, newVer, key, randBytes(32))
   231  
   232  	if err != nil {
   233  		t.Errorf("Failed CheckAndPut.")
   234  	}
   235  }
   236  
   237  func TestCheckAndPut(t *testing.T) {
   238  	for _, bsTest := range newBlobStoreTests() {
   239  		t.Run(bsTest.bsType, func(t *testing.T) {
   240  			testCheckAndPut(t, bsTest.bs)
   241  		})
   242  	}
   243  }
   244  
   245  func readModifyWrite(bs Blobstore, key string, iterations int, doneChan chan int) {
   246  	concurrentWrites := 0
   247  	for updates, failures := 0, 0; updates < iterations; {
   248  		if failures >= rmwRetries {
   249  			panic("Having io issues.")
   250  		}
   251  
   252  		data, ver, err := GetBytes(context.Background(), bs, key, BlobRange{})
   253  
   254  		if err != nil && !IsNotFoundError(err) {
   255  			log.Println(err)
   256  			failures++
   257  			continue
   258  		}
   259  
   260  		dataSize := len(data)
   261  		newData := make([]byte, dataSize+1)
   262  		copy(newData, data)
   263  		newData[dataSize] = byte(dataSize)
   264  
   265  		_, err = CheckAndPutBytes(context.Background(), bs, ver, key, newData)
   266  		if err == nil {
   267  			updates++
   268  			failures = 0
   269  		} else if !IsCheckAndPutError(err) {
   270  			log.Println(err)
   271  			failures++
   272  		} else {
   273  			concurrentWrites++
   274  		}
   275  	}
   276  
   277  	doneChan <- concurrentWrites
   278  }
   279  
   280  func testConcurrentCheckAndPuts(t *testing.T, bsTest BlobstoreTest, key string) {
   281  	doneChan := make(chan int)
   282  	for n := 0; n < bsTest.rmwConcurrency; n++ {
   283  		go readModifyWrite(bsTest.bs, key, bsTest.rmwIterations, doneChan)
   284  	}
   285  
   286  	totalConcurrentWrites := 0
   287  	for n := 0; n < bsTest.rmwConcurrency; n++ {
   288  		totalConcurrentWrites += <-doneChan
   289  	}
   290  
   291  	// If concurrent writes is 0 this test is pretty shitty
   292  	fmt.Println(totalConcurrentWrites, "concurrent writes occurred")
   293  
   294  	var data []byte
   295  	var err error
   296  	for i := 0; i < rmwRetries; i++ {
   297  		data, _, err = GetBytes(context.Background(), bsTest.bs, key, BlobRange{})
   298  
   299  		if err == nil {
   300  			break
   301  		}
   302  	}
   303  
   304  	if err != nil {
   305  		t.Errorf("Having IO issues testing concurrent blobstore CheckAndPuts")
   306  		return
   307  	}
   308  
   309  	if len(data) != bsTest.rmwIterations*bsTest.rmwConcurrency {
   310  		t.Errorf("Output data is not of the correct size. This is caused by bad synchronization where a read/read/write/write has occurred.")
   311  	}
   312  
   313  	for i, v := range data {
   314  		if i != int(v) {
   315  			t.Errorf("Data does not match the expected output.")
   316  		}
   317  	}
   318  }
   319  
   320  func TestConcurrentCheckAndPuts(t *testing.T) {
   321  	if runtime.GOOS == "windows" {
   322  		t.Skip("Skipping on windows due to flakiness")
   323  	}
   324  	for _, bsTest := range newBlobStoreTests() {
   325  		t.Run(bsTest.bsType, func(t *testing.T) {
   326  			if bsTest.rmwIterations*bsTest.rmwConcurrency > 255 {
   327  				panic("Test epects less than 255 total updates or it won't work as is.")
   328  			}
   329  			testConcurrentCheckAndPuts(t, bsTest, uuid.New().String())
   330  		})
   331  	}
   332  }
   333  
   334  func setupRangeTest(t *testing.T, bs Blobstore, data []byte) {
   335  	_, err := PutBytes(context.Background(), bs, key, data)
   336  
   337  	if err != nil {
   338  		t.FailNow()
   339  	}
   340  }
   341  
   342  func testGetRange(t *testing.T, bs Blobstore, br BlobRange, expected []byte) {
   343  	retrieved, _, err := GetBytes(context.Background(), bs, key, br)
   344  
   345  	if err != nil {
   346  		t.Errorf("Get failed: %v.", err)
   347  	}
   348  
   349  	if len(retrieved) != len(expected) {
   350  		t.Errorf("Range results are not the right size")
   351  		return
   352  	}
   353  
   354  	for i := 0; i < len(expected); i++ {
   355  		if retrieved[i] != expected[i] {
   356  			t.Errorf("Bad Value")
   357  			return
   358  		}
   359  	}
   360  }
   361  
   362  func rangeData(min, max int64) []byte {
   363  	if max <= min {
   364  		panic("no")
   365  	}
   366  
   367  	size := max - min
   368  	data := make([]byte, 2*size)
   369  	b := bytes.NewBuffer(data[:0])
   370  
   371  	for i := int16(min); i < int16(max); i++ {
   372  		binary.Write(b, binary.BigEndian, i)
   373  	}
   374  
   375  	return data
   376  }
   377  
   378  func TestGetRange(t *testing.T) {
   379  	maxValue := int64(16 * 1024)
   380  	testData := rangeData(0, maxValue)
   381  
   382  	tests := newBlobStoreTests()
   383  	for _, bsTest := range tests {
   384  		t.Run(bsTest.bsType, func(t *testing.T) {
   385  			setupRangeTest(t, bsTest.bs, testData)
   386  			// test full range
   387  			testGetRange(t, bsTest.bs, AllRange, rangeData(0, maxValue))
   388  			// test first 2048 bytes (1024 shorts)
   389  			testGetRange(t, bsTest.bs, NewBlobRange(0, 2048), rangeData(0, 1024))
   390  
   391  			// test range of values from 1024 to 2048 stored in bytes 2048 to 4096 of the original testData
   392  			testGetRange(t, bsTest.bs, NewBlobRange(2*1024, 2*1024), rangeData(1024, 2048))
   393  
   394  			// test the last 2048 bytes of data which will be the last 1024 shorts
   395  			testGetRange(t, bsTest.bs, NewBlobRange(-2*1024, 0), rangeData(maxValue-1024, maxValue))
   396  
   397  			// test the range beginning 2048 bytes from the end of size 512 which will be shorts 1024 from the end til 768 from the end
   398  			testGetRange(t, bsTest.bs, NewBlobRange(-2*1024, 512), rangeData(maxValue-1024, maxValue-768))
   399  		})
   400  	}
   401  }
   402  
   403  func TestPanicOnNegativeRangeLength(t *testing.T) {
   404  	defer func() {
   405  		if r := recover(); r == nil {
   406  			t.Errorf("The code did not panic")
   407  		}
   408  	}()
   409  
   410  	NewBlobRange(0, -1)
   411  }
   412  
   413  func TestConcatenate(t *testing.T) {
   414  	tests := newBlobStoreTests()
   415  	for _, test := range tests {
   416  		if test.bsType != "oci" {
   417  			t.Run(test.bsType, func(t *testing.T) {
   418  				testConcatenate(t, test.bs, 1)
   419  				testConcatenate(t, test.bs, 4)
   420  				testConcatenate(t, test.bs, 16)
   421  				testConcatenate(t, test.bs, 32)
   422  				testConcatenate(t, test.bs, 64)
   423  			})
   424  		}
   425  	}
   426  }
   427  
   428  func testConcatenate(t *testing.T, bs Blobstore, cnt int) {
   429  	ctx := context.Background()
   430  	type blob struct {
   431  		key  string
   432  		data []byte
   433  	}
   434  	blobs := make([]blob, cnt)
   435  	keys := make([]string, cnt)
   436  
   437  	for i := range blobs {
   438  		b := make([]byte, 64)
   439  		rand.Read(b)
   440  		keys[i] = blobName(b)
   441  		_, err := bs.Put(ctx, keys[i], int64(len(b)), bytes.NewReader(b))
   442  		require.NoError(t, err)
   443  		blobs[i] = blob{
   444  			key:  keys[i],
   445  			data: b,
   446  		}
   447  	}
   448  
   449  	composite := uuid.New().String()
   450  	_, err := bs.Concatenate(ctx, composite, keys)
   451  	assert.NoError(t, err)
   452  
   453  	var off int64
   454  	for i := range blobs {
   455  		length := int64(len(blobs[i].data))
   456  		rdr, _, err := bs.Get(ctx, composite, BlobRange{
   457  			offset: off,
   458  			length: length,
   459  		})
   460  		assert.NoError(t, err)
   461  
   462  		act := make([]byte, length)
   463  		n, err := rdr.Read(act)
   464  		assert.NoError(t, err)
   465  		assert.Equal(t, int(length), n)
   466  		assert.Equal(t, blobs[i].data, act)
   467  		off += length
   468  	}
   469  }
   470  
   471  func blobName(b []byte) string {
   472  	h := maphash.Bytes(maphash.MakeSeed(), b)
   473  	return strconv.Itoa(int(h))
   474  }