github.com/treeverse/lakefs@v1.24.1-0.20240520134607-95648127bfb0/pkg/block/blocktest/adapter.go (about)

     1  package blocktest
     2  
     3  import (
     4  	"bytes"
     5  	"context"
     6  	"fmt"
     7  	"io"
     8  	"net/url"
     9  	"path"
    10  	"path/filepath"
    11  	"sort"
    12  	"strconv"
    13  	"strings"
    14  	"testing"
    15  
    16  	"github.com/go-test/deep"
    17  	"github.com/stretchr/testify/require"
    18  	"github.com/thanhpk/randstr"
    19  	"github.com/treeverse/lakefs/pkg/block"
    20  	"github.com/treeverse/lakefs/pkg/ingest/store"
    21  )
    22  
    23  func AdapterTest(t *testing.T, adapter block.Adapter, storageNamespace, externalPath string) {
    24  	t.Run("Adapter_PutGet", func(t *testing.T) { testAdapterPutGet(t, adapter, storageNamespace, externalPath) })
    25  	t.Run("Adapter_Copy", func(t *testing.T) { testAdapterCopy(t, adapter, storageNamespace) })
    26  	t.Run("Adapter_Remove", func(t *testing.T) { testAdapterRemove(t, adapter, storageNamespace) })
    27  	t.Run("Adapter_MultipartUpload", func(t *testing.T) { testAdapterMultipartUpload(t, adapter, storageNamespace) })
    28  	t.Run("Adapter_Exists", func(t *testing.T) { testAdapterExists(t, adapter, storageNamespace) })
    29  	t.Run("Adapter_GetRange", func(t *testing.T) { testAdapterGetRange(t, adapter, storageNamespace) })
    30  	t.Run("Adapter_Walker", func(t *testing.T) { testAdapterWalker(t, adapter, storageNamespace) })
    31  }
    32  
    33  func testAdapterPutGet(t *testing.T, adapter block.Adapter, storageNamespace, externalPath string) {
    34  	ctx := context.Background()
    35  	const contents = "test_file"
    36  	size := int64(len(contents))
    37  
    38  	cases := []struct {
    39  		name           string
    40  		identifierType block.IdentifierType
    41  		path           string
    42  	}{
    43  		{"identifier_relative", block.IdentifierTypeRelative, "test_file"},
    44  		{"identifier_full", block.IdentifierTypeFull, externalPath + "/" + "test_file"},
    45  		{"identifier_unknown_relative", block.IdentifierTypeUnknownDeprecated, "test_file"},                  //nolint:staticcheck
    46  		{"identifier_unknown_full", block.IdentifierTypeUnknownDeprecated, externalPath + "/" + "test_file"}, //nolint:staticcheck
    47  	}
    48  
    49  	for _, c := range cases {
    50  		t.Run(c.name, func(t *testing.T) {
    51  			obj := block.ObjectPointer{
    52  				StorageNamespace: storageNamespace,
    53  				Identifier:       c.path,
    54  				IdentifierType:   c.identifierType,
    55  			}
    56  
    57  			err := adapter.Put(ctx, obj, size, strings.NewReader(contents), block.PutOpts{})
    58  			require.NoError(t, err)
    59  
    60  			reader, err := adapter.Get(ctx, obj, size)
    61  			require.NoError(t, err)
    62  			defer func() {
    63  				require.NoError(t, reader.Close())
    64  			}()
    65  			got, err := io.ReadAll(reader)
    66  			require.NoError(t, err)
    67  			require.Equal(t, contents, string(got))
    68  		})
    69  	}
    70  }
    71  
    72  func testAdapterCopy(t *testing.T, adapter block.Adapter, storageNamespace string) {
    73  	ctx := context.Background()
    74  	contents := "foo bar baz quux"
    75  	src := block.ObjectPointer{
    76  		StorageNamespace: storageNamespace,
    77  		Identifier:       "src",
    78  		IdentifierType:   block.IdentifierTypeRelative,
    79  	}
    80  	dst := block.ObjectPointer{
    81  		StorageNamespace: storageNamespace,
    82  		Identifier:       "export/to/dst",
    83  		IdentifierType:   block.IdentifierTypeRelative,
    84  	}
    85  
    86  	require.NoError(t, adapter.Put(ctx, src, int64(len(contents)), strings.NewReader(contents), block.PutOpts{}))
    87  
    88  	require.NoError(t, adapter.Copy(ctx, src, dst))
    89  	reader, err := adapter.Get(ctx, dst, 0)
    90  	require.NoError(t, err)
    91  	got, err := io.ReadAll(reader)
    92  	require.NoError(t, err)
    93  	require.Equal(t, contents, string(got))
    94  }
    95  
    96  func testAdapterRemove(t *testing.T, adapter block.Adapter, storageNamespace string) {
    97  	ctx := context.Background()
    98  	const content = "Content used for testing"
    99  	tests := []struct {
   100  		name              string
   101  		additionalObjects []string
   102  		path              string
   103  		wantErr           bool
   104  		wantTree          []string
   105  	}{
   106  		{
   107  			name:     "test_single",
   108  			path:     "README",
   109  			wantErr:  false,
   110  			wantTree: []string{},
   111  		},
   112  
   113  		{
   114  			name:     "test_under_folder",
   115  			path:     "src/tools.go",
   116  			wantErr:  false,
   117  			wantTree: []string{},
   118  		},
   119  		{
   120  			name:     "test_under_multiple_folders",
   121  			path:     "a/b/c/d.txt",
   122  			wantErr:  false,
   123  			wantTree: []string{},
   124  		},
   125  		{
   126  			name:              "file_in_the_way",
   127  			path:              "a/b/c/d.txt",
   128  			additionalObjects: []string{"a/b/blocker.txt"},
   129  			wantErr:           false,
   130  			wantTree:          []string{"/a/b/blocker.txt"},
   131  		},
   132  	}
   133  
   134  	for _, tt := range tests {
   135  		t.Run(tt.name, func(t *testing.T) {
   136  			// setup env
   137  			envObjects := tt.additionalObjects
   138  			envObjects = append(envObjects, tt.path)
   139  			for _, p := range envObjects {
   140  				obj := block.ObjectPointer{
   141  					StorageNamespace: storageNamespace,
   142  					Identifier:       tt.name + "/" + p,
   143  					IdentifierType:   block.IdentifierTypeRelative,
   144  				}
   145  				require.NoError(t, adapter.Put(ctx, obj, int64(len(content)), strings.NewReader(content), block.PutOpts{}))
   146  			}
   147  
   148  			// test Remove
   149  			obj := block.ObjectPointer{
   150  				StorageNamespace: storageNamespace,
   151  				Identifier:       tt.name + "/" + tt.path,
   152  				IdentifierType:   block.IdentifierTypeRelative,
   153  			}
   154  			if err := adapter.Remove(ctx, obj); (err != nil) != tt.wantErr {
   155  				t.Errorf("Remove() error = %v, wantErr %v", err, tt.wantErr)
   156  			}
   157  
   158  			qk, err := adapter.ResolveNamespace(storageNamespace, tt.name, block.IdentifierTypeRelative)
   159  			require.NoError(t, err)
   160  
   161  			tree := dumpPathTree(t, ctx, adapter, qk)
   162  			if diff := deep.Equal(tt.wantTree, tree); diff != nil {
   163  				t.Errorf("Remove() tree diff = %s", diff)
   164  			}
   165  		})
   166  	}
   167  }
   168  
   169  func dumpPathTree(t testing.TB, ctx context.Context, adapter block.Adapter, qk block.QualifiedKey) []string {
   170  	t.Helper()
   171  	tree := make([]string, 0)
   172  
   173  	uri, err := url.Parse(qk.Format())
   174  	require.NoError(t, err)
   175  
   176  	w, err := adapter.GetWalker(uri)
   177  	require.NoError(t, err)
   178  
   179  	walker := store.NewWrapper(w, uri)
   180  	require.NoError(t, err)
   181  
   182  	err = walker.Walk(ctx, block.WalkOptions{}, func(e block.ObjectStoreEntry) error {
   183  		_, p, _ := strings.Cut(e.Address, uri.String())
   184  		tree = append(tree, p)
   185  		return nil
   186  	})
   187  	if err != nil {
   188  		t.Fatalf("walking on '%s': %s", uri.String(), err)
   189  	}
   190  	sort.Strings(tree)
   191  	return tree
   192  }
   193  
   194  func createMultipartFile() ([][]byte, []byte) {
   195  	const (
   196  		multipartNumberOfParts = 3
   197  		multipartPartSize      = 5 * 1024 * 1024
   198  	)
   199  	parts := make([][]byte, multipartNumberOfParts)
   200  	var partsConcat []byte
   201  	for i := 0; i < multipartNumberOfParts; i++ {
   202  		parts[i] = randstr.Bytes(multipartPartSize + i)
   203  		partsConcat = append(partsConcat, parts[i]...)
   204  	}
   205  	return parts, partsConcat
   206  }
   207  
   208  func testAdapterMultipartUpload(t *testing.T, adapter block.Adapter, storageNamespace string) {
   209  	ctx := context.Background()
   210  	parts, full := createMultipartFile()
   211  
   212  	cases := []struct {
   213  		name string
   214  		path string
   215  	}{
   216  		{"simple", "abc"},
   217  		{"nested", "foo/bar"},
   218  	}
   219  	for _, c := range cases {
   220  		t.Run(c.name, func(t *testing.T) {
   221  			blockstoreType := adapter.BlockstoreType()
   222  			obj := block.ObjectPointer{
   223  				StorageNamespace: storageNamespace,
   224  				Identifier:       c.path,
   225  				IdentifierType:   block.IdentifierTypeRelative,
   226  			}
   227  			// List parts on non-existing part
   228  			_, err := adapter.ListParts(ctx, obj, "invalidId", block.ListPartsOpts{})
   229  			if blockstoreType != block.BlockstoreTypeS3 {
   230  				require.ErrorIs(t, err, block.ErrOperationNotSupported)
   231  			} else {
   232  				require.NotNil(t, err)
   233  			}
   234  
   235  			resp, err := adapter.CreateMultiPartUpload(ctx, obj, nil, block.CreateMultiPartUploadOpts{})
   236  			require.NoError(t, err)
   237  
   238  			multiParts := make([]block.MultipartPart, len(parts))
   239  			for i, content := range parts {
   240  				partNumber := i + 1
   241  				partResp, err := adapter.UploadPart(ctx, obj, int64(len(content)), bytes.NewReader(content), resp.UploadID, partNumber)
   242  				require.NoError(t, err)
   243  				multiParts[i].PartNumber = partNumber
   244  				multiParts[i].ETag = partResp.ETag
   245  			}
   246  
   247  			// List parts after upload
   248  			listResp, err := adapter.ListParts(ctx, obj, resp.UploadID, block.ListPartsOpts{})
   249  			if blockstoreType != block.BlockstoreTypeS3 {
   250  				require.ErrorIs(t, err, block.ErrOperationNotSupported)
   251  			} else {
   252  				require.NoError(t, err)
   253  				require.Equal(t, len(parts), len(listResp.Parts))
   254  				for i, part := range listResp.Parts {
   255  					require.Equal(t, multiParts[i].PartNumber, part.PartNumber)
   256  					require.Equal(t, int64(len(parts[i])), part.Size)
   257  					require.Equal(t, multiParts[i].ETag, part.ETag)
   258  					require.False(t, listResp.IsTruncated)
   259  				}
   260  			}
   261  
   262  			// List parts partial
   263  			const maxPartsConst = 2
   264  			maxParts := int32(maxPartsConst)
   265  			listResp, err = adapter.ListParts(ctx, obj, resp.UploadID, block.ListPartsOpts{MaxParts: &maxParts})
   266  			if blockstoreType != block.BlockstoreTypeS3 {
   267  				require.ErrorIs(t, err, block.ErrOperationNotSupported)
   268  			} else {
   269  				require.NoError(t, err)
   270  				require.Equal(t, int(maxParts), len(listResp.Parts))
   271  				require.True(t, listResp.IsTruncated)
   272  				require.Equal(t, strconv.Itoa(int(maxParts)), *listResp.NextPartNumberMarker)
   273  			}
   274  
   275  			_, err = adapter.CompleteMultiPartUpload(ctx, obj, resp.UploadID, &block.MultipartUploadCompletion{
   276  				Part: multiParts,
   277  			})
   278  			require.NoError(t, err)
   279  
   280  			// List parts after complete should fail
   281  			_, err = adapter.ListParts(ctx, obj, resp.UploadID, block.ListPartsOpts{})
   282  			if blockstoreType != block.BlockstoreTypeS3 {
   283  				require.ErrorIs(t, err, block.ErrOperationNotSupported)
   284  			} else {
   285  				require.NotNil(t, err)
   286  			}
   287  
   288  			reader, err := adapter.Get(ctx, obj, 0)
   289  			require.NoError(t, err)
   290  
   291  			got, err := io.ReadAll(reader)
   292  			require.NoError(t, err)
   293  
   294  			require.Equal(t, full, got)
   295  		})
   296  	}
   297  }
   298  
   299  func testAdapterExists(t *testing.T, adapter block.Adapter, storageNamespace string) {
   300  	// TODO (niro): Test abs paths
   301  	const contents = "exists"
   302  	ctx := context.Background()
   303  	err := adapter.Put(ctx, block.ObjectPointer{
   304  		StorageNamespace: storageNamespace,
   305  		Identifier:       contents,
   306  		IdentifierType:   block.IdentifierTypeRelative,
   307  	}, int64(len(contents)), strings.NewReader(contents), block.PutOpts{})
   308  	require.NoError(t, err)
   309  
   310  	err = adapter.Put(ctx, block.ObjectPointer{
   311  		StorageNamespace: storageNamespace,
   312  		Identifier:       "nested/and/" + contents,
   313  		IdentifierType:   block.IdentifierTypeRelative,
   314  	}, int64(len(contents)), strings.NewReader(contents), block.PutOpts{})
   315  	require.NoError(t, err)
   316  
   317  	cases := []struct {
   318  		name   string
   319  		path   string
   320  		exists bool
   321  	}{
   322  		{"exists", "exists", true},
   323  		{"nested_exists", "nested/and/exists", true},
   324  		{"simple_missing", "missing", false},
   325  		{"nested_missing", "nested/down", false},
   326  		{"nested_deep_missing", "nested/quite/deeply/and/missing", false},
   327  	}
   328  	for _, tt := range cases {
   329  		t.Run(tt.name, func(t *testing.T) {
   330  			ok, err := adapter.Exists(ctx, block.ObjectPointer{
   331  				StorageNamespace: storageNamespace,
   332  				Identifier:       tt.path,
   333  				IdentifierType:   block.IdentifierTypeRelative,
   334  			})
   335  			require.NoError(t, err)
   336  			require.Equal(t, tt.exists, ok)
   337  		})
   338  	}
   339  }
   340  
   341  func testAdapterGetRange(t *testing.T, adapter block.Adapter, storageNamespace string) {
   342  	ctx := context.Background()
   343  	part1 := "this is the first part "
   344  	part2 := "this is the last part"
   345  	err := adapter.Put(ctx, block.ObjectPointer{
   346  		StorageNamespace: storageNamespace,
   347  		Identifier:       "test_file",
   348  		IdentifierType:   block.IdentifierTypeRelative,
   349  	}, int64(len(part1+part2)), strings.NewReader(part1+part2), block.PutOpts{})
   350  	require.NoError(t, err)
   351  
   352  	cases := []struct {
   353  		name          string
   354  		startPos      int
   355  		endPos        int
   356  		expected      string
   357  		expectFailure bool
   358  	}{
   359  		{"read_suffix", len(part1), len(part1 + part2), part2, false},
   360  		{"read_prefix", 0, len(part1) - 1, part1, false},
   361  		{"read_middle", 8, len(part1) + 6, "the first part this is", false},
   362  		// {"end_smaller_than_start", 10, 1, "", false}, // TODO (niro): To be determined
   363  		// {"negative_position", -1, len(part1), "", true}, // S3 and Azure not aligned
   364  		{"one_byte", 1, 1, string(part1[1]), false},
   365  		{"out_of_bounds", 0, len(part1+part2) + 10, part1 + part2, false},
   366  	}
   367  	for _, tt := range cases {
   368  		t.Run(tt.name, func(t *testing.T) {
   369  			reader, err := adapter.GetRange(ctx, block.ObjectPointer{
   370  				StorageNamespace: storageNamespace,
   371  				Identifier:       "test_file",
   372  				IdentifierType:   block.IdentifierTypeRelative,
   373  			}, int64(tt.startPos), int64(tt.endPos))
   374  			require.Equal(t, tt.expectFailure, err != nil)
   375  			if err == nil {
   376  				got, err := io.ReadAll(reader)
   377  				require.NoError(t, err)
   378  				require.Equal(t, tt.expected, string(got))
   379  			}
   380  		})
   381  	}
   382  }
   383  
   384  func testAdapterWalker(t *testing.T, adapter block.Adapter, storageNamespace string) {
   385  	ctx := context.Background()
   386  	const (
   387  		testPrefix      = "test_walker"
   388  		filesAndFolders = 5
   389  		contents        = "test_file"
   390  	)
   391  
   392  	for i := 0; i < filesAndFolders; i++ {
   393  		for j := 0; j < filesAndFolders; j++ {
   394  			err := adapter.Put(ctx, block.ObjectPointer{
   395  				StorageNamespace: storageNamespace,
   396  				Identifier:       fmt.Sprintf("%s/folder_%d/test_file_%d", testPrefix, filesAndFolders-i-1, filesAndFolders-j-1),
   397  				IdentifierType:   block.IdentifierTypeRelative,
   398  			}, int64(len(contents)), strings.NewReader(contents), block.PutOpts{})
   399  			require.NoError(t, err)
   400  		}
   401  	}
   402  
   403  	err := adapter.Put(ctx, block.ObjectPointer{
   404  		StorageNamespace: storageNamespace,
   405  		Identifier:       fmt.Sprintf("%s/folder_0.txt", testPrefix),
   406  		IdentifierType:   block.IdentifierTypeRelative,
   407  	}, int64(len(contents)), strings.NewReader(contents), block.PutOpts{})
   408  	require.NoError(t, err)
   409  
   410  	cases := []struct {
   411  		name   string
   412  		prefix string
   413  	}{
   414  		{
   415  			name:   "root",
   416  			prefix: "",
   417  		},
   418  		{
   419  			name:   "prefix",
   420  			prefix: "folder_1",
   421  		},
   422  		{
   423  			name:   "prefix/",
   424  			prefix: "folder_2",
   425  		},
   426  	}
   427  	for _, tt := range cases {
   428  		qk, err := adapter.ResolveNamespace(storageNamespace, filepath.Join(testPrefix, tt.prefix), block.IdentifierTypeRelative)
   429  		require.NoError(t, err)
   430  		uri, err := url.Parse(qk.Format())
   431  		require.NoError(t, err)
   432  		t.Run(tt.name, func(t *testing.T) {
   433  			reader, err := adapter.GetWalker(uri)
   434  			require.NoError(t, err)
   435  
   436  			var results []string
   437  			err = reader.Walk(ctx, uri, block.WalkOptions{}, func(e block.ObjectStoreEntry) error {
   438  				results = append(results, e.RelativeKey)
   439  				return nil
   440  			})
   441  			require.NoError(t, err)
   442  			var prefix string
   443  			if tt.prefix == "" {
   444  				if adapter.BlockstoreType() != block.BlockstoreTypeLocal {
   445  					prefix = testPrefix
   446  				}
   447  
   448  				require.Equal(t, path.Join(prefix, "folder_0.txt"), results[0])
   449  				results = results[1:]
   450  				for i := 0; i < filesAndFolders; i++ {
   451  					for j := 0; j < filesAndFolders; j++ {
   452  						require.Equal(t, path.Join(prefix, fmt.Sprintf("folder_%d/test_file_%d", i, j)), results[i*filesAndFolders+j])
   453  					}
   454  				}
   455  			} else {
   456  				if adapter.BlockstoreType() != block.BlockstoreTypeLocal {
   457  					prefix = tt.prefix
   458  				}
   459  				for j := 0; j < filesAndFolders; j++ {
   460  					require.Equal(t, path.Join(prefix, fmt.Sprintf("test_file_%d", j)), results[j])
   461  				}
   462  			}
   463  		})
   464  	}
   465  }