github.com/pachyderm/pachyderm@v1.13.4/src/server/pfs/s3/worker_test.go (about)

     1  package s3
     2  
     3  import (
     4  	"fmt"
     5  	"io/ioutil"
     6  	"os"
     7  	"strings"
     8  	"testing"
     9  
    10  	minio "github.com/minio/minio-go/v6"
    11  
    12  	"github.com/pachyderm/pachyderm/src/client"
    13  	"github.com/pachyderm/pachyderm/src/client/pfs"
    14  	"github.com/pachyderm/pachyderm/src/client/pkg/require"
    15  	tu "github.com/pachyderm/pachyderm/src/server/pkg/testutil"
    16  )
    17  
    18  type workerTestState struct {
    19  	pachClient         *client.APIClient
    20  	minioClient        *minio.Client
    21  	inputRepo          string
    22  	outputRepo         string
    23  	inputMasterCommit  *pfs.Commit
    24  	inputDevelopCommit *pfs.Commit
    25  	outputCommit       *pfs.Commit
    26  }
    27  
    28  func workerListBuckets(t *testing.T, s *workerTestState) {
    29  	// create a repo - this should not show up list buckets with the worker
    30  	// driver
    31  	repo := tu.UniqueString("testlistbuckets1")
    32  	require.NoError(t, s.pachClient.CreateRepo(repo))
    33  	require.NoError(t, s.pachClient.CreateBranch(repo, "master", "", nil))
    34  
    35  	buckets, err := s.minioClient.ListBuckets()
    36  	require.NoError(t, err)
    37  
    38  	actualBucketNames := []string{}
    39  	for _, bucket := range buckets {
    40  		actualBucketNames = append(actualBucketNames, bucket.Name)
    41  	}
    42  
    43  	require.ElementsEqual(t, []string{"in1", "in2", "out"}, actualBucketNames)
    44  }
    45  
    46  func workerGetObject(t *testing.T, s *workerTestState) {
    47  	fetchedContent, err := getObject(t, s.minioClient, "in1", "0")
    48  	require.NoError(t, err)
    49  	require.Equal(t, "0\n", fetchedContent)
    50  }
    51  
    52  func workerGetObjectOutputRepo(t *testing.T, s *workerTestState) {
    53  	_, err := getObject(t, s.minioClient, "out", "file")
    54  	keyNotFoundError(t, err)
    55  }
    56  
    57  func workerStatObject(t *testing.T, s *workerTestState) {
    58  	info, err := s.minioClient.StatObject("in1", "0", minio.StatObjectOptions{})
    59  	require.NoError(t, err)
    60  	require.True(t, len(info.ETag) > 0)
    61  	require.Equal(t, "text/plain; charset=utf-8", info.ContentType)
    62  	require.Equal(t, int64(2), info.Size)
    63  }
    64  
    65  func workerPutObject(t *testing.T, s *workerTestState) {
    66  	r := strings.NewReader("content1")
    67  	_, err := s.minioClient.PutObject("out", "file", r, int64(r.Len()), minio.PutObjectOptions{ContentType: "text/plain"})
    68  	require.NoError(t, err)
    69  
    70  	// this should act as a PFS PutFileOverwrite
    71  	r2 := strings.NewReader("content2")
    72  	_, err = s.minioClient.PutObject("out", "file", r2, int64(r2.Len()), minio.PutObjectOptions{ContentType: "text/plain"})
    73  	require.NoError(t, err)
    74  
    75  	_, err = getObject(t, s.minioClient, "out", "file")
    76  	keyNotFoundError(t, err)
    77  }
    78  
    79  func workerPutObjectInputRepo(t *testing.T, s *workerTestState) {
    80  	r := strings.NewReader("content1")
    81  	_, err := s.minioClient.PutObject("in1", "0", r, int64(r.Len()), minio.PutObjectOptions{ContentType: "text/plain"})
    82  	notImplementedError(t, err)
    83  }
    84  
    85  func workerRemoveObject(t *testing.T, s *workerTestState) {
    86  	_, err := s.pachClient.PutFile(s.outputRepo, s.outputCommit.ID, "file", strings.NewReader("content"))
    87  	require.NoError(t, err)
    88  
    89  	// as per PFS semantics, the second delete should be a no-op
    90  	require.NoError(t, s.minioClient.RemoveObject("out", "file"))
    91  	require.NoError(t, s.minioClient.RemoveObject("out", "file"))
    92  }
    93  
    94  func workerRemoveObjectInputRepo(t *testing.T, s *workerTestState) {
    95  	err := s.minioClient.RemoveObject("in1", "0")
    96  	notImplementedError(t, err)
    97  }
    98  
    99  // Tests inserting and getting files over 64mb in size
   100  func workerLargeObjects(t *testing.T, s *workerTestState) {
   101  	// create a temporary file to put ~65mb of contents into it
   102  	inputFile, err := ioutil.TempFile("", "pachyderm-test-large-objects-input-*")
   103  	require.NoError(t, err)
   104  	defer os.Remove(inputFile.Name())
   105  	n, err := inputFile.WriteString(strings.Repeat("no tv and no beer make homer something something.\n", 1363149))
   106  	require.NoError(t, err)
   107  	require.Equal(t, n, 68157450)
   108  	require.NoError(t, inputFile.Sync())
   109  
   110  	// first ensure that putting into a repo that doesn't exist triggers an
   111  	// error
   112  	_, err = s.minioClient.FPutObject("foobar", "file", inputFile.Name(), minio.PutObjectOptions{
   113  		ContentType: "text/plain",
   114  	})
   115  	bucketNotFoundError(t, err)
   116  
   117  	// now try putting into a legit repo
   118  	l, err := s.minioClient.FPutObject("out", "file", inputFile.Name(), minio.PutObjectOptions{
   119  		ContentType: "text/plain",
   120  	})
   121  	require.NoError(t, err)
   122  	require.Equal(t, int(l), 68157450)
   123  
   124  	// try getting an object that does not exist
   125  	err = s.minioClient.FGetObject("foobar", "file", "foo", minio.GetObjectOptions{})
   126  	bucketNotFoundError(t, err)
   127  
   128  	// get the file that does exist, doesn't work because we're reading from
   129  	// an output repo
   130  	outputFile, err := ioutil.TempFile("", "pachyderm-test-large-objects-output-*")
   131  	require.NoError(t, err)
   132  	defer os.Remove(outputFile.Name())
   133  	err = s.minioClient.FGetObject("out", "file", outputFile.Name(), minio.GetObjectOptions{})
   134  	keyNotFoundError(t, err)
   135  }
   136  
   137  func workerMakeBucket(t *testing.T, s *workerTestState) {
   138  	repo := tu.UniqueString("testmakebucket")
   139  	notImplementedError(t, s.minioClient.MakeBucket(repo, ""))
   140  }
   141  
   142  func workerBucketExists(t *testing.T, s *workerTestState) {
   143  	exists, err := s.minioClient.BucketExists("in1")
   144  	require.NoError(t, err)
   145  	require.True(t, exists)
   146  
   147  	exists, err = s.minioClient.BucketExists("out")
   148  	require.NoError(t, err)
   149  	require.True(t, exists)
   150  
   151  	exists, err = s.minioClient.BucketExists("foobar")
   152  	require.NoError(t, err)
   153  	require.False(t, exists)
   154  }
   155  
   156  func workerRemoveBucket(t *testing.T, s *workerTestState) {
   157  	notImplementedError(t, s.minioClient.RemoveBucket("in1"))
   158  	notImplementedError(t, s.minioClient.RemoveBucket("out"))
   159  }
   160  
   161  func workerListObjectsPaginated(t *testing.T, s *workerTestState) {
   162  	// Request that will list all files in root
   163  	ch := s.minioClient.ListObjects("in2", "", false, make(chan struct{}))
   164  	expectedFiles := []string{}
   165  	for i := 0; i <= 1000; i++ {
   166  		expectedFiles = append(expectedFiles, fmt.Sprintf("%d", i))
   167  	}
   168  	checkListObjects(t, ch, nil, nil, expectedFiles, []string{"dir/"})
   169  
   170  	// Request that will list all files in with / as a prefix ("/" should mean
   171  	// the same as "", e.g. rust-s3 client)
   172  	ch = s.minioClient.ListObjects("in2", "/", false, make(chan struct{}))
   173  	expectedFiles = []string{}
   174  	for i := 0; i <= 1000; i++ {
   175  		expectedFiles = append(expectedFiles, fmt.Sprintf("%d", i))
   176  	}
   177  	checkListObjects(t, ch, nil, nil, expectedFiles, []string{"dir/"})
   178  
   179  	// Request that will list all files starting with 1
   180  	ch = s.minioClient.ListObjects("in2", "1", false, make(chan struct{}))
   181  	expectedFiles = []string{}
   182  	for i := 0; i <= 1000; i++ {
   183  		file := fmt.Sprintf("%d", i)
   184  		if strings.HasPrefix(file, "1") {
   185  			expectedFiles = append(expectedFiles, file)
   186  		}
   187  	}
   188  	checkListObjects(t, ch, nil, nil, expectedFiles, []string{})
   189  
   190  	// Request that will list all files in a directory
   191  	ch = s.minioClient.ListObjects("in2", "dir/", false, make(chan struct{}))
   192  	expectedFiles = []string{}
   193  	for i := 0; i < 10; i++ {
   194  		expectedFiles = append(expectedFiles, fmt.Sprintf("dir/%d", i))
   195  	}
   196  	checkListObjects(t, ch, nil, nil, expectedFiles, []string{})
   197  }
   198  
   199  func workerListObjectsRecursive(t *testing.T, s *workerTestState) {
   200  	// Request that will list all files in master
   201  	expectedFiles := []string{"0", "rootdir/1", "rootdir/subdir/2"}
   202  	ch := s.minioClient.ListObjects("in1", "", true, make(chan struct{}))
   203  	checkListObjects(t, ch, nil, nil, expectedFiles, []string{})
   204  
   205  	// Requests that will list all files in rootdir
   206  	expectedFiles = []string{"rootdir/1", "rootdir/subdir/2"}
   207  	ch = s.minioClient.ListObjects("in1", "r", true, make(chan struct{}))
   208  	checkListObjects(t, ch, nil, nil, expectedFiles, []string{})
   209  	ch = s.minioClient.ListObjects("in1", "rootdir", true, make(chan struct{}))
   210  	checkListObjects(t, ch, nil, nil, expectedFiles, []string{})
   211  	ch = s.minioClient.ListObjects("in1", "rootdir/", true, make(chan struct{}))
   212  	checkListObjects(t, ch, nil, nil, expectedFiles, []string{})
   213  
   214  	// Requests that will list all files in subdir
   215  	expectedFiles = []string{"rootdir/subdir/2"}
   216  	ch = s.minioClient.ListObjects("in1", "rootdir/s", true, make(chan struct{}))
   217  	checkListObjects(t, ch, nil, nil, expectedFiles, []string{})
   218  	ch = s.minioClient.ListObjects("in1", "rootdir/subdir", true, make(chan struct{}))
   219  	checkListObjects(t, ch, nil, nil, expectedFiles, []string{})
   220  	ch = s.minioClient.ListObjects("in1", "rootdir/subdir/", true, make(chan struct{}))
   221  	checkListObjects(t, ch, nil, nil, expectedFiles, []string{})
   222  	ch = s.minioClient.ListObjects("in1", "rootdir/subdir/2", true, make(chan struct{}))
   223  	checkListObjects(t, ch, nil, nil, expectedFiles, []string{})
   224  }
   225  
   226  func TestWorkerDriver(t *testing.T) {
   227  	if testing.Short() {
   228  		t.Skip("Skipping integration tests in short mode")
   229  	}
   230  
   231  	pachClient, err := client.NewForTest()
   232  	require.NoError(t, err)
   233  
   234  	inputRepo := tu.UniqueString("testworkerdriverinput")
   235  	require.NoError(t, pachClient.CreateRepo(inputRepo))
   236  	outputRepo := tu.UniqueString("testworkerdriveroutput")
   237  	require.NoError(t, pachClient.CreateRepo(outputRepo))
   238  
   239  	// create a master branch on the input repo
   240  	inputMasterCommit, err := pachClient.StartCommit(inputRepo, "master")
   241  	require.NoError(t, err)
   242  	putListFileTestObject(t, pachClient, inputRepo, inputMasterCommit.ID, "", 0)
   243  	putListFileTestObject(t, pachClient, inputRepo, inputMasterCommit.ID, "rootdir/", 1)
   244  	putListFileTestObject(t, pachClient, inputRepo, inputMasterCommit.ID, "rootdir/subdir/", 2)
   245  	require.NoError(t, pachClient.FinishCommit(inputRepo, inputMasterCommit.ID))
   246  
   247  	// create a develop branch on the input repo
   248  	inputDevelopCommit, err := pachClient.StartCommit(inputRepo, "develop")
   249  	require.NoError(t, err)
   250  	for i := 0; i <= 1000; i++ {
   251  		putListFileTestObject(t, pachClient, inputRepo, inputDevelopCommit.ID, "", i)
   252  	}
   253  	for i := 0; i < 10; i++ {
   254  		putListFileTestObject(t, pachClient, inputRepo, inputDevelopCommit.ID, "dir/", i)
   255  	}
   256  	require.NoError(t, pachClient.FinishCommit(inputRepo, inputDevelopCommit.ID))
   257  
   258  	// create the output branch
   259  	outputCommit, err := pachClient.StartCommit(outputRepo, "master")
   260  	require.NoError(t, err)
   261  
   262  	driver := NewWorkerDriver(
   263  		[]*Bucket{
   264  			&Bucket{
   265  				Repo:   inputRepo,
   266  				Commit: inputMasterCommit.ID,
   267  				Name:   "in1",
   268  			},
   269  			&Bucket{
   270  				Repo:   inputRepo,
   271  				Commit: inputDevelopCommit.ID,
   272  				Name:   "in2",
   273  			},
   274  		},
   275  		&Bucket{
   276  			Repo:   outputRepo,
   277  			Commit: outputCommit.ID,
   278  			Name:   "out",
   279  		},
   280  	)
   281  
   282  	testRunner(t, "worker", driver, func(t *testing.T, pachClient *client.APIClient, minioClient *minio.Client) {
   283  		s := &workerTestState{
   284  			pachClient:         pachClient,
   285  			minioClient:        minioClient,
   286  			inputRepo:          inputRepo,
   287  			outputRepo:         outputRepo,
   288  			inputMasterCommit:  inputMasterCommit,
   289  			inputDevelopCommit: inputDevelopCommit,
   290  			outputCommit:       outputCommit,
   291  		}
   292  
   293  		t.Run("ListBuckets", func(t *testing.T) {
   294  			workerListBuckets(t, s)
   295  		})
   296  		t.Run("GetObject", func(t *testing.T) {
   297  			workerGetObject(t, s)
   298  		})
   299  		t.Run("GetObjectOutputRepo", func(t *testing.T) {
   300  			workerGetObjectOutputRepo(t, s)
   301  		})
   302  		t.Run("StatObject", func(t *testing.T) {
   303  			workerStatObject(t, s)
   304  		})
   305  		t.Run("PutObject", func(t *testing.T) {
   306  			workerPutObject(t, s)
   307  		})
   308  		t.Run("PutObjectInputRepo", func(t *testing.T) {
   309  			workerPutObjectInputRepo(t, s)
   310  		})
   311  		t.Run("RemoveObject", func(t *testing.T) {
   312  			workerRemoveObject(t, s)
   313  		})
   314  		t.Run("RemoveObjectInputRepo", func(t *testing.T) {
   315  			workerRemoveObjectInputRepo(t, s)
   316  		})
   317  		t.Run("LargeObjects", func(t *testing.T) {
   318  			workerLargeObjects(t, s)
   319  		})
   320  		t.Run("MakeBucket", func(t *testing.T) {
   321  			workerMakeBucket(t, s)
   322  		})
   323  		t.Run("BucketExists", func(t *testing.T) {
   324  			workerBucketExists(t, s)
   325  		})
   326  		t.Run("RemoveBucket", func(t *testing.T) {
   327  			workerRemoveBucket(t, s)
   328  		})
   329  		t.Run("ListObjectsPaginated", func(t *testing.T) {
   330  			workerListObjectsPaginated(t, s)
   331  		})
   332  		t.Run("ListObjectsRecursive", func(t *testing.T) {
   333  			workerListObjectsRecursive(t, s)
   334  		})
   335  	})
   336  }