github.com/pachyderm/pachyderm@v1.13.4/src/server/pfs/s3/worker_test.go (about) 1 package s3 2 3 import ( 4 "fmt" 5 "io/ioutil" 6 "os" 7 "strings" 8 "testing" 9 10 minio "github.com/minio/minio-go/v6" 11 12 "github.com/pachyderm/pachyderm/src/client" 13 "github.com/pachyderm/pachyderm/src/client/pfs" 14 "github.com/pachyderm/pachyderm/src/client/pkg/require" 15 tu "github.com/pachyderm/pachyderm/src/server/pkg/testutil" 16 ) 17 18 type workerTestState struct { 19 pachClient *client.APIClient 20 minioClient *minio.Client 21 inputRepo string 22 outputRepo string 23 inputMasterCommit *pfs.Commit 24 inputDevelopCommit *pfs.Commit 25 outputCommit *pfs.Commit 26 } 27 28 func workerListBuckets(t *testing.T, s *workerTestState) { 29 // create a repo - this should not show up list buckets with the worker 30 // driver 31 repo := tu.UniqueString("testlistbuckets1") 32 require.NoError(t, s.pachClient.CreateRepo(repo)) 33 require.NoError(t, s.pachClient.CreateBranch(repo, "master", "", nil)) 34 35 buckets, err := s.minioClient.ListBuckets() 36 require.NoError(t, err) 37 38 actualBucketNames := []string{} 39 for _, bucket := range buckets { 40 actualBucketNames = append(actualBucketNames, bucket.Name) 41 } 42 43 require.ElementsEqual(t, []string{"in1", "in2", "out"}, actualBucketNames) 44 } 45 46 func workerGetObject(t *testing.T, s *workerTestState) { 47 fetchedContent, err := getObject(t, s.minioClient, "in1", "0") 48 require.NoError(t, err) 49 require.Equal(t, "0\n", fetchedContent) 50 } 51 52 func workerGetObjectOutputRepo(t *testing.T, s *workerTestState) { 53 _, err := getObject(t, s.minioClient, "out", "file") 54 keyNotFoundError(t, err) 55 } 56 57 func workerStatObject(t *testing.T, s *workerTestState) { 58 info, err := s.minioClient.StatObject("in1", "0", minio.StatObjectOptions{}) 59 require.NoError(t, err) 60 require.True(t, len(info.ETag) > 0) 61 require.Equal(t, "text/plain; charset=utf-8", info.ContentType) 62 require.Equal(t, int64(2), info.Size) 63 } 64 65 func workerPutObject(t *testing.T, s *workerTestState) { 66 r := strings.NewReader("content1") 67 _, err := s.minioClient.PutObject("out", "file", r, int64(r.Len()), minio.PutObjectOptions{ContentType: "text/plain"}) 68 require.NoError(t, err) 69 70 // this should act as a PFS PutFileOverwrite 71 r2 := strings.NewReader("content2") 72 _, err = s.minioClient.PutObject("out", "file", r2, int64(r2.Len()), minio.PutObjectOptions{ContentType: "text/plain"}) 73 require.NoError(t, err) 74 75 _, err = getObject(t, s.minioClient, "out", "file") 76 keyNotFoundError(t, err) 77 } 78 79 func workerPutObjectInputRepo(t *testing.T, s *workerTestState) { 80 r := strings.NewReader("content1") 81 _, err := s.minioClient.PutObject("in1", "0", r, int64(r.Len()), minio.PutObjectOptions{ContentType: "text/plain"}) 82 notImplementedError(t, err) 83 } 84 85 func workerRemoveObject(t *testing.T, s *workerTestState) { 86 _, err := s.pachClient.PutFile(s.outputRepo, s.outputCommit.ID, "file", strings.NewReader("content")) 87 require.NoError(t, err) 88 89 // as per PFS semantics, the second delete should be a no-op 90 require.NoError(t, s.minioClient.RemoveObject("out", "file")) 91 require.NoError(t, s.minioClient.RemoveObject("out", "file")) 92 } 93 94 func workerRemoveObjectInputRepo(t *testing.T, s *workerTestState) { 95 err := s.minioClient.RemoveObject("in1", "0") 96 notImplementedError(t, err) 97 } 98 99 // Tests inserting and getting files over 64mb in size 100 func workerLargeObjects(t *testing.T, s *workerTestState) { 101 // create a temporary file to put ~65mb of contents into it 102 inputFile, err := ioutil.TempFile("", "pachyderm-test-large-objects-input-*") 103 require.NoError(t, err) 104 defer os.Remove(inputFile.Name()) 105 n, err := inputFile.WriteString(strings.Repeat("no tv and no beer make homer something something.\n", 1363149)) 106 require.NoError(t, err) 107 require.Equal(t, n, 68157450) 108 require.NoError(t, inputFile.Sync()) 109 110 // first ensure that putting into a repo that doesn't exist triggers an 111 // error 112 _, err = s.minioClient.FPutObject("foobar", "file", inputFile.Name(), minio.PutObjectOptions{ 113 ContentType: "text/plain", 114 }) 115 bucketNotFoundError(t, err) 116 117 // now try putting into a legit repo 118 l, err := s.minioClient.FPutObject("out", "file", inputFile.Name(), minio.PutObjectOptions{ 119 ContentType: "text/plain", 120 }) 121 require.NoError(t, err) 122 require.Equal(t, int(l), 68157450) 123 124 // try getting an object that does not exist 125 err = s.minioClient.FGetObject("foobar", "file", "foo", minio.GetObjectOptions{}) 126 bucketNotFoundError(t, err) 127 128 // get the file that does exist, doesn't work because we're reading from 129 // an output repo 130 outputFile, err := ioutil.TempFile("", "pachyderm-test-large-objects-output-*") 131 require.NoError(t, err) 132 defer os.Remove(outputFile.Name()) 133 err = s.minioClient.FGetObject("out", "file", outputFile.Name(), minio.GetObjectOptions{}) 134 keyNotFoundError(t, err) 135 } 136 137 func workerMakeBucket(t *testing.T, s *workerTestState) { 138 repo := tu.UniqueString("testmakebucket") 139 notImplementedError(t, s.minioClient.MakeBucket(repo, "")) 140 } 141 142 func workerBucketExists(t *testing.T, s *workerTestState) { 143 exists, err := s.minioClient.BucketExists("in1") 144 require.NoError(t, err) 145 require.True(t, exists) 146 147 exists, err = s.minioClient.BucketExists("out") 148 require.NoError(t, err) 149 require.True(t, exists) 150 151 exists, err = s.minioClient.BucketExists("foobar") 152 require.NoError(t, err) 153 require.False(t, exists) 154 } 155 156 func workerRemoveBucket(t *testing.T, s *workerTestState) { 157 notImplementedError(t, s.minioClient.RemoveBucket("in1")) 158 notImplementedError(t, s.minioClient.RemoveBucket("out")) 159 } 160 161 func workerListObjectsPaginated(t *testing.T, s *workerTestState) { 162 // Request that will list all files in root 163 ch := s.minioClient.ListObjects("in2", "", false, make(chan struct{})) 164 expectedFiles := []string{} 165 for i := 0; i <= 1000; i++ { 166 expectedFiles = append(expectedFiles, fmt.Sprintf("%d", i)) 167 } 168 checkListObjects(t, ch, nil, nil, expectedFiles, []string{"dir/"}) 169 170 // Request that will list all files in with / as a prefix ("/" should mean 171 // the same as "", e.g. rust-s3 client) 172 ch = s.minioClient.ListObjects("in2", "/", false, make(chan struct{})) 173 expectedFiles = []string{} 174 for i := 0; i <= 1000; i++ { 175 expectedFiles = append(expectedFiles, fmt.Sprintf("%d", i)) 176 } 177 checkListObjects(t, ch, nil, nil, expectedFiles, []string{"dir/"}) 178 179 // Request that will list all files starting with 1 180 ch = s.minioClient.ListObjects("in2", "1", false, make(chan struct{})) 181 expectedFiles = []string{} 182 for i := 0; i <= 1000; i++ { 183 file := fmt.Sprintf("%d", i) 184 if strings.HasPrefix(file, "1") { 185 expectedFiles = append(expectedFiles, file) 186 } 187 } 188 checkListObjects(t, ch, nil, nil, expectedFiles, []string{}) 189 190 // Request that will list all files in a directory 191 ch = s.minioClient.ListObjects("in2", "dir/", false, make(chan struct{})) 192 expectedFiles = []string{} 193 for i := 0; i < 10; i++ { 194 expectedFiles = append(expectedFiles, fmt.Sprintf("dir/%d", i)) 195 } 196 checkListObjects(t, ch, nil, nil, expectedFiles, []string{}) 197 } 198 199 func workerListObjectsRecursive(t *testing.T, s *workerTestState) { 200 // Request that will list all files in master 201 expectedFiles := []string{"0", "rootdir/1", "rootdir/subdir/2"} 202 ch := s.minioClient.ListObjects("in1", "", true, make(chan struct{})) 203 checkListObjects(t, ch, nil, nil, expectedFiles, []string{}) 204 205 // Requests that will list all files in rootdir 206 expectedFiles = []string{"rootdir/1", "rootdir/subdir/2"} 207 ch = s.minioClient.ListObjects("in1", "r", true, make(chan struct{})) 208 checkListObjects(t, ch, nil, nil, expectedFiles, []string{}) 209 ch = s.minioClient.ListObjects("in1", "rootdir", true, make(chan struct{})) 210 checkListObjects(t, ch, nil, nil, expectedFiles, []string{}) 211 ch = s.minioClient.ListObjects("in1", "rootdir/", true, make(chan struct{})) 212 checkListObjects(t, ch, nil, nil, expectedFiles, []string{}) 213 214 // Requests that will list all files in subdir 215 expectedFiles = []string{"rootdir/subdir/2"} 216 ch = s.minioClient.ListObjects("in1", "rootdir/s", true, make(chan struct{})) 217 checkListObjects(t, ch, nil, nil, expectedFiles, []string{}) 218 ch = s.minioClient.ListObjects("in1", "rootdir/subdir", true, make(chan struct{})) 219 checkListObjects(t, ch, nil, nil, expectedFiles, []string{}) 220 ch = s.minioClient.ListObjects("in1", "rootdir/subdir/", true, make(chan struct{})) 221 checkListObjects(t, ch, nil, nil, expectedFiles, []string{}) 222 ch = s.minioClient.ListObjects("in1", "rootdir/subdir/2", true, make(chan struct{})) 223 checkListObjects(t, ch, nil, nil, expectedFiles, []string{}) 224 } 225 226 func TestWorkerDriver(t *testing.T) { 227 if testing.Short() { 228 t.Skip("Skipping integration tests in short mode") 229 } 230 231 pachClient, err := client.NewForTest() 232 require.NoError(t, err) 233 234 inputRepo := tu.UniqueString("testworkerdriverinput") 235 require.NoError(t, pachClient.CreateRepo(inputRepo)) 236 outputRepo := tu.UniqueString("testworkerdriveroutput") 237 require.NoError(t, pachClient.CreateRepo(outputRepo)) 238 239 // create a master branch on the input repo 240 inputMasterCommit, err := pachClient.StartCommit(inputRepo, "master") 241 require.NoError(t, err) 242 putListFileTestObject(t, pachClient, inputRepo, inputMasterCommit.ID, "", 0) 243 putListFileTestObject(t, pachClient, inputRepo, inputMasterCommit.ID, "rootdir/", 1) 244 putListFileTestObject(t, pachClient, inputRepo, inputMasterCommit.ID, "rootdir/subdir/", 2) 245 require.NoError(t, pachClient.FinishCommit(inputRepo, inputMasterCommit.ID)) 246 247 // create a develop branch on the input repo 248 inputDevelopCommit, err := pachClient.StartCommit(inputRepo, "develop") 249 require.NoError(t, err) 250 for i := 0; i <= 1000; i++ { 251 putListFileTestObject(t, pachClient, inputRepo, inputDevelopCommit.ID, "", i) 252 } 253 for i := 0; i < 10; i++ { 254 putListFileTestObject(t, pachClient, inputRepo, inputDevelopCommit.ID, "dir/", i) 255 } 256 require.NoError(t, pachClient.FinishCommit(inputRepo, inputDevelopCommit.ID)) 257 258 // create the output branch 259 outputCommit, err := pachClient.StartCommit(outputRepo, "master") 260 require.NoError(t, err) 261 262 driver := NewWorkerDriver( 263 []*Bucket{ 264 &Bucket{ 265 Repo: inputRepo, 266 Commit: inputMasterCommit.ID, 267 Name: "in1", 268 }, 269 &Bucket{ 270 Repo: inputRepo, 271 Commit: inputDevelopCommit.ID, 272 Name: "in2", 273 }, 274 }, 275 &Bucket{ 276 Repo: outputRepo, 277 Commit: outputCommit.ID, 278 Name: "out", 279 }, 280 ) 281 282 testRunner(t, "worker", driver, func(t *testing.T, pachClient *client.APIClient, minioClient *minio.Client) { 283 s := &workerTestState{ 284 pachClient: pachClient, 285 minioClient: minioClient, 286 inputRepo: inputRepo, 287 outputRepo: outputRepo, 288 inputMasterCommit: inputMasterCommit, 289 inputDevelopCommit: inputDevelopCommit, 290 outputCommit: outputCommit, 291 } 292 293 t.Run("ListBuckets", func(t *testing.T) { 294 workerListBuckets(t, s) 295 }) 296 t.Run("GetObject", func(t *testing.T) { 297 workerGetObject(t, s) 298 }) 299 t.Run("GetObjectOutputRepo", func(t *testing.T) { 300 workerGetObjectOutputRepo(t, s) 301 }) 302 t.Run("StatObject", func(t *testing.T) { 303 workerStatObject(t, s) 304 }) 305 t.Run("PutObject", func(t *testing.T) { 306 workerPutObject(t, s) 307 }) 308 t.Run("PutObjectInputRepo", func(t *testing.T) { 309 workerPutObjectInputRepo(t, s) 310 }) 311 t.Run("RemoveObject", func(t *testing.T) { 312 workerRemoveObject(t, s) 313 }) 314 t.Run("RemoveObjectInputRepo", func(t *testing.T) { 315 workerRemoveObjectInputRepo(t, s) 316 }) 317 t.Run("LargeObjects", func(t *testing.T) { 318 workerLargeObjects(t, s) 319 }) 320 t.Run("MakeBucket", func(t *testing.T) { 321 workerMakeBucket(t, s) 322 }) 323 t.Run("BucketExists", func(t *testing.T) { 324 workerBucketExists(t, s) 325 }) 326 t.Run("RemoveBucket", func(t *testing.T) { 327 workerRemoveBucket(t, s) 328 }) 329 t.Run("ListObjectsPaginated", func(t *testing.T) { 330 workerListObjectsPaginated(t, s) 331 }) 332 t.Run("ListObjectsRecursive", func(t *testing.T) { 333 workerListObjectsRecursive(t, s) 334 }) 335 }) 336 }