github.com/pachyderm/pachyderm@v1.13.4/src/server/worker/datum/iterator_test.go (about) 1 package datum 2 3 import ( 4 "fmt" 5 "strings" 6 "testing" 7 8 "github.com/pachyderm/pachyderm/src/client" 9 "github.com/pachyderm/pachyderm/src/client/pkg/require" 10 "github.com/pachyderm/pachyderm/src/client/pps" 11 "github.com/pachyderm/pachyderm/src/server/pkg/testpachd" 12 tu "github.com/pachyderm/pachyderm/src/server/pkg/testutil" 13 ) 14 15 func TestIterators(t *testing.T) { 16 require.NoError(t, testpachd.WithRealEnv(func(env *testpachd.RealEnv) error { 17 c := env.PachClient 18 dataRepo := tu.UniqueString(t.Name() + "_data") 19 require.NoError(t, c.CreateRepo(dataRepo)) 20 21 // put files in structured in a way so that there are many ways to glob it 22 commit, err := c.StartCommit(dataRepo, "master") 23 require.NoError(t, err) 24 for j := 0; j < 50; j++ { 25 _, err = c.PutFile(dataRepo, commit.ID, fmt.Sprintf("foo%v", j), strings.NewReader("bar")) 26 require.NoError(t, err) 27 } 28 require.NoError(t, err) 29 require.NoError(t, c.FinishCommit(dataRepo, commit.ID)) 30 31 // in0 has zero datums, for testing edge cases 32 in0 := client.NewPFSInput(dataRepo, "!(**)") 33 in0.Pfs.Commit = commit.ID 34 t.Run("ZeroDatums", func(t *testing.T) { 35 pfs0, err := NewIterator(c, in0) 36 require.NoError(t, err) 37 38 validateDI(t, pfs0) 39 }) 40 41 // in[1-2] are basic PFS inputs 42 in1 := client.NewPFSInput(dataRepo, "/foo?1") 43 in1.Pfs.Commit = commit.ID 44 in2 := client.NewPFSInput(dataRepo, "/foo*2") 45 in2.Pfs.Commit = commit.ID 46 t.Run("Basic", func(t *testing.T) { 47 pfs1, err := NewIterator(c, in1) 48 require.NoError(t, err) 49 pfs2, err := NewIterator(c, in2) 50 require.NoError(t, err) 51 52 // iterate through pfs0, pfs1 and pfs2 and verify they are as we expect 53 validateDI(t, pfs1, "/foo11", "/foo21", "/foo31", "/foo41") 54 validateDI(t, pfs2, "/foo12", "/foo2", "/foo22", "/foo32", "/foo42") 55 }) 56 57 in3 := client.NewUnionInput(in1, in2) 58 t.Run("Union", func(t *testing.T) { 59 union1, err := NewIterator(c, in3) 60 require.NoError(t, err) 61 validateDI(t, union1, "/foo11", "/foo21", "/foo31", "/foo41", 62 "/foo12", "/foo2", "/foo22", "/foo32", "/foo42") 63 }) 64 65 in4 := client.NewCrossInput(in1, in2) 66 t.Run("Cross", func(t *testing.T) { 67 cross1, err := NewIterator(c, in4) 68 require.NoError(t, err) 69 validateDI(t, cross1, 70 "/foo11/foo12", "/foo21/foo12", "/foo31/foo12", "/foo41/foo12", 71 "/foo11/foo2", "/foo21/foo2", "/foo31/foo2", "/foo41/foo2", 72 "/foo11/foo22", "/foo21/foo22", "/foo31/foo22", "/foo41/foo22", 73 "/foo11/foo32", "/foo21/foo32", "/foo31/foo32", "/foo41/foo32", 74 "/foo11/foo42", "/foo21/foo42", "/foo31/foo42", "/foo41/foo42", 75 ) 76 }) 77 78 // in5 is a nested cross 79 in5 := client.NewCrossInput(in3, in4) 80 t.Run("NestedCross", func(t *testing.T) { 81 cross2, err := NewIterator(c, in5) 82 require.NoError(t, err) 83 validateDI(t, cross2, 84 "/foo11/foo11/foo12", "/foo21/foo11/foo12", "/foo31/foo11/foo12", "/foo41/foo11/foo12", "/foo12/foo11/foo12", "/foo2/foo11/foo12", "/foo22/foo11/foo12", "/foo32/foo11/foo12", "/foo42/foo11/foo12", 85 "/foo11/foo21/foo12", "/foo21/foo21/foo12", "/foo31/foo21/foo12", "/foo41/foo21/foo12", "/foo12/foo21/foo12", "/foo2/foo21/foo12", "/foo22/foo21/foo12", "/foo32/foo21/foo12", "/foo42/foo21/foo12", 86 "/foo11/foo31/foo12", "/foo21/foo31/foo12", "/foo31/foo31/foo12", "/foo41/foo31/foo12", "/foo12/foo31/foo12", "/foo2/foo31/foo12", "/foo22/foo31/foo12", "/foo32/foo31/foo12", "/foo42/foo31/foo12", 87 "/foo11/foo41/foo12", "/foo21/foo41/foo12", "/foo31/foo41/foo12", "/foo41/foo41/foo12", "/foo12/foo41/foo12", "/foo2/foo41/foo12", "/foo22/foo41/foo12", "/foo32/foo41/foo12", "/foo42/foo41/foo12", 88 "/foo11/foo11/foo2", "/foo21/foo11/foo2", "/foo31/foo11/foo2", "/foo41/foo11/foo2", "/foo12/foo11/foo2", "/foo2/foo11/foo2", "/foo22/foo11/foo2", "/foo32/foo11/foo2", "/foo42/foo11/foo2", 89 "/foo11/foo21/foo2", "/foo21/foo21/foo2", "/foo31/foo21/foo2", "/foo41/foo21/foo2", "/foo12/foo21/foo2", "/foo2/foo21/foo2", "/foo22/foo21/foo2", "/foo32/foo21/foo2", "/foo42/foo21/foo2", 90 "/foo11/foo31/foo2", "/foo21/foo31/foo2", "/foo31/foo31/foo2", "/foo41/foo31/foo2", "/foo12/foo31/foo2", "/foo2/foo31/foo2", "/foo22/foo31/foo2", "/foo32/foo31/foo2", "/foo42/foo31/foo2", 91 "/foo11/foo41/foo2", "/foo21/foo41/foo2", "/foo31/foo41/foo2", "/foo41/foo41/foo2", "/foo12/foo41/foo2", "/foo2/foo41/foo2", "/foo22/foo41/foo2", "/foo32/foo41/foo2", "/foo42/foo41/foo2", 92 "/foo11/foo11/foo22", "/foo21/foo11/foo22", "/foo31/foo11/foo22", "/foo41/foo11/foo22", "/foo12/foo11/foo22", "/foo2/foo11/foo22", "/foo22/foo11/foo22", "/foo32/foo11/foo22", "/foo42/foo11/foo22", 93 "/foo11/foo21/foo22", "/foo21/foo21/foo22", "/foo31/foo21/foo22", "/foo41/foo21/foo22", "/foo12/foo21/foo22", "/foo2/foo21/foo22", "/foo22/foo21/foo22", "/foo32/foo21/foo22", "/foo42/foo21/foo22", 94 "/foo11/foo31/foo22", "/foo21/foo31/foo22", "/foo31/foo31/foo22", "/foo41/foo31/foo22", "/foo12/foo31/foo22", "/foo2/foo31/foo22", "/foo22/foo31/foo22", "/foo32/foo31/foo22", "/foo42/foo31/foo22", 95 "/foo11/foo41/foo22", "/foo21/foo41/foo22", "/foo31/foo41/foo22", "/foo41/foo41/foo22", "/foo12/foo41/foo22", "/foo2/foo41/foo22", "/foo22/foo41/foo22", "/foo32/foo41/foo22", "/foo42/foo41/foo22", 96 "/foo11/foo11/foo32", "/foo21/foo11/foo32", "/foo31/foo11/foo32", "/foo41/foo11/foo32", "/foo12/foo11/foo32", "/foo2/foo11/foo32", "/foo22/foo11/foo32", "/foo32/foo11/foo32", "/foo42/foo11/foo32", 97 "/foo11/foo21/foo32", "/foo21/foo21/foo32", "/foo31/foo21/foo32", "/foo41/foo21/foo32", "/foo12/foo21/foo32", "/foo2/foo21/foo32", "/foo22/foo21/foo32", "/foo32/foo21/foo32", "/foo42/foo21/foo32", 98 "/foo11/foo31/foo32", "/foo21/foo31/foo32", "/foo31/foo31/foo32", "/foo41/foo31/foo32", "/foo12/foo31/foo32", "/foo2/foo31/foo32", "/foo22/foo31/foo32", "/foo32/foo31/foo32", "/foo42/foo31/foo32", 99 "/foo11/foo41/foo32", "/foo21/foo41/foo32", "/foo31/foo41/foo32", "/foo41/foo41/foo32", "/foo12/foo41/foo32", "/foo2/foo41/foo32", "/foo22/foo41/foo32", "/foo32/foo41/foo32", "/foo42/foo41/foo32", 100 "/foo11/foo11/foo42", "/foo21/foo11/foo42", "/foo31/foo11/foo42", "/foo41/foo11/foo42", "/foo12/foo11/foo42", "/foo2/foo11/foo42", "/foo22/foo11/foo42", "/foo32/foo11/foo42", "/foo42/foo11/foo42", 101 "/foo11/foo21/foo42", "/foo21/foo21/foo42", "/foo31/foo21/foo42", "/foo41/foo21/foo42", "/foo12/foo21/foo42", "/foo2/foo21/foo42", "/foo22/foo21/foo42", "/foo32/foo21/foo42", "/foo42/foo21/foo42", 102 "/foo11/foo31/foo42", "/foo21/foo31/foo42", "/foo31/foo31/foo42", "/foo41/foo31/foo42", "/foo12/foo31/foo42", "/foo2/foo31/foo42", "/foo22/foo31/foo42", "/foo32/foo31/foo42", "/foo42/foo31/foo42", 103 "/foo11/foo41/foo42", "/foo21/foo41/foo42", "/foo31/foo41/foo42", "/foo41/foo41/foo42", "/foo12/foo41/foo42", "/foo2/foo41/foo42", "/foo22/foo41/foo42", "/foo32/foo41/foo42", "/foo42/foo41/foo42") 104 }) 105 106 // in6 is a cross with a zero datum input (should also be zero) 107 in6 := client.NewCrossInput(in3, in0, in2, in4) 108 t.Run("EmptyCross", func(t *testing.T) { 109 cross3, err := NewIterator(c, in6) 110 require.NoError(t, err) 111 validateDI(t, cross3) 112 }) 113 114 // in7 is a cross with a [nested cross w/ a zero datum input] 115 // (should also be zero) 116 in7 := client.NewCrossInput(in6, in1) 117 t.Run("NestedEmptyCross", func(t *testing.T) { 118 cross4, err := NewIterator(c, in7) 119 require.NoError(t, err) 120 validateDI(t, cross4) 121 }) 122 123 // in[8-9] are elements of in10, which is a join input 124 in8 := client.NewPFSInputOpts("", dataRepo, "", "/foo(?)(?)", "$1$2", "", false, false, nil) 125 in8.Pfs.Commit = commit.ID 126 in9 := client.NewPFSInputOpts("", dataRepo, "", "/foo(?)(?)", "$2$1", "", false, false, nil) 127 in9.Pfs.Commit = commit.ID 128 in10 := client.NewJoinInput(in8, in9) 129 t.Run("Join", func(t *testing.T) { 130 join1, err := NewIterator(c, in10) 131 require.NoError(t, err) 132 validateDI(t, join1, 133 "/foo11/foo11", 134 "/foo12/foo21", 135 "/foo13/foo31", 136 "/foo14/foo41", 137 "/foo21/foo12", 138 "/foo22/foo22", 139 "/foo23/foo32", 140 "/foo24/foo42", 141 "/foo31/foo13", 142 "/foo32/foo23", 143 "/foo33/foo33", 144 "/foo34/foo43", 145 "/foo41/foo14", 146 "/foo42/foo24", 147 "/foo43/foo34", 148 "/foo44/foo44") 149 }) 150 151 // in11 is an S3 input 152 in11 := client.NewS3PFSInput("", dataRepo, "") 153 in11.Pfs.Commit = commit.ID 154 t.Run("PlainS3", func(t *testing.T) { 155 s3itr, err := NewIterator(c, in11) 156 require.NoError(t, err) 157 validateDI(t, s3itr, "/") 158 159 // Check that every datum has an S3 input 160 s3itr, _ = NewIterator(c, in11) 161 var checked, s3Count int 162 for s3itr.Next() { 163 checked++ 164 require.Equal(t, 1, len(s3itr.Datum())) 165 if s3itr.Datum()[0].S3 { 166 s3Count++ 167 break 168 } 169 } 170 require.True(t, checked > 0 && checked == s3Count, 171 "checked: %v, s3Count: %v", checked, s3Count) 172 }) 173 174 // in12 is a cross that contains an S3 input and two non-s3 inputs 175 in12 := client.NewCrossInput(in1, in2, in11) 176 t.Run("S3MixedCross", func(t *testing.T) { 177 s3CrossItr, err := NewIterator(c, in12) 178 require.NoError(t, err) 179 validateDI(t, s3CrossItr, 180 "/foo11/foo12/", "/foo21/foo12/", "/foo31/foo12/", "/foo41/foo12/", 181 "/foo11/foo2/", "/foo21/foo2/", "/foo31/foo2/", "/foo41/foo2/", 182 "/foo11/foo22/", "/foo21/foo22/", "/foo31/foo22/", "/foo41/foo22/", 183 "/foo11/foo32/", "/foo21/foo32/", "/foo31/foo32/", "/foo41/foo32/", 184 "/foo11/foo42/", "/foo21/foo42/", "/foo31/foo42/", "/foo41/foo42/", 185 ) 186 187 s3CrossItr, _ = NewIterator(c, in12) 188 var checked, s3Count int 189 for s3CrossItr.Next() { 190 checked++ 191 for _, d := range s3CrossItr.Datum() { 192 if d.S3 { 193 s3Count++ 194 } 195 } 196 } 197 require.True(t, checked > 0 && checked == s3Count, 198 "checked: %v, s3Count: %v", checked, s3Count) 199 }) 200 201 // in13 is a cross consisting of exclusively S3 inputs 202 in13 := client.NewCrossInput(in11, in11, in11) 203 t.Run("S3OnlyCrossUnionJoin", func(t *testing.T) { 204 s3CrossItr, err := NewIterator(c, in13) 205 require.NoError(t, err) 206 validateDI(t, s3CrossItr, "///") 207 208 s3CrossItr, _ = NewIterator(c, in13) 209 var checked, s3Count int 210 for s3CrossItr.Next() { 211 checked++ 212 for _, d := range s3CrossItr.Datum() { 213 if d.S3 { 214 s3Count++ 215 } 216 } 217 } 218 require.True(t, checked > 0 && 3*checked == s3Count, 219 "checked: %v, s3Count: %v", checked, s3Count) 220 }) 221 222 in14 := client.NewPFSInputOpts("", dataRepo, "", "/foo(?)(?)", "", "$1", false, false, nil) 223 in14.Pfs.Commit = commit.ID 224 in15 := client.NewGroupInput(in14) 225 t.Run("GroupSingle", func(t *testing.T) { 226 group1, err := NewIterator(c, in15) 227 require.NoError(t, err) 228 validateDI(t, group1, 229 "/foo10/foo11/foo12/foo13/foo14/foo15/foo16/foo17/foo18/foo19", 230 "/foo20/foo21/foo22/foo23/foo24/foo25/foo26/foo27/foo28/foo29", 231 "/foo30/foo31/foo32/foo33/foo34/foo35/foo36/foo37/foo38/foo39", 232 "/foo40/foo41/foo42/foo43/foo44/foo45/foo46/foo47/foo48/foo49") 233 }) 234 235 in16 := client.NewPFSInputOpts("", dataRepo, "", "/foo(?)(?)", "", "$1", false, false, nil) 236 in16.Pfs.Commit = commit.ID 237 in17 := client.NewPFSInputOpts("", dataRepo, "", "/foo(?)(?)", "", "$2", false, false, nil) 238 in17.Pfs.Commit = commit.ID 239 in18 := client.NewGroupInput(in16, in17) 240 t.Run("GroupDoubles", func(t *testing.T) { 241 group2, err := NewIterator(c, in18) 242 require.NoError(t, err) 243 validateDI(t, group2, 244 "/foo10/foo20/foo30/foo40", 245 "/foo10/foo11/foo12/foo13/foo14/foo15/foo16/foo17/foo18/foo19/foo11/foo21/foo31/foo41", 246 "/foo20/foo21/foo22/foo23/foo24/foo25/foo26/foo27/foo28/foo29/foo12/foo22/foo32/foo42", 247 "/foo30/foo31/foo32/foo33/foo34/foo35/foo36/foo37/foo38/foo39/foo13/foo23/foo33/foo43", 248 "/foo40/foo41/foo42/foo43/foo44/foo45/foo46/foo47/foo48/foo49/foo14/foo24/foo34/foo44", 249 "/foo15/foo25/foo35/foo45", 250 "/foo16/foo26/foo36/foo46", 251 "/foo17/foo27/foo37/foo47", 252 "/foo18/foo28/foo38/foo48", 253 "/foo19/foo29/foo39/foo49") 254 }) 255 256 in19 := client.NewPFSInputOpts("", dataRepo, "", "/foo(?)(?)", "$1$2", "$1", false, false, nil) 257 in19.Pfs.Commit = commit.ID 258 in20 := client.NewPFSInputOpts("", dataRepo, "", "/foo(?)(?)", "$2$1", "$2", false, false, nil) 259 in20.Pfs.Commit = commit.ID 260 261 in21 := client.NewJoinInput(in19, in20) 262 in22 := client.NewGroupInput(in21) 263 t.Run("GroupJoin", func(t *testing.T) { 264 groupJoin1, err := NewIterator(c, in22) 265 require.NoError(t, err) 266 validateDI(t, groupJoin1, 267 "/foo11/foo11/foo12/foo21/foo13/foo31/foo14/foo41", 268 "/foo21/foo12/foo22/foo22/foo23/foo32/foo24/foo42", 269 "/foo31/foo13/foo32/foo23/foo33/foo33/foo34/foo43", 270 "/foo41/foo14/foo42/foo24/foo43/foo34/foo44/foo44") 271 }) 272 273 in23 := client.NewPFSInputOpts("", dataRepo, "", "/foo(?)(?)", "$1$2", "$1", false, false, nil) 274 in23.Pfs.Commit = commit.ID 275 in24 := client.NewPFSInputOpts("", dataRepo, "", "/foo(?)(?)", "$2$1", "$2", false, false, nil) 276 in24.Pfs.Commit = commit.ID 277 278 in25 := client.NewGroupInput(in24) 279 in26 := client.NewUnionInput(in23, in25) 280 281 t.Run("UnionGroup", func(t *testing.T) { 282 unionGroup1, err := NewIterator(c, in26) 283 require.NoError(t, err) 284 validateDI(t, unionGroup1, 285 "/foo10", 286 "/foo11", 287 "/foo12", 288 "/foo13", 289 "/foo14", 290 "/foo15", 291 "/foo16", 292 "/foo17", 293 "/foo18", 294 "/foo19", 295 "/foo20", 296 "/foo21", 297 "/foo22", 298 "/foo23", 299 "/foo24", 300 "/foo25", 301 "/foo26", 302 "/foo27", 303 "/foo28", 304 "/foo29", 305 "/foo30", 306 "/foo31", 307 "/foo32", 308 "/foo33", 309 "/foo34", 310 "/foo35", 311 "/foo36", 312 "/foo37", 313 "/foo38", 314 "/foo39", 315 "/foo40", 316 "/foo41", 317 "/foo42", 318 "/foo43", 319 "/foo44", 320 "/foo45", 321 "/foo46", 322 "/foo47", 323 "/foo48", 324 "/foo49", 325 "/foo10/foo20/foo30/foo40", 326 "/foo11/foo21/foo31/foo41", 327 "/foo12/foo22/foo32/foo42", 328 "/foo13/foo23/foo33/foo43", 329 "/foo14/foo24/foo34/foo44", 330 "/foo15/foo25/foo35/foo45", 331 "/foo16/foo26/foo36/foo46", 332 "/foo17/foo27/foo37/foo47", 333 "/foo18/foo28/foo38/foo48", 334 "/foo19/foo29/foo39/foo49") 335 }) 336 return nil 337 })) 338 } 339 340 // TestJoinOnTrailingSlash tests that the same glob pattern is used for 341 // extracting JoinOn and GroupBy capture groups as is used to match paths. Tests 342 // the fix for https://github.com/pachyderm/pachyderm/issues/5365 343 func TestJoinTrailingSlash(t *testing.T) { 344 require.NoError(t, testpachd.WithRealEnv(func(env *testpachd.RealEnv) error { 345 c := env.PachClient 346 repo := []string{ // singular name b/c we only refer to individual elements 347 tu.UniqueString(t.Name() + "_0"), 348 tu.UniqueString(t.Name() + "_1"), 349 } 350 input := []*pps.Input{ // singular name b/c only use individual elements 351 client.NewPFSInputOpts("", repo[0], 352 /* commit--set below */ "", "/*", "$1", "", false, false, nil), 353 client.NewPFSInputOpts("", repo[1], 354 /* commit--set below */ "", "/*", "$1", "", false, false, nil), 355 } 356 require.NoError(t, c.CreateRepo(repo[0])) 357 require.NoError(t, c.CreateRepo(repo[1])) 358 359 // put files in structured in a way so that there are many ways to glob it 360 for i := 0; i < 2; i++ { 361 commit, err := c.StartCommit(repo[i], "master") 362 require.NoError(t, err) 363 for j := 0; j < 10; j++ { 364 _, err = c.PutFile(repo[i], commit.ID, fmt.Sprintf("foo-%v", j), strings.NewReader("bar")) 365 require.NoError(t, err) 366 } 367 require.NoError(t, err) 368 require.NoError(t, c.FinishCommit(repo[i], commit.ID)) 369 input[i].Pfs.Commit = commit.ID 370 } 371 372 // Test without trailing slashes 373 input[0].Pfs.Glob = "/(*)" 374 input[1].Pfs.Glob = "/(*)" 375 itr, err := NewIterator(c, client.NewJoinInput(input...)) 376 require.NoError(t, err) 377 validateDI(t, itr, 378 "/foo-0/foo-0", 379 "/foo-1/foo-1", 380 "/foo-2/foo-2", 381 "/foo-3/foo-3", 382 "/foo-4/foo-4", 383 "/foo-5/foo-5", 384 "/foo-6/foo-6", 385 "/foo-7/foo-7", 386 "/foo-8/foo-8", 387 "/foo-9/foo-9", 388 ) 389 // Test with trailing slashes 390 input[0].Pfs.Glob = "/(*)/" 391 input[1].Pfs.Glob = "/(*)/" 392 itr, err = NewIterator(c, client.NewJoinInput(input...)) 393 require.NoError(t, err) 394 validateDI(t, itr, 395 "/foo-0/foo-0", 396 "/foo-1/foo-1", 397 "/foo-2/foo-2", 398 "/foo-3/foo-3", 399 "/foo-4/foo-4", 400 "/foo-5/foo-5", 401 "/foo-6/foo-6", 402 "/foo-7/foo-7", 403 "/foo-8/foo-8", 404 "/foo-9/foo-9", 405 ) 406 407 return nil 408 })) 409 } 410 411 func validateDI(t testing.TB, dit Iterator, datums ...string) { 412 t.Helper() 413 i := 0 414 clone := dit 415 for dit.Next() { 416 key := "" 417 for _, input := range dit.Datum() { 418 key += input.FileInfo.File.Path 419 } 420 421 key2 := "" 422 clone.DatumN(0) 423 for _, input := range clone.DatumN(i) { 424 key2 += input.FileInfo.File.Path 425 } 426 427 if len(datums) > 0 { 428 require.Equal(t, datums[i], key) 429 } 430 require.Equal(t, key, key2) 431 i++ 432 } 433 if len(datums) > 0 { 434 require.Equal(t, len(datums), dit.Len()) 435 } 436 require.Equal(t, i, dit.Len()) 437 }