github.com/ethersphere/bee/v2@v2.2.0/pkg/traversal/traversal_test.go (about) 1 // Copyright 2020 The Swarm Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package traversal_test 6 7 import ( 8 "bytes" 9 "context" 10 "fmt" 11 "math" 12 "path" 13 "sync" 14 "testing" 15 "time" 16 17 "github.com/ethersphere/bee/v2/pkg/file/loadsave" 18 "github.com/ethersphere/bee/v2/pkg/file/pipeline" 19 "github.com/ethersphere/bee/v2/pkg/file/pipeline/builder" 20 "github.com/ethersphere/bee/v2/pkg/manifest" 21 testingsoc "github.com/ethersphere/bee/v2/pkg/soc/testing" 22 storage "github.com/ethersphere/bee/v2/pkg/storage" 23 "github.com/ethersphere/bee/v2/pkg/storage/inmemchunkstore" 24 "github.com/ethersphere/bee/v2/pkg/swarm" 25 "github.com/ethersphere/bee/v2/pkg/traversal" 26 ) 27 28 const ( 29 dataCorpus = "hello test world" // 16 bytes. 30 defaultMediaType = "bzz-manifest-mantaray" 31 ) 32 33 func generateSample(size int) []byte { 34 buf := make([]byte, size) 35 for n := 0; n < size; { 36 n += copy(buf[n:], dataCorpus) 37 } 38 return buf 39 } 40 41 // newAddressIterator is a convenient constructor for creating addressIterator. 42 func newAddressIterator(ignoreDuplicates bool) *addressIterator { 43 return &addressIterator{ 44 seen: make(map[string]bool), 45 ignoreDuplicates: ignoreDuplicates, 46 } 47 } 48 49 // addressIterator is a simple collector of statistics 50 // targeting swarm.AddressIterFunc execution. 51 type addressIterator struct { 52 mu sync.Mutex // mu guards cnt and seen fields. 53 cnt int 54 seen map[string]bool 55 // Settings. 56 ignoreDuplicates bool 57 } 58 59 // Next matches the signature of swarm.AddressIterFunc needed in 60 // Traverser.Traverse method and collects statistics about it's execution. 61 func (i *addressIterator) Next(addr swarm.Address) error { 62 i.mu.Lock() 63 defer i.mu.Unlock() 64 65 i.cnt++ 66 if !i.ignoreDuplicates && i.seen[addr.String()] { 67 return fmt.Errorf("duplicit address: %q", addr.String()) 68 } 69 i.seen[addr.String()] = true 70 return nil 71 } 72 73 func TestTraversalBytes(t *testing.T) { 74 t.Parallel() 75 76 testCases := []struct { 77 dataSize int 78 wantHashCount int 79 wantHashes []string 80 ignoreDuplicateHashes bool 81 }{ 82 { 83 dataSize: len(dataCorpus), 84 wantHashCount: 1, 85 wantHashes: []string{ 86 "e94a5aadf259f008b7d5039420c65d692901846523f503d97d24e2f077786d9a", 87 }, 88 }, 89 { 90 dataSize: swarm.ChunkSize, 91 wantHashCount: 1, 92 wantHashes: []string{ 93 "f833c17be12d68aec95eca7f9d993f7d7aaa7a9c282eb2c3d79ab26a5aeaf384", // bytes (4096) 94 }, 95 }, 96 { 97 dataSize: swarm.ChunkSize + 1, 98 wantHashCount: 3, 99 wantHashes: []string{ 100 "a1c4483d15167aeb406017942c9625464574cf70bf7e42f237094acbccdb6834", // bytes (joiner) 101 "f833c17be12d68aec95eca7f9d993f7d7aaa7a9c282eb2c3d79ab26a5aeaf384", // bytes (4096) 102 "dcbfb467950a28f8c5023b86d31de4ff3a337993e921ae623ae62c7190d60329", // bytes (1) 103 }, 104 }, 105 { 106 dataSize: swarm.ChunkSize * 128, 107 wantHashCount: 129, 108 wantHashes: []string{ 109 "5060cfd2a34df0269b47201e1f202eb2a165d787a0c5043ceb29bb85b7567c61", // bytes (joiner) 110 "f833c17be12d68aec95eca7f9d993f7d7aaa7a9c282eb2c3d79ab26a5aeaf384", // bytes (4096) 111 }, 112 ignoreDuplicateHashes: true, 113 }, 114 { 115 dataSize: swarm.ChunkSize * 129, 116 wantHashCount: 131, 117 wantHashes: []string{ 118 "150665dfbd81f80f5ba00a0caa2caa34f8b94e662e1dea769fe9ce7ea170bf25", // root (joiner, chunk) 119 "5060cfd2a34df0269b47201e1f202eb2a165d787a0c5043ceb29bb85b7567c61", // bytes (joiner) 120 "f833c17be12d68aec95eca7f9d993f7d7aaa7a9c282eb2c3d79ab26a5aeaf384", // bytes (4096) 121 }, 122 ignoreDuplicateHashes: true, 123 }, 124 { 125 dataSize: swarm.ChunkSize*129 - 1, 126 wantHashCount: 131, 127 wantHashes: []string{ 128 "895610b2d795e7cc351a8336d46ba9ef37309d83267d272c6e257e46a78ecb7c", // root (joiner, chunk) 129 "5060cfd2a34df0269b47201e1f202eb2a165d787a0c5043ceb29bb85b7567c61", // bytes (joiner) 130 "f833c17be12d68aec95eca7f9d993f7d7aaa7a9c282eb2c3d79ab26a5aeaf384", // bytes (4096) 131 "d18f0d81b832086376684558978cfe6773ed773178f84961c8b750fe72033a26", // bytes (4095) 132 }, 133 ignoreDuplicateHashes: true, 134 }, 135 { 136 dataSize: swarm.ChunkSize*129 + 1, 137 wantHashCount: 133, 138 wantHashes: []string{ 139 "023ee8b901702a999e9ef90ca2bc1c6db1daefb3f178b683a87b0fd613fd8e21", // root (joiner, chunk) 140 "5060cfd2a34df0269b47201e1f202eb2a165d787a0c5043ceb29bb85b7567c61", // bytes (joiner [4096 * 128]) 141 "f833c17be12d68aec95eca7f9d993f7d7aaa7a9c282eb2c3d79ab26a5aeaf384", // bytes (4096) 142 "dcbfb467950a28f8c5023b86d31de4ff3a337993e921ae623ae62c7190d60329", // bytes (1) 143 "a1c4483d15167aeb406017942c9625464574cf70bf7e42f237094acbccdb6834", // bytes (joiner - [4096, 1]) 144 }, 145 ignoreDuplicateHashes: true, 146 }, 147 } 148 149 for _, tc := range testCases { 150 tc := tc 151 chunkCount := int(math.Ceil(float64(tc.dataSize) / swarm.ChunkSize)) 152 t.Run(fmt.Sprintf("%d-chunk-%d-bytes", chunkCount, tc.dataSize), func(t *testing.T) { 153 t.Parallel() 154 155 var ( 156 data = generateSample(tc.dataSize) 157 iter = newAddressIterator(tc.ignoreDuplicateHashes) 158 storerMock = inmemchunkstore.New() 159 ) 160 161 ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) 162 defer cancel() 163 164 pipe := builder.NewPipelineBuilder(ctx, storerMock, false, 0) 165 address, err := builder.FeedPipeline(ctx, pipe, bytes.NewReader(data)) 166 if err != nil { 167 t.Fatal(err) 168 } 169 170 err = traversal.New(storerMock, storerMock).Traverse(ctx, address, iter.Next) 171 if err != nil { 172 t.Fatal(err) 173 } 174 175 haveCnt, wantCnt := tc.wantHashCount, iter.cnt 176 if !tc.ignoreDuplicateHashes { 177 haveCnt, wantCnt = len(iter.seen), len(tc.wantHashes) 178 } 179 if haveCnt != wantCnt { 180 t.Fatalf("hash count mismatch: have %d; want %d", haveCnt, wantCnt) 181 } 182 183 for _, hash := range tc.wantHashes { 184 if !iter.seen[hash] { 185 t.Fatalf("hash check: want %q; have none", hash) 186 } 187 } 188 }) 189 } 190 } 191 192 func TestTraversalFiles(t *testing.T) { 193 t.Parallel() 194 195 testCases := []struct { 196 filesSize int 197 contentType string 198 filename string 199 wantHashCount int 200 wantHashes []string 201 ignoreDuplicateHashes bool 202 }{ 203 { 204 filesSize: len(dataCorpus), 205 contentType: "text/plain; charset=utf-8", 206 filename: "simple.txt", 207 wantHashCount: 4, 208 wantHashes: []string{ 209 "ae16fb27474b41273c0deb355e4405d3cd0a6639f834285f97c75636c9e29df7", // root manifest 210 "0cc878d32c96126d47f63fbe391114ee1438cd521146fc975dea1546d302b6c0", // manifest root metadata 211 "05e34f11a0967e8c09968b69c4f486f569ef58a31a197992e01304a1e59f8e75", // manifest file entry 212 "e94a5aadf259f008b7d5039420c65d692901846523f503d97d24e2f077786d9a", // bytes 213 }, 214 }, 215 { 216 filesSize: swarm.ChunkSize, 217 contentType: "text/plain; charset=utf-8", 218 wantHashCount: 6, 219 wantHashes: []string{ 220 "7e0a4b6cd542eb501f372438cbbbcd8a82c444740f00bdd54f4981f487bcf8b7", // root manifest 221 "0cc878d32c96126d47f63fbe391114ee1438cd521146fc975dea1546d302b6c0", // manifest root metadata 222 "3f538c3b5225111a79b3b1dbb5e269ca2115f2a7caf0e6925b773457cdef7be5", // manifest file entry (Edge) 223 "2f09e41846a24201758db3535dc6c42d738180c8874d4d40d4f2924d0091521f", // manifest file entry (Edge) 224 "b2662d17d51ce734695d993b44c0e2df34c3f50d5889e5bc3b8718838658e6b0", // manifest file entry (Value) 225 "f833c17be12d68aec95eca7f9d993f7d7aaa7a9c282eb2c3d79ab26a5aeaf384", // bytes 226 }, 227 }, 228 { 229 filesSize: swarm.ChunkSize + 1, 230 contentType: "text/plain; charset=utf-8", 231 filename: "simple.txt", 232 wantHashCount: 6, 233 wantHashes: []string{ 234 "ea58761906f98bd88204efbbab5c690329af02548afec37d7a556a47ca78ac62", // manifest root 235 "0cc878d32c96126d47f63fbe391114ee1438cd521146fc975dea1546d302b6c0", // manifest root metadata 236 "85617df0249a12649b56d09cf7f21e8642627b4fb9c0c9e03e2d25340cf60499", // manifest file entry 237 "a1c4483d15167aeb406017942c9625464574cf70bf7e42f237094acbccdb6834", // manifest file entry 238 "f833c17be12d68aec95eca7f9d993f7d7aaa7a9c282eb2c3d79ab26a5aeaf384", // bytes (4096) 239 "dcbfb467950a28f8c5023b86d31de4ff3a337993e921ae623ae62c7190d60329", // bytes (1) 240 }, 241 }, 242 } 243 244 for _, tc := range testCases { 245 tc := tc 246 chunkCount := int(math.Ceil(float64(tc.filesSize) / swarm.ChunkSize)) 247 t.Run(fmt.Sprintf("%d-chunk-%d-bytes", chunkCount, tc.filesSize), func(t *testing.T) { 248 t.Parallel() 249 250 var ( 251 data = generateSample(tc.filesSize) 252 iter = newAddressIterator(tc.ignoreDuplicateHashes) 253 storerMock = inmemchunkstore.New() 254 ) 255 256 ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) 257 defer cancel() 258 259 pipe := builder.NewPipelineBuilder(ctx, storerMock, false, 0) 260 fr, err := builder.FeedPipeline(ctx, pipe, bytes.NewReader(data)) 261 if err != nil { 262 t.Fatal(err) 263 } 264 265 ls := loadsave.New(storerMock, storerMock, pipelineFactory(storerMock, false)) 266 fManifest, err := manifest.NewDefaultManifest(ls, false) 267 if err != nil { 268 t.Fatal(err) 269 } 270 filename := tc.filename 271 if filename == "" { 272 filename = fr.String() 273 } 274 275 rootMtdt := map[string]string{ 276 manifest.WebsiteIndexDocumentSuffixKey: filename, 277 } 278 err = fManifest.Add(ctx, "/", manifest.NewEntry(swarm.ZeroAddress, rootMtdt)) 279 if err != nil { 280 t.Fatal(err) 281 } 282 283 fileMtdt := map[string]string{ 284 manifest.EntryMetadataFilenameKey: filename, 285 manifest.EntryMetadataContentTypeKey: tc.contentType, 286 } 287 err = fManifest.Add(ctx, filename, manifest.NewEntry(fr, fileMtdt)) 288 if err != nil { 289 t.Fatal(err) 290 } 291 292 address, err := fManifest.Store(ctx) 293 if err != nil { 294 t.Fatal(err) 295 } 296 297 err = traversal.New(storerMock, storerMock).Traverse(ctx, address, iter.Next) 298 if err != nil { 299 t.Fatal(err) 300 } 301 302 haveCnt, wantCnt := tc.wantHashCount, iter.cnt 303 if !tc.ignoreDuplicateHashes { 304 haveCnt, wantCnt = len(iter.seen), len(tc.wantHashes) 305 } 306 if haveCnt != wantCnt { 307 t.Fatalf("hash count mismatch: have %d; want %d", haveCnt, wantCnt) 308 } 309 310 for _, hash := range tc.wantHashes { 311 if !iter.seen[hash] { 312 t.Fatalf("hash check: want %q; have none", hash) 313 } 314 } 315 }) 316 } 317 } 318 319 type file struct { 320 size int 321 dir string 322 name string 323 chunks fileChunks 324 } 325 326 type fileChunks struct { 327 content []string 328 } 329 330 func TestTraversalManifest(t *testing.T) { 331 t.Parallel() 332 333 testCases := []struct { 334 files []file 335 manifestHashes []string 336 wantHashCount int 337 ignoreDuplicateHashes bool 338 }{ 339 { 340 files: []file{ 341 { 342 size: len(dataCorpus), 343 dir: "", 344 name: "hello.txt", 345 chunks: fileChunks{ 346 content: []string{ 347 "e94a5aadf259f008b7d5039420c65d692901846523f503d97d24e2f077786d9a", 348 }, 349 }, 350 }, 351 }, 352 manifestHashes: []string{ 353 // NOTE: references will be fixed, due to custom obfuscation key function 354 "f81ac8ceb2db7e55b718eca35f05233dc523022e36e11f934dbfd5f0cafde198", // root 355 "05e34f11a0967e8c09968b69c4f486f569ef58a31a197992e01304a1e59f8e75", // metadata 356 }, 357 wantHashCount: 3, 358 }, 359 { 360 files: []file{ 361 { 362 size: len(dataCorpus), 363 dir: "", 364 name: "hello.txt", 365 chunks: fileChunks{ 366 content: []string{ 367 "e94a5aadf259f008b7d5039420c65d692901846523f503d97d24e2f077786d9a", 368 }, 369 }, 370 }, 371 { 372 size: swarm.ChunkSize, 373 dir: "", 374 name: "data/1.txt", 375 chunks: fileChunks{ 376 content: []string{ 377 "f833c17be12d68aec95eca7f9d993f7d7aaa7a9c282eb2c3d79ab26a5aeaf384", // bytes (4096) 378 }, 379 }, 380 }, 381 { 382 size: swarm.ChunkSize, 383 dir: "", 384 name: "data/2.txt", 385 chunks: fileChunks{ 386 content: []string{ 387 "f833c17be12d68aec95eca7f9d993f7d7aaa7a9c282eb2c3d79ab26a5aeaf384", // bytes (4096) 388 }, 389 }, 390 }, 391 }, 392 manifestHashes: []string{ 393 // NOTE: references will be fixed, due to custom obfuscation key function 394 "d182df1cb214167d085256fafa657f38a191efe51af16834f6288ef23416fd25", // root 395 "05e34f11a0967e8c09968b69c4f486f569ef58a31a197992e01304a1e59f8e75", // manifest entry 396 "7e6bc53ca11bff459f77892563d04e09b440c63ce2f7d5fe8a8b0f0ba9eeefcf", // manifest entry (Edge PathSeparator) 397 "b2662d17d51ce734695d993b44c0e2df34c3f50d5889e5bc3b8718838658e6b0", // manifest file entry (1.txt) 398 "b2662d17d51ce734695d993b44c0e2df34c3f50d5889e5bc3b8718838658e6b0", // manifest file entry (2.txt) 399 }, 400 wantHashCount: 8, 401 ignoreDuplicateHashes: true, 402 }, 403 } 404 405 for _, tc := range testCases { 406 tc := tc 407 t.Run(fmt.Sprintf("%s-%d-files-%d-chunks", defaultMediaType, len(tc.files), tc.wantHashCount), func(t *testing.T) { 408 t.Parallel() 409 410 var ( 411 storerMock = inmemchunkstore.New() 412 iter = newAddressIterator(tc.ignoreDuplicateHashes) 413 ) 414 415 ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) 416 defer cancel() 417 418 var wantHashes []string 419 for _, f := range tc.files { 420 wantHashes = append(wantHashes, f.chunks.content...) 421 } 422 wantHashes = append(wantHashes, tc.manifestHashes...) 423 424 ls := loadsave.New(storerMock, storerMock, pipelineFactory(storerMock, false)) 425 dirManifest, err := manifest.NewMantarayManifest(ls, false) 426 if err != nil { 427 t.Fatal(err) 428 } 429 430 for _, f := range tc.files { 431 data := generateSample(f.size) 432 433 pipe := builder.NewPipelineBuilder(ctx, storerMock, false, 0) 434 fr, err := builder.FeedPipeline(ctx, pipe, bytes.NewReader(data)) 435 if err != nil { 436 t.Fatal(err) 437 } 438 439 fileName := f.name 440 if fileName == "" { 441 fileName = fr.String() 442 } 443 filePath := path.Join(f.dir, fileName) 444 445 err = dirManifest.Add(ctx, filePath, manifest.NewEntry(fr, nil)) 446 if err != nil { 447 t.Fatal(err) 448 } 449 } 450 address, err := dirManifest.Store(ctx) 451 if err != nil { 452 t.Fatal(err) 453 } 454 455 err = traversal.New(storerMock, storerMock).Traverse(ctx, address, iter.Next) 456 if err != nil { 457 t.Fatal(err) 458 } 459 460 haveCnt, wantCnt := tc.wantHashCount, iter.cnt 461 if !tc.ignoreDuplicateHashes { 462 haveCnt, wantCnt = len(iter.seen), len(wantHashes) 463 } 464 if haveCnt != wantCnt { 465 t.Fatalf("hash count mismatch: have %d; want %d", haveCnt, wantCnt) 466 } 467 468 for _, hash := range wantHashes { 469 if !iter.seen[hash] { 470 t.Fatalf("hash check: want %q; have none", hash) 471 } 472 } 473 }) 474 } 475 } 476 477 func TestTraversalSOC(t *testing.T) { 478 t.Parallel() 479 480 store := inmemchunkstore.New() 481 iter := newAddressIterator(false) 482 483 ctx := context.Background() 484 485 s := testingsoc.GenerateMockSOC(t, generateSample(swarm.ChunkSize)) 486 sch := s.Chunk() 487 488 err := store.Put(ctx, sch) 489 if err != nil { 490 t.Fatal(err) 491 } 492 493 err = traversal.New(store, store).Traverse(ctx, sch.Address(), iter.Next) 494 if err != nil { 495 t.Fatal(err) 496 } 497 498 if len(iter.seen) != 1 { 499 t.Fatal("incorrect hashes seen") 500 } 501 502 if !iter.seen[sch.Address().String()] { 503 t.Fatal("expected hash not seen") 504 } 505 } 506 507 func pipelineFactory(s storage.Putter, encrypt bool) func() pipeline.Interface { 508 return func() pipeline.Interface { 509 return builder.NewPipelineBuilder(context.Background(), s, encrypt, 0) 510 } 511 }