github.com/filecoin-project/lassie@v0.23.0/pkg/internal/itest/http_fetch_test.go (about) 1 //go:build !race 2 3 package itest 4 5 import ( 6 "bytes" 7 "context" 8 "encoding/json" 9 "fmt" 10 "io" 11 "math/rand" 12 "net/http" 13 "net/url" 14 "os" 15 "strings" 16 "sync" 17 "testing" 18 "time" 19 20 datatransfer "github.com/filecoin-project/go-data-transfer/v2" 21 "github.com/filecoin-project/lassie/pkg/aggregateeventrecorder" 22 "github.com/filecoin-project/lassie/pkg/internal/itest/mocknet" 23 "github.com/filecoin-project/lassie/pkg/internal/itest/testpeer" 24 "github.com/filecoin-project/lassie/pkg/lassie" 25 "github.com/filecoin-project/lassie/pkg/retriever" 26 httpserver "github.com/filecoin-project/lassie/pkg/server/http" 27 "github.com/filecoin-project/lassie/pkg/types" 28 "github.com/google/uuid" 29 "github.com/ipfs/go-cid" 30 unixfs "github.com/ipfs/go-unixfsnode/testutil" 31 "github.com/ipld/go-car/v2" 32 "github.com/ipld/go-car/v2/storage" 33 unixfsgen "github.com/ipld/go-fixtureplate/generator" 34 "github.com/ipld/go-ipld-prime" 35 "github.com/ipld/go-ipld-prime/datamodel" 36 "github.com/ipld/go-ipld-prime/linking" 37 cidlink "github.com/ipld/go-ipld-prime/linking/cid" 38 "github.com/ipld/go-ipld-prime/storage/memstore" 39 selectorparse "github.com/ipld/go-ipld-prime/traversal/selector/parse" 40 trustlesshttp "github.com/ipld/go-trustless-utils/http" 41 trustlesstestutil "github.com/ipld/go-trustless-utils/testutil" 42 "github.com/ipld/go-trustless-utils/traversal" 43 "github.com/libp2p/go-libp2p/core/peer" 44 "github.com/multiformats/go-multicodec" 45 "github.com/stretchr/testify/require" 46 "golang.org/x/exp/slices" 47 48 "net/http/httptest" 49 _ "net/http/pprof" 50 ) 51 52 // DEBUG_DATA, when true, will write source and received data to CARs 53 // for inspection if tests fail; otherwise they are cleaned up as tests 54 // proceed. 
55 const DEBUG_DATA = false 56 57 // UnixFS data generation specs used by github.com/ipld/go-fixtureplate/generator 58 const ( 59 unixfsSpec_smallFile = `file:1KiB` 60 unixfsSpec_largeShardedFile = `file:4MiB` 61 unixfsSpec_largeShardedFileZeroed = `file:4MiB{zero}` 62 unixfsSpec_largeDirectory = `dir(~10*file:1,~5*dir(~10*file:~10k,~5*dir(~4*file:~200k)),~5*file:~300k)` 63 unixfsSpec_largeShardedDirectory = `dir{sharded}(~20*file:1,~10*file:~1k,~5*dir(~5*file:~10k,~5*dir(~4*file:~200k)),~2*file:~300k)` 64 65 // wrapPath is the path within "wrapped" content (below) that the content we 66 // care about is located, we use this to test path-nested retrievals and make 67 // sure we ignore surrounding content outside of this path. 68 wrapPath = "/want2/want1/want0" 69 ) 70 71 var ( 72 // same as unixfsSpec_largeShardedFile but nested within wrapPath, surrounded by other directories 73 unixfsSpec_largeShardedFileWrapped = wrapSpec(unixfsSpec_largeShardedFile) 74 // same as unixfsSpec_largeDirectory but nested within wrapPath, surrounded by other directories 75 unixfsSpec_largeDirectoryWrapped = wrapSpec(unixfsSpec_largeDirectory) 76 // same as unixfsSpec_largeShardedDirectory but nested within wrapPath, surrounded by other directories 77 unixfsSpec_largeShardedDirectoryWrapped = wrapSpec(unixfsSpec_largeShardedDirectory) 78 ) 79 80 type generateFn func(*testing.T, io.Reader, []testpeer.TestPeer) []unixfs.DirEntry 81 type bodyValidator func(*testing.T, unixfs.DirEntry, []byte) 82 type response struct { 83 StatusCode int 84 Header http.Header 85 Body []byte 86 } 87 88 func TestHttpFetch(t *testing.T) { 89 entityQuery := func(q url.Values, _ []testpeer.TestPeer) { 90 q.Set("dag-scope", "entity") 91 } 92 blockQuery := func(q url.Values, _ []testpeer.TestPeer) { 93 q.Set("dag-scope", "block") 94 } 95 noDups := func(header http.Header) { 96 header.Set("Accept", "application/vnd.ipld.car;order=dfs;version=1;dups=n;") 97 } 98 type headerSetter func(http.Header) 99 type 
queryModifier func(url.Values, []testpeer.TestPeer) 100 type lassieOptsGen func(*testing.T, *mocknet.MockRetrievalNet) []lassie.LassieOption 101 102 testCases := []struct { 103 name string 104 graphsyncRemotes int 105 bitswapRemotes int 106 httpRemotes int 107 disableGraphsync bool 108 expectNoCandidates bool 109 expectUncleanEnd bool 110 expectUnauthorized bool 111 expectAggregateEvents []aggregateeventrecorder.AggregateEvent 112 modifyHttpConfig func(httpserver.HttpServerConfig) httpserver.HttpServerConfig 113 generate generateFn 114 paths []string 115 setHeader headerSetter 116 modifyQueries []queryModifier 117 validateBodies []bodyValidator 118 lassieOpts lassieOptsGen 119 expectNoDups bool 120 }{ 121 { 122 name: "graphsync large sharded file", 123 graphsyncRemotes: 1, 124 generate: singlePeerGenerator(unixfsSpec_largeShardedFile), 125 expectAggregateEvents: []aggregateeventrecorder.AggregateEvent{{ 126 Success: true, 127 URLPath: "?dag-scope=all&dups=y", 128 ProtocolsAllowed: []string{multicodec.TransportGraphsyncFilecoinv1.String(), multicodec.TransportBitswap.String(), multicodec.TransportIpfsGatewayHttp.String()}, 129 ProtocolsAttempted: []string{multicodec.TransportGraphsyncFilecoinv1.String()}, 130 }}, 131 }, 132 { 133 name: "bitswap large sharded file", 134 bitswapRemotes: 1, 135 generate: singlePeerGenerator(unixfsSpec_largeShardedFile), 136 expectAggregateEvents: []aggregateeventrecorder.AggregateEvent{{ 137 Success: true, 138 URLPath: "?dag-scope=all&dups=y", 139 ProtocolsAllowed: []string{multicodec.TransportGraphsyncFilecoinv1.String(), multicodec.TransportBitswap.String(), multicodec.TransportIpfsGatewayHttp.String()}, 140 ProtocolsAttempted: []string{multicodec.TransportBitswap.String()}, 141 }}, 142 }, 143 { 144 name: "http large sharded file", 145 httpRemotes: 1, 146 generate: singlePeerGenerator(unixfsSpec_largeShardedFile), 147 expectAggregateEvents: []aggregateeventrecorder.AggregateEvent{{ 148 Success: true, 149 URLPath: 
"?dag-scope=all&dups=y", 150 ProtocolsAllowed: []string{multicodec.TransportGraphsyncFilecoinv1.String(), multicodec.TransportBitswap.String(), multicodec.TransportIpfsGatewayHttp.String()}, 151 ProtocolsAttempted: []string{multicodec.TransportIpfsGatewayHttp.String()}, 152 }}, 153 }, 154 { 155 name: "graphsync large directory", 156 graphsyncRemotes: 1, 157 generate: singlePeerGenerator(unixfsSpec_largeDirectory), 158 }, 159 { 160 name: "bitswap large directory", 161 bitswapRemotes: 1, 162 generate: singlePeerGenerator(unixfsSpec_largeDirectory), 163 }, 164 { 165 name: "http large directory", 166 httpRemotes: 1, 167 generate: singlePeerGenerator(unixfsSpec_largeDirectory), 168 }, 169 { 170 name: "graphsync large sharded directory", 171 graphsyncRemotes: 1, 172 generate: singlePeerGenerator(unixfsSpec_largeShardedDirectory), 173 }, 174 { 175 name: "bitswap large sharded directory", 176 bitswapRemotes: 1, 177 generate: singlePeerGenerator(unixfsSpec_largeShardedDirectory), 178 }, 179 { 180 name: "http large sharded directory", 181 httpRemotes: 1, 182 generate: singlePeerGenerator(unixfsSpec_largeShardedDirectory), 183 }, 184 { 185 name: "graphsync max block limit", 186 graphsyncRemotes: 1, 187 expectUncleanEnd: true, 188 modifyHttpConfig: func(cfg httpserver.HttpServerConfig) httpserver.HttpServerConfig { 189 cfg.MaxBlocksPerRequest = 3 190 return cfg 191 }, 192 generate: singlePeerGenerator(unixfsSpec_largeShardedFile), 193 validateBodies: validateFirstThreeBlocksOnly, 194 }, 195 { 196 name: "graphsync max block limit in request", 197 graphsyncRemotes: 1, 198 expectUncleanEnd: true, 199 modifyQueries: []queryModifier{ 200 func(values url.Values, _ []testpeer.TestPeer) { 201 values.Add("blockLimit", "3") 202 }, 203 }, 204 generate: singlePeerGenerator(unixfsSpec_largeShardedFile), 205 validateBodies: validateFirstThreeBlocksOnly, 206 }, 207 { 208 name: "bitswap max block limit", 209 bitswapRemotes: 1, 210 expectUncleanEnd: true, 211 modifyHttpConfig: func(cfg 
httpserver.HttpServerConfig) httpserver.HttpServerConfig { 212 cfg.MaxBlocksPerRequest = 3 213 return cfg 214 }, 215 generate: singlePeerGenerator(unixfsSpec_largeShardedFile), 216 validateBodies: validateFirstThreeBlocksOnly, 217 }, 218 { 219 name: "http max block limit", 220 httpRemotes: 1, 221 expectUncleanEnd: true, 222 modifyHttpConfig: func(cfg httpserver.HttpServerConfig) httpserver.HttpServerConfig { 223 cfg.MaxBlocksPerRequest = 3 224 return cfg 225 }, 226 generate: singlePeerGenerator(unixfsSpec_largeShardedFile), 227 validateBodies: validateFirstThreeBlocksOnly, 228 }, 229 { 230 name: "bitswap block timeout from missing block", 231 bitswapRemotes: 1, 232 expectUncleanEnd: true, 233 lassieOpts: func(t *testing.T, mrn *mocknet.MockRetrievalNet) []lassie.LassieOption { 234 // this delay is going to depend on CI, if it's too short then a slower machine 235 // won't get bitswap setup in time to get the block 236 return []lassie.LassieOption{lassie.WithProviderTimeout(1 * time.Second)} 237 }, 238 generate: func(t *testing.T, rndReader io.Reader, remotes []testpeer.TestPeer) []unixfs.DirEntry { 239 file := generateFor(t, unixfsSpec_largeShardedFile, rndReader, *remotes[0].LinkSystem) 240 remotes[0].Blockstore().DeleteBlock(context.Background(), file.SelfCids[2]) 241 return []unixfs.DirEntry{file} 242 }, 243 validateBodies: validateFirstThreeBlocksOnly, 244 }, 245 { 246 name: "same content, http missing block, bitswap completes", 247 bitswapRemotes: 1, 248 httpRemotes: 1, 249 generate: func(t *testing.T, rndReader io.Reader, remotes []testpeer.TestPeer) []unixfs.DirEntry { 250 file := generateFor(t, unixfsSpec_largeShardedFile, rndReader, *remotes[0].LinkSystem) 251 for _, c := range file.SelfCids { 252 blk, err := remotes[0].Blockstore().Get(context.Background(), c) 253 require.NoError(t, err) 254 writer, commit, err := remotes[1].LinkSystem.StorageWriteOpener(linking.LinkContext{Ctx: context.Background()}) 255 require.NoError(t, err) 256 _, err = 
writer.Write(blk.RawData()) 257 require.NoError(t, err) 258 err = commit(cidlink.Link{Cid: c}) 259 require.NoError(t, err) 260 } 261 remotes[1].Blockstore().DeleteBlock(context.Background(), file.SelfCids[3]) 262 return []unixfs.DirEntry{file} 263 }, 264 }, 265 { 266 // dag-scope entity fetch should get the same DAG as full for a plain file 267 name: "graphsync large sharded file, dag-scope entity", 268 graphsyncRemotes: 1, 269 generate: singlePeerGenerator(unixfsSpec_largeShardedFile), 270 modifyQueries: []queryModifier{entityQuery}, 271 }, 272 { 273 // dag-scope entity fetch should get the same DAG as full for a plain file 274 name: "bitswap large sharded file, dag-scope entity", 275 bitswapRemotes: 1, 276 generate: singlePeerGenerator(unixfsSpec_largeShardedFile), 277 modifyQueries: []queryModifier{entityQuery}, 278 }, 279 { 280 name: "graphsync nested large sharded file, with path, dag-scope entity", 281 graphsyncRemotes: 1, 282 generate: singlePeerGenerator(unixfsSpec_largeShardedFileWrapped), 283 paths: []string{wrapPath}, 284 modifyQueries: []queryModifier{entityQuery}, 285 validateBodies: validatePathedEntityContent, 286 }, 287 { 288 name: "bitswap nested large sharded file, with path, dag-scope entity", 289 bitswapRemotes: 1, 290 generate: singlePeerGenerator(unixfsSpec_largeShardedFileWrapped), 291 paths: []string{wrapPath}, 292 modifyQueries: []queryModifier{entityQuery}, 293 validateBodies: validatePathedEntityContent, 294 }, 295 { 296 name: "http nested large sharded file, with path, dag-scope entity", 297 httpRemotes: 1, 298 generate: singlePeerGenerator(unixfsSpec_largeShardedFileWrapped), 299 paths: []string{wrapPath}, 300 modifyQueries: []queryModifier{entityQuery}, 301 validateBodies: validatePathedEntityContent, 302 }, 303 { 304 name: "graphsync large directory, dag-scope entity", 305 graphsyncRemotes: 1, 306 generate: singlePeerGenerator(unixfsSpec_largeDirectory), 307 modifyQueries: []queryModifier{entityQuery}, 308 validateBodies: 
validateOnlyRoot, 309 }, 310 { 311 name: "bitswap large directory, dag-scope entity", 312 bitswapRemotes: 1, 313 generate: singlePeerGenerator(unixfsSpec_largeDirectory), 314 modifyQueries: []queryModifier{entityQuery}, 315 validateBodies: validateOnlyRoot, 316 }, 317 { 318 name: "http large directory, dag-scope entity", 319 httpRemotes: 1, 320 generate: singlePeerGenerator(unixfsSpec_largeDirectory), 321 modifyQueries: []queryModifier{entityQuery}, 322 validateBodies: validateOnlyRoot, 323 }, 324 { 325 name: "graphsync nested large directory, with path, dag-scope entity", 326 graphsyncRemotes: 1, 327 generate: singlePeerGenerator(unixfsSpec_largeDirectoryWrapped), 328 paths: []string{wrapPath}, 329 modifyQueries: []queryModifier{entityQuery}, 330 validateBodies: validatePathedEntityContent, 331 }, 332 { 333 name: "bitswap nested large directory, with path, dag-scope entity", 334 bitswapRemotes: 1, 335 generate: singlePeerGenerator(unixfsSpec_largeDirectoryWrapped), 336 paths: []string{wrapPath}, 337 modifyQueries: []queryModifier{entityQuery}, 338 validateBodies: validatePathedEntityContent, 339 }, 340 { 341 name: "http nested large directory, with path, dag-scope entity", 342 httpRemotes: 1, 343 generate: singlePeerGenerator(unixfsSpec_largeDirectoryWrapped), 344 paths: []string{wrapPath}, 345 modifyQueries: []queryModifier{entityQuery}, 346 validateBodies: validatePathedEntityContent, 347 }, 348 { 349 name: "graphsync nested large directory, with path, full", 350 graphsyncRemotes: 1, 351 generate: singlePeerGenerator(unixfsSpec_largeDirectoryWrapped), 352 paths: []string{wrapPath}, 353 validateBodies: validatePathedFullContent, 354 }, 355 { 356 name: "bitswap nested large directory, with path, full", 357 bitswapRemotes: 1, 358 generate: singlePeerGenerator(unixfsSpec_largeDirectoryWrapped), 359 paths: []string{wrapPath}, 360 validateBodies: validatePathedFullContent, 361 }, 362 { 363 name: "bitswap nested large directory, with path, full", 364 httpRemotes: 1, 
365 generate: singlePeerGenerator(unixfsSpec_largeDirectoryWrapped), 366 paths: []string{wrapPath}, 367 validateBodies: validatePathedFullContent, 368 }, 369 { 370 name: "graphsync nested large sharded directory, dag-scope entity", 371 graphsyncRemotes: 1, 372 generate: singlePeerGenerator(unixfsSpec_largeShardedDirectory), 373 modifyQueries: []queryModifier{entityQuery}, 374 validateBodies: validateOnlyEntity, 375 }, 376 { 377 name: "bitswap nested large sharded directory, dag-scope entity", 378 bitswapRemotes: 1, 379 generate: singlePeerGenerator(unixfsSpec_largeShardedDirectory), 380 modifyQueries: []queryModifier{entityQuery}, 381 validateBodies: validateOnlyEntity, 382 }, 383 { 384 name: "http nested large sharded directory, dag-scope entity", 385 httpRemotes: 1, 386 generate: singlePeerGenerator(unixfsSpec_largeShardedDirectory), 387 modifyQueries: []queryModifier{entityQuery}, 388 validateBodies: validateOnlyEntity, 389 }, 390 { 391 name: "graphsync nested large sharded directory, with path, dag-scope entity", 392 graphsyncRemotes: 1, 393 generate: singlePeerGenerator(unixfsSpec_largeShardedDirectoryWrapped), 394 paths: []string{wrapPath}, 395 modifyQueries: []queryModifier{entityQuery}, 396 validateBodies: validatePathedEntityContent, 397 }, 398 { 399 name: "bitswap nested large sharded directory, with path, dag-scope entity", 400 bitswapRemotes: 1, 401 generate: singlePeerGenerator(unixfsSpec_largeShardedDirectoryWrapped), 402 paths: []string{wrapPath}, 403 modifyQueries: []queryModifier{entityQuery}, 404 validateBodies: validatePathedEntityContent, 405 }, 406 { 407 name: "http nested large sharded directory, with path, dag-scope entity", 408 httpRemotes: 1, 409 generate: singlePeerGenerator(unixfsSpec_largeShardedDirectoryWrapped), 410 paths: []string{wrapPath}, 411 modifyQueries: []queryModifier{entityQuery}, 412 validateBodies: validatePathedEntityContent, 413 }, 414 { 415 name: "graphsync nested large sharded directory, with path, full", 416 
graphsyncRemotes: 1, 417 generate: singlePeerGenerator(unixfsSpec_largeShardedDirectoryWrapped), 418 paths: []string{wrapPath}, 419 validateBodies: validatePathedFullContent, 420 }, 421 { 422 name: "bitswap nested large sharded directory, with path, full", 423 bitswapRemotes: 1, 424 generate: singlePeerGenerator(unixfsSpec_largeShardedDirectoryWrapped), 425 paths: []string{wrapPath}, 426 validateBodies: validatePathedFullContent, 427 }, 428 { 429 name: "http nested large sharded directory, with path, full", 430 httpRemotes: 1, 431 generate: singlePeerGenerator(unixfsSpec_largeShardedDirectoryWrapped), 432 paths: []string{wrapPath}, 433 validateBodies: validatePathedFullContent, 434 }, 435 { 436 // A very contrived example - we spread the content generated for this test across 4 peers, 437 // then we also make sure the root is in all of them, so the CandidateSource will return them 438 // all. The retriever should then form a swarm of 4 peers and fetch the content from across 439 // the set. 
440 name: "bitswap, nested large sharded directory, spread across multiple peers, with path, dag-scope entity", 441 bitswapRemotes: 4, 442 generate: func(t *testing.T, rndReader io.Reader, remotes []testpeer.TestPeer) []unixfs.DirEntry { 443 // rotating linksystem - each block will be written to a different remote 444 lsys := cidlink.DefaultLinkSystem() 445 var blkIdx int 446 lsys.StorageWriteOpener = func(lctx ipld.LinkContext) (io.Writer, ipld.BlockWriteCommitter, error) { 447 defer func() { blkIdx++ }() 448 return remotes[blkIdx%len(remotes)].LinkSystem.StorageWriteOpener(lctx) 449 } 450 lsys.TrustedStorage = true 451 // generate data 452 data := unixfs.WrapContent(t, rndReader, &lsys, unixfs.GenerateDirectory(t, &lsys, rndReader, 16<<20, true), wrapPath, false) 453 454 // copy the root block to all remotes 455 lctx := ipld.LinkContext{} 456 rootLnk := cidlink.Link{Cid: data.Root} 457 // the root should be the last written block, so we should be able to 458 // find it on remote: (blkIdx-1)%len(remotes) 459 blkRdr, err := remotes[(blkIdx-1)%len(remotes)].LinkSystem.StorageReadOpener(lctx, rootLnk) 460 require.NoError(t, err) 461 blk, err := io.ReadAll(blkRdr) 462 require.NoError(t, err) 463 for _, remote := range remotes { 464 w, wc, err := remote.LinkSystem.StorageWriteOpener(lctx) 465 require.NoError(t, err) 466 _, err = w.Write(blk) 467 require.NoError(t, err) 468 require.NoError(t, wc(rootLnk)) 469 } 470 471 return []unixfs.DirEntry{data} 472 }, 473 paths: []string{wrapPath}, 474 modifyQueries: []queryModifier{entityQuery}, 475 validateBodies: validatePathedEntityContent, 476 }, 477 { 478 name: "two separate, parallel bitswap retrievals", 479 bitswapRemotes: 2, 480 generate: func(t *testing.T, rndReader io.Reader, remotes []testpeer.TestPeer) []unixfs.DirEntry { 481 return []unixfs.DirEntry{ 482 generateFor(t, unixfsSpec_largeShardedFile, rndReader, *remotes[0].LinkSystem), 483 generateFor(t, unixfsSpec_largeDirectory, rndReader, *remotes[1].LinkSystem), 484 
} 485 }, 486 }, 487 { 488 name: "two separate, parallel graphsync retrievals", 489 graphsyncRemotes: 2, 490 generate: func(t *testing.T, rndReader io.Reader, remotes []testpeer.TestPeer) []unixfs.DirEntry { 491 return []unixfs.DirEntry{ 492 generateFor(t, unixfsSpec_largeShardedFile, rndReader, *remotes[0].LinkSystem), 493 generateFor(t, unixfsSpec_largeDirectory, rndReader, *remotes[1].LinkSystem), 494 } 495 }, 496 }, 497 { 498 name: "two separate, parallel graphsync retrievals, with graphsync disabled", 499 graphsyncRemotes: 2, 500 disableGraphsync: true, 501 // in practice, rather than "no candidates", it'll likely be a timeout 502 // from waiting for bitswap candidates; in test we short-circuit and send 503 // strictly zero bitswap or http candidates 504 expectNoCandidates: true, 505 generate: func(t *testing.T, rndReader io.Reader, remotes []testpeer.TestPeer) []unixfs.DirEntry { 506 return []unixfs.DirEntry{ 507 generateFor(t, unixfsSpec_largeShardedFile, rndReader, *remotes[0].LinkSystem), 508 generateFor(t, unixfsSpec_largeDirectory, rndReader, *remotes[1].LinkSystem), 509 } 510 }, 511 expectAggregateEvents: []aggregateeventrecorder.AggregateEvent{ 512 { 513 Success: false, 514 URLPath: "?dag-scope=all&dups=y", 515 ProtocolsAllowed: []string{multicodec.TransportIpfsGatewayHttp.String(), multicodec.TransportBitswap.String()}, 516 ProtocolsAttempted: []string{}, 517 }, 518 { 519 Success: false, 520 URLPath: "?dag-scope=all&dups=y", 521 ProtocolsAllowed: []string{multicodec.TransportIpfsGatewayHttp.String(), multicodec.TransportBitswap.String()}, 522 ProtocolsAttempted: []string{}, 523 }, 524 }, 525 }, 526 { 527 name: "parallel, separate graphsync and bitswap retrievals", 528 graphsyncRemotes: 1, 529 bitswapRemotes: 1, 530 generate: func(t *testing.T, rndReader io.Reader, remotes []testpeer.TestPeer) []unixfs.DirEntry { 531 return []unixfs.DirEntry{ 532 generateFor(t, unixfsSpec_largeShardedFile, rndReader, *remotes[0].LinkSystem), 533 generateFor(t, 
unixfsSpec_largeDirectory, rndReader, *remotes[1].LinkSystem), 534 } 535 }, 536 }, 537 { 538 // dag-scope block fetch should only get the root node for a plain file 539 name: "graphsync large sharded file, dag-scope block", 540 graphsyncRemotes: 1, 541 generate: singlePeerGenerator(unixfsSpec_largeShardedFile), 542 modifyQueries: []queryModifier{blockQuery}, 543 validateBodies: validateOnlyRoot, 544 }, 545 { 546 name: "graphsync nested large sharded file, with path, dag-scope block", 547 graphsyncRemotes: 1, 548 generate: singlePeerGenerator(unixfsSpec_largeShardedFileWrapped), 549 paths: []string{wrapPath}, 550 modifyQueries: []queryModifier{blockQuery}, 551 validateBodies: []bodyValidator{func(t *testing.T, srcData unixfs.DirEntry, body []byte) { 552 wantCids := []cid.Cid{ 553 srcData.Root, // "/" 554 srcData.Children[1].Root, // "/want2" 555 srcData.Children[1].Children[1].Root, // "/want2/want1" 556 srcData.Children[1].Children[1].Children[1].Root, // wrapPath 557 } 558 validateCarBody(t, body, srcData.Root, wantCids, true) 559 }}, 560 }, 561 { 562 name: "graphsync large sharded file, fixedPeer", 563 graphsyncRemotes: 1, 564 generate: func(t *testing.T, rndReader io.Reader, remotes []testpeer.TestPeer) []unixfs.DirEntry { 565 fileEntry := generateFor(t, unixfsSpec_largeShardedFile, rndReader, *remotes[0].LinkSystem) 566 // wipe content routing information for remote 567 remotes[0].Cids = make(map[cid.Cid]struct{}) 568 return []unixfs.DirEntry{fileEntry} 569 }, 570 modifyQueries: []queryModifier{func(v url.Values, tp []testpeer.TestPeer) { 571 multiaddrs, _ := peer.AddrInfoToP2pAddrs(tp[0].AddrInfo()) 572 maStrings := make([]string, 0, len(multiaddrs)) 573 for _, ma := range multiaddrs { 574 maStrings = append(maStrings, ma.String()) 575 } 576 v.Set("providers", strings.Join(maStrings, ",")) 577 }}, 578 }, 579 { 580 name: "graphsync large sharded file, fixedPeer through startup opts", 581 graphsyncRemotes: 1, 582 generate: func(t *testing.T, rndReader
io.Reader, remotes []testpeer.TestPeer) []unixfs.DirEntry { 583 fileEntry := generateFor(t, unixfsSpec_largeShardedFile, rndReader, *remotes[0].LinkSystem) 584 // wipe content routing information for remote 585 remotes[0].Cids = make(map[cid.Cid]struct{}) 586 return []unixfs.DirEntry{fileEntry} 587 }, 588 lassieOpts: func(t *testing.T, mrn *mocknet.MockRetrievalNet) []lassie.LassieOption { 589 return []lassie.LassieOption{lassie.WithCandidateSource(retriever.NewDirectCandidateSource([]types.Provider{{Peer: *mrn.Remotes[0].AddrInfo(), Protocols: nil}}, retriever.WithLibp2pCandidateDiscovery(mrn.Self)))} 590 }, 591 }, 592 { 593 name: "bitswap large sharded file, fixedPeer", 594 bitswapRemotes: 1, 595 generate: func(t *testing.T, rndReader io.Reader, remotes []testpeer.TestPeer) []unixfs.DirEntry { 596 fileEntry := generateFor(t, unixfsSpec_largeShardedFile, rndReader, *remotes[0].LinkSystem) 597 // wipe content routing information for remote 598 remotes[0].Cids = make(map[cid.Cid]struct{}) 599 return []unixfs.DirEntry{fileEntry} 600 }, 601 modifyQueries: []queryModifier{func(v url.Values, tp []testpeer.TestPeer) { 602 multiaddrs, _ := peer.AddrInfoToP2pAddrs(tp[0].AddrInfo()) 603 maStrings := make([]string, 0, len(multiaddrs)) 604 for _, ma := range multiaddrs { 605 maStrings = append(maStrings, ma.String()) 606 } 607 v.Set("providers", strings.Join(maStrings, ",")) 608 }}, 609 }, 610 { 611 name: "bitswap large sharded file, fixedPeer through startup opts", 612 bitswapRemotes: 1, 613 generate: func(t *testing.T, rndReader io.Reader, remotes []testpeer.TestPeer) []unixfs.DirEntry { 614 fileEntry := generateFor(t, unixfsSpec_largeShardedFile, rndReader, *remotes[0].LinkSystem) 615 // wipe content routing information for remote 616 remotes[0].Cids = make(map[cid.Cid]struct{}) 617 return []unixfs.DirEntry{fileEntry} 618 }, 619 lassieOpts: func(t *testing.T, mrn *mocknet.MockRetrievalNet) []lassie.LassieOption { 620 return 
[]lassie.LassieOption{lassie.WithCandidateSource(retriever.NewDirectCandidateSource([]types.Provider{{Peer: *mrn.Remotes[0].AddrInfo(), Protocols: nil}}, retriever.WithLibp2pCandidateDiscovery(mrn.Self)))} 621 }, 622 }, 623 { 624 name: "http large sharded file with dups", 625 httpRemotes: 1, 626 generate: func(t *testing.T, rndReader io.Reader, remotes []testpeer.TestPeer) []unixfs.DirEntry { 627 return []unixfs.DirEntry{unixfs.GenerateFile(t, remotes[0].LinkSystem, trustlesstestutil.ZeroReader{}, 4<<20)} 628 }, 629 validateBodies: []bodyValidator{func(t *testing.T, srcData unixfs.DirEntry, body []byte) { 630 store := &trustlesstestutil.CorrectedMemStore{ParentStore: &memstore.Store{ 631 Bag: make(map[string][]byte), 632 }} 633 lsys := cidlink.DefaultLinkSystem() 634 lsys.SetReadStorage(store) 635 lsys.SetWriteStorage(store) 636 lsys.TrustedStorage = true 637 _, err := traversal.Config{ 638 Root: srcData.Root, 639 Selector: selectorparse.CommonSelector_ExploreAllRecursively, 640 ExpectDuplicatesIn: true, 641 }.VerifyCar(context.Background(), bytes.NewReader(body), lsys) 642 require.NoError(t, err) 643 }}, 644 }, 645 { 646 name: "http large sharded file with dups, no dups response requested", 647 httpRemotes: 1, 648 setHeader: noDups, 649 expectNoDups: true, 650 generate: singlePeerGenerator(unixfsSpec_largeShardedFileZeroed), 651 validateBodies: []bodyValidator{func(t *testing.T, srcData unixfs.DirEntry, body []byte) { 652 wantCids := []cid.Cid{ 653 srcData.Root, // "/" 654 srcData.SelfCids[1], 655 srcData.SelfCids[len(srcData.SelfCids)-1], 656 } 657 validateCarBody(t, body, srcData.Root, wantCids, true) 658 }}, 659 }, 660 { 661 name: "http large sharded file with dups, */* gives dups", 662 httpRemotes: 1, 663 setHeader: func(h http.Header) { h.Set("Accept", "*/*") }, 664 generate: singlePeerGenerator(unixfsSpec_largeShardedFileZeroed), 665 validateBodies: []bodyValidator{func(t *testing.T, srcData unixfs.DirEntry, body []byte) { 666 store :=
&trustlesstestutil.CorrectedMemStore{ParentStore: &memstore.Store{ 667 Bag: make(map[string][]byte), 668 }} 669 lsys := cidlink.DefaultLinkSystem() 670 lsys.SetReadStorage(store) 671 lsys.SetWriteStorage(store) 672 lsys.TrustedStorage = true 673 _, err := traversal.Config{ 674 Root: srcData.Root, 675 Selector: selectorparse.CommonSelector_ExploreAllRecursively, 676 ExpectDuplicatesIn: true, 677 }.VerifyCar(context.Background(), bytes.NewReader(body), lsys) 678 require.NoError(t, err) 679 }}, 680 }, { 681 name: "http large sharded file with dups, multiple accept, priority to no dups", 682 httpRemotes: 1, 683 expectNoDups: true, 684 setHeader: func(h http.Header) { 685 h.Set("Accept", 686 strings.Join([]string{ 687 "text/html", 688 trustlesshttp.DefaultContentType().WithDuplicates(true).WithQuality(0.7).String(), 689 trustlesshttp.DefaultContentType().WithDuplicates(false).WithQuality(0.8).String(), 690 "*/*;q=0.1", 691 }, ", "), 692 ) 693 }, 694 generate: singlePeerGenerator(unixfsSpec_largeShardedFileZeroed), 695 validateBodies: []bodyValidator{func(t *testing.T, srcData unixfs.DirEntry, body []byte) { 696 wantCids := []cid.Cid{ 697 srcData.Root, // "/" 698 srcData.SelfCids[1], 699 srcData.SelfCids[len(srcData.SelfCids)-1], 700 } 701 validateCarBody(t, body, srcData.Root, wantCids, true) 702 }}, 703 }, 704 { 705 name: "http large sharded file with dups, multiple accept, priority to dups", 706 httpRemotes: 1, 707 setHeader: func(h http.Header) { 708 h.Set("Accept", 709 strings.Join([]string{ 710 "text/html", 711 trustlesshttp.DefaultContentType().WithDuplicates(true).WithQuality(0.8).String(), 712 trustlesshttp.DefaultContentType().WithDuplicates(false).WithQuality(0.7).String(), 713 "*/*;q=0.1", 714 }, ", "), 715 ) 716 }, 717 generate: singlePeerGenerator(unixfsSpec_largeShardedFileZeroed), 718 validateBodies: []bodyValidator{func(t *testing.T, srcData unixfs.DirEntry, body []byte) { 719 store := &trustlesstestutil.CorrectedMemStore{ParentStore: &memstore.Store{
720 Bag: make(map[string][]byte), 721 }} 722 lsys := cidlink.DefaultLinkSystem() 723 lsys.SetReadStorage(store) 724 lsys.SetWriteStorage(store) 725 lsys.TrustedStorage = true 726 _, err := traversal.Config{ 727 Root: srcData.Root, 728 Selector: selectorparse.CommonSelector_ExploreAllRecursively, 729 ExpectDuplicatesIn: true, 730 }.VerifyCar(context.Background(), bytes.NewReader(body), lsys) 731 require.NoError(t, err) 732 }}, 733 }, 734 { 735 name: "bitswap nested file, path with special characters", 736 bitswapRemotes: 1, 737 generate: func(t *testing.T, rndReader io.Reader, remotes []testpeer.TestPeer) []unixfs.DirEntry { 738 lsys := remotes[0].LinkSystem 739 return []unixfs.DirEntry{unixfs.WrapContent(t, rndReader, lsys, unixfs.GenerateFile(t, lsys, rndReader, 1024), "/?/#/%/ ", false)} 740 }, 741 paths: []string{"/?/#/%/ "}, 742 modifyQueries: []queryModifier{entityQuery}, 743 validateBodies: []bodyValidator{func(t *testing.T, srcData unixfs.DirEntry, body []byte) { 744 wantCids := []cid.Cid{ 745 srcData.Root, // "/" 746 srcData.Children[1].Root, // "/?" 
747 srcData.Children[1].Children[1].Root, // "/?/#" 748 srcData.Children[1].Children[1].Children[1].Root, // "/?/#/%" 749 srcData.Children[1].Children[1].Children[1].Children[0].Root, // "/?/#/%/ " (' ' sorts before '!', so it's the first link, ahead of the one named '!before') 750 } 751 validateCarBody(t, body, srcData.Root, wantCids, true) 752 }}, 753 }, 754 { 755 name: "http nested file, path with special characters", 756 httpRemotes: 1, 757 generate: func(t *testing.T, rndReader io.Reader, remotes []testpeer.TestPeer) []unixfs.DirEntry { 758 lsys := remotes[0].LinkSystem 759 return []unixfs.DirEntry{unixfs.WrapContent(t, rndReader, lsys, unixfs.GenerateFile(t, lsys, rndReader, 1024), "/?/#/%/ ", false)} 760 }, 761 paths: []string{"/?/#/%/ "}, 762 modifyQueries: []queryModifier{entityQuery}, 763 validateBodies: []bodyValidator{func(t *testing.T, srcData unixfs.DirEntry, body []byte) { 764 wantCids := []cid.Cid{ 765 srcData.Root, // "/" 766 srcData.Children[1].Root, // "/?" 767 srcData.Children[1].Children[1].Root, // "/?/#" 768 srcData.Children[1].Children[1].Children[1].Root, // "/?/#/%" 769 srcData.Children[1].Children[1].Children[1].Children[0].Root, // "/?/#/%/ " (' ' sorts before '!', so it's the first link, ahead of the one named '!before') 770 } 771 validateCarBody(t, body, srcData.Root, wantCids, true) 772 }}, 773 }, 774 { 775 name: "graphsync nested file, path with special characters", 776 graphsyncRemotes: 1, 777 generate: func(t *testing.T, rndReader io.Reader, remotes []testpeer.TestPeer) []unixfs.DirEntry { 778 lsys := remotes[0].LinkSystem 779 return []unixfs.DirEntry{unixfs.WrapContent(t, rndReader, lsys, unixfs.GenerateFile(t, lsys, rndReader, 1024), "/?/#/%/ ", false)} 780 }, 781 paths: []string{"/?/#/%/ "}, 782 modifyQueries: []queryModifier{entityQuery}, 783 validateBodies: []bodyValidator{func(t *testing.T, srcData unixfs.DirEntry, body []byte) { 784 wantCids := []cid.Cid{ 785 srcData.Root, // "/" 786 srcData.Children[1].Root, // "/?"
787 srcData.Children[1].Children[1].Root, // "/?/#" 788 srcData.Children[1].Children[1].Children[1].Root, // "/?/#/%" 789 srcData.Children[1].Children[1].Children[1].Children[0].Root, // "/?/#/%/ " (' ' sorts before '!', so it's the first link, ahead of the one named '!before') 790 } 791 validateCarBody(t, body, srcData.Root, wantCids, true) 792 }}, 793 }, 794 { 795 name: "with access token - rejects anonymous requests", 796 httpRemotes: 1, 797 generate: singlePeerGenerator(unixfsSpec_smallFile), 798 modifyHttpConfig: func(cfg httpserver.HttpServerConfig) httpserver.HttpServerConfig { 799 cfg.AccessToken = "super-secret" 800 return cfg 801 }, 802 expectUnauthorized: true, 803 }, 804 { 805 name: "with access token - allows requests with authorization header", 806 httpRemotes: 1, 807 generate: singlePeerGenerator(unixfsSpec_smallFile), 808 modifyHttpConfig: func(cfg httpserver.HttpServerConfig) httpserver.HttpServerConfig { 809 cfg.AccessToken = "super-secret" 810 return cfg 811 }, 812 setHeader: func(header http.Header) { 813 header.Set("Authorization", "Bearer super-secret") 814 header.Add("Accept", "application/vnd.ipld.car") 815 }, 816 expectUnauthorized: false, 817 }, 818 { 819 name: "non-unixfs graphsync", 820 graphsyncRemotes: 1, 821 generate: func(t *testing.T, rndReader io.Reader, remotes []testpeer.TestPeer) []unixfs.DirEntry { 822 return []unixfs.DirEntry{trustlesstestutil.MakeDagWithIdentity(t, *remotes[0].LinkSystem)} 823 }, 824 }, 825 { 826 name: "non-unixfs bitswap", 827 bitswapRemotes: 1, 828 generate: func(t *testing.T, rndReader io.Reader, remotes []testpeer.TestPeer) []unixfs.DirEntry { 829 return []unixfs.DirEntry{trustlesstestutil.MakeDagWithIdentity(t, *remotes[0].LinkSystem)} 830 }, 831 }, 832 { 833 name: "non-unixfs http", 834 httpRemotes: 1, 835 generate: func(t *testing.T, rndReader io.Reader, remotes []testpeer.TestPeer) []unixfs.DirEntry { 836 return []unixfs.DirEntry{trustlesstestutil.MakeDagWithIdentity(t, *remotes[0].LinkSystem)} 837 }, 838 },
839 // noDups variants are important because handling of these happens all the 840 // way up to DuplicateAdderCar 841 { 842 name: "non-unixfs graphsync /w noDups", 843 setHeader: noDups, 844 expectNoDups: true, 845 graphsyncRemotes: 1, 846 generate: func(t *testing.T, rndReader io.Reader, remotes []testpeer.TestPeer) []unixfs.DirEntry { 847 return []unixfs.DirEntry{trustlesstestutil.MakeDagWithIdentity(t, *remotes[0].LinkSystem)} 848 }, 849 }, 850 { 851 name: "non-unixfs bitswap /w noDups", 852 setHeader: noDups, 853 expectNoDups: true, 854 bitswapRemotes: 1, 855 generate: func(t *testing.T, rndReader io.Reader, remotes []testpeer.TestPeer) []unixfs.DirEntry { 856 return []unixfs.DirEntry{trustlesstestutil.MakeDagWithIdentity(t, *remotes[0].LinkSystem)} 857 }, 858 }, 859 { 860 name: "non-unixfs http /w noDups", 861 setHeader: noDups, 862 expectNoDups: true, 863 httpRemotes: 1, 864 generate: func(t *testing.T, rndReader io.Reader, remotes []testpeer.TestPeer) []unixfs.DirEntry { 865 return []unixfs.DirEntry{trustlesstestutil.MakeDagWithIdentity(t, *remotes[0].LinkSystem)} 866 }, 867 }, 868 } 869 870 for _, testCase := range testCases { 871 testCase := testCase 872 t.Run(testCase.name, func(t *testing.T) { 873 ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second) 874 defer cancel() 875 876 rndSeed := time.Now().UTC().UnixNano() 877 t.Logf("random seed: %d", rndSeed) 878 var rndReader io.Reader = rand.New(rand.NewSource(rndSeed)) 879 880 mrn := mocknet.NewMockRetrievalNet(ctx, t) 881 mrn.AddGraphsyncPeers(testCase.graphsyncRemotes) 882 finishedChans := make([]chan []datatransfer.Event, 0) 883 for _, r := range mrn.Remotes { 884 finishedChans = append(finishedChans, mocknet.SetupRetrieval(t, r)) 885 } 886 mrn.AddBitswapPeers(testCase.bitswapRemotes) 887 mrn.AddHttpPeers(testCase.httpRemotes) 888 889 require.NoError(t, mrn.MN.LinkAll()) 890 891 carFiles := debugRemotes(t, ctx, testCase.name, mrn.Remotes) 892 srcData := testCase.generate(t, rndReader, 
mrn.Remotes) 893 894 // Setup a new lassie 895 req := require.New(t) 896 var customOpts []lassie.LassieOption 897 if testCase.lassieOpts != nil { 898 customOpts = testCase.lassieOpts(t, mrn) 899 } 900 opts := append([]lassie.LassieOption{ 901 lassie.WithProviderTimeout(20 * time.Second), 902 lassie.WithHost(mrn.Self), 903 lassie.WithCandidateSource(mrn.Source), 904 }, customOpts...) 905 if testCase.disableGraphsync { 906 opts = append(opts, lassie.WithProtocols([]multicodec.Code{multicodec.TransportBitswap, multicodec.TransportIpfsGatewayHttp})) 907 } 908 lassie, err := lassie.NewLassie(ctx, opts...) 909 req.NoError(err) 910 911 var aggregateEventsCh = make(chan []aggregateeventrecorder.AggregateEvent) 912 if len(testCase.expectAggregateEvents) > 0 { 913 closer := setupAggregateEventRecorder(t, ctx, len(srcData), lassie, aggregateEventsCh) 914 defer closer.Close() 915 } 916 917 // Start an HTTP server 918 cfg := httpserver.HttpServerConfig{Address: "127.0.0.1", Port: 0, TempDir: t.TempDir()} 919 if testCase.modifyHttpConfig != nil { 920 cfg = testCase.modifyHttpConfig(cfg) 921 } 922 httpServer, err := httpserver.NewHttpServer(ctx, lassie, cfg) 923 req.NoError(err) 924 serverError := make(chan error, 1) 925 go func() { 926 serverError <- httpServer.Start() 927 }() 928 929 paths := make([]string, len(srcData)) 930 for i := 0; i < len(srcData); i++ { 931 if testCase.paths != nil && testCase.paths[i] != "" { 932 p := datamodel.ParsePath(testCase.paths[i]) 933 for p.Len() > 0 { 934 var ps datamodel.PathSegment 935 ps, p = p.Shift() 936 paths[i] += "/" + url.PathEscape(ps.String()) 937 } 938 } 939 } 940 941 responseChans := make([]chan response, 0) 942 for i := 0; i < len(srcData); i++ { 943 responseChan := make(chan response, 1) 944 responseChans = append(responseChans, responseChan) 945 go func(i int) { 946 // Make a request for our CID and read the complete CAR bytes 947 addr := fmt.Sprintf("http://%s/ipfs/%s%s", httpServer.Addr(), srcData[i].Root.String(), paths[i]) 
948 getReq, err := http.NewRequest("GET", addr, nil) 949 req.NoError(err) 950 if testCase.setHeader == nil { 951 getReq.Header.Add("Accept", "application/vnd.ipld.car") 952 } else { 953 testCase.setHeader(getReq.Header) 954 } 955 if testCase.modifyQueries != nil && testCase.modifyQueries[i] != nil { 956 q := getReq.URL.Query() 957 testCase.modifyQueries[i](q, mrn.Remotes) 958 getReq.URL.RawQuery = q.Encode() 959 } 960 t.Log("Fetching", getReq.URL.String()) 961 resp, err := http.DefaultClient.Do(getReq) 962 req.NoError(err) 963 expectBodyReadError := "" 964 if testCase.expectUncleanEnd { 965 expectBodyReadError = "http: unexpected EOF reading trailer" 966 } 967 body := readAllBody(t, resp.Body, expectBodyReadError) 968 req.NoError(resp.Body.Close()) 969 responseChan <- response{StatusCode: resp.StatusCode, Header: resp.Header, Body: body} 970 }(i) 971 } 972 973 responses := make([]response, 0) 974 for _, responseChan := range responseChans { 975 select { 976 case resp := <-responseChan: 977 responses = append(responses, resp) 978 case <-ctx.Done(): 979 req.FailNow("Did not receive responses") 980 } 981 } 982 983 if !testCase.disableGraphsync { 984 // wait for graphsync retrievals to finish on the remotes 985 var wg sync.WaitGroup 986 wg.Add(len(finishedChans)) 987 for _, finishedChan := range finishedChans { 988 go func(finishedChan chan []datatransfer.Event) { 989 mocknet.WaitForFinish(ctx, t, finishedChan, 1*time.Second) 990 wg.Done() 991 }(finishedChan) 992 } 993 wg.Wait() 994 } 995 996 for i, resp := range responses { 997 if testCase.expectNoCandidates { 998 if resp.StatusCode != http.StatusBadGateway { 999 req.Failf("wrong response code not received", "expected %d, got %d; body: [%s]", http.StatusBadGateway, resp.StatusCode, string(resp.Body)) 1000 req.Contains(string(resp.Body), "no candidates found") 1001 } 1002 } else if testCase.expectUnauthorized { 1003 if resp.StatusCode != http.StatusUnauthorized { 1004 req.Failf("wrong response code not received", 
"expected %d, got %d; body: [%s]", http.StatusUnauthorized, resp.StatusCode, string(resp.Body)) 1005 } 1006 } else { 1007 if resp.StatusCode != http.StatusOK { 1008 req.Failf("wrong response code not received", "expected %d, got %d; body: [%s]", http.StatusOK, resp.StatusCode, string(resp.Body)) 1009 } 1010 1011 verifyHeaders(t, resp, srcData[i].Root, paths[i], testCase.expectNoDups) 1012 1013 if DEBUG_DATA { 1014 dstf, err := os.CreateTemp("", fmt.Sprintf("%s_received%d.car", strings.Replace(testCase.name, "/", "__", -1), i)) 1015 req.NoError(err) 1016 t.Logf("Writing received data to CAR @ %s", dstf.Name()) 1017 _, err = dstf.Write(resp.Body) 1018 req.NoError(err) 1019 carFiles = append(carFiles, dstf) 1020 } 1021 1022 if testCase.validateBodies != nil && testCase.validateBodies[i] != nil { 1023 testCase.validateBodies[i](t, srcData[i], resp.Body) 1024 } else { 1025 gotLsys := CarBytesLinkSystem(t, bytes.NewReader(resp.Body)) 1026 gotDir := unixfs.ToDirEntry(t, gotLsys, srcData[i].Root, true) 1027 unixfs.CompareDirEntries(t, srcData[i], gotDir) 1028 } 1029 } 1030 } 1031 1032 if len(testCase.expectAggregateEvents) > 0 { 1033 var events []aggregateeventrecorder.AggregateEvent 1034 // check that the event recorder got and event for this by looking for the root cid 1035 select { 1036 case events = <-aggregateEventsCh: 1037 case <-ctx.Done(): 1038 req.FailNow("Did not receive aggregate events") 1039 } 1040 verifyAggregateEvents(t, mrn.Remotes, srcData, testCase.expectAggregateEvents, events) 1041 } 1042 1043 if DEBUG_DATA { 1044 for _, cf := range carFiles { 1045 req.NoError(cf.Close()) 1046 req.NoError(os.Remove(cf.Name())) 1047 } 1048 t.Logf("Cleaned up CARs") 1049 } 1050 1051 err = httpServer.Close() 1052 req.NoError(err) 1053 select { 1054 case <-ctx.Done(): 1055 req.FailNow("server failed to shut down") 1056 case err = <-serverError: 1057 req.NoError(err) 1058 } 1059 }) 1060 } 1061 } 1062 1063 // validateCarBody reads the given bytes as a CAR, validates the root 
is correct 1064 // and that it contains all of the wantCids (not strictly in order). If 1065 // onlyWantCids is true, it also validates that wantCids are the only CIDs in 1066 // the CAR (with no duplicates). 1067 func validateCarBody(t *testing.T, body []byte, root cid.Cid, wantCids []cid.Cid, onlyWantCids bool) { 1068 br, err := car.NewBlockReader(bytes.NewReader(body)) 1069 require.NoError(t, err) 1070 require.Equal(t, []cid.Cid{root}, br.Roots) 1071 gotCids := make([]cid.Cid, 0) 1072 for { 1073 blk, err := br.Next() 1074 if err != nil { 1075 require.EqualError(t, err, io.EOF.Error()) 1076 break 1077 } 1078 gotCids = append(gotCids, blk.Cid()) 1079 } 1080 for _, cw := range wantCids { 1081 var found bool 1082 for _, cg := range gotCids { 1083 if cw.Equals(cg) { 1084 found = true 1085 break 1086 } 1087 } 1088 require.True(t, found) 1089 } 1090 if onlyWantCids { 1091 require.Len(t, gotCids, len(wantCids)) 1092 } 1093 } 1094 1095 func verifyHeaders(t *testing.T, resp response, root cid.Cid, path string, expectNoDups bool) { 1096 req := require.New(t) 1097 1098 req.Regexp(`^lassie/v\d+\.\d+\.\d+-\w+$`, resp.Header.Get("Server")) 1099 req.Equal(fmt.Sprintf(`attachment; filename="%s.car"`, root.String()), resp.Header.Get("Content-Disposition")) 1100 req.Equal("none", resp.Header.Get("Accept-Ranges")) 1101 req.Equal("public, max-age=29030400, immutable", resp.Header.Get("Cache-Control")) 1102 req.Equal(trustlesshttp.DefaultContentType().WithDuplicates(!expectNoDups).String(), resp.Header.Get("Content-Type")) 1103 req.Equal("nosniff", resp.Header.Get("X-Content-Type-Options")) 1104 st := resp.Header.Get("Server-Timing") 1105 req.Contains(st, "started-finding-candidates") 1106 req.Contains(st, "candidates-found=") 1107 req.Contains(st, "retrieval-") 1108 req.Contains(st, "dur=") // at lest one of these 1109 etagStart := fmt.Sprintf(`"%s.car.`, root.String()) 1110 etagGot := resp.Header.Get("ETag") 1111 req.True(strings.HasPrefix(etagGot, etagStart), "ETag should start 
with [%s], got [%s]", etagStart, etagGot) 1112 req.Equal(`"`, etagGot[len(etagGot)-1:], "ETag should end with a quote") 1113 req.Equal(fmt.Sprintf("/ipfs/%s%s", root.String(), path), resp.Header.Get("X-Ipfs-Path")) 1114 requestId := resp.Header.Get("X-Trace-Id") 1115 require.NotEmpty(t, requestId) 1116 _, err := uuid.Parse(requestId) 1117 req.NoError(err) 1118 } 1119 1120 func setupAggregateEventRecorder(t *testing.T, ctx context.Context, expectCount int, lassie *lassie.Lassie, aggregateEventsCh chan []aggregateeventrecorder.AggregateEvent) interface{ Close() } { 1121 var aggregateEventsLk sync.Mutex 1122 events := make([]aggregateeventrecorder.AggregateEvent, 0) 1123 ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { 1124 require.Equal(t, "Basic listenup", r.Header.Get("Authorization")) 1125 type batch struct { 1126 Events []aggregateeventrecorder.AggregateEvent 1127 } 1128 var b batch 1129 err := json.NewDecoder(r.Body).Decode(&b) 1130 require.NoError(t, err) 1131 aggregateEventsLk.Lock() 1132 events = append(events, b.Events...) 
1133 if len(events) == expectCount { 1134 select { 1135 case <-ctx.Done(): 1136 case aggregateEventsCh <- events: 1137 } 1138 } 1139 aggregateEventsLk.Unlock() 1140 })) 1141 1142 eventRecorder := aggregateeventrecorder.NewAggregateEventRecorder(ctx, aggregateeventrecorder.EventRecorderConfig{ 1143 InstanceID: "fooblesmush", 1144 EndpointURL: ts.URL, 1145 EndpointAuthorization: "listenup", 1146 }) 1147 lassie.RegisterSubscriber(eventRecorder.RetrievalEventSubscriber()) 1148 1149 return ts 1150 } 1151 1152 func verifyAggregateEvents(t *testing.T, remotes []testpeer.TestPeer, srcData []unixfs.DirEntry, expectedEvents, actualEvents []aggregateeventrecorder.AggregateEvent) { 1153 req := require.New(t) 1154 1155 for ii, src := range srcData { 1156 var evt aggregateeventrecorder.AggregateEvent 1157 for _, e := range actualEvents { 1158 if e.RootCid == src.Root.String() { 1159 evt = e 1160 break 1161 } 1162 } 1163 req.NotNil(evt) 1164 t.Log("got event", evt) 1165 1166 expect := expectedEvents[ii] 1167 req.Equal("fooblesmush", evt.InstanceID) 1168 req.Equal(expect.Success, evt.Success) 1169 req.Equal(expect.URLPath, evt.URLPath) 1170 req.ElementsMatch(expect.ProtocolsAttempted, evt.ProtocolsAttempted) 1171 req.ElementsMatch(expect.ProtocolsAllowed, evt.ProtocolsAllowed) 1172 1173 // This makes an assumption that there's a clear mapping of remote 1174 // index to srcData index, which doesn't necessarily hold. So if novel 1175 // cases need to be tested, this may need to be en-smartened. 
1176 if expect.Success { 1177 totalBytes := totalBlockBytes(t, *remotes[ii].LinkSystem, src) 1178 req.Equal(totalBytes, evt.BytesTransferred) 1179 1180 // This makes an assumption there's only one attempt 1181 isBitswap := slices.Equal(expect.ProtocolsAttempted, []string{multicodec.TransportBitswap.String()}) 1182 if isBitswap { 1183 req.Len(evt.RetrievalAttempts, 2) 1184 req.Contains(evt.RetrievalAttempts, "Bitswap") 1185 } else { 1186 req.Len(evt.RetrievalAttempts, 1) 1187 } 1188 for _, attempt := range evt.RetrievalAttempts { 1189 req.Equal("", attempt.Error) 1190 req.Equal(totalBytes, attempt.BytesTransferred) // both attempts for a bitswap req will have the same number 1191 } 1192 } 1193 } 1194 } 1195 1196 func debugRemotes(t *testing.T, ctx context.Context, name string, remotes []testpeer.TestPeer) []*os.File { 1197 if !DEBUG_DATA { 1198 return nil 1199 } 1200 carFiles := make([]*os.File, 0) 1201 for ii, r := range remotes { 1202 func(ii int, r testpeer.TestPeer) { 1203 carFile, err := os.CreateTemp("", fmt.Sprintf("%s_remote%d.car", strings.Replace(name, "/", "__", -1), ii)) 1204 require.NoError(t, err) 1205 t.Logf("Writing source data to CAR @ %s", carFile.Name()) 1206 carFiles = append(carFiles, carFile) 1207 carW, err := storage.NewWritable(carFile, []cid.Cid{}, car.WriteAsCarV1(true), car.AllowDuplicatePuts(true)) 1208 require.NoError(t, err) 1209 swo := r.LinkSystem.StorageWriteOpener 1210 r.LinkSystem.StorageWriteOpener = func(lc linking.LinkContext) (io.Writer, linking.BlockWriteCommitter, error) { 1211 w, c, err := swo(lc) 1212 if err != nil { 1213 return nil, nil, err 1214 } 1215 var buf bytes.Buffer 1216 return &buf, func(l datamodel.Link) error { 1217 require.NoError(t, carW.Put(ctx, l.(cidlink.Link).Cid.KeyString(), buf.Bytes())) 1218 _, err := w.Write(buf.Bytes()) 1219 if err != nil { 1220 return err 1221 } 1222 return c(l) 1223 }, nil 1224 } 1225 }(ii, r) 1226 } 1227 return carFiles 1228 } 1229 1230 func readAllBody(t *testing.T, r io.Reader, 
expectError string) []byte { 1231 if expectError == "" { 1232 body, err := io.ReadAll(r) 1233 require.NoError(t, err) 1234 return body 1235 } 1236 // expect an error, so let's creep up on it and collect as much of the body 1237 // as we can before the error blocks us 1238 // see readLocked() in src/net/http/transfer.go: 1239 // → b.src.Read(p) 1240 // → followed by b.readTrailer() which should error; we want to capture both 1241 var buf bytes.Buffer 1242 var byt [1]byte 1243 var err error 1244 var n int 1245 for { 1246 n, err = r.Read(byt[:]) 1247 // record the bytes we read, the error should come after the normal body 1248 // read and then it attempts to read trailers where it should fail 1249 buf.Write(byt[:n]) 1250 if err != nil { 1251 require.EqualError(t, err, expectError) 1252 break 1253 } 1254 } 1255 return buf.Bytes() 1256 } 1257 1258 // wrapSpec wraps the given spec in a directory structure that has a 1259 // subdirectory before and after the subdirectory we want to path through at 1260 // each level, according to wrapPath. Tests should be able to ignore the 1261 // extraneous content generated by this function's spec. 
1262 func wrapSpec(spec string) string { 1263 return `dir( 1264 dir{name:"!before"}(~10*file:~1k),dir{name:"want2"}( 1265 dir{name:"!before"}(~10*file:~2k),dir{name:"want1"}( 1266 dir{name:"!before"}(~10*file:~1k),dir{name:"want0"}( 1267 dir{name:"!before"}(~10*file:~100),` + spec + `,dir{name:"~after"}(~10*file:~100)), 1268 dir{name:"~after"}(~10*file:~2kb)), 1269 dir{name:"~after"}(~10*file:~1kb)), 1270 dir{name:"~after"}(~10*file:~2kb))` 1271 } 1272 1273 // a utility function where a test only involves a single peer 1274 func singlePeerGenerator(spec string) generateFn { 1275 return func(t *testing.T, rndReader io.Reader, remotes []testpeer.TestPeer) []unixfs.DirEntry { 1276 return []unixfs.DirEntry{generateFor(t, spec, rndReader, *remotes[0].LinkSystem)} 1277 } 1278 } 1279 1280 // given a spec, generate that UnixFS content into the given LinkSystem. 1281 func generateFor(t *testing.T, spec string, rndReader io.Reader, lsys linking.LinkSystem) unixfs.DirEntry { 1282 ss := strings.Split(spec, "\n") 1283 for i, s := range ss { 1284 ss[i] = strings.TrimSpace(s) 1285 } 1286 spec = strings.Join(ss, "") 1287 entity, err := unixfsgen.Parse(spec) 1288 require.NoError(t, err) 1289 t.Logf("Generating: %s", entity.Describe("")) 1290 rootEnt, err := entity.Generate(lsys, rndReader) 1291 require.NoError(t, err) 1292 return rootEnt 1293 } 1294 1295 var validateFirstThreeBlocksOnly = []bodyValidator{func(t *testing.T, srcData unixfs.DirEntry, body []byte) { 1296 // 3 blocks max, start at the root and then two blocks into the sharded data 1297 wantCids := []cid.Cid{ 1298 srcData.Root, 1299 srcData.SelfCids[0], 1300 srcData.SelfCids[1], 1301 } 1302 validateCarBody(t, body, srcData.Root, wantCids, true) 1303 }} 1304 1305 var validateOnlyEntity = []bodyValidator{func(t *testing.T, srcData unixfs.DirEntry, body []byte) { 1306 // sharded directory contains multiple blocks, so we expect a CAR with 1307 // exactly those blocks 1308 validateCarBody(t, body, srcData.Root, 
srcData.SelfCids, true) 1309 }} 1310 1311 var validatePathedFullContent = []bodyValidator{func(t *testing.T, srcData unixfs.DirEntry, body []byte) { 1312 wantCids := append([]cid.Cid{ 1313 srcData.Root, // "/"" 1314 srcData.Children[1].Root, // "/want2" 1315 srcData.Children[1].Children[1].Root, // "/want2/want1" 1316 }, 1317 srcData.Children[1].Children[1].Children[1].SelfCids..., // wrapPath (full) 1318 ) 1319 // validate we got the dag-scope entity form 1320 validateCarBody(t, body, srcData.Root, wantCids, false) 1321 // validate that we got the full depth form under the path 1322 gotDir := carToDirEntry(t, bytes.NewReader(body), srcData.Children[1].Children[1].Children[1].Root, wrapPath, true) 1323 unixfs.CompareDirEntries(t, srcData.Children[1].Children[1].Children[1], gotDir) 1324 }} 1325 1326 var validatePathedEntityContent = []bodyValidator{func(t *testing.T, srcData unixfs.DirEntry, body []byte) { 1327 wantCids := append([]cid.Cid{ 1328 srcData.Root, // "/"" 1329 srcData.Children[1].Root, // "/want2" 1330 srcData.Children[1].Children[1].Root, // "/want2/want1" 1331 }, 1332 srcData.Children[1].Children[1].Children[1].SelfCids..., // wrapPath (full) 1333 ) 1334 validateCarBody(t, body, srcData.Root, wantCids, true) 1335 }} 1336 1337 var validateOnlyRoot = []bodyValidator{func(t *testing.T, srcData unixfs.DirEntry, body []byte) { 1338 // expect a CAR of one block, to represent the root directory we asked for 1339 validateCarBody(t, body, srcData.Root, []cid.Cid{srcData.Root}, true) 1340 }} 1341 1342 func totalBlockBytes(t *testing.T, lsys linking.LinkSystem, srcData unixfs.DirEntry) uint64 { 1343 var total uint64 1344 for _, c := range srcData.SelfCids { 1345 b, err := lsys.LoadRaw(ipld.LinkContext{}, cidlink.Link{Cid: c}) 1346 require.NoError(t, err) 1347 total += uint64(len(b)) 1348 for _, child := range srcData.Children { 1349 total += totalBlockBytes(t, lsys, child) 1350 } 1351 } 1352 return total 1353 }