github.com/filecoin-project/lassie@v0.23.0/pkg/internal/itest/http_fetch_test.go (about)

     1  //go:build !race
     2  
     3  package itest
     4  
     5  import (
     6  	"bytes"
     7  	"context"
     8  	"encoding/json"
     9  	"fmt"
    10  	"io"
    11  	"math/rand"
    12  	"net/http"
    13  	"net/url"
    14  	"os"
    15  	"strings"
    16  	"sync"
    17  	"testing"
    18  	"time"
    19  
    20  	datatransfer "github.com/filecoin-project/go-data-transfer/v2"
    21  	"github.com/filecoin-project/lassie/pkg/aggregateeventrecorder"
    22  	"github.com/filecoin-project/lassie/pkg/internal/itest/mocknet"
    23  	"github.com/filecoin-project/lassie/pkg/internal/itest/testpeer"
    24  	"github.com/filecoin-project/lassie/pkg/lassie"
    25  	"github.com/filecoin-project/lassie/pkg/retriever"
    26  	httpserver "github.com/filecoin-project/lassie/pkg/server/http"
    27  	"github.com/filecoin-project/lassie/pkg/types"
    28  	"github.com/google/uuid"
    29  	"github.com/ipfs/go-cid"
    30  	unixfs "github.com/ipfs/go-unixfsnode/testutil"
    31  	"github.com/ipld/go-car/v2"
    32  	"github.com/ipld/go-car/v2/storage"
    33  	unixfsgen "github.com/ipld/go-fixtureplate/generator"
    34  	"github.com/ipld/go-ipld-prime"
    35  	"github.com/ipld/go-ipld-prime/datamodel"
    36  	"github.com/ipld/go-ipld-prime/linking"
    37  	cidlink "github.com/ipld/go-ipld-prime/linking/cid"
    38  	"github.com/ipld/go-ipld-prime/storage/memstore"
    39  	selectorparse "github.com/ipld/go-ipld-prime/traversal/selector/parse"
    40  	trustlesshttp "github.com/ipld/go-trustless-utils/http"
    41  	trustlesstestutil "github.com/ipld/go-trustless-utils/testutil"
    42  	"github.com/ipld/go-trustless-utils/traversal"
    43  	"github.com/libp2p/go-libp2p/core/peer"
    44  	"github.com/multiformats/go-multicodec"
    45  	"github.com/stretchr/testify/require"
    46  	"golang.org/x/exp/slices"
    47  
    48  	"net/http/httptest"
    49  	_ "net/http/pprof"
    50  )
    51  
    52  // DEBUG_DATA, when true, will write source and received data to CARs
    53  // for inspection if tests fail; when false (the default here), that data
    54  // is cleaned up as tests proceed.
    55  const DEBUG_DATA = false
    56  
    57  // UnixFS data generation specs used by github.com/ipld/go-fixtureplate/generator (imported in this file as unixfsgen)
    58  const (
    59  	unixfsSpec_smallFile              = `file:1KiB`       // presumably a single 1 KiB file — confirm against the go-fixtureplate spec syntax
    60  	unixfsSpec_largeShardedFile       = `file:4MiB`       // presumably a single 4 MiB file, large enough to span multiple blocks — confirm
    61  	unixfsSpec_largeShardedFileZeroed = `file:4MiB{zero}` // used by the "with dups" test cases; presumably zero-filled so blocks repeat — confirm
    62  	unixfsSpec_largeDirectory         = `dir(~10*file:1,~5*dir(~10*file:~10k,~5*dir(~4*file:~200k)),~5*file:~300k)`
    63  	unixfsSpec_largeShardedDirectory  = `dir{sharded}(~20*file:1,~10*file:~1k,~5*dir(~5*file:~10k,~5*dir(~4*file:~200k)),~2*file:~300k)`
    64  
    65  	// wrapPath is the path within "wrapped" content (the *Wrapped specs below)
    66  	// at which the content we care about is located; we use this to test
    67  	// path-nested retrievals and make sure we ignore content outside this path.
    68  	wrapPath = "/want2/want1/want0"
    69  )
    70  
    71  var (
    72  	// unixfsSpec_largeShardedFileWrapped is unixfsSpec_largeShardedFile nested within wrapPath, surrounded by other directories
    73  	unixfsSpec_largeShardedFileWrapped = wrapSpec(unixfsSpec_largeShardedFile)
    74  	// unixfsSpec_largeDirectoryWrapped is unixfsSpec_largeDirectory nested within wrapPath, surrounded by other directories
    75  	unixfsSpec_largeDirectoryWrapped = wrapSpec(unixfsSpec_largeDirectory)
    76  	// unixfsSpec_largeShardedDirectoryWrapped is unixfsSpec_largeShardedDirectory nested within wrapPath, surrounded by other directories
    77  	unixfsSpec_largeShardedDirectoryWrapped = wrapSpec(unixfsSpec_largeShardedDirectory)
    78  )
    79  
    80  type generateFn func(*testing.T, io.Reader, []testpeer.TestPeer) []unixfs.DirEntry // generateFn builds test content from the supplied data reader, stores it on the given remote peers and returns the generated root entries
    81  type bodyValidator func(*testing.T, unixfs.DirEntry, []byte)                       // bodyValidator checks a response body against the source entry that was generated for it
    82  type response struct { // response holds the status code, headers and body bytes of an HTTP response (presumably captured by the fetch code later in this file — confirm)
    83  	StatusCode int
    84  	Header     http.Header
    85  	Body       []byte
    86  }
    87  
    88  func TestHttpFetch(t *testing.T) {
    89  	entityQuery := func(q url.Values, _ []testpeer.TestPeer) {
    90  		q.Set("dag-scope", "entity")
    91  	}
    92  	blockQuery := func(q url.Values, _ []testpeer.TestPeer) {
    93  		q.Set("dag-scope", "block")
    94  	}
    95  	noDups := func(header http.Header) {
    96  		header.Set("Accept", "application/vnd.ipld.car;order=dfs;version=1;dups=n;")
    97  	}
    98  	type headerSetter func(http.Header)
    99  	type queryModifier func(url.Values, []testpeer.TestPeer)
   100  	type lassieOptsGen func(*testing.T, *mocknet.MockRetrievalNet) []lassie.LassieOption
   101  
   102  	testCases := []struct {
   103  		name                  string
   104  		graphsyncRemotes      int
   105  		bitswapRemotes        int
   106  		httpRemotes           int
   107  		disableGraphsync      bool
   108  		expectNoCandidates    bool
   109  		expectUncleanEnd      bool
   110  		expectUnauthorized    bool
   111  		expectAggregateEvents []aggregateeventrecorder.AggregateEvent
   112  		modifyHttpConfig      func(httpserver.HttpServerConfig) httpserver.HttpServerConfig
   113  		generate              generateFn
   114  		paths                 []string
   115  		setHeader             headerSetter
   116  		modifyQueries         []queryModifier
   117  		validateBodies        []bodyValidator
   118  		lassieOpts            lassieOptsGen
   119  		expectNoDups          bool
   120  	}{
   121  		{
   122  			name:             "graphsync large sharded file",
   123  			graphsyncRemotes: 1,
   124  			generate:         singlePeerGenerator(unixfsSpec_largeShardedFile),
   125  			expectAggregateEvents: []aggregateeventrecorder.AggregateEvent{{
   126  				Success:            true,
   127  				URLPath:            "?dag-scope=all&dups=y",
   128  				ProtocolsAllowed:   []string{multicodec.TransportGraphsyncFilecoinv1.String(), multicodec.TransportBitswap.String(), multicodec.TransportIpfsGatewayHttp.String()},
   129  				ProtocolsAttempted: []string{multicodec.TransportGraphsyncFilecoinv1.String()},
   130  			}},
   131  		},
   132  		{
   133  			name:           "bitswap large sharded file",
   134  			bitswapRemotes: 1,
   135  			generate:       singlePeerGenerator(unixfsSpec_largeShardedFile),
   136  			expectAggregateEvents: []aggregateeventrecorder.AggregateEvent{{
   137  				Success:            true,
   138  				URLPath:            "?dag-scope=all&dups=y",
   139  				ProtocolsAllowed:   []string{multicodec.TransportGraphsyncFilecoinv1.String(), multicodec.TransportBitswap.String(), multicodec.TransportIpfsGatewayHttp.String()},
   140  				ProtocolsAttempted: []string{multicodec.TransportBitswap.String()},
   141  			}},
   142  		},
   143  		{
   144  			name:        "http large sharded file",
   145  			httpRemotes: 1,
   146  			generate:    singlePeerGenerator(unixfsSpec_largeShardedFile),
   147  			expectAggregateEvents: []aggregateeventrecorder.AggregateEvent{{
   148  				Success:            true,
   149  				URLPath:            "?dag-scope=all&dups=y",
   150  				ProtocolsAllowed:   []string{multicodec.TransportGraphsyncFilecoinv1.String(), multicodec.TransportBitswap.String(), multicodec.TransportIpfsGatewayHttp.String()},
   151  				ProtocolsAttempted: []string{multicodec.TransportIpfsGatewayHttp.String()},
   152  			}},
   153  		},
   154  		{
   155  			name:             "graphsync large directory",
   156  			graphsyncRemotes: 1,
   157  			generate:         singlePeerGenerator(unixfsSpec_largeDirectory),
   158  		},
   159  		{
   160  			name:           "bitswap large directory",
   161  			bitswapRemotes: 1,
   162  			generate:       singlePeerGenerator(unixfsSpec_largeDirectory),
   163  		},
   164  		{
   165  			name:        "http large directory",
   166  			httpRemotes: 1,
   167  			generate:    singlePeerGenerator(unixfsSpec_largeDirectory),
   168  		},
   169  		{
   170  			name:             "graphsync large sharded directory",
   171  			graphsyncRemotes: 1,
   172  			generate:         singlePeerGenerator(unixfsSpec_largeShardedDirectory),
   173  		},
   174  		{
   175  			name:           "bitswap large sharded directory",
   176  			bitswapRemotes: 1,
   177  			generate:       singlePeerGenerator(unixfsSpec_largeShardedDirectory),
   178  		},
   179  		{
   180  			name:        "http large sharded directory",
   181  			httpRemotes: 1,
   182  			generate:    singlePeerGenerator(unixfsSpec_largeShardedDirectory),
   183  		},
   184  		{
   185  			name:             "graphsync max block limit",
   186  			graphsyncRemotes: 1,
   187  			expectUncleanEnd: true,
   188  			modifyHttpConfig: func(cfg httpserver.HttpServerConfig) httpserver.HttpServerConfig {
   189  				cfg.MaxBlocksPerRequest = 3
   190  				return cfg
   191  			},
   192  			generate:       singlePeerGenerator(unixfsSpec_largeShardedFile),
   193  			validateBodies: validateFirstThreeBlocksOnly,
   194  		},
   195  		{
   196  			name:             "graphsync max block limit in request",
   197  			graphsyncRemotes: 1,
   198  			expectUncleanEnd: true,
   199  			modifyQueries: []queryModifier{
   200  				func(values url.Values, _ []testpeer.TestPeer) {
   201  					values.Add("blockLimit", "3")
   202  				},
   203  			},
   204  			generate:       singlePeerGenerator(unixfsSpec_largeShardedFile),
   205  			validateBodies: validateFirstThreeBlocksOnly,
   206  		},
   207  		{
   208  			name:             "bitswap max block limit",
   209  			bitswapRemotes:   1,
   210  			expectUncleanEnd: true,
   211  			modifyHttpConfig: func(cfg httpserver.HttpServerConfig) httpserver.HttpServerConfig {
   212  				cfg.MaxBlocksPerRequest = 3
   213  				return cfg
   214  			},
   215  			generate:       singlePeerGenerator(unixfsSpec_largeShardedFile),
   216  			validateBodies: validateFirstThreeBlocksOnly,
   217  		},
   218  		{
   219  			name:             "http max block limit",
   220  			httpRemotes:      1,
   221  			expectUncleanEnd: true,
   222  			modifyHttpConfig: func(cfg httpserver.HttpServerConfig) httpserver.HttpServerConfig {
   223  				cfg.MaxBlocksPerRequest = 3
   224  				return cfg
   225  			},
   226  			generate:       singlePeerGenerator(unixfsSpec_largeShardedFile),
   227  			validateBodies: validateFirstThreeBlocksOnly,
   228  		},
   229  		{
   230  			name:             "bitswap block timeout from missing block",
   231  			bitswapRemotes:   1,
   232  			expectUncleanEnd: true,
   233  			lassieOpts: func(t *testing.T, mrn *mocknet.MockRetrievalNet) []lassie.LassieOption {
   234  				// this delay is going to depend on CI, if it's too short then a slower machine
   235  				// won't get bitswap setup in time to get the block
   236  				return []lassie.LassieOption{lassie.WithProviderTimeout(1 * time.Second)}
   237  			},
   238  			generate: func(t *testing.T, rndReader io.Reader, remotes []testpeer.TestPeer) []unixfs.DirEntry {
   239  				file := generateFor(t, unixfsSpec_largeShardedFile, rndReader, *remotes[0].LinkSystem)
   240  				remotes[0].Blockstore().DeleteBlock(context.Background(), file.SelfCids[2])
   241  				return []unixfs.DirEntry{file}
   242  			},
   243  			validateBodies: validateFirstThreeBlocksOnly,
   244  		},
   245  		{
   246  			name:           "same content, http missing block, bitswap completes",
   247  			bitswapRemotes: 1,
   248  			httpRemotes:    1,
   249  			generate: func(t *testing.T, rndReader io.Reader, remotes []testpeer.TestPeer) []unixfs.DirEntry {
   250  				file := generateFor(t, unixfsSpec_largeShardedFile, rndReader, *remotes[0].LinkSystem)
   251  				for _, c := range file.SelfCids {
   252  					blk, err := remotes[0].Blockstore().Get(context.Background(), c)
   253  					require.NoError(t, err)
   254  					writer, commit, err := remotes[1].LinkSystem.StorageWriteOpener(linking.LinkContext{Ctx: context.Background()})
   255  					require.NoError(t, err)
   256  					_, err = writer.Write(blk.RawData())
   257  					require.NoError(t, err)
   258  					err = commit(cidlink.Link{Cid: c})
   259  					require.NoError(t, err)
   260  				}
   261  				remotes[1].Blockstore().DeleteBlock(context.Background(), file.SelfCids[3])
   262  				return []unixfs.DirEntry{file}
   263  			},
   264  		},
   265  		{
   266  			// dag-scope entity fetch should get the same DAG as full for a plain file
   267  			name:             "graphsync large sharded file, dag-scope entity",
   268  			graphsyncRemotes: 1,
   269  			generate:         singlePeerGenerator(unixfsSpec_largeShardedFile),
   270  			modifyQueries:    []queryModifier{entityQuery},
   271  		},
   272  		{
   273  			// dag-scope entity fetch should get the same DAG as full for a plain file
   274  			name:           "bitswap large sharded file, dag-scope entity",
   275  			bitswapRemotes: 1,
   276  			generate:       singlePeerGenerator(unixfsSpec_largeShardedFile),
   277  			modifyQueries:  []queryModifier{entityQuery},
   278  		},
   279  		{
   280  			name:             "graphsync nested large sharded file, with path, dag-scope entity",
   281  			graphsyncRemotes: 1,
   282  			generate:         singlePeerGenerator(unixfsSpec_largeShardedFileWrapped),
   283  			paths:            []string{wrapPath},
   284  			modifyQueries:    []queryModifier{entityQuery},
   285  			validateBodies:   validatePathedEntityContent,
   286  		},
   287  		{
   288  			name:           "bitswap nested large sharded file, with path, dag-scope entity",
   289  			bitswapRemotes: 1,
   290  			generate:       singlePeerGenerator(unixfsSpec_largeShardedFileWrapped),
   291  			paths:          []string{wrapPath},
   292  			modifyQueries:  []queryModifier{entityQuery},
   293  			validateBodies: validatePathedEntityContent,
   294  		},
   295  		{
   296  			name:           "http nested large sharded file, with path, dag-scope entity",
   297  			httpRemotes:    1,
   298  			generate:       singlePeerGenerator(unixfsSpec_largeShardedFileWrapped),
   299  			paths:          []string{wrapPath},
   300  			modifyQueries:  []queryModifier{entityQuery},
   301  			validateBodies: validatePathedEntityContent,
   302  		},
   303  		{
   304  			name:             "graphsync large directory, dag-scope entity",
   305  			graphsyncRemotes: 1,
   306  			generate:         singlePeerGenerator(unixfsSpec_largeDirectory),
   307  			modifyQueries:    []queryModifier{entityQuery},
   308  			validateBodies:   validateOnlyRoot,
   309  		},
   310  		{
   311  			name:           "bitswap large directory, dag-scope entity",
   312  			bitswapRemotes: 1,
   313  			generate:       singlePeerGenerator(unixfsSpec_largeDirectory),
   314  			modifyQueries:  []queryModifier{entityQuery},
   315  			validateBodies: validateOnlyRoot,
   316  		},
   317  		{
   318  			name:           "http large directory, dag-scope entity",
   319  			httpRemotes:    1,
   320  			generate:       singlePeerGenerator(unixfsSpec_largeDirectory),
   321  			modifyQueries:  []queryModifier{entityQuery},
   322  			validateBodies: validateOnlyRoot,
   323  		},
   324  		{
   325  			name:             "graphsync nested large directory, with path, dag-scope entity",
   326  			graphsyncRemotes: 1,
   327  			generate:         singlePeerGenerator(unixfsSpec_largeDirectoryWrapped),
   328  			paths:            []string{wrapPath},
   329  			modifyQueries:    []queryModifier{entityQuery},
   330  			validateBodies:   validatePathedEntityContent,
   331  		},
   332  		{
   333  			name:           "bitswap nested large directory, with path, dag-scope entity",
   334  			bitswapRemotes: 1,
   335  			generate:       singlePeerGenerator(unixfsSpec_largeDirectoryWrapped),
   336  			paths:          []string{wrapPath},
   337  			modifyQueries:  []queryModifier{entityQuery},
   338  			validateBodies: validatePathedEntityContent,
   339  		},
   340  		{
   341  			name:           "http nested large directory, with path, dag-scope entity",
   342  			httpRemotes:    1,
   343  			generate:       singlePeerGenerator(unixfsSpec_largeDirectoryWrapped),
   344  			paths:          []string{wrapPath},
   345  			modifyQueries:  []queryModifier{entityQuery},
   346  			validateBodies: validatePathedEntityContent,
   347  		},
   348  		{
   349  			name:             "graphsync nested large directory, with path, full",
   350  			graphsyncRemotes: 1,
   351  			generate:         singlePeerGenerator(unixfsSpec_largeDirectoryWrapped),
   352  			paths:            []string{wrapPath},
   353  			validateBodies:   validatePathedFullContent,
   354  		},
   355  		{
   356  			name:           "bitswap nested large directory, with path, full",
   357  			bitswapRemotes: 1,
   358  			generate:       singlePeerGenerator(unixfsSpec_largeDirectoryWrapped),
   359  			paths:          []string{wrapPath},
   360  			validateBodies: validatePathedFullContent,
   361  		},
   362  		{
   363  			name:           "bitswap nested large directory, with path, full",
   364  			httpRemotes:    1,
   365  			generate:       singlePeerGenerator(unixfsSpec_largeDirectoryWrapped),
   366  			paths:          []string{wrapPath},
   367  			validateBodies: validatePathedFullContent,
   368  		},
   369  		{
   370  			name:             "graphsync nested large sharded directory, dag-scope entity",
   371  			graphsyncRemotes: 1,
   372  			generate:         singlePeerGenerator(unixfsSpec_largeShardedDirectory),
   373  			modifyQueries:    []queryModifier{entityQuery},
   374  			validateBodies:   validateOnlyEntity,
   375  		},
   376  		{
   377  			name:           "bitswap nested large sharded directory, dag-scope entity",
   378  			bitswapRemotes: 1,
   379  			generate:       singlePeerGenerator(unixfsSpec_largeShardedDirectory),
   380  			modifyQueries:  []queryModifier{entityQuery},
   381  			validateBodies: validateOnlyEntity,
   382  		},
   383  		{
   384  			name:           "http nested large sharded directory, dag-scope entity",
   385  			httpRemotes:    1,
   386  			generate:       singlePeerGenerator(unixfsSpec_largeShardedDirectory),
   387  			modifyQueries:  []queryModifier{entityQuery},
   388  			validateBodies: validateOnlyEntity,
   389  		},
   390  		{
   391  			name:             "graphsync nested large sharded directory, with path, dag-scope entity",
   392  			graphsyncRemotes: 1,
   393  			generate:         singlePeerGenerator(unixfsSpec_largeShardedDirectoryWrapped),
   394  			paths:            []string{wrapPath},
   395  			modifyQueries:    []queryModifier{entityQuery},
   396  			validateBodies:   validatePathedEntityContent,
   397  		},
   398  		{
   399  			name:           "bitswap nested large sharded directory, with path, dag-scope entity",
   400  			bitswapRemotes: 1,
   401  			generate:       singlePeerGenerator(unixfsSpec_largeShardedDirectoryWrapped),
   402  			paths:          []string{wrapPath},
   403  			modifyQueries:  []queryModifier{entityQuery},
   404  			validateBodies: validatePathedEntityContent,
   405  		},
   406  		{
   407  			name:           "http nested large sharded directory, with path, dag-scope entity",
   408  			httpRemotes:    1,
   409  			generate:       singlePeerGenerator(unixfsSpec_largeShardedDirectoryWrapped),
   410  			paths:          []string{wrapPath},
   411  			modifyQueries:  []queryModifier{entityQuery},
   412  			validateBodies: validatePathedEntityContent,
   413  		},
   414  		{
   415  			name:             "graphsync nested large sharded directory, with path, full",
   416  			graphsyncRemotes: 1,
   417  			generate:         singlePeerGenerator(unixfsSpec_largeShardedDirectoryWrapped),
   418  			paths:            []string{wrapPath},
   419  			validateBodies:   validatePathedFullContent,
   420  		},
   421  		{
   422  			name:           "bitswap nested large sharded directory, with path, full",
   423  			bitswapRemotes: 1,
   424  			generate:       singlePeerGenerator(unixfsSpec_largeShardedDirectoryWrapped),
   425  			paths:          []string{wrapPath},
   426  			validateBodies: validatePathedFullContent,
   427  		},
   428  		{
   429  			name:           "http nested large sharded directory, with path, full",
   430  			httpRemotes:    1,
   431  			generate:       singlePeerGenerator(unixfsSpec_largeShardedDirectoryWrapped),
   432  			paths:          []string{wrapPath},
   433  			validateBodies: validatePathedFullContent,
   434  		},
   435  		{
   436  			// A very contrived example - we spread the content generated for this test across 4 peers,
   437  			// then we also make sure the root is in all of them, so the CandidateSource will return them
   438  			// all. The retriever should then form a swarm of 4 peers and fetch the content from across
   439  			// the set.
   440  			name:           "bitswap, nested large sharded directory, spread across multiple peers, with path, dag-scope entity",
   441  			bitswapRemotes: 4,
   442  			generate: func(t *testing.T, rndReader io.Reader, remotes []testpeer.TestPeer) []unixfs.DirEntry {
   443  				// rotating linksystem - each block will be written to a different remote
   444  				lsys := cidlink.DefaultLinkSystem()
   445  				var blkIdx int
   446  				lsys.StorageWriteOpener = func(lctx ipld.LinkContext) (io.Writer, ipld.BlockWriteCommitter, error) {
   447  					defer func() { blkIdx++ }()
   448  					return remotes[blkIdx%len(remotes)].LinkSystem.StorageWriteOpener(lctx)
   449  				}
   450  				lsys.TrustedStorage = true
   451  				// generate data
   452  				data := unixfs.WrapContent(t, rndReader, &lsys, unixfs.GenerateDirectory(t, &lsys, rndReader, 16<<20, true), wrapPath, false)
   453  
   454  				// copy the root block to all remotes
   455  				lctx := ipld.LinkContext{}
   456  				rootLnk := cidlink.Link{Cid: data.Root}
   457  				// the root should be the last written block, so we should be able to
   458  				// find it on remote: (blkIdx-1)%len(remotes)
   459  				blkRdr, err := remotes[(blkIdx-1)%len(remotes)].LinkSystem.StorageReadOpener(lctx, rootLnk)
   460  				require.NoError(t, err)
   461  				blk, err := io.ReadAll(blkRdr)
   462  				require.NoError(t, err)
   463  				for _, remote := range remotes {
   464  					w, wc, err := remote.LinkSystem.StorageWriteOpener(lctx)
   465  					require.NoError(t, err)
   466  					_, err = w.Write(blk)
   467  					require.NoError(t, err)
   468  					require.NoError(t, wc(rootLnk))
   469  				}
   470  
   471  				return []unixfs.DirEntry{data}
   472  			},
   473  			paths:          []string{wrapPath},
   474  			modifyQueries:  []queryModifier{entityQuery},
   475  			validateBodies: validatePathedEntityContent,
   476  		},
   477  		{
   478  			name:           "two separate, parallel bitswap retrievals",
   479  			bitswapRemotes: 2,
   480  			generate: func(t *testing.T, rndReader io.Reader, remotes []testpeer.TestPeer) []unixfs.DirEntry {
   481  				return []unixfs.DirEntry{
   482  					generateFor(t, unixfsSpec_largeShardedFile, rndReader, *remotes[0].LinkSystem),
   483  					generateFor(t, unixfsSpec_largeDirectory, rndReader, *remotes[1].LinkSystem),
   484  				}
   485  			},
   486  		},
   487  		{
   488  			name:             "two separate, parallel graphsync retrievals",
   489  			graphsyncRemotes: 2,
   490  			generate: func(t *testing.T, rndReader io.Reader, remotes []testpeer.TestPeer) []unixfs.DirEntry {
   491  				return []unixfs.DirEntry{
   492  					generateFor(t, unixfsSpec_largeShardedFile, rndReader, *remotes[0].LinkSystem),
   493  					generateFor(t, unixfsSpec_largeDirectory, rndReader, *remotes[1].LinkSystem),
   494  				}
   495  			},
   496  		},
   497  		{
   498  			name:             "two separate, parallel graphsync retrievals, with graphsync disabled",
   499  			graphsyncRemotes: 2,
   500  			disableGraphsync: true,
   501  			// in practice, rather than "no candidates", it'll likely be a timeout
   502  			// from waiting for bitswap candidates; in test we short-circuit and send
   503  			// strictly zero bitswap or http candidates
   504  			expectNoCandidates: true,
   505  			generate: func(t *testing.T, rndReader io.Reader, remotes []testpeer.TestPeer) []unixfs.DirEntry {
   506  				return []unixfs.DirEntry{
   507  					generateFor(t, unixfsSpec_largeShardedFile, rndReader, *remotes[0].LinkSystem),
   508  					generateFor(t, unixfsSpec_largeDirectory, rndReader, *remotes[1].LinkSystem),
   509  				}
   510  			},
   511  			expectAggregateEvents: []aggregateeventrecorder.AggregateEvent{
   512  				{
   513  					Success:            false,
   514  					URLPath:            "?dag-scope=all&dups=y",
   515  					ProtocolsAllowed:   []string{multicodec.TransportIpfsGatewayHttp.String(), multicodec.TransportBitswap.String()},
   516  					ProtocolsAttempted: []string{},
   517  				},
   518  				{
   519  					Success:            false,
   520  					URLPath:            "?dag-scope=all&dups=y",
   521  					ProtocolsAllowed:   []string{multicodec.TransportIpfsGatewayHttp.String(), multicodec.TransportBitswap.String()},
   522  					ProtocolsAttempted: []string{},
   523  				},
   524  			},
   525  		},
   526  		{
   527  			name:             "parallel, separate graphsync and bitswap retrievals",
   528  			graphsyncRemotes: 1,
   529  			bitswapRemotes:   1,
   530  			generate: func(t *testing.T, rndReader io.Reader, remotes []testpeer.TestPeer) []unixfs.DirEntry {
   531  				return []unixfs.DirEntry{
   532  					generateFor(t, unixfsSpec_largeShardedFile, rndReader, *remotes[0].LinkSystem),
   533  					generateFor(t, unixfsSpec_largeDirectory, rndReader, *remotes[1].LinkSystem),
   534  				}
   535  			},
   536  		},
   537  		{
   538  			// dag-scope block fetch should only get the root node for a plain file
   539  			name:             "graphsync large sharded file, dag-scope block",
   540  			graphsyncRemotes: 1,
   541  			generate:         singlePeerGenerator(unixfsSpec_largeShardedFile),
   542  			modifyQueries:    []queryModifier{blockQuery},
   543  			validateBodies:   validateOnlyRoot,
   544  		},
   545  		{
   546  			name:             "graphsync nested large sharded file, with path, dag-scope block",
   547  			graphsyncRemotes: 1,
   548  			generate:         singlePeerGenerator(unixfsSpec_largeShardedFileWrapped),
   549  			paths:            []string{wrapPath},
   550  			modifyQueries:    []queryModifier{blockQuery},
   551  			validateBodies: []bodyValidator{func(t *testing.T, srcData unixfs.DirEntry, body []byte) {
   552  				wantCids := []cid.Cid{
   553  					srcData.Root,                                     // "/"
   554  					srcData.Children[1].Root,                         // "/want2"
   555  					srcData.Children[1].Children[1].Root,             // "/want2/want1"
   556  					srcData.Children[1].Children[1].Children[1].Root, // wrapPath
   557  				}
   558  				validateCarBody(t, body, srcData.Root, wantCids, true)
   559  			}},
   560  		},
   561  		{
   562  			name:             "graphsync large sharded file, fixedPeer",
   563  			graphsyncRemotes: 1,
   564  			generate: func(t *testing.T, rndReader io.Reader, remotes []testpeer.TestPeer) []unixfs.DirEntry {
   565  				fileEntry := generateFor(t, unixfsSpec_largeShardedFile, rndReader, *remotes[0].LinkSystem)
   566  				// wipe content routing information for remote
   567  				remotes[0].Cids = make(map[cid.Cid]struct{})
   568  				return []unixfs.DirEntry{fileEntry}
   569  			},
   570  			modifyQueries: []queryModifier{func(v url.Values, tp []testpeer.TestPeer) {
   571  				multiaddrs, _ := peer.AddrInfoToP2pAddrs(tp[0].AddrInfo())
   572  				maStrings := make([]string, 0, len(multiaddrs))
   573  				for _, ma := range multiaddrs {
   574  					maStrings = append(maStrings, ma.String())
   575  				}
   576  				v.Set("providers", strings.Join(maStrings, ","))
   577  			}},
   578  		},
   579  		{
   580  			name:             "graphsync large sharded file, fixedPeer through startup opts",
   581  			graphsyncRemotes: 1,
   582  			generate: func(t *testing.T, rndReader io.Reader, remotes []testpeer.TestPeer) []unixfs.DirEntry {
   583  				fileEntry := generateFor(t, unixfsSpec_largeShardedFile, rndReader, *remotes[0].LinkSystem)
   584  				// wipe content routing information for remote
   585  				remotes[0].Cids = make(map[cid.Cid]struct{})
   586  				return []unixfs.DirEntry{fileEntry}
   587  			},
   588  			lassieOpts: func(t *testing.T, mrn *mocknet.MockRetrievalNet) []lassie.LassieOption {
   589  				return []lassie.LassieOption{lassie.WithCandidateSource(retriever.NewDirectCandidateSource([]types.Provider{{Peer: *mrn.Remotes[0].AddrInfo(), Protocols: nil}}, retriever.WithLibp2pCandidateDiscovery(mrn.Self)))}
   590  			},
   591  		},
   592  		{
   593  			name:           "bitswap large sharded file, fixedPeer",
   594  			bitswapRemotes: 1,
   595  			generate: func(t *testing.T, rndReader io.Reader, remotes []testpeer.TestPeer) []unixfs.DirEntry {
   596  				fileEntry := generateFor(t, unixfsSpec_largeShardedFile, rndReader, *remotes[0].LinkSystem)
   597  				// wipe content routing information for remote
   598  				remotes[0].Cids = make(map[cid.Cid]struct{})
   599  				return []unixfs.DirEntry{fileEntry}
   600  			},
   601  			modifyQueries: []queryModifier{func(v url.Values, tp []testpeer.TestPeer) {
   602  				multiaddrs, _ := peer.AddrInfoToP2pAddrs(tp[0].AddrInfo())
   603  				maStrings := make([]string, 0, len(multiaddrs))
   604  				for _, ma := range multiaddrs {
   605  					maStrings = append(maStrings, ma.String())
   606  				}
   607  				v.Set("providers", strings.Join(maStrings, ","))
   608  			}},
   609  		},
   610  		{
   611  			name:           "bitswap large sharded file, fixedPeer through startup opts",
   612  			bitswapRemotes: 1,
   613  			generate: func(t *testing.T, rndReader io.Reader, remotes []testpeer.TestPeer) []unixfs.DirEntry {
   614  				fileEntry := generateFor(t, unixfsSpec_largeShardedFile, rndReader, *remotes[0].LinkSystem)
   615  				// wipe content routing information for remote
   616  				remotes[0].Cids = make(map[cid.Cid]struct{})
   617  				return []unixfs.DirEntry{fileEntry}
   618  			},
   619  			lassieOpts: func(t *testing.T, mrn *mocknet.MockRetrievalNet) []lassie.LassieOption {
   620  				return []lassie.LassieOption{lassie.WithCandidateSource(retriever.NewDirectCandidateSource([]types.Provider{{Peer: *mrn.Remotes[0].AddrInfo(), Protocols: nil}}, retriever.WithLibp2pCandidateDiscovery(mrn.Self)))}
   621  			},
   622  		},
   623  		{
   624  			name:        "http large sharded file with dups",
   625  			httpRemotes: 1,
   626  			generate: func(t *testing.T, rndReader io.Reader, remotes []testpeer.TestPeer) []unixfs.DirEntry {
   627  				return []unixfs.DirEntry{unixfs.GenerateFile(t, remotes[0].LinkSystem, trustlesstestutil.ZeroReader{}, 4<<20)}
   628  			},
   629  			validateBodies: []bodyValidator{func(t *testing.T, srcData unixfs.DirEntry, body []byte) {
   630  				store := &trustlesstestutil.CorrectedMemStore{ParentStore: &memstore.Store{
   631  					Bag: make(map[string][]byte),
   632  				}}
   633  				lsys := cidlink.DefaultLinkSystem()
   634  				lsys.SetReadStorage(store)
   635  				lsys.SetWriteStorage(store)
   636  				lsys.TrustedStorage = true
   637  				_, err := traversal.Config{
   638  					Root:               srcData.Root,
   639  					Selector:           selectorparse.CommonSelector_ExploreAllRecursively,
   640  					ExpectDuplicatesIn: true,
   641  				}.VerifyCar(context.Background(), bytes.NewReader(body), lsys)
   642  				require.NoError(t, err)
   643  			}},
   644  		},
   645  		{
   646  			name:         "http large sharded file with dups, no dups response requested",
   647  			httpRemotes:  1,
   648  			setHeader:    noDups,
   649  			expectNoDups: true,
   650  			generate:     singlePeerGenerator(unixfsSpec_largeShardedFileZeroed),
   651  			validateBodies: []bodyValidator{func(t *testing.T, srcData unixfs.DirEntry, body []byte) {
   652  				wantCids := []cid.Cid{
   653  					srcData.Root, // "/""
   654  					srcData.SelfCids[1],
   655  					srcData.SelfCids[len(srcData.SelfCids)-1],
   656  				}
   657  				validateCarBody(t, body, srcData.Root, wantCids, true)
   658  			}},
   659  		},
   660  		{
   661  			name:        "http large sharded file with dups, */* gives dups",
   662  			httpRemotes: 1,
   663  			setHeader:   func(h http.Header) { h.Set("Accept", "*/*") },
   664  			generate:    singlePeerGenerator(unixfsSpec_largeShardedFileZeroed),
   665  			validateBodies: []bodyValidator{func(t *testing.T, srcData unixfs.DirEntry, body []byte) {
   666  				store := &trustlesstestutil.CorrectedMemStore{ParentStore: &memstore.Store{
   667  					Bag: make(map[string][]byte),
   668  				}}
   669  				lsys := cidlink.DefaultLinkSystem()
   670  				lsys.SetReadStorage(store)
   671  				lsys.SetWriteStorage(store)
   672  				lsys.TrustedStorage = true
   673  				_, err := traversal.Config{
   674  					Root:               srcData.Root,
   675  					Selector:           selectorparse.CommonSelector_ExploreAllRecursively,
   676  					ExpectDuplicatesIn: true,
   677  				}.VerifyCar(context.Background(), bytes.NewReader(body), lsys)
   678  				require.NoError(t, err)
   679  			}},
   680  		}, {
   681  			name:         "http large sharded file with dups, multiple accept, priority to no dups",
   682  			httpRemotes:  1,
   683  			expectNoDups: true,
   684  			setHeader: func(h http.Header) {
   685  				h.Set("Accept",
   686  					strings.Join([]string{
   687  						"text/html",
   688  						trustlesshttp.DefaultContentType().WithDuplicates(true).WithQuality(0.7).String(),
   689  						trustlesshttp.DefaultContentType().WithDuplicates(false).WithQuality(0.8).String(),
   690  						"*/*;q=0.1",
   691  					}, ", "),
   692  				)
   693  			},
   694  			generate: singlePeerGenerator(unixfsSpec_largeShardedFileZeroed),
   695  			validateBodies: []bodyValidator{func(t *testing.T, srcData unixfs.DirEntry, body []byte) {
   696  				wantCids := []cid.Cid{
   697  					srcData.Root, // "/""
   698  					srcData.SelfCids[1],
   699  					srcData.SelfCids[len(srcData.SelfCids)-1],
   700  				}
   701  				validateCarBody(t, body, srcData.Root, wantCids, true)
   702  			}},
   703  		},
   704  		{
   705  			name:        "http large sharded file with dups, multiple accept, priority to dups",
   706  			httpRemotes: 1,
   707  			setHeader: func(h http.Header) {
   708  				h.Set("Accept",
   709  					strings.Join([]string{
   710  						"text/html",
   711  						trustlesshttp.DefaultContentType().WithDuplicates(true).WithQuality(0.8).String(),
   712  						trustlesshttp.DefaultContentType().WithDuplicates(false).WithQuality(0.7).String(),
   713  						"*/*;q=0.1",
   714  					}, ", "),
   715  				)
   716  			},
   717  			generate: singlePeerGenerator(unixfsSpec_largeShardedFileZeroed),
   718  			validateBodies: []bodyValidator{func(t *testing.T, srcData unixfs.DirEntry, body []byte) {
   719  				store := &trustlesstestutil.CorrectedMemStore{ParentStore: &memstore.Store{
   720  					Bag: make(map[string][]byte),
   721  				}}
   722  				lsys := cidlink.DefaultLinkSystem()
   723  				lsys.SetReadStorage(store)
   724  				lsys.SetWriteStorage(store)
   725  				lsys.TrustedStorage = true
   726  				_, err := traversal.Config{
   727  					Root:               srcData.Root,
   728  					Selector:           selectorparse.CommonSelector_ExploreAllRecursively,
   729  					ExpectDuplicatesIn: true,
   730  				}.VerifyCar(context.Background(), bytes.NewReader(body), lsys)
   731  				require.NoError(t, err)
   732  			}},
   733  		},
   734  		{
   735  			name:           "bitswap nested file, path with special characters",
   736  			bitswapRemotes: 1,
   737  			generate: func(t *testing.T, rndReader io.Reader, remotes []testpeer.TestPeer) []unixfs.DirEntry {
   738  				lsys := remotes[0].LinkSystem
   739  				return []unixfs.DirEntry{unixfs.WrapContent(t, rndReader, lsys, unixfs.GenerateFile(t, lsys, rndReader, 1024), "/?/#/%/ ", false)}
   740  			},
   741  			paths:         []string{"/?/#/%/ "},
   742  			modifyQueries: []queryModifier{entityQuery},
   743  			validateBodies: []bodyValidator{func(t *testing.T, srcData unixfs.DirEntry, body []byte) {
   744  				wantCids := []cid.Cid{
   745  					srcData.Root,                                                 // "/"
   746  					srcData.Children[1].Root,                                     // "/?"
   747  					srcData.Children[1].Children[1].Root,                         // "/?/#"
   748  					srcData.Children[1].Children[1].Children[1].Root,             // "/?/#/%"
   749  					srcData.Children[1].Children[1].Children[1].Children[0].Root, // "/?/#/%/ " (' ' is before '!', so it's the first link after the one named '!before')
   750  				}
   751  				validateCarBody(t, body, srcData.Root, wantCids, true)
   752  			}},
   753  		},
   754  		{
   755  			name:        "http nested file, path with special characters",
   756  			httpRemotes: 1,
   757  			generate: func(t *testing.T, rndReader io.Reader, remotes []testpeer.TestPeer) []unixfs.DirEntry {
   758  				lsys := remotes[0].LinkSystem
   759  				return []unixfs.DirEntry{unixfs.WrapContent(t, rndReader, lsys, unixfs.GenerateFile(t, lsys, rndReader, 1024), "/?/#/%/ ", false)}
   760  			},
   761  			paths:         []string{"/?/#/%/ "},
   762  			modifyQueries: []queryModifier{entityQuery},
   763  			validateBodies: []bodyValidator{func(t *testing.T, srcData unixfs.DirEntry, body []byte) {
   764  				wantCids := []cid.Cid{
   765  					srcData.Root,                                                 // "/"
   766  					srcData.Children[1].Root,                                     // "/?"
   767  					srcData.Children[1].Children[1].Root,                         // "/?/#"
   768  					srcData.Children[1].Children[1].Children[1].Root,             // "/?/#/%"
   769  					srcData.Children[1].Children[1].Children[1].Children[0].Root, // "/?/#/%/ " (' ' is before '!', so it's the first link after the one named '!before')
   770  				}
   771  				validateCarBody(t, body, srcData.Root, wantCids, true)
   772  			}},
   773  		},
   774  		{
   775  			name:             "graphsync nested file, path with special characters",
   776  			graphsyncRemotes: 1,
   777  			generate: func(t *testing.T, rndReader io.Reader, remotes []testpeer.TestPeer) []unixfs.DirEntry {
   778  				lsys := remotes[0].LinkSystem
   779  				return []unixfs.DirEntry{unixfs.WrapContent(t, rndReader, lsys, unixfs.GenerateFile(t, lsys, rndReader, 1024), "/?/#/%/ ", false)}
   780  			},
   781  			paths:         []string{"/?/#/%/ "},
   782  			modifyQueries: []queryModifier{entityQuery},
   783  			validateBodies: []bodyValidator{func(t *testing.T, srcData unixfs.DirEntry, body []byte) {
   784  				wantCids := []cid.Cid{
   785  					srcData.Root,                                                 // "/"
   786  					srcData.Children[1].Root,                                     // "/?"
   787  					srcData.Children[1].Children[1].Root,                         // "/?/#"
   788  					srcData.Children[1].Children[1].Children[1].Root,             // "/?/#/%"
   789  					srcData.Children[1].Children[1].Children[1].Children[0].Root, // "/?/#/%/ " (' ' is before '!', so it's the first link after the one named '!before')
   790  				}
   791  				validateCarBody(t, body, srcData.Root, wantCids, true)
   792  			}},
   793  		},
   794  		{
   795  			name:        "with access token - rejects anonymous requests",
   796  			httpRemotes: 1,
   797  			generate:    singlePeerGenerator(unixfsSpec_smallFile),
   798  			modifyHttpConfig: func(cfg httpserver.HttpServerConfig) httpserver.HttpServerConfig {
   799  				cfg.AccessToken = "super-secret"
   800  				return cfg
   801  			},
   802  			expectUnauthorized: true,
   803  		},
   804  		{
   805  			name:        "with access token - allows requests with authorization header",
   806  			httpRemotes: 1,
   807  			generate:    singlePeerGenerator(unixfsSpec_smallFile),
   808  			modifyHttpConfig: func(cfg httpserver.HttpServerConfig) httpserver.HttpServerConfig {
   809  				cfg.AccessToken = "super-secret"
   810  				return cfg
   811  			},
   812  			setHeader: func(header http.Header) {
   813  				header.Set("Authorization", "Bearer super-secret")
   814  				header.Add("Accept", "application/vnd.ipld.car")
   815  			},
   816  			expectUnauthorized: false,
   817  		},
   818  		{
   819  			name:             "non-unixfs graphsync",
   820  			graphsyncRemotes: 1,
   821  			generate: func(t *testing.T, rndReader io.Reader, remotes []testpeer.TestPeer) []unixfs.DirEntry {
   822  				return []unixfs.DirEntry{trustlesstestutil.MakeDagWithIdentity(t, *remotes[0].LinkSystem)}
   823  			},
   824  		},
   825  		{
   826  			name:           "non-unixfs bitswap",
   827  			bitswapRemotes: 1,
   828  			generate: func(t *testing.T, rndReader io.Reader, remotes []testpeer.TestPeer) []unixfs.DirEntry {
   829  				return []unixfs.DirEntry{trustlesstestutil.MakeDagWithIdentity(t, *remotes[0].LinkSystem)}
   830  			},
   831  		},
   832  		{
   833  			name:        "non-unixfs http",
   834  			httpRemotes: 1,
   835  			generate: func(t *testing.T, rndReader io.Reader, remotes []testpeer.TestPeer) []unixfs.DirEntry {
   836  				return []unixfs.DirEntry{trustlesstestutil.MakeDagWithIdentity(t, *remotes[0].LinkSystem)}
   837  			},
   838  		},
   839  		// noDups variants are important because handling of these happens all the
   840  		// way up to DuplicateAdderCar
   841  		{
   842  			name:             "non-unixfs graphsync /w noDups",
   843  			setHeader:        noDups,
   844  			expectNoDups:     true,
   845  			graphsyncRemotes: 1,
   846  			generate: func(t *testing.T, rndReader io.Reader, remotes []testpeer.TestPeer) []unixfs.DirEntry {
   847  				return []unixfs.DirEntry{trustlesstestutil.MakeDagWithIdentity(t, *remotes[0].LinkSystem)}
   848  			},
   849  		},
   850  		{
   851  			name:           "non-unixfs bitswap /w noDups",
   852  			setHeader:      noDups,
   853  			expectNoDups:   true,
   854  			bitswapRemotes: 1,
   855  			generate: func(t *testing.T, rndReader io.Reader, remotes []testpeer.TestPeer) []unixfs.DirEntry {
   856  				return []unixfs.DirEntry{trustlesstestutil.MakeDagWithIdentity(t, *remotes[0].LinkSystem)}
   857  			},
   858  		},
   859  		{
   860  			name:         "non-unixfs http /w noDups",
   861  			setHeader:    noDups,
   862  			expectNoDups: true,
   863  			httpRemotes:  1,
   864  			generate: func(t *testing.T, rndReader io.Reader, remotes []testpeer.TestPeer) []unixfs.DirEntry {
   865  				return []unixfs.DirEntry{trustlesstestutil.MakeDagWithIdentity(t, *remotes[0].LinkSystem)}
   866  			},
   867  		},
   868  	}
   869  
   870  	for _, testCase := range testCases {
   871  		testCase := testCase
   872  		t.Run(testCase.name, func(t *testing.T) {
   873  			ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
   874  			defer cancel()
   875  
   876  			rndSeed := time.Now().UTC().UnixNano()
   877  			t.Logf("random seed: %d", rndSeed)
   878  			var rndReader io.Reader = rand.New(rand.NewSource(rndSeed))
   879  
   880  			mrn := mocknet.NewMockRetrievalNet(ctx, t)
   881  			mrn.AddGraphsyncPeers(testCase.graphsyncRemotes)
   882  			finishedChans := make([]chan []datatransfer.Event, 0)
   883  			for _, r := range mrn.Remotes {
   884  				finishedChans = append(finishedChans, mocknet.SetupRetrieval(t, r))
   885  			}
   886  			mrn.AddBitswapPeers(testCase.bitswapRemotes)
   887  			mrn.AddHttpPeers(testCase.httpRemotes)
   888  
   889  			require.NoError(t, mrn.MN.LinkAll())
   890  
   891  			carFiles := debugRemotes(t, ctx, testCase.name, mrn.Remotes)
   892  			srcData := testCase.generate(t, rndReader, mrn.Remotes)
   893  
   894  			// Setup a new lassie
   895  			req := require.New(t)
   896  			var customOpts []lassie.LassieOption
   897  			if testCase.lassieOpts != nil {
   898  				customOpts = testCase.lassieOpts(t, mrn)
   899  			}
   900  			opts := append([]lassie.LassieOption{
   901  				lassie.WithProviderTimeout(20 * time.Second),
   902  				lassie.WithHost(mrn.Self),
   903  				lassie.WithCandidateSource(mrn.Source),
   904  			}, customOpts...)
   905  			if testCase.disableGraphsync {
   906  				opts = append(opts, lassie.WithProtocols([]multicodec.Code{multicodec.TransportBitswap, multicodec.TransportIpfsGatewayHttp}))
   907  			}
   908  			lassie, err := lassie.NewLassie(ctx, opts...)
   909  			req.NoError(err)
   910  
   911  			var aggregateEventsCh = make(chan []aggregateeventrecorder.AggregateEvent)
   912  			if len(testCase.expectAggregateEvents) > 0 {
   913  				closer := setupAggregateEventRecorder(t, ctx, len(srcData), lassie, aggregateEventsCh)
   914  				defer closer.Close()
   915  			}
   916  
   917  			// Start an HTTP server
   918  			cfg := httpserver.HttpServerConfig{Address: "127.0.0.1", Port: 0, TempDir: t.TempDir()}
   919  			if testCase.modifyHttpConfig != nil {
   920  				cfg = testCase.modifyHttpConfig(cfg)
   921  			}
   922  			httpServer, err := httpserver.NewHttpServer(ctx, lassie, cfg)
   923  			req.NoError(err)
   924  			serverError := make(chan error, 1)
   925  			go func() {
   926  				serverError <- httpServer.Start()
   927  			}()
   928  
   929  			paths := make([]string, len(srcData))
   930  			for i := 0; i < len(srcData); i++ {
   931  				if testCase.paths != nil && testCase.paths[i] != "" {
   932  					p := datamodel.ParsePath(testCase.paths[i])
   933  					for p.Len() > 0 {
   934  						var ps datamodel.PathSegment
   935  						ps, p = p.Shift()
   936  						paths[i] += "/" + url.PathEscape(ps.String())
   937  					}
   938  				}
   939  			}
   940  
   941  			responseChans := make([]chan response, 0)
   942  			for i := 0; i < len(srcData); i++ {
   943  				responseChan := make(chan response, 1)
   944  				responseChans = append(responseChans, responseChan)
   945  				go func(i int) {
   946  					// Make a request for our CID and read the complete CAR bytes
   947  					addr := fmt.Sprintf("http://%s/ipfs/%s%s", httpServer.Addr(), srcData[i].Root.String(), paths[i])
   948  					getReq, err := http.NewRequest("GET", addr, nil)
   949  					req.NoError(err)
   950  					if testCase.setHeader == nil {
   951  						getReq.Header.Add("Accept", "application/vnd.ipld.car")
   952  					} else {
   953  						testCase.setHeader(getReq.Header)
   954  					}
   955  					if testCase.modifyQueries != nil && testCase.modifyQueries[i] != nil {
   956  						q := getReq.URL.Query()
   957  						testCase.modifyQueries[i](q, mrn.Remotes)
   958  						getReq.URL.RawQuery = q.Encode()
   959  					}
   960  					t.Log("Fetching", getReq.URL.String())
   961  					resp, err := http.DefaultClient.Do(getReq)
   962  					req.NoError(err)
   963  					expectBodyReadError := ""
   964  					if testCase.expectUncleanEnd {
   965  						expectBodyReadError = "http: unexpected EOF reading trailer"
   966  					}
   967  					body := readAllBody(t, resp.Body, expectBodyReadError)
   968  					req.NoError(resp.Body.Close())
   969  					responseChan <- response{StatusCode: resp.StatusCode, Header: resp.Header, Body: body}
   970  				}(i)
   971  			}
   972  
   973  			responses := make([]response, 0)
   974  			for _, responseChan := range responseChans {
   975  				select {
   976  				case resp := <-responseChan:
   977  					responses = append(responses, resp)
   978  				case <-ctx.Done():
   979  					req.FailNow("Did not receive responses")
   980  				}
   981  			}
   982  
   983  			if !testCase.disableGraphsync {
   984  				// wait for graphsync retrievals to finish on the remotes
   985  				var wg sync.WaitGroup
   986  				wg.Add(len(finishedChans))
   987  				for _, finishedChan := range finishedChans {
   988  					go func(finishedChan chan []datatransfer.Event) {
   989  						mocknet.WaitForFinish(ctx, t, finishedChan, 1*time.Second)
   990  						wg.Done()
   991  					}(finishedChan)
   992  				}
   993  				wg.Wait()
   994  			}
   995  
   996  			for i, resp := range responses {
   997  				if testCase.expectNoCandidates {
   998  					if resp.StatusCode != http.StatusBadGateway {
   999  						req.Failf("wrong response code not received", "expected %d, got %d; body: [%s]", http.StatusBadGateway, resp.StatusCode, string(resp.Body))
  1000  						req.Contains(string(resp.Body), "no candidates found")
  1001  					}
  1002  				} else if testCase.expectUnauthorized {
  1003  					if resp.StatusCode != http.StatusUnauthorized {
  1004  						req.Failf("wrong response code not received", "expected %d, got %d; body: [%s]", http.StatusUnauthorized, resp.StatusCode, string(resp.Body))
  1005  					}
  1006  				} else {
  1007  					if resp.StatusCode != http.StatusOK {
  1008  						req.Failf("wrong response code not received", "expected %d, got %d; body: [%s]", http.StatusOK, resp.StatusCode, string(resp.Body))
  1009  					}
  1010  
  1011  					verifyHeaders(t, resp, srcData[i].Root, paths[i], testCase.expectNoDups)
  1012  
  1013  					if DEBUG_DATA {
  1014  						dstf, err := os.CreateTemp("", fmt.Sprintf("%s_received%d.car", strings.Replace(testCase.name, "/", "__", -1), i))
  1015  						req.NoError(err)
  1016  						t.Logf("Writing received data to CAR @ %s", dstf.Name())
  1017  						_, err = dstf.Write(resp.Body)
  1018  						req.NoError(err)
  1019  						carFiles = append(carFiles, dstf)
  1020  					}
  1021  
  1022  					if testCase.validateBodies != nil && testCase.validateBodies[i] != nil {
  1023  						testCase.validateBodies[i](t, srcData[i], resp.Body)
  1024  					} else {
  1025  						gotLsys := CarBytesLinkSystem(t, bytes.NewReader(resp.Body))
  1026  						gotDir := unixfs.ToDirEntry(t, gotLsys, srcData[i].Root, true)
  1027  						unixfs.CompareDirEntries(t, srcData[i], gotDir)
  1028  					}
  1029  				}
  1030  			}
  1031  
  1032  			if len(testCase.expectAggregateEvents) > 0 {
  1033  				var events []aggregateeventrecorder.AggregateEvent
  1034  				// check that the event recorder got and event for this by looking for the root cid
  1035  				select {
  1036  				case events = <-aggregateEventsCh:
  1037  				case <-ctx.Done():
  1038  					req.FailNow("Did not receive aggregate events")
  1039  				}
  1040  				verifyAggregateEvents(t, mrn.Remotes, srcData, testCase.expectAggregateEvents, events)
  1041  			}
  1042  
  1043  			if DEBUG_DATA {
  1044  				for _, cf := range carFiles {
  1045  					req.NoError(cf.Close())
  1046  					req.NoError(os.Remove(cf.Name()))
  1047  				}
  1048  				t.Logf("Cleaned up CARs")
  1049  			}
  1050  
  1051  			err = httpServer.Close()
  1052  			req.NoError(err)
  1053  			select {
  1054  			case <-ctx.Done():
  1055  				req.FailNow("server failed to shut down")
  1056  			case err = <-serverError:
  1057  				req.NoError(err)
  1058  			}
  1059  		})
  1060  	}
  1061  }
  1062  
  1063  // validateCarBody reads the given bytes as a CAR, validates the root is correct
  1064  // and that it contains all of the wantCids (not strictly in order). If
  1065  // onlyWantCids is true, it also validates that wantCids are the only CIDs in
  1066  // the CAR (with no duplicates).
  1067  func validateCarBody(t *testing.T, body []byte, root cid.Cid, wantCids []cid.Cid, onlyWantCids bool) {
  1068  	br, err := car.NewBlockReader(bytes.NewReader(body))
  1069  	require.NoError(t, err)
  1070  	require.Equal(t, []cid.Cid{root}, br.Roots)
  1071  	gotCids := make([]cid.Cid, 0)
  1072  	for {
  1073  		blk, err := br.Next()
  1074  		if err != nil {
  1075  			require.EqualError(t, err, io.EOF.Error())
  1076  			break
  1077  		}
  1078  		gotCids = append(gotCids, blk.Cid())
  1079  	}
  1080  	for _, cw := range wantCids {
  1081  		var found bool
  1082  		for _, cg := range gotCids {
  1083  			if cw.Equals(cg) {
  1084  				found = true
  1085  				break
  1086  			}
  1087  		}
  1088  		require.True(t, found)
  1089  	}
  1090  	if onlyWantCids {
  1091  		require.Len(t, gotCids, len(wantCids))
  1092  	}
  1093  }
  1094  
  1095  func verifyHeaders(t *testing.T, resp response, root cid.Cid, path string, expectNoDups bool) {
  1096  	req := require.New(t)
  1097  
  1098  	req.Regexp(`^lassie/v\d+\.\d+\.\d+-\w+$`, resp.Header.Get("Server"))
  1099  	req.Equal(fmt.Sprintf(`attachment; filename="%s.car"`, root.String()), resp.Header.Get("Content-Disposition"))
  1100  	req.Equal("none", resp.Header.Get("Accept-Ranges"))
  1101  	req.Equal("public, max-age=29030400, immutable", resp.Header.Get("Cache-Control"))
  1102  	req.Equal(trustlesshttp.DefaultContentType().WithDuplicates(!expectNoDups).String(), resp.Header.Get("Content-Type"))
  1103  	req.Equal("nosniff", resp.Header.Get("X-Content-Type-Options"))
  1104  	st := resp.Header.Get("Server-Timing")
  1105  	req.Contains(st, "started-finding-candidates")
  1106  	req.Contains(st, "candidates-found=")
  1107  	req.Contains(st, "retrieval-")
  1108  	req.Contains(st, "dur=") // at lest one of these
  1109  	etagStart := fmt.Sprintf(`"%s.car.`, root.String())
  1110  	etagGot := resp.Header.Get("ETag")
  1111  	req.True(strings.HasPrefix(etagGot, etagStart), "ETag should start with [%s], got [%s]", etagStart, etagGot)
  1112  	req.Equal(`"`, etagGot[len(etagGot)-1:], "ETag should end with a quote")
  1113  	req.Equal(fmt.Sprintf("/ipfs/%s%s", root.String(), path), resp.Header.Get("X-Ipfs-Path"))
  1114  	requestId := resp.Header.Get("X-Trace-Id")
  1115  	require.NotEmpty(t, requestId)
  1116  	_, err := uuid.Parse(requestId)
  1117  	req.NoError(err)
  1118  }
  1119  
  1120  func setupAggregateEventRecorder(t *testing.T, ctx context.Context, expectCount int, lassie *lassie.Lassie, aggregateEventsCh chan []aggregateeventrecorder.AggregateEvent) interface{ Close() } {
  1121  	var aggregateEventsLk sync.Mutex
  1122  	events := make([]aggregateeventrecorder.AggregateEvent, 0)
  1123  	ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
  1124  		require.Equal(t, "Basic listenup", r.Header.Get("Authorization"))
  1125  		type batch struct {
  1126  			Events []aggregateeventrecorder.AggregateEvent
  1127  		}
  1128  		var b batch
  1129  		err := json.NewDecoder(r.Body).Decode(&b)
  1130  		require.NoError(t, err)
  1131  		aggregateEventsLk.Lock()
  1132  		events = append(events, b.Events...)
  1133  		if len(events) == expectCount {
  1134  			select {
  1135  			case <-ctx.Done():
  1136  			case aggregateEventsCh <- events:
  1137  			}
  1138  		}
  1139  		aggregateEventsLk.Unlock()
  1140  	}))
  1141  
  1142  	eventRecorder := aggregateeventrecorder.NewAggregateEventRecorder(ctx, aggregateeventrecorder.EventRecorderConfig{
  1143  		InstanceID:            "fooblesmush",
  1144  		EndpointURL:           ts.URL,
  1145  		EndpointAuthorization: "listenup",
  1146  	})
  1147  	lassie.RegisterSubscriber(eventRecorder.RetrievalEventSubscriber())
  1148  
  1149  	return ts
  1150  }
  1151  
  1152  func verifyAggregateEvents(t *testing.T, remotes []testpeer.TestPeer, srcData []unixfs.DirEntry, expectedEvents, actualEvents []aggregateeventrecorder.AggregateEvent) {
  1153  	req := require.New(t)
  1154  
  1155  	for ii, src := range srcData {
  1156  		var evt aggregateeventrecorder.AggregateEvent
  1157  		for _, e := range actualEvents {
  1158  			if e.RootCid == src.Root.String() {
  1159  				evt = e
  1160  				break
  1161  			}
  1162  		}
  1163  		req.NotNil(evt)
  1164  		t.Log("got event", evt)
  1165  
  1166  		expect := expectedEvents[ii]
  1167  		req.Equal("fooblesmush", evt.InstanceID)
  1168  		req.Equal(expect.Success, evt.Success)
  1169  		req.Equal(expect.URLPath, evt.URLPath)
  1170  		req.ElementsMatch(expect.ProtocolsAttempted, evt.ProtocolsAttempted)
  1171  		req.ElementsMatch(expect.ProtocolsAllowed, evt.ProtocolsAllowed)
  1172  
  1173  		// This makes an assumption that there's a clear mapping of remote
  1174  		// index to srcData index, which doesn't necessarily hold. So if novel
  1175  		// cases need to be tested, this may need to be en-smartened.
  1176  		if expect.Success {
  1177  			totalBytes := totalBlockBytes(t, *remotes[ii].LinkSystem, src)
  1178  			req.Equal(totalBytes, evt.BytesTransferred)
  1179  
  1180  			// This makes an assumption there's only one attempt
  1181  			isBitswap := slices.Equal(expect.ProtocolsAttempted, []string{multicodec.TransportBitswap.String()})
  1182  			if isBitswap {
  1183  				req.Len(evt.RetrievalAttempts, 2)
  1184  				req.Contains(evt.RetrievalAttempts, "Bitswap")
  1185  			} else {
  1186  				req.Len(evt.RetrievalAttempts, 1)
  1187  			}
  1188  			for _, attempt := range evt.RetrievalAttempts {
  1189  				req.Equal("", attempt.Error)
  1190  				req.Equal(totalBytes, attempt.BytesTransferred) // both attempts for a bitswap req will have the same number
  1191  			}
  1192  		}
  1193  	}
  1194  }
  1195  
  1196  func debugRemotes(t *testing.T, ctx context.Context, name string, remotes []testpeer.TestPeer) []*os.File {
  1197  	if !DEBUG_DATA {
  1198  		return nil
  1199  	}
  1200  	carFiles := make([]*os.File, 0)
  1201  	for ii, r := range remotes {
  1202  		func(ii int, r testpeer.TestPeer) {
  1203  			carFile, err := os.CreateTemp("", fmt.Sprintf("%s_remote%d.car", strings.Replace(name, "/", "__", -1), ii))
  1204  			require.NoError(t, err)
  1205  			t.Logf("Writing source data to CAR @ %s", carFile.Name())
  1206  			carFiles = append(carFiles, carFile)
  1207  			carW, err := storage.NewWritable(carFile, []cid.Cid{}, car.WriteAsCarV1(true), car.AllowDuplicatePuts(true))
  1208  			require.NoError(t, err)
  1209  			swo := r.LinkSystem.StorageWriteOpener
  1210  			r.LinkSystem.StorageWriteOpener = func(lc linking.LinkContext) (io.Writer, linking.BlockWriteCommitter, error) {
  1211  				w, c, err := swo(lc)
  1212  				if err != nil {
  1213  					return nil, nil, err
  1214  				}
  1215  				var buf bytes.Buffer
  1216  				return &buf, func(l datamodel.Link) error {
  1217  					require.NoError(t, carW.Put(ctx, l.(cidlink.Link).Cid.KeyString(), buf.Bytes()))
  1218  					_, err := w.Write(buf.Bytes())
  1219  					if err != nil {
  1220  						return err
  1221  					}
  1222  					return c(l)
  1223  				}, nil
  1224  			}
  1225  		}(ii, r)
  1226  	}
  1227  	return carFiles
  1228  }
  1229  
  1230  func readAllBody(t *testing.T, r io.Reader, expectError string) []byte {
  1231  	if expectError == "" {
  1232  		body, err := io.ReadAll(r)
  1233  		require.NoError(t, err)
  1234  		return body
  1235  	}
  1236  	// expect an error, so let's creep up on it and collect as much of the body
  1237  	// as we can before the error blocks us
  1238  	// see readLocked() in src/net/http/transfer.go:
  1239  	// → b.src.Read(p)
  1240  	// → followed by b.readTrailer() which should error; we want to capture both
  1241  	var buf bytes.Buffer
  1242  	var byt [1]byte
  1243  	var err error
  1244  	var n int
  1245  	for {
  1246  		n, err = r.Read(byt[:])
  1247  		// record the bytes we read, the error should come after the normal body
  1248  		// read and then it attempts to read trailers where it should fail
  1249  		buf.Write(byt[:n])
  1250  		if err != nil {
  1251  			require.EqualError(t, err, expectError)
  1252  			break
  1253  		}
  1254  	}
  1255  	return buf.Bytes()
  1256  }
  1257  
  1258  // wrapSpec wraps the given spec in a directory structure that has a
  1259  // subdirectory before and after the subdirectory we want to path through at
  1260  // each level, according to wrapPath. Tests should be able to ignore the
  1261  // extraneous content generated by this function's spec.
  1262  func wrapSpec(spec string) string {
  1263  	return `dir(
  1264  		dir{name:"!before"}(~10*file:~1k),dir{name:"want2"}(
  1265  			dir{name:"!before"}(~10*file:~2k),dir{name:"want1"}(
  1266  				dir{name:"!before"}(~10*file:~1k),dir{name:"want0"}(
  1267  					dir{name:"!before"}(~10*file:~100),` + spec + `,dir{name:"~after"}(~10*file:~100)),
  1268  				dir{name:"~after"}(~10*file:~2kb)),
  1269  			dir{name:"~after"}(~10*file:~1kb)),
  1270  		dir{name:"~after"}(~10*file:~2kb))`
  1271  }
  1272  
  1273  // a utility function where a test only involves a single peer
  1274  func singlePeerGenerator(spec string) generateFn {
  1275  	return func(t *testing.T, rndReader io.Reader, remotes []testpeer.TestPeer) []unixfs.DirEntry {
  1276  		return []unixfs.DirEntry{generateFor(t, spec, rndReader, *remotes[0].LinkSystem)}
  1277  	}
  1278  }
  1279  
  1280  // given a spec, generate that UnixFS content into the given LinkSystem.
  1281  func generateFor(t *testing.T, spec string, rndReader io.Reader, lsys linking.LinkSystem) unixfs.DirEntry {
  1282  	ss := strings.Split(spec, "\n")
  1283  	for i, s := range ss {
  1284  		ss[i] = strings.TrimSpace(s)
  1285  	}
  1286  	spec = strings.Join(ss, "")
  1287  	entity, err := unixfsgen.Parse(spec)
  1288  	require.NoError(t, err)
  1289  	t.Logf("Generating: %s", entity.Describe(""))
  1290  	rootEnt, err := entity.Generate(lsys, rndReader)
  1291  	require.NoError(t, err)
  1292  	return rootEnt
  1293  }
  1294  
  1295  var validateFirstThreeBlocksOnly = []bodyValidator{func(t *testing.T, srcData unixfs.DirEntry, body []byte) {
  1296  	// 3 blocks max, start at the root and then two blocks into the sharded data
  1297  	wantCids := []cid.Cid{
  1298  		srcData.Root,
  1299  		srcData.SelfCids[0],
  1300  		srcData.SelfCids[1],
  1301  	}
  1302  	validateCarBody(t, body, srcData.Root, wantCids, true)
  1303  }}
  1304  
  1305  var validateOnlyEntity = []bodyValidator{func(t *testing.T, srcData unixfs.DirEntry, body []byte) {
  1306  	// sharded directory contains multiple blocks, so we expect a CAR with
  1307  	// exactly those blocks
  1308  	validateCarBody(t, body, srcData.Root, srcData.SelfCids, true)
  1309  }}
  1310  
  1311  var validatePathedFullContent = []bodyValidator{func(t *testing.T, srcData unixfs.DirEntry, body []byte) {
  1312  	wantCids := append([]cid.Cid{
  1313  		srcData.Root,                         // "/""
  1314  		srcData.Children[1].Root,             // "/want2"
  1315  		srcData.Children[1].Children[1].Root, // "/want2/want1"
  1316  	},
  1317  		srcData.Children[1].Children[1].Children[1].SelfCids..., // wrapPath (full)
  1318  	)
  1319  	// validate we got the dag-scope entity form
  1320  	validateCarBody(t, body, srcData.Root, wantCids, false)
  1321  	// validate that we got the full depth form under the path
  1322  	gotDir := carToDirEntry(t, bytes.NewReader(body), srcData.Children[1].Children[1].Children[1].Root, wrapPath, true)
  1323  	unixfs.CompareDirEntries(t, srcData.Children[1].Children[1].Children[1], gotDir)
  1324  }}
  1325  
  1326  var validatePathedEntityContent = []bodyValidator{func(t *testing.T, srcData unixfs.DirEntry, body []byte) {
  1327  	wantCids := append([]cid.Cid{
  1328  		srcData.Root,                         // "/""
  1329  		srcData.Children[1].Root,             // "/want2"
  1330  		srcData.Children[1].Children[1].Root, // "/want2/want1"
  1331  	},
  1332  		srcData.Children[1].Children[1].Children[1].SelfCids..., // wrapPath (full)
  1333  	)
  1334  	validateCarBody(t, body, srcData.Root, wantCids, true)
  1335  }}
  1336  
  1337  var validateOnlyRoot = []bodyValidator{func(t *testing.T, srcData unixfs.DirEntry, body []byte) {
  1338  	// expect a CAR of one block, to represent the root directory we asked for
  1339  	validateCarBody(t, body, srcData.Root, []cid.Cid{srcData.Root}, true)
  1340  }}
  1341  
  1342  func totalBlockBytes(t *testing.T, lsys linking.LinkSystem, srcData unixfs.DirEntry) uint64 {
  1343  	var total uint64
  1344  	for _, c := range srcData.SelfCids {
  1345  		b, err := lsys.LoadRaw(ipld.LinkContext{}, cidlink.Link{Cid: c})
  1346  		require.NoError(t, err)
  1347  		total += uint64(len(b))
  1348  		for _, child := range srcData.Children {
  1349  			total += totalBlockBytes(t, lsys, child)
  1350  		}
  1351  	}
  1352  	return total
  1353  }