github.com/ethersphere/bee/v2@v2.2.0/pkg/traversal/traversal_test.go (about)

     1  // Copyright 2020 The Swarm Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package traversal_test
     6  
     7  import (
     8  	"bytes"
     9  	"context"
    10  	"fmt"
    11  	"math"
    12  	"path"
    13  	"sync"
    14  	"testing"
    15  	"time"
    16  
    17  	"github.com/ethersphere/bee/v2/pkg/file/loadsave"
    18  	"github.com/ethersphere/bee/v2/pkg/file/pipeline"
    19  	"github.com/ethersphere/bee/v2/pkg/file/pipeline/builder"
    20  	"github.com/ethersphere/bee/v2/pkg/manifest"
    21  	testingsoc "github.com/ethersphere/bee/v2/pkg/soc/testing"
    22  	storage "github.com/ethersphere/bee/v2/pkg/storage"
    23  	"github.com/ethersphere/bee/v2/pkg/storage/inmemchunkstore"
    24  	"github.com/ethersphere/bee/v2/pkg/swarm"
    25  	"github.com/ethersphere/bee/v2/pkg/traversal"
    26  )
    27  
// Shared fixtures for all traversal tests.
const (
	dataCorpus       = "hello test world" // 16 bytes.
	defaultMediaType = "bzz-manifest-mantaray" // media type label used in sub-test names.
)
    32  
    33  func generateSample(size int) []byte {
    34  	buf := make([]byte, size)
    35  	for n := 0; n < size; {
    36  		n += copy(buf[n:], dataCorpus)
    37  	}
    38  	return buf
    39  }
    40  
    41  // newAddressIterator is a convenient constructor for creating addressIterator.
    42  func newAddressIterator(ignoreDuplicates bool) *addressIterator {
    43  	return &addressIterator{
    44  		seen:             make(map[string]bool),
    45  		ignoreDuplicates: ignoreDuplicates,
    46  	}
    47  }
    48  
// addressIterator is a simple collector of statistics
// targeting swarm.AddressIterFunc execution.
type addressIterator struct {
	mu   sync.Mutex      // mu guards cnt and seen fields.
	cnt  int             // total number of Next invocations.
	seen map[string]bool // set of addresses already passed to Next.
	// Settings.
	ignoreDuplicates bool // when false, Next returns an error on a repeated address.
}
    58  
    59  // Next matches the signature of swarm.AddressIterFunc needed in
    60  // Traverser.Traverse method and collects statistics about it's execution.
    61  func (i *addressIterator) Next(addr swarm.Address) error {
    62  	i.mu.Lock()
    63  	defer i.mu.Unlock()
    64  
    65  	i.cnt++
    66  	if !i.ignoreDuplicates && i.seen[addr.String()] {
    67  		return fmt.Errorf("duplicit address: %q", addr.String())
    68  	}
    69  	i.seen[addr.String()] = true
    70  	return nil
    71  }
    72  
    73  func TestTraversalBytes(t *testing.T) {
    74  	t.Parallel()
    75  
    76  	testCases := []struct {
    77  		dataSize              int
    78  		wantHashCount         int
    79  		wantHashes            []string
    80  		ignoreDuplicateHashes bool
    81  	}{
    82  		{
    83  			dataSize:      len(dataCorpus),
    84  			wantHashCount: 1,
    85  			wantHashes: []string{
    86  				"e94a5aadf259f008b7d5039420c65d692901846523f503d97d24e2f077786d9a",
    87  			},
    88  		},
    89  		{
    90  			dataSize:      swarm.ChunkSize,
    91  			wantHashCount: 1,
    92  			wantHashes: []string{
    93  				"f833c17be12d68aec95eca7f9d993f7d7aaa7a9c282eb2c3d79ab26a5aeaf384", // bytes (4096)
    94  			},
    95  		},
    96  		{
    97  			dataSize:      swarm.ChunkSize + 1,
    98  			wantHashCount: 3,
    99  			wantHashes: []string{
   100  				"a1c4483d15167aeb406017942c9625464574cf70bf7e42f237094acbccdb6834", // bytes (joiner)
   101  				"f833c17be12d68aec95eca7f9d993f7d7aaa7a9c282eb2c3d79ab26a5aeaf384", // bytes (4096)
   102  				"dcbfb467950a28f8c5023b86d31de4ff3a337993e921ae623ae62c7190d60329", // bytes (1)
   103  			},
   104  		},
   105  		{
   106  			dataSize:      swarm.ChunkSize * 128,
   107  			wantHashCount: 129,
   108  			wantHashes: []string{
   109  				"5060cfd2a34df0269b47201e1f202eb2a165d787a0c5043ceb29bb85b7567c61", // bytes (joiner)
   110  				"f833c17be12d68aec95eca7f9d993f7d7aaa7a9c282eb2c3d79ab26a5aeaf384", // bytes (4096)
   111  			},
   112  			ignoreDuplicateHashes: true,
   113  		},
   114  		{
   115  			dataSize:      swarm.ChunkSize * 129,
   116  			wantHashCount: 131,
   117  			wantHashes: []string{
   118  				"150665dfbd81f80f5ba00a0caa2caa34f8b94e662e1dea769fe9ce7ea170bf25", // root (joiner, chunk)
   119  				"5060cfd2a34df0269b47201e1f202eb2a165d787a0c5043ceb29bb85b7567c61", // bytes (joiner)
   120  				"f833c17be12d68aec95eca7f9d993f7d7aaa7a9c282eb2c3d79ab26a5aeaf384", // bytes (4096)
   121  			},
   122  			ignoreDuplicateHashes: true,
   123  		},
   124  		{
   125  			dataSize:      swarm.ChunkSize*129 - 1,
   126  			wantHashCount: 131,
   127  			wantHashes: []string{
   128  				"895610b2d795e7cc351a8336d46ba9ef37309d83267d272c6e257e46a78ecb7c", // root (joiner, chunk)
   129  				"5060cfd2a34df0269b47201e1f202eb2a165d787a0c5043ceb29bb85b7567c61", // bytes (joiner)
   130  				"f833c17be12d68aec95eca7f9d993f7d7aaa7a9c282eb2c3d79ab26a5aeaf384", // bytes (4096)
   131  				"d18f0d81b832086376684558978cfe6773ed773178f84961c8b750fe72033a26", // bytes (4095)
   132  			},
   133  			ignoreDuplicateHashes: true,
   134  		},
   135  		{
   136  			dataSize:      swarm.ChunkSize*129 + 1,
   137  			wantHashCount: 133,
   138  			wantHashes: []string{
   139  				"023ee8b901702a999e9ef90ca2bc1c6db1daefb3f178b683a87b0fd613fd8e21", // root (joiner, chunk)
   140  				"5060cfd2a34df0269b47201e1f202eb2a165d787a0c5043ceb29bb85b7567c61", // bytes (joiner [4096 * 128])
   141  				"f833c17be12d68aec95eca7f9d993f7d7aaa7a9c282eb2c3d79ab26a5aeaf384", // bytes (4096)
   142  				"dcbfb467950a28f8c5023b86d31de4ff3a337993e921ae623ae62c7190d60329", // bytes (1)
   143  				"a1c4483d15167aeb406017942c9625464574cf70bf7e42f237094acbccdb6834", // bytes (joiner - [4096, 1])
   144  			},
   145  			ignoreDuplicateHashes: true,
   146  		},
   147  	}
   148  
   149  	for _, tc := range testCases {
   150  		tc := tc
   151  		chunkCount := int(math.Ceil(float64(tc.dataSize) / swarm.ChunkSize))
   152  		t.Run(fmt.Sprintf("%d-chunk-%d-bytes", chunkCount, tc.dataSize), func(t *testing.T) {
   153  			t.Parallel()
   154  
   155  			var (
   156  				data       = generateSample(tc.dataSize)
   157  				iter       = newAddressIterator(tc.ignoreDuplicateHashes)
   158  				storerMock = inmemchunkstore.New()
   159  			)
   160  
   161  			ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
   162  			defer cancel()
   163  
   164  			pipe := builder.NewPipelineBuilder(ctx, storerMock, false, 0)
   165  			address, err := builder.FeedPipeline(ctx, pipe, bytes.NewReader(data))
   166  			if err != nil {
   167  				t.Fatal(err)
   168  			}
   169  
   170  			err = traversal.New(storerMock, storerMock).Traverse(ctx, address, iter.Next)
   171  			if err != nil {
   172  				t.Fatal(err)
   173  			}
   174  
   175  			haveCnt, wantCnt := tc.wantHashCount, iter.cnt
   176  			if !tc.ignoreDuplicateHashes {
   177  				haveCnt, wantCnt = len(iter.seen), len(tc.wantHashes)
   178  			}
   179  			if haveCnt != wantCnt {
   180  				t.Fatalf("hash count mismatch: have %d; want %d", haveCnt, wantCnt)
   181  			}
   182  
   183  			for _, hash := range tc.wantHashes {
   184  				if !iter.seen[hash] {
   185  					t.Fatalf("hash check: want %q; have none", hash)
   186  				}
   187  			}
   188  		})
   189  	}
   190  }
   191  
   192  func TestTraversalFiles(t *testing.T) {
   193  	t.Parallel()
   194  
   195  	testCases := []struct {
   196  		filesSize             int
   197  		contentType           string
   198  		filename              string
   199  		wantHashCount         int
   200  		wantHashes            []string
   201  		ignoreDuplicateHashes bool
   202  	}{
   203  		{
   204  			filesSize:     len(dataCorpus),
   205  			contentType:   "text/plain; charset=utf-8",
   206  			filename:      "simple.txt",
   207  			wantHashCount: 4,
   208  			wantHashes: []string{
   209  				"ae16fb27474b41273c0deb355e4405d3cd0a6639f834285f97c75636c9e29df7", // root manifest
   210  				"0cc878d32c96126d47f63fbe391114ee1438cd521146fc975dea1546d302b6c0", // manifest root metadata
   211  				"05e34f11a0967e8c09968b69c4f486f569ef58a31a197992e01304a1e59f8e75", // manifest file entry
   212  				"e94a5aadf259f008b7d5039420c65d692901846523f503d97d24e2f077786d9a", // bytes
   213  			},
   214  		},
   215  		{
   216  			filesSize:     swarm.ChunkSize,
   217  			contentType:   "text/plain; charset=utf-8",
   218  			wantHashCount: 6,
   219  			wantHashes: []string{
   220  				"7e0a4b6cd542eb501f372438cbbbcd8a82c444740f00bdd54f4981f487bcf8b7", // root manifest
   221  				"0cc878d32c96126d47f63fbe391114ee1438cd521146fc975dea1546d302b6c0", // manifest root metadata
   222  				"3f538c3b5225111a79b3b1dbb5e269ca2115f2a7caf0e6925b773457cdef7be5", // manifest file entry (Edge)
   223  				"2f09e41846a24201758db3535dc6c42d738180c8874d4d40d4f2924d0091521f", // manifest file entry (Edge)
   224  				"b2662d17d51ce734695d993b44c0e2df34c3f50d5889e5bc3b8718838658e6b0", // manifest file entry (Value)
   225  				"f833c17be12d68aec95eca7f9d993f7d7aaa7a9c282eb2c3d79ab26a5aeaf384", // bytes
   226  			},
   227  		},
   228  		{
   229  			filesSize:     swarm.ChunkSize + 1,
   230  			contentType:   "text/plain; charset=utf-8",
   231  			filename:      "simple.txt",
   232  			wantHashCount: 6,
   233  			wantHashes: []string{
   234  				"ea58761906f98bd88204efbbab5c690329af02548afec37d7a556a47ca78ac62", // manifest root
   235  				"0cc878d32c96126d47f63fbe391114ee1438cd521146fc975dea1546d302b6c0", // manifest root metadata
   236  				"85617df0249a12649b56d09cf7f21e8642627b4fb9c0c9e03e2d25340cf60499", // manifest file entry
   237  				"a1c4483d15167aeb406017942c9625464574cf70bf7e42f237094acbccdb6834", // manifest file entry
   238  				"f833c17be12d68aec95eca7f9d993f7d7aaa7a9c282eb2c3d79ab26a5aeaf384", // bytes (4096)
   239  				"dcbfb467950a28f8c5023b86d31de4ff3a337993e921ae623ae62c7190d60329", // bytes (1)
   240  			},
   241  		},
   242  	}
   243  
   244  	for _, tc := range testCases {
   245  		tc := tc
   246  		chunkCount := int(math.Ceil(float64(tc.filesSize) / swarm.ChunkSize))
   247  		t.Run(fmt.Sprintf("%d-chunk-%d-bytes", chunkCount, tc.filesSize), func(t *testing.T) {
   248  			t.Parallel()
   249  
   250  			var (
   251  				data       = generateSample(tc.filesSize)
   252  				iter       = newAddressIterator(tc.ignoreDuplicateHashes)
   253  				storerMock = inmemchunkstore.New()
   254  			)
   255  
   256  			ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
   257  			defer cancel()
   258  
   259  			pipe := builder.NewPipelineBuilder(ctx, storerMock, false, 0)
   260  			fr, err := builder.FeedPipeline(ctx, pipe, bytes.NewReader(data))
   261  			if err != nil {
   262  				t.Fatal(err)
   263  			}
   264  
   265  			ls := loadsave.New(storerMock, storerMock, pipelineFactory(storerMock, false))
   266  			fManifest, err := manifest.NewDefaultManifest(ls, false)
   267  			if err != nil {
   268  				t.Fatal(err)
   269  			}
   270  			filename := tc.filename
   271  			if filename == "" {
   272  				filename = fr.String()
   273  			}
   274  
   275  			rootMtdt := map[string]string{
   276  				manifest.WebsiteIndexDocumentSuffixKey: filename,
   277  			}
   278  			err = fManifest.Add(ctx, "/", manifest.NewEntry(swarm.ZeroAddress, rootMtdt))
   279  			if err != nil {
   280  				t.Fatal(err)
   281  			}
   282  
   283  			fileMtdt := map[string]string{
   284  				manifest.EntryMetadataFilenameKey:    filename,
   285  				manifest.EntryMetadataContentTypeKey: tc.contentType,
   286  			}
   287  			err = fManifest.Add(ctx, filename, manifest.NewEntry(fr, fileMtdt))
   288  			if err != nil {
   289  				t.Fatal(err)
   290  			}
   291  
   292  			address, err := fManifest.Store(ctx)
   293  			if err != nil {
   294  				t.Fatal(err)
   295  			}
   296  
   297  			err = traversal.New(storerMock, storerMock).Traverse(ctx, address, iter.Next)
   298  			if err != nil {
   299  				t.Fatal(err)
   300  			}
   301  
   302  			haveCnt, wantCnt := tc.wantHashCount, iter.cnt
   303  			if !tc.ignoreDuplicateHashes {
   304  				haveCnt, wantCnt = len(iter.seen), len(tc.wantHashes)
   305  			}
   306  			if haveCnt != wantCnt {
   307  				t.Fatalf("hash count mismatch: have %d; want %d", haveCnt, wantCnt)
   308  			}
   309  
   310  			for _, hash := range tc.wantHashes {
   311  				if !iter.seen[hash] {
   312  					t.Fatalf("hash check: want %q; have none", hash)
   313  				}
   314  			}
   315  		})
   316  	}
   317  }
   318  
// file describes one file fixture for TestTraversalManifest: its
// payload size, placement in the manifest, and the chunk addresses
// expected from storing its content.
type file struct {
	size   int    // payload size in bytes.
	dir    string // directory the file is joined under in the manifest path.
	name   string // file name; empty means use the content reference string.
	chunks fileChunks
}
   325  
// fileChunks lists the expected chunk addresses of a file's content.
type fileChunks struct {
	content []string // hex-encoded addresses of the content chunks.
}
   329  
   330  func TestTraversalManifest(t *testing.T) {
   331  	t.Parallel()
   332  
   333  	testCases := []struct {
   334  		files                 []file
   335  		manifestHashes        []string
   336  		wantHashCount         int
   337  		ignoreDuplicateHashes bool
   338  	}{
   339  		{
   340  			files: []file{
   341  				{
   342  					size: len(dataCorpus),
   343  					dir:  "",
   344  					name: "hello.txt",
   345  					chunks: fileChunks{
   346  						content: []string{
   347  							"e94a5aadf259f008b7d5039420c65d692901846523f503d97d24e2f077786d9a",
   348  						},
   349  					},
   350  				},
   351  			},
   352  			manifestHashes: []string{
   353  				// NOTE: references will be fixed, due to custom obfuscation key function
   354  				"f81ac8ceb2db7e55b718eca35f05233dc523022e36e11f934dbfd5f0cafde198", // root
   355  				"05e34f11a0967e8c09968b69c4f486f569ef58a31a197992e01304a1e59f8e75", // metadata
   356  			},
   357  			wantHashCount: 3,
   358  		},
   359  		{
   360  			files: []file{
   361  				{
   362  					size: len(dataCorpus),
   363  					dir:  "",
   364  					name: "hello.txt",
   365  					chunks: fileChunks{
   366  						content: []string{
   367  							"e94a5aadf259f008b7d5039420c65d692901846523f503d97d24e2f077786d9a",
   368  						},
   369  					},
   370  				},
   371  				{
   372  					size: swarm.ChunkSize,
   373  					dir:  "",
   374  					name: "data/1.txt",
   375  					chunks: fileChunks{
   376  						content: []string{
   377  							"f833c17be12d68aec95eca7f9d993f7d7aaa7a9c282eb2c3d79ab26a5aeaf384", // bytes (4096)
   378  						},
   379  					},
   380  				},
   381  				{
   382  					size: swarm.ChunkSize,
   383  					dir:  "",
   384  					name: "data/2.txt",
   385  					chunks: fileChunks{
   386  						content: []string{
   387  							"f833c17be12d68aec95eca7f9d993f7d7aaa7a9c282eb2c3d79ab26a5aeaf384", // bytes (4096)
   388  						},
   389  					},
   390  				},
   391  			},
   392  			manifestHashes: []string{
   393  				// NOTE: references will be fixed, due to custom obfuscation key function
   394  				"d182df1cb214167d085256fafa657f38a191efe51af16834f6288ef23416fd25", // root
   395  				"05e34f11a0967e8c09968b69c4f486f569ef58a31a197992e01304a1e59f8e75", // manifest entry
   396  				"7e6bc53ca11bff459f77892563d04e09b440c63ce2f7d5fe8a8b0f0ba9eeefcf", // manifest entry (Edge PathSeparator)
   397  				"b2662d17d51ce734695d993b44c0e2df34c3f50d5889e5bc3b8718838658e6b0", // manifest file entry (1.txt)
   398  				"b2662d17d51ce734695d993b44c0e2df34c3f50d5889e5bc3b8718838658e6b0", // manifest file entry (2.txt)
   399  			},
   400  			wantHashCount:         8,
   401  			ignoreDuplicateHashes: true,
   402  		},
   403  	}
   404  
   405  	for _, tc := range testCases {
   406  		tc := tc
   407  		t.Run(fmt.Sprintf("%s-%d-files-%d-chunks", defaultMediaType, len(tc.files), tc.wantHashCount), func(t *testing.T) {
   408  			t.Parallel()
   409  
   410  			var (
   411  				storerMock = inmemchunkstore.New()
   412  				iter       = newAddressIterator(tc.ignoreDuplicateHashes)
   413  			)
   414  
   415  			ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
   416  			defer cancel()
   417  
   418  			var wantHashes []string
   419  			for _, f := range tc.files {
   420  				wantHashes = append(wantHashes, f.chunks.content...)
   421  			}
   422  			wantHashes = append(wantHashes, tc.manifestHashes...)
   423  
   424  			ls := loadsave.New(storerMock, storerMock, pipelineFactory(storerMock, false))
   425  			dirManifest, err := manifest.NewMantarayManifest(ls, false)
   426  			if err != nil {
   427  				t.Fatal(err)
   428  			}
   429  
   430  			for _, f := range tc.files {
   431  				data := generateSample(f.size)
   432  
   433  				pipe := builder.NewPipelineBuilder(ctx, storerMock, false, 0)
   434  				fr, err := builder.FeedPipeline(ctx, pipe, bytes.NewReader(data))
   435  				if err != nil {
   436  					t.Fatal(err)
   437  				}
   438  
   439  				fileName := f.name
   440  				if fileName == "" {
   441  					fileName = fr.String()
   442  				}
   443  				filePath := path.Join(f.dir, fileName)
   444  
   445  				err = dirManifest.Add(ctx, filePath, manifest.NewEntry(fr, nil))
   446  				if err != nil {
   447  					t.Fatal(err)
   448  				}
   449  			}
   450  			address, err := dirManifest.Store(ctx)
   451  			if err != nil {
   452  				t.Fatal(err)
   453  			}
   454  
   455  			err = traversal.New(storerMock, storerMock).Traverse(ctx, address, iter.Next)
   456  			if err != nil {
   457  				t.Fatal(err)
   458  			}
   459  
   460  			haveCnt, wantCnt := tc.wantHashCount, iter.cnt
   461  			if !tc.ignoreDuplicateHashes {
   462  				haveCnt, wantCnt = len(iter.seen), len(wantHashes)
   463  			}
   464  			if haveCnt != wantCnt {
   465  				t.Fatalf("hash count mismatch: have %d; want %d", haveCnt, wantCnt)
   466  			}
   467  
   468  			for _, hash := range wantHashes {
   469  				if !iter.seen[hash] {
   470  					t.Fatalf("hash check: want %q; have none", hash)
   471  				}
   472  			}
   473  		})
   474  	}
   475  }
   476  
   477  func TestTraversalSOC(t *testing.T) {
   478  	t.Parallel()
   479  
   480  	store := inmemchunkstore.New()
   481  	iter := newAddressIterator(false)
   482  
   483  	ctx := context.Background()
   484  
   485  	s := testingsoc.GenerateMockSOC(t, generateSample(swarm.ChunkSize))
   486  	sch := s.Chunk()
   487  
   488  	err := store.Put(ctx, sch)
   489  	if err != nil {
   490  		t.Fatal(err)
   491  	}
   492  
   493  	err = traversal.New(store, store).Traverse(ctx, sch.Address(), iter.Next)
   494  	if err != nil {
   495  		t.Fatal(err)
   496  	}
   497  
   498  	if len(iter.seen) != 1 {
   499  		t.Fatal("incorrect hashes seen")
   500  	}
   501  
   502  	if !iter.seen[sch.Address().String()] {
   503  		t.Fatal("expected hash not seen")
   504  	}
   505  }
   506  
   507  func pipelineFactory(s storage.Putter, encrypt bool) func() pipeline.Interface {
   508  	return func() pipeline.Interface {
   509  		return builder.NewPipelineBuilder(context.Background(), s, encrypt, 0)
   510  	}
   511  }