github.com/ethersphere/bee/v2@v2.2.0/pkg/file/redundancy/getter/getter_test.go

// Copyright 2023 The Swarm Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package getter_test

import (
	"bytes"
	"context"
	"crypto/rand"
	"encoding/binary"
	"errors"
	"fmt"
	"io"
	mrand "math/rand"
	"sync"
	"testing"
	"time"

	"github.com/ethersphere/bee/v2/pkg/cac"
	"github.com/ethersphere/bee/v2/pkg/file/redundancy/getter"
	"github.com/ethersphere/bee/v2/pkg/storage"
	inmem "github.com/ethersphere/bee/v2/pkg/storage/inmemchunkstore"
	mockstorer "github.com/ethersphere/bee/v2/pkg/storer/mock"
	"github.com/ethersphere/bee/v2/pkg/swarm"
	"github.com/klauspost/reedsolomon"
	"golang.org/x/sync/errgroup"
)

// TestGetterRACE_FLAKY tests the retrieval of chunks with missing data shards
// using the RACE strategy for a number of erasure code parameters.
func TestGetterRACE_FLAKY(t *testing.T) {
	type getterTest struct {
		bufSize    int
		shardCnt   int
		erasureCnt int
	}

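	// Test cases are built in three bands for each (bufSize, shardCnt) pair:
	// up to three random erasure counts below parityCnt (recoverable), the
	// boundary cases parityCnt (still recoverable) and parityCnt+1 (not
	// recoverable), and up to three counts strictly above parityCnt (not
	// recoverable). For example, bufSize=24 and shardCnt=13 give parityCnt=11,
	// so the boundary cases erase 11 and 12 chunks.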
	var tcs []getterTest
	for bufSize := 3; bufSize <= 128; bufSize += 21 {
		for shardCnt := bufSize/2 + 1; shardCnt <= bufSize; shardCnt += 21 {
			parityCnt := bufSize - shardCnt
			erasures := mrand.Perm(parityCnt - 1)
			if len(erasures) > 3 {
				erasures = erasures[:3]
			}
			for _, erasureCnt := range erasures {
				tcs = append(tcs, getterTest{bufSize, shardCnt, erasureCnt})
			}
			tcs = append(tcs, getterTest{bufSize, shardCnt, parityCnt}, getterTest{bufSize, shardCnt, parityCnt + 1})
			erasures = mrand.Perm(shardCnt - 1)
			if len(erasures) > 3 {
				erasures = erasures[:3]
			}
			for _, erasureCnt := range erasures {
				tcs = append(tcs, getterTest{bufSize, shardCnt, erasureCnt + parityCnt + 1})
			}
		}
	}
	t.Run("GET with RACE", func(t *testing.T) {
		t.Parallel()

		for _, tc := range tcs {
			t.Run(fmt.Sprintf("data/total/missing=%d/%d/%d", tc.shardCnt, tc.bufSize, tc.erasureCnt), func(t *testing.T) {
				testDecodingRACE(t, tc.bufSize, tc.shardCnt, tc.erasureCnt)
			})
		}
	})
}

// TestGetterFallback tests the retrieval of chunks with missing data shards
// using strict or fallback mode, starting with the NONE and DATA strategies.
func TestGetterFallback(t *testing.T) {
	t.Skip("removed strategy timeout")
	t.Run("GET", func(t *testing.T) {
		t.Run("NONE", func(t *testing.T) {
			t.Run("strict", func(t *testing.T) {
				testDecodingFallback(t, getter.NONE, true)
			})
			t.Run("fallback", func(t *testing.T) {
				testDecodingFallback(t, getter.NONE, false)
			})
		})
		t.Run("DATA", func(t *testing.T) {
			t.Run("strict", func(t *testing.T) {
				testDecodingFallback(t, getter.DATA, true)
			})
			t.Run("fallback", func(t *testing.T) {
				testDecodingFallback(t, getter.DATA, false)
			})
		})
	})
}

func testDecodingRACE(t *testing.T, bufSize, shardCnt, erasureCnt int) {
	t.Helper()
	store := inmem.New()
	buf := make([][]byte, bufSize)
	addrs := initData(t, buf, shardCnt, store)

	var addr swarm.Address
	erasures := forget(t, store, addrs, erasureCnt)
	for _, i := range erasures {
		if i < shardCnt {
			addr = addrs[i]
			break
		}
	}
	if len(addr.Bytes()) == 0 {
		t.Skip("no data shard erased")
	}

	g := getter.New(addrs, shardCnt, store, store, func(error) {}, getter.DefaultConfig)

	parityCnt := len(buf) - shardCnt
	_, err := g.Get(context.Background(), addr)

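	// Reed-Solomon decoding needs any shardCnt of the bufSize chunks, so the
	// missing chunk is recoverable iff at most parityCnt chunks were erased.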
	switch {
	case erasureCnt > parityCnt:
		t.Run("unable to recover", func(t *testing.T) {
			if !errors.Is(err, storage.ErrNotFound) &&
				!errors.Is(err, context.DeadlineExceeded) {
				t.Fatalf("expected not found error or deadline exceeded, got %v", err)
			}
		})
	case erasureCnt <= parityCnt:
		t.Run("will recover", func(t *testing.T) {
			if err != nil {
				t.Fatalf("expected no error, got %v", err)
			}
			checkShardsAvailable(t, store, addrs[:shardCnt], buf[:shardCnt])
		})
	}
}

// testDecodingFallback tests the retrieval of chunks with missing data shards.
func testDecodingFallback(t *testing.T, s getter.Strategy, strict bool) {
	t.Helper()

	strategyTimeout := 150 * time.Millisecond
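	// completion times below are asserted in whole multiples ("rounds") of
	// strategyTimeout, measured from the moment the getter is created (start)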

	bufSize := 12
	shardCnt := 6
	store := mockstorer.NewDelayedStore(inmem.New())
	buf := make([][]byte, bufSize)
	addrs := initData(t, buf, shardCnt, store)

	// erase one data shard; a second one will be delayed rather than erased
	delayed, erased := 1, 0
	ctx := context.TODO()
	err := store.Delete(ctx, addrs[erased])
	if err != nil {
		t.Fatal(err)
	}
	// context for enforced retrievals with long timeout
	ctx, cancel := context.WithCancel(context.TODO())
	defer cancel()

	// signal channels for delayed and erased chunk retrieval
	waitDelayed, waitErased := make(chan error, 1), make(chan error, 1)

	// complete retrieval of the delayed chunk by putting it into the store after a while
	delay := strategyTimeout / 4
	if s == getter.NONE {
		delay += strategyTimeout
	}
	store.Delay(addrs[delayed], delay)
	// create getter
	start := time.Now()
	conf := getter.Config{
		Strategy:     s,
		Strict:       strict,
		FetchTimeout: strategyTimeout / 2,
	}
	g := getter.New(addrs, shardCnt, store, store, func(error) {}, conf)

	// launch delayed and erased chunk retrievals
	wg := sync.WaitGroup{}
	// ensure both retrieval goroutines finish before the test returns;
	// they are bounded by the per-call context timeouts below
	defer wg.Wait()
	wg.Add(2)
	// signal using the waitDelayed and waitErased channels when
	// delayed and erased chunk retrieval completes
	go func() {
		defer wg.Done()
		ctx, cancel := context.WithTimeout(ctx, strategyTimeout*time.Duration(5-s))
		defer cancel()
		_, err := g.Get(ctx, addrs[delayed])
		waitDelayed <- err
	}()
	go func() {
		defer wg.Done()
		ctx, cancel := context.WithTimeout(ctx, strategyTimeout*time.Duration(5-s))
		defer cancel()
		_, err := g.Get(ctx, addrs[erased])
		waitErased <- err
	}()

	// wait for delayed chunk retrieval to complete
	select {
	case err := <-waitDelayed:
		if err != nil {
			t.Fatal("unexpected error", err)
		}
		round := time.Since(start) / strategyTimeout
		switch {
		case strict && s == getter.NONE:
			if round < 1 {
				t.Fatalf("unexpected completion of delayed chunk retrieval. got round %d", round)
			}
		case s == getter.NONE:
			if round < 1 {
				t.Fatalf("unexpected early completion of delayed chunk retrieval. got round %d", round)
			}
			if round > 2 {
				t.Fatalf("unexpected late completion of delayed chunk retrieval. got round %d", round)
			}
		case s == getter.DATA:
			if round > 0 {
				t.Fatalf("unexpected late completion of delayed chunk retrieval. got round %d", round)
			}
		}

		checkShardsAvailable(t, store, addrs[delayed:], buf[delayed:])
		// wait for erased chunk retrieval to complete
		select {
		case err := <-waitErased:
			if err != nil {
				t.Fatal("unexpected error", err)
			}
			round = time.Since(start) / strategyTimeout
			switch {
			case strict:
				t.Fatalf("unexpected completion of erased chunk retrieval. got round %d", round)
			case s == getter.NONE:
				if round < 3 {
					t.Fatalf("unexpected early completion of erased chunk retrieval. got round %d", round)
				}
				if round > 3 {
					t.Fatalf("unexpected late completion of erased chunk retrieval. got round %d", round)
				}
			case s == getter.DATA:
				if round < 1 {
					t.Fatalf("unexpected early completion of erased chunk retrieval. got round %d", round)
				}
				if round > 1 {
					t.Fatalf("unexpected late completion of erased chunk retrieval. got round %d", round)
				}
			}
			checkShardsAvailable(t, store, addrs[:erased], buf[:erased])

		case <-time.After(strategyTimeout * 2):
			if !strict {
				t.Fatal("unexpected timeout using strategy", s, "with strict", strict)
			}
		}
	case <-time.After(strategyTimeout * 3):
		if !strict || s != getter.NONE {
			t.Fatal("unexpected timeout using strategy", s, "with strict", strict)
		}
	}
}

func initData(t *testing.T, buf [][]byte, shardCnt int, s storage.ChunkStore) []swarm.Address {
	t.Helper()
	spanBytes := make([]byte, 8)
	binary.LittleEndian.PutUint64(spanBytes, swarm.ChunkSize)

	for i := 0; i < len(buf); i++ {
		buf[i] = make([]byte, swarm.ChunkWithSpanSize)
		if i >= shardCnt {
			continue
		}
		_, err := io.ReadFull(rand.Reader, buf[i])
		if err != nil {
			t.Fatal(err)
		}
		copy(buf[i], spanBytes)
	}

	// fill in parity chunks
	rs, err := reedsolomon.New(shardCnt, len(buf)-shardCnt)
	if err != nil {
		t.Fatal(err)
	}
	err = rs.Encode(buf)
	if err != nil {
		t.Fatal(err)
	}

	// calculate chunk addresses and upload to the store
	addrs := make([]swarm.Address, len(buf))
	ctx := context.TODO()
	for i := 0; i < len(buf); i++ {
		chunk, err := cac.NewWithDataSpan(buf[i])
		if err != nil {
			t.Fatal(err)
		}
		err = s.Put(ctx, chunk)
		if err != nil {
			t.Fatal(err)
		}
		addrs[i] = chunk.Address()
	}

	return addrs
}
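
// reconstructSketch is an illustrative, hedged sketch (not used by the tests
// above) of how the klauspost/reedsolomon encoder from initData recovers
// erased shards: missing shards are marked nil and Reconstruct fills them
// back in, provided at most the parity count are missing. The helper name
// and its parameters are assumptions for illustration only.
func reconstructSketch(t *testing.T, buf [][]byte, shardCnt int, missing []int) {
	t.Helper()
	rs, err := reedsolomon.New(shardCnt, len(buf)-shardCnt)
	if err != nil {
		t.Fatal(err)
	}
	for _, i := range missing {
		buf[i] = nil // a nil shard marks an erasure for Reconstruct
	}
	if err := rs.Reconstruct(buf); err != nil {
		// returns reedsolomon.ErrTooFewShards when len(missing) exceeds parity
		t.Fatal(err)
	}
	if ok, err := rs.Verify(buf); err != nil || !ok {
		t.Fatalf("parity verification failed: %v", err)
	}
}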

func checkShardsAvailable(t *testing.T, s storage.ChunkStore, addrs []swarm.Address, data [][]byte) {
	t.Helper()
	eg, ctx := errgroup.WithContext(context.Background())
	for i, addr := range addrs {
		i := i
		addr := addr
		eg.Go(func() (err error) {
			var delay time.Duration
			var ch swarm.Chunk
			// retry for up to 30 rounds, backing off 50ms between attempts
			for retry := 0; retry < 30; retry++ {
				select {
				case <-ctx.Done():
					return ctx.Err()
				default:
					<-time.After(delay)
					delay = 50 * time.Millisecond
				}
				ch, err = s.Get(ctx, addr)
				if err == nil {
					break
				}
				err = fmt.Errorf("datashard %d with address %v is not available: %w", i, addr, err)
			}
			if err == nil && !bytes.Equal(ch.Data(), data[i]) {
				return fmt.Errorf("datashard %d has incorrect data", i)
			}
			return err
		})
	}
	if err := eg.Wait(); err != nil {
		t.Fatal(err)
	}
}

func forget(t *testing.T, store storage.ChunkStore, addrs []swarm.Address, erasureCnt int) (erasures []int) {
	t.Helper()

	ctx := context.TODO()
	erasures = mrand.Perm(len(addrs))[:erasureCnt]
	for _, i := range erasures {
		err := store.Delete(ctx, addrs[i])
		if err != nil {
			t.Fatal(err)
		}
	}
	return erasures
}
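
// getterSketch is an illustrative, hedged sketch of the end-to-end flow the
// tests above exercise, using only helpers and the getter API shown in this
// file: encode 6 data + 6 parity chunks, erase one data chunk, and let the
// getter recover it. The function name and parameter choices are assumptions
// for illustration only.
func getterSketch(t *testing.T) {
	t.Helper()
	store := inmem.New()
	buf := make([][]byte, 12)
	addrs := initData(t, buf, 6, store)

	// erase a single data chunk, then ask the getter for it by address
	if err := store.Delete(context.Background(), addrs[0]); err != nil {
		t.Fatal(err)
	}
	g := getter.New(addrs, 6, store, store, func(error) {}, getter.DefaultConfig)
	ch, err := g.Get(context.Background(), addrs[0])
	if err != nil {
		t.Fatal(err) // one erasure is well within the 6-chunk parity budget
	}
	if !bytes.Equal(ch.Data(), buf[0]) {
		t.Fatal("recovered chunk differs from original")
	}
}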