github.com/ethersphere/bee/v2@v2.2.0/pkg/file/redundancy/getter/getter_test.go (about) 1 // Copyright 2023 The Swarm Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package getter_test 6 7 import ( 8 "bytes" 9 "context" 10 "crypto/rand" 11 "encoding/binary" 12 "errors" 13 "fmt" 14 "io" 15 mrand "math/rand" 16 "sync" 17 "testing" 18 "time" 19 20 "github.com/ethersphere/bee/v2/pkg/cac" 21 "github.com/ethersphere/bee/v2/pkg/file/redundancy/getter" 22 "github.com/ethersphere/bee/v2/pkg/storage" 23 inmem "github.com/ethersphere/bee/v2/pkg/storage/inmemchunkstore" 24 mockstorer "github.com/ethersphere/bee/v2/pkg/storer/mock" 25 "github.com/ethersphere/bee/v2/pkg/swarm" 26 "github.com/klauspost/reedsolomon" 27 "golang.org/x/sync/errgroup" 28 ) 29 30 // TestGetter tests the retrieval of chunks with missing data shards 31 // using the RACE strategy for a number of erasure code parameters 32 func TestGetterRACE_FLAKY(t *testing.T) { 33 type getterTest struct { 34 bufSize int 35 shardCnt int 36 erasureCnt int 37 } 38 39 var tcs []getterTest 40 for bufSize := 3; bufSize <= 128; bufSize += 21 { 41 for shardCnt := bufSize/2 + 1; shardCnt <= bufSize; shardCnt += 21 { 42 parityCnt := bufSize - shardCnt 43 erasures := mrand.Perm(parityCnt - 1) 44 if len(erasures) > 3 { 45 erasures = erasures[:3] 46 } 47 for _, erasureCnt := range erasures { 48 tcs = append(tcs, getterTest{bufSize, shardCnt, erasureCnt}) 49 } 50 tcs = append(tcs, getterTest{bufSize, shardCnt, parityCnt}, getterTest{bufSize, shardCnt, parityCnt + 1}) 51 erasures = mrand.Perm(shardCnt - 1) 52 if len(erasures) > 3 { 53 erasures = erasures[:3] 54 } 55 for _, erasureCnt := range erasures { 56 tcs = append(tcs, getterTest{bufSize, shardCnt, erasureCnt + parityCnt + 1}) 57 } 58 } 59 } 60 t.Run("GET with RACE", func(t *testing.T) { 61 t.Parallel() 62 63 for _, tc := range tcs { 64 t.Run(fmt.Sprintf("data/total/missing=%d/%d/%d", tc.shardCnt, tc.bufSize, tc.erasureCnt), func(t *testing.T) { 65 testDecodingRACE(t, tc.bufSize, tc.shardCnt, tc.erasureCnt) 66 }) 67 } 68 }) 69 } 70 71 // TestGetterFallback tests the retrieval of chunks with missing data shards 72 // using the strict or fallback mode starting with NONE and DATA strategies 73 func TestGetterFallback(t *testing.T) { 74 t.Skip("removed strategy timeout") 75 t.Run("GET", func(t *testing.T) { 76 t.Run("NONE", func(t *testing.T) { 77 t.Run("strict", func(t *testing.T) { 78 testDecodingFallback(t, getter.NONE, true) 79 }) 80 t.Run("fallback", func(t *testing.T) { 81 testDecodingFallback(t, getter.NONE, false) 82 }) 83 }) 84 t.Run("DATA", func(t *testing.T) { 85 t.Run("strict", func(t *testing.T) { 86 testDecodingFallback(t, getter.DATA, true) 87 }) 88 t.Run("fallback", func(t *testing.T) { 89 testDecodingFallback(t, getter.DATA, false) 90 }) 91 }) 92 }) 93 } 94 95 func testDecodingRACE(t *testing.T, bufSize, shardCnt, erasureCnt int) { 96 t.Helper() 97 store := inmem.New() 98 buf := make([][]byte, bufSize) 99 addrs := initData(t, buf, shardCnt, store) 100 101 var addr swarm.Address 102 erasures := forget(t, store, addrs, erasureCnt) 103 for _, i := range erasures { 104 if i < shardCnt { 105 addr = addrs[i] 106 break 107 } 108 } 109 if len(addr.Bytes()) == 0 { 110 t.Skip("no data shard erased") 111 } 112 113 g := getter.New(addrs, shardCnt, store, store, func(error) {}, getter.DefaultConfig) 114 115 parityCnt := len(buf) - shardCnt 116 _, err := g.Get(context.Background(), addr) 117 118 switch { 119 case erasureCnt > parityCnt: 120 t.Run("unable to recover", func(t *testing.T) { 121 if !errors.Is(err, storage.ErrNotFound) && 122 !errors.Is(err, context.DeadlineExceeded) { 123 t.Fatalf("expected not found error or deadline exceeded, got %v", err) 124 } 125 }) 126 case erasureCnt <= parityCnt: 127 t.Run("will recover", func(t *testing.T) { 128 if err != nil { 129 t.Fatalf("expected no error, got %v", err) 130 } 131 checkShardsAvailable(t, store, addrs[:shardCnt], buf[:shardCnt]) 132 }) 133 } 134 } 135 136 // testDecodingFallback tests the retrieval of chunks with missing data shards 137 func testDecodingFallback(t *testing.T, s getter.Strategy, strict bool) { 138 t.Helper() 139 140 strategyTimeout := 150 * time.Millisecond 141 142 bufSize := 12 143 shardCnt := 6 144 store := mockstorer.NewDelayedStore(inmem.New()) 145 buf := make([][]byte, bufSize) 146 addrs := initData(t, buf, shardCnt, store) 147 148 // erase two data shards 149 delayed, erased := 1, 0 150 ctx := context.TODO() 151 err := store.Delete(ctx, addrs[erased]) 152 if err != nil { 153 t.Fatal(err) 154 } 155 156 // context for enforced retrievals with long timeout 157 ctx, cancel := context.WithCancel(context.TODO()) 158 defer cancel() 159 160 // signal channels for delayed and erased chunk retrieval 161 waitDelayed, waitErased := make(chan error, 1), make(chan error, 1) 162 163 // complete retrieval of delayed chunk by putting it into the store after a while 164 delay := strategyTimeout / 4 165 if s == getter.NONE { 166 delay += strategyTimeout 167 } 168 store.Delay(addrs[delayed], delay) 169 // create getter 170 start := time.Now() 171 conf := getter.Config{ 172 Strategy: s, 173 Strict: strict, 174 FetchTimeout: strategyTimeout / 2, 175 } 176 g := getter.New(addrs, shardCnt, store, store, func(error) {}, conf) 177 178 // launch delayed and erased chunk retrieval 179 wg := sync.WaitGroup{} 180 // defer wg.Wait() 181 wg.Add(2) 182 // signal using the waitDelayed and waitErased channels when 183 // delayed and erased chunk retrieval completes 184 go func() { 185 defer wg.Done() 186 ctx, cancel := context.WithTimeout(ctx, strategyTimeout*time.Duration(5-s)) 187 defer cancel() 188 _, err := g.Get(ctx, addrs[delayed]) 189 waitDelayed <- err 190 }() 191 go func() { 192 defer wg.Done() 193 ctx, cancel := context.WithTimeout(ctx, strategyTimeout*time.Duration(5-s)) 194 defer cancel() 195 _, err := g.Get(ctx, addrs[erased]) 196 waitErased <- err 197 }() 198 199 // wait for delayed chunk retrieval to complete 200 select { 201 case err := <-waitDelayed: 202 if err != nil { 203 t.Fatal("unexpected error", err) 204 } 205 round := time.Since(start) / strategyTimeout 206 switch { 207 case strict && s == getter.NONE: 208 if round < 1 { 209 t.Fatalf("unexpected completion of delayed chunk retrieval. got round %d", round) 210 } 211 case s == getter.NONE: 212 if round < 1 { 213 t.Fatalf("unexpected completion of delayed chunk retrieval. got round %d", round) 214 } 215 if round > 2 { 216 t.Fatalf("unexpected late completion of delayed chunk retrieval. got round %d", round) 217 } 218 case s == getter.DATA: 219 if round > 0 { 220 t.Fatalf("unexpected late completion of delayed chunk retrieval. got round %d", round) 221 } 222 } 223 224 checkShardsAvailable(t, store, addrs[delayed:], buf[delayed:]) 225 // wait for erased chunk retrieval to complete 226 select { 227 case err := <-waitErased: 228 if err != nil { 229 t.Fatal("unexpected error", err) 230 } 231 round = time.Since(start) / strategyTimeout 232 switch { 233 case strict: 234 t.Fatalf("unexpected completion of erased chunk retrieval. got round %d", round) 235 case s == getter.NONE: 236 if round < 3 { 237 t.Fatalf("unexpected early completion of erased chunk retrieval. got round %d", round) 238 } 239 if round > 3 { 240 t.Fatalf("unexpected late completion of erased chunk retrieval. got round %d", round) 241 } 242 case s == getter.DATA: 243 if round < 1 { 244 t.Fatalf("unexpected early completion of erased chunk retrieval. got round %d", round) 245 } 246 if round > 1 { 247 t.Fatalf("unexpected late completion of delayed chunk retrieval. got round %d", round) 248 } 249 } 250 checkShardsAvailable(t, store, addrs[:erased], buf[:erased]) 251 252 case <-time.After(strategyTimeout * 2): 253 if !strict { 254 t.Fatal("unexpected timeout using strategy", s, "with strict", strict) 255 } 256 } 257 case <-time.After(strategyTimeout * 3): 258 if !strict || s != getter.NONE { 259 t.Fatal("unexpected timeout using strategy", s, "with strict", strict) 260 } 261 } 262 } 263 264 func initData(t *testing.T, buf [][]byte, shardCnt int, s storage.ChunkStore) []swarm.Address { 265 t.Helper() 266 spanBytes := make([]byte, 8) 267 binary.LittleEndian.PutUint64(spanBytes, swarm.ChunkSize) 268 269 for i := 0; i < len(buf); i++ { 270 buf[i] = make([]byte, swarm.ChunkWithSpanSize) 271 if i >= shardCnt { 272 continue 273 } 274 _, err := io.ReadFull(rand.Reader, buf[i]) 275 if err != nil { 276 t.Fatal(err) 277 } 278 copy(buf[i], spanBytes) 279 } 280 281 // fill in parity chunks 282 rs, err := reedsolomon.New(shardCnt, len(buf)-shardCnt) 283 if err != nil { 284 t.Fatal(err) 285 } 286 err = rs.Encode(buf) 287 if err != nil { 288 t.Fatal(err) 289 } 290 291 // calculate chunk addresses and upload to the store 292 addrs := make([]swarm.Address, len(buf)) 293 ctx := context.TODO() 294 for i := 0; i < len(buf); i++ { 295 chunk, err := cac.NewWithDataSpan(buf[i]) 296 if err != nil { 297 t.Fatal(err) 298 } 299 err = s.Put(ctx, chunk) 300 if err != nil { 301 t.Fatal(err) 302 } 303 addrs[i] = chunk.Address() 304 } 305 306 return addrs 307 } 308 309 func checkShardsAvailable(t *testing.T, s storage.ChunkStore, addrs []swarm.Address, data [][]byte) { 310 t.Helper() 311 eg, ctx := errgroup.WithContext(context.Background()) 312 for i, addr := range addrs { 313 i := i 314 addr := addr 315 eg.Go(func() (err error) { 316 var delay time.Duration 317 var ch swarm.Chunk 318 for i := 0; i < 30; i++ { 319 select { 320 case <-ctx.Done(): 321 return ctx.Err() 322 default: 323 <-time.After(delay) 324 delay = 50 * time.Millisecond 325 } 326 ch, err = s.Get(ctx, addr) 327 if err == nil { 328 break 329 } 330 err = fmt.Errorf("datashard %d with address %v is not available: %w", i, addr, err) 331 select { 332 case <-ctx.Done(): 333 return ctx.Err() 334 default: 335 <-time.After(delay) 336 delay = 50 * time.Millisecond 337 } 338 } 339 if err == nil && !bytes.Equal(ch.Data(), data[i]) { 340 return fmt.Errorf("datashard %d has incorrect data", i) 341 } 342 return err 343 }) 344 } 345 if err := eg.Wait(); err != nil { 346 t.Fatal(err) 347 } 348 } 349 350 func forget(t *testing.T, store storage.ChunkStore, addrs []swarm.Address, erasureCnt int) (erasures []int) { 351 t.Helper() 352 353 ctx := context.TODO() 354 erasures = mrand.Perm(len(addrs))[:erasureCnt] 355 for _, i := range erasures { 356 err := store.Delete(ctx, addrs[i]) 357 if err != nil { 358 t.Fatal(err) 359 } 360 } 361 return erasures 362 }