github.com/ethersphere/bee/v2@v2.2.0/pkg/storer/sample.go

// Copyright 2023 The Swarm Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package storer

import (
	"bytes"
	"context"
	"encoding/binary"
	"fmt"
	"hash"
	"math/big"
	"sort"
	"sync"
	"testing"
	"time"

	"github.com/ethersphere/bee/v2/pkg/bmt"
	"github.com/ethersphere/bee/v2/pkg/cac"
	"github.com/ethersphere/bee/v2/pkg/postage"
	"github.com/ethersphere/bee/v2/pkg/soc"
	chunk "github.com/ethersphere/bee/v2/pkg/storage/testing"
	"github.com/ethersphere/bee/v2/pkg/storer/internal/chunkstamp"
	"github.com/ethersphere/bee/v2/pkg/storer/internal/reserve"
	"github.com/ethersphere/bee/v2/pkg/swarm"
	"golang.org/x/sync/errgroup"
)

const SampleSize = 16

type SampleItem struct {
	TransformedAddress swarm.Address
	ChunkAddress       swarm.Address
	ChunkData          []byte
	Stamp              *postage.Stamp
}

type Sample struct {
	Stats SampleStats
	Items []SampleItem
}

// RandSample returns Sample with random values.
func RandSample(t *testing.T, anchor []byte) Sample {
	t.Helper()

	chunks := make([]swarm.Chunk, SampleSize)
	for i := 0; i < SampleSize; i++ {
		ch := chunk.GenerateTestRandomChunk()
		if i%3 == 0 {
			ch = chunk.GenerateTestRandomSoChunk(t, ch)
		}
		chunks[i] = ch
	}

	sample, err := MakeSampleUsingChunks(chunks, anchor)
	if err != nil {
		t.Fatal(err)
	}

	return sample
}

// MakeSampleUsingChunks returns Sample constructed using supplied chunks.
func MakeSampleUsingChunks(chunks []swarm.Chunk, anchor []byte) (Sample, error) {
	prefixHasherFactory := func() hash.Hash {
		return swarm.NewPrefixHasher(anchor)
	}
	items := make([]SampleItem, len(chunks))
	for i, ch := range chunks {
		tr, err := transformedAddress(bmt.NewHasher(prefixHasherFactory), ch, getChunkType(ch))
		if err != nil {
			return Sample{}, err
		}

		items[i] = SampleItem{
			TransformedAddress: tr,
			ChunkAddress:       ch.Address(),
			ChunkData:          ch.Data(),
			Stamp:              newStamp(ch.Stamp()),
		}
	}

	sort.Slice(items, func(i, j int) bool {
		return items[i].TransformedAddress.Compare(items[j].TransformedAddress) == -1
	})

	return Sample{Items: items}, nil
}

func newStamp(s swarm.Stamp) *postage.Stamp {
	return postage.NewStamp(s.BatchID(), s.Index(), s.Timestamp(), s.Sig())
}

func getChunkType(chunk swarm.Chunk) swarm.ChunkType {
	if cac.Valid(chunk) {
		return swarm.ChunkTypeContentAddressed
	} else if soc.Valid(chunk) {
		return swarm.ChunkTypeSingleOwner
	}
	return swarm.ChunkTypeUnspecified
}
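
// Illustrative sketch (not part of the upstream API): computing a single
// transformed address with the anchor-keyed prefix hasher, mirroring what
// MakeSampleUsingChunks does per chunk. The anchor acts as the per-round key,
// so the same chunk yields a different sample value for each anchor.
func exampleTransformedAddress(ch swarm.Chunk, anchor []byte) (swarm.Address, error) {
	hasher := bmt.NewHasher(func() hash.Hash {
		return swarm.NewPrefixHasher(anchor)
	})
	return transformedAddress(hasher, ch, getChunkType(ch))
}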
// ReserveSample generates the sample of the node's reserve storage that the
// storage incentives agent needs in order to participate in the lottery round.
// To generate this sample we iterate through all the chunks in the node's
// reserve and calculate the transformed hashes of all the chunks using the
// anchor as the salt. The transformed hashes are computed with a BMT hasher
// whose base hasher is keyed with the anchor (swarm.NewPrefixHasher), i.e. the
// anchor acts as the key. Nodes need to calculate the sample as efficiently as
// possible: the lottery round is time based, so nodes participating in the
// round need to perform this calculation within the round limits.
// In order to optimize this we use a simple pipeline pattern:
// Iterate chunk addresses -> Get the chunk data and calculate transformed hash -> Assemble the sample
func (db *DB) ReserveSample(
	ctx context.Context,
	anchor []byte,
	storageRadius uint8,
	consensusTime uint64,
	minBatchBalance *big.Int,
) (Sample, error) {
	g, ctx := errgroup.WithContext(ctx)
	chunkC := make(chan *reserve.ChunkBinItem, 64)
	allStats := &SampleStats{}
	statsLock := sync.Mutex{}
	addStats := func(stats SampleStats) {
		statsLock.Lock()
		allStats.add(stats)
		statsLock.Unlock()
	}

	t := time.Now()

	excludedBatchIDs, err := db.batchesBelowValue(minBatchBalance)
	if err != nil {
		db.logger.Error(err, "get batches below value")
	}

	allStats.BatchesBelowValueDuration = time.Since(t)

	// Phase 1: Iterate chunk addresses
	g.Go(func() error {
		start := time.Now()
		stats := SampleStats{}
		defer func() {
			stats.IterationDuration = time.Since(start)
			close(chunkC)
			addStats(stats)
		}()

		err := db.reserve.IterateChunksItems(storageRadius, func(chi *reserve.ChunkBinItem) (bool, error) {
			select {
			case chunkC <- chi:
				stats.TotalIterated++
				return false, nil
			case <-ctx.Done():
				return false, ctx.Err()
			}
		})
		return err
	})

	// Phase 2: Get the chunk data and calculate transformed hash
	sampleItemChan := make(chan SampleItem, 64)

	prefixHasherFactory := func() hash.Hash {
		return swarm.NewPrefixHasher(anchor)
	}

	const workers = 6

	for i := 0; i < workers; i++ {
		g.Go(func() error {
			wstat := SampleStats{}
			hasher := bmt.NewHasher(prefixHasherFactory)
			defer func() {
				addStats(wstat)
			}()

			for chItem := range chunkC {
				// exclude chunks whose batch balance is below the minimum
				if _, found := excludedBatchIDs[string(chItem.BatchID)]; found {
					wstat.BelowBalanceIgnored++

					continue
				}

				// Skip chunks if they are not SOC or CAC
				if chItem.ChunkType != swarm.ChunkTypeSingleOwner &&
					chItem.ChunkType != swarm.ChunkTypeContentAddressed {
					wstat.RogueChunk++
					continue
				}

				chunkLoadStart := time.Now()

				chunk, err := db.ChunkStore().Get(ctx, chItem.Address)
				if err != nil {
					wstat.ChunkLoadFailed++
					db.logger.Debug("failed loading chunk", "chunk_address", chItem.Address, "error", err)
					continue
				}

				wstat.ChunkLoadDuration += time.Since(chunkLoadStart)

				taddrStart := time.Now()
				taddr, err := transformedAddress(hasher, chunk, chItem.ChunkType)
				if err != nil {
					return err
				}
				wstat.TaddrDuration += time.Since(taddrStart)

				select {
				case sampleItemChan <- SampleItem{
					TransformedAddress: taddr,
					ChunkAddress:       chunk.Address(),
					ChunkData:          chunk.Data(),
					Stamp:              postage.NewStamp(chItem.BatchID, nil, nil, nil),
				}:
				case <-ctx.Done():
					return ctx.Err()
				}
			}

			return nil
		})
	}

	go func() {
		_ = g.Wait()
		close(sampleItemChan)
	}()

	sampleItems := make([]SampleItem, 0, SampleSize)
	// insert puts the new item into its correct (sorted) place. If the sample
	// grows beyond the size we need, the last item is dropped.
	insert := func(item SampleItem) {
		added := false
		for i, sItem := range sampleItems {
			if le(item.TransformedAddress, sItem.TransformedAddress) {
				sampleItems = append(sampleItems[:i+1], sampleItems[i:]...)
				sampleItems[i] = item
				added = true
				break
			} else if item.TransformedAddress.Compare(sItem.TransformedAddress) == 0 { // ensure the sample passes the order check of the redistribution contract
				// replace the existing item if the incoming chunk is a CAC
				ch := swarm.NewChunk(item.ChunkAddress, item.ChunkData)
				_, err := soc.FromChunk(ch)
				if err != nil {
					sampleItems[i] = item
				}
				return
			}
		}
		if len(sampleItems) > SampleSize {
			sampleItems = sampleItems[:SampleSize]
		}
		if len(sampleItems) < SampleSize && !added {
			sampleItems = append(sampleItems, item)
		}
	}

	// Phase 3: Assemble the sample. Here we need to assemble only the first
	// SampleSize items from the results of the 2nd phase.
	// In this step stamps are loaded and validated only if the chunk may be
	// added to the sample.
	stats := SampleStats{}
	for item := range sampleItemChan {
		currentMaxAddr := swarm.EmptyAddress
		if len(sampleItems) > 0 {
			currentMaxAddr = sampleItems[len(sampleItems)-1].TransformedAddress
		}

		if le(item.TransformedAddress, currentMaxAddr) || len(sampleItems) < SampleSize {
			start := time.Now()

			stamp, err := chunkstamp.LoadWithBatchID(db.storage.IndexStore(), "reserve", item.ChunkAddress, item.Stamp.BatchID())
			if err != nil {
				stats.StampLoadFailed++
				db.logger.Debug("failed loading stamp", "chunk_address", item.ChunkAddress, "error", err)
				continue
			}

			ch := swarm.NewChunk(item.ChunkAddress, item.ChunkData).WithStamp(stamp)

			// check that the timestamp on the postage stamp is not later than the consensus time.
			if binary.BigEndian.Uint64(ch.Stamp().Timestamp()) > consensusTime {
				stats.NewIgnored++
				continue
			}

			if _, err := db.validStamp(ch); err != nil {
				stats.InvalidStamp++
				db.logger.Debug("invalid stamp for chunk", "chunk_address", ch.Address(), "error", err)
				continue
			}

			stats.ValidStampDuration += time.Since(start)

			item.Stamp = postage.NewStamp(stamp.BatchID(), stamp.Index(), stamp.Timestamp(), stamp.Sig())

			insert(item)
			stats.SampleInserts++
		}
	}
	addStats(stats)

	allStats.TotalDuration = time.Since(t)

	if err := g.Wait(); err != nil {
		db.logger.Info("reserve sampler finished with error", "err", err, "duration", time.Since(t), "storage_radius", storageRadius, "consensus_time_ns", consensusTime, "stats", fmt.Sprintf("%+v", allStats))

		return Sample{}, fmt.Errorf("sampler: failed creating sample: %w", err)
	}

	db.logger.Info("reserve sampler finished", "duration", time.Since(t), "storage_radius", storageRadius, "consensus_time_ns", consensusTime, "stats", fmt.Sprintf("%+v", allStats))

	return Sample{Stats: *allStats, Items: sampleItems}, nil
}
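
// Illustrative sketch (not part of the upstream API): how a caller such as the
// storage incentives agent might invoke ReserveSample for a round. The anchor,
// storage radius, consensus time and minimum batch balance are assumed to be
// supplied by the round logic; deriving consensusTime from the local clock here
// is only a placeholder.
func exampleReserveSample(ctx context.Context, db *DB, anchor []byte, radius uint8, minBalance *big.Int) error {
	consensusTime := uint64(time.Now().UnixNano()) // placeholder; real callers pass the round's consensus timestamp

	sample, err := db.ReserveSample(ctx, anchor, radius, consensusTime, minBalance)
	if err != nil {
		return err
	}

	// The resulting sample holds at most SampleSize items, ordered by
	// transformed address, together with their validated stamps.
	for _, item := range sample.Items {
		_ = item.TransformedAddress
	}
	return nil
}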
// le reports whether a is lexicographically less than b, using byte comparison.
func le(a, b swarm.Address) bool {
	return bytes.Compare(a.Bytes(), b.Bytes()) == -1
}

func (db *DB) batchesBelowValue(until *big.Int) (map[string]struct{}, error) {
	res := make(map[string]struct{})

	if until == nil {
		return res, nil
	}

	err := db.batchstore.Iterate(func(b *postage.Batch) (bool, error) {
		if b.Value.Cmp(until) < 0 {
			res[string(b.ID)] = struct{}{}
		}
		return false, nil
	})

	return res, err
}

func transformedAddress(hasher *bmt.Hasher, chunk swarm.Chunk, chType swarm.ChunkType) (swarm.Address, error) {
	switch chType {
	case swarm.ChunkTypeContentAddressed:
		return transformedAddressCAC(hasher, chunk)
	case swarm.ChunkTypeSingleOwner:
		return transformedAddressSOC(hasher, chunk)
	default:
		return swarm.ZeroAddress, fmt.Errorf("chunk type [%v] is not valid", chType)
	}
}

func transformedAddressCAC(hasher *bmt.Hasher, chunk swarm.Chunk) (swarm.Address, error) {
	hasher.Reset()
	hasher.SetHeader(chunk.Data()[:bmt.SpanSize])

	_, err := hasher.Write(chunk.Data()[bmt.SpanSize:])
	if err != nil {
		return swarm.ZeroAddress, err
	}

	taddr, err := hasher.Hash(nil)
	if err != nil {
		return swarm.ZeroAddress, err
	}

	return swarm.NewAddress(taddr), nil
}

func transformedAddressSOC(hasher *bmt.Hasher, chunk swarm.Chunk) (swarm.Address, error) {
	// Calculate the transformed address from the wrapped chunk
	sChunk, err := soc.FromChunk(chunk)
	if err != nil {
		return swarm.ZeroAddress, err
	}
	taddrCac, err := transformedAddressCAC(hasher, sChunk.WrappedChunk())
	if err != nil {
		return swarm.ZeroAddress, err
	}

	// Hash the SOC address together with the wrapped chunk's transformed
	// address to derive the transformed address for this SOC
	sHasher := swarm.NewHasher()
	if _, err := sHasher.Write(chunk.Address().Bytes()); err != nil {
		return swarm.ZeroAddress, err
	}
	if _, err := sHasher.Write(taddrCac.Bytes()); err != nil {
		return swarm.ZeroAddress, err
	}

	return swarm.NewAddress(sHasher.Sum(nil)), nil
}
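
// Illustrative sketch (not part of the upstream API): the anchor keys the whole
// transformation, so hashing the same chunk under two different anchors yields,
// with overwhelming probability, two different transformed addresses. This is
// what makes each round's sample unpredictable before the anchor is known.
func exampleAnchorsDiffer(t *testing.T) {
	t.Helper()

	ch := chunk.GenerateTestRandomChunk()

	taddr := func(anchor []byte) swarm.Address {
		hasher := bmt.NewHasher(func() hash.Hash { return swarm.NewPrefixHasher(anchor) })
		addr, err := transformedAddressCAC(hasher, ch)
		if err != nil {
			t.Fatal(err)
		}
		return addr
	}

	if taddr([]byte("anchor-1")).Equal(taddr([]byte("anchor-2"))) {
		t.Fatal("expected different transformed addresses for different anchors")
	}
}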
type SampleStats struct {
	TotalDuration             time.Duration
	TotalIterated             int64
	IterationDuration         time.Duration
	SampleInserts             int64
	NewIgnored                int64
	InvalidStamp              int64
	BelowBalanceIgnored       int64
	TaddrDuration             time.Duration
	ValidStampDuration        time.Duration
	BatchesBelowValueDuration time.Duration
	RogueChunk                int64
	ChunkLoadDuration         time.Duration
	ChunkLoadFailed           int64
	StampLoadFailed           int64
}

func (s *SampleStats) add(other SampleStats) {
	s.TotalDuration += other.TotalDuration
	s.TotalIterated += other.TotalIterated
	s.IterationDuration += other.IterationDuration
	s.SampleInserts += other.SampleInserts
	s.NewIgnored += other.NewIgnored
	s.InvalidStamp += other.InvalidStamp
	s.BelowBalanceIgnored += other.BelowBalanceIgnored
	s.TaddrDuration += other.TaddrDuration
	s.ValidStampDuration += other.ValidStampDuration
	s.BatchesBelowValueDuration += other.BatchesBelowValueDuration
	s.RogueChunk += other.RogueChunk
	s.ChunkLoadDuration += other.ChunkLoadDuration
	s.ChunkLoadFailed += other.ChunkLoadFailed
	s.StampLoadFailed += other.StampLoadFailed
}
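
// Illustrative sketch (not part of the upstream API): invariants a caller can
// expect from a sample assembled by ReserveSample: at most SampleSize items,
// ordered by transformed address, lowest first.
func exampleCheckSample(t *testing.T, s Sample) {
	t.Helper()

	if len(s.Items) > SampleSize {
		t.Fatalf("sample has %d items, want at most %d", len(s.Items), SampleSize)
	}

	if !sort.SliceIsSorted(s.Items, func(i, j int) bool {
		return le(s.Items[i].TransformedAddress, s.Items[j].TransformedAddress)
	}) {
		t.Fatal("sample items are not sorted by transformed address")
	}
}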