github.com/ethersphere/bee/v2@v2.2.0/pkg/storer/sample.go (about)

     1  // Copyright 2023 The Swarm Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package storer
     6  
     7  import (
     8  	"bytes"
     9  	"context"
    10  	"encoding/binary"
    11  	"fmt"
    12  	"hash"
    13  	"math/big"
    14  	"sort"
    15  	"sync"
    16  	"testing"
    17  	"time"
    18  
    19  	"github.com/ethersphere/bee/v2/pkg/bmt"
    20  	"github.com/ethersphere/bee/v2/pkg/cac"
    21  	"github.com/ethersphere/bee/v2/pkg/postage"
    22  	"github.com/ethersphere/bee/v2/pkg/soc"
    23  	chunk "github.com/ethersphere/bee/v2/pkg/storage/testing"
    24  	"github.com/ethersphere/bee/v2/pkg/storer/internal/chunkstamp"
    25  	"github.com/ethersphere/bee/v2/pkg/storer/internal/reserve"
    26  	"github.com/ethersphere/bee/v2/pkg/swarm"
    27  	"golang.org/x/sync/errgroup"
    28  )
    29  
// SampleSize is the fixed number of items in a reserve sample.
const SampleSize = 16
    31  
// SampleItem is a single entry of a reserve sample: a chunk together with its
// anchor-salted (transformed) address and its postage stamp.
type SampleItem struct {
	// TransformedAddress is the chunk's address recomputed with the round
	// anchor as the hashing salt; sample items are ordered by this address.
	TransformedAddress swarm.Address
	// ChunkAddress is the chunk's original content/SOC address.
	ChunkAddress swarm.Address
	// ChunkData is the chunk's raw data as returned by Chunk.Data().
	ChunkData []byte
	// Stamp is the postage stamp attached to the chunk.
	Stamp *postage.Stamp
}
    38  
// Sample is the result of a reserve sampling run: the selected items in
// ascending transformed-address order, plus statistics about the run.
type Sample struct {
	// Stats aggregates timing and counter statistics collected while the
	// sample was assembled (empty for samples built via MakeSampleUsingChunks).
	Stats SampleStats
	// Items holds up to SampleSize entries sorted by TransformedAddress.
	Items []SampleItem
}
    43  
    44  // RandSample returns Sample with random values.
    45  func RandSample(t *testing.T, anchor []byte) Sample {
    46  	t.Helper()
    47  
    48  	chunks := make([]swarm.Chunk, SampleSize)
    49  	for i := 0; i < SampleSize; i++ {
    50  		ch := chunk.GenerateTestRandomChunk()
    51  		if i%3 == 0 {
    52  			ch = chunk.GenerateTestRandomSoChunk(t, ch)
    53  		}
    54  		chunks[i] = ch
    55  	}
    56  
    57  	sample, err := MakeSampleUsingChunks(chunks, anchor)
    58  	if err != nil {
    59  		t.Fatal(err)
    60  	}
    61  
    62  	return sample
    63  }
    64  
    65  // MakeSampleUsingChunks returns Sample constructed using supplied chunks.
    66  func MakeSampleUsingChunks(chunks []swarm.Chunk, anchor []byte) (Sample, error) {
    67  	prefixHasherFactory := func() hash.Hash {
    68  		return swarm.NewPrefixHasher(anchor)
    69  	}
    70  	items := make([]SampleItem, len(chunks))
    71  	for i, ch := range chunks {
    72  		tr, err := transformedAddress(bmt.NewHasher(prefixHasherFactory), ch, getChunkType(ch))
    73  		if err != nil {
    74  			return Sample{}, err
    75  		}
    76  
    77  		items[i] = SampleItem{
    78  			TransformedAddress: tr,
    79  			ChunkAddress:       ch.Address(),
    80  			ChunkData:          ch.Data(),
    81  			Stamp:              newStamp(ch.Stamp()),
    82  		}
    83  	}
    84  
    85  	sort.Slice(items, func(i, j int) bool {
    86  		return items[i].TransformedAddress.Compare(items[j].TransformedAddress) == -1
    87  	})
    88  
    89  	return Sample{Items: items}, nil
    90  }
    91  
    92  func newStamp(s swarm.Stamp) *postage.Stamp {
    93  	return postage.NewStamp(s.BatchID(), s.Index(), s.Timestamp(), s.Sig())
    94  }
    95  
    96  func getChunkType(chunk swarm.Chunk) swarm.ChunkType {
    97  	if cac.Valid(chunk) {
    98  		return swarm.ChunkTypeContentAddressed
    99  	} else if soc.Valid(chunk) {
   100  		return swarm.ChunkTypeSingleOwner
   101  	}
   102  	return swarm.ChunkTypeUnspecified
   103  }
   104  
   105  // ReserveSample generates the sample of reserve storage of a node required for the
   106  // storage incentives agent to participate in the lottery round. In order to generate
   107  // this sample we need to iterate through all the chunks in the node's reserve and
   108  // calculate the transformed hashes of all the chunks using the anchor as the salt.
   109  // In order to generate the transformed hashes, we will use the std hmac keyed-hash
   110  // implementation by using the anchor as the key. Nodes need to calculate the sample
   111  // in the most optimal way and there are time restrictions. The lottery round is a
   112  // time based round, so nodes participating in the round need to perform this
   113  // calculation within the round limits.
   114  // In order to optimize this we use a simple pipeline pattern:
   115  // Iterate chunk addresses -> Get the chunk data and calculate transformed hash -> Assemble the sample
func (db *DB) ReserveSample(
	ctx context.Context,
	anchor []byte,
	storageRadius uint8,
	consensusTime uint64,
	minBatchBalance *big.Int,
) (Sample, error) {
	g, ctx := errgroup.WithContext(ctx)
	chunkC := make(chan *reserve.ChunkBinItem, 64)
	allStats := &SampleStats{}
	statsLock := sync.Mutex{}
	// addStats merges per-goroutine stats into allStats under the lock.
	addStats := func(stats SampleStats) {
		statsLock.Lock()
		allStats.add(stats)
		statsLock.Unlock()
	}

	t := time.Now()

	// Batches whose balance is below the minimum are excluded from sampling.
	// NOTE(review): an error here is logged but not returned — sampling then
	// proceeds best-effort with a possibly incomplete exclusion set.
	excludedBatchIDs, err := db.batchesBelowValue(minBatchBalance)
	if err != nil {
		db.logger.Error(err, "get batches below value")
	}

	allStats.BatchesBelowValueDuration = time.Since(t)

	// Phase 1: Iterate chunk addresses
	g.Go(func() error {
		start := time.Now()
		stats := SampleStats{}
		// chunkC is closed here (sender side) so the phase-2 workers' range
		// loops terminate once iteration finishes or errors.
		defer func() {
			stats.IterationDuration = time.Since(start)
			close(chunkC)
			addStats(stats)
		}()

		err := db.reserve.IterateChunksItems(storageRadius, func(chi *reserve.ChunkBinItem) (bool, error) {
			select {
			case chunkC <- chi:
				stats.TotalIterated++
				return false, nil
			case <-ctx.Done():
				// Abort iteration promptly if another pipeline stage failed.
				return false, ctx.Err()
			}
		})
		return err
	})

	// Phase 2: Get the chunk data and calculate transformed hash
	sampleItemChan := make(chan SampleItem, 64)

	prefixHasherFactory := func() hash.Hash {
		return swarm.NewPrefixHasher(anchor)
	}

	// Number of concurrent hashing workers; a tuning constant.
	const workers = 6

	for i := 0; i < workers; i++ {
		g.Go(func() error {
			wstat := SampleStats{}
			// One BMT hasher per worker, reused across chunks (it is reset
			// inside transformedAddress before each use).
			hasher := bmt.NewHasher(prefixHasherFactory)
			defer func() {
				addStats(wstat)
			}()

			for chItem := range chunkC {
				// exclude chunks who's batches balance are below minimum
				if _, found := excludedBatchIDs[string(chItem.BatchID)]; found {
					wstat.BelowBalanceIgnored++

					continue
				}

				// Skip chunks if they are not SOC or CAC
				if chItem.ChunkType != swarm.ChunkTypeSingleOwner &&
					chItem.ChunkType != swarm.ChunkTypeContentAddressed {
					wstat.RogueChunk++
					continue
				}

				chunkLoadStart := time.Now()

				chunk, err := db.ChunkStore().Get(ctx, chItem.Address)
				if err != nil {
					// A missing/unreadable chunk is skipped, not fatal.
					wstat.ChunkLoadFailed++
					db.logger.Debug("failed loading chunk", "chunk_address", chItem.Address, "error", err)
					continue
				}

				wstat.ChunkLoadDuration += time.Since(chunkLoadStart)

				taddrStart := time.Now()
				taddr, err := transformedAddress(hasher, chunk, chItem.ChunkType)
				if err != nil {
					return err
				}
				wstat.TaddrDuration += time.Since(taddrStart)

				select {
				case sampleItemChan <- SampleItem{
					TransformedAddress: taddr,
					ChunkAddress:       chunk.Address(),
					ChunkData:          chunk.Data(),
					// Only the batch ID is carried here; the full stamp is
					// loaded lazily in phase 3, and only for candidates.
					Stamp: postage.NewStamp(chItem.BatchID, nil, nil, nil),
				}:
				case <-ctx.Done():
					return ctx.Err()
				}
			}

			return nil
		})
	}

	// Close the results channel once iteration and all workers are done; any
	// group error is surfaced by the second g.Wait below.
	go func() {
		_ = g.Wait()
		close(sampleItemChan)
	}()

	sampleItems := make([]SampleItem, 0, SampleSize)
	// insert function will insert the new item in its correct place. If the sample
	// size goes beyond what we need we omit the last item.
	insert := func(item SampleItem) {
		added := false
		for i, sItem := range sampleItems {
			if le(item.TransformedAddress, sItem.TransformedAddress) {
				// Shift-insert item at index i.
				sampleItems = append(sampleItems[:i+1], sampleItems[i:]...)
				sampleItems[i] = item
				added = true
				break
			} else if item.TransformedAddress.Compare(sItem.TransformedAddress) == 0 { // ensuring to pass the check order function of redistribution contract
				// replace the chunk at index if the chunk is CAC
				ch := swarm.NewChunk(item.ChunkAddress, item.ChunkData)
				_, err := soc.FromChunk(ch)
				if err != nil {
					// Not a valid SOC, so the incumbent is replaced by item.
					sampleItems[i] = item
				}
				return
			}
		}
		// Trim overflow caused by the shift-insert above.
		if len(sampleItems) > SampleSize {
			sampleItems = sampleItems[:SampleSize]
		}
		// Item sorts after everything present: append only while under size.
		if len(sampleItems) < SampleSize && !added {
			sampleItems = append(sampleItems, item)
		}
	}

	// Phase 3: Assemble the sample. Here we need to assemble only the first SampleSize
	// no of items from the results of the 2nd phase.
	// In this step stamps are loaded and validated only if chunk will be added to sample.
	stats := SampleStats{}
	for item := range sampleItemChan {
		currentMaxAddr := swarm.EmptyAddress
		if len(sampleItems) > 0 {
			currentMaxAddr = sampleItems[len(sampleItems)-1].TransformedAddress
		}

		// Only candidates that could enter the sample pay the cost of stamp
		// loading and validation.
		if le(item.TransformedAddress, currentMaxAddr) || len(sampleItems) < SampleSize {
			start := time.Now()

			stamp, err := chunkstamp.LoadWithBatchID(db.storage.IndexStore(), "reserve", item.ChunkAddress, item.Stamp.BatchID())
			if err != nil {
				stats.StampLoadFailed++
				db.logger.Debug("failed loading stamp", "chunk_address", item.ChunkAddress, "error", err)
				continue
			}

			ch := swarm.NewChunk(item.ChunkAddress, item.ChunkData).WithStamp(stamp)

			// check if the timestamp on the postage stamp is not later than the consensus time.
			if binary.BigEndian.Uint64(ch.Stamp().Timestamp()) > consensusTime {
				stats.NewIgnored++
				continue
			}

			if _, err := db.validStamp(ch); err != nil {
				stats.InvalidStamp++
				db.logger.Debug("invalid stamp for chunk", "chunk_address", ch.Address(), "error", err)
				continue
			}

			stats.ValidStampDuration += time.Since(start)

			// Replace the batch-ID-only placeholder with the full stamp.
			item.Stamp = postage.NewStamp(stamp.BatchID(), stamp.Index(), stamp.Timestamp(), stamp.Sig())

			insert(item)
			stats.SampleInserts++
		}
	}
	addStats(stats)

	allStats.TotalDuration = time.Since(t)

	// sampleItemChan is closed only after g.Wait() in the goroutine above, so
	// this Wait returns immediately; it exists to surface the group error.
	if err := g.Wait(); err != nil {
		db.logger.Info("reserve sampler finished with error", "err", err, "duration", time.Since(t), "storage_radius", storageRadius, "consensus_time_ns", consensusTime, "stats", fmt.Sprintf("%+v", allStats))

		return Sample{}, fmt.Errorf("sampler: failed creating sample: %w", err)
	}

	db.logger.Info("reserve sampler finished", "duration", time.Since(t), "storage_radius", storageRadius, "consensus_time_ns", consensusTime, "stats", fmt.Sprintf("%+v", allStats))

	return Sample{Stats: *allStats, Items: sampleItems}, nil
}
   320  
   321  // less function uses the byte compare to check for lexicographic ordering
   322  func le(a, b swarm.Address) bool {
   323  	return bytes.Compare(a.Bytes(), b.Bytes()) == -1
   324  }
   325  
   326  func (db *DB) batchesBelowValue(until *big.Int) (map[string]struct{}, error) {
   327  	res := make(map[string]struct{})
   328  
   329  	if until == nil {
   330  		return res, nil
   331  	}
   332  
   333  	err := db.batchstore.Iterate(func(b *postage.Batch) (bool, error) {
   334  		if b.Value.Cmp(until) < 0 {
   335  			res[string(b.ID)] = struct{}{}
   336  		}
   337  		return false, nil
   338  	})
   339  
   340  	return res, err
   341  }
   342  
   343  func transformedAddress(hasher *bmt.Hasher, chunk swarm.Chunk, chType swarm.ChunkType) (swarm.Address, error) {
   344  	switch chType {
   345  	case swarm.ChunkTypeContentAddressed:
   346  		return transformedAddressCAC(hasher, chunk)
   347  	case swarm.ChunkTypeSingleOwner:
   348  		return transformedAddressSOC(hasher, chunk)
   349  	default:
   350  		return swarm.ZeroAddress, fmt.Errorf("chunk type [%v] is not valid", chType)
   351  	}
   352  }
   353  
   354  func transformedAddressCAC(hasher *bmt.Hasher, chunk swarm.Chunk) (swarm.Address, error) {
   355  	hasher.Reset()
   356  	hasher.SetHeader(chunk.Data()[:bmt.SpanSize])
   357  
   358  	_, err := hasher.Write(chunk.Data()[bmt.SpanSize:])
   359  	if err != nil {
   360  		return swarm.ZeroAddress, err
   361  	}
   362  
   363  	taddr, err := hasher.Hash(nil)
   364  	if err != nil {
   365  		return swarm.ZeroAddress, err
   366  	}
   367  
   368  	return swarm.NewAddress(taddr), nil
   369  }
   370  
   371  func transformedAddressSOC(hasher *bmt.Hasher, chunk swarm.Chunk) (swarm.Address, error) {
   372  	// Calculate transformed address from wrapped chunk
   373  	sChunk, err := soc.FromChunk(chunk)
   374  	if err != nil {
   375  		return swarm.ZeroAddress, err
   376  	}
   377  	taddrCac, err := transformedAddressCAC(hasher, sChunk.WrappedChunk())
   378  	if err != nil {
   379  		return swarm.ZeroAddress, err
   380  	}
   381  
   382  	// Hash address and transformed address to make transformed address for this SOC
   383  	sHasher := swarm.NewHasher()
   384  	if _, err := sHasher.Write(chunk.Address().Bytes()); err != nil {
   385  		return swarm.ZeroAddress, err
   386  	}
   387  	if _, err := sHasher.Write(taddrCac.Bytes()); err != nil {
   388  		return swarm.ZeroAddress, err
   389  	}
   390  
   391  	return swarm.NewAddress(sHasher.Sum(nil)), nil
   392  }
   393  
// SampleStats aggregates counters and timings collected during a reserve
// sampling run. Values from multiple goroutines are combined with add.
type SampleStats struct {
	// TotalDuration is the wall-clock time of the whole sampling run.
	TotalDuration time.Duration
	// TotalIterated counts chunk items produced by reserve iteration.
	TotalIterated int64
	// IterationDuration is the time spent iterating the reserve.
	IterationDuration time.Duration
	// SampleInserts counts items passed to the sample insert step.
	SampleInserts int64
	// NewIgnored counts chunks whose stamp timestamp is later than the
	// consensus time.
	NewIgnored int64
	// InvalidStamp counts chunks that failed stamp validation.
	InvalidStamp int64
	// BelowBalanceIgnored counts chunks skipped for low batch balance.
	BelowBalanceIgnored int64
	// TaddrDuration is the time spent computing transformed addresses.
	TaddrDuration time.Duration
	// ValidStampDuration is the time spent loading and validating stamps.
	ValidStampDuration time.Duration
	// BatchesBelowValueDuration is the time spent collecting excluded batches.
	BatchesBelowValueDuration time.Duration
	// RogueChunk counts chunks that are neither CAC nor SOC.
	RogueChunk int64
	// ChunkLoadDuration is the time spent loading chunk data.
	ChunkLoadDuration time.Duration
	// ChunkLoadFailed counts chunks that could not be loaded.
	ChunkLoadFailed int64
	// StampLoadFailed counts stamps that could not be loaded.
	StampLoadFailed int64
}
   410  
// add accumulates every counter and duration of other into s. It performs no
// locking; callers must synchronize access themselves.
func (s *SampleStats) add(other SampleStats) {
	s.TotalDuration += other.TotalDuration
	s.TotalIterated += other.TotalIterated
	s.IterationDuration += other.IterationDuration
	s.SampleInserts += other.SampleInserts
	s.NewIgnored += other.NewIgnored
	s.InvalidStamp += other.InvalidStamp
	s.BelowBalanceIgnored += other.BelowBalanceIgnored
	s.TaddrDuration += other.TaddrDuration
	s.ValidStampDuration += other.ValidStampDuration
	s.BatchesBelowValueDuration += other.BatchesBelowValueDuration
	s.RogueChunk += other.RogueChunk
	s.ChunkLoadDuration += other.ChunkLoadDuration
	s.ChunkLoadFailed += other.ChunkLoadFailed
	s.StampLoadFailed += other.StampLoadFailed
}