github.com/ethersphere/bee/v2@v2.2.0/pkg/storer/internal/chunkstore/chunkstore.go (about)

     1  // Copyright 2022 The Swarm Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package chunkstore
     6  
     7  import (
     8  	"context"
     9  	"encoding/binary"
    10  	"errors"
    11  	"fmt"
    12  	"time"
    13  
    14  	"github.com/ethersphere/bee/v2/pkg/sharky"
    15  	"github.com/ethersphere/bee/v2/pkg/storage"
    16  	"github.com/ethersphere/bee/v2/pkg/storage/storageutil"
    17  	"github.com/ethersphere/bee/v2/pkg/swarm"
    18  	"golang.org/x/exp/slices"
    19  )
    20  
    21  var (
    22  	// errMarshalInvalidRetrievalIndexAddress is returned if the RetrievalIndexItem address is zero during marshaling.
    23  	errMarshalInvalidRetrievalIndexAddress = errors.New("marshal RetrievalIndexItem: address is zero")
    24  	// errMarshalInvalidRetrievalIndexLocation is returned if the RetrievalIndexItem location is invalid during marshaling.
    25  	errMarshalInvalidRetrievalIndexLocation = errors.New("marshal RetrievalIndexItem: location is invalid")
    26  	// errUnmarshalInvalidRetrievalIndexSize is returned during unmarshaling if the passed buffer is not the expected size.
    27  	errUnmarshalInvalidRetrievalIndexSize = errors.New("unmarshal RetrievalIndexItem: invalid size")
    28  	// errUnmarshalInvalidRetrievalIndexLocationBytes is returned during unmarshaling if the location buffer is invalid.
    29  	errUnmarshalInvalidRetrievalIndexLocationBytes = errors.New("unmarshal RetrievalIndexItem: invalid location bytes")
    30  )
    31  
// RetrievalIndexItemSize is the exact length in bytes of a marshaled
// RetrievalIndexItem: the address (swarm.HashSize), the timestamp (8 bytes),
// the sharky location (sharky.LocationSize) and the reference count (4 bytes).
const RetrievalIndexItemSize = swarm.HashSize + 8 + sharky.LocationSize + 4

// Compile-time check that *RetrievalIndexItem implements storage.Item.
var _ storage.Item = (*RetrievalIndexItem)(nil)
    35  
    36  // Sharky provides an abstraction for the sharky.Store operations used in the
    37  // chunkstore. This allows us to be more flexible in passing in the sharky instance
    38  // to chunkstore. For eg, check the TxChunkStore implementation in this pkg.
    39  type Sharky interface {
    40  	Read(context.Context, sharky.Location, []byte) error
    41  	Write(context.Context, []byte) (sharky.Location, error)
    42  	Release(context.Context, sharky.Location) error
    43  }
    44  
    45  func Get(ctx context.Context, r storage.Reader, s storage.Sharky, addr swarm.Address) (swarm.Chunk, error) {
    46  	rIdx := &RetrievalIndexItem{Address: addr}
    47  	err := r.Get(rIdx)
    48  	if err != nil {
    49  		return nil, fmt.Errorf("chunk store: failed reading retrievalIndex for address %s: %w", addr, err)
    50  	}
    51  	return readChunk(ctx, s, rIdx)
    52  }
    53  
    54  // helper to read chunk from retrievalIndex.
    55  func readChunk(ctx context.Context, s storage.Sharky, rIdx *RetrievalIndexItem) (swarm.Chunk, error) {
    56  	buf := make([]byte, rIdx.Location.Length)
    57  	err := s.Read(ctx, rIdx.Location, buf)
    58  	if err != nil {
    59  		return nil, fmt.Errorf(
    60  			"chunk store: failed reading location: %v for chunk %s from sharky: %w",
    61  			rIdx.Location, rIdx.Address, err,
    62  		)
    63  	}
    64  
    65  	return swarm.NewChunk(rIdx.Address, buf), nil
    66  }
    67  
    68  func Has(_ context.Context, r storage.Reader, addr swarm.Address) (bool, error) {
    69  	return r.Has(&RetrievalIndexItem{Address: addr})
    70  }
    71  
    72  func Put(ctx context.Context, s storage.IndexStore, sh storage.Sharky, ch swarm.Chunk) error {
    73  	var (
    74  		rIdx = &RetrievalIndexItem{Address: ch.Address()}
    75  		loc  sharky.Location
    76  	)
    77  	err := s.Get(rIdx)
    78  	switch {
    79  	case errors.Is(err, storage.ErrNotFound):
    80  		// if this is the first instance of this address, we should store the chunk
    81  		// in sharky and create the new indexes.
    82  		loc, err = sh.Write(ctx, ch.Data())
    83  		if err != nil {
    84  			return fmt.Errorf("chunk store: write to sharky failed: %w", err)
    85  		}
    86  		rIdx.Location = loc
    87  		rIdx.Timestamp = uint64(time.Now().Unix())
    88  	case err != nil:
    89  		return fmt.Errorf("chunk store: failed to read: %w", err)
    90  	}
    91  
    92  	rIdx.RefCnt++
    93  
    94  	return s.Put(rIdx)
    95  }
    96  
    97  func Replace(ctx context.Context, s storage.IndexStore, sh storage.Sharky, ch swarm.Chunk) error {
    98  	rIdx := &RetrievalIndexItem{Address: ch.Address()}
    99  	err := s.Get(rIdx)
   100  	if err != nil {
   101  		return fmt.Errorf("chunk store: failed to read retrievalIndex for address %s: %w", ch.Address(), err)
   102  	}
   103  
   104  	err = sh.Release(ctx, rIdx.Location)
   105  	if err != nil {
   106  		return fmt.Errorf("chunkstore: failed to release sharky location: %w", err)
   107  	}
   108  
   109  	loc, err := sh.Write(ctx, ch.Data())
   110  	if err != nil {
   111  		return fmt.Errorf("chunk store: write to sharky failed: %w", err)
   112  	}
   113  	rIdx.Location = loc
   114  	rIdx.Timestamp = uint64(time.Now().Unix())
   115  	return s.Put(rIdx)
   116  }
   117  
   118  func Delete(ctx context.Context, s storage.IndexStore, sh storage.Sharky, addr swarm.Address) error {
   119  	rIdx := &RetrievalIndexItem{Address: addr}
   120  	err := s.Get(rIdx)
   121  	switch {
   122  	case errors.Is(err, storage.ErrNotFound):
   123  		return nil
   124  	case err != nil:
   125  		return fmt.Errorf("chunk store: failed to read retrievalIndex for address %s: %w", addr, err)
   126  	default:
   127  		rIdx.RefCnt--
   128  	}
   129  
   130  	if rIdx.RefCnt > 0 { // If there are more references for this we don't delete it from sharky.
   131  		err = s.Put(rIdx)
   132  		if err != nil {
   133  			return fmt.Errorf("chunk store: failed updating retrievalIndex for address %s: %w", addr, err)
   134  		}
   135  		return nil
   136  	}
   137  
   138  	return errors.Join(
   139  		sh.Release(ctx, rIdx.Location),
   140  		s.Delete(rIdx),
   141  	)
   142  }
   143  
   144  func Iterate(ctx context.Context, s storage.IndexStore, sh storage.Sharky, fn storage.IterateChunkFn) error {
   145  	return s.Iterate(
   146  		storage.Query{
   147  			Factory: func() storage.Item { return new(RetrievalIndexItem) },
   148  		},
   149  		func(r storage.Result) (bool, error) {
   150  			ch, err := readChunk(ctx, sh, r.Entry.(*RetrievalIndexItem))
   151  			if err != nil {
   152  				return true, err
   153  			}
   154  			return fn(ch)
   155  		},
   156  	)
   157  }
   158  
   159  func IterateChunkEntries(st storage.Reader, fn func(swarm.Address, uint32) (bool, error)) error {
   160  	return st.Iterate(
   161  		storage.Query{
   162  			Factory: func() storage.Item { return new(RetrievalIndexItem) },
   163  		},
   164  		func(r storage.Result) (bool, error) {
   165  			item := r.Entry.(*RetrievalIndexItem)
   166  			addr := item.Address
   167  			return fn(addr, item.RefCnt)
   168  		},
   169  	)
   170  }
   171  
   172  type LocationResult struct {
   173  	Err      error
   174  	Location sharky.Location
   175  }
   176  
   177  type IterateResult struct {
   178  	Err  error
   179  	Item *RetrievalIndexItem
   180  }
   181  
   182  // IterateLocations iterates over entire retrieval index and plucks only sharky location.
   183  func IterateLocations(
   184  	ctx context.Context,
   185  	st storage.Reader,
   186  ) <-chan LocationResult {
   187  
   188  	locationResultC := make(chan LocationResult)
   189  
   190  	go func() {
   191  		defer close(locationResultC)
   192  
   193  		err := st.Iterate(storage.Query{
   194  			Factory: func() storage.Item { return new(RetrievalIndexItem) },
   195  		}, func(r storage.Result) (bool, error) {
   196  			entry := r.Entry.(*RetrievalIndexItem)
   197  			result := LocationResult{Location: entry.Location}
   198  
   199  			select {
   200  			case <-ctx.Done():
   201  				return true, ctx.Err()
   202  			case locationResultC <- result:
   203  			}
   204  
   205  			return false, nil
   206  		})
   207  		if err != nil {
   208  			result := LocationResult{Err: fmt.Errorf("iterate retrieval index error: %w", err)}
   209  
   210  			select {
   211  			case <-ctx.Done():
   212  			case locationResultC <- result:
   213  			}
   214  		}
   215  	}()
   216  
   217  	return locationResultC
   218  }
   219  
   220  // Iterate iterates over entire retrieval index with a call back.
   221  func IterateItems(st storage.Store, callBackFunc func(*RetrievalIndexItem) error) error {
   222  	return st.Iterate(storage.Query{
   223  		Factory: func() storage.Item { return new(RetrievalIndexItem) },
   224  	}, func(r storage.Result) (bool, error) {
   225  		entry := r.Entry.(*RetrievalIndexItem)
   226  		return false, callBackFunc(entry)
   227  	})
   228  }
   229  
   230  // RetrievalIndexItem is the index which gives us the sharky location from the swarm.Address.
   231  // The RefCnt stores the reference of each time a Put operation is issued on this Address.
   232  type RetrievalIndexItem struct {
   233  	Address   swarm.Address
   234  	Timestamp uint64
   235  	Location  sharky.Location
   236  	RefCnt    uint32
   237  }
   238  
   239  func (r *RetrievalIndexItem) ID() string { return r.Address.ByteString() }
   240  
   241  func (RetrievalIndexItem) Namespace() string { return "retrievalIdx" }
   242  
   243  // Stored in bytes as:
   244  // |--Address(32)--|--Timestamp(8)--|--Location(7)--|--RefCnt(4)--|
   245  func (r *RetrievalIndexItem) Marshal() ([]byte, error) {
   246  	if r.Address.IsZero() {
   247  		return nil, errMarshalInvalidRetrievalIndexAddress
   248  	}
   249  
   250  	buf := make([]byte, RetrievalIndexItemSize)
   251  	i := 0
   252  
   253  	locBuf, err := r.Location.MarshalBinary()
   254  	if err != nil {
   255  		return nil, errMarshalInvalidRetrievalIndexLocation
   256  	}
   257  
   258  	copy(buf[i:swarm.HashSize], r.Address.Bytes())
   259  	i += swarm.HashSize
   260  
   261  	binary.LittleEndian.PutUint64(buf[i:i+8], r.Timestamp)
   262  	i += 8
   263  
   264  	copy(buf[i:i+sharky.LocationSize], locBuf)
   265  	i += sharky.LocationSize
   266  
   267  	binary.LittleEndian.PutUint32(buf[i:], r.RefCnt)
   268  
   269  	return buf, nil
   270  }
   271  
   272  func (r *RetrievalIndexItem) Unmarshal(buf []byte) error {
   273  	if len(buf) != RetrievalIndexItemSize {
   274  		return errUnmarshalInvalidRetrievalIndexSize
   275  	}
   276  
   277  	i := 0
   278  	ni := new(RetrievalIndexItem)
   279  
   280  	ni.Address = swarm.NewAddress(slices.Clone(buf[i : i+swarm.HashSize]))
   281  	i += swarm.HashSize
   282  
   283  	ni.Timestamp = binary.LittleEndian.Uint64(buf[i : i+8])
   284  	i += 8
   285  
   286  	loc := new(sharky.Location)
   287  	if err := loc.UnmarshalBinary(buf[i : i+sharky.LocationSize]); err != nil {
   288  		return errUnmarshalInvalidRetrievalIndexLocationBytes
   289  	}
   290  	ni.Location = *loc
   291  	i += sharky.LocationSize
   292  
   293  	ni.RefCnt = binary.LittleEndian.Uint32(buf[i:])
   294  
   295  	*r = *ni
   296  	return nil
   297  }
   298  
   299  func (r *RetrievalIndexItem) Clone() storage.Item {
   300  	if r == nil {
   301  		return nil
   302  	}
   303  	return &RetrievalIndexItem{
   304  		Address:   r.Address.Clone(),
   305  		Timestamp: r.Timestamp,
   306  		Location:  r.Location,
   307  		RefCnt:    r.RefCnt,
   308  	}
   309  }
   310  
   311  func (r RetrievalIndexItem) String() string {
   312  	return storageutil.JoinFields(r.Namespace(), r.ID())
   313  }