github.com/ethersphere/bee/v2@v2.2.0/pkg/storer/internal/cache/cache.go (about)

     1  // Copyright 2022 The Swarm Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package cache
     6  
     7  import (
     8  	"context"
     9  	"encoding/binary"
    10  	"errors"
    11  	"fmt"
    12  	"runtime"
    13  	"strconv"
    14  	"sync/atomic"
    15  	"time"
    16  
    17  	storage "github.com/ethersphere/bee/v2/pkg/storage"
    18  	"github.com/ethersphere/bee/v2/pkg/storer/internal/transaction"
    19  	"github.com/ethersphere/bee/v2/pkg/swarm"
    20  	"golang.org/x/sync/errgroup"
    21  	"resenje.org/multex"
    22  )
    23  
// now returns the current time. Cache entries are stamped through this
// variable rather than by calling time.Now directly (presumably so tests can
// substitute a fixed clock; verify against the test files).
var now = time.Now

// CacheEntryItem is an alias of the unexported cacheEntry type.
// exported for migration
type CacheEntryItem = cacheEntry

// cacheEntrySize is the length of a marshaled cacheEntry: the chunk address
// (swarm.HashSize bytes) followed by the 8-byte access timestamp.
const cacheEntrySize = swarm.HashSize + 8

// Compile-time check that *cacheEntry implements storage.Item.
var _ storage.Item = (*cacheEntry)(nil)

// Sentinel errors returned by cacheEntry.Marshal and cacheEntry.Unmarshal.
var (
	errMarshalCacheEntryInvalidAddress   = errors.New("marshal cacheEntry: invalid address")
	errMarshalCacheEntryInvalidTimestamp = errors.New("marshal cacheEntry: invalid timestamp")
	errUnmarshalCacheEntryInvalidSize    = errors.New("unmarshal cacheEntry: invalid size")
)
    38  
// Cache is the part of the localstore which keeps track of the chunks that are not
// part of the reserve but are potentially useful to store for obtaining bandwidth
// incentives.
//
// size is the number of cache entries: it is seeded from the store in New,
// incremented by Putter and ShallowCopy and decremented by RemoveOldest.
// capacity is the value passed to New. glock is a keyed lock that Putter,
// Getter and RemoveOldest take per chunk address so that operations on the
// same chunk are serialized. NOTE: ShallowCopy does not take glock yet (see
// the TODO in ShallowCopy).
type Cache struct {
	size     atomic.Int64
	capacity int
	glock    *multex.Multex // per-address lock shared by Putter, Getter and RemoveOldest.
}
    47  
    48  // New creates a new Cache component with the specified capacity. The store is used
    49  // here only to read the initial state of the cache before shutdown if there was
    50  // any.
    51  func New(ctx context.Context, store storage.Reader, capacity uint64) (*Cache, error) {
    52  	count, err := store.Count(&cacheEntry{})
    53  	if err != nil {
    54  		return nil, fmt.Errorf("failed counting cache entries: %w", err)
    55  	}
    56  
    57  	c := &Cache{capacity: int(capacity), glock: multex.New()}
    58  	c.size.Store(int64(count))
    59  
    60  	return c, nil
    61  }
    62  
    63  // Size returns the current size of the cache.
    64  func (c *Cache) Size() uint64 {
    65  	return uint64(c.size.Load())
    66  }
    67  
    68  // Capacity returns the capacity of the cache.
    69  func (c *Cache) Capacity() uint64 { return uint64(c.capacity) }
    70  
    71  // Putter returns a Storage.Putter instance which adds the chunk to the underlying
    72  // chunkstore and also adds a Cache entry for the chunk.
    73  func (c *Cache) Putter(store transaction.Storage) storage.Putter {
    74  	return storage.PutterFunc(func(ctx context.Context, chunk swarm.Chunk) error {
    75  
    76  		c.glock.Lock(chunk.Address().ByteString())
    77  		defer c.glock.Unlock(chunk.Address().ByteString())
    78  
    79  		trx, done := store.NewTransaction(ctx)
    80  		defer done()
    81  
    82  		newEntry := &cacheEntry{Address: chunk.Address()}
    83  		found, err := trx.IndexStore().Has(newEntry)
    84  		if err != nil {
    85  			return fmt.Errorf("failed checking has cache entry: %w", err)
    86  		}
    87  
    88  		// if chunk is already part of cache, return found.
    89  		if found {
    90  			return nil
    91  		}
    92  
    93  		newEntry.AccessTimestamp = now().UnixNano()
    94  		err = trx.IndexStore().Put(newEntry)
    95  		if err != nil {
    96  			return fmt.Errorf("failed adding cache entry: %w", err)
    97  		}
    98  
    99  		err = trx.IndexStore().Put(&cacheOrderIndex{
   100  			Address:         newEntry.Address,
   101  			AccessTimestamp: newEntry.AccessTimestamp,
   102  		})
   103  		if err != nil {
   104  			return fmt.Errorf("failed adding cache order index: %w", err)
   105  		}
   106  
   107  		err = trx.ChunkStore().Put(ctx, chunk)
   108  		if err != nil {
   109  			return fmt.Errorf("failed adding chunk to chunkstore: %w", err)
   110  		}
   111  
   112  		if err := trx.Commit(); err != nil {
   113  			return fmt.Errorf("batch commit: %w", err)
   114  		}
   115  
   116  		c.size.Add(1)
   117  
   118  		return nil
   119  	})
   120  }
   121  
   122  // Getter returns a Storage.Getter instance which checks if the chunks accessed are
   123  // part of cache it will update the cache indexes. If the operation to update the
   124  // cache indexes fail, we need to fail the operation as this should signal the user
   125  // of this getter to rollback the operation.
   126  func (c *Cache) Getter(store transaction.Storage) storage.Getter {
   127  	return storage.GetterFunc(func(ctx context.Context, address swarm.Address) (swarm.Chunk, error) {
   128  
   129  		c.glock.Lock(address.ByteString())
   130  		defer c.glock.Unlock(address.ByteString())
   131  
   132  		trx, done := store.NewTransaction(ctx)
   133  		defer done()
   134  
   135  		ch, err := trx.ChunkStore().Get(ctx, address)
   136  		if err != nil {
   137  			return nil, err
   138  		}
   139  
   140  		// check if there is an entry in Cache. As this is the download path, we do
   141  		// a best-effort operation. So in case of any error we return the chunk.
   142  		entry := &cacheEntry{Address: address}
   143  		err = trx.IndexStore().Get(entry)
   144  		if err != nil {
   145  			if errors.Is(err, storage.ErrNotFound) {
   146  				return ch, nil
   147  			}
   148  			return nil, fmt.Errorf("unexpected error getting indexstore entry: %w", err)
   149  		}
   150  
   151  		err = trx.IndexStore().Delete(&cacheOrderIndex{
   152  			Address:         entry.Address,
   153  			AccessTimestamp: entry.AccessTimestamp,
   154  		})
   155  		if err != nil {
   156  			return nil, fmt.Errorf("failed deleting cache order index: %w", err)
   157  		}
   158  
   159  		entry.AccessTimestamp = now().UnixNano()
   160  		err = trx.IndexStore().Put(&cacheOrderIndex{
   161  			Address:         entry.Address,
   162  			AccessTimestamp: entry.AccessTimestamp,
   163  		})
   164  		if err != nil {
   165  			return nil, fmt.Errorf("failed adding cache order index: %w", err)
   166  		}
   167  
   168  		err = trx.IndexStore().Put(entry)
   169  		if err != nil {
   170  			return nil, fmt.Errorf("failed adding cache entry: %w", err)
   171  		}
   172  
   173  		err = trx.Commit()
   174  		if err != nil {
   175  			return nil, fmt.Errorf("batch commit: %w", err)
   176  		}
   177  
   178  		return ch, nil
   179  	})
   180  }
   181  
   182  // RemoveOldest removes the oldest cache entries from the store. The count
   183  // specifies the number of entries to remove.
   184  func (c *Cache) RemoveOldest(ctx context.Context, st transaction.Storage, count uint64) error {
   185  
   186  	if count <= 0 {
   187  		return nil
   188  	}
   189  
   190  	evictItems := make([]*cacheEntry, 0, count)
   191  	err := st.IndexStore().Iterate(
   192  		storage.Query{
   193  			Factory:      func() storage.Item { return &cacheOrderIndex{} },
   194  			ItemProperty: storage.QueryItemID,
   195  		},
   196  		func(res storage.Result) (bool, error) {
   197  			accessTime, addr, err := idFromKey(res.ID)
   198  			if err != nil {
   199  				return false, fmt.Errorf("failed to parse cache order index %s: %w", res.ID, err)
   200  			}
   201  			entry := &cacheEntry{
   202  				Address:         addr,
   203  				AccessTimestamp: accessTime,
   204  			}
   205  			evictItems = append(evictItems, entry)
   206  			count--
   207  			return count == 0, nil
   208  		},
   209  	)
   210  	if err != nil {
   211  		return fmt.Errorf("failed iterating over cache order index: %w", err)
   212  	}
   213  
   214  	eg, ctx := errgroup.WithContext(ctx)
   215  	eg.SetLimit(runtime.NumCPU())
   216  
   217  	for _, item := range evictItems {
   218  		func(item *cacheEntry) {
   219  			eg.Go(func() error {
   220  				c.glock.Lock(item.Address.ByteString())
   221  				defer c.glock.Unlock(item.Address.ByteString())
   222  				err := st.Run(ctx, func(s transaction.Store) error {
   223  					return errors.Join(
   224  						s.IndexStore().Delete(item),
   225  						s.IndexStore().Delete(&cacheOrderIndex{
   226  							Address:         item.Address,
   227  							AccessTimestamp: item.AccessTimestamp,
   228  						}),
   229  						s.ChunkStore().Delete(ctx, item.Address),
   230  					)
   231  				})
   232  				if err != nil {
   233  					return err
   234  				}
   235  				c.size.Add(-1)
   236  				return nil
   237  			})
   238  		}(item)
   239  	}
   240  
   241  	return eg.Wait()
   242  }
   243  
   244  // ShallowCopy creates cache entries with the expectation that the chunk already exists in the chunkstore.
   245  func (c *Cache) ShallowCopy(
   246  	ctx context.Context,
   247  	store transaction.Storage,
   248  	addrs ...swarm.Address,
   249  ) (err error) {
   250  
   251  	// TODO: add proper mutex locking before usage
   252  
   253  	entries := make([]*cacheEntry, 0, len(addrs))
   254  
   255  	defer func() {
   256  		if err != nil {
   257  			err = errors.Join(err,
   258  				store.Run(context.Background(), func(s transaction.Store) error {
   259  					for _, entry := range entries {
   260  						dErr := s.ChunkStore().Delete(context.Background(), entry.Address)
   261  						if dErr != nil {
   262  							return dErr
   263  						}
   264  					}
   265  					return nil
   266  				}),
   267  			)
   268  		}
   269  	}()
   270  
   271  	for _, addr := range addrs {
   272  		entry := &cacheEntry{Address: addr, AccessTimestamp: now().UnixNano()}
   273  		if has, err := store.IndexStore().Has(entry); err == nil && has {
   274  			// Since the caller has previously referenced the chunk (+1 refCnt), and if the chunk is already referenced
   275  			// by the cache store (+1 refCnt), then we must decrement the refCnt by one ( -1 refCnt to bring the total to +1).
   276  			// See https://github.com/ethersphere/bee/issues/4530.
   277  			_ = store.Run(ctx, func(s transaction.Store) error { return s.ChunkStore().Delete(ctx, addr) })
   278  			continue
   279  		}
   280  		entries = append(entries, entry)
   281  	}
   282  
   283  	if len(entries) == 0 {
   284  		return nil
   285  	}
   286  
   287  	//consider only the amount that can fit, the rest should be deleted from the chunkstore.
   288  	if len(entries) > c.capacity {
   289  		for _, addr := range entries[:len(entries)-c.capacity] {
   290  			_ = store.Run(ctx, func(s transaction.Store) error { return s.ChunkStore().Delete(ctx, addr.Address) })
   291  		}
   292  		entries = entries[len(entries)-c.capacity:]
   293  	}
   294  
   295  	err = store.Run(ctx, func(s transaction.Store) error {
   296  		for _, entry := range entries {
   297  			err = s.IndexStore().Put(entry)
   298  			if err != nil {
   299  				return fmt.Errorf("failed adding entry %s: %w", entry, err)
   300  			}
   301  			err = s.IndexStore().Put(&cacheOrderIndex{
   302  				Address:         entry.Address,
   303  				AccessTimestamp: entry.AccessTimestamp,
   304  			})
   305  			if err != nil {
   306  				return fmt.Errorf("failed adding cache order index: %w", err)
   307  			}
   308  		}
   309  		return nil
   310  	})
   311  	if err != nil {
   312  		return err
   313  	}
   314  
   315  	c.size.Add(int64(len(entries)))
   316  	return nil
   317  }
   318  
// cacheEntry is the index item kept for every cached chunk. It is keyed by
// the chunk address and stores when the chunk was last put or read.
type cacheEntry struct {
	// Address is the address of the cached chunk.
	Address swarm.Address
	// AccessTimestamp is the last access time in Unix nanoseconds.
	AccessTimestamp int64
}
   323  
   324  func (c *cacheEntry) ID() string { return c.Address.ByteString() }
   325  
   326  func (cacheEntry) Namespace() string { return "cacheEntry" }
   327  
   328  func (c *cacheEntry) Marshal() ([]byte, error) {
   329  	entryBuf := make([]byte, cacheEntrySize)
   330  	if c.Address.IsZero() {
   331  		return nil, errMarshalCacheEntryInvalidAddress
   332  	}
   333  	if c.AccessTimestamp <= 0 {
   334  		return nil, errMarshalCacheEntryInvalidTimestamp
   335  	}
   336  	copy(entryBuf[:swarm.HashSize], c.Address.Bytes())
   337  	binary.LittleEndian.PutUint64(entryBuf[swarm.HashSize:], uint64(c.AccessTimestamp))
   338  	return entryBuf, nil
   339  }
   340  
   341  func (c *cacheEntry) Unmarshal(buf []byte) error {
   342  	if len(buf) != cacheEntrySize {
   343  		return errUnmarshalCacheEntryInvalidSize
   344  	}
   345  	newEntry := new(cacheEntry)
   346  	newEntry.Address = swarm.NewAddress(append(make([]byte, 0, swarm.HashSize), buf[:swarm.HashSize]...))
   347  	newEntry.AccessTimestamp = int64(binary.LittleEndian.Uint64(buf[swarm.HashSize:]))
   348  	*c = *newEntry
   349  	return nil
   350  }
   351  
   352  func (c *cacheEntry) Clone() storage.Item {
   353  	if c == nil {
   354  		return nil
   355  	}
   356  	return &cacheEntry{
   357  		Address:         c.Address.Clone(),
   358  		AccessTimestamp: c.AccessTimestamp,
   359  	}
   360  }
   361  
   362  func (c cacheEntry) String() string {
   363  	return fmt.Sprintf(
   364  		"cacheEntry { Address: %s AccessTimestamp: %s }",
   365  		c.Address,
   366  		time.Unix(c.AccessTimestamp, 0).UTC().Format(time.RFC3339),
   367  	)
   368  }
   369  
// Compile-time check that *cacheOrderIndex implements storage.Item.
var _ storage.Item = (*cacheOrderIndex)(nil)

// cacheOrderIndex is a key-only index item: its ID is the access timestamp
// followed by the chunk address, and it marshals to no value. RemoveOldest
// iterates these items to pick eviction candidates.
type cacheOrderIndex struct {
	// AccessTimestamp is the access time in Unix nanoseconds, mirroring the
	// value stored in the matching cacheEntry.
	AccessTimestamp int64
	// Address is the address of the cached chunk.
	Address swarm.Address
}
   376  
   377  func keyFromID(ts int64, addr swarm.Address) string {
   378  	tsStr := fmt.Sprintf("%d", ts)
   379  	return tsStr + addr.ByteString()
   380  }
   381  
   382  func idFromKey(key string) (int64, swarm.Address, error) {
   383  	ts := key[:len(key)-swarm.HashSize]
   384  	addr := key[len(key)-swarm.HashSize:]
   385  	n, err := strconv.ParseInt(ts, 10, 64)
   386  	if err != nil {
   387  		return 0, swarm.ZeroAddress, err
   388  	}
   389  	return n, swarm.NewAddress([]byte(addr)), nil
   390  }
   391  
   392  func (c *cacheOrderIndex) ID() string {
   393  	return keyFromID(c.AccessTimestamp, c.Address)
   394  }
   395  
   396  func (cacheOrderIndex) Namespace() string { return "cacheOrderIndex" }
   397  
   398  func (cacheOrderIndex) Marshal() ([]byte, error) {
   399  	return nil, nil
   400  }
   401  
   402  func (cacheOrderIndex) Unmarshal(_ []byte) error {
   403  	return nil
   404  }
   405  
   406  func (c *cacheOrderIndex) Clone() storage.Item {
   407  	if c == nil {
   408  		return nil
   409  	}
   410  	return &cacheOrderIndex{
   411  		AccessTimestamp: c.AccessTimestamp,
   412  		Address:         c.Address.Clone(),
   413  	}
   414  }
   415  
   416  func (c cacheOrderIndex) String() string {
   417  	return fmt.Sprintf(
   418  		"cacheOrderIndex { AccessTimestamp: %d Address: %s }",
   419  		c.AccessTimestamp,
   420  		c.Address.ByteString(),
   421  	)
   422  }