github.com/ethersphere/bee/v2@v2.2.0/pkg/storer/internal/pinning/pinning.go (about)

     1  // Copyright 2022 The Swarm Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package pinstore
     6  
     7  import (
     8  	"bytes"
     9  	"context"
    10  	"encoding/binary"
    11  	"errors"
    12  	"fmt"
    13  	"runtime"
    14  
    15  	"github.com/ethersphere/bee/v2/pkg/encryption"
    16  	storage "github.com/ethersphere/bee/v2/pkg/storage"
    17  	"github.com/ethersphere/bee/v2/pkg/storer/internal/transaction"
    18  	"golang.org/x/sync/errgroup"
    19  
    20  	"github.com/ethersphere/bee/v2/pkg/storage/storageutil"
    21  	"github.com/ethersphere/bee/v2/pkg/storer/internal"
    22  	"github.com/ethersphere/bee/v2/pkg/swarm"
    23  	"github.com/google/uuid"
    24  )
    25  
    26  const (
    27  	// size of the UUID generated by the pinstore
    28  	uuidSize = 16
    29  )
    30  
var (
	// errInvalidPinCollectionAddr is returned when trying to marshal a
	// pinCollectionItem whose address is zero.
	errInvalidPinCollectionAddr = errors.New("marshal pinCollectionItem: address is zero")
	// errInvalidPinCollectionUUID is returned when trying to marshal a
	// pinCollectionItem whose UUID is empty.
	errInvalidPinCollectionUUID = errors.New("marshal pinCollectionItem: UUID is zero")
	// errInvalidPinCollectionSize is returned when trying to unmarshal a buffer
	// whose length differs from the serialized size of a pinCollectionItem.
	errInvalidPinCollectionSize = errors.New("unmarshal pinCollectionItem: invalid size")
	// errPutterAlreadyClosed is returned when Put is called on a putter that
	// has already been closed.
	errPutterAlreadyClosed = errors.New("pin store: putter already closed")
	// errCollectionRootAddressIsZero is returned if the putter is closed with a
	// zero swarm.Address. The root reference has to be set.
	errCollectionRootAddressIsZero = errors.New("pin store: collection root address is zero")
	// ErrDuplicatePinCollection is returned when attempting to close a
	// collection under a root address that is already stored as a pin.
	ErrDuplicatePinCollection = errors.New("pin store: duplicate pin collection")
)
    49  
    50  // creates a new UUID and returns it as a byte slice
    51  func newUUID() []byte {
    52  	id := uuid.New()
    53  	return id[:]
    54  }
    55  
// emptyKey is a 32 byte slice of zeros. When unmarshaling a pinCollectionItem,
// the second half of the stored reference is compared against it to decide
// whether an encryption key is set.
var emptyKey = make([]byte, 32)
    58  
// CollectionStat is used to store some basic stats about the pinning collection.
type CollectionStat struct {
	// Total is incremented on every Put issued against an open putter,
	// including puts of chunks that are already part of the collection.
	Total uint64
	// DupInCollection is incremented for every Put of a chunk that was
	// already present in the same collection.
	DupInCollection uint64
}
    64  
    65  // NewCollection returns a putter wrapped around the passed storage.
    66  // The putter will add the chunk to Chunk store if it doesn't exists within this collection.
    67  // It will create a new UUID for the collection which can be used to iterate on all the chunks
    68  // that are part of this collection. The root pin is only updated on successful close of this.
    69  // Calls to the Putter MUST be mutex locked to prevent concurrent upload data races.
    70  func NewCollection(st storage.IndexStore) (internal.PutterCloserWithReference, error) {
    71  	newCollectionUUID := newUUID()
    72  	err := st.Put(&dirtyCollection{UUID: newCollectionUUID})
    73  	if err != nil {
    74  		return nil, err
    75  	}
    76  	return &collectionPutter{
    77  		collection: &pinCollectionItem{UUID: newCollectionUUID},
    78  	}, nil
    79  }
    80  
// collectionPutter accumulates the chunks of a single pin collection.
type collectionPutter struct {
	// collection is the item being built: it carries the collection UUID, the
	// running stats and, once Close succeeds, the root address.
	collection *pinCollectionItem
	// closed is set after a successful Close or Cleanup; Put rejects any
	// further chunks once it is true.
	closed bool
}
    85  
    86  // Put adds a chunk to the pin collection.
    87  // The user of the putter MUST mutex lock the call to prevent data-races across multiple upload sessions.
    88  func (c *collectionPutter) Put(ctx context.Context, st transaction.Store, ch swarm.Chunk) error {
    89  
    90  	// do not allow any Puts after putter was closed
    91  	if c.closed {
    92  		return errPutterAlreadyClosed
    93  	}
    94  
    95  	c.collection.Stat.Total++
    96  
    97  	// We will only care about duplicates within this collection. In order to
    98  	// guarantee that we dont accidentally delete common chunks across collections,
    99  	// a separate pinCollectionItem entry will be present for each duplicate chunk.
   100  	collectionChunk := &pinChunkItem{UUID: c.collection.UUID, Addr: ch.Address()}
   101  	found, err := st.IndexStore().Has(collectionChunk)
   102  	if err != nil {
   103  		return fmt.Errorf("pin store: failed to check chunk: %w", err)
   104  	}
   105  	if found {
   106  		// If we already have this chunk in the current collection, don't add it
   107  		// again.
   108  		c.collection.Stat.DupInCollection++
   109  		return nil
   110  	}
   111  
   112  	err = st.IndexStore().Put(collectionChunk)
   113  	if err != nil {
   114  		return fmt.Errorf("pin store: failed putting collection chunk: %w", err)
   115  	}
   116  
   117  	err = st.ChunkStore().Put(ctx, ch)
   118  	if err != nil {
   119  		return fmt.Errorf("pin store: failed putting chunk: %w", err)
   120  	}
   121  
   122  	return nil
   123  }
   124  
   125  func (c *collectionPutter) Close(st storage.IndexStore, root swarm.Address) error {
   126  	if root.IsZero() {
   127  		return errCollectionRootAddressIsZero
   128  	}
   129  
   130  	collection := &pinCollectionItem{Addr: root}
   131  	has, err := st.Has(collection)
   132  
   133  	if err != nil {
   134  		return fmt.Errorf("pin store: check previous root: %w", err)
   135  	}
   136  
   137  	if has {
   138  		return ErrDuplicatePinCollection
   139  	}
   140  
   141  	// Save the root pin reference.
   142  	c.collection.Addr = root
   143  	err = st.Put(c.collection)
   144  	if err != nil {
   145  		return fmt.Errorf("pin store: failed updating collection: %w", err)
   146  	}
   147  
   148  	err = st.Delete(&dirtyCollection{UUID: c.collection.UUID})
   149  	if err != nil {
   150  		return fmt.Errorf("pin store: failed deleting dirty collection: %w", err)
   151  	}
   152  
   153  	c.closed = true
   154  	return nil
   155  }
   156  
   157  func (c *collectionPutter) Cleanup(st transaction.Storage) error {
   158  	if c.closed {
   159  		return nil
   160  	}
   161  
   162  	if err := deleteCollectionChunks(context.Background(), st, c.collection.UUID); err != nil {
   163  		return fmt.Errorf("pin store: failed deleting collection chunks: %w", err)
   164  	}
   165  
   166  	err := st.Run(context.Background(), func(s transaction.Store) error {
   167  		return s.IndexStore().Delete(&dirtyCollection{UUID: c.collection.UUID})
   168  	})
   169  	if err != nil {
   170  		return fmt.Errorf("pin store: failed deleting dirty collection: %w", err)
   171  	}
   172  
   173  	c.closed = true
   174  	return nil
   175  }
   176  
   177  // CleanupDirty will iterate over all the dirty collections and delete them.
   178  func CleanupDirty(st transaction.Storage) error {
   179  
   180  	dirtyCollections := make([]*dirtyCollection, 0)
   181  	err := st.IndexStore().Iterate(
   182  		storage.Query{
   183  			Factory:      func() storage.Item { return new(dirtyCollection) },
   184  			ItemProperty: storage.QueryItemID,
   185  		},
   186  		func(r storage.Result) (bool, error) {
   187  			di := &dirtyCollection{UUID: []byte(r.ID)}
   188  			dirtyCollections = append(dirtyCollections, di)
   189  			return false, nil
   190  		},
   191  	)
   192  	if err != nil {
   193  		return fmt.Errorf("pin store: failed iterating dirty collections: %w", err)
   194  	}
   195  
   196  	for _, di := range dirtyCollections {
   197  		err = errors.Join(err, (&collectionPutter{collection: &pinCollectionItem{UUID: di.UUID}}).Cleanup(st))
   198  	}
   199  
   200  	return err
   201  }
   202  
   203  // HasPin function will check if the address represents a valid pin collection.
   204  func HasPin(st storage.Reader, root swarm.Address) (bool, error) {
   205  	collection := &pinCollectionItem{Addr: root}
   206  	has, err := st.Has(collection)
   207  	if err != nil {
   208  		return false, fmt.Errorf("pin store: failed checking collection: %w", err)
   209  	}
   210  	return has, nil
   211  }
   212  
   213  // Pins lists all the added pinning collections.
   214  func Pins(st storage.Reader) ([]swarm.Address, error) {
   215  	var pins []swarm.Address
   216  	err := st.Iterate(storage.Query{
   217  		Factory:      func() storage.Item { return new(pinCollectionItem) },
   218  		ItemProperty: storage.QueryItemID,
   219  	}, func(r storage.Result) (bool, error) {
   220  		addr := swarm.NewAddress([]byte(r.ID))
   221  		pins = append(pins, addr)
   222  		return false, nil
   223  	})
   224  	if err != nil {
   225  		return nil, fmt.Errorf("pin store: failed iterating root refs: %w", err)
   226  	}
   227  
   228  	return pins, nil
   229  }
   230  
   231  func deleteCollectionChunks(ctx context.Context, st transaction.Storage, collectionUUID []byte) error {
   232  	chunksToDelete := make([]*pinChunkItem, 0)
   233  
   234  	err := st.IndexStore().Iterate(
   235  		storage.Query{
   236  			Factory: func() storage.Item { return &pinChunkItem{UUID: collectionUUID} },
   237  		}, func(r storage.Result) (bool, error) {
   238  			addr := swarm.NewAddress([]byte(r.ID))
   239  			chunk := &pinChunkItem{UUID: collectionUUID, Addr: addr}
   240  			chunksToDelete = append(chunksToDelete, chunk)
   241  			return false, nil
   242  		},
   243  	)
   244  	if err != nil {
   245  		return fmt.Errorf("pin store: failed iterating collection chunks: %w", err)
   246  	}
   247  
   248  	eg, ctx := errgroup.WithContext(ctx)
   249  	eg.SetLimit(runtime.NumCPU())
   250  
   251  	for _, item := range chunksToDelete {
   252  		func(item *pinChunkItem) {
   253  			eg.Go(func() error {
   254  				return st.Run(ctx, func(s transaction.Store) error {
   255  					return errors.Join(
   256  						s.IndexStore().Delete(item),
   257  						s.ChunkStore().Delete(ctx, item.Addr),
   258  					)
   259  				})
   260  			})
   261  
   262  		}(item)
   263  	}
   264  
   265  	err = eg.Wait()
   266  	if err != nil {
   267  		return fmt.Errorf("pin store: failed tx deleting collection chunks: %w", err)
   268  	}
   269  
   270  	return nil
   271  }
   272  
   273  // DeletePin will delete the root pin and all the chunks that are part of this collection.
   274  func DeletePin(ctx context.Context, st transaction.Storage, root swarm.Address) error {
   275  	collection := &pinCollectionItem{Addr: root}
   276  
   277  	err := st.IndexStore().Get(collection)
   278  	if err != nil {
   279  		return fmt.Errorf("pin store: failed getting collection: %w", err)
   280  	}
   281  
   282  	if err := deleteCollectionChunks(ctx, st, collection.UUID); err != nil {
   283  		return err
   284  	}
   285  
   286  	return st.Run(ctx, func(s transaction.Store) error {
   287  		err := s.IndexStore().Delete(collection)
   288  		if err != nil {
   289  			return fmt.Errorf("pin store: failed deleting root collection: %w", err)
   290  		}
   291  		return nil
   292  	})
   293  }
   294  
   295  func IterateCollection(st storage.Reader, root swarm.Address, fn func(addr swarm.Address) (bool, error)) error {
   296  	collection := &pinCollectionItem{Addr: root}
   297  	err := st.Get(collection)
   298  	if err != nil {
   299  		return fmt.Errorf("pin store: failed getting collection: %w", err)
   300  	}
   301  
   302  	return st.Iterate(storage.Query{
   303  		Factory:      func() storage.Item { return &pinChunkItem{UUID: collection.UUID} },
   304  		ItemProperty: storage.QueryItemID,
   305  	}, func(r storage.Result) (bool, error) {
   306  		addr := swarm.NewAddress([]byte(r.ID))
   307  		stop, err := fn(addr)
   308  		if err != nil {
   309  			return true, err
   310  		}
   311  		return stop, nil
   312  	})
   313  }
   314  
   315  func IterateCollectionStats(st storage.Reader, iterateFn func(st CollectionStat) (bool, error)) error {
   316  	return st.Iterate(
   317  		storage.Query{
   318  			Factory: func() storage.Item { return new(pinCollectionItem) },
   319  		},
   320  		func(r storage.Result) (bool, error) {
   321  			return iterateFn(r.Entry.(*pinCollectionItem).Stat)
   322  		},
   323  	)
   324  }
   325  
// pinCollectionItemSize is the size in bytes of a serialized pinCollectionItem:
// the root reference, the collection UUID and the two 8 byte stat counters
// (Total and DupInCollection).
const pinCollectionItemSize = encryption.ReferenceSize + uuidSize + 8 + 8
   328  
   329  var _ storage.Item = (*pinCollectionItem)(nil)
   330  
// pinCollectionItem is the index used to describe a pinning collection. The Addr
// is the root reference of the collection and UUID is a unique UUID for this collection.
// The Address could be an encrypted swarm hash. This hash has the key to decrypt the
// collection.
type pinCollectionItem struct {
	// Addr is the root reference of the collection; it is also the item ID.
	Addr swarm.Address
	// UUID identifies the collection and is the namespace under which its
	// chunk entries are stored.
	UUID []byte
	// Stat holds the counters gathered while the collection was uploaded.
	Stat CollectionStat
}
   340  
   341  func (p *pinCollectionItem) ID() string { return p.Addr.ByteString() }
   342  
   343  func (pinCollectionItem) Namespace() string { return "pinCollectionItem" }
   344  
   345  func (p *pinCollectionItem) Marshal() ([]byte, error) {
   346  	if p.Addr.IsZero() {
   347  		return nil, errInvalidPinCollectionAddr
   348  	}
   349  	if len(p.UUID) == 0 {
   350  		return nil, errInvalidPinCollectionUUID
   351  	}
   352  	buf := make([]byte, pinCollectionItemSize)
   353  	copy(buf[:encryption.ReferenceSize], p.Addr.Bytes())
   354  	off := encryption.ReferenceSize
   355  	copy(buf[off:off+uuidSize], p.UUID)
   356  	statBufOff := encryption.ReferenceSize + uuidSize
   357  	binary.LittleEndian.PutUint64(buf[statBufOff:], p.Stat.Total)
   358  	binary.LittleEndian.PutUint64(buf[statBufOff+8:], p.Stat.DupInCollection)
   359  	return buf, nil
   360  }
   361  
   362  func (p *pinCollectionItem) Unmarshal(buf []byte) error {
   363  	if len(buf) != pinCollectionItemSize {
   364  		return errInvalidPinCollectionSize
   365  	}
   366  	ni := new(pinCollectionItem)
   367  	if bytes.Equal(buf[swarm.HashSize:encryption.ReferenceSize], emptyKey) {
   368  		ni.Addr = swarm.NewAddress(buf[:swarm.HashSize]).Clone()
   369  	} else {
   370  		ni.Addr = swarm.NewAddress(buf[:encryption.ReferenceSize]).Clone()
   371  	}
   372  	off := encryption.ReferenceSize
   373  	ni.UUID = append(make([]byte, 0, uuidSize), buf[off:off+uuidSize]...)
   374  	statBuf := buf[off+uuidSize:]
   375  	ni.Stat.Total = binary.LittleEndian.Uint64(statBuf[:8])
   376  	ni.Stat.DupInCollection = binary.LittleEndian.Uint64(statBuf[8:16])
   377  	*p = *ni
   378  	return nil
   379  }
   380  
   381  func (p *pinCollectionItem) Clone() storage.Item {
   382  	if p == nil {
   383  		return nil
   384  	}
   385  	return &pinCollectionItem{
   386  		Addr: p.Addr.Clone(),
   387  		UUID: append([]byte(nil), p.UUID...),
   388  		Stat: p.Stat,
   389  	}
   390  }
   391  
   392  func (p pinCollectionItem) String() string {
   393  	return storageutil.JoinFields(p.Namespace(), p.ID())
   394  }
   395  
   396  var _ storage.Item = (*pinChunkItem)(nil)
   397  
   398  // pinChunkItem is the index used to represent a single chunk in the pinning
   399  // collection. It is prefixed with the UUID of the collection.
   400  type pinChunkItem struct {
   401  	UUID []byte
   402  	Addr swarm.Address
   403  }
   404  
   405  func (p *pinChunkItem) Namespace() string { return string(p.UUID) }
   406  
   407  func (p *pinChunkItem) ID() string { return p.Addr.ByteString() }
   408  
   409  // pinChunkItem is a key-only type index. We don't need to store any value. As such
   410  // the serialization functions would be no-ops. A Get operation on this key is not
   411  // required as the key would constitute the item. Usually these type of indexes are
   412  // useful for key-only iterations.
   413  func (p *pinChunkItem) Marshal() ([]byte, error) {
   414  	return nil, nil
   415  }
   416  
   417  func (p *pinChunkItem) Unmarshal(_ []byte) error {
   418  	return nil
   419  }
   420  
   421  func (p *pinChunkItem) Clone() storage.Item {
   422  	if p == nil {
   423  		return nil
   424  	}
   425  	return &pinChunkItem{
   426  		UUID: append([]byte(nil), p.UUID...),
   427  		Addr: p.Addr.Clone(),
   428  	}
   429  }
   430  
   431  func (p pinChunkItem) String() string {
   432  	return storageutil.JoinFields(p.Namespace(), p.ID())
   433  }
   434  
   435  type dirtyCollection struct {
   436  	UUID []byte
   437  }
   438  
   439  func (d *dirtyCollection) ID() string { return string(d.UUID) }
   440  
   441  func (dirtyCollection) Namespace() string { return "dirtyCollection" }
   442  
   443  func (d *dirtyCollection) Marshal() ([]byte, error) {
   444  	return nil, nil
   445  }
   446  
   447  func (d *dirtyCollection) Unmarshal(_ []byte) error {
   448  	return nil
   449  }
   450  
   451  func (d *dirtyCollection) Clone() storage.Item {
   452  	if d == nil {
   453  		return nil
   454  	}
   455  	return &dirtyCollection{
   456  		UUID: append([]byte(nil), d.UUID...),
   457  	}
   458  }
   459  
   460  func (d dirtyCollection) String() string {
   461  	return storageutil.JoinFields(d.Namespace(), d.ID())
   462  }