github.com/ethersphere/bee/v2@v2.2.0/pkg/storer/internal/pinning/pinning.go (about) 1 // Copyright 2022 The Swarm Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package pinstore 6 7 import ( 8 "bytes" 9 "context" 10 "encoding/binary" 11 "errors" 12 "fmt" 13 "runtime" 14 15 "github.com/ethersphere/bee/v2/pkg/encryption" 16 storage "github.com/ethersphere/bee/v2/pkg/storage" 17 "github.com/ethersphere/bee/v2/pkg/storer/internal/transaction" 18 "golang.org/x/sync/errgroup" 19 20 "github.com/ethersphere/bee/v2/pkg/storage/storageutil" 21 "github.com/ethersphere/bee/v2/pkg/storer/internal" 22 "github.com/ethersphere/bee/v2/pkg/swarm" 23 "github.com/google/uuid" 24 ) 25 26 const ( 27 // size of the UUID generated by the pinstore 28 uuidSize = 16 29 ) 30 31 var ( 32 // errInvalidPinCollectionAddr is returned when trying to marshal a pinCollectionItem 33 // with a zero address 34 errInvalidPinCollectionAddr = errors.New("marshal pinCollectionItem: address is zero") 35 // errInvalidPinCollectionUUID is returned when trying to marshal a pinCollectionItem 36 // with an empty UUID 37 errInvalidPinCollectionUUID = errors.New("marshal pinCollectionItem: UUID is zero") 38 // errInvalidPinCollectionSize is returned when trying to unmarshal a buffer of 39 // incorrect size 40 errInvalidPinCollectionSize = errors.New("unmarshal pinCollectionItem: invalid size") 41 // errPutterAlreadyClosed is returned when trying to use a Putter which is already closed 42 errPutterAlreadyClosed = errors.New("pin store: putter already closed") 43 // errCollectionRootAddressIsZero is returned if the putter is closed with a zero 44 // swarm.Address. Root reference has to be set. 45 errCollectionRootAddressIsZero = errors.New("pin store: collection root address is zero") 46 // ErrDuplicatePinCollection is returned when attempted to pin the same file repeatedly 47 ErrDuplicatePinCollection = errors.New("pin store: duplicate pin collection") 48 ) 49 50 // creates a new UUID and returns it as a byte slice 51 func newUUID() []byte { 52 id := uuid.New() 53 return id[:] 54 } 55 56 // emptyKey is a 32 byte slice of zeros used to check if encryption key is set 57 var emptyKey = make([]byte, 32) 58 59 // CollectionStat is used to store some basic stats about the pinning collection 60 type CollectionStat struct { 61 Total uint64 62 DupInCollection uint64 63 } 64 65 // NewCollection returns a putter wrapped around the passed storage. 66 // The putter will add the chunk to Chunk store if it doesn't exists within this collection. 67 // It will create a new UUID for the collection which can be used to iterate on all the chunks 68 // that are part of this collection. The root pin is only updated on successful close of this. 69 // Calls to the Putter MUST be mutex locked to prevent concurrent upload data races. 70 func NewCollection(st storage.IndexStore) (internal.PutterCloserWithReference, error) { 71 newCollectionUUID := newUUID() 72 err := st.Put(&dirtyCollection{UUID: newCollectionUUID}) 73 if err != nil { 74 return nil, err 75 } 76 return &collectionPutter{ 77 collection: &pinCollectionItem{UUID: newCollectionUUID}, 78 }, nil 79 } 80 81 type collectionPutter struct { 82 collection *pinCollectionItem 83 closed bool 84 } 85 86 // Put adds a chunk to the pin collection. 87 // The user of the putter MUST mutex lock the call to prevent data-races across multiple upload sessions. 88 func (c *collectionPutter) Put(ctx context.Context, st transaction.Store, ch swarm.Chunk) error { 89 90 // do not allow any Puts after putter was closed 91 if c.closed { 92 return errPutterAlreadyClosed 93 } 94 95 c.collection.Stat.Total++ 96 97 // We will only care about duplicates within this collection. In order to 98 // guarantee that we dont accidentally delete common chunks across collections, 99 // a separate pinCollectionItem entry will be present for each duplicate chunk. 100 collectionChunk := &pinChunkItem{UUID: c.collection.UUID, Addr: ch.Address()} 101 found, err := st.IndexStore().Has(collectionChunk) 102 if err != nil { 103 return fmt.Errorf("pin store: failed to check chunk: %w", err) 104 } 105 if found { 106 // If we already have this chunk in the current collection, don't add it 107 // again. 108 c.collection.Stat.DupInCollection++ 109 return nil 110 } 111 112 err = st.IndexStore().Put(collectionChunk) 113 if err != nil { 114 return fmt.Errorf("pin store: failed putting collection chunk: %w", err) 115 } 116 117 err = st.ChunkStore().Put(ctx, ch) 118 if err != nil { 119 return fmt.Errorf("pin store: failed putting chunk: %w", err) 120 } 121 122 return nil 123 } 124 125 func (c *collectionPutter) Close(st storage.IndexStore, root swarm.Address) error { 126 if root.IsZero() { 127 return errCollectionRootAddressIsZero 128 } 129 130 collection := &pinCollectionItem{Addr: root} 131 has, err := st.Has(collection) 132 133 if err != nil { 134 return fmt.Errorf("pin store: check previous root: %w", err) 135 } 136 137 if has { 138 return ErrDuplicatePinCollection 139 } 140 141 // Save the root pin reference. 142 c.collection.Addr = root 143 err = st.Put(c.collection) 144 if err != nil { 145 return fmt.Errorf("pin store: failed updating collection: %w", err) 146 } 147 148 err = st.Delete(&dirtyCollection{UUID: c.collection.UUID}) 149 if err != nil { 150 return fmt.Errorf("pin store: failed deleting dirty collection: %w", err) 151 } 152 153 c.closed = true 154 return nil 155 } 156 157 func (c *collectionPutter) Cleanup(st transaction.Storage) error { 158 if c.closed { 159 return nil 160 } 161 162 if err := deleteCollectionChunks(context.Background(), st, c.collection.UUID); err != nil { 163 return fmt.Errorf("pin store: failed deleting collection chunks: %w", err) 164 } 165 166 err := st.Run(context.Background(), func(s transaction.Store) error { 167 return s.IndexStore().Delete(&dirtyCollection{UUID: c.collection.UUID}) 168 }) 169 if err != nil { 170 return fmt.Errorf("pin store: failed deleting dirty collection: %w", err) 171 } 172 173 c.closed = true 174 return nil 175 } 176 177 // CleanupDirty will iterate over all the dirty collections and delete them. 178 func CleanupDirty(st transaction.Storage) error { 179 180 dirtyCollections := make([]*dirtyCollection, 0) 181 err := st.IndexStore().Iterate( 182 storage.Query{ 183 Factory: func() storage.Item { return new(dirtyCollection) }, 184 ItemProperty: storage.QueryItemID, 185 }, 186 func(r storage.Result) (bool, error) { 187 di := &dirtyCollection{UUID: []byte(r.ID)} 188 dirtyCollections = append(dirtyCollections, di) 189 return false, nil 190 }, 191 ) 192 if err != nil { 193 return fmt.Errorf("pin store: failed iterating dirty collections: %w", err) 194 } 195 196 for _, di := range dirtyCollections { 197 err = errors.Join(err, (&collectionPutter{collection: &pinCollectionItem{UUID: di.UUID}}).Cleanup(st)) 198 } 199 200 return err 201 } 202 203 // HasPin function will check if the address represents a valid pin collection. 204 func HasPin(st storage.Reader, root swarm.Address) (bool, error) { 205 collection := &pinCollectionItem{Addr: root} 206 has, err := st.Has(collection) 207 if err != nil { 208 return false, fmt.Errorf("pin store: failed checking collection: %w", err) 209 } 210 return has, nil 211 } 212 213 // Pins lists all the added pinning collections. 214 func Pins(st storage.Reader) ([]swarm.Address, error) { 215 var pins []swarm.Address 216 err := st.Iterate(storage.Query{ 217 Factory: func() storage.Item { return new(pinCollectionItem) }, 218 ItemProperty: storage.QueryItemID, 219 }, func(r storage.Result) (bool, error) { 220 addr := swarm.NewAddress([]byte(r.ID)) 221 pins = append(pins, addr) 222 return false, nil 223 }) 224 if err != nil { 225 return nil, fmt.Errorf("pin store: failed iterating root refs: %w", err) 226 } 227 228 return pins, nil 229 } 230 231 func deleteCollectionChunks(ctx context.Context, st transaction.Storage, collectionUUID []byte) error { 232 chunksToDelete := make([]*pinChunkItem, 0) 233 234 err := st.IndexStore().Iterate( 235 storage.Query{ 236 Factory: func() storage.Item { return &pinChunkItem{UUID: collectionUUID} }, 237 }, func(r storage.Result) (bool, error) { 238 addr := swarm.NewAddress([]byte(r.ID)) 239 chunk := &pinChunkItem{UUID: collectionUUID, Addr: addr} 240 chunksToDelete = append(chunksToDelete, chunk) 241 return false, nil 242 }, 243 ) 244 if err != nil { 245 return fmt.Errorf("pin store: failed iterating collection chunks: %w", err) 246 } 247 248 eg, ctx := errgroup.WithContext(ctx) 249 eg.SetLimit(runtime.NumCPU()) 250 251 for _, item := range chunksToDelete { 252 func(item *pinChunkItem) { 253 eg.Go(func() error { 254 return st.Run(ctx, func(s transaction.Store) error { 255 return errors.Join( 256 s.IndexStore().Delete(item), 257 s.ChunkStore().Delete(ctx, item.Addr), 258 ) 259 }) 260 }) 261 262 }(item) 263 } 264 265 err = eg.Wait() 266 if err != nil { 267 return fmt.Errorf("pin store: failed tx deleting collection chunks: %w", err) 268 } 269 270 return nil 271 } 272 273 // DeletePin will delete the root pin and all the chunks that are part of this collection. 274 func DeletePin(ctx context.Context, st transaction.Storage, root swarm.Address) error { 275 collection := &pinCollectionItem{Addr: root} 276 277 err := st.IndexStore().Get(collection) 278 if err != nil { 279 return fmt.Errorf("pin store: failed getting collection: %w", err) 280 } 281 282 if err := deleteCollectionChunks(ctx, st, collection.UUID); err != nil { 283 return err 284 } 285 286 return st.Run(ctx, func(s transaction.Store) error { 287 err := s.IndexStore().Delete(collection) 288 if err != nil { 289 return fmt.Errorf("pin store: failed deleting root collection: %w", err) 290 } 291 return nil 292 }) 293 } 294 295 func IterateCollection(st storage.Reader, root swarm.Address, fn func(addr swarm.Address) (bool, error)) error { 296 collection := &pinCollectionItem{Addr: root} 297 err := st.Get(collection) 298 if err != nil { 299 return fmt.Errorf("pin store: failed getting collection: %w", err) 300 } 301 302 return st.Iterate(storage.Query{ 303 Factory: func() storage.Item { return &pinChunkItem{UUID: collection.UUID} }, 304 ItemProperty: storage.QueryItemID, 305 }, func(r storage.Result) (bool, error) { 306 addr := swarm.NewAddress([]byte(r.ID)) 307 stop, err := fn(addr) 308 if err != nil { 309 return true, err 310 } 311 return stop, nil 312 }) 313 } 314 315 func IterateCollectionStats(st storage.Reader, iterateFn func(st CollectionStat) (bool, error)) error { 316 return st.Iterate( 317 storage.Query{ 318 Factory: func() storage.Item { return new(pinCollectionItem) }, 319 }, 320 func(r storage.Result) (bool, error) { 321 return iterateFn(r.Entry.(*pinCollectionItem).Stat) 322 }, 323 ) 324 } 325 326 // pinCollectionSize represents the size of the pinCollectionItem 327 const pinCollectionItemSize = encryption.ReferenceSize + uuidSize + 8 + 8 328 329 var _ storage.Item = (*pinCollectionItem)(nil) 330 331 // pinCollectionItem is the index used to describe a pinning collection. The Addr 332 // is the root reference of the collection and UUID is a unique UUID for this collection. 333 // The Address could be an encrypted swarm hash. This hash has the key to decrypt the 334 // collection. 335 type pinCollectionItem struct { 336 Addr swarm.Address 337 UUID []byte 338 Stat CollectionStat 339 } 340 341 func (p *pinCollectionItem) ID() string { return p.Addr.ByteString() } 342 343 func (pinCollectionItem) Namespace() string { return "pinCollectionItem" } 344 345 func (p *pinCollectionItem) Marshal() ([]byte, error) { 346 if p.Addr.IsZero() { 347 return nil, errInvalidPinCollectionAddr 348 } 349 if len(p.UUID) == 0 { 350 return nil, errInvalidPinCollectionUUID 351 } 352 buf := make([]byte, pinCollectionItemSize) 353 copy(buf[:encryption.ReferenceSize], p.Addr.Bytes()) 354 off := encryption.ReferenceSize 355 copy(buf[off:off+uuidSize], p.UUID) 356 statBufOff := encryption.ReferenceSize + uuidSize 357 binary.LittleEndian.PutUint64(buf[statBufOff:], p.Stat.Total) 358 binary.LittleEndian.PutUint64(buf[statBufOff+8:], p.Stat.DupInCollection) 359 return buf, nil 360 } 361 362 func (p *pinCollectionItem) Unmarshal(buf []byte) error { 363 if len(buf) != pinCollectionItemSize { 364 return errInvalidPinCollectionSize 365 } 366 ni := new(pinCollectionItem) 367 if bytes.Equal(buf[swarm.HashSize:encryption.ReferenceSize], emptyKey) { 368 ni.Addr = swarm.NewAddress(buf[:swarm.HashSize]).Clone() 369 } else { 370 ni.Addr = swarm.NewAddress(buf[:encryption.ReferenceSize]).Clone() 371 } 372 off := encryption.ReferenceSize 373 ni.UUID = append(make([]byte, 0, uuidSize), buf[off:off+uuidSize]...) 374 statBuf := buf[off+uuidSize:] 375 ni.Stat.Total = binary.LittleEndian.Uint64(statBuf[:8]) 376 ni.Stat.DupInCollection = binary.LittleEndian.Uint64(statBuf[8:16]) 377 *p = *ni 378 return nil 379 } 380 381 func (p *pinCollectionItem) Clone() storage.Item { 382 if p == nil { 383 return nil 384 } 385 return &pinCollectionItem{ 386 Addr: p.Addr.Clone(), 387 UUID: append([]byte(nil), p.UUID...), 388 Stat: p.Stat, 389 } 390 } 391 392 func (p pinCollectionItem) String() string { 393 return storageutil.JoinFields(p.Namespace(), p.ID()) 394 } 395 396 var _ storage.Item = (*pinChunkItem)(nil) 397 398 // pinChunkItem is the index used to represent a single chunk in the pinning 399 // collection. It is prefixed with the UUID of the collection. 400 type pinChunkItem struct { 401 UUID []byte 402 Addr swarm.Address 403 } 404 405 func (p *pinChunkItem) Namespace() string { return string(p.UUID) } 406 407 func (p *pinChunkItem) ID() string { return p.Addr.ByteString() } 408 409 // pinChunkItem is a key-only type index. We don't need to store any value. As such 410 // the serialization functions would be no-ops. A Get operation on this key is not 411 // required as the key would constitute the item. Usually these type of indexes are 412 // useful for key-only iterations. 413 func (p *pinChunkItem) Marshal() ([]byte, error) { 414 return nil, nil 415 } 416 417 func (p *pinChunkItem) Unmarshal(_ []byte) error { 418 return nil 419 } 420 421 func (p *pinChunkItem) Clone() storage.Item { 422 if p == nil { 423 return nil 424 } 425 return &pinChunkItem{ 426 UUID: append([]byte(nil), p.UUID...), 427 Addr: p.Addr.Clone(), 428 } 429 } 430 431 func (p pinChunkItem) String() string { 432 return storageutil.JoinFields(p.Namespace(), p.ID()) 433 } 434 435 type dirtyCollection struct { 436 UUID []byte 437 } 438 439 func (d *dirtyCollection) ID() string { return string(d.UUID) } 440 441 func (dirtyCollection) Namespace() string { return "dirtyCollection" } 442 443 func (d *dirtyCollection) Marshal() ([]byte, error) { 444 return nil, nil 445 } 446 447 func (d *dirtyCollection) Unmarshal(_ []byte) error { 448 return nil 449 } 450 451 func (d *dirtyCollection) Clone() storage.Item { 452 if d == nil { 453 return nil 454 } 455 return &dirtyCollection{ 456 UUID: append([]byte(nil), d.UUID...), 457 } 458 } 459 460 func (d dirtyCollection) String() string { 461 return storageutil.JoinFields(d.Namespace(), d.ID()) 462 }