// Source: github.com/ethersphere/bee/v2@v2.2.0/pkg/storer/internal/cache/cache.go

// Copyright 2022 The Swarm Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package cache

import (
	"context"
	"encoding/binary"
	"errors"
	"fmt"
	"runtime"
	"strconv"
	"sync/atomic"
	"time"

	storage "github.com/ethersphere/bee/v2/pkg/storage"
	"github.com/ethersphere/bee/v2/pkg/storer/internal/transaction"
	"github.com/ethersphere/bee/v2/pkg/swarm"
	"golang.org/x/sync/errgroup"
	"resenje.org/multex"
)

// now is the clock used to timestamp cache entries. It is a package-level
// variable rather than a direct time.Now call (presumably so tests can
// substitute a deterministic clock — confirm against the test files).
var now = time.Now

// CacheEntryItem is an alias of the unexported cacheEntry type.
// exported for migration
type CacheEntryItem = cacheEntry

// cacheEntrySize is the length in bytes of a marshaled cacheEntry: the chunk
// address (swarm.HashSize bytes) followed by an 8-byte access timestamp.
const cacheEntrySize = swarm.HashSize + 8

// Compile-time check that *cacheEntry implements storage.Item.
var _ storage.Item = (*cacheEntry)(nil)

// Errors returned by cacheEntry.Marshal and cacheEntry.Unmarshal.
var (
	errMarshalCacheEntryInvalidAddress   = errors.New("marshal cacheEntry: invalid address")
	errMarshalCacheEntryInvalidTimestamp = errors.New("marshal cacheEntry: invalid timestamp")
	errUnmarshalCacheEntryInvalidSize    = errors.New("unmarshal cacheEntry: invalid size")
)

// Cache is the part of the localstore which keeps track of the chunks that are not
// part of the reserve but are potentially useful to store for obtaining bandwidth
// incentives.
type Cache struct {
	size     atomic.Int64   // number of cache entries currently tracked
	capacity int            // maximum number of entries, as passed to New
	glock    *multex.Multex // per-address lock held by Putter, Getter and RemoveOldest; ShallowCopy does not take it yet (see its TODO).
}

// New creates a new Cache component with the specified capacity. The store is used
// here only to read the initial state of the cache before shutdown if there was
// any.
51 func New(ctx context.Context, store storage.Reader, capacity uint64) (*Cache, error) { 52 count, err := store.Count(&cacheEntry{}) 53 if err != nil { 54 return nil, fmt.Errorf("failed counting cache entries: %w", err) 55 } 56 57 c := &Cache{capacity: int(capacity), glock: multex.New()} 58 c.size.Store(int64(count)) 59 60 return c, nil 61 } 62 63 // Size returns the current size of the cache. 64 func (c *Cache) Size() uint64 { 65 return uint64(c.size.Load()) 66 } 67 68 // Capacity returns the capacity of the cache. 69 func (c *Cache) Capacity() uint64 { return uint64(c.capacity) } 70 71 // Putter returns a Storage.Putter instance which adds the chunk to the underlying 72 // chunkstore and also adds a Cache entry for the chunk. 73 func (c *Cache) Putter(store transaction.Storage) storage.Putter { 74 return storage.PutterFunc(func(ctx context.Context, chunk swarm.Chunk) error { 75 76 c.glock.Lock(chunk.Address().ByteString()) 77 defer c.glock.Unlock(chunk.Address().ByteString()) 78 79 trx, done := store.NewTransaction(ctx) 80 defer done() 81 82 newEntry := &cacheEntry{Address: chunk.Address()} 83 found, err := trx.IndexStore().Has(newEntry) 84 if err != nil { 85 return fmt.Errorf("failed checking has cache entry: %w", err) 86 } 87 88 // if chunk is already part of cache, return found. 
89 if found { 90 return nil 91 } 92 93 newEntry.AccessTimestamp = now().UnixNano() 94 err = trx.IndexStore().Put(newEntry) 95 if err != nil { 96 return fmt.Errorf("failed adding cache entry: %w", err) 97 } 98 99 err = trx.IndexStore().Put(&cacheOrderIndex{ 100 Address: newEntry.Address, 101 AccessTimestamp: newEntry.AccessTimestamp, 102 }) 103 if err != nil { 104 return fmt.Errorf("failed adding cache order index: %w", err) 105 } 106 107 err = trx.ChunkStore().Put(ctx, chunk) 108 if err != nil { 109 return fmt.Errorf("failed adding chunk to chunkstore: %w", err) 110 } 111 112 if err := trx.Commit(); err != nil { 113 return fmt.Errorf("batch commit: %w", err) 114 } 115 116 c.size.Add(1) 117 118 return nil 119 }) 120 } 121 122 // Getter returns a Storage.Getter instance which checks if the chunks accessed are 123 // part of cache it will update the cache indexes. If the operation to update the 124 // cache indexes fail, we need to fail the operation as this should signal the user 125 // of this getter to rollback the operation. 126 func (c *Cache) Getter(store transaction.Storage) storage.Getter { 127 return storage.GetterFunc(func(ctx context.Context, address swarm.Address) (swarm.Chunk, error) { 128 129 c.glock.Lock(address.ByteString()) 130 defer c.glock.Unlock(address.ByteString()) 131 132 trx, done := store.NewTransaction(ctx) 133 defer done() 134 135 ch, err := trx.ChunkStore().Get(ctx, address) 136 if err != nil { 137 return nil, err 138 } 139 140 // check if there is an entry in Cache. As this is the download path, we do 141 // a best-effort operation. So in case of any error we return the chunk. 
142 entry := &cacheEntry{Address: address} 143 err = trx.IndexStore().Get(entry) 144 if err != nil { 145 if errors.Is(err, storage.ErrNotFound) { 146 return ch, nil 147 } 148 return nil, fmt.Errorf("unexpected error getting indexstore entry: %w", err) 149 } 150 151 err = trx.IndexStore().Delete(&cacheOrderIndex{ 152 Address: entry.Address, 153 AccessTimestamp: entry.AccessTimestamp, 154 }) 155 if err != nil { 156 return nil, fmt.Errorf("failed deleting cache order index: %w", err) 157 } 158 159 entry.AccessTimestamp = now().UnixNano() 160 err = trx.IndexStore().Put(&cacheOrderIndex{ 161 Address: entry.Address, 162 AccessTimestamp: entry.AccessTimestamp, 163 }) 164 if err != nil { 165 return nil, fmt.Errorf("failed adding cache order index: %w", err) 166 } 167 168 err = trx.IndexStore().Put(entry) 169 if err != nil { 170 return nil, fmt.Errorf("failed adding cache entry: %w", err) 171 } 172 173 err = trx.Commit() 174 if err != nil { 175 return nil, fmt.Errorf("batch commit: %w", err) 176 } 177 178 return ch, nil 179 }) 180 } 181 182 // RemoveOldest removes the oldest cache entries from the store. The count 183 // specifies the number of entries to remove. 
184 func (c *Cache) RemoveOldest(ctx context.Context, st transaction.Storage, count uint64) error { 185 186 if count <= 0 { 187 return nil 188 } 189 190 evictItems := make([]*cacheEntry, 0, count) 191 err := st.IndexStore().Iterate( 192 storage.Query{ 193 Factory: func() storage.Item { return &cacheOrderIndex{} }, 194 ItemProperty: storage.QueryItemID, 195 }, 196 func(res storage.Result) (bool, error) { 197 accessTime, addr, err := idFromKey(res.ID) 198 if err != nil { 199 return false, fmt.Errorf("failed to parse cache order index %s: %w", res.ID, err) 200 } 201 entry := &cacheEntry{ 202 Address: addr, 203 AccessTimestamp: accessTime, 204 } 205 evictItems = append(evictItems, entry) 206 count-- 207 return count == 0, nil 208 }, 209 ) 210 if err != nil { 211 return fmt.Errorf("failed iterating over cache order index: %w", err) 212 } 213 214 eg, ctx := errgroup.WithContext(ctx) 215 eg.SetLimit(runtime.NumCPU()) 216 217 for _, item := range evictItems { 218 func(item *cacheEntry) { 219 eg.Go(func() error { 220 c.glock.Lock(item.Address.ByteString()) 221 defer c.glock.Unlock(item.Address.ByteString()) 222 err := st.Run(ctx, func(s transaction.Store) error { 223 return errors.Join( 224 s.IndexStore().Delete(item), 225 s.IndexStore().Delete(&cacheOrderIndex{ 226 Address: item.Address, 227 AccessTimestamp: item.AccessTimestamp, 228 }), 229 s.ChunkStore().Delete(ctx, item.Address), 230 ) 231 }) 232 if err != nil { 233 return err 234 } 235 c.size.Add(-1) 236 return nil 237 }) 238 }(item) 239 } 240 241 return eg.Wait() 242 } 243 244 // ShallowCopy creates cache entries with the expectation that the chunk already exists in the chunkstore. 
245 func (c *Cache) ShallowCopy( 246 ctx context.Context, 247 store transaction.Storage, 248 addrs ...swarm.Address, 249 ) (err error) { 250 251 // TODO: add proper mutex locking before usage 252 253 entries := make([]*cacheEntry, 0, len(addrs)) 254 255 defer func() { 256 if err != nil { 257 err = errors.Join(err, 258 store.Run(context.Background(), func(s transaction.Store) error { 259 for _, entry := range entries { 260 dErr := s.ChunkStore().Delete(context.Background(), entry.Address) 261 if dErr != nil { 262 return dErr 263 } 264 } 265 return nil 266 }), 267 ) 268 } 269 }() 270 271 for _, addr := range addrs { 272 entry := &cacheEntry{Address: addr, AccessTimestamp: now().UnixNano()} 273 if has, err := store.IndexStore().Has(entry); err == nil && has { 274 // Since the caller has previously referenced the chunk (+1 refCnt), and if the chunk is already referenced 275 // by the cache store (+1 refCnt), then we must decrement the refCnt by one ( -1 refCnt to bring the total to +1). 276 // See https://github.com/ethersphere/bee/issues/4530. 277 _ = store.Run(ctx, func(s transaction.Store) error { return s.ChunkStore().Delete(ctx, addr) }) 278 continue 279 } 280 entries = append(entries, entry) 281 } 282 283 if len(entries) == 0 { 284 return nil 285 } 286 287 //consider only the amount that can fit, the rest should be deleted from the chunkstore. 
288 if len(entries) > c.capacity { 289 for _, addr := range entries[:len(entries)-c.capacity] { 290 _ = store.Run(ctx, func(s transaction.Store) error { return s.ChunkStore().Delete(ctx, addr.Address) }) 291 } 292 entries = entries[len(entries)-c.capacity:] 293 } 294 295 err = store.Run(ctx, func(s transaction.Store) error { 296 for _, entry := range entries { 297 err = s.IndexStore().Put(entry) 298 if err != nil { 299 return fmt.Errorf("failed adding entry %s: %w", entry, err) 300 } 301 err = s.IndexStore().Put(&cacheOrderIndex{ 302 Address: entry.Address, 303 AccessTimestamp: entry.AccessTimestamp, 304 }) 305 if err != nil { 306 return fmt.Errorf("failed adding cache order index: %w", err) 307 } 308 } 309 return nil 310 }) 311 if err != nil { 312 return err 313 } 314 315 c.size.Add(int64(len(entries))) 316 return nil 317 } 318 319 type cacheEntry struct { 320 Address swarm.Address 321 AccessTimestamp int64 322 } 323 324 func (c *cacheEntry) ID() string { return c.Address.ByteString() } 325 326 func (cacheEntry) Namespace() string { return "cacheEntry" } 327 328 func (c *cacheEntry) Marshal() ([]byte, error) { 329 entryBuf := make([]byte, cacheEntrySize) 330 if c.Address.IsZero() { 331 return nil, errMarshalCacheEntryInvalidAddress 332 } 333 if c.AccessTimestamp <= 0 { 334 return nil, errMarshalCacheEntryInvalidTimestamp 335 } 336 copy(entryBuf[:swarm.HashSize], c.Address.Bytes()) 337 binary.LittleEndian.PutUint64(entryBuf[swarm.HashSize:], uint64(c.AccessTimestamp)) 338 return entryBuf, nil 339 } 340 341 func (c *cacheEntry) Unmarshal(buf []byte) error { 342 if len(buf) != cacheEntrySize { 343 return errUnmarshalCacheEntryInvalidSize 344 } 345 newEntry := new(cacheEntry) 346 newEntry.Address = swarm.NewAddress(append(make([]byte, 0, swarm.HashSize), buf[:swarm.HashSize]...)) 347 newEntry.AccessTimestamp = int64(binary.LittleEndian.Uint64(buf[swarm.HashSize:])) 348 *c = *newEntry 349 return nil 350 } 351 352 func (c *cacheEntry) Clone() storage.Item { 353 if c == 
nil { 354 return nil 355 } 356 return &cacheEntry{ 357 Address: c.Address.Clone(), 358 AccessTimestamp: c.AccessTimestamp, 359 } 360 } 361 362 func (c cacheEntry) String() string { 363 return fmt.Sprintf( 364 "cacheEntry { Address: %s AccessTimestamp: %s }", 365 c.Address, 366 time.Unix(c.AccessTimestamp, 0).UTC().Format(time.RFC3339), 367 ) 368 } 369 370 var _ storage.Item = (*cacheOrderIndex)(nil) 371 372 type cacheOrderIndex struct { 373 AccessTimestamp int64 374 Address swarm.Address 375 } 376 377 func keyFromID(ts int64, addr swarm.Address) string { 378 tsStr := fmt.Sprintf("%d", ts) 379 return tsStr + addr.ByteString() 380 } 381 382 func idFromKey(key string) (int64, swarm.Address, error) { 383 ts := key[:len(key)-swarm.HashSize] 384 addr := key[len(key)-swarm.HashSize:] 385 n, err := strconv.ParseInt(ts, 10, 64) 386 if err != nil { 387 return 0, swarm.ZeroAddress, err 388 } 389 return n, swarm.NewAddress([]byte(addr)), nil 390 } 391 392 func (c *cacheOrderIndex) ID() string { 393 return keyFromID(c.AccessTimestamp, c.Address) 394 } 395 396 func (cacheOrderIndex) Namespace() string { return "cacheOrderIndex" } 397 398 func (cacheOrderIndex) Marshal() ([]byte, error) { 399 return nil, nil 400 } 401 402 func (cacheOrderIndex) Unmarshal(_ []byte) error { 403 return nil 404 } 405 406 func (c *cacheOrderIndex) Clone() storage.Item { 407 if c == nil { 408 return nil 409 } 410 return &cacheOrderIndex{ 411 AccessTimestamp: c.AccessTimestamp, 412 Address: c.Address.Clone(), 413 } 414 } 415 416 func (c cacheOrderIndex) String() string { 417 return fmt.Sprintf( 418 "cacheOrderIndex { AccessTimestamp: %d Address: %s }", 419 c.AccessTimestamp, 420 c.Address.ByteString(), 421 ) 422 }