github.com/ethersphere/bee/v2@v2.2.0/pkg/storer/reserve.go

// Copyright 2023 The Swarm Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package storer

import (
	"context"
	"encoding/hex"
	"errors"
	"fmt"
	"math"
	"slices"
	"sync"
	"sync/atomic"
	"time"

	"github.com/ethersphere/bee/v2/pkg/postage"
	"github.com/ethersphere/bee/v2/pkg/storage"
	"github.com/ethersphere/bee/v2/pkg/storage/storageutil"
	"github.com/ethersphere/bee/v2/pkg/storer/internal/reserve"
	"github.com/ethersphere/bee/v2/pkg/storer/internal/transaction"
	"github.com/ethersphere/bee/v2/pkg/swarm"
)

const (
	reserveOverCapacity = "reserveOverCapacity"
	reserveUnreserved   = "reserveUnreserved"
	batchExpiry         = "batchExpiry"
	batchExpiryDone     = "batchExpiryDone"
)

var errMaxRadius = errors.New("max radius reached")
var reserveSizeWithinRadius atomic.Uint64

type Syncer interface {
	// SyncRate returns the rate of active historical syncing jobs.
	SyncRate() float64
	Start(context.Context)
}

func threshold(capacity int) int { return capacity * 5 / 10 }

func (db *DB) startReserveWorkers(
	ctx context.Context,
	radius func() (uint8, error),
) {
	ctx, cancel := context.WithCancel(ctx)
	go func() {
		<-db.quit
		cancel()
	}()

	db.inFlight.Add(1)
	go db.reserveWorker(ctx)

	select {
	case <-time.After(db.opts.reserveWarmupDuration):
	case <-db.quit:
		return
	}

	r, err := radius()
	if err != nil {
		db.logger.Error(err, "reserve worker initial radius")
		return // node shutdown
	}

	err = db.reserve.SetRadius(r)
	if err != nil {
		db.logger.Error(err, "reserve set radius")
	} else {
		db.metrics.StorageRadius.Set(float64(r))
	}

	// syncing can begin now that the reserve worker is running
	db.syncer.Start(ctx)
}

func (db *DB) countWithinRadius(ctx context.Context) (int, error) {

	count := 0
	missing := 0
	radius := db.StorageRadius()

	evictBatches := make(map[string]bool)

	err := db.reserve.IterateChunksItems(0, func(ci *reserve.ChunkBinItem) (bool, error) {
		if ci.Bin >= radius {
			count++
		}

		if exists, err := db.batchstore.Exists(ci.BatchID); err == nil && !exists {
			missing++
			evictBatches[string(ci.BatchID)] = true
		}
		return false, nil
	})
	if err != nil {
		return 0, err
	}

	for batch := range evictBatches {
		db.logger.Debug("reserve: invalid batch", "batch_id", hex.EncodeToString([]byte(batch)))
		err = errors.Join(err, db.EvictBatch(ctx, []byte(batch)))
	}

	db.metrics.ReserveSizeWithinRadius.Set(float64(count))
	db.metrics.ReserveMissingBatch.Set(float64(missing))
	reserveSizeWithinRadius.Store(uint64(count))

	return count, err
}
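// exampleShouldShrinkRadius is a minimal sketch, not part of the upstream
// code: it restates the condition under which the reserve worker below lowers
// the storage radius, namely when the reserve is less than half full (see
// threshold above), historical syncing is idle, and the radius is still above
// the configured minimum. The helper and its parameter names are assumptions.
func exampleShouldShrinkRadius(countWithinRadius, capacity int, syncRate float64, radius, minimumRadius uint8) bool {
	return countWithinRadius < threshold(capacity) && syncRate == 0 && radius > minimumRadius
}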
func (db *DB) reserveWorker(ctx context.Context) {
	defer db.inFlight.Done()

	batchExpiryTrigger, batchExpiryUnsub := db.events.Subscribe(batchExpiry)
	defer batchExpiryUnsub()

	overCapTrigger, overCapUnsub := db.events.Subscribe(reserveOverCapacity)
	defer overCapUnsub()

	thresholdTicker := time.NewTicker(db.opts.reserveWakeupDuration)
	defer thresholdTicker.Stop()

	_, _ = db.countWithinRadius(ctx)

	for {
		select {
		case <-ctx.Done():
			return
		case <-batchExpiryTrigger:

			err := db.evictExpiredBatches(ctx)
			if err != nil {
				db.logger.Warning("reserve worker evict expired batches", "error", err)
			}

			db.events.Trigger(batchExpiryDone)

			if !db.reserve.IsWithinCapacity() {
				db.events.Trigger(reserveOverCapacity)
			}

		case <-overCapTrigger:

			db.metrics.OverCapTriggerCount.Inc()
			if err := db.unreserve(ctx); err != nil {
				db.logger.Warning("reserve worker unreserve", "error", err)
			}

		case <-thresholdTicker.C:

			radius := db.reserve.Radius()
			count, err := db.countWithinRadius(ctx)
			if err != nil {
				db.logger.Warning("reserve worker count within radius", "error", err)
				continue
			}

			if count < threshold(db.reserve.Capacity()) && db.syncer.SyncRate() == 0 && radius > db.opts.minimumRadius {
				radius--
				if err := db.reserve.SetRadius(radius); err != nil {
					db.logger.Error(err, "reserve set radius")
				}
				db.metrics.StorageRadius.Set(float64(radius))
				db.logger.Info("reserve radius decrease", "radius", radius)
			}
		}
	}
}

func (db *DB) evictExpiredBatches(ctx context.Context) error {

	batches, err := db.getExpiredBatches()
	if err != nil {
		return err
	}

	for _, batchID := range batches {
		evicted, err := db.evictBatch(ctx, batchID, math.MaxInt, swarm.MaxBins)
		if err != nil {
			return err
		}
		if evicted > 0 {
			db.logger.Debug("evicted expired batch", "batch_id", hex.EncodeToString(batchID), "total_evicted", evicted)
		}
		err = db.storage.Run(ctx, func(st transaction.Store) error {
			return st.IndexStore().Delete(&expiredBatchItem{BatchID: batchID})
		})
		if err != nil {
			return err
		}
	}

	return nil
}

func (db *DB) getExpiredBatches() ([][]byte, error) {
	var batchesToEvict [][]byte
	err := db.storage.IndexStore().Iterate(storage.Query{
		Factory:      func() storage.Item { return new(expiredBatchItem) },
		ItemProperty: storage.QueryItemID,
	}, func(result storage.Result) (bool, error) {
		batchesToEvict = append(batchesToEvict, []byte(result.ID))
		return false, nil
	})
	if err != nil {
		return nil, err
	}
	return batchesToEvict, nil
}

func (db *DB) evictBatch(
	ctx context.Context,
	batchID []byte,
	evictCount int,
	upToBin uint8,
) (evicted int, err error) {
	dur := captureDuration(time.Now())
	defer func() {
		db.metrics.ReserveSize.Set(float64(db.reserve.Size()))
		db.metrics.MethodCallsDuration.WithLabelValues("reserve", "EvictBatch").Observe(dur())
		if err == nil {
			db.metrics.MethodCalls.WithLabelValues("reserve", "EvictBatch", "success").Inc()
		} else {
			db.metrics.MethodCalls.WithLabelValues("reserve", "EvictBatch", "failure").Inc()
		}
		if upToBin == swarm.MaxBins {
			db.metrics.ExpiredChunkCount.Add(float64(evicted))
		} else {
			db.metrics.EvictedChunkCount.Add(float64(evicted))
		}
		db.logger.Debug(
			"reserve eviction",
			"uptoBin", upToBin,
			"evicted", evicted,
			"batchID", hex.EncodeToString(batchID),
			"new_size", db.reserve.Size(),
		)
	}()

	return db.reserve.EvictBatchBin(ctx, batchID, evictCount, upToBin)
}
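// exampleExpireBatch is an illustrative sketch, not part of the upstream code,
// of the asynchronous expiry flow: EvictBatch below only records the batch as
// expired and fires the batchExpiry event; the reserve worker later calls
// evictExpiredBatches to remove the chunks. The helper name is an assumption.
func exampleExpireBatch(ctx context.Context, db *DB, batchID []byte) error {
	if err := db.EvictBatch(ctx, batchID); err != nil {
		return fmt.Errorf("mark batch %x as expired: %w", batchID, err)
	}
	// the chunks are evicted later, on the reserve worker goroutine
	return nil
}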
// EvictBatch evicts all chunks belonging to a batch from the reserve.
func (db *DB) EvictBatch(ctx context.Context, batchID []byte) error {
	if db.reserve == nil {
		// if reserve is not configured, do nothing
		return nil
	}

	err := db.storage.Run(ctx, func(tx transaction.Store) error {
		return tx.IndexStore().Put(&expiredBatchItem{BatchID: batchID})
	})
	if err != nil {
		return fmt.Errorf("save expired batch: %w", err)
	}

	db.events.Trigger(batchExpiry)
	return nil
}

func (db *DB) ReserveGet(ctx context.Context, addr swarm.Address, batchID []byte, stampHash []byte) (ch swarm.Chunk, err error) {
	dur := captureDuration(time.Now())
	defer func() {
		db.metrics.MethodCallsDuration.WithLabelValues("reserve", "ReserveGet").Observe(dur())
		if err == nil || errors.Is(err, storage.ErrNotFound) {
			db.metrics.MethodCalls.WithLabelValues("reserve", "ReserveGet", "success").Inc()
		} else {
			db.metrics.MethodCalls.WithLabelValues("reserve", "ReserveGet", "failure").Inc()
			db.logger.Debug("reserve get error", "error", err)
		}
	}()

	return db.reserve.Get(ctx, addr, batchID, stampHash)
}

func (db *DB) ReserveHas(addr swarm.Address, batchID []byte, stampHash []byte) (has bool, err error) {
	dur := captureDuration(time.Now())
	defer func() {
		db.metrics.MethodCallsDuration.WithLabelValues("reserve", "ReserveHas").Observe(dur())
		if err == nil {
			db.metrics.MethodCalls.WithLabelValues("reserve", "ReserveHas", "success").Inc()
		} else {
			db.metrics.MethodCalls.WithLabelValues("reserve", "ReserveHas", "failure").Inc()
			db.logger.Debug("reserve has error", "error", err)
		}
	}()

	return db.reserve.Has(addr, batchID, stampHash)
}
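// exampleFetchFromReserve is a minimal usage sketch, not part of the upstream
// API: it checks for a chunk with ReserveHas before retrieving it with
// ReserveGet. The helper and its parameter names are assumptions.
func exampleFetchFromReserve(ctx context.Context, db *DB, addr swarm.Address, batchID, stampHash []byte) (swarm.Chunk, error) {
	has, err := db.ReserveHas(addr, batchID, stampHash)
	if err != nil {
		return nil, err
	}
	if !has {
		return nil, storage.ErrNotFound
	}
	return db.ReserveGet(ctx, addr, batchID, stampHash)
}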
// ReservePutter returns a Putter for inserting chunks into the reserve.
func (db *DB) ReservePutter() storage.Putter {
	return putterWithMetrics{
		storage.PutterFunc(
			func(ctx context.Context, chunk swarm.Chunk) error {
				err := db.reserve.Put(ctx, chunk)
				if err != nil {
					db.logger.Debug("reserve put error", "error", err)
					return fmt.Errorf("reserve putter.Put: %w", err)
				}
				db.reserveBinEvents.Trigger(string(db.po(chunk.Address())))
				if !db.reserve.IsWithinCapacity() {
					db.events.Trigger(reserveOverCapacity)
				}
				db.metrics.ReserveSize.Set(float64(db.reserve.Size()))
				return nil
			},
		),
		db.metrics,
		"reserve",
	}
}

func (db *DB) unreserve(ctx context.Context) (err error) {
	dur := captureDuration(time.Now())
	defer func() {
		db.metrics.MethodCallsDuration.WithLabelValues("reserve", "unreserve").Observe(dur())
		if err == nil {
			db.metrics.MethodCalls.WithLabelValues("reserve", "unreserve", "success").Inc()
		} else {
			db.metrics.MethodCalls.WithLabelValues("reserve", "unreserve", "failure").Inc()
		}
	}()

	radius := db.reserve.Radius()
	defer db.events.Trigger(reserveUnreserved)

	target := db.reserve.EvictionTarget()
	if target <= 0 {
		return nil
	}

	db.logger.Info("unreserve start", "target", target, "radius", radius)

	batchExpiry, unsub := db.events.Subscribe(batchExpiry)
	defer unsub()

	totalEvicted := 0

	var batches [][]byte
	err = db.batchstore.Iterate(func(b *postage.Batch) (bool, error) {
		batches = append(batches, b.ID)
		return false, nil
	})
	if err != nil {
		return err
	}

	for radius < swarm.MaxBins {

		for _, b := range batches {

			select {
			case <-batchExpiry:
				db.logger.Debug("stopping unreserve, received batch expiration signal")
				return nil
			default:
			}

			evict := target - totalEvicted
			if evict < int(db.opts.reserveMinEvictCount) { // evict at least the configured minimum count
				evict = int(db.opts.reserveMinEvictCount)
			}

			binEvicted, err := db.evictBatch(ctx, b, evict, radius)
			// eviction happens in batches, so we need to keep track of the total
			// number of chunks evicted even if there was an error
			totalEvicted += binEvicted

			// an error here only occurs in critical cases, e.g. a batch commit
			// error, which is not recoverable
			if err != nil {
				return err
			}

			if totalEvicted >= target {
				db.logger.Info("unreserve finished", "evicted", totalEvicted, "radius", radius)
				return nil
			}
		}

		radius++
		db.logger.Info("reserve radius increase", "radius", radius)
		_ = db.reserve.SetRadius(radius)
		db.metrics.StorageRadius.Set(float64(radius))
	}

	return errMaxRadius
}
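// exampleStoreChunk is a minimal usage sketch, not part of the upstream code,
// of writing a chunk into the reserve through the Putter returned by
// ReservePutter above. The helper name is an assumption.
func exampleStoreChunk(ctx context.Context, db *DB, ch swarm.Chunk) error {
	putter := db.ReservePutter()
	if err := putter.Put(ctx, ch); err != nil {
		return fmt.Errorf("reserve put: %w", err)
	}
	return nil
}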
// ReserveLastBinIDs returns the highest binID of each bin in the reserve and the epoch time of the reserve.
func (db *DB) ReserveLastBinIDs() ([]uint64, uint64, error) {
	if db.reserve == nil {
		return nil, 0, nil
	}

	return db.reserve.LastBinIDs()
}

func (db *DB) ReserveIterateChunks(cb func(swarm.Chunk) (bool, error)) error {
	return db.reserve.IterateChunks(0, cb)
}

func (db *DB) StorageRadius() uint8 {
	if db.reserve == nil {
		return 0
	}
	return db.reserve.Radius()
}

func (db *DB) ReserveSize() int {
	if db.reserve == nil {
		return 0
	}
	return db.reserve.Size()
}

func (db *DB) ReserveSizeWithinRadius() uint64 {
	return reserveSizeWithinRadius.Load()
}

func (db *DB) IsWithinStorageRadius(addr swarm.Address) bool {
	if db.reserve == nil {
		return false
	}
	return swarm.Proximity(addr.Bytes(), db.baseAddr.Bytes()) >= db.reserve.Radius()
}

// BinC is the result sent on the SubscribeBin channel; it carries the chunk address, binID, batch ID and stamp hash.
type BinC struct {
	Address   swarm.Address
	BinID     uint64
	BatchID   []byte
	StampHash []byte
}

// SubscribeBin returns a channel that feeds all the chunks in the reserve from a certain bin, starting at the given binID.
func (db *DB) SubscribeBin(ctx context.Context, bin uint8, start uint64) (<-chan *BinC, func(), <-chan error) {
	out := make(chan *BinC)
	done := make(chan struct{})
	errC := make(chan error, 1)

	db.inFlight.Add(1)
	go func() {
		defer db.inFlight.Done()

		trigger, unsub := db.reserveBinEvents.Subscribe(string(bin))
		defer unsub()
		defer close(out)

		for {

			err := db.reserve.IterateBin(bin, start, func(a swarm.Address, binID uint64, batchID, stampHash []byte) (bool, error) {
				select {
				case out <- &BinC{Address: a, BinID: binID, BatchID: batchID, StampHash: stampHash}:
					start = binID + 1
				case <-done:
					return true, nil
				case <-db.quit:
					return false, ErrDBQuit
				case <-ctx.Done():
					return false, ctx.Err()
				}

				return false, nil
			})
			if err != nil {
				errC <- err
				return
			}

			select {
			case <-trigger:
			case <-done:
				return
			case <-db.quit:
				errC <- ErrDBQuit
				return
			case <-ctx.Done():
				errC <- ctx.Err()
				return
			}
		}
	}()

	var doneOnce sync.Once
	return out, func() {
		doneOnce.Do(func() { close(done) })
	}, errC
}
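// exampleDrainBin is an illustrative consumer of SubscribeBin; the helper and
// its control flow are assumptions, not part of the upstream code. It reads
// chunk references for one bin until the subscription ends, reports an error,
// or the context is cancelled, and always releases the subscription via the
// returned unsubscribe function.
func exampleDrainBin(ctx context.Context, db *DB, bin uint8, start uint64, handle func(*BinC)) error {
	out, unsubscribe, errC := db.SubscribeBin(ctx, bin, start)
	defer unsubscribe()

	for {
		select {
		case c, ok := <-out:
			if !ok {
				return nil
			}
			handle(c)
		case err := <-errC:
			return err
		case <-ctx.Done():
			return ctx.Err()
		}
	}
}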
// expiredBatchItem is a storage.Item implementation for expired batches.
type expiredBatchItem struct {
	BatchID []byte
}

// ID implements storage.Item.
func (e *expiredBatchItem) ID() string {
	return string(e.BatchID)
}

// Namespace implements storage.Item.
func (e *expiredBatchItem) Namespace() string {
	return "expiredBatchItem"
}

// Marshal implements storage.Item.
// It is a no-op as expiredBatchItem is not serialized.
func (e *expiredBatchItem) Marshal() ([]byte, error) {
	return nil, nil
}

// Unmarshal implements storage.Item.
// It is a no-op as expiredBatchItem is not serialized.
func (e *expiredBatchItem) Unmarshal(_ []byte) error {
	return nil
}

// Clone implements storage.Item.
func (e *expiredBatchItem) Clone() storage.Item {
	if e == nil {
		return nil
	}
	return &expiredBatchItem{
		BatchID: slices.Clone(e.BatchID),
	}
}

// String implements storage.Item.
func (e *expiredBatchItem) String() string {
	return storageutil.JoinFields(e.Namespace(), e.ID())
}

func (db *DB) po(addr swarm.Address) uint8 {
	return swarm.Proximity(db.baseAddr.Bytes(), addr.Bytes())
}
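// exampleCountReserveChunks is an illustrative sketch, not part of the
// upstream code, showing how to walk the full reserve with
// ReserveIterateChunks above; returning false from the callback continues the
// iteration, true stops it.
func exampleCountReserveChunks(db *DB) (int, error) {
	count := 0
	err := db.ReserveIterateChunks(func(_ swarm.Chunk) (bool, error) {
		count++
		return false, nil // false: keep iterating
	})
	return count, err
}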