github.com/onflow/flow-go@v0.35.7-crescendo-preview.23-atree-inlining/module/mempool/stdmap/eject.go (about)

     1  package stdmap
     2  
     3  import (
     4  	"fmt"
     5  	"math"
     6  	"sort"
     7  	"sync"
     8  
     9  	"github.com/onflow/flow-go/model/flow"
    10  	"github.com/onflow/flow-go/utils/rand"
    11  )
    12  
// overCapacityThreshold is the number of elements by which the collection must
// exceed its guaranteed capacity before a batch ejection is performed.
// EjectRandomFast does nothing while the excess is at or below this value.
const overCapacityThreshold = 128
    16  
// BatchEjectFunc implements an ejection policy to remove elements when the mempool
// exceeds its specified capacity. A custom ejection policy can be injected
// into the memory pool upon creation to change the strategy of eviction.
// The ejection policy is executed from within the thread that serves the
// mempool. Implementations should adhere to the following convention:
//   - The ejector function has the freedom to eject _multiple_ elements.
//   - In a single `eject` call, it must eject as many elements as needed to
//     statistically keep the mempool size within the desired limit.
//   - The ejector _might_ (for performance reasons) retain more elements in the
//     mempool than the targeted capacity.
//   - The ejector _must_ notify the `Backend.ejectionCallbacks` for _each_
//     element it removes from the mempool.
//   - Implementations do _not_ need to be concurrency safe. The Backend handles
//     concurrency (specifically, it locks the mempool during ejection).
//   - The implementation should be non-blocking (though, it is allowed to
//     take a bit of time; the mempool will just be locked during this time).
type BatchEjectFunc func(b *Backend) (bool, error)

// EjectFunc is the signature of an ejector that reports a single element
// rather than working in batches.
// NOTE(review): the results are presumably the ID of the ejected entity, the
// entity itself, and whether an ejection took place — confirm against the
// callers, which are not visible in this file.
type EjectFunc func(b *Backend) (flow.Identifier, flow.Entity, bool)
    35  
    36  // EjectRandomFast checks if the map size is beyond the
    37  // threshold size, and will iterate through them and eject unneeded
    38  // entries if that is the case.  Return values are unused
    39  func EjectRandomFast(b *Backend) (bool, error) {
    40  	currentSize := b.backData.Size()
    41  
    42  	if b.guaranteedCapacity >= currentSize {
    43  		return false, nil
    44  	}
    45  	// At this point, we know that currentSize > b.guaranteedCapacity. As
    46  	// currentSize fits into an int, b.guaranteedCapacity must also fit.
    47  	overcapacity := currentSize - b.guaranteedCapacity
    48  	if overcapacity <= overCapacityThreshold {
    49  		return false, nil
    50  	}
    51  
    52  	// Randomly select indices of elements to remove:
    53  	mapIndices := make([]int, 0, overcapacity)
    54  	for i := overcapacity; i > 0; i-- {
    55  		rand, err := rand.Uintn(currentSize)
    56  		if err != nil {
    57  			return false, fmt.Errorf("random generation failed: %w", err)
    58  		}
    59  		mapIndices = append(mapIndices, int(rand))
    60  	}
    61  	sort.Ints(mapIndices) // inplace
    62  
    63  	// Now, mapIndices is a sequentially sorted list of indices to remove.
    64  	// Remove them in a loop. Repeated indices are idempotent (subsequent
    65  	// ejection calls will make up for it).
    66  	idx := 0                     // index into mapIndices
    67  	next2Remove := mapIndices[0] // index of the element to be removed next
    68  	i := 0                       // index into the entities map
    69  	for entityID, entity := range b.backData.All() {
    70  		if i == next2Remove {
    71  			b.backData.Remove(entityID) // remove entity
    72  			for _, callback := range b.ejectionCallbacks {
    73  				callback(entity) // notify callback
    74  			}
    75  
    76  			idx++
    77  
    78  			// There is a (1 in b.guaranteedCapacity) chance that the
    79  			// next value in mapIndices is a duplicate. If a duplicate is
    80  			// found, skip it by incrementing 'idx'
    81  			for ; idx < int(overcapacity) && next2Remove == mapIndices[idx]; idx++ {
    82  			}
    83  
    84  			if idx == int(overcapacity) {
    85  				return true, nil
    86  			}
    87  			next2Remove = mapIndices[idx]
    88  		}
    89  		i++
    90  	}
    91  	return true, nil
    92  }
    93  
    94  // EjectPanic simply panics, crashing the program. Useful when cache is not expected
    95  // to grow beyond certain limits, but ejecting is not applicable
    96  func EjectPanic(b *Backend) (flow.Identifier, flow.Entity, bool) {
    97  	panic("unexpected: mempool size over the limit")
    98  }
    99  
// LRUEjector provides a swift FIFO ejection functionality. Despite its name,
// entities are ordered by the time they were first tracked: Track ignores an
// already-tracked ID, so re-tracking does not refresh an entity's position.
// The embedded mutex guards both fields.
type LRUEjector struct {
	sync.Mutex
	table  map[flow.Identifier]uint64 // sequence number assigned to each tracked entity
	seqNum uint64                     // next sequence number to assign (incremented by Track)
}
   106  
   107  func NewLRUEjector() *LRUEjector {
   108  	return &LRUEjector{
   109  		table:  make(map[flow.Identifier]uint64),
   110  		seqNum: 0,
   111  	}
   112  }
   113  
   114  // Track should be called every time a new entity is added to the mempool.
   115  // It tracks the entity for later ejection.
   116  func (q *LRUEjector) Track(entityID flow.Identifier) {
   117  	q.Lock()
   118  	defer q.Unlock()
   119  
   120  	if _, ok := q.table[entityID]; ok {
   121  		// skips adding duplicate item
   122  		return
   123  	}
   124  
   125  	// TODO current table structure provides O(1) track and untrack features
   126  	// however, the Eject functionality is asymptotically O(n).
   127  	// With proper resource cleanups by the mempools, the Eject is supposed
   128  	// as a very infrequent operation. However, further optimizations on
   129  	// Eject efficiency is needed.
   130  	q.table[entityID] = q.seqNum
   131  	q.seqNum++
   132  }
   133  
   134  // Untrack simply removes the tracker of the ejector off the entityID
   135  func (q *LRUEjector) Untrack(entityID flow.Identifier) {
   136  	q.Lock()
   137  	defer q.Unlock()
   138  
   139  	delete(q.table, entityID)
   140  }
   141  
   142  // Eject implements EjectFunc for LRUEjector. It finds the entity with the lowest sequence number (i.e.,
   143  // the oldest entity). It also untracks.  This is using a linear search
   144  func (q *LRUEjector) Eject(b *Backend) flow.Identifier {
   145  	q.Lock()
   146  	defer q.Unlock()
   147  
   148  	// finds the oldest entity
   149  	oldestSQ := uint64(math.MaxUint64)
   150  	var oldestID flow.Identifier
   151  	for _, id := range b.backData.Identifiers() {
   152  		if sq, ok := q.table[id]; ok {
   153  			if sq < oldestSQ {
   154  				oldestID = id
   155  				oldestSQ = sq
   156  			}
   157  		}
   158  	}
   159  
   160  	// untracks the oldest id as it is supposed to be ejected
   161  	delete(q.table, oldestID)
   162  
   163  	return oldestID
   164  }