github.com/onflow/flow-go@v0.35.7-crescendo-preview.23-atree-inlining/module/mempool/stdmap/eject.go

package stdmap

import (
	"fmt"
	"math"
	"sort"
	"sync"

	"github.com/onflow/flow-go/model/flow"
	"github.com/onflow/flow-go/utils/rand"
)

// overCapacityThreshold is the threshold for how far over its guaranteed
// capacity the collection may grow before a batch ejection is performed.
const overCapacityThreshold = 128

// BatchEjectFunc implements an ejection policy to remove elements when the mempool
// exceeds its specified capacity. A custom ejection policy can be injected
// into the memory pool upon creation to change the strategy of eviction.
// The ejection policy is executed from within the thread that serves the
// mempool. Implementations should adhere to the following convention:
//   - The ejector function has the freedom to eject _multiple_ elements.
//   - In a single `eject` call, it must eject as many elements as needed to
//     statistically keep the mempool size within the desired limit.
//   - The ejector _might_ (for performance reasons) retain more elements in the
//     mempool than the targeted capacity.
//   - The ejector _must_ notify the `Backend.ejectionCallbacks` for _each_
//     element it removes from the mempool.
//   - Implementations do _not_ need to be concurrency safe. The Backend handles
//     concurrency (specifically, it locks the mempool during ejection).
//   - The implementation should be non-blocking (though, it is allowed to
//     take a bit of time; the mempool will just be locked during this time).
type BatchEjectFunc func(b *Backend) (bool, error)
type EjectFunc func(b *Backend) (flow.Identifier, flow.Entity, bool)

// EjectRandomFast checks whether the mempool size exceeds the guaranteed
// capacity by more than the threshold; if so, it iterates over the entries
// and ejects randomly selected ones. Return values are unused.
func EjectRandomFast(b *Backend) (bool, error) {
	currentSize := b.backData.Size()

	if b.guaranteedCapacity >= currentSize {
		return false, nil
	}
	// At this point, we know that currentSize > b.guaranteedCapacity. As
	// currentSize fits into an int, b.guaranteedCapacity must also fit.
	overcapacity := currentSize - b.guaranteedCapacity
	if overcapacity <= overCapacityThreshold {
		return false, nil
	}

	// Randomly select indices of elements to remove:
	mapIndices := make([]int, 0, overcapacity)
	for i := overcapacity; i > 0; i-- {
		rand, err := rand.Uintn(currentSize)
		if err != nil {
			return false, fmt.Errorf("random generation failed: %w", err)
		}
		mapIndices = append(mapIndices, int(rand))
	}
	sort.Ints(mapIndices) // in place

	// Now, mapIndices is a sorted list of indices to remove.
	// Remove them in a loop. Repeated indices are idempotent (subsequent
	// ejection calls will make up for it).
	idx := 0                     // index into mapIndices
	next2Remove := mapIndices[0] // index of the element to be removed next
	i := 0                       // index into the entities map
	for entityID, entity := range b.backData.All() {
		if i == next2Remove {
			b.backData.Remove(entityID) // remove entity
			for _, callback := range b.ejectionCallbacks {
				callback(entity) // notify callback
			}

			idx++

			// There is a (1 in b.guaranteedCapacity) chance that the
			// next value in mapIndices is a duplicate. If a duplicate is
			// found, skip it by incrementing 'idx'.
			for ; idx < int(overcapacity) && next2Remove == mapIndices[idx]; idx++ {
			}

			if idx == int(overcapacity) {
				return true, nil
			}
			next2Remove = mapIndices[idx]
		}
		i++
	}
	return true, nil
}

// EjectPanic simply panics, crashing the program. Useful when the cache is not
// expected to grow beyond certain limits, but ejection is not applicable.
func EjectPanic(b *Backend) (flow.Identifier, flow.Entity, bool) {
	panic("unexpected: mempool size over the limit")
}

// LRUEjector provides swift FIFO ejection functionality.
type LRUEjector struct {
	sync.Mutex
	table  map[flow.Identifier]uint64 // keeps the sequence number of each tracked entity
	seqNum uint64                     // the next sequence number to be assigned
}

func NewLRUEjector() *LRUEjector {
	return &LRUEjector{
		table:  make(map[flow.Identifier]uint64),
		seqNum: 0,
	}
}

// Track should be called every time a new entity is added to the mempool.
// It tracks the entity for later ejection.
func (q *LRUEjector) Track(entityID flow.Identifier) {
	q.Lock()
	defer q.Unlock()

	if _, ok := q.table[entityID]; ok {
		// skip adding a duplicate item
		return
	}

	// TODO: the current table structure provides O(1) Track and Untrack;
	// however, Eject is asymptotically O(n). With proper resource cleanup
	// by the mempools, Eject is expected to be a very infrequent operation.
	// Nevertheless, further optimization of Eject's efficiency is needed.
	q.table[entityID] = q.seqNum
	q.seqNum++
}

// Untrack stops tracking the given entityID, removing it from the ejector's table.
func (q *LRUEjector) Untrack(entityID flow.Identifier) {
	q.Lock()
	defer q.Unlock()

	delete(q.table, entityID)
}

// Eject implements the ejection policy for LRUEjector. Using a linear search,
// it finds the entity with the lowest sequence number (i.e., the oldest entity),
// untracks it, and returns its identifier.
func (q *LRUEjector) Eject(b *Backend) flow.Identifier {
	q.Lock()
	defer q.Unlock()

	// find the oldest entity
	oldestSQ := uint64(math.MaxUint64)
	var oldestID flow.Identifier
	for _, id := range b.backData.Identifiers() {
		if sq, ok := q.table[id]; ok {
			if sq < oldestSQ {
				oldestID = id
				oldestSQ = sq
			}
		}
	}

	// untrack the oldest id, as it is about to be ejected
	delete(q.table, oldestID)

	return oldestID
}
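For reference, the sampling pattern at the heart of EjectRandomFast (pick `overcapacity` random positions, sort them, then remove the matching entries in one pass over the map while notifying a callback per removal) can be illustrated outside the package. The sketch below is a simplified stand-in: the names ejectRandom and onEject, the plain map[string]int, and math/rand in place of the crypto-backed utils/rand are assumptions made for illustration only.

// sample_eject_sketch.go (hypothetical, standalone; not part of flow-go)
package main

import (
	"fmt"
	"math/rand"
	"sort"
)

// ejectRandom removes roughly `overcapacity` randomly chosen entries from the
// map in a single pass, invoking onEject for every removed element. Duplicate
// sampled indices simply mean fewer removals in this round, mirroring
// EjectRandomFast above.
func ejectRandom(entries map[string]int, overcapacity int, onEject func(key string, value int)) {
	// Sample `overcapacity` random positions within the current map size.
	indices := make([]int, 0, overcapacity)
	for n := 0; n < overcapacity; n++ {
		indices = append(indices, rand.Intn(len(entries)))
	}
	sort.Ints(indices)

	idx := 0 // position in the sorted index list
	i := 0   // position in the map iteration
	for key, value := range entries {
		if idx < len(indices) && i == indices[idx] {
			delete(entries, key) // remove entry
			onEject(key, value)  // notify callback, one call per removal

			// Skip over duplicate sampled indices.
			for idx < len(indices) && indices[idx] == i {
				idx++
			}
		}
		i++
	}
}

func main() {
	entries := make(map[string]int)
	for n := 0; n < 20; n++ {
		entries[fmt.Sprintf("entity-%02d", n)] = n
	}

	// Eject roughly 5 of the 20 entries; the callback stands in for
	// Backend.ejectionCallbacks.
	ejectRandom(entries, 5, func(key string, value int) {
		fmt.Println("ejected", key)
	})
	fmt.Println("remaining:", len(entries))
}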
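LRUEjector's behaviour follows from its sequence-number table: Track hands out monotonically increasing numbers, duplicates keep their original number, and untracking then re-tracking an identifier yields a fresh, higher number, so Eject's linear scan for the minimum always returns the oldest still-tracked entry. Below is a minimal sketch of that lifecycle, written as if it lived in a hypothetical test file next to eject.go; the file name, the test function, and the literal identifiers are assumptions for illustration.

// eject_lru_sketch_test.go (hypothetical file in package stdmap)
package stdmap

import (
	"testing"

	"github.com/onflow/flow-go/model/flow"
)

func TestLRUEjectorSequenceNumbers(t *testing.T) {
	ejector := NewLRUEjector()

	idA := flow.Identifier{0x01}
	idB := flow.Identifier{0x02}

	ejector.Track(idA) // idA is assigned sequence number 0
	ejector.Track(idB) // idB is assigned sequence number 1
	ejector.Track(idA) // duplicate: idA keeps sequence number 0

	if ejector.table[idA] != 0 || ejector.table[idB] != 1 {
		t.Fatalf("unexpected sequence numbers: %v", ejector.table)
	}

	// Untracking and re-tracking an identifier assigns a fresh, higher
	// sequence number, so it is no longer considered the oldest entry.
	ejector.Untrack(idA)
	ejector.Track(idA)

	if ejector.table[idA] != 2 {
		t.Fatalf("expected idA to be re-tracked with sequence number 2, got %d", ejector.table[idA])
	}

	// Eject(b *Backend) would scan the backend's identifiers, return the
	// tracked identifier with the lowest sequence number (here idB), and
	// remove it from the ejector's table.
}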