github.com/onflow/flow-go@v0.35.7-crescendo-preview.23-atree-inlining/module/mempool/herocache/backdata/cache.go

     1  package herocache
     2  
     3  import (
     4  	"encoding/binary"
     5  	"time"
     6  	_ "unsafe" // for linking runtimeNano
     7  
     8  	"github.com/rs/zerolog"
     9  	"go.uber.org/atomic"
    10  
    11  	"github.com/onflow/flow-go/model/flow"
    12  	"github.com/onflow/flow-go/module"
    13  	"github.com/onflow/flow-go/module/mempool/herocache/backdata/heropool"
    14  	"github.com/onflow/flow-go/utils/logging"
    15  )
    16  
    17  //go:linkname runtimeNano runtime.nanotime
    18  func runtimeNano() int64
    19  
    20  const (
    21  	slotsPerBucket = uint64(16)
    22  
    23  	// slotAgeUnallocated defines an unallocated slot with zero age.
    24  	slotAgeUnallocated = uint64(0)
    25  
    26  	// telemetryCounterInterval is the number of required interactions with
    27  	// this back data prior to printing any log. This is done as a slow-down mechanism
    28  	// to avoid spamming logs upon read/write heavy operations. An interaction can be
    29  	// a read or write.
    30  	telemetryCounterInterval = uint64(10_000)
    31  
    32  	// telemetryDurationInterval is the required elapsed duration interval
    33  	// prior to printing any log. This is done as a slow-down mechanism
    34  	// to avoid spamming logs upon read/write heavy operations.
    35  	telemetryDurationInterval = 10 * time.Second
    36  )
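
        // Both thresholds must be crossed before a telemetry line is emitted. A minimal
        // sketch of the gating condition (mirroring logTelemetry below; shown here only
        // for illustration, not an additional API):
        //
        //	if counter >= telemetryCounterInterval &&
        //		time.Duration(runtimeNano()-lastDump) >= telemetryDurationInterval {
        //		// emit a single debug log, then reset the counter and the timestamp
        //	}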
    37  
    38  // bucketIndex is the data type representing a bucket index.
    39  type bucketIndex uint64
    40  
    41  // slotIndex is the data type representing a slot index in a bucket.
    42  type slotIndex uint64
    43  
    44  // sha32of256 is a 32-bit prefix of a flow.Identifier, stored in a slot as a compact fingerprint
    45  // of the entity it represents (it is used to locate the entity's slot within its bucket).
    46  type sha32of256 uint32
    47  
    48  // slot is an internal record corresponding to the identifier of an entity that is
    49  // meant to be stored in this Cache.
    50  type slot struct {
    51  	slotAge         uint64          // age of this slot.
    52  	entityIndex     heropool.EIndex // link to actual entity.
    53  	entityId32of256 sha32of256      // the 32-bit prefix of the entity identifier.
    54  }
    55  
    56  // slotBucket represents a bucket of slots.
    57  type slotBucket struct {
    58  	slots [slotsPerBucket]slot
    59  }
    60  
    61  // Cache implements an array-based generic memory pool backed by a fixed-size array.
    62  // Note that this implementation is NOT thread-safe, and the higher-level Backend is responsible for concurrency management.
    63  type Cache struct {
    64  	logger    zerolog.Logger
    65  	collector module.HeroCacheMetrics
    66  	// NOTE: as a BackData implementation, Cache must be non-blocking.
    67  	// Concurrency management is done by overlay Backend.
    68  	sizeLimit    uint32
    69  	slotCount    uint64 // total number of slots ever written; used as a logical clock for slot ages
    70  	bucketNum    uint64 // total number of buckets
    71  	ejectionMode heropool.EjectionMode
    72  	// buckets keeps the slots (i.e., entityId) of the (entityId, entity) pairs that are maintained in this BackData.
    73  	buckets []slotBucket
    74  	// entities keeps the values (i.e., entity) of the (entityId, entity) pairs that are maintained in this BackData.
    75  	entities *heropool.Pool
    76  	// telemetry
    77  	//
    78  	// availableSlotHistogram[i] represents the number of buckets with i
    79  	// available (i.e., empty) slots.
    80  	availableSlotHistogram []uint64
    81  	// interactionCounter keeps track of interactions made with
    82  	// Cache. Invoking any method of this BackData counts as
    83  	// an interaction. The interaction counter is set to zero whenever
    84  	// it reaches a predefined limit. Its purpose is to manage the speed at which
    85  	// telemetry logs are printed.
    86  	interactionCounter *atomic.Uint64
    87  	// lastTelemetryDump keeps track of the last time telemetry logs were dumped.
    88  	// Its purpose is to manage the speed at which telemetry logs are printed.
    89  	lastTelemetryDump *atomic.Int64
    90  	// tracer reports ejection events; initially nil, but can be injected using a CacheOpt.
    91  	tracer Tracer
    92  }
    93  
    94  // DefaultOversizeFactor determines the default oversizing factor of HeroCache.
    95  // What is the oversize factor?
    96  // Imagine adding n keys, rounds times, to a hash table with a fixed number of slots per bucket.
    97  // The number of buckets is chosen upon initialization and never changes.
    98  // If a bucket is full, the oldest key in it is ejected; if that key is still too new, this is a bucket overflow.
    99  // How many buckets are needed to avoid a bucket overflow, assuming cryptographic key hashing is used?
   100  // The overSizeFactor is used to determine the number of buckets.
   101  // Assume n = 16, rounds = 3, and slotsPerBucket = 3 for the tiny example below, showing overSizeFactor 1 through 6.
   102  // As overSizeFactor is increased, the chance of overflowing a bucket decreases:
   103  // With overSizeFactor 1:  8 of 48 keys can be added before a bucket overflow.
   104  // With overSizeFactor 2: 10 of 48 keys can be added before a bucket overflow.
   105  // With overSizeFactor 3: 13 of 48 keys can be added before a bucket overflow.
   106  // With overSizeFactor 4: 15 of 48 keys can be added before a bucket overflow.
   107  // With overSizeFactor 5: 27 of 48 keys can be added before a bucket overflow.
   108  // With overSizeFactor 6: all 48 keys can be added.
   109  // The default overSizeFactor here is different because slotsPerBucket is greater than 3.
   110  const DefaultOversizeFactor = uint32(8)
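
        // Illustrative arithmetic (the sizeLimit value is assumed, not taken from any
        // call site): with sizeLimit = 10_000 and DefaultOversizeFactor = 8, NewCache
        // below allocates 10_000 * 8 = 80_000 slots, i.e. 80_000 / 16 = 5_000 buckets
        // of slotsPerBucket slots each. A sketch of the same computation:
        //
        //	capacity := uint64(10_000) * uint64(DefaultOversizeFactor) // 80_000 slots
        //	bucketNum := capacity / slotsPerBucket                     // 5_000 buckets
        //	if capacity%slotsPerBucket != 0 {
        //		bucketNum++ // round up when capacity is not a multiple of slotsPerBucket
        //	}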
   111  
   112  func NewCache(
   113  	sizeLimit uint32,
   114  	oversizeFactor uint32,
   115  	ejectionMode heropool.EjectionMode,
   116  	logger zerolog.Logger,
   117  	collector module.HeroCacheMetrics,
   118  	opts ...CacheOpt,
   119  ) *Cache {
   120  
   121  	// total capacity (in slots), from which the number of buckets is derived.
   122  	capacity := uint64(sizeLimit * oversizeFactor)
   123  	bucketNum := capacity / slotsPerBucket
   124  	if bucketNum == 0 {
   125  		// we panic here because we don't want to continue with a zero bucketNum (it can cause a DoS attack).
   126  		panic("bucketNum cannot be zero, choose a bigger sizeLimit or a smaller oversizeFactor")
   127  	}
   128  
   129  	if capacity%slotsPerBucket != 0 {
   130  		// accounting for remainder.
   131  		bucketNum++
   132  	}
   133  
   134  	bd := &Cache{
   135  		logger:                 logger,
   136  		collector:              collector,
   137  		bucketNum:              bucketNum,
   138  		sizeLimit:              sizeLimit,
   139  		buckets:                make([]slotBucket, bucketNum),
   140  		ejectionMode:           ejectionMode,
   141  		entities:               heropool.NewHeroPool(sizeLimit, ejectionMode, logger),
   142  		availableSlotHistogram: make([]uint64, slotsPerBucket+1), // +1 is to account for empty buckets as well.
   143  		interactionCounter:     atomic.NewUint64(0),
   144  		lastTelemetryDump:      atomic.NewInt64(0),
   145  	}
   146  
   147  	// apply extra options
   148  	for _, opt := range opts {
   149  		opt(bd)
   150  	}
   151  
   152  	return bd
   153  }
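
        // A hedged usage sketch (the parameter values, the no-op logger, and the no-op
        // metrics collector are illustrative assumptions; real call sites in flow-go
        // supply their own logger, HeroCacheMetrics implementation, and CacheOpt values):
        //
        //	backData := herocache.NewCache(
        //		10_000,                          // sizeLimit: maximum number of cached entities
        //		herocache.DefaultOversizeFactor, // oversizeFactor: extra buckets to reduce bucket overflows
        //		heropool.LRUEjection,            // ejectionMode: eject the least recently used entity when full
        //		zerolog.Nop(),
        //		metrics.NewNoopCollector(),      // assumes flow-go's module/metrics no-op collector
        //	)
        //	_ = backData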
   154  
   155  // Has checks if backdata already contains the entity with the given identifier.
   156  func (c *Cache) Has(entityID flow.Identifier) bool {
   157  	defer c.logTelemetry()
   158  
   159  	_, _, _, ok := c.get(entityID)
   160  	return ok
   161  }
   162  
   163  // Add adds the given entity to the backdata and returns true if the entity was added or false if
   164  // a valid entity already exists for the provided ID.
   165  func (c *Cache) Add(entityID flow.Identifier, entity flow.Entity) bool {
   166  	defer c.logTelemetry()
   167  	return c.put(entityID, entity)
   168  }
   169  
   170  // Remove removes the entity with the given identifier and returns the removed entity and true if
   171  // the entity was removed or false if the entity was not found.
   172  func (c *Cache) Remove(entityID flow.Identifier) (flow.Entity, bool) {
   173  	defer c.logTelemetry()
   174  
   175  	entity, bucketIndex, sliceIndex, exists := c.get(entityID)
   176  	if !exists {
   177  		return nil, false
   178  	}
   179  	// removes value from underlying entities list.
   180  	c.invalidateEntity(bucketIndex, sliceIndex)
   181  
   182  	// frees up slot
   183  	c.unuseSlot(bucketIndex, sliceIndex)
   184  
   185  	c.collector.OnKeyRemoved(c.entities.Size())
   186  	return entity, true
   187  }
   188  
   189  // Adjust adjusts the entity using the given function if the given identifier can be found.
   190  // Returns a bool which indicates whether the entity was updated as well as the updated entity.
   191  func (c *Cache) Adjust(entityID flow.Identifier, f func(flow.Entity) flow.Entity) (flow.Entity, bool) {
   192  	defer c.logTelemetry()
   193  
   194  	entity, removed := c.Remove(entityID)
   195  	if !removed {
   196  		return nil, false
   197  	}
   198  
   199  	newEntity := f(entity)
   200  	newEntityID := newEntity.ID()
   201  
   202  	c.put(newEntityID, newEntity)
   203  
   204  	return newEntity, true
   205  }
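
        // Note that the adjusted entity is re-inserted under newEntity.ID(); an
        // adjustment that changes the identifier therefore moves the entry. A hedged
        // sketch (the concrete entity type and the mutation are hypothetical):
        //
        //	updated, ok := c.Adjust(entityID, func(e flow.Entity) flow.Entity {
        //		tx := e.(*flow.TransactionBody) // hypothetical concrete type
        //		tx.GasLimit = 9999              // hypothetical mutation
        //		return tx
        //	})
        //	_, _ = updated, ok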
   206  
   207  // AdjustWithInit adjusts the entity using the given function if the given identifier can be found. When the
   208  // entity is not found, it initializes the entity using the given init function and then applies the adjust function.
   209  // Args:
   210  // - entityID: the identifier of the entity to adjust.
   211  // - adjust: the function that adjusts the entity.
   212  // - init: the function that initializes the entity when it is not found.
   213  // Returns:
   214  //   - the adjusted entity.
   215  //
   216  //   - a bool which indicates whether the entity was adjusted.
   217  func (c *Cache) AdjustWithInit(entityID flow.Identifier, adjust func(flow.Entity) flow.Entity, init func() flow.Entity) (flow.Entity, bool) {
   218  	defer c.logTelemetry()
   219  
   220  	if c.Has(entityID) {
   221  		return c.Adjust(entityID, adjust)
   222  	}
   223  	c.put(entityID, init())
   224  	return c.Adjust(entityID, adjust)
   225  }
   226  
   227  // GetWithInit returns the given entity from the backdata. If the entity does not exist, it creates a new entity
   228  // using the factory function and stores it in the backdata.
   229  // Args:
   230  // - entityID: the identifier of the entity to get.
   231  // - init: the function that initializes the entity when it is not found.
   232  // Returns:
   233  //   - the entity.
   234  //
   235  //   - a bool which indicates whether the entity was found (or created).
   236  func (c *Cache) GetWithInit(entityID flow.Identifier, init func() flow.Entity) (flow.Entity, bool) {
   237  	defer c.logTelemetry()
   238  
   239  	if c.Has(entityID) {
   240  		return c.ByID(entityID)
   241  	}
   242  	c.put(entityID, init())
   243  	return c.ByID(entityID)
   244  }
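
        // A hedged sketch of the read-or-initialize pattern (the init closure and the
        // entity value are hypothetical):
        //
        //	entity, ok := c.GetWithInit(entityID, func() flow.Entity {
        //		return &flow.Collection{} // hypothetical default entity
        //	})
        //	_, _ = entity, ok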
   245  
   246  // ByID returns the given entity from the backdata.
   247  func (c *Cache) ByID(entityID flow.Identifier) (flow.Entity, bool) {
   248  	defer c.logTelemetry()
   249  
   250  	entity, _, _, ok := c.get(entityID)
   251  	return entity, ok
   252  }
   253  
   254  // Size returns the size of the backdata, i.e., total number of stored (entityId, entity) pairs.
   255  func (c *Cache) Size() uint {
   256  	defer c.logTelemetry()
   257  
   258  	return uint(c.entities.Size())
   259  }
   260  
   261  // Head returns the head of the queue.
   262  // Boolean return value determines whether there is a head available.
   263  func (c *Cache) Head() (flow.Entity, bool) {
   264  	return c.entities.Head()
   265  }
   266  
   267  // All returns all entities stored in the backdata.
   268  func (c *Cache) All() map[flow.Identifier]flow.Entity {
   269  	defer c.logTelemetry()
   270  
   271  	entitiesList := c.entities.All()
   272  	all := make(map[flow.Identifier]flow.Entity, len(entitiesList))
   273  
   274  	total := len(entitiesList)
   275  	for i := 0; i < total; i++ {
   276  		p := entitiesList[i]
   277  		all[p.Id()] = p.Entity()
   278  	}
   279  
   280  	return all
   281  }
   282  
   283  // Identifiers returns the list of identifiers of entities stored in the backdata.
   284  func (c *Cache) Identifiers() flow.IdentifierList {
   285  	defer c.logTelemetry()
   286  
   287  	ids := make(flow.IdentifierList, c.entities.Size())
   288  	for i, p := range c.entities.All() {
   289  		ids[i] = p.Id()
   290  	}
   291  
   292  	return ids
   293  }
   294  
   295  // Entities returns the list of entities stored in the backdata.
   296  func (c *Cache) Entities() []flow.Entity {
   297  	defer c.logTelemetry()
   298  
   299  	entities := make([]flow.Entity, c.entities.Size())
   300  	for i, p := range c.entities.All() {
   301  		entities[i] = p.Entity()
   302  	}
   303  
   304  	return entities
   305  }
   306  
   307  // Clear removes all entities from the backdata.
   308  func (c *Cache) Clear() {
   309  	defer c.logTelemetry()
   310  
   311  	c.buckets = make([]slotBucket, c.bucketNum)
   312  	c.entities = heropool.NewHeroPool(c.sizeLimit, c.ejectionMode, c.logger)
   313  	c.availableSlotHistogram = make([]uint64, slotsPerBucket+1)
   314  	c.interactionCounter = atomic.NewUint64(0)
   315  	c.lastTelemetryDump = atomic.NewInt64(0)
   316  	c.slotCount = 0
   317  }
   318  
   319  // put writes the (entityId, entity) pair into this BackData. Boolean return value
   320  // determines whether the write operation was successful. A write operation fails when a duplicate
   321  // entityId already exists in the BackData and that entityId is linked to a valid entity.
   322  func (c *Cache) put(entityId flow.Identifier, entity flow.Entity) bool {
   323  	c.collector.OnKeyPutAttempt(c.entities.Size())
   324  
   325  	entityId32of256, b := c.entityId32of256AndBucketIndex(entityId)
   326  	slotToUse, unique := c.slotIndexInBucket(b, entityId32of256, entityId)
   327  	if !unique {
   328  		// entityId already exists
   329  		c.collector.OnKeyPutDeduplicated()
   330  		return false
   331  	}
   332  
   333  	if linkedId, _, ok := c.linkedEntityOf(b, slotToUse); ok {
   334  		// bucket is full, and we are replacing an already linked (but old) slot that has a valid value, hence
   335  		// we should remove its value from underlying entities list.
   336  		ejectedEntity := c.invalidateEntity(b, slotToUse)
   337  		if c.tracer != nil {
   338  			c.tracer.EntityEjectionDueToEmergency(ejectedEntity)
   339  		}
   340  		c.collector.OnEntityEjectionDueToEmergency()
   341  		c.logger.Warn().
   342  			Hex("replaced_entity_id", logging.ID(linkedId)).
   343  			Hex("added_entity_id", logging.ID(entityId)).
   344  			Msg("emergency ejection, adding entity to cache resulted in replacing a valid key, potential collision")
   345  	}
   346  
   347  	c.slotCount++
   348  	entityIndex, slotAvailable, ejectedEntity := c.entities.Add(entityId, entity, c.ownerIndexOf(b, slotToUse))
   349  	if !slotAvailable {
   350  		c.collector.OnKeyPutDrop()
   351  		return false
   352  	}
   353  
   354  	if ejectedEntity != nil {
   355  		// cache is at its full size and ejection happened to make room for this new entity.
   356  		if c.tracer != nil {
   357  			c.tracer.EntityEjectionDueToFullCapacity(ejectedEntity)
   358  		}
   359  		c.collector.OnEntityEjectionDueToFullCapacity()
   360  	}
   361  
   362  	c.buckets[b].slots[slotToUse].slotAge = c.slotCount
   363  	c.buckets[b].slots[slotToUse].entityIndex = entityIndex
   364  	c.buckets[b].slots[slotToUse].entityId32of256 = entityId32of256
   365  	c.collector.OnKeyPutSuccess(c.entities.Size())
   366  	return true
   367  }
   368  
   369  // get retrieves the entity corresponding to given identifier from underlying entities list.
   370  // The boolean return value determines whether an entity with given id exists in the BackData.
   371  func (c *Cache) get(entityID flow.Identifier) (flow.Entity, bucketIndex, slotIndex, bool) {
   372  	entityId32of256, b := c.entityId32of256AndBucketIndex(entityID)
   373  	for s := slotIndex(0); s < slotIndex(slotsPerBucket); s++ {
   374  		if c.buckets[b].slots[s].entityId32of256 != entityId32of256 {
   375  			continue
   376  		}
   377  
   378  		id, entity, linked := c.linkedEntityOf(b, s)
   379  		if !linked {
   380  			// no linked entity for this (bucketIndex, slotIndex) pair.
   381  			c.collector.OnKeyGetFailure()
   382  			return nil, 0, 0, false
   383  		}
   384  
   385  		if id != entityID {
   386  			// checking identifiers fully.
   387  			continue
   388  		}
   389  
   390  		c.collector.OnKeyGetSuccess()
   391  		return entity, b, s, true
   392  	}
   393  
   394  	c.collector.OnKeyGetFailure()
   395  	return nil, 0, 0, false
   396  }
   397  
   398  // entityId32of256AndBucketIndex determines the id prefix as well as the bucket index corresponding to the
   399  // given identifier.
   400  func (c *Cache) entityId32of256AndBucketIndex(id flow.Identifier) (sha32of256, bucketIndex) {
   401  	// uint64(id[0:8]) is used to compute the bucket index to which this identifier belongs.
   402  	b := binary.LittleEndian.Uint64(id[0:8]) % c.bucketNum
   403  
   404  	// uint32(id[8:12]) is used as a shorter in-memory representation of this identifier.
   405  	entityId32of256 := binary.LittleEndian.Uint32(id[8:12])
   406  
   407  	return sha32of256(entityId32of256), bucketIndex(b)
   408  }
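
        // A standalone sketch of the same derivation (the modulus 5_000 stands in for
        // c.bucketNum and is an assumed value):
        //
        //	var id flow.Identifier // 32-byte identifier
        //	bucket := binary.LittleEndian.Uint64(id[0:8]) % 5_000 // bucket the identifier hashes into
        //	prefix := binary.LittleEndian.Uint32(id[8:12])        // 32-bit fingerprint stored in the slot
        //	_, _ = bucket, prefix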
   409  
   410  // expiryThreshold returns the age threshold at or below which a slot is considered old enough for eviction.
   411  func (c *Cache) expiryThreshold() uint64 {
   412  	var expiryThreshold uint64 = 0
   413  	if c.slotCount > uint64(c.sizeLimit) {
   414  		// total number of slots written are above the predefined limit
   415  		expiryThreshold = c.slotCount - uint64(c.sizeLimit)
   416  	}
   417  
   418  	return expiryThreshold
   419  }
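
        // Worked example (numbers assumed): with sizeLimit = 10_000 and 12_500 slots
        // written so far, the threshold is 12_500 - 10_000 = 2_500, so any slot whose
        // slotAge is at most 2_500 is treated by slotIndexInBucket as expired (or never
        // used) and is free to be reclaimed.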
   420  
   421  // slotIndexInBucket returns a free slot for this entityId in the bucket. In case the bucket is full, it returns the index of
   422  // the oldest valid slot to be overwritten. It returns false if the entityId already exists in this bucket.
   423  func (c *Cache) slotIndexInBucket(b bucketIndex, slotId sha32of256, entityId flow.Identifier) (slotIndex, bool) {
   424  	slotToUse := slotIndex(0)
   425  	expiryThreshold := c.expiryThreshold()
   426  	availableSlotCount := uint64(0) // for telemetry logs.
   427  
   428  	oldestSlotInBucket := c.slotCount + 1 // initializes the oldest slot to current max.
   429  
   430  	for s := slotIndex(0); s < slotIndex(slotsPerBucket); s++ {
   431  		if c.buckets[b].slots[s].slotAge < oldestSlotInBucket {
   432  			// record slot s as oldest slot
   433  			oldestSlotInBucket = c.buckets[b].slots[s].slotAge
   434  			slotToUse = s
   435  		}
   436  
   437  		if c.buckets[b].slots[s].slotAge <= expiryThreshold {
   438  			// slot technically expired or never assigned
   439  			availableSlotCount++
   440  			continue
   441  		}
   442  
   443  		if c.buckets[b].slots[s].entityId32of256 != slotId {
   444  			// slot id is distinct and fresh, and hence move to next slot.
   445  			continue
   446  		}
   447  
   448  		id, _, linked := c.linkedEntityOf(b, s)
   449  		if !linked {
   450  			// slot is not linked to a valid entity, hence, can be used
   451  			// as an available slot.
   452  			availableSlotCount++
   453  			slotToUse = s
   454  			continue
   455  		}
   456  
   457  		if id != entityId {
   458  			// slot is fresh, fully distinct, and linked. Hence,
   459  			// moving to next slot.
   460  			continue
   461  		}
   462  
   463  		// entity ID already exists in the bucket
   464  		return 0, false
   465  	}
   466  
   467  	c.availableSlotHistogram[availableSlotCount]++
   468  	c.collector.BucketAvailableSlots(availableSlotCount, slotsPerBucket)
   469  	return slotToUse, true
   470  }
   471  
   472  // ownerIndexOf maps the (bucketIndex, slotIndex) pair to a canonical unique (scalar) index.
   473  // This scalar index is used to represent this (bucketIndex, slotIndex) pair in the underlying
   474  // entities list.
   475  func (c *Cache) ownerIndexOf(b bucketIndex, s slotIndex) uint64 {
   476  	return (uint64(b) * slotsPerBucket) + uint64(s)
   477  }
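
        // Worked example (indices assumed): with slotsPerBucket = 16, the pair
        // (bucketIndex 7, slotIndex 3) maps to owner index 7*16 + 3 = 115; this is the
        // scalar handed to the heropool on Add and compared against in linkedEntityOf.
        //
        //	owner := uint64(7)*slotsPerBucket + uint64(3) // 115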
   478  
   479  // linkedEntityOf returns the entity linked to this (bucketIndex, slotIndex) pair from the underlying entities list.
   480  // By a linked entity, we mean an entity whose owner index matches this (bucketIndex, slotIndex) pair.
   481  // The bool return value corresponds to whether there is a linked entity to this (bucketIndex, slotIndex) or not.
   482  func (c *Cache) linkedEntityOf(b bucketIndex, s slotIndex) (flow.Identifier, flow.Entity, bool) {
   483  	if c.buckets[b].slots[s].slotAge == slotAgeUnallocated {
   484  		// slotIndex never used, or recently invalidated, hence
   485  		// does not have any linked entity
   486  		return flow.Identifier{}, nil, false
   487  	}
   488  
   489  	// retrieving entity index in the underlying entities linked-list
   490  	valueIndex := c.buckets[b].slots[s].entityIndex
   491  	id, entity, owner := c.entities.Get(valueIndex)
   492  	if c.ownerIndexOf(b, s) != owner {
   493  		// entity is not linked to this (bucketIndex, slotIndex)
   494  		c.buckets[b].slots[s].slotAge = slotAgeUnallocated
   495  		return flow.Identifier{}, nil, false
   496  	}
   497  
   498  	return id, entity, true
   499  }
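
        // Illustration of the lazy invalidation above (values assumed): if bucket 7,
        // slot 3 records entityIndex 42 but c.entities.Get(42) reports owner 99, then
        // ownerIndexOf(7, 3) = 115 != 99, meaning the pool entry at index 42 has since
        // been reassigned to another slot; the stale slot is marked unallocated and the
        // lookup reports no linked entity.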
   500  
   501  // logTelemetry prints telemetry logs depending on number of interactions and last time telemetry has been logged.
   502  func (c *Cache) logTelemetry() {
   503  	counter := c.interactionCounter.Inc()
   504  	if counter < telemetryCounterInterval {
   505  		// not enough interactions to log.
   506  		return
   507  	}
   508  	if time.Duration(runtimeNano()-c.lastTelemetryDump.Load()) < telemetryDurationInterval {
   509  		// not enough time elapsed since the last log.
   510  		return
   511  	}
   512  	if !c.interactionCounter.CompareAndSwap(counter, 0) {
   513  		// raced on CAS, hence, not logging.
   514  		return
   515  	}
   516  
   517  	lg := c.logger.With().
   518  		Uint64("total_slots_written", c.slotCount).
   519  		Uint64("total_interactions_since_last_log", counter).Logger()
   520  
   521  	for i := range c.availableSlotHistogram {
   522  		lg = lg.With().
   523  			Int("available_slots", i).
   524  			Uint64("total_buckets", c.availableSlotHistogram[i]).
   525  			Logger()
   526  	}
   527  
   528  	lg.Debug().Msg("logging telemetry")
   529  	c.lastTelemetryDump.Store(runtimeNano())
   530  }
   531  
   532  // unuseSlot marks slot as free so that it is ready to be re-used.
   533  func (c *Cache) unuseSlot(b bucketIndex, s slotIndex) {
   534  	c.buckets[b].slots[s].slotAge = slotAgeUnallocated
   535  }
   536  
   537  // invalidateEntity removes the entity linked to the specified slot from the underlying entities
   538  // list, so that the entity's slot in the pool is made available for re-use if needed.
   539  func (c *Cache) invalidateEntity(b bucketIndex, s slotIndex) flow.Entity {
   540  	return c.entities.Remove(c.buckets[b].slots[s].entityIndex)
   541  }