github.com/lrita/cache@v1.0.1/cache.go

// Copyright 2019 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package cache

import (
	"runtime"
	"sync"
	"sync/atomic"
	"unsafe"

	"github.com/lrita/cache/race"
	"github.com/lrita/numa"
)

// Cache is a set of temporary objects that may be individually saved and
// retrieved.
//
// A Cache is safe for use by multiple goroutines simultaneously.
//
// Cache's purpose is to cache allocated but unused items for later reuse,
// relieving pressure on the garbage collector. That is, it makes it easy to
// build efficient, thread-safe free lists. However, it is not suitable for all
// free lists.
//
// An appropriate use of a Cache is to manage a group of temporary items
// silently shared among and potentially reused by concurrent independent
// clients of a package. Cache provides a way to amortize allocation overhead
// across many clients.
//
// The difference from the standard library's sync.Pool is that items in a
// Cache are not deallocated by the garbage collector, and that there are
// multiple slots in both the per-P storage and the per-NUMA-node storage. The
// free lists in a Cache are maintained as part of a long-lived object and are
// intended for long-running process logic. Users can tune the per-NUMA-node
// size (Cache.Size) based on profiling to minimize allocations.
//
// A Cache must not be copied after first use.
type Cache struct {
	noCopy noCopy

	nodes unsafe.Pointer // per-NUMA-node pool, actual type is [N]cacheNode

	local     unsafe.Pointer // local fixed-size per-P pool, actual type is [P]cacheLocal
	localSize uintptr        // size of the local array

	mu sync.Mutex
	// New optionally specifies a function to generate
	// a value when Get would otherwise return nil.
	// It may not be changed concurrently with calls to Get.
	New func() interface{}
	// Size optionally specifies the maximum number of items kept in each
	// per-NUMA-node list.
	Size int64
}
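
// A minimal construction sketch (illustrative only: the element type, the
// sizes and the name bufcache are assumptions, not part of this package):
//
//	var bufcache = cache.Cache{
//		// New is called by Get when nothing cached is available.
//		New: func() interface{} { return make([]byte, 0, 4096) },
//		// Allow roughly 1024 spare items to be parked per NUMA node.
//		Size: 1024,
//	}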

const (
	locked   int64 = 1
	unlocked       = 0
	// interface{} is 16 bytes wide on 64-bit platforms,
	// leaving only 7 slots per 128-byte cache line.
	cacheShardSize = 7 // number of elements per shard
)

// Due to https://github.com/golang/go/issues/14620, in some situations we
// cannot guarantee the alignment of an object through composition alone.
type issues14620a struct {
	_ *cacheShard
}

type cacheShardInternal struct {
	elems int
	elem  [cacheShardSize]interface{}
	next  *cacheShard
}

type cacheShard struct {
	cacheShardInternal
	// Prevents false sharing on widespread platforms with
	// 128 mod (cache line size) = 0.
	_ [128 - unsafe.Sizeof(cacheShardInternal{})%128]byte
}
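
// On a typical 64-bit platform the fields of cacheShardInternal take
// 8 (elems) + 7*16 (elem) + 8 (next) = 128 bytes, which is why cacheShardSize
// is 7: one more element would push the shard past a 128-byte group. The
// trailing padding pads unsafe.Sizeof(cacheShard{}) out to a multiple of
// 128 bytes.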

type cacheLocal cacheShard

type cacheNodeInternal struct {
	lock  int64
	_     [7]int64
	size  int64       // size of the node's full-shard pool, counted in elements
	full  *cacheShard // node pool of full shards (elems == cacheShardSize)
	empty *cacheShard // node pool of empty shards (elems == 0)
}
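
// On a typical 64-bit platform, lock plus the [7]int64 padding occupy
// 8 + 7*8 = 64 bytes, so the CAS-contended lock word is kept apart from size,
// full and empty, presumably to limit false sharing between the lock and the
// fields it protects (subject to the allocation-alignment caveat noted above).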

func (c *cacheNodeInternal) trylock() bool {
	ok := atomic.CompareAndSwapInt64(&c.lock, unlocked, locked)
	if race.Enabled && ok {
		// Tell the race detector that acquiring this hand-rolled CAS lock
		// establishes a happens-before edge with the matching Release in
		// unlock.
		race.Acquire(unsafe.Pointer(c))
	}
	return ok
}

func (c *cacheNodeInternal) unlock() {
	if race.Enabled {
		race.Release(unsafe.Pointer(c))
	}
	atomic.StoreInt64(&c.lock, unlocked)
}

type cacheNode struct {
	cacheNodeInternal
	// Prevents false sharing on widespread platforms with
	// 128 mod (cache line size) = 0.
	_ [128 - unsafe.Sizeof(cacheNodeInternal{})%128]byte
}

// Put adds x to the Cache.
func (c *Cache) Put(x interface{}) {
	if x == nil {
		return
	}

	l := c.pin()

	if race.Enabled {
		race.Acquire(unsafe.Pointer(l))
	}

	if l.elems < cacheShardSize {
		l.elem[l.elems] = x
		l.elems++
	} else if next := l.next; next != nil && next.elems < cacheShardSize {
		next.elem[next.elems] = x
		next.elems++
	} else if c.Size > 0 {
		n := c.node()
		if atomic.LoadInt64(&n.size) < c.Size && n.trylock() {
			// There is no space in the private pool but we were able to acquire
			// the node lock, so we can try to move shards to/from the node
			// pools.
			if full := l.next; full != nil {
				// The l.next shard is full: move it to the node full pool.
				l.next = nil
				full.next = n.full
				n.full = full
				atomic.AddInt64(&n.size, cacheShardSize)
			}
			if n.size < c.Size { // double check
				if empty := n.empty; empty != nil {
					// Grab a reusable empty shard from the node empty pool and move it
					// to the private pool.
					n.empty = empty.next
					empty.next = nil
					l.next = empty
					n.unlock()
				} else {
					// The node empty pool contains no reusable shards: allocate a new
					// empty shard.
					n.unlock()
					l.next = &cacheShard{}
				}
				l.next.elem[0] = x
				l.next.elems = 1
			} else {
				n.unlock()
			}
		}
	} // else: drop it on the floor.

	if race.Enabled {
		race.Release(unsafe.Pointer(l))
	}

	runtime_procUnpin()
}
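
// To summarize Put's fast-to-slow path (a descriptive sketch of the code
// above, not additional behavior):
//
//  1. store x in the private per-P shard l if it has room;
//  2. otherwise store x in the spill-over shard l.next;
//  3. otherwise, if a per-NUMA-node budget is configured (c.Size > 0), the
//     node is under budget, and the node lock is acquired by a single CAS,
//     move the full l.next (if any) to the node's full list and, if the node
//     is still under budget, install a reused or freshly allocated empty
//     shard as l.next and store x there;
//  4. otherwise x is dropped, so Put never blocks.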

// Get selects an arbitrary item from the Cache, removes it from the
// Cache, and returns it to the caller.
// Get may choose to ignore the cache and treat it as empty.
// Callers should not assume any relation between values passed to Put and
// the values returned by Get.
//
// If Get would otherwise return nil and c.New is non-nil, Get returns
// the result of calling c.New.
func (c *Cache) Get() (x interface{}) {
	l := c.pin()

	if race.Enabled {
		race.Acquire(unsafe.Pointer(l))
	}

	if l.elems > 0 {
		l.elems--
		x, l.elem[l.elems] = l.elem[l.elems], nil
	} else if next := l.next; next != nil && next.elems > 0 {
		next.elems--
		x, next.elem[next.elems] = next.elem[next.elems], nil
	} else if c.Size > 0 {
		n := c.node()
		if atomic.LoadInt64(&n.size) > 0 && n.trylock() {
			// The private pool is empty but we were able to acquire the node
			// lock, so we can try to move shards to/from the node pools.
			if empty := l.next; empty != nil {
				// The l.next shard is empty: move it to the node empty pool.
				l.next = nil
				empty.next = n.empty
				n.empty = empty
			}
			// Grab a full shard from the node full pool and obtain x from it.
			if full := n.full; full != nil {
				n.full = full.next
				full.next = nil
				l.next = full
				atomic.AddInt64(&n.size, -cacheShardSize)
				full.elems--
				x, full.elem[full.elems] = full.elem[full.elems], nil
			}
			n.unlock()
		}
	}

	if race.Enabled {
		race.Release(unsafe.Pointer(l))
	}

	runtime_procUnpin()

	if x == nil {
		getmissingevent()
		if c.New != nil {
			x = c.New()
		}
	}
	return x
}
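
// A typical call site asserts the concrete type it stores. This is an
// illustrative sketch only; bufcache and the []byte element type are
// assumptions carried over from the example above, not part of this package:
//
//	b, _ := bufcache.Get().([]byte)
//	b = append(b[:0], "payload"...)
//	// ... use b, then hand it back for reuse ...
//	bufcache.Put(b)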

// node returns the cacheNode for the NUMA node the calling CPU belongs to.
func (c *Cache) node() *cacheNode {
	n := atomic.LoadPointer(&c.nodes) // load-acquire
	_, nn := numa.GetCPUAndNode()
	// Index the [N]cacheNode array by the caller's NUMA node id.
	np := unsafe.Pointer(uintptr(n) + uintptr(nn)*unsafe.Sizeof(cacheNode{}))
	return (*cacheNode)(np)
}

// pin pins the current goroutine to P, disables preemption and returns the
// cacheLocal pool for the P. Caller must call runtime_procUnpin() when done
// with the pool.
func (c *Cache) pin() *cacheLocal {
	pid := runtime_procPin()
	// In pinSlow we store to local and then to localSize, here we load in opposite order.
	// Since we've disabled preemption, GC cannot happen in between.
	// Thus here we must observe local at least as large as localSize.
	// We can observe a newer/larger local, it is fine (we must observe its zero-initialized-ness).
	s := atomic.LoadUintptr(&c.localSize) // load-acquire
	l := atomic.LoadPointer(&c.local)     // load-acquire
	if uintptr(pid) < s {
		return indexLocal(l, pid)
	}
	return c.pinSlow()
}

func (c *Cache) pinSlow() *cacheLocal {
	// Retry under the mutex.
	// Can not lock the mutex while pinned.
	runtime_procUnpin()
	c.mu.Lock()
	defer c.mu.Unlock()
	pid := runtime_procPin()
	// DOUBLE CHECKED LOCKING
	s := c.localSize
	l := c.local
	if uintptr(pid) < s {
		return indexLocal(l, pid)
	}
	// GOMAXPROCS has grown (or this is the first use): allocate new arrays;
	// the old ones, if any, are dropped.
	size := runtime.GOMAXPROCS(0)
	local := make([]cacheLocal, size)
	nodes := make([]cacheNode, numa.MaxNodeID()+1)
	// Publish nodes before local/localSize so that any P that observes the
	// new localSize in pin also observes a valid nodes pointer in node().
	atomic.StorePointer(&c.nodes, unsafe.Pointer(&nodes[0])) // store-release
	atomic.StorePointer(&c.local, unsafe.Pointer(&local[0])) // store-release
	atomic.StoreUintptr(&c.localSize, uintptr(size))         // store-release
	return &local[pid]
}

func indexLocal(l unsafe.Pointer, i int) *cacheLocal {
	lp := unsafe.Pointer(uintptr(l) + uintptr(i)*unsafe.Sizeof(cacheLocal{}))
	return (*cacheLocal)(lp)
}

// Implemented in runtime.

//go:linkname runtime_procPin runtime.procPin
//go:nosplit
func runtime_procPin() int

//go:linkname runtime_procUnpin runtime.procUnpin
//go:nosplit
func runtime_procUnpin()