github.com/lrita/cache@v1.0.1/bufcache.go

// Copyright 2019 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package cache

import (
	"runtime"
	"sync"
	"sync/atomic"
	"unsafe"

	"github.com/lrita/cache/race"
	"github.com/lrita/numa"
)

// BufCache is a set of temporary byte buffers that may be individually saved
// and retrieved.
//
// A BufCache is safe for use by multiple goroutines simultaneously.
//
// BufCache's purpose is to cache allocated but unused items for later reuse,
// relieving pressure on the garbage collector. That is, it makes it easy to
// build efficient, thread-safe free lists. However, it is not suitable for all
// free lists.
//
// An appropriate use of a BufCache is to manage a group of temporary items
// silently shared among and potentially reused by concurrent independent
// clients of a package. BufCache provides a way to amortize allocation overhead
// across many clients.
//
// The difference from the standard library's sync.Pool is that items in a
// BufCache are not deallocated by the GC, and that there are multiple slots in
// the per-P and per-NUMA-node storage. The free lists in a BufCache are
// maintained as part of a long-lived object and are intended for long-running
// process logic. Users can tune the per-NUMA free-list size (BufCache.Size)
// based on profiling to minimize allocations.
//
// A BufCache must not be copied after first use.
//
// Assigning a byte slice to an interface{} causes an allocation, so this type
// is a specialized implementation derived from Cache.
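//
// A minimal usage sketch (illustrative only; the buffer length 4096, the
// Size value 1024, and the names below are assumptions rather than
// recommendations):
//
//	var bufs = BufCache{
//		New:  func() []byte { return make([]byte, 4096) },
//		Size: 1024,
//	}
//
//	func handle() {
//		buf := bufs.Get()
//		defer bufs.Put(buf)
//		// ... use buf ...
//	}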
type BufCache struct {
	noCopy noCopy

	nodes unsafe.Pointer // per-NUMA NODE pool, actual type is [N]bufCacheNode

	local     unsafe.Pointer // local fixed-size per-P pool, actual type is [P]bufCacheLocal
	localSize uintptr        // size of the local array

	mu sync.Mutex
	// New optionally specifies a function to generate
	// a value when Get would otherwise return nil.
	// It may not be changed concurrently with calls to Get.
	New func() []byte
	// Size optionally specifies the maximum number of items kept in each
	// per-NUMA-node free list.
	Size int64
}

// Due to https://github.com/golang/go/issues/14620, in some situations we
// cannot make the object aligned by composition.
type issues14620b struct {
	_ *bufCacheShard
}

const (
	// []byte is 24 bytes wide on 64-bit platforms,
	// leaving only 4 slots per 128-byte cache line.
	bufCacheShardSize = 4 // number of elements per shard
)

type bufCacheShardInternal struct {
	elems int
	elem  [bufCacheShardSize][]byte
	next  *bufCacheShard
}

type bufCacheShard struct {
	bufCacheShardInternal
	// Prevents false sharing on widespread platforms with
	// 128 mod (cache line size) = 0.
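	// As a rough sanity check of the arithmetic (assuming a 64-bit platform,
	// where an int and a pointer are 8 bytes and a slice header is 24 bytes):
	// 8 (elems) + 4*24 (elem) + 8 (next) = 112 bytes, so the padding below
	// rounds the shard up to a full 128-byte span.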
	_ [128 - unsafe.Sizeof(bufCacheShardInternal{})%128]byte
}

type bufCacheLocal bufCacheShard

type bufCacheNodeInternal struct {
	lock  int64          // guards the node's pools; acquired via trylock/unlock
	_     [7]int64       // padding that keeps lock apart from the fields below
	size  int64          // size of full shards
	full  *bufCacheShard // pool of full shards (elems == bufCacheShardSize)
	empty *bufCacheShard // pool of empty shards (elems == 0)
}

func (c *bufCacheNodeInternal) trylock() bool {
	ok := atomic.CompareAndSwapInt64(&c.lock, unlocked, locked)
	if race.Enabled && ok {
		race.Acquire(unsafe.Pointer(c))
	}
	return ok
}

func (c *bufCacheNodeInternal) unlock() {
	if race.Enabled {
		race.Release(unsafe.Pointer(c))
	}
	atomic.StoreInt64(&c.lock, unlocked)
}

type bufCacheNode struct {
	bufCacheNodeInternal
	// Prevents false sharing on widespread platforms with
	// 128 mod (cache line size) = 0.
	_ [128 - unsafe.Sizeof(bufCacheNodeInternal{})%128]byte
}

// Put adds x to the BufCache. Zero-length slices are ignored.
func (c *BufCache) Put(x []byte) {
	if len(x) == 0 {
		return
	}

	l := c.pin()

	if race.Enabled {
		race.Acquire(unsafe.Pointer(l))
	}

	if l.elems < bufCacheShardSize {
		l.elem[l.elems] = x
		l.elems++
	} else if next := l.next; next != nil && next.elems < bufCacheShardSize {
		next.elem[next.elems] = x
		next.elems++
	} else if c.Size > 0 {
		n := c.node()
		if atomic.LoadInt64(&n.size) < c.Size && n.trylock() {
			// There is no space in the private pool but we were able to acquire
			// the node lock, so we can try to move shards to/from the local
			// node pool.
			if full := l.next; full != nil {
				// The l.next shard is full: move it to the node pool.
				l.next = nil
				full.next = n.full
				n.full = full
				atomic.AddInt64(&n.size, bufCacheShardSize)
			}
			if n.size < c.Size { // double check
				if empty := n.empty; empty != nil {
					// Grab a reusable empty shard from the node empty pool and move it
					// to the private pool.
					n.empty = empty.next
					empty.next = nil
					l.next = empty
					n.unlock()
				} else {
					// The node empty pool contains no reusable shards: allocate a new
					// empty shard.
					n.unlock()
					l.next = &bufCacheShard{}
				}
				l.next.elem[0] = x
				l.next.elems = 1
			} else {
				n.unlock()
			}
		}
	} // else: drop it on the floor.

	if race.Enabled {
		race.Release(unsafe.Pointer(l))
	}

	runtime_procUnpin()
}

// Get selects an arbitrary item from the BufCache, removes it from the
// BufCache, and returns it to the caller.
// Get may choose to ignore the cache and treat it as empty.
// Callers should not assume any relation between values passed to Put and
// the values returned by Get.
//
// If Get would otherwise return nil and c.New is non-nil, Get returns
// the result of calling c.New.
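//
// For example, a caller that does not set New should handle a nil result
// itself (a sketch; the buffer length 4096 is an arbitrary assumption):
//
//	buf := c.Get()
//	if buf == nil {
//		buf = make([]byte, 4096)
//	}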
func (c *BufCache) Get() (x []byte) {
	l := c.pin()

	if race.Enabled {
		race.Acquire(unsafe.Pointer(l))
	}

	if l.elems > 0 {
		l.elems--
		x, l.elem[l.elems] = l.elem[l.elems], nil
	} else if next := l.next; next != nil && next.elems > 0 {
		next.elems--
		x, next.elem[next.elems] = next.elem[next.elems], nil
	} else if c.Size > 0 {
		n := c.node()
		if atomic.LoadInt64(&n.size) > 0 && n.trylock() {
			// The private pool is empty but we were able to acquire the node
			// lock, so we can try to move shards to/from the node pools.
			if empty := l.next; empty != nil {
				// The l.next shard is empty: move it to the node empty pool.
				l.next = nil
				empty.next = n.empty
				n.empty = empty
			}
			// Grab full shard from global pool and obtain x from it.
			if full := n.full; full != nil {
				n.full = full.next
				full.next = nil
				l.next = full
				atomic.AddInt64(&n.size, -bufCacheShardSize)
				full.elems--
				x, full.elem[full.elems] = full.elem[full.elems], nil
			}
			n.unlock()
		}
	}

	if race.Enabled {
		race.Release(unsafe.Pointer(l))
	}

	runtime_procUnpin()

	if x == nil {
		getmissingevent()
		if c.New != nil {
			x = c.New()
		}
	}
	return x
}

func (c *BufCache) node() *bufCacheNode {
	n := atomic.LoadPointer(&c.nodes) // load-acquire
	_, nn := numa.GetCPUAndNode()
	// Index the per-NUMA [N]bufCacheNode array by the current node id.
	np := unsafe.Pointer(uintptr(n) + uintptr(nn)*unsafe.Sizeof(bufCacheNode{}))
	return (*bufCacheNode)(np)
}

// pin pins the current goroutine to P, disables preemption and returns the
// bufCacheLocal pool for the P. Caller must call runtime_procUnpin() when done
// with the pool.
func (c *BufCache) pin() *bufCacheLocal {
	pid := runtime_procPin()
	// In pinSlow we store to localSize and then to local, here we load in opposite order.
	// Since we've disabled preemption, GC cannot happen in between.
	// Thus here we must observe local at least as large as localSize.
	// We can observe a newer/larger local, it is fine (we must observe its zero-initialized-ness).
	s := atomic.LoadUintptr(&c.localSize) // load-acquire
	l := atomic.LoadPointer(&c.local)     // load-acquire
	if uintptr(pid) < s {
		return bufindexLocal(l, pid)
	}
	return c.pinSlow()
}

func (c *BufCache) pinSlow() *bufCacheLocal {
	// Retry under the mutex.
	// Can not lock the mutex while pinned.
	runtime_procUnpin()
	c.mu.Lock()
	defer c.mu.Unlock()
	pid := runtime_procPin()
	// DOUBLE CHECKED LOCKING
	s := c.localSize
	l := c.local
	if uintptr(pid) < s {
		return bufindexLocal(l, pid)
	}
	// If GOMAXPROCS changes between GCs, we re-allocate the array and lose the old one.
	size := runtime.GOMAXPROCS(0)
	local := make([]bufCacheLocal, size)
	nodes := make([]bufCacheNode, numa.MaxNodeID()+1)
	atomic.StorePointer(&c.nodes, unsafe.Pointer(&nodes[0])) // store-release
	atomic.StorePointer(&c.local, unsafe.Pointer(&local[0])) // store-release
	atomic.StoreUintptr(&c.localSize, uintptr(size))         // store-release
	return &local[pid]
}

func bufindexLocal(l unsafe.Pointer, i int) *bufCacheLocal {
	lp := unsafe.Pointer(uintptr(l) + uintptr(i)*unsafe.Sizeof(bufCacheLocal{}))
	return (*bufCacheLocal)(lp)
}