github.com/lrita/cache@v1.0.1/cache.go

// Copyright 2019 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package cache

import (
	"runtime"
	"sync"
	"sync/atomic"
	"unsafe"

	"github.com/lrita/cache/race"
	"github.com/lrita/numa"
)

// Cache is a set of temporary objects that may be individually saved and
// retrieved.
//
// A Cache is safe for use by multiple goroutines simultaneously.
//
// Cache's purpose is to cache allocated but unused items for later reuse,
// relieving pressure on the garbage collector. That is, it makes it easy to
// build efficient, thread-safe free lists. However, it is not suitable for all
// free lists.
//
// An appropriate use of a Cache is to manage a group of temporary items
// silently shared among and potentially reused by concurrent independent
// clients of a package. Cache provides a way to amortize allocation overhead
// across many clients.
//
// The difference from the standard library's sync.Pool is that items in a
// Cache are not deallocated by the GC, and there are multiple slots in both
// the per-P storage and the per-NUMA-node storage. The free lists in a Cache
// are maintained as part of a long-lived object and are intended for
// long-running processes. Users can tune the per-NUMA-node capacity
// (Cache.Size) based on profiling to minimize allocations.
//
// A Cache must not be copied after first use.
type Cache struct {
	noCopy noCopy

	nodes unsafe.Pointer // per-NUMA-node pool, actual type is [N]cacheNode

	local     unsafe.Pointer // local fixed-size per-P pool, actual type is [P]cacheLocal
	localSize uintptr        // size of the local array

	mu sync.Mutex
	// New optionally specifies a function to generate
	// a value when Get would otherwise return nil.
	// It may not be changed concurrently with calls to Get.
	New func() interface{}
	// Size optionally specifies the maximum number of items kept in each
	// per-NUMA-node list.
	Size int64
}
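// exampleBufferCacheUsage is a minimal usage sketch: a free list of
// fixed-size byte buffers shared by concurrent workers. The buffer length,
// Size value, and worker count are illustrative placeholders; in real code,
// tune Size from profiling data.
func exampleBufferCacheUsage() {
	var bufcache = Cache{
		New:  func() interface{} { return make([]byte, 4096) },
		Size: 1 << 10, // keep at most 1024 spare items per NUMA node
	}

	var wg sync.WaitGroup
	for i := 0; i < 8; i++ {
		wg.Add(1)
		go func() {
			defer wg.Done()
			buf := bufcache.Get().([]byte) // New is set, so Get never returns nil
			_ = buf                        // ... use buf as scratch space ...
			bufcache.Put(buf)              // hand the buffer back for reuse
		}()
	}
	wg.Wait()
}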
const (
	locked   int64 = 1
	unlocked       = 0
	// interface{} is 16 bytes wide on 64-bit platforms,
	// leaving only 7 slots per 128-byte cache line.
	cacheShardSize = 7 // number of elements per shard
)

// Due to https://github.com/golang/go/issues/14620, in some situations we
// cannot make the object aligned by composition.
type issues14620a struct {
	_ *cacheShard
}

type cacheShardInternal struct {
	elems int
	elem  [cacheShardSize]interface{}
	next  *cacheShard
}

type cacheShard struct {
	cacheShardInternal
	// Prevents false sharing on widespread platforms with
	// 128 mod (cache line size) = 0.
	_ [128 - unsafe.Sizeof(cacheShardInternal{})%128]byte
}

type cacheLocal cacheShard

type cacheNodeInternal struct {
	lock  int64
	_     [7]int64    // pads lock out to a 64-byte cache line
	size  int64       // number of elements held in the full shards below
	full  *cacheShard // node pool of full shards (elems == cacheShardSize)
	empty *cacheShard // node pool of empty shards (elems == 0)
}

func (c *cacheNodeInternal) trylock() bool {
	ok := atomic.CompareAndSwapInt64(&c.lock, unlocked, locked)
	if race.Enabled && ok {
		race.Acquire(unsafe.Pointer(c))
	}
	return ok
}

func (c *cacheNodeInternal) unlock() {
	if race.Enabled {
		race.Release(unsafe.Pointer(c))
	}
	atomic.StoreInt64(&c.lock, unlocked)
}

type cacheNode struct {
	cacheNodeInternal
	// Prevents false sharing on widespread platforms with
	// 128 mod (cache line size) = 0.
	_ [128 - unsafe.Sizeof(cacheNodeInternal{})%128]byte
}
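// As a compile-time sketch of the invariant the two padded types above rely
// on (an illustrative check, not one the package itself declares): each
// padded type should occupy a whole multiple of 128 bytes, so neighbouring
// elements of the [P]cacheLocal and [N]cacheNode arrays never share a cache
// line. Negating a non-zero uintptr constant does not compile, so a
// violation would be caught at build time.
const (
	_ = -(unsafe.Sizeof(cacheShard{}) % 128)
	_ = -(unsafe.Sizeof(cacheNode{}) % 128)
)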
// Put adds x to the Cache.
func (c *Cache) Put(x interface{}) {
	if x == nil {
		return
	}

	l := c.pin()

	if race.Enabled {
		race.Acquire(unsafe.Pointer(l))
	}

	if l.elems < cacheShardSize {
		l.elem[l.elems] = x
		l.elems++
	} else if next := l.next; next != nil && next.elems < cacheShardSize {
		next.elem[next.elems] = x
		next.elems++
	} else if c.Size > 0 {
		n := c.node()
		if atomic.LoadInt64(&n.size) < c.Size && n.trylock() {
			// There is no space in the private pool but we were able to acquire
			// the node lock, so we can try to move shards to/from the node
			// pool.
			if full := l.next; full != nil {
				// The l.next shard is full: move it to the node pool.
				l.next = nil
				full.next = n.full
				n.full = full
				atomic.AddInt64(&n.size, cacheShardSize)
			}
			if n.size < c.Size { // double check
				if empty := n.empty; empty != nil {
					// Grab a reusable empty shard from the node empty pool and move it
					// to the private pool.
					n.empty = empty.next
					empty.next = nil
					l.next = empty
					n.unlock()
				} else {
					// The node empty pool contains no reusable shards: allocate a new
					// empty shard.
					n.unlock()
					l.next = &cacheShard{}
				}
				l.next.elem[0] = x
				l.next.elems = 1
			} else {
				n.unlock()
			}
		}
	} // else: drop it on the floor.

	if race.Enabled {
		race.Release(unsafe.Pointer(l))
	}

	runtime_procUnpin()
}

// Get selects an arbitrary item from the Cache, removes it from the
// Cache, and returns it to the caller.
// Get may choose to ignore the cache and treat it as empty.
// Callers should not assume any relation between values passed to Put and
// the values returned by Get.
//
// If Get would otherwise return nil and c.New is non-nil, Get returns
// the result of calling c.New.
func (c *Cache) Get() (x interface{}) {
	l := c.pin()

	if race.Enabled {
		race.Acquire(unsafe.Pointer(l))
	}

	if l.elems > 0 {
		l.elems--
		x, l.elem[l.elems] = l.elem[l.elems], nil
	} else if next := l.next; next != nil && next.elems > 0 {
		next.elems--
		x, next.elem[next.elems] = next.elem[next.elems], nil
	} else if c.Size > 0 {
		n := c.node()
		if atomic.LoadInt64(&n.size) > 0 && n.trylock() {
			// The private pool is empty but we were able to acquire the node
			// lock, so we can try to move shards to/from the node pools.
			if empty := l.next; empty != nil {
				// The l.next shard is empty: move it to the node empty pool.
				l.next = nil
				empty.next = n.empty
				n.empty = empty
			}
			// Grab a full shard from the node pool and take x from it.
			if full := n.full; full != nil {
				n.full = full.next
				full.next = nil
				l.next = full
				atomic.AddInt64(&n.size, -cacheShardSize)
				full.elems--
				x, full.elem[full.elems] = full.elem[full.elems], nil
			}
			n.unlock()
		}
	}

	if race.Enabled {
		race.Release(unsafe.Pointer(l))
	}

	runtime_procUnpin()

	if x == nil {
		getmissingevent() // defined elsewhere in this package
		if c.New != nil {
			x = c.New()
		}
	}
	return x
}

func (c *Cache) node() *cacheNode {
	n := atomic.LoadPointer(&c.nodes) // load-acquire
	_, nn := numa.GetCPUAndNode()
	np := unsafe.Pointer(uintptr(n) + uintptr(nn)*unsafe.Sizeof(cacheNode{}))
	return (*cacheNode)(np)
}

// pin pins the current goroutine to P, disables preemption and returns the
// cacheLocal pool for the P. Caller must call runtime_procUnpin() when done
// with the pool.
func (c *Cache) pin() *cacheLocal {
	pid := runtime_procPin()
	// In pinSlow we store to localSize and then to local, here we load in opposite order.
	// Since we've disabled preemption, GC cannot happen in between.
	// Thus here we must observe a local at least as large as localSize.
	// We can observe a newer/larger local, it is fine (we must observe its zero-initialized-ness).
	s := atomic.LoadUintptr(&c.localSize) // load-acquire
	l := atomic.LoadPointer(&c.local)     // load-acquire
	if uintptr(pid) < s {
		return indexLocal(l, pid)
	}
	return c.pinSlow()
}

func (c *Cache) pinSlow() *cacheLocal {
	// Retry under the mutex.
	// Cannot lock the mutex while pinned.
	runtime_procUnpin()
	c.mu.Lock()
	defer c.mu.Unlock()
	pid := runtime_procPin()
	// DOUBLE CHECKED LOCKING
	s := c.localSize
	l := c.local
	if uintptr(pid) < s {
		return indexLocal(l, pid)
	}
	// If GOMAXPROCS changes, we re-allocate both arrays and lose the old ones.
	size := runtime.GOMAXPROCS(0)
	local := make([]cacheLocal, size)
	nodes := make([]cacheNode, numa.MaxNodeID()+1)
	atomic.StorePointer(&c.nodes, unsafe.Pointer(&nodes[0])) // store-release
	atomic.StorePointer(&c.local, unsafe.Pointer(&local[0])) // store-release
	atomic.StoreUintptr(&c.localSize, uintptr(size))         // store-release
	return &local[pid]
}

func indexLocal(l unsafe.Pointer, i int) *cacheLocal {
	lp := unsafe.Pointer(uintptr(l) + uintptr(i)*unsafe.Sizeof(cacheLocal{}))
	return (*cacheLocal)(lp)
}

// Implemented in runtime.

//go:linkname runtime_procPin runtime.procPin
//go:nosplit
func runtime_procPin() int

//go:linkname runtime_procUnpin runtime.procUnpin
//go:nosplit
func runtime_procUnpin()
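// The node and indexLocal lookups above are plain array indexing written
// with unsafe pointer arithmetic, presumably so that only a raw pointer and
// a size need to be published with atomic stores (see pinSlow). The helpers
// below are a hypothetical restatement of the same lookups using slices, for
// clarity only; they are not used by the package.
func nodeByIndex(nodes []cacheNode, nn int) *cacheNode {
	return &nodes[nn] // same address node() computes from c.nodes
}

func localByIndex(local []cacheLocal, pid int) *cacheLocal {
	return &local[pid] // same address indexLocal() computes from c.local
}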