github.com/lrita/cache@v1.0.1/bufcache.go

// Copyright 2019 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package cache

import (
	"runtime"
	"sync"
	"sync/atomic"
	"unsafe"

	"github.com/lrita/cache/race"
	"github.com/lrita/numa"
)

// BufCache is a set of temporary byte buffers that may be individually saved
// and retrieved.
//
// A BufCache is safe for use by multiple goroutines simultaneously.
//
// BufCache's purpose is to cache allocated but unused items for later reuse,
// relieving pressure on the garbage collector. That is, it makes it easy to
// build efficient, thread-safe free lists. However, it is not suitable for all
// free lists.
//
// An appropriate use of a BufCache is to manage a group of temporary items
// silently shared among and potentially reused by concurrent independent
// clients of a package. BufCache provides a way to amortize allocation overhead
// across many clients.
//
// The difference from the standard library's sync.Pool is that items in a
// BufCache are not deallocated by the GC, and there are multiple slots in the
// per-P and per-NUMA-node storage. The free lists in a BufCache are maintained
// as part of a long-lived object and are intended for long-running process
// logic. Users can tune the per-NUMA free-list size (BufCache.Size) based on
// profiling to minimize allocations.
//
// A BufCache must not be copied after first use.
//
// Assigning a byte slice to an interface{} causes an allocation, so we
// specialize this implementation from Cache.
type BufCache struct {
	noCopy noCopy

	nodes unsafe.Pointer // per-NUMA-node pool, actual type is [N]bufCacheNode

	local     unsafe.Pointer // local fixed-size per-P pool, actual type is [P]bufCacheLocal
	localSize uintptr        // size of the local array

	mu sync.Mutex
	// New optionally specifies a function to generate
	// a value when Get would otherwise return nil.
	// It may not be changed concurrently with calls to Get.
	New func() []byte
	// Size optionally specifies the maximum number of items in the per-P local lists.
	Size int64
}

// Due to https://github.com/golang/go/issues/14620, in some situations we
// cannot make the object aligned by composition.
type issues14620b struct {
	_ *bufCacheShard
}

const (
	// []byte is 24 bytes wide on 64-bit platforms,
	// leaving only 4 slots per 128-byte cache line.
	bufCacheShardSize = 4 // number of elements per shard
)

type bufCacheShardInternal struct {
	elems int
	elem  [bufCacheShardSize][]byte
	next  *bufCacheShard
}

type bufCacheShard struct {
	bufCacheShardInternal
	// Prevents false sharing on widespread platforms with
	// 128 mod (cache line size) = 0.
	_ [128 - unsafe.Sizeof(bufCacheShardInternal{})%128]byte
}

type bufCacheLocal bufCacheShard

type bufCacheNodeInternal struct {
	lock  int64
	_     [7]int64
	size  int64          // size of full shards
	full  *bufCacheShard // pool of full shards (elems == bufCacheShardSize)
	empty *bufCacheShard // pool of empty shards (elems == 0)
}

func (c *bufCacheNodeInternal) trylock() bool {
	ok := atomic.CompareAndSwapInt64(&c.lock, unlocked, locked)
	if race.Enabled && ok {
		race.Acquire(unsafe.Pointer(c))
	}
	return ok
}

func (c *bufCacheNodeInternal) unlock() {
	if race.Enabled {
		race.Release(unsafe.Pointer(c))
	}
	atomic.StoreInt64(&c.lock, unlocked)
}

type bufCacheNode struct {
	bufCacheNodeInternal
	// Prevents false sharing on widespread platforms with
	// 128 mod (cache line size) = 0.
	_ [128 - unsafe.Sizeof(bufCacheNodeInternal{})%128]byte
}

// Put adds x to the BufCache.
func (c *BufCache) Put(x []byte) {
	if len(x) == 0 {
		return
	}

	l := c.pin()

	if race.Enabled {
		race.Acquire(unsafe.Pointer(l))
	}

	if l.elems < bufCacheShardSize {
		l.elem[l.elems] = x
		l.elems++
	} else if next := l.next; next != nil && next.elems < bufCacheShardSize {
		next.elem[next.elems] = x
		next.elems++
	} else if c.Size > 0 {
		n := c.node()
		if atomic.LoadInt64(&n.size) < c.Size && n.trylock() {
			// There is no space in the private pool but we were able to acquire
			// the node lock, so we can try to move shards to/from the local
			// node pool.
			if full := l.next; full != nil {
				// The l.next shard is full: move it to the node pool.
				l.next = nil
				full.next = n.full
				n.full = full
				atomic.AddInt64(&n.size, bufCacheShardSize)
			}
			if n.size < c.Size { // double check
				if empty := n.empty; empty != nil {
					// Grab a reusable empty shard from the node empty pool and move it
					// to the private pool.
					n.empty = empty.next
					empty.next = nil
					l.next = empty
					n.unlock()
				} else {
					// The node empty pool contains no reusable shards: allocate a new
					// empty shard.
					n.unlock()
					l.next = &bufCacheShard{}
				}
				l.next.elem[0] = x
				l.next.elems = 1
			} else {
				n.unlock()
			}
		}
	} // else: drop it on the floor.

	if race.Enabled {
		race.Release(unsafe.Pointer(l))
	}

	runtime_procUnpin()
}

// Get selects an arbitrary item from the BufCache, removes it from the
// BufCache, and returns it to the caller.
// Get may choose to ignore the pool and treat it as empty.
// Callers should not assume any relation between values passed to Put and
// the values returned by Get.
//
// If Get would otherwise return nil and c.New is non-nil, Get returns
// the result of calling c.New.
func (c *BufCache) Get() (x []byte) {
	l := c.pin()

	if race.Enabled {
		race.Acquire(unsafe.Pointer(l))
	}

	if l.elems > 0 {
		l.elems--
		x, l.elem[l.elems] = l.elem[l.elems], nil
	} else if next := l.next; next != nil && next.elems > 0 {
		next.elems--
		x, next.elem[next.elems] = next.elem[next.elems], nil
	} else if c.Size > 0 {
		n := c.node()
		if atomic.LoadInt64(&n.size) > 0 && n.trylock() {
			// The private pool is empty but we were able to acquire the node
			// lock, so we can try to move shards to/from the node pools.
			if empty := l.next; empty != nil {
				// The l.next shard is empty: move it to the node empty pool.
				l.next = nil
				empty.next = n.empty
				n.empty = empty
			}
			// Grab a full shard from the node pool and obtain x from it.
			if full := n.full; full != nil {
				n.full = full.next
				full.next = nil
				l.next = full
				atomic.AddInt64(&n.size, -bufCacheShardSize)
				full.elems--
				x, full.elem[full.elems] = full.elem[full.elems], nil
			}
			n.unlock()
		}
	}

	if race.Enabled {
		race.Release(unsafe.Pointer(l))
	}

	runtime_procUnpin()

	if x == nil {
		getmissingevent()
		if c.New != nil {
			x = c.New()
		}
	}
	return x
}

func (c *BufCache) node() *bufCacheNode {
	n := atomic.LoadPointer(&c.nodes) // load-acquire
	_, nn := numa.GetCPUAndNode()
	np := unsafe.Pointer(uintptr(n) + uintptr(nn)*unsafe.Sizeof(bufCacheNode{}))
	return (*bufCacheNode)(np)
}

// pin pins the current goroutine to P, disables preemption and returns the
// bufCacheLocal pool for the P. Caller must call runtime_procUnpin() when done
// with the pool.
func (c *BufCache) pin() *bufCacheLocal {
	pid := runtime_procPin()
	// In pinSlow we store to localSize and then to local, here we load in opposite order.
	// Since we've disabled preemption, GC cannot happen in between.
	// Thus here we must observe local at least as large as localSize.
	// We can observe a newer/larger local, it is fine (we must observe its zero-initialized-ness).
	s := atomic.LoadUintptr(&c.localSize) // load-acquire
	l := atomic.LoadPointer(&c.local)     // load-acquire
	if uintptr(pid) < s {
		return bufindexLocal(l, pid)
	}
	return c.pinSlow()
}

func (c *BufCache) pinSlow() *bufCacheLocal {
	// Retry under the mutex.
	// Cannot lock the mutex while pinned.
	runtime_procUnpin()
	c.mu.Lock()
	defer c.mu.Unlock()
	pid := runtime_procPin()
	// DOUBLE CHECKED LOCKING
	s := c.localSize
	l := c.local
	if uintptr(pid) < s {
		return bufindexLocal(l, pid)
	}
	// If GOMAXPROCS changes between GCs, we re-allocate the array and lose the old one.
	size := runtime.GOMAXPROCS(0)
	local := make([]bufCacheLocal, size)
	nodes := make([]bufCacheNode, numa.MaxNodeID()+1)
	atomic.StorePointer(&c.nodes, unsafe.Pointer(&nodes[0])) // store-release
	atomic.StorePointer(&c.local, unsafe.Pointer(&local[0])) // store-release
	atomic.StoreUintptr(&c.localSize, uintptr(size))         // store-release
	return &local[pid]
}

func bufindexLocal(l unsafe.Pointer, i int) *bufCacheLocal {
	lp := unsafe.Pointer(uintptr(l) + uintptr(i)*unsafe.Sizeof(bufCacheLocal{}))
	return (*bufCacheLocal)(lp)
}
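
Below is a minimal usage sketch, not part of bufcache.go. It relies only on the exported API shown above (New, Size, Get, Put); the 4 KiB buffer size, the package-level variable bufs, and the handle function are illustrative assumptions.

package main

import (
	"fmt"

	"github.com/lrita/cache"
)

// bufs caches 4 KiB scratch buffers (illustrative values). Size bounds how
// many items each per-NUMA-node free list may hold; New is invoked only when
// every per-P and per-node list is empty.
var bufs = cache.BufCache{
	New:  func() []byte { return make([]byte, 4096) },
	Size: 1 << 10,
}

// handle is a hypothetical request handler that borrows a buffer, uses it as
// scratch space, and returns it for later reuse.
func handle(payload []byte) {
	b := bufs.Get()
	defer bufs.Put(b) // zero-length buffers are silently dropped by Put
	n := copy(b, payload)
	fmt.Println("copied", n, "bytes")
}

func main() {
	handle([]byte("hello, BufCache"))
}

Note that Put accepts any non-empty slice, so a buffer returned by Get may be shorter than the 4096 bytes produced by New; callers that need a fixed capacity should check len or cap after Get.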