github.com/grailbio/base@v0.0.11/gtl/randomized_freepool.go.tpl (about) 1 // +build !race 2 3 // ZZFreePool is a thread-safe pool that uses power-of-two load balancing across 4 // CPUs. 5 6 // This library requires the following two additional files per package. For 7 // now, create them manually. 8 // 9 // 1. A go file with the following contents 10 // 11 // package PACKAGE 12 // 13 // // This import is needed to use go:linkname. 14 // import _ "unsafe" 15 // // The following functions are defined in the Go runtime. To use them, we need to 16 // // import "unsafe", and elsewhere in this package, import "C" to force the compiler 17 // // to recognize the "go:linkname" directive. Some of the details are explained 18 // // in the blog post below. 19 // // 20 // // procPin() pins the caller to the current processor, and returns the processor 21 // // id in range [0,GOMAXPROCS). procUnpin() undoes the effect of procPin(). 22 // // 23 // // http://www.alangpierce.com/blog/2016/03/17/adventures-in-go-accessing-unexported-functions/ 24 // 25 // //go:linkname runtime_procPin sync.runtime_procPin 26 // //go:nosplit 27 // func runtime_procPin() int 28 // 29 // //go:linkname runtime_procUnpin sync.runtime_procUnpin 30 // //go:nosplit 31 // func runtime_procUnpin() 32 // 33 // //go:linkname fastrandn sync.fastrandn 34 // func fastrandn(n uint32) uint32 35 // 36 // 2. An empty .s file. 37 38 package PACKAGE 39 40 import ( 41 "runtime" 42 "sync" 43 "sync/atomic" 44 "unsafe" 45 ) 46 47 // ZZFreePool is a variation of sync.Pool, specialized for a concrete type. 48 // 49 // - Put() performs power-of-two load balancing, and Get() looks only at the 50 // local queue. This improves the performance of Get() on many-core machines, 51 // at the cost of slightly more allocations. 52 // 53 // - It assumes that GOMAXPROCS is fixed at boot. 54 // 55 // - It never frees objects accumulated in the pool. We could add this feature 56 // if needed. 
57 type ZZFreePool struct { 58 new func() ELEM 59 local []zzPoolLocal 60 maxLocalSize int64 61 } 62 63 const ( 64 zzMaxPrivateElems = 4 65 zzCacheLineSize = 64 66 ) 67 68 type zzPoolLocalInner struct { 69 private [zzMaxPrivateElems]ELEM // Can be used only by the respective P. 70 privateSize int 71 72 shared []ELEM // Can be used by any P. 73 sharedSize int64 // ==len(shared), but can be accessed w/o holding mu. 74 mu sync.Mutex // Protects shared. 75 } 76 77 type zzPoolLocal struct { 78 zzPoolLocalInner 79 // Pad prevents false sharing. 80 pad [zzCacheLineSize - unsafe.Sizeof(zzPoolLocalInner{})%zzCacheLineSize]byte 81 } 82 83 // NewZZFreePool creates a new free object pool. new should create a new 84 // object. It is called when the pool is empty on Get(). maxSize bounds the 85 // approx max number of objects that can be stored in the pool. Beyond this 86 // limit, Put() call will drop the objects. 87 func NewZZFreePool(new func() ELEM, maxSize int) *ZZFreePool { 88 maxProcs := runtime.GOMAXPROCS(0) 89 maxLocalSize := -1 90 if maxSize > 0 { 91 maxLocalSize = maxSize / maxProcs 92 if maxLocalSize <= 0 { 93 maxLocalSize = 1 94 } 95 } 96 p := &ZZFreePool{ 97 new: new, 98 local: make([]zzPoolLocal, maxProcs), 99 maxLocalSize: int64(maxLocalSize), 100 } 101 return p 102 } 103 104 func (p *ZZFreePool) pin() *zzPoolLocal { 105 pid := runtime_procPin() 106 if int(pid) >= len(p.local) { 107 panic(pid) 108 } 109 return &p.local[pid] 110 } 111 112 // Put adds an object to the freepool. The caller shall not touch the object 113 // after the call. 114 func (p *ZZFreePool) Put(x ELEM) { 115 done := false 116 l := p.pin() 117 if l.privateSize < zzMaxPrivateElems { 118 l.private[l.privateSize] = x 119 l.privateSize++ 120 done = true 121 } 122 runtime_procUnpin() 123 if !done { 124 // Pick another random queue, then add x to the shorter one. 125 // This policy ("power of two") reduces load imbalance across 126 // queues to log(log(#queues)) . 
127 // 128 // https://www.eecs.harvard.edu/~michaelm/postscripts/mythesis.pdf 129 l2 := &p.local[int(fastrandn(uint32(len(p.local))))] 130 lSize := atomic.LoadInt64(&l.sharedSize) 131 l2Size := atomic.LoadInt64(&l2.sharedSize) 132 if l2Size < lSize { 133 l = l2 134 } 135 l.mu.Lock() 136 if p.maxLocalSize >= 0 && l.sharedSize < p.maxLocalSize { 137 l.shared = append(l.shared, x) 138 atomic.StoreInt64(&l.sharedSize, l.sharedSize+1) // Release store. 139 } 140 l.mu.Unlock() 141 } 142 } 143 144 // Get removes an object from the freepool. If pool is empty, it calls the 145 // callback passed to NewFreePool. 146 func (p *ZZFreePool) Get() ELEM { 147 l := p.pin() 148 var x ELEM 149 done := false 150 if l.privateSize > 0 { 151 l.privateSize-- 152 x = l.private[l.privateSize] 153 var empty ELEM 154 l.private[l.privateSize] = empty 155 done = true 156 } 157 runtime_procUnpin() 158 if done { 159 return x 160 } 161 l.mu.Lock() 162 last := len(l.shared) - 1 163 if last >= 0 { 164 x = l.shared[last] 165 l.shared = l.shared[:last] 166 atomic.StoreInt64(&l.sharedSize, l.sharedSize-1) 167 done = true 168 } 169 l.mu.Unlock() 170 if !done { 171 x = p.new() 172 } 173 return x 174 } 175 176 // ApproxLen returns an approximate length of the pool. For unittesting only. 177 // 178 // It returns an accurate value iff. no other thread is accessing the pool. 179 func (p *ZZFreePool) ApproxLen() int { 180 n := 0 181 for i := range p.local { 182 n += p.local[i].privateSize 183 n += int(p.local[i].sharedSize) 184 } 185 return n 186 }