// Origin: github.com/grailbio/base@v0.0.11/intervalmap/search_freepool.go
// Code generated by "../gtl/generate.py --output=search_freepool.go --prefix=searcher --PREFIX=searcher -DELEM=*searcher --package=intervalmap ../gtl/randomized_freepool.go.tpl". DO NOT EDIT.

//go:build !race
// +build !race

// searcherFreePool is a thread-safe pool that uses power-of-two loadbalancing across
// CPUs.

// This library requires the following two additional files per package. For
// now, create them manually.
//
// 1. A go file with the following contents
//
// package intervalmap
//
// // This import is needed to use go:linkname.
// import _ "unsafe"
// // The following functions are defined in go runtime. To use them, we need to
// // import "unsafe", and elsewhere in this package, import "C" to force compiler
// // to recognize the "go:linkname" directive. Some of the details are explained
// // in the below blog post.
// //
// // procPin() pins the caller to the current processor, and returns the processor
// // id in range [0,GOMAXPROCS). procUnpin() undoes the effect of procPin().
// //
// // http://www.alangpierce.com/blog/2016/03/17/adventures-in-go-accessing-unexported-functions/
//
// //go:linkname runtime_procPin sync.runtime_procPin
// //go:nosplit
// func runtime_procPin() int
//
// //go:linkname runtime_procUnpin sync.runtime_procUnpin
// //go:nosplit
// func runtime_procUnpin()
//
// //go:linkname fastrandn sync.fastrandn
// func fastrandn(n uint32) uint32
//
// 2. An empty .s file.

package intervalmap

import (
	"runtime"
	"sync"
	"sync/atomic"
	"unsafe"
)

// searcherFreePool is a variation of sync.Pool, specialized for a concrete type.
//
// - Put() performs power-of-two loadbalancing, and Get() looks only at the
//   local queue.
This improves the performance of Get() on many-core machines, 54 // at the cost of slightly more allocations. 55 // 56 // - It assumes that GOMAXPROCS is fixed at boot. 57 // 58 // - It never frees objects accumulated in the pool. We could add this feature 59 // if needed. 60 type searcherFreePool struct { 61 new func() *searcher 62 local []searcherPoolLocal 63 maxLocalSize int64 64 } 65 66 const ( 67 searcherMaxPrivateElems = 4 68 searcherCacheLineSize = 64 69 ) 70 71 type searcherPoolLocalInner struct { 72 private [searcherMaxPrivateElems]*searcher // Can be used only by the respective P. 73 privateSize int 74 75 shared []*searcher // Can be used by any P. 76 sharedSize int64 // ==len(shared), but can be accessed w/o holding mu. 77 mu sync.Mutex // Protects shared. 78 } 79 80 type searcherPoolLocal struct { 81 searcherPoolLocalInner 82 // Pad prevents false sharing. 83 pad [searcherCacheLineSize - unsafe.Sizeof(searcherPoolLocalInner{})%searcherCacheLineSize]byte 84 } 85 86 // NewsearcherFreePool creates a new free object pool. new should create a new 87 // object. It is called when the pool is empty on Get(). maxSize bounds the 88 // approx max number of objects that can be stored in the pool. Beyond this 89 // limit, Put() call will drop the objects. 90 func NewsearcherFreePool(new func() *searcher, maxSize int) *searcherFreePool { 91 maxProcs := runtime.GOMAXPROCS(0) 92 maxLocalSize := -1 93 if maxSize > 0 { 94 maxLocalSize = maxSize / maxProcs 95 if maxLocalSize <= 0 { 96 maxLocalSize = 1 97 } 98 } 99 p := &searcherFreePool{ 100 new: new, 101 local: make([]searcherPoolLocal, maxProcs), 102 maxLocalSize: int64(maxLocalSize), 103 } 104 return p 105 } 106 107 func (p *searcherFreePool) pin() *searcherPoolLocal { 108 pid := runtime_procPin() 109 if int(pid) >= len(p.local) { 110 panic(pid) 111 } 112 return &p.local[pid] 113 } 114 115 // Put adds an object to the freepool. The caller shall not touch the object 116 // after the call. 
117 func (p *searcherFreePool) Put(x *searcher) { 118 done := false 119 l := p.pin() 120 if l.privateSize < searcherMaxPrivateElems { 121 l.private[l.privateSize] = x 122 l.privateSize++ 123 done = true 124 } 125 runtime_procUnpin() 126 if !done { 127 // Pick another random queue, then add x to the shorter one. 128 // This policy ("power of two") reduces load imbalance across 129 // queues to log(log(#queues)) . 130 // 131 // https://www.eecs.harvard.edu/~michaelm/postscripts/mythesis.pdf 132 l2 := &p.local[int(fastrandn(uint32(len(p.local))))] 133 lSize := atomic.LoadInt64(&l.sharedSize) 134 l2Size := atomic.LoadInt64(&l2.sharedSize) 135 if l2Size < lSize { 136 l = l2 137 } 138 l.mu.Lock() 139 if p.maxLocalSize >= 0 && l.sharedSize < p.maxLocalSize { 140 l.shared = append(l.shared, x) 141 atomic.StoreInt64(&l.sharedSize, l.sharedSize+1) // Release store. 142 } 143 l.mu.Unlock() 144 } 145 } 146 147 // Get removes an object from the freepool. If pool is empty, it calls the 148 // callback passed to NewFreePool. 149 func (p *searcherFreePool) Get() *searcher { 150 l := p.pin() 151 var x *searcher 152 done := false 153 if l.privateSize > 0 { 154 l.privateSize-- 155 x = l.private[l.privateSize] 156 var empty *searcher 157 l.private[l.privateSize] = empty 158 done = true 159 } 160 runtime_procUnpin() 161 if done { 162 return x 163 } 164 l.mu.Lock() 165 last := len(l.shared) - 1 166 if last >= 0 { 167 x = l.shared[last] 168 l.shared = l.shared[:last] 169 atomic.StoreInt64(&l.sharedSize, l.sharedSize-1) 170 done = true 171 } 172 l.mu.Unlock() 173 if !done { 174 x = p.new() 175 } 176 return x 177 } 178 179 // ApproxLen returns an approximate length of the pool. For unittesting only. 180 // 181 // It returns an accurate value iff. no other thread is accessing the pool. 182 func (p *searcherFreePool) ApproxLen() int { 183 n := 0 184 for i := range p.local { 185 n += p.local[i].privateSize 186 n += int(p.local[i].sharedSize) 187 } 188 return n 189 }