github.com/Schaudge/hts@v0.0.0-20240223063651-737b4d69d68c/sam/record_pool.go (about) 1 // Code generated by " ../../base/gtl/generate.py --output=record_pool.go --prefix=Record -DELEM=*Record --package=sam ../../base/gtl/randomized_freepool.go.tpl ". DO NOT EDIT. 2 3 // +build !race 4 5 // RecordFreePool is a thread-safe pool that uses power-of-two loadbalancing across 6 // CPUs. 7 8 // This library requires the following two additional files per package. For 9 // now, create them manually. 10 // 11 // 1. A go file with the following contents 12 // 13 // package sam 14 // 15 // // This import is needed to use go:linkname. 16 // import _ "unsafe" 17 // // The following functions are defined in the go runtime. To use them, we need to 18 // // import "unsafe", and elsewhere in this package, import "C" to force the compiler 19 // // to recognize the "go:linkname" directive. Some of the details are explained 20 // // in the blog post below. 21 // // 22 // // procPin() pins the caller to the current processor, and returns the processor 23 // // id in range [0,GOMAXPROCS). procUnpin() undoes the effect of procPin(). 24 // // 25 // // http://www.alangpierce.com/blog/2016/03/17/adventures-in-go-accessing-unexported-functions/ 26 // 27 // //go:linkname runtime_procPin sync.runtime_procPin 28 // //go:nosplit 29 // func runtime_procPin() int 30 // 31 // //go:linkname runtime_procUnpin sync.runtime_procUnpin 32 // //go:nosplit 33 // func runtime_procUnpin() 34 // 35 // //go:linkname fastrand runtime.fastrand 36 // func fastrand() uint32 37 // 38 // 2. An empty .s file. 39 40 package sam 41 42 import ( 43 "runtime" 44 "sync" 45 "sync/atomic" 46 "unsafe" 47 ) 48 49 // RecordFreePool is a variation of sync.Pool, specialized for a concrete type. 50 // 51 // - Put() performs power-of-two loadbalancing, and Get() looks only at the 52 // local queue. This improves the performance of Get() on many-core machines, 53 // at the cost of slightly more allocations. 
54 // 55 // - It assumes that GOMAXPROCS is fixed at boot. 56 // 57 // - It never frees objects accumulated in the pool. We could add this feature 58 // if needed. 59 type RecordFreePool struct { 60 new func() *Record 61 local []RecordPoolLocal 62 maxLocalSize int64 63 } 64 65 const ( 66 RecordMaxPrivateElems = 4 67 RecordCacheLineSize = 64 68 ) 69 70 type RecordPoolLocalInner struct { 71 private [RecordMaxPrivateElems]*Record // Can be used only by the respective P. 72 privateSize int 73 74 shared []*Record // Can be used by any P. 75 sharedSize int64 // ==len(shared), but can be accessed w/o holding mu. 76 mu sync.Mutex // Protects shared. 77 } 78 79 type RecordPoolLocal struct { 80 RecordPoolLocalInner 81 // Pad prevents false sharing. 82 pad [RecordCacheLineSize - unsafe.Sizeof(RecordPoolLocalInner{})%RecordCacheLineSize]byte 83 } 84 85 // NewRecordFreePool creates a new free object pool. new should create a new 86 // object. It is called when the pool is empty on Get(). maxSize bounds the 87 // approx max number of objects that can be stored in the pool. Beyond this 88 // limit, Put() call will drop the objects. 89 func NewRecordFreePool(new func() *Record, maxSize int) *RecordFreePool { 90 maxProcs := runtime.GOMAXPROCS(0) 91 maxLocalSize := -1 92 if maxSize > 0 { 93 maxLocalSize = maxSize / maxProcs 94 if maxLocalSize <= 0 { 95 maxLocalSize = 1 96 } 97 } 98 p := &RecordFreePool{ 99 new: new, 100 local: make([]RecordPoolLocal, maxProcs), 101 maxLocalSize: int64(maxLocalSize), 102 } 103 return p 104 } 105 106 func (p *RecordFreePool) pin() *RecordPoolLocal { 107 pid := runtime_procPin() 108 if int(pid) >= len(p.local) { 109 panic(pid) 110 } 111 return &p.local[pid] 112 } 113 114 // Put adds an object to the freepool. The caller shall not touch the object 115 // after the call. 
116 func (p *RecordFreePool) Put(x *Record) { 117 done := false 118 l := p.pin() 119 if l.privateSize < RecordMaxPrivateElems { 120 l.private[l.privateSize] = x 121 l.privateSize++ 122 done = true 123 } 124 runtime_procUnpin() 125 if !done { 126 // Pick another random queue, then add x to the shorter one. 127 // This policy ("power of two") reduces load imbalance across 128 // queues to log(log(#queues)) . 129 // 130 // https://www.eecs.harvard.edu/~michaelm/postscripts/mythesis.pdf 131 l2 := &p.local[int(fastrand())%len(p.local)] 132 lSize := atomic.LoadInt64(&l.sharedSize) 133 l2Size := atomic.LoadInt64(&l2.sharedSize) 134 if l2Size < lSize { 135 l = l2 136 } 137 l.mu.Lock() 138 if p.maxLocalSize >= 0 && l.sharedSize < p.maxLocalSize { 139 l.shared = append(l.shared, x) 140 atomic.StoreInt64(&l.sharedSize, l.sharedSize+1) // Release store. 141 } 142 l.mu.Unlock() 143 } 144 } 145 146 // Get removes an object from the freepool. If pool is empty, it calls the 147 // callback passed to NewFreePool. 148 func (p *RecordFreePool) Get() *Record { 149 l := p.pin() 150 var x *Record 151 done := false 152 if l.privateSize > 0 { 153 l.privateSize-- 154 x = l.private[l.privateSize] 155 var empty *Record 156 l.private[l.privateSize] = empty 157 done = true 158 } 159 runtime_procUnpin() 160 if done { 161 return x 162 } 163 l.mu.Lock() 164 last := len(l.shared) - 1 165 if last >= 0 { 166 x = l.shared[last] 167 l.shared = l.shared[:last] 168 atomic.StoreInt64(&l.sharedSize, l.sharedSize-1) 169 done = true 170 } 171 l.mu.Unlock() 172 if !done { 173 x = p.new() 174 } 175 return x 176 } 177 178 // ApproxLen returns an approximate length of the pool. For unittesting only. 179 // 180 // It returns an accurate value iff. no other thread is accessing the pool. 181 func (p *RecordFreePool) ApproxLen() int { 182 n := 0 183 for i := range p.local { 184 n += p.local[i].privateSize 185 n += int(p.local[i].sharedSize) 186 } 187 return n 188 }