github.com/Schaudge/grailbase@v0.0.0-20240223061707-44c758a471c0/gtl/randomized_freepool.go.tpl (about)

     1  // +build !race
     2  
     3  // ZZFreePool is a thread-safe pool that uses power-of-two load balancing
     4  // across CPUs.
     5  
     6  // This library requires the following two additional files per package. For
     7  // now, create them manually.
     8  //
     9  // 1. A go file with the following contents
    10  //
    11  // package PACKAGE
    12  //
    13  // // This import is needed to use go:linkname.
    14  // import _ "unsafe"
    15  // // The following functions are defined in go runtime.  To use them, we need to
    16  // // import "unsafe", and elsewhere in this package, import "C" to force compiler
    17  // // to recognize the "go:linkname" directive. Some of the details are explained
    18  // // in the below blog post.
    19  // //
    20  // // procPin() pins the caller to the current processor, and returns the processor
    21  // // id in range [0,GOMAXPROCS). procUnpin() undoes the effect of procPin().
    22  // //
    23  // // http://www.alangpierce.com/blog/2016/03/17/adventures-in-go-accessing-unexported-functions/
    24  //
    25  // //go:linkname runtime_procPin sync.runtime_procPin
    26  // //go:nosplit
    27  // func runtime_procPin() int
    28  //
    29  // //go:linkname runtime_procUnpin sync.runtime_procUnpin
    30  // //go:nosplit
    31  // func runtime_procUnpin()
    32  //
    33  // //go:linkname fastrandn sync.fastrandn
    34  // func fastrandn(n uint32) uint32
    35  //
    36  // 2. An empty .s file.
    37  
    38  package PACKAGE
    39  
    40  import (
    41  	"runtime"
    42  	"sync"
    43  	"sync/atomic"
    44  	"unsafe"
    45  )
    46  
    47  // ZZFreePool is a variation of sync.Pool, specialized for a concrete type.
    48  //
    49  // - Put() performs power-of-two loadbalancing, and Get() looks only at the
    50  //   local queue.  This improves the performance of Get() on many-core machines,
    51  //   at the cost of slightly more allocations.
    52  //
    53  // - It assumes that GOMAXPROCS is fixed at boot.
    54  //
    55  // - It never frees objects accumulated in the pool. We could add this feature
    56  //   if needed.
    57  type ZZFreePool struct {
    58  	new          func() ELEM
    59  	local        []zzPoolLocal
    60  	maxLocalSize int64
    61  }
    62  
    63  const (
    64  	zzMaxPrivateElems = 4
    65  	zzCacheLineSize   = 64
    66  )
    67  
    68  type zzPoolLocalInner struct {
    69  	private     [zzMaxPrivateElems]ELEM // Can be used only by the respective P.
    70  	privateSize int
    71  
    72  	shared     []ELEM     // Can be used by any P.
    73  	sharedSize int64      // ==len(shared), but can be accessed w/o holding mu.
    74  	mu         sync.Mutex // Protects shared.
    75  }
    76  
    77  type zzPoolLocal struct {
    78  	zzPoolLocalInner
    79  	// Pad prevents false sharing.
    80  	pad [zzCacheLineSize - unsafe.Sizeof(zzPoolLocalInner{})%zzCacheLineSize]byte
    81  }
    82  
    83  // NewZZFreePool creates a new free object pool. new should create a new
    84  // object. It is called when the pool is empty on Get(). maxSize bounds the
    85  // approx max number of objects that can be stored in the pool. Beyond this
    86  // limit, Put() call will drop the objects.
    87  func NewZZFreePool(new func() ELEM, maxSize int) *ZZFreePool {
    88  	maxProcs := runtime.GOMAXPROCS(0)
    89  	maxLocalSize := -1
    90  	if maxSize > 0 {
    91  		maxLocalSize = maxSize / maxProcs
    92  		if maxLocalSize <= 0 {
    93  			maxLocalSize = 1
    94  		}
    95  	}
    96  	p := &ZZFreePool{
    97  		new:          new,
    98  		local:        make([]zzPoolLocal, maxProcs),
    99  		maxLocalSize: int64(maxLocalSize),
   100  	}
   101  	return p
   102  }
   103  
   104  func (p *ZZFreePool) pin() *zzPoolLocal {
   105  	pid := runtime_procPin()
   106  	if int(pid) >= len(p.local) {
   107  		panic(pid)
   108  	}
   109  	return &p.local[pid]
   110  }
   111  
   112  // Put adds an object to the freepool. The caller shall not touch the object
   113  // after the call.
   114  func (p *ZZFreePool) Put(x ELEM) {
   115  	done := false
   116  	l := p.pin()
   117  	if l.privateSize < zzMaxPrivateElems {
   118  		l.private[l.privateSize] = x
   119  		l.privateSize++
   120  		done = true
   121  	}
   122  	runtime_procUnpin()
   123  	if !done {
   124  		// Pick another random queue, then add x to the shorter one.
   125  		// This policy ("power of two") reduces load imbalance across
   126  		// queues to log(log(#queues)) .
   127  		//
   128  		// https://www.eecs.harvard.edu/~michaelm/postscripts/mythesis.pdf
   129  		l2 := &p.local[int(fastrandn(uint32(len(p.local))))]
   130  		lSize := atomic.LoadInt64(&l.sharedSize)
   131  		l2Size := atomic.LoadInt64(&l2.sharedSize)
   132  		if l2Size < lSize {
   133  			l = l2
   134  		}
   135  		l.mu.Lock()
   136  		if p.maxLocalSize >= 0 && l.sharedSize < p.maxLocalSize {
   137  			l.shared = append(l.shared, x)
   138  			atomic.StoreInt64(&l.sharedSize, l.sharedSize+1) // Release store.
   139  		}
   140  		l.mu.Unlock()
   141  	}
   142  }
   143  
   144  // Get removes an object from the freepool. If pool is empty, it calls the
   145  // callback passed to NewFreePool.
   146  func (p *ZZFreePool) Get() ELEM {
   147  	l := p.pin()
   148  	var x ELEM
   149  	done := false
   150  	if l.privateSize > 0 {
   151  		l.privateSize--
   152  		x = l.private[l.privateSize]
   153  		var empty ELEM
   154  		l.private[l.privateSize] = empty
   155  		done = true
   156  	}
   157  	runtime_procUnpin()
   158  	if done {
   159  		return x
   160  	}
   161  	l.mu.Lock()
   162  	last := len(l.shared) - 1
   163  	if last >= 0 {
   164  		x = l.shared[last]
   165  		l.shared = l.shared[:last]
   166  		atomic.StoreInt64(&l.sharedSize, l.sharedSize-1)
   167  		done = true
   168  	}
   169  	l.mu.Unlock()
   170  	if !done {
   171  		x = p.new()
   172  	}
   173  	return x
   174  }
   175  
   176  // ApproxLen returns an approximate length of the pool. For unittesting only.
   177  //
   178  // It returns an accurate value iff. no other thread is accessing the pool.
   179  func (p *ZZFreePool) ApproxLen() int {
   180  	n := 0
   181  	for i := range p.local {
   182  		n += p.local[i].privateSize
   183  		n += int(p.local[i].sharedSize)
   184  	}
   185  	return n
   186  }