github.com/bytedance/gopkg@v0.0.0-20240514070511-01b2cbcf35e1/lang/syncx/poolqueue.go

// Copyright 2021 ByteDance Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//go:build !race
// +build !race

package syncx

import (
	"sync/atomic"
	"unsafe"
)

// poolDequeue is a lock-free fixed-size single-producer,
// multi-consumer queue. The single producer can both push and pop
// from the head, and consumers can pop from the tail.
//
// It has the added feature that it nils out unused slots to avoid
// unnecessary retention of objects. This is important for sync.Pool,
// but not typically a property considered in the literature.
type poolDequeue struct {
	// headTail packs together a 32-bit head index and a 32-bit
	// tail index. Both are indexes into vals, reduced modulo
	// len(vals) (equivalently, masked with len(vals)-1, since
	// len(vals) is a power of two).
	//
	// tail = index of oldest data in queue
	// head = index of next slot to fill
	//
	// Slots in the range [tail, head) are owned by consumers.
	// A consumer continues to own a slot outside this range until
	// it nils the slot, at which point ownership passes to the
	// producer.
	//
	// The head index is stored in the most-significant bits so
	// that we can atomically add to it and the overflow is
	// harmless.
	headTail uint64

	// vals is a ring buffer of interface{} values stored in this
	// dequeue. The size of this must be a power of 2.
	//
	// vals[i].typ is nil if the slot is empty and non-nil
	// otherwise. A slot is still in use until *both* the tail
	// index has moved beyond it and typ has been set to nil. This
	// is set to nil atomically by the consumer and read
	// atomically by the producer.
	vals []eface
}

type eface struct {
	typ, val unsafe.Pointer
}
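
// poolDequeueUsageSketch is an illustrative sketch added by the
// editor (not part of the original file), assuming a *block value b
// obtained elsewhere in this package. It shows the ownership
// contract: only the single producer may call pushHead and popHead,
// while any goroutine may call popTail.
func poolDequeueUsageSketch(b *block) {
	d := &poolDequeue{vals: make([]eface, 8)} // size must be a power of 2

	// Producer side: push at the head; pushHead reports false when full.
	if !d.pushHead(b) {
		return
	}

	// Consumer side (any goroutine): pop at the tail.
	if v, ok := d.popTail(); ok {
		_ = v
	}
}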

const dequeueBits = 32

// dequeueLimit is the maximum size of a poolDequeue.
//
// This must be at most (1<<dequeueBits)/2 because detecting fullness
// depends on wrapping around the ring buffer without wrapping around
// the index. We divide by 4 so this fits in an int on 32-bit.
const dequeueLimit = (1 << dequeueBits) / 4
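
// As a worked example of the fullness check in pushHead (added for
// illustration): with len(vals) == 8, tail == 5, and head == 13, the
// ring holds 8 elements, and (5+8)&(1<<dequeueBits-1) == 13 == head,
// so pushHead reports full. Because sizes are capped at dequeueLimit,
// head never laps tail by anywhere near 1<<dequeueBits, so this
// equality is unambiguous even when the 32-bit indexes wrap around.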

// dequeueNil is used in poolDequeue to represent interface{}(nil).
// Since we use nil to represent empty slots, we need a sentinel value
// to represent nil. (Retained from the sync.Pool implementation this
// file is adapted from; it is not referenced in this *block-based
// variant.)
type dequeueNil *struct{}

func (d *poolDequeue) unpack(ptrs uint64) (head, tail uint32) {
	const mask = 1<<dequeueBits - 1
	head = uint32((ptrs >> dequeueBits) & mask)
	tail = uint32(ptrs & mask)
	return
}

func (d *poolDequeue) pack(head, tail uint32) uint64 {
	const mask = 1<<dequeueBits - 1
	return (uint64(head) << dequeueBits) |
		uint64(tail&mask)
}
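
// packUnpackSketch is an illustrative example added by the editor
// (not part of the original file): pack and unpack are inverses, and
// because head occupies the high 32 bits, adding 1<<dequeueBits to
// headTail is exactly head++, with any overflow of head discarded
// harmlessly.
func packUnpackSketch() {
	var d poolDequeue
	ptrs := d.pack(3, 1)
	head, tail := d.unpack(ptrs) // head == 3, tail == 1
	ptrs += 1 << dequeueBits     // same effect as the producer's head increment
	head, tail = d.unpack(ptrs)  // head == 4, tail == 1
	_, _ = head, tail
}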

// pushHead adds val at the head of the queue. It returns false if the
// queue is full. It must only be called by a single producer.
func (d *poolDequeue) pushHead(val *block) bool {
	ptrs := atomic.LoadUint64(&d.headTail)
	head, tail := d.unpack(ptrs)
	if (tail+uint32(len(d.vals)))&(1<<dequeueBits-1) == head {
		// Queue is full.
		return false
	}
	slot := &d.vals[head&uint32(len(d.vals)-1)]

	// Check if the head slot has been released by popTail.
	typ := atomic.LoadPointer(&slot.typ)
	if typ != nil {
		// Another goroutine is still cleaning up the tail, so
		// the queue is actually still full.
		return false
	}

	// The head slot is free, so we own it.
	*(**block)(unsafe.Pointer(slot)) = val

	// Increment head. This passes ownership of slot to popTail
	// and acts as a store barrier for writing the slot.
	atomic.AddUint64(&d.headTail, 1<<dequeueBits)
	return true
}

// popHead removes and returns the element at the head of the queue.
// It returns false if the queue is empty. It must only be called by a
// single producer.
func (d *poolDequeue) popHead() (*block, bool) {
	var slot *eface
	for {
		ptrs := atomic.LoadUint64(&d.headTail)
		head, tail := d.unpack(ptrs)
		if tail == head {
			// Queue is empty.
			return nil, false
		}

		// Confirm tail and decrement head. We do this before
		// reading the value to take back ownership of this
		// slot.
		head--
		ptrs2 := d.pack(head, tail)
		if atomic.CompareAndSwapUint64(&d.headTail, ptrs, ptrs2) {
			// We successfully took back slot.
			slot = &d.vals[head&uint32(len(d.vals)-1)]
			break
		}
	}

	val := *(**block)(unsafe.Pointer(slot))
	// Zero the slot. Unlike popTail, this isn't racing with
	// pushHead, so we don't need to be careful here.
	*slot = eface{}
	return val, true
}

// popTail removes and returns the element at the tail of the queue.
// It returns false if the queue is empty. It may be called by any
// number of consumers.
func (d *poolDequeue) popTail() (*block, bool) {
	var slot *eface
	for {
		ptrs := atomic.LoadUint64(&d.headTail)
		head, tail := d.unpack(ptrs)
		if tail == head {
			// Queue is empty.
			return nil, false
		}

		// Confirm head and tail (for our speculative check
		// above) and increment tail. If this succeeds, then
		// we own the slot at tail.
		ptrs2 := d.pack(head, tail+1)
		if atomic.CompareAndSwapUint64(&d.headTail, ptrs, ptrs2) {
			// Success.
			slot = &d.vals[tail&uint32(len(d.vals)-1)]
			break
		}
	}

	// We now own slot.
	val := *(**block)(unsafe.Pointer(slot))

	// Tell pushHead that we're done with this slot. Zeroing the
	// slot is also important so we don't leave behind references
	// that could keep this object live longer than necessary.
	//
	// We write to val first and then publish that we're done with
	// this slot by atomically writing to typ.
	slot.val = nil
	atomic.StorePointer(&slot.typ, nil)
	// At this point pushHead owns the slot.
	return val, true
}
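
// drainConcurrentlySketch is an illustrative example added by the
// editor (not part of the original file): popTail is safe to call
// from any number of goroutines because the CAS on headTail
// arbitrates which consumer takes ownership of the tail slot.
func drainConcurrentlySketch(d *poolDequeue, consumers int) {
	done := make(chan struct{})
	for i := 0; i < consumers; i++ {
		go func() {
			// Each consumer pops until it observes an empty queue.
			for {
				if _, ok := d.popTail(); !ok {
					break
				}
			}
			done <- struct{}{}
		}()
	}
	for i := 0; i < consumers; i++ {
		<-done
	}
}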

// poolChain is a dynamically-sized version of poolDequeue.
//
// This is implemented as a doubly-linked list queue of poolDequeues
// where each dequeue is double the size of the previous one. Once a
// dequeue fills up, this allocates a new one and only ever pushes to
// the latest dequeue. Pops happen from the other end of the list and
// once a dequeue is exhausted, it gets removed from the list.
type poolChain struct {
	size int32

	// head is the poolDequeue to push to. This is only accessed
	// by the producer, so doesn't need to be synchronized.
	head *poolChainElt

	// tail is the poolDequeue to popTail from. This is accessed
	// by consumers, so reads and writes must be atomic.
	tail *poolChainElt
}

type poolChainElt struct {
	poolDequeue

	// next and prev link to the adjacent poolChainElts in this
	// poolChain.
	//
	// next is written atomically by the producer and read
	// atomically by the consumer. It only transitions from nil to
	// non-nil.
	//
	// prev is written atomically by the consumer and read
	// atomically by the producer. It only transitions from
	// non-nil to nil.
	next, prev *poolChainElt
}

func storePoolChainElt(pp **poolChainElt, v *poolChainElt) {
	atomic.StorePointer((*unsafe.Pointer)(unsafe.Pointer(pp)), unsafe.Pointer(v))
}

func loadPoolChainElt(pp **poolChainElt) *poolChainElt {
	return (*poolChainElt)(atomic.LoadPointer((*unsafe.Pointer)(unsafe.Pointer(pp))))
}
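
// poolChainUsageSketch is an illustrative example added by the editor
// (not part of the original file), assuming a *block value b from
// elsewhere in this package: the zero poolChain is ready to use; the
// first pushHead allocates the initial 8-slot dequeue, and the chain
// grows by doubling as dequeues fill up.
func poolChainUsageSketch(b *block) {
	var c poolChain
	c.pushHead(b) // single producer only
	if v, ok := c.popTail(); ok { // any goroutine
		_ = v
	}
}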

func (c *poolChain) pushHead(val *block) {
	atomic.AddInt32(&c.size, 1)
	d := c.head
	if d == nil {
		// Initialize the chain.
		const initSize = 8 // Must be a power of 2
		d = new(poolChainElt)
		d.vals = make([]eface, initSize)
		c.head = d
		storePoolChainElt(&c.tail, d)
	}

	if d.pushHead(val) {
		return
	}

	// The current dequeue is full. Allocate a new one of twice
	// the size.
	newSize := len(d.vals) * 2
	if newSize >= dequeueLimit {
		// Can't make it any bigger.
		newSize = dequeueLimit
	}

	d2 := &poolChainElt{prev: d}
	d2.vals = make([]eface, newSize)
	c.head = d2
	storePoolChainElt(&d.next, d2)
	d2.pushHead(val)
}
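
// For illustration (note added by the editor): successive dequeues in
// the chain have sizes 8, 16, 32, ..., capped at dequeueLimit
// (1<<30), so reaching a total capacity of n slots requires only
// O(log n) dequeues.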

func (c *poolChain) popHead() (*block, bool) {
	d := c.head
	for d != nil {
		if val, ok := d.popHead(); ok {
			atomic.AddInt32(&c.size, -1)
			return val, ok
		}
		// There may still be unconsumed elements in the
		// previous dequeue, so try backing up.
		d = loadPoolChainElt(&d.prev)
	}
	return nil, false
}

func (c *poolChain) popTail() (*block, bool) {
	d := loadPoolChainElt(&c.tail)
	if d == nil {
		return nil, false
	}

	for {
		// It's important that we load the next pointer
		// *before* popping the tail. In general, d may be
		// transiently empty, but if next is non-nil before
		// the pop and the pop fails, then d is permanently
		// empty, which is the only condition under which it's
		// safe to drop d from the chain.
		d2 := loadPoolChainElt(&d.next)

		if val, ok := d.popTail(); ok {
			atomic.AddInt32(&c.size, -1)
			return val, ok
		}

		if d2 == nil {
			// This is the only dequeue. It's empty right
			// now, but could be pushed to in the future.
			return nil, false
		}

		// The tail of the chain has been drained, so move on
		// to the next dequeue. Try to drop it from the chain
		// so the next pop doesn't have to look at the empty
		// dequeue again.
		if atomic.CompareAndSwapPointer((*unsafe.Pointer)(unsafe.Pointer(&c.tail)), unsafe.Pointer(d), unsafe.Pointer(d2)) {
			// We won the race. Clear the prev pointer so
			// the garbage collector can collect the empty
			// dequeue and so popHead doesn't back up
			// further than necessary.
			storePoolChainElt(&d2.prev, nil)
		}
		d = d2
	}
}