github.com/alphadose/zenq/v2@v2.8.4/zenq.go

// A minimalist thread-safe queue implemented using a lock-free ringbuffer, which is faster
// and allocates less memory than Go's native channels
// Based on the LMAX disruptor pattern https://lmax-exchange.github.io/disruptor/disruptor.html

// Known Limitations:-
//
// 1. Max queue_size = 2^16
// 2. The queue_size is always a power of 2; if a different size is provided, it is rounded up to the next power of 2, up to a max of 2^16

// Suggestions:-
//
// 1. Use runtime.LockOSThread() on the goroutine calling ZenQ.Read() for the lowest latency, provided you have more than 1 CPU core
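//
// A minimal usage sketch (illustrative only; the payload type and queue size are arbitrary):
//
//	q := zenq.New[int](64) // capacity is rounded up to a power of 2
//	go func() {
//		q.Write(42)
//		q.Close() // close from the writer side by design
//	}()
//	for data, open := q.Read(); open; data, open = q.Read() {
//		fmt.Println(data)
//	}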

package zenq

import (
	"fmt"
	"math"
	"sync"
	"sync/atomic"
	"unsafe"

	"github.com/alphadose/zenq/v2/constants"
)

// ZenQ global state enums
const (
	// Both reads and writes are possible
	StateOpen = iota
	// No further writes can be performed and you can only read up to the last committed write in this state
	StateClosedForWrites
	// Neither reads nor writes are possible, queue is fully exhausted
	StateFullyClosed
)

// ZenQ selector state enums
const (
	// Open for being selected
	SelectionOpen = iota
	// Running state
	SelectionRunning
)

// ZenQ Slot state enums
const (
	SlotEmpty = iota
	SlotBusy
	SlotCommitted
	SlotClosed
)
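
// A slot moves through these states as follows (see Write, Read and Close below):
// a writer takes a slot from SlotEmpty to SlotBusy and commits it as SlotCommitted,
// a reader takes it from SlotCommitted to SlotBusy and releases it back to SlotEmpty,
// and Close marks the slot right after the last committed write as SlotClosed; a
// reader that encounters SlotClosed resets it to SlotEmpty and marks the whole
// queue as fully closed.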

type (
	// a single slot in the queue
	slot[T any] struct {
		writeParker *ThreadParker[T]
		atomic.Uint32
		item T
	}

	// metadata of the queue
	metaQ struct {
		globalState uint8
		// NOTE->self: strideLength and indexMask can be further optimized to uint8 for specialized ZenQs
		// with known data types instead of generic type
		// using variables with lower sizes decreases memory bandwidth consumption and increases speed
		strideLength uint16
		indexMask    uint16
		contents     unsafe.Pointer
		// memory pool refs for storing and leasing parking spots for goroutines
		alloc func() any
		free  func(any)
	}

	// container for the selection events among multiple queues
	selectFactory[T any] struct {
		selectionState atomic.Uint32
		auxThread      unsafe.Pointer
		backlog        atomic.Pointer[T]
		waitList       List
	}

	// ZenQ is the CPU cache optimized ringbuffer implementation
	ZenQ[T any] struct {
		// The padding members 0 to 4 below are here to ensure each item is on a separate cache line.
		// This prevents false sharing and hence improves performance.
		_           cacheLinePadding
		writerIndex atomic.Uint32
		_           [constants.CacheLinePadSize - unsafe.Sizeof(atomic.Uint32{})]byte
		readerIndex atomic.Uint32
		_           [constants.CacheLinePadSize - unsafe.Sizeof(atomic.Uint32{})]byte
		metaQ
		_ [constants.CacheLinePadSize - unsafe.Sizeof(metaQ{})]byte
		selectFactory[T]
		_ [constants.CacheLinePadSize - unsafe.Sizeof(selectFactory[T]{})]byte
	}
)

// returns the next power of 2 greater than or equal to val, capped at 2^16
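// For example, a requested size of 6 is rounded up to 8, and any size above
// 2^16 is capped at 65536.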
func nextGreaterPowerOf2(val uint32) uint32 {
	return 1 << uint32(math.Min(math.Ceil(Fastlog2(math.Max(float64(val), 1))), 16))
}

// New returns a new queue given its payload type passed as a generic parameter
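// The queue's capacity is nextGreaterPowerOf2(size), so for example New[int](100)
// allocates 128 slots.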
func New[T any](size uint32) *ZenQ[T] {
	var (
		queueSize = nextGreaterPowerOf2(size)
		contents  = make([]slot[T], queueSize)
		parkPool  = sync.Pool{New: func() any { return new(parkSpot[T]) }}
	)
	for idx := uint32(0); idx < queueSize; idx++ {
		spot := parkPool.Get().(*parkSpot[T])
		spot.threadPtr = nil
		contents[idx].writeParker = NewThreadParker(spot)
	}
	zenq := &ZenQ[T]{
		metaQ: metaQ{
			strideLength: uint16(unsafe.Sizeof(slot[T]{})),
			contents:     unsafe.Pointer(&contents[0]),
			alloc:        parkPool.Get,
			free:         parkPool.Put,
			indexMask:    uint16(queueSize - 1),
		},
		selectFactory: selectFactory[T]{waitList: NewList()},
	}
	go zenq.selectSender()
	// allow the above auxiliary thread to manifest
	mcall(gosched_m)
	return zenq
}

// Size returns the number of items in the queue at any given time
func (self *ZenQ[T]) Size() uint32 {
	var (
		readerIndex uint32 = self.readerIndex.Load() & uint32(self.indexMask)
		writerIndex uint32 = self.writerIndex.Load() & uint32(self.indexMask)
	)
	if readerIndex > writerIndex {
		return uint32(self.indexMask) + 2 - (readerIndex - writerIndex)
	} else if writerIndex > readerIndex {
		return writerIndex - readerIndex + 1
	} else {
		return 0
	}
}

// Write writes a value to the queue
// It reports whether the queue is closed for writes
// Even if closed for writes, the queue might still be open for reads, which can be checked by calling zenq.IsClosed()
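//
// A typical call checks the returned flag, for example:
//
//	if closed := q.Write(value); closed {
//		// the queue no longer accepts writes
//	}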
func (self *ZenQ[T]) Write(value T) (queueClosedForWrites bool) {
	if Load8(&self.globalState) != StateOpen {
		queueClosedForWrites = true
		return
	}

	// Try to send directly to a selector when possible, or else just dequeue unselected references
	// in order to reduce the burden on the auxiliary thread and save CPU time
direct_send:
	if threadPtr, dataOut := self.waitList.Dequeue(); threadPtr != nil {
		if selThread := atomic.SwapPointer(threadPtr, nil); selThread != nil {
			// direct send to selector
			*dataOut = value
			// notify selector
			safe_ready(selThread)
			return
		}
		goto direct_send
	}

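	// acquire this write's slot by masking the pre-incremented writer index into the ring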
	slot := (*slot[T])(unsafe.Pointer(uintptr(self.strideLength)*(uintptr(self.indexMask)&uintptr(self.writerIndex.Add(1))) + uintptr(self.contents)))

	// CAS -> change slot_state to busy if slot_state == empty
	for !slot.CompareAndSwap(SlotEmpty, SlotBusy) {
		switch slot.Load() {
		case SlotBusy:
			wait()
		case SlotCommitted:
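			// the slot still holds a value from the previous lap, i.e. the ring is
			// full; park this writer on the slot until a reader drains it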
			n := self.alloc().(*parkSpot[T])
			n.threadPtr, n.value = GetG(), value
			n.next.Store(nil)
			slot.writeParker.Park(n)
			mcall(fast_park)
			return
		case SlotEmpty:
			continue
		case SlotClosed:
			return
		}
	}
	slot.item = value
	slot.Store(SlotCommitted)
	return
}

// Read reads a value from the queue; each written value can be read only once
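// If the queue is empty, Read blocks (yielding the processor) until a write is
// committed or the queue is fully closed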
func (self *ZenQ[T]) Read() (data T, queueOpen bool) {
	slot := (*slot[T])(unsafe.Pointer(uintptr(self.strideLength)*(uintptr(self.indexMask)&uintptr(self.readerIndex.Add(1))) + uintptr(self.contents)))

	// CAS -> change slot_state to busy if slot_state == committed
	for !slot.CompareAndSwap(SlotCommitted, SlotBusy) {
		switch slot.Load() {
		case SlotBusy:
			wait()
		case SlotEmpty:
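			// no committed write yet; first try taking a value handed off by a
			// parked writer, otherwise yield until a write arrives or the queue
			// is drained and fully closed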
			var freeable *parkSpot[T]
			if data, queueOpen, freeable = slot.writeParker.Ready(); queueOpen {
				self.free(freeable)
				return
			} else if Load8(&self.globalState) != StateFullyClosed {
				mcall(gosched_m)
			} else {
				// queue is closed, decrement the reader index by 1
				self.readerIndex.Add(math.MaxUint32)
				queueOpen = false
				return
			}
		case SlotClosed:
			if slot.CompareAndSwap(SlotClosed, SlotEmpty) {
				Store8(&self.globalState, StateFullyClosed)
			}
			queueOpen = false
			return
		case SlotCommitted:
			continue
		}
	}
	data, queueOpen = slot.item, true
	slot.Store(SlotEmpty)
	return
}

// Close closes the ZenQ for further writes
// You can only read up to the last committed write after closing
// This function blocks if the queue is full
// By design, a ZenQ must be closed from a writer goroutine; calling it from a reader
// goroutine may block that reader and hence deadlock
// It reports whether the queue was already closed for writes
func (self *ZenQ[T]) Close() (alreadyClosedForWrites bool) {
	// This ensures a ZenQ is closed only once even if this function is called multiple times making this operation safe
	if Load8(&self.globalState) != StateOpen {
		alreadyClosedForWrites = true
		return
	}
	Store8(&self.globalState, StateClosedForWrites)
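	// claim the slot right after the last write so the closing marker lands
	// behind every committed value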
	slot := (*slot[T])(unsafe.Pointer(uintptr(self.strideLength)*(uintptr(self.indexMask)&uintptr(self.writerIndex.Add(1))) + uintptr(self.contents)))

	// CAS -> change slot_state to busy if slot_state == empty
	for !slot.CompareAndSwap(SlotEmpty, SlotBusy) {
		switch slot.Load() {
		case SlotBusy, SlotCommitted:
			mcall(gosched_m)
		case SlotEmpty:
			continue
		case SlotClosed:
			return
		}
	}
	// Closing commit
	slot.Store(SlotClosed)
	return
}

// CloseAsync closes the queue asynchronously
// Useful when a user wants to close the queue from a reader end without blocking the thread
func (self *ZenQ[T]) CloseAsync() {
	go self.Close()
}

// The following 4 functions implement the Selectable interface

// ReadFromBackLog tries to read data from the backlog if available
func (self *ZenQ[T]) ReadFromBackLog() (data any) {
	if d := self.backlog.Swap(nil); d != nil {
		data = *((*T)(d))
	}
	return
}

// Signal is the mechanism by which a selector notifies this ZenQ's auxiliary thread to contest for the selection
func (self *ZenQ[T]) Signal() uint8 {
	if !self.selectionState.CompareAndSwap(SelectionOpen, SelectionRunning) {
		return 0
	} else {
		safe_ready(self.auxThread)
		return 1
	}
}

// EnqueueSelector pushes a calling selector to this ZenQ's selector waitlist
func (self *ZenQ[T]) EnqueueSelector(threadPtr *unsafe.Pointer, dataOut *any) {
	self.waitList.Enqueue(threadPtr, dataOut)
}

// IsClosed returns whether the ZenQ is closed for both reads and writes
func (self *ZenQ[T]) IsClosed() bool {
	return Load8(&self.globalState) == StateFullyClosed
}

// Reset resets the queue state
// This also releases all parked goroutines if any and drains all committed writes
func (self *ZenQ[T]) Reset() {
	// Close() is blocking when queue is full hence execute it asynchronously
	self.CloseAsync()
	// drain entire queue
	for open := true; open; _, open = self.Read() {
	}
	Store8(&self.globalState, StateOpen)
}

// Dump dumps the current queue state
// Unsafe to be called from multiple goroutines
func (self *ZenQ[T]) Dump() {
	fmt.Printf("writerIndex: %3d, readerIndex: %3d\n contents:-\n\n", self.writerIndex.Load(), self.readerIndex.Load())
	for idx := uintptr(0); idx <= uintptr(self.indexMask); idx++ {
		slot := (*slot[T])(unsafe.Pointer(uintptr(self.contents) + idx*unsafe.Sizeof(slot[T]{})))
		fmt.Printf("Slot -> %#v\n", *slot)
	}
}

// selectSender is an auxiliary thread which remains parked by default
// only when a selector sends a signal is it notified, and it then tries to send back to the selector
// if that fails, it parks again and waits for another signal from another selection process
// since it is parked most of the time, it consumes minimal CPU time, making the selection process efficient
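//
// Flow per wakeup: read one value from the queue (unless a previous wakeup left one
// unconsumed), try handing it to a waiting selector, and stash it in the backlog if
// no selector takes it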
func (self *ZenQ[T]) selectSender() {
	atomic.StorePointer(&self.auxThread, GetG())
	var (
		data                 T
		threadPtr            unsafe.Pointer
		readState, queueOpen bool = false, true
		selectorThread       *unsafe.Pointer
		dataOut              *any
	)

	for {
		// park by default and wait for Signal() notification from a selection process
		mcall(fast_park)
		if !readState {
			data, queueOpen = self.Read()
			readState = true
		}

	selector_dequeue:
		for {
			// keep dequeuing selectors from the waitlist and try to acquire one
			// if acquired, write to the selector, ready it and go back to the parking state
			if selectorThread, dataOut = self.waitList.Dequeue(); selectorThread != nil {
				if threadPtr = atomic.SwapPointer(selectorThread, nil); threadPtr != nil {
					// implementation of the mechanism for sending from a closed channel to a selector
					if queueOpen {
						// write to the selector
						*dataOut = data
					} else {
						// send nil from closed channel
						*dataOut = nil
					}
					// notify selector
					safe_ready(threadPtr)
					readState = false
					break selector_dequeue
				} else {
					continue
				}
			} else {
				break selector_dequeue
			}
		}
		// if not selected by any selector, commit data to backlog and wait for next signal
		// saves a lot of CPU time
		if readState && queueOpen {
			var i T = data
			self.backlog.Store(&i)
		}
		self.selectionState.Store(SelectionOpen)
	}
}