github.com/alphadose/zenq/v2@v2.8.2/zenq.go (about)

     1  // A minimalist thread-safe queue implemented using a lock-free ringbuffer which is faster
     2  // and has lower memory allocations than golang's native channels
     3  // Based on the LMAX disruptor pattern https://lmax-exchange.github.io/disruptor/disruptor.html
     4  
     5  // Known Limitations:-
     6  //
     7  // 1. Max queue_size = 2^16
     8  // 2. The queue_size is always a power of 2; if a different size is provided, it is rounded up to the next power of 2, up to a maximum of 2^16
     9  
    10  // Suggestions:-
    11  //
    12  // 1. Use runtime.LockOSThread() on the goroutine calling ZenQ.Read() for lowest latency provided you have > 1 cpu cores
    13  
    14  package zenq
    15  
    16  import (
    17  	"fmt"
    18  	"math"
    19  	"sync"
    20  	"sync/atomic"
    21  	"unsafe"
    22  
    23  	"github.com/alphadose/zenq/v2/constants"
    24  )
    25  
// ZenQ global state enums
// One of these values is kept in metaQ.globalState and is read/written through Load8/Store8
const (
	// Both reads and writes are possible
	StateOpen = iota
	// No further writes can be performed and you can only read up to the last committed write in this state
	StateClosedForWrites
	// Neither reads nor writes are possible, queue is fully exhausted
	StateFullyClosed
)
    35  
// ZenQ selector state enums
// One of these values is kept in selectFactory.selectionState
const (
	// Open for being selected: Signal() can claim the queue by swapping this state to SelectionRunning
	SelectionOpen = iota
	// Running state: set by Signal() and kept until selectSender() stores SelectionOpen again
	SelectionRunning
)
    43  
// ZenQ Slot state enums
// Every slot keeps one of these values in its embedded atomic.Uint32
const (
	// The slot holds no value; Write() and Close() claim it by swapping it to SlotBusy
	SlotEmpty = iota
	// Transitional state held by whoever won the compare-and-swap on the slot
	SlotBusy
	// A value has been stored in the slot; Read() claims it by swapping it to SlotBusy
	SlotCommitted
	// Closing commit stored by Close(); Read() reports the queue as not open when it finds this state
	SlotClosed
)
    51  
type (
	// a single slot in the queue
	slot[T any] struct {
		// parker on which Write() parks a writer that finds the slot still committed,
		// and from which Read() takes a value when it finds the slot empty
		writeParker *ThreadParker[T]
		// slot state (one of the Slot* enums), embedded so that CompareAndSwap/Load/Store
		// can be called directly on the slot
		atomic.Uint32
		// payload stored by Write() and copied out by Read()
		item T
	}

	// metadata of the queue
	metaQ struct {
		// one of the State* enums, accessed through Load8/Store8
		globalState uint8
		// NOTE->self: strideLength and indexMask can be further optimized to uint8 for specialized ZenQs
		// with known data types instead of generic type
		// using variables with lower sizes decreases memory bandwidth consumption and increases speed

		// size in bytes of one slot[T], set in New() from unsafe.Sizeof(slot[T]{}) converted to uint16
		strideLength uint16
		// queue_size - 1, used to wrap the reader/writer indices onto a slot position
		indexMask uint16
		// pointer to the first slot of the backing slice allocated in New()
		contents unsafe.Pointer
		// memory pool refs for storing and leasing parking spots for goroutines
		alloc func() any
		free  func(any)
	}

	// container for the selection events among multiple queues
	selectFactory[T any] struct {
		// SelectionOpen or SelectionRunning, see Signal() and selectSender()
		selectionState atomic.Uint32
		// value returned by GetG() inside selectSender(), handed to safe_ready() by Signal()
		auxThread unsafe.Pointer
		// value read by selectSender() that no selector picked up; taken out by ReadFromBackLog()
		backlog atomic.Pointer[T]
		// selectors registered through EnqueueSelector()
		waitList List
	}

	// ZenQ is the CPU cache optimized ringbuffer implementation
	ZenQ[T any] struct {
		// The blank (_) padding members below are here to ensure each item is on a separate cache line.
		// This prevents false sharing and hence improves performance.
		_           cacheLinePadding
		writerIndex atomic.Uint32
		_           [constants.CacheLinePadSize - unsafe.Sizeof(atomic.Uint32{})]byte
		readerIndex atomic.Uint32
		_           [constants.CacheLinePadSize - unsafe.Sizeof(atomic.Uint32{})]byte
		metaQ
		_ [constants.CacheLinePadSize - unsafe.Sizeof(metaQ{})]byte
		selectFactory[T]
		_ [constants.CacheLinePadSize - unsafe.Sizeof(selectFactory[T]{})]byte
	}
)
    97  
    98  // returns the next greater power of 2 relative to val
    99  func nextGreaterPowerOf2(val uint32) uint32 {
   100  	return 1 << uint32(math.Min(math.Ceil(Fastlog2(math.Max(float64(val), 1))), 16))
   101  }
   102  
   103  // New returns a new queue given its payload type passed as a generic parameter
   104  func New[T any](size uint32) *ZenQ[T] {
   105  	var (
   106  		queueSize = nextGreaterPowerOf2(size)
   107  		contents  = make([]slot[T], queueSize, queueSize)
   108  		parkPool  = sync.Pool{New: func() any { return new(parkSpot[T]) }}
   109  	)
   110  	for idx := uint32(0); idx < queueSize; idx++ {
   111  		spot := parkPool.Get().(*parkSpot[T])
   112  		spot.threadPtr = nil
   113  		contents[idx].writeParker = NewThreadParker(spot)
   114  	}
   115  	zenq := &ZenQ[T]{
   116  		metaQ: metaQ{
   117  			strideLength: uint16(unsafe.Sizeof(slot[T]{})),
   118  			contents:     unsafe.Pointer(&contents[0]),
   119  			alloc:        parkPool.Get,
   120  			free:         parkPool.Put,
   121  			indexMask:    uint16(queueSize - 1),
   122  		},
   123  		selectFactory: selectFactory[T]{waitList: NewList()},
   124  	}
   125  	go zenq.selectSender()
   126  	// allow the above auxillary thread to manifest
   127  	mcall(gosched_m)
   128  	return zenq
   129  }
   130  
   131  // Write writes a value to the queue
   132  // It returns whether the queue is currently open for writes or not
   133  // If not then it might be still open for reads, which can be checked by calling zenq.IsClosed()
   134  func (self *ZenQ[T]) Write(value T) (queueClosedForWrites bool) {
   135  	if Load8(&self.globalState) != StateOpen {
   136  		queueClosedForWrites = true
   137  		return
   138  	}
   139  
   140  	// Try to send directly to selector when possible or else just dequeue unselected references
   141  	// in order to reduce the burden on the auxillary thread and save cpu time
   142  direct_send:
   143  	if threadPtr, dataOut := self.waitList.Dequeue(); threadPtr != nil {
   144  		if selThread := atomic.SwapPointer(threadPtr, nil); selThread != nil {
   145  			// direct send to selector
   146  			*dataOut = value
   147  			// notify selector
   148  			safe_ready(selThread)
   149  			return
   150  		}
   151  		goto direct_send
   152  	}
   153  
   154  	slot := (*slot[T])(unsafe.Pointer(uintptr(self.strideLength)*(uintptr(self.indexMask)&uintptr(self.writerIndex.Add(1))) + uintptr(self.contents)))
   155  
   156  	// CAS -> change slot_state to busy if slot_state == empty
   157  	for !slot.CompareAndSwap(SlotEmpty, SlotBusy) {
   158  		switch slot.Load() {
   159  		case SlotBusy:
   160  			wait()
   161  		case SlotCommitted:
   162  			n := self.alloc().(*parkSpot[T])
   163  			n.threadPtr, n.value = GetG(), value
   164  			n.next.Store(nil)
   165  			slot.writeParker.Park(n)
   166  			mcall(fast_park)
   167  			return
   168  		case SlotEmpty:
   169  			continue
   170  		case SlotClosed:
   171  			return
   172  		}
   173  	}
   174  	slot.item = value
   175  	slot.Store(SlotCommitted)
   176  	return
   177  }
   178  
// Read reads a value from the queue, each written value can be read only once
// queueOpen is false when no value was obtained because the queue is closed;
// in that case data is the zero value of T
func (self *ZenQ[T]) Read() (data T, queueOpen bool) {
	// slot address = contents + strideLength * (readerIndex+1 wrapped by indexMask)
	slot := (*slot[T])(unsafe.Pointer(uintptr(self.strideLength)*(uintptr(self.indexMask)&uintptr(self.readerIndex.Add(1))) + uintptr(self.contents)))

	// CAS -> change slot_state to busy if slot_state == committed
	for !slot.CompareAndSwap(SlotCommitted, SlotBusy) {
		switch slot.Load() {
		case SlotBusy:
			// somebody else currently owns the slot, retry after waiting
			wait()
		case SlotEmpty:
			// nothing committed in this slot: ask the slot's parker for a value first
			// (presumably the value of a writer parked by Write() — confirm against ThreadParker.Ready)
			var freeable *parkSpot[T]
			if data, queueOpen, freeable = slot.writeParker.Ready(); queueOpen {
				// hand the parking spot back to the pool
				self.free(freeable)
				return
			} else if Load8(&self.globalState) != StateFullyClosed {
				// queue not fully closed yet, retry the slot after the gosched_m call
				mcall(gosched_m)
			} else {
				// queue is closed, decrement the reader index by 1
				// (adding MaxUint32 to a uint32 wraps around, which equals subtracting 1)
				self.readerIndex.Add(math.MaxUint32)
				queueOpen = false
				return
			}
		case SlotClosed:
			// closing commit stored by Close() reached: the reader that wins the swap back to
			// empty marks the whole queue as fully closed
			if slot.CompareAndSwap(SlotClosed, SlotEmpty) {
				Store8(&self.globalState, StateFullyClosed)
			}
			queueOpen = false
			return
		case SlotCommitted:
			continue
		}
	}
	// the slot is owned by this reader now, copy the value out and release the slot
	data, queueOpen = slot.item, true
	slot.Store(SlotEmpty)
	return
}
   215  
   216  // Close closes the ZenQ for further writes
   217  // You can only read uptill the last committed write after closing
   218  // This function will be blocking in case the queue is full
   219  // ZenQ is closed from a writer goroutine by design, hence it should always be called
   220  // from a writer goroutine and never from a reader goroutine which might cause the reader to get blocked and hence deadlock
   221  // It returns if the queue was already closed for writes or not
   222  func (self *ZenQ[T]) Close() (alreadyClosedForWrites bool) {
   223  	// This ensures a ZenQ is closed only once even if this function is called multiple times making this operation safe
   224  	if Load8(&self.globalState) != StateOpen {
   225  		alreadyClosedForWrites = true
   226  		return
   227  	}
   228  	Store8(&self.globalState, StateClosedForWrites)
   229  	slot := (*slot[T])(unsafe.Pointer(uintptr(self.strideLength)*(uintptr(self.indexMask)&uintptr(self.writerIndex.Add(1))) + uintptr(self.contents)))
   230  
   231  	// CAS -> change slot_state to busy if slot_state == empty
   232  	for !slot.CompareAndSwap(SlotEmpty, SlotBusy) {
   233  		switch slot.Load() {
   234  		case SlotBusy, SlotCommitted:
   235  			mcall(gosched_m)
   236  		case SlotEmpty:
   237  			continue
   238  		case SlotClosed:
   239  			return
   240  		}
   241  	}
   242  	// Closing commit
   243  	slot.Store(SlotClosed)
   244  	return
   245  }
   246  
// CloseAsync closes the channel asynchronously by running Close() in a new goroutine
// Useful when a user wants to close the channel from a reader end without blocking the thread
// The result of Close() is discarded
func (self *ZenQ[T]) CloseAsync() {
	go self.Close()
}
   252  
   253  // The 4 functions below implement the Selectable interface
   254  
   255  // ReadFromBackLog tries to read a data from backlog if available
   256  func (self *ZenQ[T]) ReadFromBackLog() (data any) {
   257  	if d := self.backlog.Swap(nil); d != nil {
   258  		data = *((*T)(d))
   259  	}
   260  	return
   261  }
   262  
   263  // Signal is the mechanism by which a selector notifies this ZenQ's auxillary thread to contest for the selection
   264  func (self *ZenQ[T]) Signal() uint8 {
   265  	if !self.selectionState.CompareAndSwap(SelectionOpen, SelectionRunning) {
   266  		return 0
   267  	} else {
   268  		safe_ready(self.auxThread)
   269  		return 1
   270  	}
   271  }
   272  
// EnqueueSelector pushes a calling selector to this ZenQ's selector waitlist
// threadPtr and dataOut are forwarded unchanged to the waitlist; Write() and selectSender()
// later dequeue them, swap *threadPtr to nil to claim the selector and store the value into *dataOut
func (self *ZenQ[T]) EnqueueSelector(threadPtr *unsafe.Pointer, dataOut *any) {
	self.waitList.Enqueue(threadPtr, dataOut)
}
   277  
// IsClosed returns whether the zenq is closed for both reads and writes
// i.e. whether the global state equals StateFullyClosed; a queue which is only
// closed for writes still reports false here
func (self *ZenQ[T]) IsClosed() bool {
	return Load8(&self.globalState) == StateFullyClosed
}
   282  
   283  // Reset resets the queue state
   284  // This also releases all parked goroutines if any and drains all committed writes
   285  func (self *ZenQ[T]) Reset() {
   286  	// Close() is blocking when queue is full hence execute it asynchronously
   287  	self.CloseAsync()
   288  	// drain entire queue
   289  	for open := true; open; _, open = self.Read() {
   290  	}
   291  	Store8(&self.globalState, StateOpen)
   292  }
   293  
   294  // Dump dumps the current queue state
   295  // Unsafe to be called from multiple goroutines
   296  func (self *ZenQ[T]) Dump() {
   297  	fmt.Printf("writerIndex: %3d, readerIndex: %3d\n contents:-\n\n", self.writerIndex, self.readerIndex)
   298  	for idx := uintptr(0); idx <= uintptr(self.indexMask); idx++ {
   299  		slot := (*slot[T])(unsafe.Pointer(uintptr(self.contents) + idx*unsafe.Sizeof(slot[T]{})))
   300  		fmt.Printf("Slot -> %#v\n", *slot)
   301  	}
   302  }
   303  
// selectSender is an auxiliary thread which remains parked by default
// only when a selector sends a signal, it is notified and tries to send back to the selector
// if it fails, then it parks again and waits for another signal from another selection process
// since it is parked most of the time, it consumes minimal cpu time making the selection process efficient
// The loop never terminates, no exit condition exists in this function
func (self *ZenQ[T]) selectSender() {
	// publish the handle of this goroutine so that Signal() can pass it to safe_ready()
	atomic.StorePointer(&self.auxThread, GetG())
	var (
		// last value obtained from Read() and whether the queue was open at that time
		data T
		// selector handle obtained by claiming a waitlist entry
		threadPtr unsafe.Pointer
		// readState is true while `data` holds a value that was not handed to a selector yet
		readState, queueOpen bool = false, true
		// waitlist entry: pointer to the selector handle and destination of the value
		selectorThread *unsafe.Pointer
		dataOut        *any
	)

	for {
		// park by default and wait for Signal() notification from a selection process
		mcall(fast_park)
		// read a fresh value only when the previous one has been delivered to a selector
		if !readState {
			data, queueOpen = self.Read()
			readState = true
		}

	selector_dequeue:
		for {
			// keep dequeuing selectors from waitlist and try to acquire one
			// if acquired write to selector, ready it and go back to parking state
			if selectorThread, dataOut = self.waitList.Dequeue(); selectorThread != nil {
				// a nil result of the swap means the selector was already claimed by someone else
				if threadPtr = atomic.SwapPointer(selectorThread, nil); threadPtr != nil {
					// implementation of sending from closed channel to selector mechanism
					if queueOpen {
						// write to the selector
						*dataOut = data
					} else {
						// send nil from closed channel
						*dataOut = nil
					}
					// notify selector
					safe_ready(threadPtr)
					readState = false
					break selector_dequeue
				} else {
					continue
				}
			} else {
				// waitlist exhausted without acquiring a selector
				break selector_dequeue
			}
		}
		// if not selected by any selector, commit data to backlog and wait for next signal
		// saves a lot of cpu time
		// NOTE(review): readState stays true after this store, so the same `data` is offered again
		// to a selector on the next signal — confirm the selector side cannot receive it twice
		// (once through ReadFromBackLog and once through the direct hand-over above)
		if readState && queueOpen {
			// copy into a fresh variable so the stored pointer does not alias `data`
			var i T = data
			self.backlog.Store(&i)
		}
		// allow the next Signal() to wake this goroutine again
		self.selectionState.Store(SelectionOpen)
	}
}