github.com/alphadose/zenq/v2@v2.8.4/zenq.go (about) 1 // A minimalist thread-safe queue implemented using a lock-free ringbuffer which is faster 2 // and has lower memory allocations than golang's native channels 3 // Based on the LMAX disruptor pattern https://lmax-exchange.github.io/disruptor/disruptor.html 4 5 // Known Limitations:- 6 // 7 // 1. Max queue_size = 2^16 8 // 2. The queue_size is a power of 2, in case a different size is provided then queue_size is rounded up to the next greater power of 2 upto a max of 2^16 9 10 // Suggestions:- 11 // 12 // 1. Use runtime.LockOSThread() on the goroutine calling ZenQ.Read() for lowest latency provided you have > 1 cpu cores 13 14 package zenq 15 16 import ( 17 "fmt" 18 "math" 19 "sync" 20 "sync/atomic" 21 "unsafe" 22 23 "github.com/alphadose/zenq/v2/constants" 24 ) 25 26 // ZenQ global state enums 27 const ( 28 // Both reads and writes are possible 29 StateOpen = iota 30 // No further writes can be performed and you can only read upto the last committed write in this state 31 StateClosedForWrites 32 // Neither reads nor writes are possible, queue is fully exhausted 33 StateFullyClosed 34 ) 35 36 // ZenQ selector state enums 37 const ( 38 // Open for being selected 39 SelectionOpen = iota 40 // Running state 41 SelectionRunning 42 ) 43 44 // ZenQ Slot state enums 45 const ( 46 SlotEmpty = iota 47 SlotBusy 48 SlotCommitted 49 SlotClosed 50 ) 51 52 type ( 53 // a single slot in the queue 54 slot[T any] struct { 55 writeParker *ThreadParker[T] 56 atomic.Uint32 57 item T 58 } 59 60 // metadata of the queue 61 metaQ struct { 62 globalState uint8 63 // NOTE->self: strideLength and indexMask can be further optimized to uint8 for specialized ZenQs 64 // with known data types instead of generic type 65 // using variables with lower sizes decreases memory bandwidth consumption and increases speed 66 strideLength uint16 67 indexMask uint16 68 contents unsafe.Pointer 69 // memory pool refs for storing and leasing parking spots for goroutines 70 alloc func() any 71 free func(any) 72 } 73 74 // container for the selection events among multiple queues 75 selectFactory[T any] struct { 76 selectionState atomic.Uint32 77 auxThread unsafe.Pointer 78 backlog atomic.Pointer[T] 79 waitList List 80 } 81 82 // ZenQ is the CPU cache optimized ringbuffer implementation 83 ZenQ[T any] struct { 84 // The padding members 0 to 4 below are here to ensure each item is on a separate cache line. 85 // This prevents false sharing and hence improves performance. 86 _ cacheLinePadding 87 writerIndex atomic.Uint32 88 _ [constants.CacheLinePadSize - unsafe.Sizeof(atomic.Uint32{})]byte 89 readerIndex atomic.Uint32 90 _ [constants.CacheLinePadSize - unsafe.Sizeof(atomic.Uint32{})]byte 91 metaQ 92 _ [constants.CacheLinePadSize - unsafe.Sizeof(metaQ{})]byte 93 selectFactory[T] 94 _ [constants.CacheLinePadSize - unsafe.Sizeof(selectFactory[T]{})]byte 95 } 96 ) 97 98 // returns the next greater power of 2 relative to val 99 func nextGreaterPowerOf2(val uint32) uint32 { 100 return 1 << uint32(math.Min(math.Ceil(Fastlog2(math.Max(float64(val), 1))), 16)) 101 } 102 103 // New returns a new queue given its payload type passed as a generic parameter 104 func New[T any](size uint32) *ZenQ[T] { 105 var ( 106 queueSize = nextGreaterPowerOf2(size) 107 contents = make([]slot[T], queueSize, queueSize) 108 parkPool = sync.Pool{New: func() any { return new(parkSpot[T]) }} 109 ) 110 for idx := uint32(0); idx < queueSize; idx++ { 111 spot := parkPool.Get().(*parkSpot[T]) 112 spot.threadPtr = nil 113 contents[idx].writeParker = NewThreadParker(spot) 114 } 115 zenq := &ZenQ[T]{ 116 metaQ: metaQ{ 117 strideLength: uint16(unsafe.Sizeof(slot[T]{})), 118 contents: unsafe.Pointer(&contents[0]), 119 alloc: parkPool.Get, 120 free: parkPool.Put, 121 indexMask: uint16(queueSize - 1), 122 }, 123 selectFactory: selectFactory[T]{waitList: NewList()}, 124 } 125 go zenq.selectSender() 126 // allow the above auxillary thread to manifest 127 mcall(gosched_m) 128 return zenq 129 } 130 131 // Size returns the number of items in the queue at any given time 132 func (self *ZenQ[T]) Size() uint32 { 133 var ( 134 readerIndex uint32 = self.readerIndex.Load() & uint32(self.indexMask) 135 writerIndex uint32 = self.writerIndex.Load() & uint32(self.indexMask) 136 ) 137 if readerIndex > writerIndex { 138 return uint32(self.indexMask) + 2 - (readerIndex - writerIndex) 139 } else if writerIndex > readerIndex { 140 return writerIndex - readerIndex + 1 141 } else { 142 return 0 143 } 144 } 145 146 // Write writes a value to the queue 147 // It returns whether the queue is currently open for writes or not 148 // If not then it might be still open for reads, which can be checked by calling zenq.IsClosed() 149 func (self *ZenQ[T]) Write(value T) (queueClosedForWrites bool) { 150 if Load8(&self.globalState) != StateOpen { 151 queueClosedForWrites = true 152 return 153 } 154 155 // Try to send directly to selector when possible or else just dequeue unselected references 156 // in order to reduce the burden on the auxillary thread and save cpu time 157 direct_send: 158 if threadPtr, dataOut := self.waitList.Dequeue(); threadPtr != nil { 159 if selThread := atomic.SwapPointer(threadPtr, nil); selThread != nil { 160 // direct send to selector 161 *dataOut = value 162 // notify selector 163 safe_ready(selThread) 164 return 165 } 166 goto direct_send 167 } 168 169 slot := (*slot[T])(unsafe.Pointer(uintptr(self.strideLength)*(uintptr(self.indexMask)&uintptr(self.writerIndex.Add(1))) + uintptr(self.contents))) 170 171 // CAS -> change slot_state to busy if slot_state == empty 172 for !slot.CompareAndSwap(SlotEmpty, SlotBusy) { 173 switch slot.Load() { 174 case SlotBusy: 175 wait() 176 case SlotCommitted: 177 n := self.alloc().(*parkSpot[T]) 178 n.threadPtr, n.value = GetG(), value 179 n.next.Store(nil) 180 slot.writeParker.Park(n) 181 mcall(fast_park) 182 return 183 case SlotEmpty: 184 continue 185 case SlotClosed: 186 return 187 } 188 } 189 slot.item = value 190 slot.Store(SlotCommitted) 191 return 192 } 193 194 // Read reads a value from the queue, you can once read once per object 195 func (self *ZenQ[T]) Read() (data T, queueOpen bool) { 196 slot := (*slot[T])(unsafe.Pointer(uintptr(self.strideLength)*(uintptr(self.indexMask)&uintptr(self.readerIndex.Add(1))) + uintptr(self.contents))) 197 198 // CAS -> change slot_state to busy if slot_state == committed 199 for !slot.CompareAndSwap(SlotCommitted, SlotBusy) { 200 switch slot.Load() { 201 case SlotBusy: 202 wait() 203 case SlotEmpty: 204 var freeable *parkSpot[T] 205 if data, queueOpen, freeable = slot.writeParker.Ready(); queueOpen { 206 self.free(freeable) 207 return 208 } else if Load8(&self.globalState) != StateFullyClosed { 209 mcall(gosched_m) 210 } else { 211 // queue is closed, decrement the reader index by 1 212 self.readerIndex.Add(math.MaxUint32) 213 queueOpen = false 214 return 215 } 216 case SlotClosed: 217 if slot.CompareAndSwap(SlotClosed, SlotEmpty) { 218 Store8(&self.globalState, StateFullyClosed) 219 } 220 queueOpen = false 221 return 222 case SlotCommitted: 223 continue 224 } 225 } 226 data, queueOpen = slot.item, true 227 slot.Store(SlotEmpty) 228 return 229 } 230 231 // Close closes the ZenQ for further writes 232 // You can only read uptill the last committed write after closing 233 // This function will be blocking in case the queue is full 234 // ZenQ is closed from a writer goroutine by design, hence it should always be called 235 // from a writer goroutine and never from a reader goroutine which might cause the reader to get blocked and hence deadlock 236 // It returns if the queue was already closed for writes or not 237 func (self *ZenQ[T]) Close() (alreadyClosedForWrites bool) { 238 // This ensures a ZenQ is closed only once even if this function is called multiple times making this operation safe 239 if Load8(&self.globalState) != StateOpen { 240 alreadyClosedForWrites = true 241 return 242 } 243 Store8(&self.globalState, StateClosedForWrites) 244 slot := (*slot[T])(unsafe.Pointer(uintptr(self.strideLength)*(uintptr(self.indexMask)&uintptr(self.writerIndex.Add(1))) + uintptr(self.contents))) 245 246 // CAS -> change slot_state to busy if slot_state == empty 247 for !slot.CompareAndSwap(SlotEmpty, SlotBusy) { 248 switch slot.Load() { 249 case SlotBusy, SlotCommitted: 250 mcall(gosched_m) 251 case SlotEmpty: 252 continue 253 case SlotClosed: 254 return 255 } 256 } 257 // Closing commit 258 slot.Store(SlotClosed) 259 return 260 } 261 262 // CloseAsync closes the channel asynchronously 263 // Useful when an user wants to close the channel from a reader end without blocking the thread 264 func (self *ZenQ[T]) CloseAsync() { 265 go self.Close() 266 } 267 268 // The following 4 functions below implement the Selectable interface 269 270 // ReadFromBackLog tries to read a data from backlog if available 271 func (self *ZenQ[T]) ReadFromBackLog() (data any) { 272 if d := self.backlog.Swap(nil); d != nil { 273 data = *((*T)(d)) 274 } 275 return 276 } 277 278 // Signal is the mechanism by which a selector notifies this ZenQ's auxillary thread to contest for the selection 279 func (self *ZenQ[T]) Signal() uint8 { 280 if !self.selectionState.CompareAndSwap(SelectionOpen, SelectionRunning) { 281 return 0 282 } else { 283 safe_ready(self.auxThread) 284 return 1 285 } 286 } 287 288 // EnqueueSelector pushes a calling selector to this ZenQ's selector waitlist 289 func (self *ZenQ[T]) EnqueueSelector(threadPtr *unsafe.Pointer, dataOut *any) { 290 self.waitList.Enqueue(threadPtr, dataOut) 291 } 292 293 // IsClosed returns whether the zenq is closed for both reads and writes 294 func (self *ZenQ[T]) IsClosed() bool { 295 return Load8(&self.globalState) == StateFullyClosed 296 } 297 298 // Reset resets the queue state 299 // This also releases all parked goroutines if any and drains all committed writes 300 func (self *ZenQ[T]) Reset() { 301 // Close() is blocking when queue is full hence execute it asynchronously 302 self.CloseAsync() 303 // drain entire queue 304 for open := true; open; _, open = self.Read() { 305 } 306 Store8(&self.globalState, StateOpen) 307 } 308 309 // Dump dumps the current queue state 310 // Unsafe to be called from multiple goroutines 311 func (self *ZenQ[T]) Dump() { 312 fmt.Printf("writerIndex: %3d, readerIndex: %3d\n contents:-\n\n", self.writerIndex, self.readerIndex) 313 for idx := uintptr(0); idx <= uintptr(self.indexMask); idx++ { 314 slot := (*slot[T])(unsafe.Pointer(uintptr(self.contents) + idx*unsafe.Sizeof(slot[T]{}))) 315 fmt.Printf("Slot -> %#v\n", *slot) 316 } 317 } 318 319 // selectSender is an auxillary thread which remains parked by default 320 // only when a selector sends a signal, it is notified and tries to send back to the selector 321 // if it fails, then it parks again and waits for another signal from another selection process 322 // since it is parked most of the times, it consumes minimal cpu time making the selection process efficient 323 func (self *ZenQ[T]) selectSender() { 324 atomic.StorePointer(&self.auxThread, GetG()) 325 var ( 326 data T 327 threadPtr unsafe.Pointer 328 readState, queueOpen bool = false, true 329 selectorThread *unsafe.Pointer 330 dataOut *any 331 ) 332 333 for { 334 // park by default and wait for Signal() notification from a selection process 335 mcall(fast_park) 336 if !readState { 337 data, queueOpen = self.Read() 338 readState = true 339 } 340 341 selector_dequeue: 342 for { 343 // keep dequeuing selectors from waitlist and try to acquire one 344 // if acquired write to selector, ready it and go back to parking state 345 if selectorThread, dataOut = self.waitList.Dequeue(); selectorThread != nil { 346 if threadPtr = atomic.SwapPointer(selectorThread, nil); threadPtr != nil { 347 // implementaion of sending from closed channel to selector mechanism 348 if queueOpen { 349 // write to the selector 350 *dataOut = data 351 } else { 352 // send nil from closed channel 353 *dataOut = nil 354 } 355 // notify selector 356 safe_ready(threadPtr) 357 readState = false 358 break selector_dequeue 359 } else { 360 continue 361 } 362 } else { 363 break selector_dequeue 364 } 365 } 366 // if not selected by any selector, commit data to backlog and wait for next signal 367 // saves a lot of cpu time 368 if readState && queueOpen { 369 var i T = data 370 self.backlog.Store(&i) 371 } 372 self.selectionState.Store(SelectionOpen) 373 } 374 }