github.com/alphadose/zenq/v2@v2.8.2/zenq.go

// A minimalist thread-safe queue implemented using a lock-free ringbuffer which is faster
// and has fewer memory allocations than golang's native channels
// Based on the LMAX disruptor pattern https://lmax-exchange.github.io/disruptor/disruptor.html

// Known Limitations:-
//
// 1. Max queue_size = 2^16
// 2. The queue_size is a power of 2; if a different size is provided, then queue_size is rounded up to the next greater power of 2, up to a max of 2^16

// Suggestions:-
//
// 1. Use runtime.LockOSThread() on the goroutine calling ZenQ.Read() for the lowest latency, provided you have > 1 CPU cores

package zenq

import (
	"fmt"
	"math"
	"sync"
	"sync/atomic"
	"unsafe"

	"github.com/alphadose/zenq/v2/constants"
)

// ZenQ global state enums
const (
	// Both reads and writes are possible
	StateOpen = iota
	// No further writes can be performed; in this state you can only read up to the last committed write
	StateClosedForWrites
	// Neither reads nor writes are possible, queue is fully exhausted
	StateFullyClosed
)

// ZenQ selector state enums
const (
	// Open for being selected
	SelectionOpen = iota
	// Running state
	SelectionRunning
)

// ZenQ slot state enums
const (
	SlotEmpty = iota
	SlotBusy
	SlotCommitted
	SlotClosed
)

type (
	// a single slot in the queue
	slot[T any] struct {
		writeParker *ThreadParker[T]
		atomic.Uint32
		item T
	}

	// metadata of the queue
	metaQ struct {
		globalState uint8
		// NOTE->self: strideLength and indexMask can be further optimized to uint8 for specialized ZenQs
		// with known data types instead of a generic type
		// using variables with smaller sizes decreases memory bandwidth consumption and increases speed
		strideLength uint16
		indexMask    uint16
		contents     unsafe.Pointer
		// memory pool refs for storing and leasing parking spots for goroutines
		alloc func() any
		free  func(any)
	}

	// container for the selection events among multiple queues
	selectFactory[T any] struct {
		selectionState atomic.Uint32
		auxThread      unsafe.Pointer
		backlog        atomic.Pointer[T]
		waitList       List
	}

	// ZenQ is the CPU cache optimized ringbuffer implementation
	ZenQ[T any] struct {
		// The padding members 0 to 4 below are here to ensure each item is on a separate cache line.
		// This prevents false sharing and hence improves performance.
		_           cacheLinePadding
		writerIndex atomic.Uint32
		_           [constants.CacheLinePadSize - unsafe.Sizeof(atomic.Uint32{})]byte
		readerIndex atomic.Uint32
		_           [constants.CacheLinePadSize - unsafe.Sizeof(atomic.Uint32{})]byte
		metaQ
		_ [constants.CacheLinePadSize - unsafe.Sizeof(metaQ{})]byte
		selectFactory[T]
		_ [constants.CacheLinePadSize - unsafe.Sizeof(selectFactory[T]{})]byte
	}
)

// nextGreaterPowerOf2 returns the next power of 2 greater than or equal to val, capped at 2^16
func nextGreaterPowerOf2(val uint32) uint32 {
	return 1 << uint32(math.Min(math.Ceil(Fastlog2(math.Max(float64(val), 1))), 16))
}

// New returns a new queue given its payload type passed as a generic parameter
func New[T any](size uint32) *ZenQ[T] {
	var (
		queueSize = nextGreaterPowerOf2(size)
		contents  = make([]slot[T], queueSize, queueSize)
		parkPool  = sync.Pool{New: func() any { return new(parkSpot[T]) }}
	)
	for idx := uint32(0); idx < queueSize; idx++ {
		spot := parkPool.Get().(*parkSpot[T])
		spot.threadPtr = nil
		contents[idx].writeParker = NewThreadParker(spot)
	}
	zenq := &ZenQ[T]{
		metaQ: metaQ{
			strideLength: uint16(unsafe.Sizeof(slot[T]{})),
			contents:     unsafe.Pointer(&contents[0]),
			alloc:        parkPool.Get,
			free:         parkPool.Put,
			indexMask:    uint16(queueSize - 1),
		},
		selectFactory: selectFactory[T]{waitList: NewList()},
	}
	go zenq.selectSender()
	// allow the above auxiliary thread to manifest
	mcall(gosched_m)
	return zenq
}
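// Basic producer/consumer usage (an illustrative sketch added for documentation,
// not part of the original source; it relies only on New, Write, Read and Close
// as defined in this file):
//
//	zq := zenq.New[int](64)
//	go func() {
//		for i := 0; i < 10; i++ {
//			zq.Write(i)
//		}
//		// close from the writer side, as required by the design notes on Close()
//		zq.Close()
//	}()
//	for data, open := zq.Read(); open; data, open = zq.Read() {
//		fmt.Println(data)
//	}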
// Write writes a value to the queue
// It returns whether the queue is currently open for writes or not
// If not, then it might still be open for reads, which can be checked by calling zenq.IsClosed()
func (self *ZenQ[T]) Write(value T) (queueClosedForWrites bool) {
	if Load8(&self.globalState) != StateOpen {
		queueClosedForWrites = true
		return
	}

	// Try to send directly to a selector when possible, or else just dequeue unselected references
	// in order to reduce the burden on the auxiliary thread and save cpu time
direct_send:
	if threadPtr, dataOut := self.waitList.Dequeue(); threadPtr != nil {
		if selThread := atomic.SwapPointer(threadPtr, nil); selThread != nil {
			// direct send to selector
			*dataOut = value
			// notify selector
			safe_ready(selThread)
			return
		}
		goto direct_send
	}

	slot := (*slot[T])(unsafe.Pointer(uintptr(self.strideLength)*(uintptr(self.indexMask)&uintptr(self.writerIndex.Add(1))) + uintptr(self.contents)))

	// CAS -> change slot_state to busy if slot_state == empty
	for !slot.CompareAndSwap(SlotEmpty, SlotBusy) {
		switch slot.Load() {
		case SlotBusy:
			wait()
		case SlotCommitted:
			n := self.alloc().(*parkSpot[T])
			n.threadPtr, n.value = GetG(), value
			n.next.Store(nil)
			slot.writeParker.Park(n)
			mcall(fast_park)
			return
		case SlotEmpty:
			continue
		case SlotClosed:
			return
		}
	}
	slot.item = value
	slot.Store(SlotCommitted)
	return
}

// Read reads a value from the queue; each committed value can be read only once
func (self *ZenQ[T]) Read() (data T, queueOpen bool) {
	slot := (*slot[T])(unsafe.Pointer(uintptr(self.strideLength)*(uintptr(self.indexMask)&uintptr(self.readerIndex.Add(1))) + uintptr(self.contents)))

	// CAS -> change slot_state to busy if slot_state == committed
	for !slot.CompareAndSwap(SlotCommitted, SlotBusy) {
		switch slot.Load() {
		case SlotBusy:
			wait()
		case SlotEmpty:
			var freeable *parkSpot[T]
			if data, queueOpen, freeable = slot.writeParker.Ready(); queueOpen {
				self.free(freeable)
				return
			} else if Load8(&self.globalState) != StateFullyClosed {
				mcall(gosched_m)
			} else {
				// queue is closed, decrement the reader index by 1
				self.readerIndex.Add(math.MaxUint32)
				queueOpen = false
				return
			}
		case SlotClosed:
			if slot.CompareAndSwap(SlotClosed, SlotEmpty) {
				Store8(&self.globalState, StateFullyClosed)
			}
			queueOpen = false
			return
		case SlotCommitted:
			continue
		}
	}
	data, queueOpen = slot.item, true
	slot.Store(SlotEmpty)
	return
}
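// The slot lookup in Write() and Read() above is plain pointer arithmetic over
// the contiguous contents array. A minimal equivalent sketch (slotAddress is a
// hypothetical helper written for illustration, not part of this package):
//
//	func slotAddress(contents unsafe.Pointer, stride, mask uint16, counter uint32) unsafe.Pointer {
//		// counter & mask wraps the monotonically increasing index onto the
//		// power-of-2 sized ring; multiplying by the slot stride then yields
//		// the byte offset of the slot from the start of the array
//		return unsafe.Pointer(uintptr(contents) + uintptr(stride)*(uintptr(mask)&uintptr(counter)))
//	}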
// Close closes the ZenQ for further writes
// You can only read up to the last committed write after closing
// This function will be blocking in case the queue is full
// ZenQ is closed from a writer goroutine by design, hence it should always be called
// from a writer goroutine and never from a reader goroutine, which might cause the reader to get blocked and hence deadlock
// It returns whether the queue was already closed for writes or not
func (self *ZenQ[T]) Close() (alreadyClosedForWrites bool) {
	// This ensures a ZenQ is closed only once even if this function is called multiple times, making this operation safe
	if Load8(&self.globalState) != StateOpen {
		alreadyClosedForWrites = true
		return
	}
	Store8(&self.globalState, StateClosedForWrites)
	slot := (*slot[T])(unsafe.Pointer(uintptr(self.strideLength)*(uintptr(self.indexMask)&uintptr(self.writerIndex.Add(1))) + uintptr(self.contents)))

	// CAS -> change slot_state to busy if slot_state == empty
	for !slot.CompareAndSwap(SlotEmpty, SlotBusy) {
		switch slot.Load() {
		case SlotBusy, SlotCommitted:
			mcall(gosched_m)
		case SlotEmpty:
			continue
		case SlotClosed:
			return
		}
	}
	// Closing commit
	slot.Store(SlotClosed)
	return
}

// CloseAsync closes the channel asynchronously
// Useful when a user wants to close the channel from a reader end without blocking the thread
func (self *ZenQ[T]) CloseAsync() {
	go self.Close()
}

// The following 4 functions implement the Selectable interface

// ReadFromBackLog tries to read data from the backlog if available
func (self *ZenQ[T]) ReadFromBackLog() (data any) {
	if d := self.backlog.Swap(nil); d != nil {
		data = *((*T)(d))
	}
	return
}

// Signal is the mechanism by which a selector notifies this ZenQ's auxiliary thread to contest for the selection
func (self *ZenQ[T]) Signal() uint8 {
	if !self.selectionState.CompareAndSwap(SelectionOpen, SelectionRunning) {
		return 0
	} else {
		safe_ready(self.auxThread)
		return 1
	}
}

// EnqueueSelector pushes a calling selector to this ZenQ's selector waitlist
func (self *ZenQ[T]) EnqueueSelector(threadPtr *unsafe.Pointer, dataOut *any) {
	self.waitList.Enqueue(threadPtr, dataOut)
}

// IsClosed returns whether the zenq is closed for both reads and writes
func (self *ZenQ[T]) IsClosed() bool {
	return Load8(&self.globalState) == StateFullyClosed
}

// Reset resets the queue state
// This also releases all parked goroutines if any and drains all committed writes
func (self *ZenQ[T]) Reset() {
	// Close() is blocking when the queue is full, hence execute it asynchronously
	self.CloseAsync()
	// drain the entire queue
	for open := true; open; _, open = self.Read() {
	}
	Store8(&self.globalState, StateOpen)
}
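// Shutdown sketch (illustrative, added for documentation; `process` is a
// placeholder for user code): after Close() the reader drains the remaining
// committed writes, then observes queueOpen == false and IsClosed() == true:
//
//	zq.Close() // must be called from the writer side
//	for {
//		data, open := zq.Read()
//		if !open {
//			break // the SlotClosed marker was reached; queue is now StateFullyClosed
//		}
//		process(data)
//	}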
// Dump dumps the current queue state
// Unsafe to be called from multiple goroutines
func (self *ZenQ[T]) Dump() {
	fmt.Printf("writerIndex: %3d, readerIndex: %3d\n contents:-\n\n", self.writerIndex.Load(), self.readerIndex.Load())
	for idx := uintptr(0); idx <= uintptr(self.indexMask); idx++ {
		slot := (*slot[T])(unsafe.Pointer(uintptr(self.contents) + idx*unsafe.Sizeof(slot[T]{})))
		fmt.Printf("Slot -> %#v\n", *slot)
	}
}

// selectSender is an auxiliary thread which remains parked by default
// only when a selector sends a signal is it notified, whereupon it tries to send back to the selector
// if it fails, then it parks again and waits for another signal from another selection process
// since it is parked most of the time, it consumes minimal cpu time, making the selection process efficient
func (self *ZenQ[T]) selectSender() {
	atomic.StorePointer(&self.auxThread, GetG())
	var (
		data                 T
		threadPtr            unsafe.Pointer
		readState, queueOpen bool = false, true
		selectorThread       *unsafe.Pointer
		dataOut              *any
	)

	for {
		// park by default and wait for a Signal() notification from a selection process
		mcall(fast_park)
		if !readState {
			data, queueOpen = self.Read()
			readState = true
		}

	selector_dequeue:
		for {
			// keep dequeuing selectors from the waitlist and try to acquire one
			// if acquired, write to the selector, ready it and go back to the parking state
			if selectorThread, dataOut = self.waitList.Dequeue(); selectorThread != nil {
				if threadPtr = atomic.SwapPointer(selectorThread, nil); threadPtr != nil {
					// implementation of the mechanism for sending from a closed channel to a selector
					if queueOpen {
						// write to the selector
						*dataOut = data
					} else {
						// send nil from a closed channel
						*dataOut = nil
					}
					// notify selector
					safe_ready(threadPtr)
					readState = false
					break selector_dequeue
				} else {
					continue
				}
			} else {
				break selector_dequeue
			}
		}
		// if not selected by any selector, commit data to the backlog and wait for the next signal
		// this saves a lot of cpu time
		if readState && queueOpen {
			var i T = data
			self.backlog.Store(&i)
		}
		self.selectionState.Store(SelectionOpen)
	}
}
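// Selection sketch (illustrative, added for documentation): Signal,
// ReadFromBackLog and EnqueueSelector above are driven by the package-level
// Select() helper, which lives outside this file; its exact signature is
// assumed here. Usage would look roughly like:
//
//	zq1, zq2 := zenq.New[int](8), zenq.New[string](8)
//	// Select blocks until one queue has data, returning it as `any`
//	// (nil signals a closed channel, matching selectSender above)
//	if data := zenq.Select(zq1, zq2); data != nil {
//		switch d := data.(type) {
//		case int:
//			fmt.Println("received int:", d)
//		case string:
//			fmt.Println("received string:", d)
//		}
//	}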