github.com/dshulyak/uring@v0.0.0-20210209113719-1b2ec51f1542/loop/queue.go (about) 1 package loop 2 3 import ( 4 "errors" 5 "runtime" 6 "sync" 7 "sync/atomic" 8 "syscall" 9 "time" 10 11 "github.com/dshulyak/uring" 12 ) 13 14 var ( 15 // ErrClosed returned if queue was closed. 16 ErrClosed = errors.New("uring: closed") 17 // closed is a bit set to sqe.userData to notify completionLoop that ring 18 // is being closed. 19 closed uint64 = 1 << 63 20 ) 21 22 func newResult() *result { 23 return &result{ 24 ch: make(chan struct{}, 1), 25 } 26 } 27 28 // result is an object for sending completion notifications. 29 type result struct { 30 free uint32 31 nonce uint32 32 33 uring.CQEntry 34 ch chan struct{} 35 } 36 37 func (r *result) isFree() bool { 38 return atomic.LoadUint32(&r.free) == 0 39 } 40 41 func (r *result) unfree() { 42 atomic.StoreUint32(&r.free, 1) 43 } 44 45 func (r *result) setFree() { 46 atomic.StoreUint32(&r.free, 0) 47 } 48 49 type SQOperation func(sqe *uring.SQEntry) 50 51 func newQueue(ring *uring.Ring, qp *Params) *queue { 52 var ( 53 minComplete uint32 54 ) 55 if qp.WaitMethod == WaitEnter { 56 minComplete = 1 57 } 58 results := make([]*result, ring.CQSize()) 59 for i := range results { 60 results[i] = newResult() 61 } 62 subLock := sync.Mutex{} 63 64 q := &queue{ 65 ring: ring, 66 submissionTimer: qp.SubmissionTimer, 67 signal: make(chan struct{}, 1), 68 results: results, 69 limit: ring.CQSize(), 70 submitLimit: ring.SQSize(), 71 submitEvent: sync.NewCond(&subLock), 72 minComplete: minComplete, 73 } 74 q.startSubmitLoop() 75 return q 76 } 77 78 // queue provides thread safe access to uring.Ring instance. 79 type queue struct { 80 ring *uring.Ring 81 minComplete uint32 82 83 submissionTimer time.Duration 84 85 mu sync.Mutex 86 nonce uint32 87 closed bool 88 89 signal chan struct{} 90 91 wg sync.WaitGroup 92 93 // results is a free-list with a static size. 94 // it is as large as completion queue. 95 // it will happen that some completions will be faster than other, 96 // in such case complexity of getting free result will grow from O(1) to O(n) 97 // worst case 98 results []*result 99 100 inflight uint32 101 // completion queue size 102 limit uint32 103 104 // submission queue size 105 submitLimit uint32 106 107 submitEvent *sync.Cond 108 submitCount uint32 109 submitCloser chan error 110 } 111 112 func (q *queue) startSubmitLoop() { 113 if q.submissionTimer == 0 { 114 return 115 } 116 q.wg.Add(1) 117 118 var ( 119 duration = q.submissionTimer 120 timeout = false 121 122 timer = time.AfterFunc(duration, func() { 123 q.submitEvent.L.Lock() 124 timeout = true 125 q.submitEvent.Signal() 126 q.submitEvent.L.Unlock() 127 }) 128 ) 129 go func() { 130 defer q.wg.Done() 131 defer timer.Stop() 132 for { 133 q.submitEvent.L.Lock() 134 135 // event is fired: 136 // - when queue is full 137 // - on timer 138 // - when queue is closed 139 140 for q.submitCount != q.submitLimit && !timeout && q.submitCloser == nil { 141 q.submitEvent.Wait() 142 } 143 total := q.submitCount 144 timed := timeout 145 closed := q.submitCloser 146 147 timeout = false 148 q.submitCount = 0 149 q.submitEvent.L.Unlock() 150 151 if closed != nil { 152 closed <- nil 153 return 154 } 155 156 if total > 0 { 157 _, err := q.ring.Enter(total, 0) 158 if err != nil { 159 panic(err) 160 } 161 } 162 if !timed { 163 timer.Stop() 164 } 165 timer.Reset(duration) 166 } 167 }() 168 } 169 170 func (q *queue) startCompletionLoop() { 171 q.wg.Add(1) 172 go q.completionLoop() 173 } 174 175 // completionLoop ... 176 func (q *queue) completionLoop() { 177 defer q.wg.Done() 178 for q.tryComplete() { 179 } 180 } 181 182 func (q *queue) tryComplete() bool { 183 cqe, err := q.ring.GetCQEntry(q.minComplete) 184 // EAGAIN - if head is equal to tail of completion queue 185 if err == syscall.EAGAIN || err == syscall.EINTR { 186 // gosched is needed if q.minComplete = 0 without eventfd 187 runtime.Gosched() 188 return true 189 } else if err != nil { 190 // FIXME 191 panic(err) 192 } 193 if cqe.UserData()&closed > 0 { 194 return false 195 } 196 197 req := q.results[cqe.UserData()%uint64(len(q.results))] 198 req.CQEntry = cqe 199 req.ch <- struct{}{} 200 return true 201 } 202 203 // prepare acquires submission lock and registers n inflights operations. 204 func (q *queue) prepare(n uint32) error { 205 q.mu.Lock() 206 if q.closed { 207 q.mu.Unlock() 208 return ErrClosed 209 } 210 inflight := atomic.AddUint32(&q.inflight, n) 211 if inflight > q.limit { 212 <-q.signal 213 if q.closed { 214 q.mu.Unlock() 215 return ErrClosed 216 } 217 } 218 return nil 219 } 220 221 func (q *queue) getSQEntry() *uring.SQEntry { 222 // we will wait if submition queue is full. 223 // it won't be long cause if it is full submition loop receives 224 // event to submit pending immediatly. 225 for { 226 entry := q.ring.GetSQEntry() 227 if entry != nil { 228 return entry 229 } 230 runtime.Gosched() 231 } 232 } 233 234 // completed must be called after all n completions were reaped and results are not needed. 235 func (q *queue) completed(n uint32) { 236 if atomic.AddUint32(&q.inflight, ^uint32(n-1)) == q.limit { 237 q.signal <- struct{}{} 238 } 239 } 240 241 func (q *queue) fillResult(sqe *uring.SQEntry) *result { 242 var res *result 243 for { 244 pos := q.nonce % uint32(len(q.results)) 245 res = q.results[pos] 246 if res.isFree() { 247 break 248 } 249 q.nonce++ 250 } 251 res.unfree() 252 res.nonce = q.nonce 253 254 sqe.SetUserData(uint64(q.nonce)) 255 q.nonce++ 256 return res 257 } 258 259 func (q *queue) submit(n uint32) error { 260 q.ring.Flush() 261 if q.submissionTimer == 0 { 262 // for sync submit unlock before enter 263 q.mu.Unlock() 264 _, err := q.ring.Enter(n, 0) 265 return err 266 } 267 // for async submit unlock after notifying batch submitter 268 defer q.mu.Unlock() 269 q.submitEvent.L.Lock() 270 q.submitCount += n 271 if q.submitCount == q.submitLimit { 272 q.submitEvent.Signal() 273 } 274 closed := q.submitCloser != nil 275 q.submitEvent.L.Unlock() 276 if closed { 277 return ErrClosed 278 } 279 return nil 280 } 281 282 func (q *queue) Ring() *uring.Ring { 283 return q.ring 284 } 285 286 //go:norace 287 288 // Complete blocks until an available submission exists, submits and blocks until completed. 289 // Goroutine that executes Complete will be parked. 290 func (q *queue) Complete(opt SQOperation) (uring.CQEntry, error) { 291 // acquire lock 292 if err := q.prepare(1); err != nil { 293 return uring.CQEntry{}, err 294 } 295 296 // get sqe and fill it with data 297 sqe := q.getSQEntry() 298 opt(sqe) 299 res := q.fillResult(sqe) 300 301 err := q.submit(1) 302 if err != nil { 303 return uring.CQEntry{}, err 304 } 305 // wait 306 _, open := <-res.ch 307 cqe := res.CQEntry 308 if cqe.UserData() != uint64(res.nonce) { 309 panic("received result for a wrong request") 310 } 311 312 // fillResult method always checks if result is free by atomically loading 313 // free marker. on x86 write is never reordered with older reads 314 // but on systems with less strong memory model it might be possible 315 // 316 // in this part i rely on store/release - load/acquire semantics 317 // to enforce earlier described contstraint 318 res.setFree() 319 q.completed(1) 320 if !open { 321 return uring.CQEntry{}, ErrClosed 322 } 323 return cqe, nil 324 } 325 326 //go:norace 327 328 // Batch submits operations atomically and in the order they are provided. 329 func (q *queue) Batch(cqes []uring.CQEntry, opts []SQOperation) ([]uring.CQEntry, error) { 330 n := uint32(len(opts)) 331 // lock is acqured in prepare and released in submit guarantees 332 // that operations are sent in order. e.g. no other goroutine can concurrently 333 // chime in due to runtime.Gosched in getSQEntry 334 if err := q.prepare(n); err != nil { 335 return nil, err 336 } 337 results := make([]*result, len(opts)) 338 for i := range opts { 339 sqe := q.getSQEntry() 340 opts[i](sqe) 341 results[i] = q.fillResult(sqe) 342 } 343 344 err := q.submit(n) 345 if err != nil { 346 return nil, err 347 } 348 349 exit := false 350 for _, res := range results { 351 _, open := <-res.ch 352 res.setFree() 353 q.completed(1) 354 if !open { 355 exit = true 356 continue 357 } 358 cqes = append(cqes, res.CQEntry) 359 } 360 if exit { 361 return nil, ErrClosed 362 } 363 return cqes, nil 364 } 365 366 func (q *queue) Close() error { 367 q.mu.Lock() 368 q.closed = true 369 q.mu.Unlock() 370 close(q.signal) 371 372 if q.submissionTimer != 0 { 373 q.submitEvent.L.Lock() 374 q.submitCloser = make(chan error, 1) 375 q.submitEvent.Signal() 376 q.submitEvent.L.Unlock() 377 378 <-q.submitCloser 379 } 380 381 sqe := q.ring.GetSQEntry() 382 uring.Nop(sqe) 383 sqe.SetUserData(closed) 384 sqe.SetFlags(uring.IOSQE_IO_DRAIN) 385 386 _, err := q.ring.Submit(0) 387 if err != nil { 388 return err 389 } 390 q.wg.Wait() 391 for _, req := range q.results { 392 close(req.ch) 393 } 394 q.results = nil 395 return nil 396 }