github.com/dshulyak/uring@v0.0.0-20210209113719-1b2ec51f1542/loop/loop.go (about) 1 package loop 2 3 import ( 4 "errors" 5 "fmt" 6 "runtime" 7 "sync" 8 "syscall" 9 "time" 10 11 "github.com/dshulyak/uring" 12 ) 13 14 const ( 15 // WaitPoll monitors completion queue by polling (or IO_URING_ENTER with minComplete=0 in case of IOPOLL) 16 WaitPoll uint = iota 17 // WaitEnter monitors completion queue by waiting on IO_URING_ENTER with minComplete=1 18 // Registering files and buffers requires uring to become idle, with WaitEnter we are 19 // blocking until the next event is completed. Even if queue is empty this 20 // makes uring think that it is not idle. As a consequence Registering files 21 // or buffers leads to deadlock. 22 WaitEnter 23 // WaitEventfd wathches eventfd of each queue in the shard. 24 WaitEventfd 25 ) 26 27 const ( 28 // FlagSharedWorkers shares worker pool from the first ring instance between all shards in the queue. 29 FlagSharedWorkers = 1 << iota 30 31 // FlagBatchSubmission enables feature to batch individual submission together 32 // in one syscall. If this flag is set SubmissionTimer option must be set as well. 33 FlagBatchSubmission 34 ) 35 36 const ( 37 defaultBatchSubmissionTimer = 50 * time.Microsecond 38 ) 39 40 func defaultParams() *Params { 41 return &Params{ 42 Rings: runtime.NumCPU(), 43 WaitMethod: WaitEventfd, 44 Flags: FlagSharedWorkers | FlagBatchSubmission, 45 SubmissionTimer: defaultBatchSubmissionTimer, 46 } 47 } 48 49 // Params ... 50 type Params struct { 51 Rings int 52 WaitMethod uint 53 Flags uint 54 SubmissionTimer time.Duration 55 } 56 57 // Loop ... 58 type Loop struct { 59 qparams *Params 60 // fields are used only if sharding is enabled. 61 queues []*queue 62 n uint64 63 byEventfd map[int32]*queue 64 poll *poll 65 66 wg sync.WaitGroup 67 } 68 69 // Setup setups requested number of shards, with shared kernel worker pool. 70 func Setup(size uint, params *uring.IOUringParams, qp *Params) (*Loop, error) { 71 if qp == nil { 72 qp = defaultParams() 73 } 74 if qp.Rings > 1 && !(qp.WaitMethod == WaitEventfd || qp.WaitMethod == WaitEnter) { 75 return nil, errors.New("use WaitEventfd or WaitEnter if sharding is enabled") 76 } else if qp.Flags&FlagBatchSubmission != 0 && qp.SubmissionTimer == 0 { 77 return nil, errors.New("SubmissionTimer must be non zero if FlagBatchSubmission is used") 78 } else if qp.Flags&FlagBatchSubmission == 0 && qp.SubmissionTimer != 0 { 79 return nil, errors.New("SubmissionTimer erroneously set to non-zero") 80 } 81 q := &Loop{qparams: qp} 82 if qp.Rings > 0 { 83 return q, setupSharded(q, size, params) 84 } 85 return q, setupSimple(q, size, params) 86 } 87 88 func setupSimple(q *Loop, size uint, params *uring.IOUringParams) error { 89 ring, err := uring.Setup(size, params) 90 if err != nil { 91 return err 92 } 93 q.queues = []*queue{newQueue(ring, q.qparams)} 94 q.queues[0].startCompletionLoop() 95 return nil 96 } 97 98 func setupSharded(q *Loop, size uint, params *uring.IOUringParams) (err error) { 99 var ( 100 queues = make([]*queue, q.qparams.Rings) 101 paramsCopy uring.IOUringParams 102 ) 103 104 q.poll, err = newPoll(len(queues)) 105 if err != nil { 106 return err 107 } 108 defer func() { 109 if err != nil { 110 _ = q.poll.close() 111 } 112 }() 113 if params != nil { 114 paramsCopy = *params 115 } 116 var ring *uring.Ring 117 defer func() { 118 if err != nil { 119 for _, subq := range queues { 120 if subq != nil { 121 _ = subq.Ring().Close() 122 } 123 } 124 } 125 }() 126 for i := range queues { 127 use := paramsCopy 128 if q.qparams.Flags&FlagSharedWorkers > 0 && i > 0 { 129 use.Flags |= uring.IORING_SETUP_ATTACH_WQ 130 use.WQFd = uint32(queues[0].Ring().Fd()) 131 } 132 ring, err = uring.Setup(size, &use) 133 if err != nil { 134 err = fmt.Errorf("failed to setup ring %w", err) 135 return 136 } 137 queues[i] = newQueue(ring, q.qparams) 138 } 139 q.queues = queues 140 q.n = uint64(q.qparams.Rings) 141 142 if q.qparams.WaitMethod == WaitEventfd { 143 byEventfd := make(map[int32]*queue, len(queues)) 144 for _, qu := range queues { 145 ring := qu.Ring() 146 for { 147 err = ring.SetupEventfd() 148 if err != nil { 149 if err == syscall.EINTR { 150 continue 151 } 152 err = fmt.Errorf("failed to setup eventfd %w", err) 153 return 154 } 155 break 156 } 157 err = q.poll.addRead(int32(ring.Eventfd())) 158 if err != nil { 159 return 160 } 161 byEventfd[int32(ring.Eventfd())] = qu 162 } 163 q.byEventfd = byEventfd 164 q.wg.Add(1) 165 go q.epollLoop() 166 } else { 167 for _, qu := range queues { 168 qu.startCompletionLoop() 169 } 170 } 171 return 172 } 173 174 func (q *Loop) epollLoop() { 175 defer q.wg.Done() 176 var exit uint64 177 for { 178 if err := q.poll.wait(func(efd int32) { 179 if !q.byEventfd[efd].tryComplete() { 180 exit++ 181 return 182 } 183 }); err != nil { 184 panic(err) 185 } 186 if exit == q.n { 187 return 188 } 189 } 190 } 191 192 // getLoop returns queue for current thread. 193 func (q *Loop) getQueue() *queue { 194 if len(q.queues) == 1 { 195 return q.queues[0] 196 } 197 tid := uint64(syscall.Gettid()) 198 return q.queues[tid%q.n] 199 } 200 201 //go:uintptrescapes 202 203 // Syscall executes operation on one of the internal queues. Additionaly it prevents ptrs from being moved to another location while Syscall is in progress. 204 // WARNING: don't use interface that hides this method. 205 // https://github.com/golang/go/issues/16035#issuecomment-231107512. 206 func (q *Loop) Syscall(opt SQOperation, ptrs ...uintptr) (uring.CQEntry, error) { 207 return q.getQueue().Complete(opt) 208 } 209 210 //go:uintptrescapes 211 212 // BatchSyscall ... 213 func (q *Loop) BatchSyscall(cqes []uring.CQEntry, opts []SQOperation, ptrs ...uintptr) ([]uring.CQEntry, error) { 214 return q.getQueue().Batch(cqes, opts) 215 } 216 217 // tests for Register* methods are in fixed and fs modules. 218 219 // RegisterBuffers will register buffers on all rings (shards). Note that registration 220 // is done with syscall, and will have to wait until rings are idle. 221 // TODO test if IORING_OP_PROVIDE_BUFFERS is supported (5.7?) 222 func (q *Loop) RegisterBuffers(iovec []syscall.Iovec) (err error) { 223 for _, subq := range q.queues { 224 err = subq.Ring().RegisterBuffers(iovec) 225 if err != nil { 226 return 227 } 228 } 229 return 230 } 231 232 // RegisterFiles ... 233 func (q *Loop) RegisterFiles(fds []int32) (err error) { 234 for _, subq := range q.queues { 235 err = subq.Ring().RegisterFiles(fds) 236 if err != nil { 237 return 238 } 239 } 240 return 241 } 242 243 // UpdateFiles ... 244 func (q *Loop) UpdateFiles(fds []int32, off uint32) (err error) { 245 for _, subq := range q.queues { 246 err = subq.Ring().UpdateFiles(fds, off) 247 if err != nil { 248 return 249 } 250 } 251 return 252 } 253 254 // UnregisterFiles ... 255 func (q *Loop) UnregisterFiles() (err error) { 256 for _, subq := range q.queues { 257 err = subq.Ring().UnregisterFiles() 258 if err != nil { 259 return 260 } 261 } 262 return 263 } 264 265 // UnregisterBuffers ... 266 func (q *Loop) UnregisterBuffers() (err error) { 267 for _, qu := range q.queues { 268 if err := qu.Ring().UnregisterBuffers(); err != nil { 269 return err 270 } 271 } 272 return nil 273 } 274 275 // Close closes every shard queue, epoll instance and unregister eventfds. 276 // Close works as follows: 277 // - request close on each queue 278 // - once any queue exits - completionLoop will be terminated 279 // - once completion loop terminated - unregister eventfd's and close rings 280 func (q *Loop) Close() (err0 error) { 281 // FIXME use multierr 282 for _, queue := range q.queues { 283 if err := queue.Close(); err != nil && err0 == nil { 284 err0 = err 285 } 286 } 287 q.wg.Wait() 288 if q.poll != nil { 289 if err := q.poll.close(); err != nil && err0 == nil { 290 err0 = err 291 } 292 } 293 for _, qu := range q.queues { 294 ring := qu.Ring() 295 if err := ring.CloseEventfd(); err != nil && err0 == nil { 296 err0 = err 297 } 298 if err := ring.Close(); err != nil && err0 == nil { 299 err0 = err 300 } 301 } 302 return err0 303 }