github.com/pawelgaczynski/giouring@v0.0.0-20230826085535-69588b89acb9/setup.go

// MIT License
//
// Copyright (c) 2023 Paweł Gaczyński
//
// Permission is hereby granted, free of charge, to any person obtaining a
// copy of this software and associated documentation files (the
// "Software"), to deal in the Software without restriction, including
// without limitation the rights to use, copy, modify, merge, publish,
// distribute, sublicense, and/or sell copies of the Software, and to
// permit persons to whom the Software is furnished to do so, subject to
// the following conditions:
//
// The above copyright notice and this permission notice shall be included
// in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
// MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
// IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
// CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
// SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

package giouring

import (
	"math/bits"
	"os"
	"syscall"
	"unsafe"
)

const (
	kernMaxEntries   = 32768
	kernMaxCQEntries = 2 * kernMaxEntries
)

// fls mirrors the C fls() helper liburing relies on: it returns the 1-based
// index of the most significant set bit of x, or 0 when x == 0.
func fls(x int) int {
	if x == 0 {
		return 0
	}

	return bits.Len(uint(x))
}

func roundupPow2(depth uint32) uint32 {
	return 1 << uint32(fls(int(depth-1)))
}

const cqEntriesMultiplier = 2

// liburing: get_sq_cq_entries
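// getSqCqEntries validates and normalizes the requested queue depths: entries
// must be non-zero, is clamped to kernMaxEntries when SetupClamp is set, and
// is rounded up to a power of two. The CQ size defaults to twice the SQ size,
// or to p.cqEntries (clamped and rounded likewise) when SetupCQSize is set;
// it must not end up smaller than the SQ size.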
func getSqCqEntries(entries uint32, p *Params, sq, cq *uint32) error {
	var cqEntries uint32

	if entries == 0 {
		return syscall.EINVAL
	}
	if entries > kernMaxEntries {
		if p.flags&SetupClamp == 0 {
			return syscall.EINVAL
		}
		entries = kernMaxEntries
	}

	entries = roundupPow2(entries)
	if p.flags&SetupCQSize != 0 {
		if p.cqEntries == 0 {
			return syscall.EINVAL
		}
		cqEntries = p.cqEntries
		if cqEntries > kernMaxCQEntries {
			if p.flags&SetupClamp == 0 {
				return syscall.EINVAL
			}
			cqEntries = kernMaxCQEntries
		}
		cqEntries = roundupPow2(cqEntries)
		if cqEntries < entries {
			return syscall.EINVAL
		}
	} else {
		cqEntries = cqEntriesMultiplier * entries
	}
	*sq = entries
	*cq = cqEntries

	return nil
}

// liburing: io_uring_unmap_rings
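// UnmapRings releases the ring mappings: the SQ ring is unmapped if it was
// mapped, and the CQ ring is unmapped only when it is a separate mapping.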
func UnmapRings(sq *SubmissionQueue, cq *CompletionQueue) {
	if sq.ringSize > 0 {
		_ = sysMunmap(uintptr(sq.ringPtr), uintptr(sq.ringSize))
	}

	if uintptr(cq.ringPtr) != 0 && cq.ringSize > 0 && cq.ringPtr != sq.ringPtr {
		_ = sysMunmap(uintptr(cq.ringPtr), uintptr(cq.ringSize))
	}
}

// liburing: io_uring_setup_ring_pointers
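// SetupRingPointers points the SubmissionQueue and CompletionQueue fields at
// the shared ring memory, using the offsets the kernel reported in Params.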
func SetupRingPointers(p *Params, sq *SubmissionQueue, cq *CompletionQueue) {
	sq.head = (*uint32)(unsafe.Pointer(uintptr(sq.ringPtr) + uintptr(p.sqOff.head)))
	sq.tail = (*uint32)(unsafe.Pointer(uintptr(sq.ringPtr) + uintptr(p.sqOff.tail)))
	sq.ringMask = (*uint32)(unsafe.Pointer(uintptr(sq.ringPtr) + uintptr(p.sqOff.ringMask)))
	sq.ringEntries = (*uint32)(unsafe.Pointer(uintptr(sq.ringPtr) + uintptr(p.sqOff.ringEntries)))
	sq.flags = (*uint32)(unsafe.Pointer(uintptr(sq.ringPtr) + uintptr(p.sqOff.flags)))
	sq.dropped = (*uint32)(unsafe.Pointer(uintptr(sq.ringPtr) + uintptr(p.sqOff.dropped)))
	sq.array = (*uint32)(unsafe.Pointer(uintptr(sq.ringPtr) + uintptr(p.sqOff.array)))

	cq.head = (*uint32)(unsafe.Pointer(uintptr(cq.ringPtr) + uintptr(p.cqOff.head)))
	cq.tail = (*uint32)(unsafe.Pointer(uintptr(cq.ringPtr) + uintptr(p.cqOff.tail)))
	cq.ringMask = (*uint32)(unsafe.Pointer(uintptr(cq.ringPtr) + uintptr(p.cqOff.ringMask)))
	cq.ringEntries = (*uint32)(unsafe.Pointer(uintptr(cq.ringPtr) + uintptr(p.cqOff.ringEntries)))
	cq.overflow = (*uint32)(unsafe.Pointer(uintptr(cq.ringPtr) + uintptr(p.cqOff.overflow)))
	cq.cqes = (*CompletionQueueEvent)(unsafe.Pointer(uintptr(cq.ringPtr) + uintptr(p.cqOff.cqes)))
	if p.cqOff.flags != 0 {
		cq.flags = (*uint32)(unsafe.Pointer(uintptr(cq.ringPtr) + uintptr(p.cqOff.flags)))
	}
}

// liburing: io_uring_mmap
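// Mmap maps the SQ ring, CQ ring and SQE array for the ring file descriptor
// fd. When the kernel reports FeatSingleMMap, the SQ and CQ rings share a
// single mapping. On failure, any mappings made so far are released.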
func Mmap(fd int, p *Params, sq *SubmissionQueue, cq *CompletionQueue) error {
	var size uintptr
	var err error

	size = unsafe.Sizeof(CompletionQueueEvent{})
	if p.flags&SetupCQE32 != 0 {
		size += unsafe.Sizeof(CompletionQueueEvent{})
	}

	sq.ringSize = uint(uintptr(p.sqOff.array) + uintptr(p.sqEntries)*unsafe.Sizeof(uint32(0)))
	cq.ringSize = uint(uintptr(p.cqOff.cqes) + uintptr(p.cqEntries)*size)

	if p.features&FeatSingleMMap != 0 {
		if cq.ringSize > sq.ringSize {
			sq.ringSize = cq.ringSize
		}
		cq.ringSize = sq.ringSize
	}

	var ringPtr uintptr
	ringPtr, err = mmap(0, uintptr(sq.ringSize), syscall.PROT_READ|syscall.PROT_WRITE,
		syscall.MAP_SHARED|syscall.MAP_POPULATE, fd,
		int64(offsqRing))
	if err != nil {
		return err
	}
	sq.ringPtr = unsafe.Pointer(ringPtr)

	if p.features&FeatSingleMMap != 0 {
		cq.ringPtr = sq.ringPtr
	} else {
		ringPtr, err = mmap(0, uintptr(cq.ringSize), syscall.PROT_READ|syscall.PROT_WRITE,
			syscall.MAP_SHARED|syscall.MAP_POPULATE, fd,
			int64(offcqRing))
		if err != nil {
			cq.ringPtr = nil

			goto err
		}
		cq.ringPtr = unsafe.Pointer(ringPtr)
	}

	size = unsafe.Sizeof(SubmissionQueueEntry{})
	if p.flags&SetupSQE128 != 0 {
		size += 64
	}
	ringPtr, err = mmap(0, size*uintptr(p.sqEntries), syscall.PROT_READ|syscall.PROT_WRITE,
		syscall.MAP_SHARED|syscall.MAP_POPULATE, fd, int64(offSQEs))
	if err != nil {
		goto err
	}
	sq.sqes = (*SubmissionQueueEntry)(unsafe.Pointer(ringPtr))
	SetupRingPointers(p, sq, cq)

	return nil

err:
	UnmapRings(sq, cq)

	return err
}

// liburing: io_uring_queue_mmap
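// QueueMmap maps the rings described by p for the ring file descriptor fd
// into ring's submission and completion queues.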
func (ring *Ring) QueueMmap(fd int, p *Params) error {
	return Mmap(fd, p, ring.sqRing, ring.cqRing)
}

// liburing: io_uring_ring_dontfork
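// RingDontFork marks the ring mappings with MADV_DONTFORK so that they are
// not inherited by child processes created with fork(2).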
func (ring *Ring) RingDontFork() error {
	var length uintptr
	var err error

	if ring.sqRing.ringPtr == nil || ring.sqRing.sqes == nil || ring.cqRing.ringPtr == nil {
		return syscall.EINVAL
	}

	length = unsafe.Sizeof(SubmissionQueueEntry{})
	if ring.flags&SetupSQE128 != 0 {
		length += 64
	}
	length *= uintptr(*ring.sqRing.ringEntries)
	err = sysMadvise(uintptr(unsafe.Pointer(ring.sqRing.sqes)), length, syscall.MADV_DONTFORK)
	if err != nil {
		return err
	}

	length = uintptr(ring.sqRing.ringSize)
	err = sysMadvise(uintptr(ring.sqRing.ringPtr), length, syscall.MADV_DONTFORK)
	if err != nil {
		return err
	}

	if ring.cqRing.ringPtr != ring.sqRing.ringPtr {
		length = uintptr(ring.cqRing.ringSize)
		err = sysMadvise(uintptr(ring.cqRing.ringPtr), length, syscall.MADV_DONTFORK)
		if err != nil {
			return err
		}
	}

	return nil
}

// FIXME: as in liburing, a 2 MiB huge page size is assumed here.
const hugePageSize uint64 = 2 * 1024 * 1024

// liburing: io_uring_alloc_huge
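// allocHuge sets up the ring memory in user space for SetupNoMmap: it either
// places the rings in the caller-provided buffer or maps an anonymous region
// (a huge page when a normal page is too small), records the addresses in
// p.sqOff.userAddr and p.cqOff.userAddr for the kernel, and returns the
// number of bytes used.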
func allocHuge(
	entries uint32, p *Params, sq *SubmissionQueue, cq *CompletionQueue, buf unsafe.Pointer, bufSize uint64,
) (uint, error) {
	pageSize := uint64(os.Getpagesize())
	var sqEntries, cqEntries uint32
	var ringMem, sqesMem uint64
	var memUsed uint64
	var ptr unsafe.Pointer

	errno := getSqCqEntries(entries, p, &sqEntries, &cqEntries)
	if errno != nil {
		return 0, errno
	}

	// Account for the SQE array and the CQEs using their per-entry sizes.
	sqesMem = uint64(sqEntries) * uint64(unsafe.Sizeof(SubmissionQueueEntry{}))
	sqesMem = (sqesMem + pageSize - 1) &^ (pageSize - 1)
	ringMem = uint64(cqEntries) * uint64(unsafe.Sizeof(CompletionQueueEvent{}))
	if p.flags&SetupCQE32 != 0 {
		ringMem *= 2
	}
	ringMem += uint64(sqEntries) * uint64(unsafe.Sizeof(uint32(0)))
	memUsed = sqesMem + ringMem
	memUsed = (memUsed + pageSize - 1) &^ (pageSize - 1)

	if buf == nil && (sqesMem > hugePageSize || ringMem > hugePageSize) {
		return 0, syscall.ENOMEM
	}

	if buf != nil {
		if memUsed > bufSize {
			return 0, syscall.ENOMEM
		}
		ptr = buf
	} else {
		var mapHugetlb int
		if sqesMem <= pageSize {
			bufSize = pageSize
		} else {
			bufSize = hugePageSize
			mapHugetlb = syscall.MAP_HUGETLB
		}
		var err error
		ptr, err = sysMmap(
			0, uintptr(bufSize),
			syscall.PROT_READ|syscall.PROT_WRITE,
			syscall.MAP_SHARED|syscall.MAP_ANONYMOUS|mapHugetlb, -1, 0)
		if err != nil {
			return 0, err
		}
	}

	sq.sqes = (*SubmissionQueueEntry)(ptr)
	if memUsed <= bufSize {
		sq.ringPtr = unsafe.Pointer(uintptr(unsafe.Pointer(sq.sqes)) + uintptr(sqesMem))
		cq.ringSize = 0
		sq.ringSize = 0
	} else {
		var mapHugetlb int
		if ringMem <= pageSize {
			bufSize = pageSize
		} else {
			bufSize = hugePageSize
			mapHugetlb = syscall.MAP_HUGETLB
		}
		var err error
		ptr, err = sysMmap(
			0, uintptr(bufSize),
			syscall.PROT_READ|syscall.PROT_WRITE,
			syscall.MAP_SHARED|syscall.MAP_ANONYMOUS|mapHugetlb, -1, 0)
		if err != nil {
			_ = sysMunmap(uintptr(unsafe.Pointer(sq.sqes)), 1)

			return 0, err
		}
		sq.ringPtr = ptr
		sq.ringSize = uint(bufSize)
		cq.ringSize = 0
	}

	cq.ringPtr = sq.ringPtr
	p.sqOff.userAddr = uint64(uintptr(unsafe.Pointer(sq.sqes)))
	p.cqOff.userAddr = uint64(uintptr(sq.ringPtr))

	return uint(memUsed), nil
}

// liburing: __io_uring_queue_init_params
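// internalQueueInitParams performs the actual ring setup: it optionally
// allocates user-space ring memory (SetupNoMmap), invokes the io_uring_setup
// syscall, maps the rings or wires up the ring pointers, seeds the SQ index
// array, and records the resulting descriptors and flags on the Ring.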
func (ring *Ring) internalQueueInitParams(entries uint32, p *Params, buf unsafe.Pointer, bufSize uint64) error {
	var fd int
	var sqEntries, index uint32
	var err error

	if p.flags&SetupRegisteredFdOnly != 0 && p.flags&SetupNoMmap == 0 {
		return syscall.EINVAL
	}

	if p.flags&SetupNoMmap != 0 {
		_, err = allocHuge(entries, p, ring.sqRing, ring.cqRing, buf, bufSize)
		if err != nil {
			return err
		}
		if buf != nil {
			ring.intFlags |= IntFlagAppMem
		}
	}

	fdPtr, _, errno := syscall.Syscall(sysSetup, uintptr(entries), uintptr(unsafe.Pointer(p)), 0)
	if errno != 0 {
		if p.flags&SetupNoMmap != 0 && ring.intFlags&IntFlagAppMem == 0 {
			_ = sysMunmap(uintptr(unsafe.Pointer(ring.sqRing.sqes)), 1)
			UnmapRings(ring.sqRing, ring.cqRing)
		}

		return errno
	}
	fd = int(fdPtr)

	if p.flags&SetupNoMmap == 0 {
		err = ring.QueueMmap(fd, p)
		if err != nil {
			syscall.Close(fd)

			return err
		}
	} else {
		SetupRingPointers(p, ring.sqRing, ring.cqRing)
	}

	sqEntries = *ring.sqRing.ringEntries
	for index = 0; index < sqEntries; index++ {
		*(*uint32)(
			unsafe.Add(unsafe.Pointer(ring.sqRing.array),
				index*uint32(unsafe.Sizeof(uint32(0))))) = index
	}

	ring.features = p.features
	ring.flags = p.flags
	ring.enterRingFd = fd
	if p.flags&SetupRegisteredFdOnly != 0 {
		ring.ringFd = -1
		ring.intFlags |= IntFlagRegRing | IntFlagRegRegRing
	} else {
		ring.ringFd = fd
	}

	return nil
}

// liburing: io_uring_queue_init_mem
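// QueueInitMem initializes the ring like QueueInitParams, but places the ring
// memory in the caller-provided buffer (SetupNoMmap is forced on).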
func (ring *Ring) QueueInitMem(entries uint32, p *Params, buf unsafe.Pointer, bufSize uint64) error {
	// should already be set...
	p.flags |= SetupNoMmap

	return ring.internalQueueInitParams(entries, p, buf, bufSize)
}

// liburing: io_uring_queue_init_params - https://manpages.debian.org/unstable/liburing-dev/io_uring_queue_init_params.3.en.html
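// QueueInitParams initializes the ring with the given number of entries and
// the setup parameters in p.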
func (ring *Ring) QueueInitParams(entries uint32, p *Params) error {
	return ring.internalQueueInitParams(entries, p, nil, 0)
}

// liburing: io_uring_queue_init - https://manpages.debian.org/unstable/liburing-dev/io_uring_queue_init.3.en.html
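// QueueInit initializes the ring with the given number of entries and setup
// flags. A minimal usage sketch (error handling and the actual submissions
// elided):
//
//	ring := NewRing()
//	if err := ring.QueueInit(64, 0); err != nil {
//		// handle the error
//	}
//	defer ring.QueueExit()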
func (ring *Ring) QueueInit(entries uint32, flags uint32) error {
	params := &Params{
		flags: flags,
	}

	return ring.QueueInitParams(entries, params)
}

// liburing: io_uring_queue_exit - https://manpages.debian.org/unstable/liburing-dev/io_uring_queue_exit.3.en.html
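// QueueExit undoes the queue initialization: it unmaps the SQE array and the
// ring mappings where this package created them, unregisters the ring file
// descriptor if it was registered, and closes the ring file descriptor.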
func (ring *Ring) QueueExit() {
	sq := ring.sqRing
	cq := ring.cqRing
	var sqeSize uintptr

	if sq.ringSize == 0 {
		sqeSize = unsafe.Sizeof(SubmissionQueueEntry{})
		if ring.flags&SetupSQE128 != 0 {
			sqeSize += 64
		}
		_ = sysMunmap(uintptr(unsafe.Pointer(sq.sqes)), sqeSize*uintptr(*sq.ringEntries))
		UnmapRings(sq, cq)
	} else if ring.intFlags&IntFlagAppMem == 0 {
		_ = sysMunmap(uintptr(unsafe.Pointer(sq.sqes)), uintptr(*sq.ringEntries)*unsafe.Sizeof(SubmissionQueueEntry{}))
		UnmapRings(sq, cq)
	}

	if ring.intFlags&IntFlagRegRing != 0 {
		_, _ = ring.UnregisterRingFd()
	}
	if ring.ringFd != -1 {
		syscall.Close(ring.ringFd)
	}
}

// ringSize approximates the fixed overhead of the kernel's ring headers
// (liburing's KRING_SIZE).
const ringSize = 320

func npages(size uint64, pageSize uint64) uint64 {
	size--
	size /= pageSize

	return uint64(fls(int(size)))
}

const (
	not63ul       = ^uint64(63) // i.e. 18446744073709551552: rounds down to a 64-byte boundary
	ringSizeCQOff = 63
)

// liburing: rings_size
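// ringsSize estimates, in bytes and rounded up to whole pages, the memory the
// kernel allocates for a ring with the given SQ and CQ sizes.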
func ringsSize(p *Params, entries uint32, cqEntries uint32, pageSize uint64) uint64 {
	var pages, sqSize, cqSize uint64

	cqSize = uint64(unsafe.Sizeof(CompletionQueueEvent{}))
	if p.flags&SetupCQE32 != 0 {
		cqSize += uint64(unsafe.Sizeof(CompletionQueueEvent{}))
	}
	cqSize *= uint64(cqEntries)
	cqSize += ringSize
	cqSize = (cqSize + ringSizeCQOff) & not63ul
	pages = 1 << npages(cqSize, pageSize)

	sqSize = uint64(unsafe.Sizeof(SubmissionQueueEntry{}))
	if p.flags&SetupSQE128 != 0 {
		sqSize += 64
	}
	sqSize *= uint64(entries)
	pages += 1 << npages(sqSize, pageSize)

	return pages * pageSize
}

// liburing: io_uring_mlock_size_params
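// MlockSizeParams returns the amount of memlock-limited memory, in bytes,
// needed for a ring created with the given entries and params. Kernels that
// report FeatNativeWorkers do not charge ring memory against RLIMIT_MEMLOCK,
// so 0 is returned in that case.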
func MlockSizeParams(entries uint32, p *Params) (uint64, error) {
	lp := &Params{}
	ring := NewRing()
	var cqEntries, sq uint32
	var pageSize uint64
	var err error

	// The probe ring is only used to discover what the running kernel
	// supports; tear it down again if it was set up successfully.
	err = ring.QueueInitParams(entries, lp)
	if err == nil {
		ring.QueueExit()
	}

	if lp.features&FeatNativeWorkers != 0 {
		return 0, nil
	}

	if entries == 0 {
		return 0, syscall.EINVAL
	}
	if entries > kernMaxEntries {
		if p.flags&SetupClamp == 0 {
			return 0, syscall.EINVAL
		}
		entries = kernMaxEntries
	}

	err = getSqCqEntries(entries, p, &sq, &cqEntries)
	if err != nil {
		return 0, err
	}

	pageSize = uint64(os.Getpagesize())

	return ringsSize(p, sq, cqEntries, pageSize), nil
}

// liburing: io_uring_mlock_size
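// MlockSize is the flags-only variant of MlockSizeParams.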
func MlockSize(entries, flags uint32) (uint64, error) {
	p := &Params{}
	p.flags = flags

	return MlockSizeParams(entries, p)
}

// liburing: br_setup
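// brSetup maps an anonymous region large enough for nentries buffer-ring
// entries and registers it with the kernel under buffer group id bgid. The
// mapping is released again if registration fails.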
func (ring *Ring) brSetup(nentries uint32, bgid uint16, flags uint32) (*BufAndRing, error) {
	var br *BufAndRing
	var reg BufReg
	var bufRingSize, brPtr uintptr
	var err error

	reg = BufReg{}
	bufRingSize = uintptr(nentries) * unsafe.Sizeof(BufAndRing{})
	brPtr, err = mmap(
		0, bufRingSize, syscall.PROT_READ|syscall.PROT_WRITE,
		syscall.MAP_ANONYMOUS|syscall.MAP_PRIVATE, -1, 0)
	if err != nil {
		return nil, err
	}
	br = (*BufAndRing)(unsafe.Pointer(brPtr))

	reg.RingAddr = uint64(uintptr(unsafe.Pointer(br)))
	reg.RingEntries = nentries
	reg.Bgid = bgid

	_, err = ring.RegisterBufferRing(&reg, flags)
	if err != nil {
		_ = sysMunmap(uintptr(unsafe.Pointer(br)), bufRingSize)

		return nil, err
	}

	return br, nil
}

// liburing: io_uring_setup_buf_ring - https://manpages.debian.org/unstable/liburing-dev/io_uring_setup_buf_ring.3.en.html
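// SetupBufRing creates and registers a provided-buffer ring for buffer group
// bgid and returns it with its state initialized via BufRingInit. flags are
// passed through to the registration.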
func (ring *Ring) SetupBufRing(nentries uint32, bgid int, flags uint32) (*BufAndRing, error) {
	br, err := ring.brSetup(nentries, uint16(bgid), flags)
	if br != nil {
		br.BufRingInit()
	}

	return br, err
}

// liburing: io_uring_free_buf_ring - https://manpages.debian.org/unstable/liburing-dev/io_uring_free_buf_ring.3.en.html
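// FreeBufRing unregisters the buffer ring registered under bgid. Note that it
// does not unmap the memory that SetupBufRing allocated for the ring.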
func (ring *Ring) FreeBufRing(bgid int) error {
	_, err := ring.UnregisterBufferRing(bgid)

	return err
}

// RingFd returns the ring file descriptor, or -1 when the ring was set up
// with SetupRegisteredFdOnly.
func (ring *Ring) RingFd() int {
	return ring.ringFd
}