github.com/egonelbre/exp@v0.0.0-20240430123955-ed1d3aa93911/vector/compare/axpy.go (about)

     1  package compare
     2  
     3  import (
     4  	"unsafe"
     5  )
     6  
     7  func at[T any](xs []T, index uintptr) *T {
     8  	return (*T)(unsafe.Add(unsafe.Pointer(unsafe.SliceData(xs)), index*unsafe.Sizeof(xs[0])))
     9  }
    10  
    11  //go:noinline
    12  func AxpyBasic(alpha float32, xs []float32, incx uintptr, ys []float32, incy uintptr, n uintptr) {
    13  	xi, yi := uintptr(0), uintptr(0)
    14  	for i := uintptr(0); i < n; i++ {
    15  		ys[yi] += alpha * xs[xi]
    16  
    17  		xi += incx
    18  		yi += incy
    19  	}
    20  }
    21  
    22  //go:noinline
    23  func AxpyUnsafe(alpha float32, xs []float32, incx uintptr, ys []float32, incy uintptr, n uintptr) {
    24  	xi, yi := uintptr(0), uintptr(0)
    25  	for i := uintptr(0); i < n; i++ {
    26  		*at(ys, yi) += alpha * *at(xs, xi)
    27  		xi += incx
    28  		yi += incy
    29  	}
    30  }
    31  
    32  //go:noinline
    33  func AxpyUnsafeX(alpha float32, xs []float32, incx uintptr, ys []float32, incy uintptr, n uintptr) {
    34  	xi, yi := uintptr(0), uintptr(0)
    35  	for ; n > 0; n-- {
    36  		*at(ys, yi) += alpha * *at(xs, xi)
    37  		xi += incx
    38  		yi += incy
    39  	}
    40  }
    41  
    42  //go:noinline
    43  func AxpyUnsafeInline(alpha float32, xs []float32, incx uintptr, ys []float32, incy uintptr, n uintptr) {
    44  	for i := uintptr(0); i < n; i++ {
    45  		*at(ys, i*incy) += alpha * *at(xs, i*incx)
    46  	}
    47  }
    48  
    49  //go:noinline
    50  func AxpyPointer(alpha float32, xs []float32, incx uintptr, ys []float32, incy uintptr, n uintptr) {
    51  	xp := unsafe.Pointer(unsafe.SliceData(xs))
    52  	yp := unsafe.Pointer(unsafe.SliceData(ys))
    53  	xn := unsafe.Add(xp, 4*n*incx)
    54  	for uintptr(xp) < uintptr(xn) {
    55  		*(*float32)(yp) += alpha * *(*float32)(xp)
    56  		xp, yp = unsafe.Add(xp, 4*incx), unsafe.Add(yp, 4*incy)
    57  	}
    58  }
    59  
    60  //go:noinline
    61  func AxpyPointerLoop(alpha float32, xs []float32, incx uintptr, ys []float32, incy uintptr, n uintptr) {
    62  	xp := unsafe.Pointer(unsafe.SliceData(xs))
    63  	yp := unsafe.Pointer(unsafe.SliceData(ys))
    64  	for i := uintptr(0); i < n; i++ {
    65  		*(*float32)(yp) += alpha * *(*float32)(xp)
    66  		xp, yp = unsafe.Add(xp, 4*incx), unsafe.Add(yp, 4*incy)
    67  	}
    68  }
    69  
    70  //go:noinline
    71  func AxpyPointerLoopX(alpha float32, xs []float32, incx uintptr, ys []float32, incy uintptr, n uintptr) {
    72  	xp := unsafe.Pointer(unsafe.SliceData(xs))
    73  	yp := unsafe.Pointer(unsafe.SliceData(ys))
    74  	for ; n > 0; n-- {
    75  		*(*float32)(yp) += alpha * *(*float32)(xp)
    76  		xp, yp = unsafe.Add(xp, 4*incx), unsafe.Add(yp, 4*incy)
    77  	}
    78  }
    79  
    80  const mask4 = ^uintptr(3)
    81  const mask8 = ^uintptr(7)
    82  
    83  //go:noinline
    84  func AxpyBasicR4(alpha float32, xs []float32, incx uintptr, ys []float32, incy uintptr, n uintptr) {
    85  	xi, yi := uintptr(0), uintptr(0)
    86  	i := uintptr(0)
    87  	n4 := n & mask4
    88  	for ; i < n4; i += 4 {
    89  		ys[yi+0*incy] += alpha * xs[xi+0*incx]
    90  		ys[yi+1*incy] += alpha * xs[xi+1*incx]
    91  		ys[yi+2*incy] += alpha * xs[xi+2*incx]
    92  		ys[yi+3*incy] += alpha * xs[xi+3*incx]
    93  
    94  		xi += incx * 4
    95  		yi += incy * 4
    96  	}
    97  	for ; i < n; i++ {
    98  		ys[yi] += alpha * xs[xi]
    99  		xi += incx
   100  		yi += incy
   101  	}
   102  }
   103  
   104  //go:noinline
   105  func AxpyUnsafeR4(alpha float32, xs []float32, incx uintptr, ys []float32, incy uintptr, n uintptr) {
   106  	xi, yi := uintptr(0), uintptr(0)
   107  	i := uintptr(0)
   108  	n4 := n & mask4
   109  	for ; i < n4; i += 4 {
   110  		*at(ys, yi+0*incy) += alpha * *at(xs, xi+0*incx)
   111  		*at(ys, yi+1*incy) += alpha * *at(xs, xi+1*incx)
   112  		*at(ys, yi+2*incy) += alpha * *at(xs, xi+2*incx)
   113  		*at(ys, yi+3*incy) += alpha * *at(xs, xi+3*incx)
   114  		xi += incx * 4
   115  		yi += incy * 4
   116  	}
   117  	for ; i < n; i++ {
   118  		*at(ys, yi+0) += alpha * *at(xs, xi+0)
   119  		xi += incx
   120  		yi += incy
   121  	}
   122  }
   123  
   124  //go:noinline
   125  func AxpyBasicXR4(alpha float32, xs []float32, incx uintptr, ys []float32, incy uintptr, n uintptr) {
   126  	xi, yi := uintptr(0), uintptr(0)
   127  	for ; n >= 4; n -= 4 {
   128  		ys[yi+0*incy] += alpha * xs[xi+0*incx]
   129  		ys[yi+1*incy] += alpha * xs[xi+1*incx]
   130  		ys[yi+2*incy] += alpha * xs[xi+2*incx]
   131  		ys[yi+3*incy] += alpha * xs[xi+3*incx]
   132  
   133  		xi += incx * 4
   134  		yi += incy * 4
   135  	}
   136  	for ; n > 0; n-- {
   137  		ys[yi] += alpha * xs[xi]
   138  		xi += incx
   139  		yi += incy
   140  	}
   141  }
   142  
   143  //go:noinline
   144  func AxpyUnsafeXR4(alpha float32, xs []float32, incx uintptr, ys []float32, incy uintptr, n uintptr) {
   145  	xi, yi := uintptr(0), uintptr(0)
   146  	for ; n >= 4; n -= 4 {
   147  		*at(ys, yi+0*incy) += alpha * *at(xs, xi+0*incx)
   148  		*at(ys, yi+1*incy) += alpha * *at(xs, xi+1*incx)
   149  		*at(ys, yi+2*incy) += alpha * *at(xs, xi+2*incx)
   150  		*at(ys, yi+3*incy) += alpha * *at(xs, xi+3*incx)
   151  		xi += incx * 4
   152  		yi += incy * 4
   153  	}
   154  	for ; n > 0; n-- {
   155  		*at(ys, yi+0) += alpha * *at(xs, xi+0)
   156  		xi += incx
   157  		yi += incy
   158  	}
   159  }
   160  
   161  //go:noinline
   162  func AxpyUnsafeR8(alpha float32, xs []float32, incx uintptr, ys []float32, incy uintptr, n uintptr) {
   163  	if n == 0 {
   164  		return
   165  	}
   166  	_, _ = xs[(n-1)*incx], ys[(n-1)*incy]
   167  	xi, yi := uintptr(0), uintptr(0)
   168  	n8 := n & mask8
   169  	i := uintptr(0)
   170  	for ; i < n8; i += 8 {
   171  		*at(ys, yi+0*incy) += alpha * *at(xs, xi+0*incx)
   172  		*at(ys, yi+1*incy) += alpha * *at(xs, xi+1*incx)
   173  		*at(ys, yi+2*incy) += alpha * *at(xs, xi+2*incx)
   174  		*at(ys, yi+3*incy) += alpha * *at(xs, xi+3*incx)
   175  		*at(ys, yi+4*incy) += alpha * *at(xs, xi+4*incx)
   176  		*at(ys, yi+5*incy) += alpha * *at(xs, xi+5*incx)
   177  		*at(ys, yi+6*incy) += alpha * *at(xs, xi+6*incx)
   178  		*at(ys, yi+7*incy) += alpha * *at(xs, xi+7*incx)
   179  		xi += incx * 8
   180  		yi += incy * 8
   181  	}
   182  	for ; i < n; i++ {
   183  		*at(ys, yi+0) += alpha * *at(xs, xi+0)
   184  		xi += incx
   185  		yi += incy
   186  	}
   187  }
   188  
   189  //go:noinline
   190  func AxpyUnsafeXR8(alpha float32, xs []float32, incx uintptr, ys []float32, incy uintptr, n uintptr) {
   191  	if n == 0 {
   192  		return
   193  	}
   194  	_, _ = xs[(n-1)*incx], ys[(n-1)*incy]
   195  	xi, yi := uintptr(0), uintptr(0)
   196  	for ; n >= 8; n -= 8 {
   197  		*at(ys, yi+0*incy) += alpha * *at(xs, xi+0*incx)
   198  		*at(ys, yi+1*incy) += alpha * *at(xs, xi+1*incx)
   199  		*at(ys, yi+2*incy) += alpha * *at(xs, xi+2*incx)
   200  		*at(ys, yi+3*incy) += alpha * *at(xs, xi+3*incx)
   201  		*at(ys, yi+4*incy) += alpha * *at(xs, xi+4*incx)
   202  		*at(ys, yi+5*incy) += alpha * *at(xs, xi+5*incx)
   203  		*at(ys, yi+6*incy) += alpha * *at(xs, xi+6*incx)
   204  		*at(ys, yi+7*incy) += alpha * *at(xs, xi+7*incx)
   205  		xi += incx * 8
   206  		yi += incy * 8
   207  	}
   208  	for ; n > 0; n-- {
   209  		*at(ys, yi+0) += alpha * *at(xs, xi+0)
   210  		xi += incx
   211  		yi += incy
   212  	}
   213  }
   214  
   215  //go:noinline
   216  func AxpyUnsafeInlineR4(alpha float32, xs []float32, incx uintptr, ys []float32, incy uintptr, n uintptr) {
   217  	i := uintptr(0)
   218  	n4 := n & mask4
   219  	for ; i < n4; i += 4 {
   220  		*at(ys, (i+0)*incy) += alpha * *at(xs, (i+0)*incx)
   221  		*at(ys, (i+1)*incy) += alpha * *at(xs, (i+1)*incx)
   222  		*at(ys, (i+2)*incy) += alpha * *at(xs, (i+2)*incx)
   223  		*at(ys, (i+3)*incy) += alpha * *at(xs, (i+3)*incx)
   224  	}
   225  	for ; i < n; i++ {
   226  		*at(ys, (i+0)*incy) += alpha * *at(xs, (i+0)*incx)
   227  	}
   228  }
   229  
   230  //go:noinline
   231  func AxpyUnsafeInlineXR4(alpha float32, xs []float32, incx uintptr, ys []float32, incy uintptr, n uintptr) {
   232  	i := uintptr(0)
   233  	for ; n >= 4; n -= 4 {
   234  		*at(ys, (i+0)*incy) += alpha * *at(xs, (i+0)*incx)
   235  		*at(ys, (i+1)*incy) += alpha * *at(xs, (i+1)*incx)
   236  		*at(ys, (i+2)*incy) += alpha * *at(xs, (i+2)*incx)
   237  		*at(ys, (i+3)*incy) += alpha * *at(xs, (i+3)*incx)
   238  		i += 4
   239  	}
   240  	for ; n > 0; n-- {
   241  		*at(ys, (i+0)*incy) += alpha * *at(xs, (i+0)*incx)
   242  		i++
   243  	}
   244  }
   245  
   246  //go:noinline
   247  func AxpyUnsafeInlineR8(alpha float32, xs []float32, incx uintptr, ys []float32, incy uintptr, n uintptr) {
   248  	i := uintptr(0)
   249  	n8 := n & mask8
   250  	for ; i < n8; i += 8 {
   251  		*at(ys, (i+0)*incy) += alpha * *at(xs, (i+0)*incx)
   252  		*at(ys, (i+1)*incy) += alpha * *at(xs, (i+1)*incx)
   253  		*at(ys, (i+2)*incy) += alpha * *at(xs, (i+2)*incx)
   254  		*at(ys, (i+3)*incy) += alpha * *at(xs, (i+3)*incx)
   255  		*at(ys, (i+4)*incy) += alpha * *at(xs, (i+4)*incx)
   256  		*at(ys, (i+5)*incy) += alpha * *at(xs, (i+5)*incx)
   257  		*at(ys, (i+6)*incy) += alpha * *at(xs, (i+6)*incx)
   258  		*at(ys, (i+7)*incy) += alpha * *at(xs, (i+7)*incx)
   259  	}
   260  	for ; i < n; i++ {
   261  		*at(ys, (i+0)*incy) += alpha * *at(xs, (i+0)*incx)
   262  	}
   263  }
   264  
   265  //go:noinline
   266  func AxpyUnsafeInlineXR8(alpha float32, xs []float32, incx uintptr, ys []float32, incy uintptr, n uintptr) {
   267  	i := uintptr(0)
   268  	for ; n >= 8; n -= 8 {
   269  		*at(ys, (i+0)*incy) += alpha * *at(xs, (i+0)*incx)
   270  		*at(ys, (i+1)*incy) += alpha * *at(xs, (i+1)*incx)
   271  		*at(ys, (i+2)*incy) += alpha * *at(xs, (i+2)*incx)
   272  		*at(ys, (i+3)*incy) += alpha * *at(xs, (i+3)*incx)
   273  		*at(ys, (i+4)*incy) += alpha * *at(xs, (i+4)*incx)
   274  		*at(ys, (i+5)*incy) += alpha * *at(xs, (i+5)*incx)
   275  		*at(ys, (i+6)*incy) += alpha * *at(xs, (i+6)*incx)
   276  		*at(ys, (i+7)*incy) += alpha * *at(xs, (i+7)*incx)
   277  		i += 8
   278  	}
   279  	for ; n > 0; n-- {
   280  		*at(ys, (i+0)*incy) += alpha * *at(xs, (i+0)*incx)
   281  		i++
   282  	}
   283  }
   284  
   285  //go:noinline
   286  func AxpyPointerR4(alpha float32, xs []float32, incx uintptr, ys []float32, incy uintptr, n uintptr) {
   287  	const Size = unsafe.Sizeof(xs[0])
   288  
   289  	xp := unsafe.Pointer(unsafe.SliceData(xs))
   290  	yp := unsafe.Pointer(unsafe.SliceData(ys))
   291  
   292  	xn4 := unsafe.Add(xp, (n&mask4)*incx*Size)
   293  	xn := unsafe.Add(xp, n*incx*Size)
   294  	for uintptr(xp) < uintptr(xn4) {
   295  		*(*float32)(unsafe.Add(yp, 0*Size*incy)) += alpha * *(*float32)(unsafe.Add(xp, 0*Size*incx))
   296  		*(*float32)(unsafe.Add(yp, 1*Size*incy)) += alpha * *(*float32)(unsafe.Add(xp, 1*Size*incx))
   297  		*(*float32)(unsafe.Add(yp, 2*Size*incy)) += alpha * *(*float32)(unsafe.Add(xp, 2*Size*incx))
   298  		*(*float32)(unsafe.Add(yp, 3*Size*incy)) += alpha * *(*float32)(unsafe.Add(xp, 3*Size*incx))
   299  		xp, yp = unsafe.Add(xp, 4*incx*Size), unsafe.Add(yp, 4*incy*Size)
   300  	}
   301  	for uintptr(xp) < uintptr(xn) {
   302  		*(*float32)(yp) += alpha * *(*float32)(xp)
   303  		xp, yp = unsafe.Add(xp, incx*Size), unsafe.Add(yp, incy*Size)
   304  	}
   305  }
   306  
   307  //go:noinline
   308  func AxpyPointerLoopR4(alpha float32, xs []float32, incx uintptr, ys []float32, incy uintptr, n uintptr) {
   309  	const Size = unsafe.Sizeof(xs[0])
   310  
   311  	xp := unsafe.Pointer(unsafe.SliceData(xs))
   312  	yp := unsafe.Pointer(unsafe.SliceData(ys))
   313  
   314  	i := uintptr(0)
   315  	n4 := n & mask4
   316  	for ; i < n4; i += 4 {
   317  		*(*float32)(unsafe.Add(yp, 0*Size*incy)) += alpha * *(*float32)(unsafe.Add(xp, 0*Size*incx))
   318  		*(*float32)(unsafe.Add(yp, 1*Size*incy)) += alpha * *(*float32)(unsafe.Add(xp, 1*Size*incx))
   319  		*(*float32)(unsafe.Add(yp, 2*Size*incy)) += alpha * *(*float32)(unsafe.Add(xp, 2*Size*incx))
   320  		*(*float32)(unsafe.Add(yp, 3*Size*incy)) += alpha * *(*float32)(unsafe.Add(xp, 3*Size*incx))
   321  		xp, yp = unsafe.Add(xp, 4*incx*Size), unsafe.Add(yp, 4*incy*Size)
   322  	}
   323  	for ; i < n; i++ {
   324  		*(*float32)(yp) += alpha * *(*float32)(xp)
   325  		xp, yp = unsafe.Add(xp, incx*Size), unsafe.Add(yp, incy*Size)
   326  	}
   327  }
   328  
   329  //go:noinline
   330  func AxpyPointerLoopXR4(alpha float32, xs []float32, incx uintptr, ys []float32, incy uintptr, n uintptr) {
   331  	const Size = unsafe.Sizeof(xs[0])
   332  
   333  	xp := unsafe.Pointer(unsafe.SliceData(xs))
   334  	yp := unsafe.Pointer(unsafe.SliceData(ys))
   335  
   336  	for ; n >= 4; n -= 4 {
   337  		*(*float32)(unsafe.Add(yp, 0*Size*incy)) += alpha * *(*float32)(unsafe.Add(xp, 0*Size*incx))
   338  		*(*float32)(unsafe.Add(yp, 1*Size*incy)) += alpha * *(*float32)(unsafe.Add(xp, 1*Size*incx))
   339  		*(*float32)(unsafe.Add(yp, 2*Size*incy)) += alpha * *(*float32)(unsafe.Add(xp, 2*Size*incx))
   340  		*(*float32)(unsafe.Add(yp, 3*Size*incy)) += alpha * *(*float32)(unsafe.Add(xp, 3*Size*incx))
   341  		xp, yp = unsafe.Add(xp, 4*incx*Size), unsafe.Add(yp, 4*incy*Size)
   342  	}
   343  	for ; n > 0; n-- {
   344  		*(*float32)(yp) += alpha * *(*float32)(xp)
   345  		xp, yp = unsafe.Add(xp, incx*Size), unsafe.Add(yp, incy*Size)
   346  	}
   347  }
   348  
   349  //go:noinline
   350  func AxpyPointerLoopInterleaveR4(alpha float32, xs []float32, incx uintptr, ys []float32, incy uintptr, n uintptr) {
   351  	const Size = unsafe.Sizeof(xs[0])
   352  
   353  	xp := unsafe.Pointer(unsafe.SliceData(xs))
   354  	yp := unsafe.Pointer(unsafe.SliceData(ys))
   355  
   356  	i := uintptr(0)
   357  	n4 := n & mask4
   358  	for ; i < n4; i += 4 {
   359  		x0 := *(*float32)(unsafe.Add(xp, 0*Size*incx))
   360  		x1 := *(*float32)(unsafe.Add(xp, 1*Size*incx))
   361  		x2 := *(*float32)(unsafe.Add(xp, 2*Size*incx))
   362  		x3 := *(*float32)(unsafe.Add(xp, 3*Size*incx))
   363  
   364  		m0 := alpha * x0
   365  		m1 := alpha * x1
   366  		m2 := alpha * x2
   367  		m3 := alpha * x3
   368  
   369  		t0 := *(*float32)(unsafe.Add(yp, 0*Size*incy)) + m0
   370  		t1 := *(*float32)(unsafe.Add(yp, 1*Size*incy)) + m1
   371  		t2 := *(*float32)(unsafe.Add(yp, 2*Size*incy)) + m2
   372  		t3 := *(*float32)(unsafe.Add(yp, 3*Size*incy)) + m3
   373  
   374  		*(*float32)(unsafe.Add(yp, 0*Size*incy)) = t0
   375  		*(*float32)(unsafe.Add(yp, 1*Size*incy)) = t1
   376  		*(*float32)(unsafe.Add(yp, 2*Size*incy)) = t2
   377  		*(*float32)(unsafe.Add(yp, 3*Size*incy)) = t3
   378  
   379  		xp, yp = unsafe.Add(xp, 4*incx*Size), unsafe.Add(yp, 4*incy*Size)
   380  	}
   381  	for ; i < n; i++ {
   382  		*(*float32)(yp) += alpha * *(*float32)(xp)
   383  		xp, yp = unsafe.Add(xp, incx*Size), unsafe.Add(yp, incy*Size)
   384  	}
   385  }
   386  
   387  //go:noinline
   388  func AxpyPointerLoopInterleaveXR4(alpha float32, xs []float32, incx uintptr, ys []float32, incy uintptr, n uintptr) {
   389  	const Size = unsafe.Sizeof(xs[0])
   390  
   391  	xp := unsafe.Pointer(unsafe.SliceData(xs))
   392  	yp := unsafe.Pointer(unsafe.SliceData(ys))
   393  
   394  	for ; n >= 4; n -= 4 {
   395  		x0 := *(*float32)(unsafe.Add(xp, 0*Size*incx))
   396  		x1 := *(*float32)(unsafe.Add(xp, 1*Size*incx))
   397  		x2 := *(*float32)(unsafe.Add(xp, 2*Size*incx))
   398  		x3 := *(*float32)(unsafe.Add(xp, 3*Size*incx))
   399  
   400  		m0 := alpha * x0
   401  		m1 := alpha * x1
   402  		m2 := alpha * x2
   403  		m3 := alpha * x3
   404  
   405  		t0 := *(*float32)(unsafe.Add(yp, 0*Size*incy)) + m0
   406  		t1 := *(*float32)(unsafe.Add(yp, 1*Size*incy)) + m1
   407  		t2 := *(*float32)(unsafe.Add(yp, 2*Size*incy)) + m2
   408  		t3 := *(*float32)(unsafe.Add(yp, 3*Size*incy)) + m3
   409  
   410  		*(*float32)(unsafe.Add(yp, 0*Size*incy)) = t0
   411  		*(*float32)(unsafe.Add(yp, 1*Size*incy)) = t1
   412  		*(*float32)(unsafe.Add(yp, 2*Size*incy)) = t2
   413  		*(*float32)(unsafe.Add(yp, 3*Size*incy)) = t3
   414  
   415  		xp, yp = unsafe.Add(xp, 4*incx*Size), unsafe.Add(yp, 4*incy*Size)
   416  	}
   417  	for ; n > 0; n-- {
   418  		*(*float32)(yp) += alpha * *(*float32)(xp)
   419  		xp, yp = unsafe.Add(xp, incx*Size), unsafe.Add(yp, incy*Size)
   420  	}
   421  }
   422  
   423  //go:noinline
   424  func AxpyPointerR4Alt(alpha float32, xs []float32, incx uintptr, ys []float32, incy uintptr, n uintptr) {
   425  	const Size = unsafe.Sizeof(xs[0])
   426  
   427  	xp, yp := unsafe.SliceData(xs), unsafe.SliceData(ys)
   428  	xn := offset(xp, n*incx)
   429  	xn4 := offset(xp, (n&mask4)*incx)
   430  
   431  	for less(xp, xn4) {
   432  		*offset(yp, 0*incy) += alpha * *offset(xp, 0*incx)
   433  		*offset(yp, 1*incy) += alpha * *offset(xp, 1*incx)
   434  		*offset(yp, 2*incy) += alpha * *offset(xp, 2*incx)
   435  		*offset(yp, 3*incy) += alpha * *offset(xp, 3*incx)
   436  		xp, yp = offset(xp, 4*incx), offset(yp, 4*incy)
   437  	}
   438  	for less(xp, xn) {
   439  		*yp += alpha * *xp
   440  		xp, yp = offset(xp, incx), offset(yp, incy)
   441  	}
   442  }
   443  
   444  func offset[T any](x *T, count uintptr) *T {
   445  	return (*T)(unsafe.Add(unsafe.Pointer(x), count*unsafe.Sizeof(*x)))
   446  }
   447  func less[T any](x, y *T) bool {
   448  	return uintptr(unsafe.Pointer(x)) < uintptr(unsafe.Pointer(y))
   449  }