gitee.com/quant1x/num@v0.3.2/x32/vek.go (about)

     1  package x32
     2  
     3  import (
     4  	"gitee.com/quant1x/num/internal/constraints"
     5  	"gitee.com/quant1x/num/internal/functions"
     6  	"gitee.com/quant1x/num/math32"
     7  	"slices"
     8  	"unsafe"
     9  )
    10  
    11  // Arithmetic
    12  
    13  // Add returns the result of adding two slices element-wise.
    14  func Add(x, y []float32) []float32 {
    15  	x = slices.Clone(x)
    16  	Add_Inplace(x, y)
    17  	return x
    18  }
    19  
    20  // Add_Inplace adds a slice element-wise to the first slice, inplace.
    21  func Add_Inplace(x, y []float32) {
    22  	checkEqualLength(x, y)
    23  	checkOverlap(x, y)
    24  	if functions.UseAVX2 {
    25  		functions.Add_AVX2_F32(x, y)
    26  	} else {
    27  		functions.Add_Go(x, y)
    28  	}
    29  }
    30  
    31  // Add_Into adds two slices element-wise and stores the result in the destination slice.
    32  func Add_Into(dst, x, y []float32) []float32 {
    33  	dst = checkCapacity(dst, x)
    34  	checkOverlap(dst, x)
    35  	copy(dst, x)
    36  	Add_Inplace(dst, y)
    37  	return dst
    38  }
    39  
    40  // Sub returns the result of subtracting two slices element-wise.
    41  func Sub(x, y []float32) []float32 {
    42  	x = slices.Clone(x)
    43  	Sub_Inplace(x, y)
    44  	return x
    45  }
    46  
    47  // Sub_Inplace subtracts a slice element-wise from the first slice, inplace.
    48  func Sub_Inplace(x, y []float32) {
    49  	checkEqualLength(x, y)
    50  	checkOverlap(x, y)
    51  	if functions.UseAVX2 {
    52  		functions.Sub_AVX2_F32(x, y)
    53  	} else {
    54  		functions.Sub_Go(x, y)
    55  	}
    56  }
    57  
    58  // Sub_Into subtracts two slices element-wise and stores the result in the destination slice.
    59  func Sub_Into(dst, x, y []float32) []float32 {
    60  	dst = checkCapacity(dst, x)
    61  	checkOverlap(dst, x)
    62  	copy(dst, x)
    63  	Sub_Inplace(dst, y)
    64  	return dst
    65  }
    66  
    67  // Mul returns the result of multiplying two slices element-wise.
    68  func Mul(x, y []float32) []float32 {
    69  	x = slices.Clone(x)
    70  	Mul_Inplace(x, y)
    71  	return x
    72  }
    73  
    74  // Mul_Inplace multiplies the first slice element-wise by the second, inplace.
    75  func Mul_Inplace(x, y []float32) {
    76  	checkEqualLength(x, y)
    77  	checkOverlap(x, y)
    78  	if functions.UseAVX2 {
    79  		functions.Mul_AVX2_F32(x, y)
    80  	} else {
    81  		functions.Mul_Go(x, y)
    82  	}
    83  }
    84  
    85  // Mul_Into multiplies two slices element-wise and stores the result in the destination slice.
    86  func Mul_Into(dst, x, y []float32) []float32 {
    87  	dst = checkCapacity(dst, x)
    88  	checkOverlap(dst, x)
    89  	copy(dst, x)
    90  	Mul_Inplace(dst, y)
    91  	return dst
    92  }
    93  
    94  // Div returns the result of dividing two slices element-wise.
    95  func Div(x, y []float32) []float32 {
    96  	x = slices.Clone(x)
    97  	Div_Inplace(x, y)
    98  	return x
    99  }
   100  
   101  // Div_Inplace divides the first slice element-wise by the second, inplace.
   102  func Div_Inplace(x, y []float32) {
   103  	checkEqualLength(x, y)
   104  	checkOverlap(x, y)
   105  	if functions.UseAVX2 {
   106  		functions.Div_AVX2_F32(x, y)
   107  	} else {
   108  		functions.Div_Go(x, y)
   109  	}
   110  }
   111  
   112  // Div_Into divides two slices element-wise and stores the result in the destination slice.
   113  func Div_Into(dst, x, y []float32) []float32 {
   114  	dst = checkCapacity(dst, x)
   115  	checkOverlap(dst, x)
   116  	copy(dst, x)
   117  	Div_Inplace(dst, y)
   118  	return dst
   119  }
   120  
   121  // AddNumber returns the result of adding a number to each slice element.
   122  func AddNumber(x []float32, a float32) []float32 {
   123  	x = slices.Clone(x)
   124  	AddNumber_Inplace(x, a)
   125  	return x
   126  }
   127  
   128  // AddNumber_Inplace adds a number to each slice element, inplace.
   129  func AddNumber_Inplace(x []float32, a float32) {
   130  	if functions.UseAVX2 {
   131  		functions.AddNumber_AVX2_F32(x, a)
   132  	} else {
   133  		functions.AddNumber_Go(x, a)
   134  	}
   135  }
   136  
   137  // AddNumber_Into adds a number to each slice element and stores the result in the
   138  // destination slice.
   139  func AddNumber_Into(dst, x []float32, a float32) []float32 {
   140  	dst = checkCapacity(dst, x)
   141  	checkOverlap(dst, x)
   142  	copy(dst, x)
   143  	AddNumber_Inplace(dst, a)
   144  	return dst
   145  }
   146  
   147  // SubNumber returns the result of subtracting a number from each slice element.
   148  func SubNumber(x []float32, a float32) []float32 {
   149  	x = slices.Clone(x)
   150  	SubNumber_Inplace(x, a)
   151  	return x
   152  }
   153  
   154  // SubNumber_Inplace subtracts a number from each slice element, inplace.
   155  func SubNumber_Inplace(x []float32, a float32) {
   156  	if functions.UseAVX2 {
   157  		functions.SubNumber_AVX2_F32(x, a)
   158  	} else {
   159  		functions.SubNumber_Go(x, a)
   160  	}
   161  }
   162  
   163  // SubNumber_Into subtracts a number from each slice element and stores the result in the
   164  // destination slice.
   165  func SubNumber_Into(dst, x []float32, a float32) []float32 {
   166  	dst = checkCapacity(dst, x)
   167  	checkOverlap(dst, x)
   168  	copy(dst, x)
   169  	SubNumber_Inplace(dst, a)
   170  	return dst
   171  }
   172  
   173  // MulNumber returns the result of multiplying each slice element by a number.
   174  func MulNumber(x []float32, a float32) []float32 {
   175  	x = slices.Clone(x)
   176  	MulNumber_Inplace(x, a)
   177  	return x
   178  }
   179  
   180  // MulNumber_Inplace multiplies each slice element by a number, inplace.
   181  func MulNumber_Inplace(x []float32, a float32) {
   182  	if functions.UseAVX2 {
   183  		functions.MulNumber_AVX2_F32(x, a)
   184  	} else {
   185  		functions.MulNumber_Go(x, a)
   186  	}
   187  }
   188  
   189  // MulNumber_Into multiplies each slice element by a number and stores the result in the
   190  // destination slice.
   191  func MulNumber_Into(dst, x []float32, a float32) []float32 {
   192  	dst = checkCapacity(dst, x)
   193  	checkOverlap(dst, x)
   194  	copy(dst, x)
   195  	MulNumber_Inplace(dst, a)
   196  	return dst
   197  }
   198  
   199  // DivNumber returns the result of dividing each slice element by a number.
   200  func DivNumber(x []float32, a float32) []float32 {
   201  	x = slices.Clone(x)
   202  	DivNumber_Inplace(x, a)
   203  	return x
   204  }
   205  
   206  // DivNumber_Inplace divides each slice element by a number, inplace.
   207  func DivNumber_Inplace(x []float32, a float32) {
   208  	if functions.UseAVX2 {
   209  		functions.DivNumber_AVX2_F32(x, a)
   210  	} else {
   211  		functions.DivNumber_Go(x, a)
   212  	}
   213  }
   214  
   215  // DivNumber_Into divides each slice element by a number and stores the result in the
   216  // destination slice.
   217  func DivNumber_Into(dst, x []float32, a float32) []float32 {
   218  	dst = checkCapacity(dst, x)
   219  	checkOverlap(dst, x)
   220  	copy(dst, x)
   221  	DivNumber_Inplace(dst, a)
   222  	return dst
   223  }
   224  
   225  // Abs returns the absolute value of each slice element.
   226  func Abs(x []float32) []float32 {
   227  	x = slices.Clone(x)
   228  	Abs_Inplace(x)
   229  	return x
   230  }
   231  
   232  // Abs_Inplace computes the absolute value of each slice element, inplace.
   233  func Abs_Inplace(x []float32) {
   234  	if functions.UseAVX2 {
   235  		functions.Abs_AVX2_F32(x)
   236  	} else {
   237  		functions.Abs_Go_F32(x)
   238  	}
   239  }
   240  
   241  // Abs_Into computes the absolute value of each slice element and stores the result in the
   242  // destination slice.
   243  func Abs_Into(dst, x []float32) []float32 {
   244  	dst = checkCapacity(dst, x)
   245  	checkOverlap(dst, x)
   246  	copy(dst, x)
   247  	Abs_Inplace(dst)
   248  	return dst
   249  }
   250  
   251  // Neg returns the additive inverse of each slice element.
   252  func Neg(x []float32) []float32 {
   253  	x = slices.Clone(x)
   254  	Neg_Inplace(x)
   255  	return x
   256  }
   257  
   258  // Neg_Inplace computes the additive inverse of each slice element, inplace.
   259  func Neg_Inplace(x []float32) {
   260  	if functions.UseAVX2 {
   261  		functions.Neg_AVX2_F32(x)
   262  	} else {
   263  		functions.Neg_Go(x)
   264  	}
   265  }
   266  
   267  // Neg_Into computes the additive inverse of each slice element and stores the result in the
   268  // destination slice.
   269  func Neg_Into(dst, x []float32) []float32 {
   270  	dst = checkCapacity(dst, x)
   271  	checkOverlap(dst, x)
   272  	copy(dst, x)
   273  	Neg_Inplace(dst)
   274  	return dst
   275  }
   276  
   277  // Inv returns the multiplicative inverse of each slice element.
   278  func Inv(x []float32) []float32 {
   279  	x = slices.Clone(x)
   280  	Inv_Inplace(x)
   281  	return x
   282  }
   283  
   284  // Inv_Inplace computes the multiplicative inverse of each slice element, inplace.
   285  func Inv_Inplace(x []float32) {
   286  	if functions.UseAVX2 {
   287  		functions.Inv_AVX2_F32(x)
   288  	} else {
   289  		functions.Inv_Go(x)
   290  	}
   291  }
   292  
   293  // Inv_Into computes the multiplicative inverse of each slice element and stores the result
   294  // in the destination slice.
   295  func Inv_Into(dst, x []float32) []float32 {
   296  	dst = checkCapacity(dst, x)
   297  	checkOverlap(dst, x)
   298  	copy(dst, x)
   299  	Inv_Inplace(dst)
   300  	return dst
   301  }
   302  
   303  // Aggregates
   304  
   305  // Sum returns the sum of all elements in a slice.
   306  func Sum(x []float32) float32 {
   307  	if functions.UseAVX2 {
   308  		return functions.Sum_AVX2_F32(x)
   309  	} else {
   310  		return functions.Sum_Go(x)
   311  	}
   312  }
   313  
   314  // CumSum returns the cumulative sum of a slice. The element at index i equals the sum of x[:i+1].
   315  func CumSum(x []float32) []float32 {
   316  	x = slices.Clone(x)
   317  	CumSum_Inplace(x)
   318  	return x
   319  }
   320  
   321  // CumSum_Inplace computes the cumulative sum of a slice, inplace. The new element at index i
   322  // equals the sum of x[:i+1].
   323  func CumSum_Inplace(x []float32) {
   324  	if functions.UseAVX2 {
   325  		functions.CumSum_AVX2_F32(x)
   326  	} else {
   327  		functions.CumSum_Go(x)
   328  	}
   329  }
   330  
   331  // CumSum_Into computes the cumulative sum of a slice and stores the result in the destination
   332  // slice. The element at index i equals the sum of x[:i+1].
   333  func CumSum_Into(dst, x []float32) []float32 {
   334  	dst = checkCapacity(dst, x)
   335  	checkOverlap(dst, x)
   336  	copy(dst, x)
   337  	CumSum_Inplace(dst)
   338  	return dst
   339  }
   340  
   341  // Prod returns the product of all elements in a slice.
   342  func Prod(x []float32) float32 {
   343  	if functions.UseAVX2 {
   344  		return functions.Prod_AVX2_F32(x)
   345  	} else {
   346  		return functions.Prod_Go(x)
   347  	}
   348  }
   349  
   350  // CumProd returns the cumulative product of a slice. The element at index i equals the product
   351  // of x[:i+1].
   352  func CumProd(x []float32) []float32 {
   353  	x = slices.Clone(x)
   354  	CumProd_Inplace(x)
   355  	return x
   356  }
   357  
   358  // CumProd_Inplace computes the cumulative product of a slice, inplace. The new element at index i
   359  // equals the product of x[:i+1].
   360  func CumProd_Inplace(x []float32) {
   361  	if functions.UseAVX2 {
   362  		functions.CumProd_AVX2_F32(x)
   363  	} else {
   364  		functions.CumProd_Go(x)
   365  	}
   366  }
   367  
   368  // CumProd_Into computes the cumulative product of a slice and stores the result in the destination
   369  // slice. The element at index i equals the product of x[:i+1].
   370  func CumProd_Into(dst, x []float32) []float32 {
   371  	dst = checkCapacity(dst, x)
   372  	checkOverlap(dst, x)
   373  	copy(dst, x)
   374  	CumProd_Inplace(dst)
   375  	return dst
   376  }
   377  
   378  // Mean returns the arithmetic average of the slice elements.
   379  func Mean(x []float32) float32 {
   380  	checkNotEmpty(x)
   381  	if functions.UseAVX2 {
   382  		return functions.Mean_AVX2_F32(x)
   383  	} else {
   384  		return functions.Mean_Go(x)
   385  	}
   386  }
   387  
   388  // Median returns the median value of the slice elements.
   389  func Median(x []float32) float32 {
   390  	checkNotEmpty(x)
   391  	if functions.UseAVX2 {
   392  		return functions.Median_AVX2_F32(x)
   393  	} else {
   394  		return functions.Median_Go(x)
   395  	}
   396  }
   397  
   398  // Quantile returns the q-th quantile of the slice elements. The value of q should be between
   399  // 0 and 1 (inclusive).
   400  func Quantile(x []float32, q float32) float32 {
   401  	if q < 0 || q > 1 {
   402  		panic("value of q should be between 0 and 1")
   403  	}
   404  	checkNotEmpty(x)
   405  	if functions.UseAVX2 {
   406  		return functions.Quantile_AVX2_F32(x, q)
   407  	} else {
   408  		return functions.Quantile_Go(x, q)
   409  	}
   410  }
   411  
   412  // Distance
   413  
   414  // Dot returns the dot product of two vectors.
   415  func Dot(x, y []float32) float32 {
   416  	checkNotEmpty(x)
   417  	checkEqualLength(x, y)
   418  	if functions.UseAVX2 {
   419  		return functions.Dot_AVX2_F32(x, y)
   420  	} else {
   421  		return functions.Dot_Go(x, y)
   422  	}
   423  }
   424  
   425  // Norm returns the Euclidean norm of a vector, i.e. its length.
   426  func Norm(x []float32) float32 {
   427  	checkNotEmpty(x)
   428  	if functions.UseAVX2 {
   429  		return functions.Norm_AVX2_F32(x)
   430  	} else {
   431  		return functions.Norm_Go_F32(x)
   432  	}
   433  }
   434  
   435  // Distance returns the Euclidean distance between two vectors.
   436  func Distance(x, y []float32) float32 {
   437  	checkNotEmpty(x)
   438  	checkEqualLength(x, y)
   439  	if functions.UseAVX2 {
   440  		return functions.Distance_AVX2_F32(x, y)
   441  	} else {
   442  		return functions.Distance_Go_F32(x, y)
   443  	}
   444  }
   445  
   446  // ManhattanNorm returns the sum of absolute values of the slice elements.
   447  func ManhattanNorm(x []float32) float32 {
   448  	checkNotEmpty(x)
   449  	if functions.UseAVX2 {
   450  		return functions.ManhattanNorm_AVX2_F32(x)
   451  	} else {
   452  		return functions.ManhattanNorm_Go_F32(x)
   453  	}
   454  }
   455  
   456  // ManhattanDistance returns the sum of element-wise absolute differences between two slices.
   457  func ManhattanDistance(x, y []float32) float32 {
   458  	checkNotEmpty(x)
   459  	checkEqualLength(x, y)
   460  	if functions.UseAVX2 {
   461  		return functions.ManhattanDistance_AVX2_F32(x, y)
   462  	} else {
   463  		return functions.ManhattanDistance_Go_F32(x, y)
   464  	}
   465  }
   466  
   467  // CosineSimilarity returns the cosine similarity of two vectors.
   468  func CosineSimilarity(x, y []float32) float32 {
   469  	checkNotEmpty(x)
   470  	checkEqualLength(x, y)
   471  	if functions.UseAVX2 {
   472  		return functions.CosineSimilarity_AVX2_F32(x, y)
   473  	} else {
   474  		return functions.CosineSimilarity_Go_F32(x, y)
   475  	}
   476  }
   477  
   478  // Matrices
   479  
   480  func checkDimensions[T constraints.Float](x, y []T, n int) (int, int) {
   481  	m := len(x) / n
   482  	p := len(y) / n
   483  	if m*n < len(x) || n*p < len(y) {
   484  		panic("slice lengths must be multiple of n")
   485  	}
   486  	return m, p
   487  }
   488  
   489  // MatMul multiplies an m-by-n and n-by-p matrix and returns the resulting m-by-p matrix.
   490  // The matrices should be in row-major order. To multiply a matrix and a vector pass an
   491  // n-by-1 matrix.
   492  func MatMul(x, y []float32, n int) []float32 {
   493  	m, p := checkDimensions(x, y, n)
   494  	dst := make([]float32, m*p)
   495  	if functions.UseAVX2 {
   496  		functions.MatMul_Parallel_AVX2_F32(dst, x, y, m, n, p)
   497  	} else {
   498  		functions.MatMul_Parallel_Go(dst, x, y, m, n, p)
   499  	}
   500  	return dst
   501  }
   502  
   503  // MatMul_Into multiplies an m-by-n and n-by-p matrix and stores the resulting m-by-p matrix
   504  // in the destination slice. The matrices should be in row-major order. To multiply a matrix
   505  // and a vector pass an n-by-1 matrix.
   506  func MatMul_Into(dst, x, y []float32, n int) []float32 {
   507  	m, p := checkDimensions(x, y, n)
   508  	if cap(dst) < m*p {
   509  		panic("destination slice not large enough to hold result")
   510  	}
   511  	Zeros_Into(dst, m*p)
   512  	if functions.UseAVX2 {
   513  		functions.MatMul_Parallel_AVX2_F32(dst, x, y, m, n, p)
   514  	} else {
   515  		functions.MatMul_Parallel_Go(dst, x, y, m, n, p)
   516  	}
   517  	return dst[:m*p]
   518  }
   519  
   520  // Mat4Mul multiplies two 4-by-4 matrices and returns the resulting 4-by-4 matrix. The matrices
   521  // should be in row-major order. To multiply a matrix and a vector batch them into groups of 4.
   522  func Mat4Mul(x, y []float32) []float32 {
   523  	var dst [16]float32
   524  	return Mat4Mul_Into(dst[:], x, y)
   525  }
   526  
   527  // Mat4Mul_Into multiplies two 4-by-4 matrices and stores the resulting 4-by-4 matrix in the
   528  // destination slice. The matrices should be in row-major order. To multiply a matrix and a vector
   529  // batch them into groups of 4.
   530  func Mat4Mul_Into(dst, x, y []float32) []float32 {
   531  	// Note: skipping overlap check due to overhead
   532  	if cap(dst) < 16 || len(x) != 16 || len(y) != 16 {
   533  		panic("slices must be length 16 (4 by 4)")
   534  	}
   535  	if functions.UseAVX2 {
   536  		functions.Mat4Mul_AVX2_F32(dst, x, y)
   537  	} else {
   538  		functions.Mat4Mul_Go(dst, x, y)
   539  	}
   540  	return dst[:16]
   541  }
   542  
   543  // Special
   544  
   545  // Sqrt returns the square root of each slice element.
   546  func Sqrt(x []float32) []float32 {
   547  	x = slices.Clone(x)
   548  	Sqrt_Inplace(x)
   549  	return x
   550  }
   551  
   552  // Sqrt_Inplace computes the square root of each slice element, inplace.
   553  func Sqrt_Inplace(x []float32) {
   554  	if functions.UseAVX2 {
   555  		functions.Sqrt_AVX2_F32(x)
   556  	} else {
   557  		functions.Sqrt_Go_F32(x)
   558  	}
   559  }
   560  
   561  // Sqrt_Into computes the square root of each slice element and stores the result in the
   562  // destination slice.
   563  func Sqrt_Into(dst, x []float32) []float32 {
   564  	dst = checkCapacity(dst, x)
   565  	checkOverlap(dst, x)
   566  	copy(dst, x)
   567  	Sqrt_Inplace(dst)
   568  	return dst
   569  }
   570  
   571  // Round returns the result of rounding each slice element to the nearest integer value.
   572  func Round(x []float32) []float32 {
   573  	x = slices.Clone(x)
   574  	Round_Inplace(x)
   575  	return x
   576  }
   577  
   578  // Round_Inplace rounds each slice element to the nearest integer value, inplace.
   579  func Round_Inplace(x []float32) {
   580  	if functions.UseAVX2 {
   581  		functions.Round_AVX2_F32(x)
   582  	} else {
   583  		functions.Round_Go_F32(x)
   584  	}
   585  }
   586  
   587  // Round_Into rounds each slice element to the nearest integer value and stores the result
   588  // in the destination slice.
   589  func Round_Into(dst, x []float32) []float32 {
   590  	dst = checkCapacity(dst, x)
   591  	checkOverlap(dst, x)
   592  	copy(dst, x)
   593  	Round_Inplace(dst)
   594  	return dst
   595  }
   596  
   597  // Floor returns the result of rounding each slice element to the nearest lesser integer value.
   598  func Floor(x []float32) []float32 {
   599  	x = slices.Clone(x)
   600  	Floor_Inplace(x)
   601  	return x
   602  }
   603  
   604  // Floor_Inplace rounds each slice element to the nearest lesser integer value, inplace.
   605  func Floor_Inplace(x []float32) {
   606  	if functions.UseAVX2 {
   607  		functions.Floor_AVX2_F32(x)
   608  	} else {
   609  		functions.Floor_Go_F32(x)
   610  	}
   611  }
   612  
   613  // Floor_Into rounds each slice element to the nearest lesser integer value and stores the result
   614  // in the destination slice.
   615  func Floor_Into(dst, x []float32) []float32 {
   616  	dst = checkCapacity(dst, x)
   617  	checkOverlap(dst, x)
   618  	copy(dst, x)
   619  	Floor_Inplace(dst)
   620  	return dst
   621  }
   622  
   623  // Ceil returns the result of rounding each slice element to the nearest greater integer value.
   624  func Ceil(x []float32) []float32 {
   625  	x = slices.Clone(x)
   626  	Ceil_Inplace(x)
   627  	return x
   628  }
   629  
   630  // Ceil_Inplace rounds each slice element to the nearest greater integer value, inplace.
   631  func Ceil_Inplace(x []float32) {
   632  	if functions.UseAVX2 {
   633  		functions.Ceil_AVX2_F32(x)
   634  	} else {
   635  		functions.Ceil_Go_F32(x)
   636  	}
   637  }
   638  
   639  // Ceil_Into rounds each slice element to the nearest greater integer value and stores the result
   640  // in the destination slice.
   641  func Ceil_Into(dst, x []float32) []float32 {
   642  	dst = checkCapacity(dst, x)
   643  	checkOverlap(dst, x)
   644  	copy(dst, x)
   645  	Ceil_Inplace(dst)
   646  	return dst
   647  }
   648  
   649  // Pow returns the elements in the first slice raised to the power in the second.
   650  func Pow(x, y []float32) []float32 {
   651  	x = slices.Clone(x)
   652  	Pow_Inplace(x, y)
   653  	return x
   654  }
   655  
   656  // Pow_Inplace raises the elements in the first slice to the power in the second, inplace.
   657  func Pow_Inplace(x, y []float32) {
   658  	checkEqualLength(x, y)
   659  	checkOverlap(x, y)
   660  	if functions.UseAVX2 {
   661  		functions.Pow_AVX2_F32(x, y)
   662  	} else {
   663  		functions.Pow_Go_F32(x, y)
   664  	}
   665  }
   666  
   667  // Pow_Into raises the elements in the first slice to the power in the second and stores the
   668  // result in the destination slice.
   669  func Pow_Into(dst, x, y []float32) []float32 {
   670  	dst = checkCapacity(dst, x)
   671  	checkOverlap(dst, x)
   672  	copy(dst, x)
   673  	Pow_Inplace(dst, y)
   674  	return dst
   675  }
   676  
   677  // Special (32-bit only)
   678  
   679  // Sin returns the sine of each element.
   680  func Sin(x []float32) []float32 {
   681  	x = slices.Clone(x)
   682  	Sin_Inplace(x)
   683  	return x
   684  }
   685  
   686  // Sin_Inplace computes the sine of each element, inplace.
   687  func Sin_Inplace(x []float32) {
   688  	if functions.UseAVX2 {
   689  		functions.Sin_AVX2_F32(x)
   690  	} else {
   691  		functions.Sin_Go_F32(x)
   692  	}
   693  }
   694  
   695  // Sin_Into stores the sine of each element in the destination slice.
   696  func Sin_Into(dst, x []float32) []float32 {
   697  	dst = checkCapacity(dst, x)
   698  	checkOverlap(dst, x)
   699  	copy(dst, x)
   700  	Sin_Inplace(dst)
   701  	return dst
   702  }
   703  
   704  // Cos returns the cosine of each element.
   705  func Cos(x []float32) []float32 {
   706  	x = slices.Clone(x)
   707  	Cos_Inplace(x)
   708  	return x
   709  }
   710  
   711  // Cos_Inplace computes the cosine of each element, inplace.
   712  func Cos_Inplace(x []float32) {
   713  	if functions.UseAVX2 {
   714  		functions.Cos_AVX2_F32(x)
   715  	} else {
   716  		functions.Cos_Go_F32(x)
   717  	}
   718  }
   719  
   720  // Cos_Into stores the cosine of each element in the destination slice.
   721  func Cos_Into(dst, x []float32) []float32 {
   722  	dst = checkCapacity(dst, x)
   723  	checkOverlap(dst, x)
   724  	copy(dst, x)
   725  	Cos_Inplace(dst)
   726  	return dst
   727  }
   728  
   729  // SinCos_Into stores the sine and cosine of each element in the destination slices. Faster than
   730  // calling Sin_Into and Cos_Into individually if both are needed.
   731  func SinCos_Into(dstSin, dstCos, x []float32) {
   732  	dstSin = checkCapacity(dstSin, x)
   733  	dstCos = checkCapacity(dstCos, x)
   734  	checkOverlap(dstSin, x)
   735  	checkOverlap(dstCos, x)
   736  	if functions.UseAVX2 {
   737  		functions.SinCos_AVX2_F32(dstSin, dstCos, x)
   738  	} else {
   739  		functions.SinCos_Go_F32(dstSin, dstCos, x)
   740  	}
   741  }
   742  
   743  // Exp returns the exponential of each element.
   744  func Exp(x []float32) []float32 {
   745  	x = slices.Clone(x)
   746  	Exp_Inplace(x)
   747  	return x
   748  }
   749  
   750  // Exp_Inplace computes the exponential of each element, inplace.
   751  func Exp_Inplace(x []float32) {
   752  	if functions.UseAVX2 {
   753  		functions.Exp_AVX2_F32(x)
   754  	} else {
   755  		functions.Exp_Go_F32(x)
   756  	}
   757  }
   758  
   759  // Exp_Into stores the exponential of each element in the destination slice.
   760  func Exp_Into(dst, x []float32) []float32 {
   761  	dst = checkCapacity(dst, x)
   762  	checkOverlap(dst, x)
   763  	copy(dst, x)
   764  	Exp_Inplace(dst)
   765  	return dst
   766  }
   767  
   768  // Log returns the natural logarithm of each element.
   769  func Log(x []float32) []float32 {
   770  	x = slices.Clone(x)
   771  	Log_Inplace(x)
   772  	return x
   773  }
   774  
   775  // Log_Inplace computes the natural logarithm of each element, inplace.
   776  func Log_Inplace(x []float32) {
   777  	if functions.UseAVX2 {
   778  		functions.Log_AVX2_F32(x)
   779  	} else {
   780  		functions.Log_Go_F32(x)
   781  	}
   782  }
   783  
   784  // Log_Into stores the natural logarithm of each element in the destination slice.
   785  func Log_Into(dst, x []float32) []float32 {
   786  	dst = checkCapacity(dst, x)
   787  	checkOverlap(dst, x)
   788  	copy(dst, x)
   789  	Log_Inplace(dst)
   790  	return dst
   791  }
   792  
   793  // Log2 returns the base 2 logarithm of each element.
   794  func Log2(x []float32) []float32 {
   795  	x = slices.Clone(x)
   796  	Log2_Inplace(x)
   797  	return x
   798  }
   799  
   800  // Log2_Inplace computes the base 2 logarithm of each element, inplace.
   801  func Log2_Inplace(x []float32) {
   802  	if functions.UseAVX2 {
   803  		functions.Log2_AVX2_F32(x)
   804  	} else {
   805  		functions.Log2_Go_F32(x)
   806  	}
   807  }
   808  
   809  // Log2_Into stores the base 2 logarithm of each element in the destination slice.
   810  func Log2_Into(dst, x []float32) []float32 {
   811  	dst = checkCapacity(dst, x)
   812  	checkOverlap(dst, x)
   813  	copy(dst, x)
   814  	Log2_Inplace(dst)
   815  	return dst
   816  }
   817  
   818  // Log10 returns the base 10 logarithm of each element.
   819  func Log10(x []float32) []float32 {
   820  	x = slices.Clone(x)
   821  	Log10_Inplace(x)
   822  	return x
   823  }
   824  
   825  // Log10_Inplace computes the base 10 logarithm of each element, inplace.
   826  func Log10_Inplace(x []float32) {
   827  	if functions.UseAVX2 {
   828  		functions.Log10_AVX2_F32(x)
   829  	} else {
   830  		functions.Log10_Go_F32(x)
   831  	}
   832  }
   833  
   834  // Log10_Into stores the base 10 logarithm of each element in the destination slice.
   835  func Log10_Into(dst, x []float32) []float32 {
   836  	dst = checkCapacity(dst, x)
   837  	checkOverlap(dst, x)
   838  	copy(dst, x)
   839  	Log10_Inplace(dst)
   840  	return dst
   841  }
   842  
   843  // Comparison
   844  
   845  // Min returns the minimum value of a slice.
   846  func Min(x []float32) float32 {
   847  	checkNotEmpty(x)
   848  	if functions.UseAVX2 {
   849  		return functions.Min_AVX2_F32(x)
   850  	} else {
   851  		return functions.Min_Go(x)
   852  	}
   853  }
   854  
   855  // ArgMin returns the (first) index of the minimum value of a slice.
   856  func ArgMin(x []float32) int {
   857  	checkNotEmpty(x)
   858  	if functions.UseAVX2 {
   859  		return functions.ArgMin_AVX2_F32(x)
   860  	} else {
   861  		return functions.ArgMin_Go(x)
   862  	}
   863  }
   864  
   865  // Minimum returns the element-wise minimum values between two slices.
   866  func Minimum(x, y []float32) []float32 {
   867  	x = slices.Clone(x)
   868  	Minimum_Inplace(x, y)
   869  	return x
   870  }
   871  
   872  // Minimum_Inplace compares two slices element-wise and replaces the values in the first slice
   873  // with the minimum value.
   874  func Minimum_Inplace(x, y []float32) {
   875  	checkEqualLength(x, y)
   876  	checkOverlap(x, y)
   877  	if functions.UseAVX2 {
   878  		functions.Minimum_AVX2_F32(x, y)
   879  	} else {
   880  		functions.Minimum_Go(x, y)
   881  	}
   882  }
   883  
   884  // Minimum_Into compares two slices element-wise and stores the minimum values in the destination
   885  // slice.
   886  func Minimum_Into(dst, x, y []float32) []float32 {
   887  	dst = checkCapacity(dst, x)
   888  	checkOverlap(dst, x)
   889  	copy(dst, x)
   890  	Minimum_Inplace(dst, y)
   891  	return dst
   892  }
   893  
   894  // MinimumNumber returns the minimum of a number and each slice element.
   895  func MinimumNumber(x []float32, a float32) []float32 {
   896  	x = slices.Clone(x)
   897  	MinimumNumber_Inplace(x, a)
   898  	return x
   899  }
   900  
   901  // MinimumNumber_Inplace compares a number to each slice element and replaces the values in the
   902  // slice with the minimum value.
   903  func MinimumNumber_Inplace(x []float32, a float32) {
   904  	if functions.UseAVX2 {
   905  		functions.MinimumNumber_AVX2_F32(x, a)
   906  	} else {
   907  		functions.MinimumNumber_Go(x, a)
   908  	}
   909  }
   910  
   911  // MinimumNumber_Into compares a number to each slice element and stores the minimum values in
   912  // the destination slice.
   913  func MinimumNumber_Into(dst, x []float32, a float32) []float32 {
   914  	dst = checkCapacity(dst, x)
   915  	checkOverlap(dst, x)
   916  	copy(dst, x)
   917  	MinimumNumber_Inplace(dst, a)
   918  	return dst
   919  }
   920  
   921  // Max returns the maximum value of a slice.
   922  func Max(x []float32) float32 {
   923  	checkNotEmpty(x)
   924  	if functions.UseAVX2 {
   925  		return functions.Max_AVX2_F32(x)
   926  	} else {
   927  		return functions.Max_Go(x)
   928  	}
   929  }
   930  
   931  // ArgMax returns the (first) index of the maximum value of a slice.
   932  func ArgMax(x []float32) int {
   933  	checkNotEmpty(x)
   934  	if functions.UseAVX2 {
   935  		return functions.ArgMax_AVX2_F32(x)
   936  	} else {
   937  		return functions.ArgMax_Go(x)
   938  	}
   939  }
   940  
   941  // Maximum returns the element-wise maximum values between two slices.
   942  func Maximum(x, y []float32) []float32 {
   943  	x = slices.Clone(x)
   944  	Maximum_Inplace(x, y)
   945  	return x
   946  }
   947  
   948  // Maximum_Inplace compares two slices element-wise and replaces the values in the first slice
   949  // with the maximum value.
   950  func Maximum_Inplace(x, y []float32) {
   951  	checkEqualLength(x, y)
   952  	checkOverlap(x, y)
   953  	if functions.UseAVX2 {
   954  		functions.Maximum_AVX2_F32(x, y)
   955  	} else {
   956  		functions.Maximum_Go(x, y)
   957  	}
   958  }
   959  
   960  // Maximum_Into compares two slices element-wise and stores the maximum values in the destination
   961  // slice.
   962  func Maximum_Into(dst, x, y []float32) []float32 {
   963  	dst = checkCapacity(dst, x)
   964  	checkOverlap(dst, x)
   965  	copy(dst, x)
   966  	Maximum_Inplace(dst, y)
   967  	return dst
   968  }
   969  
   970  // MaximumNumber returns the maximum of a number and each slice element.
   971  func MaximumNumber(x []float32, a float32) []float32 {
   972  	x = slices.Clone(x)
   973  	MaximumNumber_Inplace(x, a)
   974  	return x
   975  }
   976  
   977  // MaximumNumber_Inplace compares a number to each slice element and replaces the values in the
   978  // slice with the maximum value.
   979  func MaximumNumber_Inplace(x []float32, a float32) {
   980  	if functions.UseAVX2 {
   981  		functions.MaximumNumber_AVX2_F32(x, a)
   982  	} else {
   983  		functions.MaximumNumber_Go(x, a)
   984  	}
   985  }
   986  
   987  // MaximumNumber_Into compares a number to each slice element and stores the maximum values in
   988  // the destination slice.
   989  func MaximumNumber_Into(dst, x []float32, a float32) []float32 {
   990  	dst = checkCapacity(dst, x)
   991  	checkOverlap(dst, x)
   992  	copy(dst, x)
   993  	MaximumNumber_Inplace(dst, a)
   994  	return dst
   995  }
   996  
   997  // Find returns the index of the first slice element equal to the given value, or -1 if not found.
   998  func Find(x []float32, a float32) int {
   999  	if functions.UseAVX2 {
  1000  		idx := functions.Find_AVX2_F32(x, a)
  1001  		if idx == len(x) {
  1002  			return -1
  1003  		}
  1004  		return idx
  1005  	} else {
  1006  		return functions.Find_Go(x, a)
  1007  	}
  1008  }
  1009  
  1010  // Lt returns an element-wise "less than" comparison between two slices.
  1011  func Lt(x, y []float32) []bool {
  1012  	dst := make([]bool, len(x))
  1013  	return Lt_Into(dst, x, y)
  1014  }
  1015  
  1016  // Lt_Into stores an element-wise "less than" comparison between two slices in the destination
  1017  // slice.
  1018  func Lt_Into(dst []bool, x, y []float32) []bool {
  1019  	checkEqualLength(x, y)
  1020  	dst = checkCapacity(dst, x)
  1021  	if functions.UseAVX2 {
  1022  		functions.Lt_AVX2_F32(dst, x, y)
  1023  	} else {
  1024  		functions.Lt_Go(dst, x, y)
  1025  	}
  1026  	return dst
  1027  }
  1028  
  1029  // LtNumber returns an element-wise "less than" comparison between each slice element and
  1030  // a number.
  1031  func LtNumber(x []float32, a float32) []bool {
  1032  	dst := make([]bool, len(x))
  1033  	return LtNumber_Into(dst, x, a)
  1034  }
  1035  
  1036  // LtNumber_Into stores an element-wise "less than" comparison between each slice element
  1037  // and a number in the destination slice.
  1038  func LtNumber_Into(dst []bool, x []float32, a float32) []bool {
  1039  	dst = checkCapacity(dst, x)
  1040  	if functions.UseAVX2 {
  1041  		functions.LtNumber_AVX2_F32(dst, x, a)
  1042  	} else {
  1043  		functions.LtNumber_Go(dst, x, a)
  1044  	}
  1045  	return dst
  1046  }
  1047  
  1048  // Lte returns an element-wise "less than or equal" comparison between two slices.
  1049  func Lte(x, y []float32) []bool {
  1050  	dst := make([]bool, len(x))
  1051  	return Lte_Into(dst, x, y)
  1052  }
  1053  
  1054  // Lte_Into stores an element-wise "less than or equal" comparison between two slices in the
  1055  // destination slice.
  1056  func Lte_Into(dst []bool, x, y []float32) []bool {
  1057  	checkEqualLength(x, y)
  1058  	dst = checkCapacity(dst, x)
  1059  	if functions.UseAVX2 {
  1060  		functions.Lte_AVX2_F32(dst, x, y)
  1061  	} else {
  1062  		functions.Lte_Go(dst, x, y)
  1063  	}
  1064  	return dst
  1065  }
  1066  
  1067  // LteNumber returns an element-wise "less than or equal" comparison between each slice element
  1068  // and a number.
  1069  func LteNumber(x []float32, a float32) []bool {
  1070  	dst := make([]bool, len(x))
  1071  	return LteNumber_Into(dst, x, a)
  1072  }
  1073  
  1074  // LteNumber_Into stores an element-wise "less than or equal" comparison between each slice
  1075  // element and a number in the destination slice.
  1076  func LteNumber_Into(dst []bool, x []float32, a float32) []bool {
  1077  	dst = checkCapacity(dst, x)
  1078  	if functions.UseAVX2 {
  1079  		functions.LteNumber_AVX2_F32(dst, x, a)
  1080  	} else {
  1081  		functions.LteNumber_Go(dst, x, a)
  1082  	}
  1083  	return dst
  1084  }
  1085  
  1086  // Gt returns an element-wise "greater than" comparison between two slices.
  1087  func Gt(x, y []float32) []bool {
  1088  	dst := make([]bool, len(x))
  1089  	return Gt_Into(dst, x, y)
  1090  }
  1091  
  1092  // Gt_Into stores an element-wise "greater than" comparison between two slices in the destination
  1093  // slice.
  1094  func Gt_Into(dst []bool, x, y []float32) []bool {
  1095  	checkEqualLength(x, y)
  1096  	dst = checkCapacity(dst, x)
  1097  	if functions.UseAVX2 {
  1098  		functions.Gt_AVX2_F32(dst, x, y)
  1099  	} else {
  1100  		functions.Gt_Go(dst, x, y)
  1101  	}
  1102  	return dst
  1103  }
  1104  
  1105  // GtNumber returns an element-wise "greater than" comparison between each slice element and
  1106  // a number.
  1107  func GtNumber(x []float32, a float32) []bool {
  1108  	dst := make([]bool, len(x))
  1109  	return GtNumber_Into(dst, x, a)
  1110  }
  1111  
  1112  // GtNumber_Into stores an element-wise "greater than" comparison between each slice element and
  1113  // a number in the destination slice.
  1114  func GtNumber_Into(dst []bool, x []float32, a float32) []bool {
  1115  	dst = checkCapacity(dst, x)
  1116  	if functions.UseAVX2 {
  1117  		functions.GtNumber_AVX2_F32(dst, x, a)
  1118  	} else {
  1119  		functions.GtNumber_Go(dst, x, a)
  1120  	}
  1121  	return dst
  1122  }
  1123  
  1124  // Gte returns an element-wise "greater than or equal" comparison between two slices.
  1125  func Gte(x, y []float32) []bool {
  1126  	dst := make([]bool, len(x))
  1127  	return Gte_Into(dst, x, y)
  1128  }
  1129  
  1130  // Gte_Into stores an element-wise "greater than or equal" comparison between two slices in the
  1131  // destination slice.
  1132  func Gte_Into(dst []bool, x, y []float32) []bool {
  1133  	checkEqualLength(x, y)
  1134  	dst = checkCapacity(dst, x)
  1135  	if functions.UseAVX2 {
  1136  		functions.Gte_AVX2_F32(dst, x, y)
  1137  	} else {
  1138  		functions.Gte_Go(dst, x, y)
  1139  	}
  1140  	return dst
  1141  }
  1142  
  1143  // GteNumber returns an element-wise "greater than or equal" comparison between each slice element
  1144  // and a number.
  1145  func GteNumber(x []float32, a float32) []bool {
  1146  	dst := make([]bool, len(x))
  1147  	return GteNumber_Into(dst, x, a)
  1148  }
  1149  
  1150  // GteNumber_Into stores an element-wise "greater than or equal" comparison between each slice
  1151  // element and a number in the destination slice.
  1152  func GteNumber_Into(dst []bool, x []float32, a float32) []bool {
  1153  	dst = checkCapacity(dst, x)
  1154  	if functions.UseAVX2 {
  1155  		functions.GteNumber_AVX2_F32(dst, x, a)
  1156  	} else {
  1157  		functions.GteNumber_Go(dst, x, a)
  1158  	}
  1159  	return dst
  1160  }
  1161  
  1162  // Eq returns an element-wise equality comparison between two slices.
  1163  func Eq(x, y []float32) []bool {
  1164  	dst := make([]bool, len(x))
  1165  	return Eq_Into(dst, x, y)
  1166  }
  1167  
  1168  // Eq_Into stores an element-wise equality comparison between two slices in the destination
  1169  // slice.
  1170  func Eq_Into(dst []bool, x, y []float32) []bool {
  1171  	checkEqualLength(x, y)
  1172  	dst = checkCapacity(dst, x)
  1173  	if functions.UseAVX2 {
  1174  		functions.Eq_AVX2_F32(dst, x, y)
  1175  	} else {
  1176  		functions.Eq_Go(dst, x, y)
  1177  	}
  1178  	return dst
  1179  }
  1180  
  1181  // EqNumber returns an element-wise equality comparison between each slice element and a number.
  1182  func EqNumber(x []float32, a float32) []bool {
  1183  	dst := make([]bool, len(x))
  1184  	return EqNumber_Into(dst, x, a)
  1185  }
  1186  
  1187  // EqNumber_Into stores an element-wise equality comparison between each slice element and a
  1188  // number in the destination slice.
  1189  func EqNumber_Into(dst []bool, x []float32, a float32) []bool {
  1190  	dst = checkCapacity(dst, x)
  1191  	if functions.UseAVX2 {
  1192  		functions.EqNumber_AVX2_F32(dst, x, a)
  1193  	} else {
  1194  		functions.EqNumber_Go(dst, x, a)
  1195  	}
  1196  	return dst
  1197  }
  1198  
  1199  // Neq returns an element-wise "not equal" comparison between two slices.
  1200  func Neq(x, y []float32) []bool {
  1201  	dst := make([]bool, len(x))
  1202  	return Neq_Into(dst, x, y)
  1203  }
  1204  
  1205  // Neq_Into stores an element-wise "not equal" comparison between two slices in the destination
  1206  // slice.
  1207  func Neq_Into(dst []bool, x, y []float32) []bool {
  1208  	checkEqualLength(x, y)
  1209  	dst = checkCapacity(dst, x)
  1210  	if functions.UseAVX2 {
  1211  		functions.Neq_AVX2_F32(dst, x, y)
  1212  	} else {
  1213  		functions.Neq_Go(dst, x, y)
  1214  	}
  1215  	return dst
  1216  }
  1217  
  1218  // NeqNumber returns an element-wise "not equal" comparison between each slice element and a number.
  1219  func NeqNumber(x []float32, a float32) []bool {
  1220  	dst := make([]bool, len(x))
  1221  	return NeqNumber_Into(dst, x, a)
  1222  }
  1223  
  1224  // NeqNumber_Into stores an element-wise "not equal" comparison between each slice element and a
  1225  // number in the destination slice.
  1226  func NeqNumber_Into(dst []bool, x []float32, a float32) []bool {
  1227  	dst = checkCapacity(dst, x)
  1228  	if functions.UseAVX2 {
  1229  		functions.NeqNumber_AVX2_F32(dst, x, a)
  1230  	} else {
  1231  		functions.NeqNumber_Go(dst, x, a)
  1232  	}
  1233  	return dst
  1234  }
  1235  
  1236  // Boolean
  1237  
  1238  // Not returns the logical negation of each slice element.
  1239  func Not(x []bool) []bool {
  1240  	x = slices.Clone(x)
  1241  	Not_Inplace(x)
  1242  	return x
  1243  }
  1244  
  1245  // Not_Inplace computes the logical negation of each slice element, inplace.
  1246  func Not_Inplace(x []bool) {
  1247  	if functions.UseAVX2 {
  1248  		functions.Not_AVX2(x)
  1249  	} else {
  1250  		functions.Not_Go(x)
  1251  	}
  1252  }
  1253  
  1254  // Not_Into stores the logical negation of each slice element in the destination slice.
  1255  func Not_Into(dst, x []bool) []bool {
  1256  	dst = checkCapacity(dst, x)
  1257  	checkOverlap(dst, x)
  1258  	copy(dst, x)
  1259  	Not_Inplace(dst)
  1260  	return dst
  1261  }
  1262  
  1263  // And returns the element-wise logical "and" operation between two slices.
  1264  func And(x, y []bool) []bool {
  1265  	x = slices.Clone(x)
  1266  	And_Inplace(x, y)
  1267  	return x
  1268  }
  1269  
  1270  // And_Inplace computes the element-wise logical "and" operation between two slices, inplace.
  1271  func And_Inplace(x, y []bool) {
  1272  	checkEqualLength(x, y)
  1273  	checkOverlap(x, y)
  1274  	if functions.UseAVX2 {
  1275  		functions.And_AVX2(x, y)
  1276  	} else {
  1277  		functions.And_Go(x, y)
  1278  	}
  1279  }
  1280  
  1281  // And_Into stores the element-wise logical "and" operation between two slices in the destination
  1282  // slice.
  1283  func And_Into(dst, x, y []bool) []bool {
  1284  	dst = checkCapacity(dst, x)
  1285  	checkOverlap(dst, x)
  1286  	copy(dst, x)
  1287  	And_Inplace(dst, y)
  1288  	return dst
  1289  }
  1290  
  1291  // Or returns the element-wise logical "or" operation between two slices.
  1292  func Or(x, y []bool) []bool {
  1293  	x = slices.Clone(x)
  1294  	Or_Inplace(x, y)
  1295  	return x
  1296  }
  1297  
  1298  // Or_Inplace computes the element-wise logical "or" operation between two slices, inplace.
  1299  func Or_Inplace(x, y []bool) {
  1300  	checkEqualLength(x, y)
  1301  	checkOverlap(x, y)
  1302  	if functions.UseAVX2 {
  1303  		functions.Or_AVX2(x, y)
  1304  	} else {
  1305  		functions.Or_Go(x, y)
  1306  	}
  1307  }
  1308  
  1309  // Or_Into stores the element-wise logical "or" operation between two slices in the destination
  1310  // slice.
  1311  func Or_Into(dst, x, y []bool) []bool {
  1312  	dst = checkCapacity(dst, x)
  1313  	checkOverlap(dst, x)
  1314  	copy(dst, x)
  1315  	Or_Inplace(dst, y)
  1316  	return dst
  1317  }
  1318  
  1319  // Xor returns the element-wise "exclusive or" operation between two slices.
  1320  func Xor(x, y []bool) []bool {
  1321  	x = slices.Clone(x)
  1322  	Xor_Inplace(x, y)
  1323  	return x
  1324  }
  1325  
  1326  // Xor_Inplace computes the element-wise "exclusive or" operation between two slices, inplace.
  1327  func Xor_Inplace(x, y []bool) {
  1328  	checkEqualLength(x, y)
  1329  	checkOverlap(x, y)
  1330  	if functions.UseAVX2 {
  1331  		functions.Xor_AVX2(x, y)
  1332  	} else {
  1333  		functions.Xor_Go(x, y)
  1334  	}
  1335  }
  1336  
  1337  // Xor_Into stores the element-wise "exclusive or" operation between two slices in the destination
  1338  // slice.
  1339  func Xor_Into(dst, x, y []bool) []bool {
  1340  	dst = checkCapacity(dst, x)
  1341  	checkOverlap(dst, x)
  1342  	copy(dst, x)
  1343  	Xor_Inplace(dst, y)
  1344  	return dst
  1345  }
  1346  
  1347  // Select returns the slice elements for which the corresponding boolean values are true.
  1348  func Select(x []float32, y []bool) []float32 {
  1349  	dst := make([]float32, 0)
  1350  	return Select_Into(dst, x, y)
  1351  }
  1352  
  1353  // Select_Into stores the slice elements for which the corresponding boolean values are true in
  1354  // the destination slice. This function grows the destination slice if the selection yields more
  1355  // values than it has capacity for.
  1356  func Select_Into(dst, x []float32, y []bool) []float32 {
  1357  	checkEqualLength(x, y)
  1358  	//checkOverlap(dst[:cap(dst)], x)
  1359  	return functions.Select_Go(dst, x, y)
  1360  }
  1361  
  1362  // All returns whether all boolean values in the slice are true.
  1363  func All(x []bool) bool {
  1364  	if functions.UseAVX2 {
  1365  		return functions.All_AVX2(x) != 0
  1366  	} else {
  1367  		return functions.All_Go(x)
  1368  	}
  1369  }
  1370  
  1371  // Any returns whether at least one boolean value in the slice is true.
  1372  func Any(x []bool) bool {
  1373  	if functions.UseAVX2 {
  1374  		return functions.Any_AVX2(x) != 0
  1375  	} else {
  1376  		return functions.Any_Go(x)
  1377  	}
  1378  }
  1379  
  1380  // None returns whether none of the boolean values in the slice are true.
  1381  func None(x []bool) bool {
  1382  	if functions.UseAVX2 {
  1383  		return functions.None_AVX2(x) != 0
  1384  	} else {
  1385  		return functions.None_Go(x)
  1386  	}
  1387  }
  1388  
  1389  // Count returns the number of boolean values that are true.
  1390  func Count(x []bool) int {
  1391  	if functions.UseAVX2 {
  1392  		return functions.Count_AVX2(x)
  1393  	} else {
  1394  		return functions.Count_Go(x)
  1395  	}
  1396  }
  1397  
  1398  // Construction
  1399  
  1400  // Zeros returns a new slice of length n filled with zeros.
  1401  func Zeros(n int) []float32 {
  1402  	dst := make([]float32, n)
  1403  	return Repeat_Into(dst, 0, n)
  1404  }
  1405  
  1406  // Zeros_Into sets the first n elements in the destination slice to zero.
  1407  func Zeros_Into(dst []float32, n int) []float32 {
  1408  	return Repeat_Into(dst, 0, n)
  1409  }
  1410  
  1411  // Ones returns a new slice of length n filled with ones.
  1412  func Ones(n int) []float32 {
  1413  	dst := make([]float32, n)
  1414  	return Repeat_Into(dst, 1, n)
  1415  }
  1416  
  1417  // Ones_Into sets the first n elements in the destination slice to one.
  1418  func Ones_Into(dst []float32, n int) []float32 {
  1419  	return Repeat_Into(dst, 1, n)
  1420  }
  1421  
  1422  // Repeat returns a new slice of length n filled with the given value.
  1423  func Repeat(a float32, n int) []float32 {
  1424  	dst := make([]float32, n)
  1425  	return Repeat_Into(dst, a, n)
  1426  }
  1427  
  1428  // Repeat_Into sets the first n elements in the destination slice to the given value.
  1429  func Repeat_Into(dst []float32, a float32, n int) []float32 {
  1430  	if cap(dst) < n {
  1431  		panic("destination slice not large enough to hold result")
  1432  	}
  1433  	if functions.UseAVX2 {
  1434  		functions.Repeat_AVX2_F32(dst, a, n)
  1435  	} else {
  1436  		functions.Repeat_Go(dst, a, n)
  1437  	}
  1438  	return dst[:n]
  1439  }
  1440  
  1441  // Range returns a new slice with values incrementing from a to b (excl.) in steps of 1.
  1442  func Range(a, b float32) []float32 {
  1443  	dst := make([]float32, int(math32.Max(0, math32.Ceil(b-a))))
  1444  	return Range_Into(dst, a, b)
  1445  }
  1446  
  1447  // Range_Into writes values incrementing from a to b (excl.) in steps of 1 to the destination slice.
  1448  func Range_Into(dst []float32, a, b float32) []float32 {
  1449  	n := int(math32.Max(0, math32.Ceil(b-a)))
  1450  	if cap(dst) < n {
  1451  		panic("destination slice not large enough to hold result")
  1452  	}
  1453  	if functions.UseAVX2 {
  1454  		functions.Range_AVX2_F32(dst, a, n)
  1455  	} else {
  1456  		functions.Range_Go(dst, a, n)
  1457  	}
  1458  	return dst[:n]
  1459  }
  1460  
  1461  // Gather returns a new slice containing just the elements at the given indices.
  1462  func Gather(x []float32, idx []int) []float32 {
  1463  	dst := make([]float32, len(idx))
  1464  	return Gather_Into(dst, x, idx)
  1465  }
  1466  
  1467  // Gather_Into stores the slice elements at the given indices in the destination slice.
  1468  func Gather_Into(dst, x []float32, idx []int) []float32 {
  1469  	dst = checkCapacity(dst, idx)
  1470  	checkOverlap(dst, x)
  1471  	functions.Gather_Go(dst, x, idx)
  1472  	return dst
  1473  }
  1474  
  1475  // Scatter returns a slice of size elements where position idx[i] is set to x[i], for all
  1476  // i < len(x) = len(idx). The other elements are set to zero.
  1477  func Scatter(x []float32, idx []int, size int) []float32 {
  1478  	dst := make([]float32, size)
  1479  	return Scatter_Into(dst, x, idx)
  1480  }
  1481  
  1482  // Scatter_Into sets positions idx[i] to x[i] in the destination slice, for all
  1483  // i < len(x) = len(idx). The other elements are not modified.
  1484  func Scatter_Into(dst, x []float32, idx []int) []float32 {
  1485  	checkOverlap(dst, x)
  1486  	functions.Scatter_Go(dst, x, idx)
  1487  	return dst
  1488  }
  1489  
  1490  // FromBool creates a slice of floats from a slice of bools by converting all false values to 0
  1491  // and all true values to 1.
  1492  func FromBool(x []bool) []float32 {
  1493  	dst := make([]float32, len(x))
  1494  	return FromBool_Into(dst, x)
  1495  }
  1496  
  1497  // FromBool_Into creates a slice of floats from a slice of bools by converting all false values
  1498  // to 0 and all true values to 1. The result is stored in the destination slice.
  1499  func FromBool_Into(dst []float32, x []bool) []float32 {
  1500  	dst = checkCapacity(dst, x)
  1501  	if functions.UseAVX2 {
  1502  		functions.FromBool_AVX2_F32(dst, x)
  1503  	} else {
  1504  		functions.FromBool_Go(dst, x)
  1505  	}
  1506  	return dst
  1507  }
  1508  
  1509  // FromInt64 creates a slice of floats from a slice of 64-bit ints. Standard conversion rules
  1510  // apply.
  1511  func FromInt64(x []int64) []float32 {
  1512  	dst := make([]float32, len(x))
  1513  	return FromInt64_Into(dst, x)
  1514  }
  1515  
  1516  // FromInt64_Into creates a slice of floats from a slice of 64-bit ints. Standard conversion
  1517  // rules apply. The result is stored in the destination slice.
  1518  func FromInt64_Into(dst []float32, x []int64) []float32 {
  1519  	dst = checkCapacity(dst, x)
  1520  	if functions.UseAVX2 {
  1521  		functions.FromInt64_AVX2_F32(dst, x)
  1522  	} else {
  1523  		functions.FromNumber_Go(dst, x)
  1524  	}
  1525  	return dst
  1526  }
  1527  
  1528  // FromInt32 creates a slice of floats from a slice of 32-bit ints. Standard conversion rules
  1529  // apply.
  1530  func FromInt32(x []int32) []float32 {
  1531  	dst := make([]float32, len(x))
  1532  	return FromInt32_Into(dst, x)
  1533  }
  1534  
  1535  // FromInt32_Into creates a slice of floats from a slice of 32-bit ints. Standard conversion
  1536  // rules apply. The result is stored in the destination slice.
  1537  func FromInt32_Into(dst []float32, x []int32) []float32 {
  1538  	dst = checkCapacity(dst, x)
  1539  	if functions.UseAVX2 {
  1540  		functions.FromInt32_AVX2_F32(dst, x)
  1541  	} else {
  1542  		functions.FromNumber_Go(dst, x)
  1543  	}
  1544  	return dst
  1545  }
  1546  
  1547  // FromFloat64 creates a slice of floats from a slice of 64-bit floats. Standard conversion
  1548  // rules apply.
  1549  func FromFloat64(x []float64) []float32 {
  1550  	dst := make([]float32, len(x))
  1551  	return FromFloat64_Into(dst, x)
  1552  }
  1553  
  1554  // FromFloat64_Into creates a slice of floats from a slice of 32-bit floats. Standard conversion
  1555  // rules apply. The result is stored in the destination slice.
  1556  func FromFloat64_Into(dst []float32, x []float64) []float32 {
  1557  	dst = checkCapacity(dst, x)
  1558  	if functions.UseAVX2 {
  1559  		functions.FromFloat64_AVX2_F32(dst, x)
  1560  	} else {
  1561  		functions.FromNumber_Go(dst, x)
  1562  	}
  1563  	return dst
  1564  }
  1565  
  1566  // ToBool converts a slice of floats to a slice of bools by setting all zero values to true and
  1567  // all non-zero values to false.
  1568  func ToBool(x []float64) []bool {
  1569  	dst := make([]bool, len(x))
  1570  	return ToBool_Into(dst, x)
  1571  }
  1572  
  1573  // ToBool_Into converts a slice of floats to a slice of bools by setting all zero values to true
  1574  // and all non-zero values to false. The result is stored in the destination slice.
  1575  func ToBool_Into(dst []bool, x []float64) []bool {
  1576  	dst = checkCapacity(dst, x)
  1577  	if functions.UseAVX2 {
  1578  		functions.ToBool_AVX2_F64(dst, x)
  1579  	} else {
  1580  		functions.ToBool_Go(dst, x)
  1581  	}
  1582  	return dst
  1583  }
  1584  
  1585  // ToInt64 converts a slice of floats to a slice of 64-bit ints. Standard conversion rules apply.
  1586  func ToInt64(x []float32) []int64 {
  1587  	dst := make([]int64, len(x))
  1588  	return ToInt64_Into(dst, x)
  1589  }
  1590  
  1591  // ToInt64_Into converts a slice of floats to a slice of 64-bit ints. Standard conversion rules
  1592  // apply. The result is stored in the destination slice.
  1593  func ToInt64_Into(dst []int64, x []float32) []int64 {
  1594  	dst = checkCapacity(dst, x)
  1595  	if functions.UseAVX2 {
  1596  		functions.ToInt64_AVX2_F32(dst, x)
  1597  	} else {
  1598  		functions.ToNumber_Go(dst, x)
  1599  	}
  1600  	return dst
  1601  }
  1602  
  1603  // ToInt32 converts a slice of floats to a slice of 32-bit ints. Standard conversion rules apply.
  1604  func ToInt32(x []float32) []int32 {
  1605  	dst := make([]int32, len(x))
  1606  	return ToInt32_Into(dst, x)
  1607  }
  1608  
  1609  // ToInt32_Into converts a slice of floats to a slice of 32-bit ints. Standard conversion rules
  1610  // apply. The result is stored in the destination slice.
  1611  func ToInt32_Into(dst []int32, x []float32) []int32 {
  1612  	dst = checkCapacity(dst, x)
  1613  	if functions.UseAVX2 {
  1614  		functions.ToInt32_AVX2_F32(dst, x)
  1615  	} else {
  1616  		functions.ToNumber_Go(dst, x)
  1617  	}
  1618  	return dst
  1619  }
  1620  
  1621  // ToFloat64 converts a slice of floats to a slice of 32-bit floats. Standard conversion rules
  1622  // apply.
  1623  func ToFloat64(x []float32) []float64 {
  1624  	dst := make([]float64, len(x))
  1625  	return ToFloat64_Into(dst, x)
  1626  }
  1627  
  1628  // ToFloat64_Into converts a slice of floats to a slice of 64-bit floats. Standard conversion
  1629  // rules apply. The result is stored in the destination slice.
  1630  func ToFloat64_Into(dst []float64, x []float32) []float64 {
  1631  	dst = checkCapacity(dst, x)
  1632  	if functions.UseAVX2 {
  1633  		functions.FromFloat32_AVX2_F64(dst, x)
  1634  	} else {
  1635  		functions.ToNumber_Go(dst, x)
  1636  	}
  1637  	return dst
  1638  }
  1639  
  1640  // Validation
  1641  
  1642  func slicesOverlap[T, F any](x []T, y []F) bool {
  1643  	if len(x) == 0 || len(y) == 0 {
  1644  		return false
  1645  	}
  1646  	xStart := uintptr(unsafe.Pointer(&x[0]))
  1647  	xEnd := uintptr(unsafe.Pointer(&x[len(x)-1]))
  1648  	yStart := uintptr(unsafe.Pointer(&y[0]))
  1649  	yEnd := uintptr(unsafe.Pointer(&y[len(y)-1]))
  1650  	return xStart <= yEnd && yStart <= xEnd
  1651  }
  1652  
  1653  func checkCapacity[T, F any](dst []T, x []F) []T {
  1654  	if cap(dst) < len(x) {
  1655  		panic("destination slice not large enough to hold result")
  1656  	}
  1657  	return dst[:len(x)]
  1658  }
  1659  
  1660  func checkEqualLength[T, F any](x []T, y []F) {
  1661  	if len(x) != len(y) {
  1662  		panic("slices must be of equal length")
  1663  	}
  1664  }
  1665  
  1666  func checkNotEmpty[T any](x []T) {
  1667  	if len(x) == 0 {
  1668  		panic("slice must not be empty")
  1669  	}
  1670  }
  1671  
  1672  func checkOverlap[T, F any](x []T, y []F) {
  1673  	if slicesOverlap(x, y) {
  1674  		panic("destination slice must not overlap input slice")
  1675  	}
  1676  }