gorgonia.org/gorgonia@v0.9.17/cuda/cmp.go

package cuda

import (
	"unsafe"

	"github.com/pkg/errors"
	"gorgonia.org/cu"
	"gorgonia.org/tensor"
)

// Code generated by gencudaengine, which is an API generation tool for Gorgonia. DO NOT EDIT.

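// The comparison methods below (Lt, Gt, Lte, Gte, ElEq, ElNe and their
// *Scalar variants) all follow the same generated pattern: build the kernel
// name for the operation and operand dtype, check that the kernel has been
// loaded into the engine, resolve the output location from the FuncOpts
// (either a reuse tensor or, for unsafe operation, a itself), and launch the
// element-wise CUDA kernel over the logical size of the output. Usage
// sketches follow Lt and LtScalar below.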
// Lt implements tensor.Lter. It does not support safe or increment operation options and will return an error if those options are passed in
func (e *Engine) Lt(a tensor.Tensor, b tensor.Tensor, opts ...tensor.FuncOpt) (retVal tensor.Tensor, err error) {
	name := constructName2(a, b, "lt")

	if !e.HasFunc(name) {
		return nil, errors.Errorf("Unable to perform Lt(). The tensor engine does not have the function %q", name)
	}

	if err = binaryCheck(a, b); err != nil {
		return nil, errors.Wrap(err, "Basic checks failed for Lt")
	}

	var reuse tensor.DenseTensor
	var safe, toReuse bool
	if reuse, safe, toReuse, _, _, err = handleFuncOpts(a.Shape(), a.Dtype(), a.DataOrder(), true, opts...); err != nil {
		return nil, errors.Wrap(err, "Unable to handle funcOpts")
	}

	var mem, memB cu.DevicePtr
	var size int64

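	// Decide where the result goes: with a reuse tensor, a is first copied
	// into it and the kernel writes there; with an unsafe operation, a's own
	// memory is overwritten. A plain safe call has no destination and is
	// rejected below.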
	switch {
	case toReuse:
		mem = cu.DevicePtr(reuse.Uintptr())
		memA := cu.DevicePtr(a.Uintptr())
		memSize := int64(a.MemSize())
		e.memcpy(mem, memA, memSize)

		size = int64(logicalSize(reuse.Shape()))
		retVal = reuse
	case !safe:
		mem = cu.DevicePtr(a.Uintptr())
		retVal = a
		size = int64(logicalSize(a.Shape()))
	default:
		return nil, errors.New("Impossible state: A reuse tensor must be passed in, or the operation must be unsafe. Incr and safe operations are not supported")
	}

	memB = cu.DevicePtr(b.Uintptr())
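	// Look up the preloaded kernel and launch it over a grid sized to the
	// logical element count; the arguments are pointers to the output/first
	// operand, the second operand, and the element count.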
	fn := e.f[name]
	gridDimX, gridDimY, gridDimZ, blockDimX, blockDimY, blockDimZ := e.ElemGridSize(int(size))
	args := []unsafe.Pointer{
		unsafe.Pointer(&mem),
		unsafe.Pointer(&memB),
		unsafe.Pointer(&size),
	}
	logf("gx %d, gy %d, gz %d | bx %d by %d, bz %d", gridDimX, gridDimY, gridDimZ, blockDimX, blockDimY, blockDimZ)
	logf("CUDADO %q, Mem: %v MemB: %v size %v, args %v", name, mem, memB, size, args)
	logf("LaunchKernel Params. mem: %v. Size %v", mem, size)
	e.c.LaunchAndSync(fn, gridDimX, gridDimY, gridDimZ, blockDimX, blockDimY, blockDimZ, 0, cu.NoStream, args)
	return
}
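
// Typical use goes through the tensor package front end rather than calling
// Lt on the engine directly. A minimal sketch, assuming a CUDA-enabled build
// and tensors whose data was allocated by this engine (tensor.New,
// tensor.WithEngine, tensor.UseUnsafe and tensor.WithReuse are from
// gorgonia.org/tensor):
//
//	a := tensor.New(tensor.Of(tensor.Float64), tensor.WithShape(2, 3), tensor.WithEngine(e))
//	b := tensor.New(tensor.Of(tensor.Float64), tensor.WithShape(2, 3), tensor.WithEngine(e))
//	c := tensor.New(tensor.Of(tensor.Float64), tensor.WithShape(2, 3), tensor.WithEngine(e))
//
//	lt, err := tensor.Lt(a, b, tensor.WithReuse(c)) // a is copied into c, then c holds a < b
//	lt, err = tensor.Lt(a, b, tensor.UseUnsafe())   // a < b is written over a's own memory
//
// Because safe and increment operations are unsupported, a call that passes
// neither UseUnsafe nor WithReuse returns an error.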

// LtScalar implements tensor.Lter. It does not support safe or increment operation options and will return an error if those options are passed in
func (e *Engine) LtScalar(a tensor.Tensor, b interface{}, leftTensor bool, opts ...tensor.FuncOpt) (retVal tensor.Tensor, err error) {
	name := constructName1(a, leftTensor, "lt")
	if !e.HasFunc(name) {
		return nil, errors.Errorf("Unable to perform LtScalar(). The tensor engine does not have the function %q", name)
	}

	var bMem tensor.Memory
	var ok bool
	if bMem, ok = b.(tensor.Memory); !ok {
		return nil, errors.Errorf("b has to be a tensor.Memory. Got %T instead", b)
	}

	if err = unaryCheck(a); err != nil {
		return nil, errors.Wrap(err, "Basic checks failed for LtScalar")
	}

	var reuse tensor.DenseTensor
	var safe, toReuse bool
	if reuse, safe, toReuse, _, _, err = handleFuncOpts(a.Shape(), a.Dtype(), a.DataOrder(), true, opts...); err != nil {
		return nil, errors.Wrap(err, "Unable to handle funcOpts")
	}

	var mem, memB cu.DevicePtr
	var size int64

	switch {
	case toReuse:
		mem = cu.DevicePtr(reuse.Uintptr())
		memA := cu.DevicePtr(a.Uintptr())
		memSize := int64(a.MemSize())
		e.memcpy(mem, memA, memSize)

		size = int64(logicalSize(reuse.Shape()))
		retVal = reuse
	case !safe:
		mem = cu.DevicePtr(a.Uintptr())
		retVal = a
		size = int64(logicalSize(a.Shape()))
	default:
		return nil, errors.New("Impossible state: A reuse tensor must be passed in, or the operation must be unsafe. Incr and safe operations are not supported")
	}

	memB = cu.DevicePtr(bMem.Uintptr())
	if !leftTensor {
		mem, memB = memB, mem
	}

	fn := e.f[name]
	gridDimX, gridDimY, gridDimZ, blockDimX, blockDimY, blockDimZ := e.ElemGridSize(int(size))
	args := []unsafe.Pointer{
		unsafe.Pointer(&mem),
		unsafe.Pointer(&memB),
		unsafe.Pointer(&size),
	}
	logf("gx %d, gy %d, gz %d | bx %d by %d, bz %d", gridDimX, gridDimY, gridDimZ, blockDimX, blockDimY, blockDimZ)
	logf("CUDADO %q, Mem: %v size %v, args %v", name, mem, size, args)
	logf("LaunchKernel Params. mem: %v. Size %v", mem, size)
	e.c.LaunchAndSync(fn, gridDimX, gridDimY, gridDimZ, blockDimX, blockDimY, blockDimZ, 0, cu.NoStream, args)
	return
}
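
// For the *Scalar variants, the scalar operand b must already be backed by
// device-accessible memory: it has to implement tensor.Memory, so a plain Go
// float64 or int is rejected. leftTensor picks the side of the comparison the
// tensor sits on: true computes a OP scalar, false computes scalar OP a (the
// device pointers are swapped accordingly before launch). A minimal sketch,
// assuming the scalar has been materialised as a scalar-shaped tensor on the
// same engine (tensor.FromScalar is from gorgonia.org/tensor):
//
//	s := tensor.New(tensor.Of(tensor.Float64), tensor.WithEngine(e), tensor.FromScalar(0.5))
//	out, err := e.LtScalar(a, s, true, tensor.UseUnsafe()) // element-wise a < 0.5, written over a's memory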

// Lte implements tensor.Lteer. It does not support safe or increment operation options and will return an error if those options are passed in
func (e *Engine) Lte(a tensor.Tensor, b tensor.Tensor, opts ...tensor.FuncOpt) (retVal tensor.Tensor, err error) {
	name := constructName2(a, b, "lte")

	if !e.HasFunc(name) {
		return nil, errors.Errorf("Unable to perform Lte(). The tensor engine does not have the function %q", name)
	}

	if err = binaryCheck(a, b); err != nil {
		return nil, errors.Wrap(err, "Basic checks failed for Lte")
	}

	var reuse tensor.DenseTensor
	var safe, toReuse bool
	if reuse, safe, toReuse, _, _, err = handleFuncOpts(a.Shape(), a.Dtype(), a.DataOrder(), true, opts...); err != nil {
		return nil, errors.Wrap(err, "Unable to handle funcOpts")
	}

	var mem, memB cu.DevicePtr
	var size int64

	switch {
	case toReuse:
		mem = cu.DevicePtr(reuse.Uintptr())
		memA := cu.DevicePtr(a.Uintptr())
		memSize := int64(a.MemSize())
		e.memcpy(mem, memA, memSize)

		size = int64(logicalSize(reuse.Shape()))
		retVal = reuse
	case !safe:
		mem = cu.DevicePtr(a.Uintptr())
		retVal = a
		size = int64(logicalSize(a.Shape()))
	default:
		return nil, errors.New("Impossible state: A reuse tensor must be passed in, or the operation must be unsafe. Incr and safe operations are not supported")
	}

	memB = cu.DevicePtr(b.Uintptr())
	fn := e.f[name]
	gridDimX, gridDimY, gridDimZ, blockDimX, blockDimY, blockDimZ := e.ElemGridSize(int(size))
	args := []unsafe.Pointer{
		unsafe.Pointer(&mem),
		unsafe.Pointer(&memB),
		unsafe.Pointer(&size),
	}
	logf("gx %d, gy %d, gz %d | bx %d by %d, bz %d", gridDimX, gridDimY, gridDimZ, blockDimX, blockDimY, blockDimZ)
	logf("CUDADO %q, Mem: %v MemB: %v size %v, args %v", name, mem, memB, size, args)
	logf("LaunchKernel Params. mem: %v. Size %v", mem, size)
	e.c.LaunchAndSync(fn, gridDimX, gridDimY, gridDimZ, blockDimX, blockDimY, blockDimZ, 0, cu.NoStream, args)
	return
}

// LteScalar implements tensor.Lteer. It does not support safe or increment operation options and will return an error if those options are passed in
func (e *Engine) LteScalar(a tensor.Tensor, b interface{}, leftTensor bool, opts ...tensor.FuncOpt) (retVal tensor.Tensor, err error) {
	name := constructName1(a, leftTensor, "lte")
	if !e.HasFunc(name) {
		return nil, errors.Errorf("Unable to perform LteScalar(). The tensor engine does not have the function %q", name)
	}

	var bMem tensor.Memory
	var ok bool
	if bMem, ok = b.(tensor.Memory); !ok {
		return nil, errors.Errorf("b has to be a tensor.Memory. Got %T instead", b)
	}

	if err = unaryCheck(a); err != nil {
		return nil, errors.Wrap(err, "Basic checks failed for LteScalar")
	}

	var reuse tensor.DenseTensor
	var safe, toReuse bool
	if reuse, safe, toReuse, _, _, err = handleFuncOpts(a.Shape(), a.Dtype(), a.DataOrder(), true, opts...); err != nil {
		return nil, errors.Wrap(err, "Unable to handle funcOpts")
	}

	var mem, memB cu.DevicePtr
	var size int64

	switch {
	case toReuse:
		mem = cu.DevicePtr(reuse.Uintptr())
		memA := cu.DevicePtr(a.Uintptr())
		memSize := int64(a.MemSize())
		e.memcpy(mem, memA, memSize)

		size = int64(logicalSize(reuse.Shape()))
		retVal = reuse
	case !safe:
		mem = cu.DevicePtr(a.Uintptr())
		retVal = a
		size = int64(logicalSize(a.Shape()))
	default:
		return nil, errors.New("Impossible state: A reuse tensor must be passed in, or the operation must be unsafe. Incr and safe operations are not supported")
	}

	memB = cu.DevicePtr(bMem.Uintptr())
	if !leftTensor {
		mem, memB = memB, mem
	}

	fn := e.f[name]
	gridDimX, gridDimY, gridDimZ, blockDimX, blockDimY, blockDimZ := e.ElemGridSize(int(size))
	args := []unsafe.Pointer{
		unsafe.Pointer(&mem),
		unsafe.Pointer(&memB),
		unsafe.Pointer(&size),
	}
	logf("gx %d, gy %d, gz %d | bx %d by %d, bz %d", gridDimX, gridDimY, gridDimZ, blockDimX, blockDimY, blockDimZ)
	logf("CUDADO %q, Mem: %v size %v, args %v", name, mem, size, args)
	logf("LaunchKernel Params. mem: %v. Size %v", mem, size)
	e.c.LaunchAndSync(fn, gridDimX, gridDimY, gridDimZ, blockDimX, blockDimY, blockDimZ, 0, cu.NoStream, args)
	return
}

// Gt implements tensor.Gter. It does not support safe or increment operation options and will return an error if those options are passed in
func (e *Engine) Gt(a tensor.Tensor, b tensor.Tensor, opts ...tensor.FuncOpt) (retVal tensor.Tensor, err error) {
	name := constructName2(a, b, "gt")

	if !e.HasFunc(name) {
		return nil, errors.Errorf("Unable to perform Gt(). The tensor engine does not have the function %q", name)
	}

	if err = binaryCheck(a, b); err != nil {
		return nil, errors.Wrap(err, "Basic checks failed for Gt")
	}

	var reuse tensor.DenseTensor
	var safe, toReuse bool
	if reuse, safe, toReuse, _, _, err = handleFuncOpts(a.Shape(), a.Dtype(), a.DataOrder(), true, opts...); err != nil {
		return nil, errors.Wrap(err, "Unable to handle funcOpts")
	}

	var mem, memB cu.DevicePtr
	var size int64

	switch {
	case toReuse:
		mem = cu.DevicePtr(reuse.Uintptr())
		memA := cu.DevicePtr(a.Uintptr())
		memSize := int64(a.MemSize())
		e.memcpy(mem, memA, memSize)

		size = int64(logicalSize(reuse.Shape()))
		retVal = reuse
	case !safe:
		mem = cu.DevicePtr(a.Uintptr())
		retVal = a
		size = int64(logicalSize(a.Shape()))
	default:
		return nil, errors.New("Impossible state: A reuse tensor must be passed in, or the operation must be unsafe. Incr and safe operations are not supported")
	}

	memB = cu.DevicePtr(b.Uintptr())
	fn := e.f[name]
	gridDimX, gridDimY, gridDimZ, blockDimX, blockDimY, blockDimZ := e.ElemGridSize(int(size))
	args := []unsafe.Pointer{
		unsafe.Pointer(&mem),
		unsafe.Pointer(&memB),
		unsafe.Pointer(&size),
	}
	logf("gx %d, gy %d, gz %d | bx %d by %d, bz %d", gridDimX, gridDimY, gridDimZ, blockDimX, blockDimY, blockDimZ)
	logf("CUDADO %q, Mem: %v MemB: %v size %v, args %v", name, mem, memB, size, args)
	logf("LaunchKernel Params. mem: %v. Size %v", mem, size)
	e.c.LaunchAndSync(fn, gridDimX, gridDimY, gridDimZ, blockDimX, blockDimY, blockDimZ, 0, cu.NoStream, args)
	return
}

// GtScalar implements tensor.Gter. It does not support safe or increment operation options and will return an error if those options are passed in
func (e *Engine) GtScalar(a tensor.Tensor, b interface{}, leftTensor bool, opts ...tensor.FuncOpt) (retVal tensor.Tensor, err error) {
	name := constructName1(a, leftTensor, "gt")
	if !e.HasFunc(name) {
		return nil, errors.Errorf("Unable to perform GtScalar(). The tensor engine does not have the function %q", name)
	}

	var bMem tensor.Memory
	var ok bool
	if bMem, ok = b.(tensor.Memory); !ok {
		return nil, errors.Errorf("b has to be a tensor.Memory. Got %T instead", b)
	}

	if err = unaryCheck(a); err != nil {
		return nil, errors.Wrap(err, "Basic checks failed for GtScalar")
	}

	var reuse tensor.DenseTensor
	var safe, toReuse bool
	if reuse, safe, toReuse, _, _, err = handleFuncOpts(a.Shape(), a.Dtype(), a.DataOrder(), true, opts...); err != nil {
		return nil, errors.Wrap(err, "Unable to handle funcOpts")
	}

	var mem, memB cu.DevicePtr
	var size int64

	switch {
	case toReuse:
		mem = cu.DevicePtr(reuse.Uintptr())
		memA := cu.DevicePtr(a.Uintptr())
		memSize := int64(a.MemSize())
		e.memcpy(mem, memA, memSize)

		size = int64(logicalSize(reuse.Shape()))
		retVal = reuse
	case !safe:
		mem = cu.DevicePtr(a.Uintptr())
		retVal = a
		size = int64(logicalSize(a.Shape()))
	default:
		return nil, errors.New("Impossible state: A reuse tensor must be passed in, or the operation must be unsafe. Incr and safe operations are not supported")
	}

	memB = cu.DevicePtr(bMem.Uintptr())
	if !leftTensor {
		mem, memB = memB, mem
	}

	fn := e.f[name]
	gridDimX, gridDimY, gridDimZ, blockDimX, blockDimY, blockDimZ := e.ElemGridSize(int(size))
	args := []unsafe.Pointer{
		unsafe.Pointer(&mem),
		unsafe.Pointer(&memB),
		unsafe.Pointer(&size),
	}
	logf("gx %d, gy %d, gz %d | bx %d by %d, bz %d", gridDimX, gridDimY, gridDimZ, blockDimX, blockDimY, blockDimZ)
	logf("CUDADO %q, Mem: %v size %v, args %v", name, mem, size, args)
	logf("LaunchKernel Params. mem: %v. Size %v", mem, size)
	e.c.LaunchAndSync(fn, gridDimX, gridDimY, gridDimZ, blockDimX, blockDimY, blockDimZ, 0, cu.NoStream, args)
	return
}

// Gte implements tensor.Gteer. It does not support safe or increment operation options and will return an error if those options are passed in
func (e *Engine) Gte(a tensor.Tensor, b tensor.Tensor, opts ...tensor.FuncOpt) (retVal tensor.Tensor, err error) {
	name := constructName2(a, b, "gte")

	if !e.HasFunc(name) {
		return nil, errors.Errorf("Unable to perform Gte(). The tensor engine does not have the function %q", name)
	}

	if err = binaryCheck(a, b); err != nil {
		return nil, errors.Wrap(err, "Basic checks failed for Gte")
	}

	var reuse tensor.DenseTensor
	var safe, toReuse bool
	if reuse, safe, toReuse, _, _, err = handleFuncOpts(a.Shape(), a.Dtype(), a.DataOrder(), true, opts...); err != nil {
		return nil, errors.Wrap(err, "Unable to handle funcOpts")
	}

	var mem, memB cu.DevicePtr
	var size int64

	switch {
	case toReuse:
		mem = cu.DevicePtr(reuse.Uintptr())
		memA := cu.DevicePtr(a.Uintptr())
		memSize := int64(a.MemSize())
		e.memcpy(mem, memA, memSize)

		size = int64(logicalSize(reuse.Shape()))
		retVal = reuse
	case !safe:
		mem = cu.DevicePtr(a.Uintptr())
		retVal = a
		size = int64(logicalSize(a.Shape()))
	default:
		return nil, errors.New("Impossible state: A reuse tensor must be passed in, or the operation must be unsafe. Incr and safe operations are not supported")
	}

	memB = cu.DevicePtr(b.Uintptr())
	fn := e.f[name]
	gridDimX, gridDimY, gridDimZ, blockDimX, blockDimY, blockDimZ := e.ElemGridSize(int(size))
	args := []unsafe.Pointer{
		unsafe.Pointer(&mem),
		unsafe.Pointer(&memB),
		unsafe.Pointer(&size),
	}
	logf("gx %d, gy %d, gz %d | bx %d by %d, bz %d", gridDimX, gridDimY, gridDimZ, blockDimX, blockDimY, blockDimZ)
	logf("CUDADO %q, Mem: %v MemB: %v size %v, args %v", name, mem, memB, size, args)
	logf("LaunchKernel Params. mem: %v. Size %v", mem, size)
	e.c.LaunchAndSync(fn, gridDimX, gridDimY, gridDimZ, blockDimX, blockDimY, blockDimZ, 0, cu.NoStream, args)
	return
}

// GteScalar implements tensor.Gteer. It does not support safe or increment operation options and will return an error if those options are passed in
func (e *Engine) GteScalar(a tensor.Tensor, b interface{}, leftTensor bool, opts ...tensor.FuncOpt) (retVal tensor.Tensor, err error) {
	name := constructName1(a, leftTensor, "gte")
	if !e.HasFunc(name) {
		return nil, errors.Errorf("Unable to perform GteScalar(). The tensor engine does not have the function %q", name)
	}

	var bMem tensor.Memory
	var ok bool
	if bMem, ok = b.(tensor.Memory); !ok {
		return nil, errors.Errorf("b has to be a tensor.Memory. Got %T instead", b)
	}

	if err = unaryCheck(a); err != nil {
		return nil, errors.Wrap(err, "Basic checks failed for GteScalar")
	}

	var reuse tensor.DenseTensor
	var safe, toReuse bool
	if reuse, safe, toReuse, _, _, err = handleFuncOpts(a.Shape(), a.Dtype(), a.DataOrder(), true, opts...); err != nil {
		return nil, errors.Wrap(err, "Unable to handle funcOpts")
	}

	var mem, memB cu.DevicePtr
	var size int64

	switch {
	case toReuse:
		mem = cu.DevicePtr(reuse.Uintptr())
		memA := cu.DevicePtr(a.Uintptr())
		memSize := int64(a.MemSize())
		e.memcpy(mem, memA, memSize)

		size = int64(logicalSize(reuse.Shape()))
		retVal = reuse
	case !safe:
		mem = cu.DevicePtr(a.Uintptr())
		retVal = a
		size = int64(logicalSize(a.Shape()))
	default:
		return nil, errors.New("Impossible state: A reuse tensor must be passed in, or the operation must be unsafe. Incr and safe operations are not supported")
	}

	memB = cu.DevicePtr(bMem.Uintptr())
	if !leftTensor {
		mem, memB = memB, mem
	}

	fn := e.f[name]
	gridDimX, gridDimY, gridDimZ, blockDimX, blockDimY, blockDimZ := e.ElemGridSize(int(size))
	args := []unsafe.Pointer{
		unsafe.Pointer(&mem),
		unsafe.Pointer(&memB),
		unsafe.Pointer(&size),
	}
	logf("gx %d, gy %d, gz %d | bx %d by %d, bz %d", gridDimX, gridDimY, gridDimZ, blockDimX, blockDimY, blockDimZ)
	logf("CUDADO %q, Mem: %v size %v, args %v", name, mem, size, args)
	logf("LaunchKernel Params. mem: %v. Size %v", mem, size)
	e.c.LaunchAndSync(fn, gridDimX, gridDimY, gridDimZ, blockDimX, blockDimY, blockDimZ, 0, cu.NoStream, args)
	return
}

// ElEq implements tensor.ElEqer. It does not support safe or increment operation options and will return an error if those options are passed in
func (e *Engine) ElEq(a tensor.Tensor, b tensor.Tensor, opts ...tensor.FuncOpt) (retVal tensor.Tensor, err error) {
	name := constructName2(a, b, "eq")

	if !e.HasFunc(name) {
		return nil, errors.Errorf("Unable to perform ElEq(). The tensor engine does not have the function %q", name)
	}

	if err = binaryCheck(a, b); err != nil {
		return nil, errors.Wrap(err, "Basic checks failed for ElEq")
	}

	var reuse tensor.DenseTensor
	var safe, toReuse bool
	if reuse, safe, toReuse, _, _, err = handleFuncOpts(a.Shape(), a.Dtype(), a.DataOrder(), true, opts...); err != nil {
		return nil, errors.Wrap(err, "Unable to handle funcOpts")
	}

	var mem, memB cu.DevicePtr
	var size int64

	switch {
	case toReuse:
		mem = cu.DevicePtr(reuse.Uintptr())
		memA := cu.DevicePtr(a.Uintptr())
		memSize := int64(a.MemSize())
		e.memcpy(mem, memA, memSize)

		size = int64(logicalSize(reuse.Shape()))
		retVal = reuse
	case !safe:
		mem = cu.DevicePtr(a.Uintptr())
		retVal = a
		size = int64(logicalSize(a.Shape()))
	default:
		return nil, errors.New("Impossible state: A reuse tensor must be passed in, or the operation must be unsafe. Incr and safe operations are not supported")
	}

	memB = cu.DevicePtr(b.Uintptr())
	fn := e.f[name]
	gridDimX, gridDimY, gridDimZ, blockDimX, blockDimY, blockDimZ := e.ElemGridSize(int(size))
	args := []unsafe.Pointer{
		unsafe.Pointer(&mem),
		unsafe.Pointer(&memB),
		unsafe.Pointer(&size),
	}
	logf("gx %d, gy %d, gz %d | bx %d by %d, bz %d", gridDimX, gridDimY, gridDimZ, blockDimX, blockDimY, blockDimZ)
	logf("CUDADO %q, Mem: %v MemB: %v size %v, args %v", name, mem, memB, size, args)
	logf("LaunchKernel Params. mem: %v. Size %v", mem, size)
	e.c.LaunchAndSync(fn, gridDimX, gridDimY, gridDimZ, blockDimX, blockDimY, blockDimZ, 0, cu.NoStream, args)
	return
}

// EqScalar implements tensor.ElEqer. It does not support safe or increment operation options and will return an error if those options are passed in
func (e *Engine) EqScalar(a tensor.Tensor, b interface{}, leftTensor bool, opts ...tensor.FuncOpt) (retVal tensor.Tensor, err error) {
	name := constructName1(a, leftTensor, "eq")
	if !e.HasFunc(name) {
		return nil, errors.Errorf("Unable to perform EqScalar(). The tensor engine does not have the function %q", name)
	}

	var bMem tensor.Memory
	var ok bool
	if bMem, ok = b.(tensor.Memory); !ok {
		return nil, errors.Errorf("b has to be a tensor.Memory. Got %T instead", b)
	}

	if err = unaryCheck(a); err != nil {
		return nil, errors.Wrap(err, "Basic checks failed for EqScalar")
	}

	var reuse tensor.DenseTensor
	var safe, toReuse bool
	if reuse, safe, toReuse, _, _, err = handleFuncOpts(a.Shape(), a.Dtype(), a.DataOrder(), true, opts...); err != nil {
		return nil, errors.Wrap(err, "Unable to handle funcOpts")
	}

	var mem, memB cu.DevicePtr
	var size int64

	switch {
	case toReuse:
		mem = cu.DevicePtr(reuse.Uintptr())
		memA := cu.DevicePtr(a.Uintptr())
		memSize := int64(a.MemSize())
		e.memcpy(mem, memA, memSize)

		size = int64(logicalSize(reuse.Shape()))
		retVal = reuse
	case !safe:
		mem = cu.DevicePtr(a.Uintptr())
		retVal = a
		size = int64(logicalSize(a.Shape()))
	default:
		return nil, errors.New("Impossible state: A reuse tensor must be passed in, or the operation must be unsafe. Incr and safe operations are not supported")
	}

	memB = cu.DevicePtr(bMem.Uintptr())
	if !leftTensor {
		mem, memB = memB, mem
	}

	fn := e.f[name]
	gridDimX, gridDimY, gridDimZ, blockDimX, blockDimY, blockDimZ := e.ElemGridSize(int(size))
	args := []unsafe.Pointer{
		unsafe.Pointer(&mem),
		unsafe.Pointer(&memB),
		unsafe.Pointer(&size),
	}
	logf("gx %d, gy %d, gz %d | bx %d by %d, bz %d", gridDimX, gridDimY, gridDimZ, blockDimX, blockDimY, blockDimZ)
	logf("CUDADO %q, Mem: %v size %v, args %v", name, mem, size, args)
	logf("LaunchKernel Params. mem: %v. Size %v", mem, size)
	e.c.LaunchAndSync(fn, gridDimX, gridDimY, gridDimZ, blockDimX, blockDimY, blockDimZ, 0, cu.NoStream, args)
	return
}

// ElNe implements tensor.ElNeer. It does not support safe or increment operation options and will return an error if those options are passed in
func (e *Engine) ElNe(a tensor.Tensor, b tensor.Tensor, opts ...tensor.FuncOpt) (retVal tensor.Tensor, err error) {
	name := constructName2(a, b, "ne")

	if !e.HasFunc(name) {
		return nil, errors.Errorf("Unable to perform ElNe(). The tensor engine does not have the function %q", name)
	}

	if err = binaryCheck(a, b); err != nil {
		return nil, errors.Wrap(err, "Basic checks failed for ElNe")
	}

	var reuse tensor.DenseTensor
	var safe, toReuse bool
	if reuse, safe, toReuse, _, _, err = handleFuncOpts(a.Shape(), a.Dtype(), a.DataOrder(), true, opts...); err != nil {
		return nil, errors.Wrap(err, "Unable to handle funcOpts")
	}

	var mem, memB cu.DevicePtr
	var size int64

	switch {
	case toReuse:
		mem = cu.DevicePtr(reuse.Uintptr())
		memA := cu.DevicePtr(a.Uintptr())
		memSize := int64(a.MemSize())
		e.memcpy(mem, memA, memSize)

		size = int64(logicalSize(reuse.Shape()))
		retVal = reuse
	case !safe:
		mem = cu.DevicePtr(a.Uintptr())
		retVal = a
		size = int64(logicalSize(a.Shape()))
	default:
		return nil, errors.New("Impossible state: A reuse tensor must be passed in, or the operation must be unsafe. Incr and safe operations are not supported")
	}

	memB = cu.DevicePtr(b.Uintptr())
	fn := e.f[name]
	gridDimX, gridDimY, gridDimZ, blockDimX, blockDimY, blockDimZ := e.ElemGridSize(int(size))
	args := []unsafe.Pointer{
		unsafe.Pointer(&mem),
		unsafe.Pointer(&memB),
		unsafe.Pointer(&size),
	}
	logf("gx %d, gy %d, gz %d | bx %d by %d, bz %d", gridDimX, gridDimY, gridDimZ, blockDimX, blockDimY, blockDimZ)
	logf("CUDADO %q, Mem: %v MemB: %v size %v, args %v", name, mem, memB, size, args)
	logf("LaunchKernel Params. mem: %v. Size %v", mem, size)
	e.c.LaunchAndSync(fn, gridDimX, gridDimY, gridDimZ, blockDimX, blockDimY, blockDimZ, 0, cu.NoStream, args)
	return
}

// NeScalar implements tensor.ElNeer. It does not support safe or increment operation options and will return an error if those options are passed in
func (e *Engine) NeScalar(a tensor.Tensor, b interface{}, leftTensor bool, opts ...tensor.FuncOpt) (retVal tensor.Tensor, err error) {
	name := constructName1(a, leftTensor, "ne")
	if !e.HasFunc(name) {
		return nil, errors.Errorf("Unable to perform NeScalar(). The tensor engine does not have the function %q", name)
	}

	var bMem tensor.Memory
	var ok bool
	if bMem, ok = b.(tensor.Memory); !ok {
		return nil, errors.Errorf("b has to be a tensor.Memory. Got %T instead", b)
	}

	if err = unaryCheck(a); err != nil {
		return nil, errors.Wrap(err, "Basic checks failed for NeScalar")
	}

	var reuse tensor.DenseTensor
	var safe, toReuse bool
	if reuse, safe, toReuse, _, _, err = handleFuncOpts(a.Shape(), a.Dtype(), a.DataOrder(), true, opts...); err != nil {
		return nil, errors.Wrap(err, "Unable to handle funcOpts")
	}

	var mem, memB cu.DevicePtr
	var size int64

	switch {
	case toReuse:
		mem = cu.DevicePtr(reuse.Uintptr())
		memA := cu.DevicePtr(a.Uintptr())
		memSize := int64(a.MemSize())
		e.memcpy(mem, memA, memSize)

		size = int64(logicalSize(reuse.Shape()))
		retVal = reuse
	case !safe:
		mem = cu.DevicePtr(a.Uintptr())
		retVal = a
		size = int64(logicalSize(a.Shape()))
	default:
		return nil, errors.New("Impossible state: A reuse tensor must be passed in, or the operation must be unsafe. Incr and safe operations are not supported")
	}

	memB = cu.DevicePtr(bMem.Uintptr())
	if !leftTensor {
		mem, memB = memB, mem
	}

	fn := e.f[name]
	gridDimX, gridDimY, gridDimZ, blockDimX, blockDimY, blockDimZ := e.ElemGridSize(int(size))
	args := []unsafe.Pointer{
		unsafe.Pointer(&mem),
		unsafe.Pointer(&memB),
		unsafe.Pointer(&size),
	}
	logf("gx %d, gy %d, gz %d | bx %d by %d, bz %d", gridDimX, gridDimY, gridDimZ, blockDimX, blockDimY, blockDimZ)
	logf("CUDADO %q, Mem: %v size %v, args %v", name, mem, size, args)
	logf("LaunchKernel Params. mem: %v. Size %v", mem, size)
	e.c.LaunchAndSync(fn, gridDimX, gridDimY, gridDimZ, blockDimX, blockDimY, blockDimZ, 0, cu.NoStream, args)
	return
}