gorgonia.org/gorgonia@v0.9.17/cuda/cmp.go

package cuda

import (
	"unsafe"

	"github.com/pkg/errors"
	"gorgonia.org/cu"
	"gorgonia.org/tensor"
)

// Code generated by gencudaengine, an API generation tool for Gorgonia. DO NOT EDIT.

// Lt implements tensor.Lter. It does not support safe or increment operation options and will return an error if those options are passed in.
func (e *Engine) Lt(a tensor.Tensor, b tensor.Tensor, opts ...tensor.FuncOpt) (retVal tensor.Tensor, err error) {
	name := constructName2(a, b, "lt")

	if !e.HasFunc(name) {
		return nil, errors.Errorf("Unable to perform Lt(). The tensor engine does not have the function %q", name)
	}

	if err = binaryCheck(a, b); err != nil {
		return nil, errors.Wrap(err, "Basic checks failed for Lt")
	}

	var reuse tensor.DenseTensor
	var safe, toReuse bool
	if reuse, safe, toReuse, _, _, err = handleFuncOpts(a.Shape(), a.Dtype(), a.DataOrder(), true, opts...); err != nil {
		return nil, errors.Wrap(err, "Unable to handle funcOpts")
	}

	var mem, memB cu.DevicePtr
	var size int64

	// The result lands in the buffer passed as the first kernel argument (mem),
	// so a destination is required: either a reuse tensor or a itself (unsafe).
	switch {
	case toReuse:
		// Copy a into the reuse tensor first so the kernel can run in place on it.
		mem = cu.DevicePtr(reuse.Uintptr())
		memA := cu.DevicePtr(a.Uintptr())
		memSize := int64(a.MemSize())
		e.memcpy(mem, memA, memSize)

		size = int64(logicalSize(reuse.Shape()))
		retVal = reuse
	case !safe:
		mem = cu.DevicePtr(a.Uintptr())
		retVal = a
		size = int64(logicalSize(a.Shape()))
	default:
		return nil, errors.New("Impossible state: A reuse tensor must be passed in, or the operation must be unsafe. Incr and safe operations are not supported")
	}

	memB = cu.DevicePtr(b.Uintptr())
	fn := e.f[name]
	gridDimX, gridDimY, gridDimZ, blockDimX, blockDimY, blockDimZ := e.ElemGridSize(int(size))
	args := []unsafe.Pointer{
		unsafe.Pointer(&mem),
		unsafe.Pointer(&memB),
		unsafe.Pointer(&size),
	}
	logf("gx %d, gy %d, gz %d | bx %d by %d, bz %d", gridDimX, gridDimY, gridDimZ, blockDimX, blockDimY, blockDimZ)
	logf("CUDADO %q, Mem: %v MemB: %v size %v, args %v", name, mem, memB, size, args)
	logf("LaunchKernel Params. mem: %v. Size %v", mem, size)
	e.c.LaunchAndSync(fn, gridDimX, gridDimY, gridDimZ, blockDimX, blockDimY, blockDimZ, 0, cu.NoStream, args)
	return
}
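// Editorial usage sketch (not generated code): because safe and increment
// modes are unsupported, a call must either supply a reuse tensor or opt in
// to unsafe mode via the standard tensor FuncOpts. Assuming a, b and reuse
// are device-backed tensors of the same shape and dtype:
//
//	ret, err := e.Lt(a, b, tensor.WithReuse(reuse)) // result written into reuse
//	ret, err = e.Lt(a, b, tensor.UseUnsafe())       // result overwrites a
//
// A plain e.Lt(a, b) defaults to safe mode and so returns the "Impossible
// state" error above.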
// LtScalar implements tensor.Lter. It does not support safe or increment operation options and will return an error if those options are passed in.
func (e *Engine) LtScalar(a tensor.Tensor, b interface{}, leftTensor bool, opts ...tensor.FuncOpt) (retVal tensor.Tensor, err error) {
	name := constructName1(a, leftTensor, "lt")
	if !e.HasFunc(name) {
		return nil, errors.Errorf("Unable to perform LtScalar(). The tensor engine does not have the function %q", name)
	}

	var bMem tensor.Memory
	var ok bool
	if bMem, ok = b.(tensor.Memory); !ok {
		return nil, errors.Errorf("b has to be a tensor.Memory. Got %T instead", b)
	}

	if err = unaryCheck(a); err != nil {
		return nil, errors.Wrap(err, "Basic checks failed for LtScalar")
	}

	var reuse tensor.DenseTensor
	var safe, toReuse bool
	if reuse, safe, toReuse, _, _, err = handleFuncOpts(a.Shape(), a.Dtype(), a.DataOrder(), true, opts...); err != nil {
		return nil, errors.Wrap(err, "Unable to handle funcOpts")
	}

	var mem, memB cu.DevicePtr
	var size int64

	switch {
	case toReuse:
		mem = cu.DevicePtr(reuse.Uintptr())
		memA := cu.DevicePtr(a.Uintptr())
		memSize := int64(a.MemSize())
		e.memcpy(mem, memA, memSize)

		size = int64(logicalSize(reuse.Shape()))
		retVal = reuse
	case !safe:
		mem = cu.DevicePtr(a.Uintptr())
		retVal = a
		size = int64(logicalSize(a.Shape()))
	default:
		return nil, errors.New("Impossible state: A reuse tensor must be passed in, or the operation must be unsafe. Incr and safe operations are not supported")
	}

	memB = cu.DevicePtr(bMem.Uintptr())
	if !leftTensor {
		// Tensor on the right: swap the pointers so the scalar operand goes first.
		mem, memB = memB, mem
	}

	fn := e.f[name]
	gridDimX, gridDimY, gridDimZ, blockDimX, blockDimY, blockDimZ := e.ElemGridSize(int(size))
	args := []unsafe.Pointer{
		unsafe.Pointer(&mem),
		unsafe.Pointer(&memB),
		unsafe.Pointer(&size),
	}
	logf("gx %d, gy %d, gz %d | bx %d by %d, bz %d", gridDimX, gridDimY, gridDimZ, blockDimX, blockDimY, blockDimZ)
	logf("CUDADO %q, Mem: %v size %v, args %v", name, mem, size, args)
	logf("LaunchKernel Params. mem: %v. Size %v", mem, size)
	e.c.LaunchAndSync(fn, gridDimX, gridDimY, gridDimZ, blockDimX, blockDimY, blockDimZ, 0, cu.NoStream, args)
	return
}
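// Editorial usage sketch (not generated code): the scalar operand b must
// already be device memory satisfying tensor.Memory; a bare Go value such as
// float64(3) is rejected with "b has to be a tensor.Memory". leftTensor picks
// the operand order, with the tensor on the left when true. Assuming s is
// such a device-backed scalar:
//
//	onLeft, err := e.LtScalar(a, s, true, tensor.UseUnsafe())   // a < s, elementwise
//	onRight, err := e.LtScalar(a, s, false, tensor.UseUnsafe()) // s < a, elementwise
//
// As with Lt, tensor.WithReuse is the only alternative to tensor.UseUnsafe.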
// Lte implements tensor.Lteer. It does not support safe or increment operation options and will return an error if those options are passed in.
func (e *Engine) Lte(a tensor.Tensor, b tensor.Tensor, opts ...tensor.FuncOpt) (retVal tensor.Tensor, err error) {
	name := constructName2(a, b, "lte")

	if !e.HasFunc(name) {
		return nil, errors.Errorf("Unable to perform Lte(). The tensor engine does not have the function %q", name)
	}

	if err = binaryCheck(a, b); err != nil {
		return nil, errors.Wrap(err, "Basic checks failed for Lte")
	}

	var reuse tensor.DenseTensor
	var safe, toReuse bool
	if reuse, safe, toReuse, _, _, err = handleFuncOpts(a.Shape(), a.Dtype(), a.DataOrder(), true, opts...); err != nil {
		return nil, errors.Wrap(err, "Unable to handle funcOpts")
	}

	var mem, memB cu.DevicePtr
	var size int64

	switch {
	case toReuse:
		mem = cu.DevicePtr(reuse.Uintptr())
		memA := cu.DevicePtr(a.Uintptr())
		memSize := int64(a.MemSize())
		e.memcpy(mem, memA, memSize)

		size = int64(logicalSize(reuse.Shape()))
		retVal = reuse
	case !safe:
		mem = cu.DevicePtr(a.Uintptr())
		retVal = a
		size = int64(logicalSize(a.Shape()))
	default:
		return nil, errors.New("Impossible state: A reuse tensor must be passed in, or the operation must be unsafe. Incr and safe operations are not supported")
	}

	memB = cu.DevicePtr(b.Uintptr())
	fn := e.f[name]
	gridDimX, gridDimY, gridDimZ, blockDimX, blockDimY, blockDimZ := e.ElemGridSize(int(size))
	args := []unsafe.Pointer{
		unsafe.Pointer(&mem),
		unsafe.Pointer(&memB),
		unsafe.Pointer(&size),
	}
	logf("gx %d, gy %d, gz %d | bx %d by %d, bz %d", gridDimX, gridDimY, gridDimZ, blockDimX, blockDimY, blockDimZ)
	logf("CUDADO %q, Mem: %v MemB: %v size %v, args %v", name, mem, memB, size, args)
	logf("LaunchKernel Params. mem: %v. Size %v", mem, size)
	e.c.LaunchAndSync(fn, gridDimX, gridDimY, gridDimZ, blockDimX, blockDimY, blockDimZ, 0, cu.NoStream, args)
	return
}

// LteScalar implements tensor.Lteer. It does not support safe or increment operation options and will return an error if those options are passed in.
func (e *Engine) LteScalar(a tensor.Tensor, b interface{}, leftTensor bool, opts ...tensor.FuncOpt) (retVal tensor.Tensor, err error) {
	name := constructName1(a, leftTensor, "lte")
	if !e.HasFunc(name) {
		return nil, errors.Errorf("Unable to perform LteScalar(). The tensor engine does not have the function %q", name)
	}

	var bMem tensor.Memory
	var ok bool
	if bMem, ok = b.(tensor.Memory); !ok {
		return nil, errors.Errorf("b has to be a tensor.Memory. Got %T instead", b)
	}

	if err = unaryCheck(a); err != nil {
		return nil, errors.Wrap(err, "Basic checks failed for LteScalar")
	}

	var reuse tensor.DenseTensor
	var safe, toReuse bool
	if reuse, safe, toReuse, _, _, err = handleFuncOpts(a.Shape(), a.Dtype(), a.DataOrder(), true, opts...); err != nil {
		return nil, errors.Wrap(err, "Unable to handle funcOpts")
	}

	var mem, memB cu.DevicePtr
	var size int64

	switch {
	case toReuse:
		mem = cu.DevicePtr(reuse.Uintptr())
		memA := cu.DevicePtr(a.Uintptr())
		memSize := int64(a.MemSize())
		e.memcpy(mem, memA, memSize)

		size = int64(logicalSize(reuse.Shape()))
		retVal = reuse
	case !safe:
		mem = cu.DevicePtr(a.Uintptr())
		retVal = a
		size = int64(logicalSize(a.Shape()))
	default:
		return nil, errors.New("Impossible state: A reuse tensor must be passed in, or the operation must be unsafe. Incr and safe operations are not supported")
	}

	memB = cu.DevicePtr(bMem.Uintptr())
	if !leftTensor {
		mem, memB = memB, mem
	}

	fn := e.f[name]
	gridDimX, gridDimY, gridDimZ, blockDimX, blockDimY, blockDimZ := e.ElemGridSize(int(size))
	args := []unsafe.Pointer{
		unsafe.Pointer(&mem),
		unsafe.Pointer(&memB),
		unsafe.Pointer(&size),
	}
	logf("gx %d, gy %d, gz %d | bx %d by %d, bz %d", gridDimX, gridDimY, gridDimZ, blockDimX, blockDimY, blockDimZ)
	logf("CUDADO %q, Mem: %v size %v, args %v", name, mem, size, args)
	logf("LaunchKernel Params. mem: %v. Size %v", mem, size)
	e.c.LaunchAndSync(fn, gridDimX, gridDimY, gridDimZ, blockDimX, blockDimY, blockDimZ, 0, cu.NoStream, args)
	return
}
// Gt implements tensor.Gter. It does not support safe or increment operation options and will return an error if those options are passed in.
func (e *Engine) Gt(a tensor.Tensor, b tensor.Tensor, opts ...tensor.FuncOpt) (retVal tensor.Tensor, err error) {
	name := constructName2(a, b, "gt")

	if !e.HasFunc(name) {
		return nil, errors.Errorf("Unable to perform Gt(). The tensor engine does not have the function %q", name)
	}

	if err = binaryCheck(a, b); err != nil {
		return nil, errors.Wrap(err, "Basic checks failed for Gt")
	}

	var reuse tensor.DenseTensor
	var safe, toReuse bool
	if reuse, safe, toReuse, _, _, err = handleFuncOpts(a.Shape(), a.Dtype(), a.DataOrder(), true, opts...); err != nil {
		return nil, errors.Wrap(err, "Unable to handle funcOpts")
	}

	var mem, memB cu.DevicePtr
	var size int64

	switch {
	case toReuse:
		mem = cu.DevicePtr(reuse.Uintptr())
		memA := cu.DevicePtr(a.Uintptr())
		memSize := int64(a.MemSize())
		e.memcpy(mem, memA, memSize)

		size = int64(logicalSize(reuse.Shape()))
		retVal = reuse
	case !safe:
		mem = cu.DevicePtr(a.Uintptr())
		retVal = a
		size = int64(logicalSize(a.Shape()))
	default:
		return nil, errors.New("Impossible state: A reuse tensor must be passed in, or the operation must be unsafe. Incr and safe operations are not supported")
	}

	memB = cu.DevicePtr(b.Uintptr())
	fn := e.f[name]
	gridDimX, gridDimY, gridDimZ, blockDimX, blockDimY, blockDimZ := e.ElemGridSize(int(size))
	args := []unsafe.Pointer{
		unsafe.Pointer(&mem),
		unsafe.Pointer(&memB),
		unsafe.Pointer(&size),
	}
	logf("gx %d, gy %d, gz %d | bx %d by %d, bz %d", gridDimX, gridDimY, gridDimZ, blockDimX, blockDimY, blockDimZ)
	logf("CUDADO %q, Mem: %v MemB: %v size %v, args %v", name, mem, memB, size, args)
	logf("LaunchKernel Params. mem: %v. Size %v", mem, size)
	e.c.LaunchAndSync(fn, gridDimX, gridDimY, gridDimZ, blockDimX, blockDimY, blockDimZ, 0, cu.NoStream, args)
	return
}
// GtScalar implements tensor.Gter. It does not support safe or increment operation options and will return an error if those options are passed in.
func (e *Engine) GtScalar(a tensor.Tensor, b interface{}, leftTensor bool, opts ...tensor.FuncOpt) (retVal tensor.Tensor, err error) {
	name := constructName1(a, leftTensor, "gt")
	if !e.HasFunc(name) {
		return nil, errors.Errorf("Unable to perform GtScalar(). The tensor engine does not have the function %q", name)
	}

	var bMem tensor.Memory
	var ok bool
	if bMem, ok = b.(tensor.Memory); !ok {
		return nil, errors.Errorf("b has to be a tensor.Memory. Got %T instead", b)
	}

	if err = unaryCheck(a); err != nil {
		return nil, errors.Wrap(err, "Basic checks failed for GtScalar")
	}

	var reuse tensor.DenseTensor
	var safe, toReuse bool
	if reuse, safe, toReuse, _, _, err = handleFuncOpts(a.Shape(), a.Dtype(), a.DataOrder(), true, opts...); err != nil {
		return nil, errors.Wrap(err, "Unable to handle funcOpts")
	}

	var mem, memB cu.DevicePtr
	var size int64

	switch {
	case toReuse:
		mem = cu.DevicePtr(reuse.Uintptr())
		memA := cu.DevicePtr(a.Uintptr())
		memSize := int64(a.MemSize())
		e.memcpy(mem, memA, memSize)

		size = int64(logicalSize(reuse.Shape()))
		retVal = reuse
	case !safe:
		mem = cu.DevicePtr(a.Uintptr())
		retVal = a
		size = int64(logicalSize(a.Shape()))
	default:
		return nil, errors.New("Impossible state: A reuse tensor must be passed in, or the operation must be unsafe. Incr and safe operations are not supported")
	}

	memB = cu.DevicePtr(bMem.Uintptr())
	if !leftTensor {
		mem, memB = memB, mem
	}

	fn := e.f[name]
	gridDimX, gridDimY, gridDimZ, blockDimX, blockDimY, blockDimZ := e.ElemGridSize(int(size))
	args := []unsafe.Pointer{
		unsafe.Pointer(&mem),
		unsafe.Pointer(&memB),
		unsafe.Pointer(&size),
	}
	logf("gx %d, gy %d, gz %d | bx %d by %d, bz %d", gridDimX, gridDimY, gridDimZ, blockDimX, blockDimY, blockDimZ)
	logf("CUDADO %q, Mem: %v size %v, args %v", name, mem, size, args)
	logf("LaunchKernel Params. mem: %v. Size %v", mem, size)
	e.c.LaunchAndSync(fn, gridDimX, gridDimY, gridDimZ, blockDimX, blockDimY, blockDimZ, 0, cu.NoStream, args)
	return
}

// Gte implements tensor.Gteer. It does not support safe or increment operation options and will return an error if those options are passed in.
func (e *Engine) Gte(a tensor.Tensor, b tensor.Tensor, opts ...tensor.FuncOpt) (retVal tensor.Tensor, err error) {
	name := constructName2(a, b, "gte")

	if !e.HasFunc(name) {
		return nil, errors.Errorf("Unable to perform Gte(). The tensor engine does not have the function %q", name)
	}

	if err = binaryCheck(a, b); err != nil {
		return nil, errors.Wrap(err, "Basic checks failed for Gte")
	}

	var reuse tensor.DenseTensor
	var safe, toReuse bool
	if reuse, safe, toReuse, _, _, err = handleFuncOpts(a.Shape(), a.Dtype(), a.DataOrder(), true, opts...); err != nil {
		return nil, errors.Wrap(err, "Unable to handle funcOpts")
	}

	var mem, memB cu.DevicePtr
	var size int64

	switch {
	case toReuse:
		mem = cu.DevicePtr(reuse.Uintptr())
		memA := cu.DevicePtr(a.Uintptr())
		memSize := int64(a.MemSize())
		e.memcpy(mem, memA, memSize)

		size = int64(logicalSize(reuse.Shape()))
		retVal = reuse
	case !safe:
		mem = cu.DevicePtr(a.Uintptr())
		retVal = a
		size = int64(logicalSize(a.Shape()))
	default:
		return nil, errors.New("Impossible state: A reuse tensor must be passed in, or the operation must be unsafe. Incr and safe operations are not supported")
	}

	memB = cu.DevicePtr(b.Uintptr())
	fn := e.f[name]
	gridDimX, gridDimY, gridDimZ, blockDimX, blockDimY, blockDimZ := e.ElemGridSize(int(size))
	args := []unsafe.Pointer{
		unsafe.Pointer(&mem),
		unsafe.Pointer(&memB),
		unsafe.Pointer(&size),
	}
	logf("gx %d, gy %d, gz %d | bx %d by %d, bz %d", gridDimX, gridDimY, gridDimZ, blockDimX, blockDimY, blockDimZ)
	logf("CUDADO %q, Mem: %v MemB: %v size %v, args %v", name, mem, memB, size, args)
	logf("LaunchKernel Params. mem: %v. Size %v", mem, size)
	e.c.LaunchAndSync(fn, gridDimX, gridDimY, gridDimZ, blockDimX, blockDimY, blockDimZ, 0, cu.NoStream, args)
	return
}
// GteScalar implements tensor.Gteer. It does not support safe or increment operation options and will return an error if those options are passed in.
func (e *Engine) GteScalar(a tensor.Tensor, b interface{}, leftTensor bool, opts ...tensor.FuncOpt) (retVal tensor.Tensor, err error) {
	name := constructName1(a, leftTensor, "gte")
	if !e.HasFunc(name) {
		return nil, errors.Errorf("Unable to perform GteScalar(). The tensor engine does not have the function %q", name)
	}

	var bMem tensor.Memory
	var ok bool
	if bMem, ok = b.(tensor.Memory); !ok {
		return nil, errors.Errorf("b has to be a tensor.Memory. Got %T instead", b)
	}

	if err = unaryCheck(a); err != nil {
		return nil, errors.Wrap(err, "Basic checks failed for GteScalar")
	}

	var reuse tensor.DenseTensor
	var safe, toReuse bool
	if reuse, safe, toReuse, _, _, err = handleFuncOpts(a.Shape(), a.Dtype(), a.DataOrder(), true, opts...); err != nil {
		return nil, errors.Wrap(err, "Unable to handle funcOpts")
	}

	var mem, memB cu.DevicePtr
	var size int64

	switch {
	case toReuse:
		mem = cu.DevicePtr(reuse.Uintptr())
		memA := cu.DevicePtr(a.Uintptr())
		memSize := int64(a.MemSize())
		e.memcpy(mem, memA, memSize)

		size = int64(logicalSize(reuse.Shape()))
		retVal = reuse
	case !safe:
		mem = cu.DevicePtr(a.Uintptr())
		retVal = a
		size = int64(logicalSize(a.Shape()))
	default:
		return nil, errors.New("Impossible state: A reuse tensor must be passed in, or the operation must be unsafe. Incr and safe operations are not supported")
	}

	memB = cu.DevicePtr(bMem.Uintptr())
	if !leftTensor {
		mem, memB = memB, mem
	}

	fn := e.f[name]
	gridDimX, gridDimY, gridDimZ, blockDimX, blockDimY, blockDimZ := e.ElemGridSize(int(size))
	args := []unsafe.Pointer{
		unsafe.Pointer(&mem),
		unsafe.Pointer(&memB),
		unsafe.Pointer(&size),
	}
	logf("gx %d, gy %d, gz %d | bx %d by %d, bz %d", gridDimX, gridDimY, gridDimZ, blockDimX, blockDimY, blockDimZ)
	logf("CUDADO %q, Mem: %v size %v, args %v", name, mem, size, args)
	logf("LaunchKernel Params. mem: %v. Size %v", mem, size)
	e.c.LaunchAndSync(fn, gridDimX, gridDimY, gridDimZ, blockDimX, blockDimY, blockDimZ, 0, cu.NoStream, args)
	return
}
// ElEq implements tensor.ElEqer. It does not support safe or increment operation options and will return an error if those options are passed in.
func (e *Engine) ElEq(a tensor.Tensor, b tensor.Tensor, opts ...tensor.FuncOpt) (retVal tensor.Tensor, err error) {
	name := constructName2(a, b, "eq")

	if !e.HasFunc(name) {
		return nil, errors.Errorf("Unable to perform ElEq(). The tensor engine does not have the function %q", name)
	}

	if err = binaryCheck(a, b); err != nil {
		return nil, errors.Wrap(err, "Basic checks failed for ElEq")
	}

	var reuse tensor.DenseTensor
	var safe, toReuse bool
	if reuse, safe, toReuse, _, _, err = handleFuncOpts(a.Shape(), a.Dtype(), a.DataOrder(), true, opts...); err != nil {
		return nil, errors.Wrap(err, "Unable to handle funcOpts")
	}

	var mem, memB cu.DevicePtr
	var size int64

	switch {
	case toReuse:
		mem = cu.DevicePtr(reuse.Uintptr())
		memA := cu.DevicePtr(a.Uintptr())
		memSize := int64(a.MemSize())
		e.memcpy(mem, memA, memSize)

		size = int64(logicalSize(reuse.Shape()))
		retVal = reuse
	case !safe:
		mem = cu.DevicePtr(a.Uintptr())
		retVal = a
		size = int64(logicalSize(a.Shape()))
	default:
		return nil, errors.New("Impossible state: A reuse tensor must be passed in, or the operation must be unsafe. Incr and safe operations are not supported")
	}

	memB = cu.DevicePtr(b.Uintptr())
	fn := e.f[name]
	gridDimX, gridDimY, gridDimZ, blockDimX, blockDimY, blockDimZ := e.ElemGridSize(int(size))
	args := []unsafe.Pointer{
		unsafe.Pointer(&mem),
		unsafe.Pointer(&memB),
		unsafe.Pointer(&size),
	}
	logf("gx %d, gy %d, gz %d | bx %d by %d, bz %d", gridDimX, gridDimY, gridDimZ, blockDimX, blockDimY, blockDimZ)
	logf("CUDADO %q, Mem: %v MemB: %v size %v, args %v", name, mem, memB, size, args)
	logf("LaunchKernel Params. mem: %v. Size %v", mem, size)
	e.c.LaunchAndSync(fn, gridDimX, gridDimY, gridDimZ, blockDimX, blockDimY, blockDimZ, 0, cu.NoStream, args)
	return
}

// EqScalar implements tensor.ElEqer. It does not support safe or increment operation options and will return an error if those options are passed in.
func (e *Engine) EqScalar(a tensor.Tensor, b interface{}, leftTensor bool, opts ...tensor.FuncOpt) (retVal tensor.Tensor, err error) {
	name := constructName1(a, leftTensor, "eq")
	if !e.HasFunc(name) {
		return nil, errors.Errorf("Unable to perform EqScalar(). The tensor engine does not have the function %q", name)
	}

	var bMem tensor.Memory
	var ok bool
	if bMem, ok = b.(tensor.Memory); !ok {
		return nil, errors.Errorf("b has to be a tensor.Memory. Got %T instead", b)
	}

	if err = unaryCheck(a); err != nil {
		return nil, errors.Wrap(err, "Basic checks failed for EqScalar")
	}

	var reuse tensor.DenseTensor
	var safe, toReuse bool
	if reuse, safe, toReuse, _, _, err = handleFuncOpts(a.Shape(), a.Dtype(), a.DataOrder(), true, opts...); err != nil {
		return nil, errors.Wrap(err, "Unable to handle funcOpts")
	}

	var mem, memB cu.DevicePtr
	var size int64

	switch {
	case toReuse:
		mem = cu.DevicePtr(reuse.Uintptr())
		memA := cu.DevicePtr(a.Uintptr())
		memSize := int64(a.MemSize())
		e.memcpy(mem, memA, memSize)

		size = int64(logicalSize(reuse.Shape()))
		retVal = reuse
	case !safe:
		mem = cu.DevicePtr(a.Uintptr())
		retVal = a
		size = int64(logicalSize(a.Shape()))
	default:
		return nil, errors.New("Impossible state: A reuse tensor must be passed in, or the operation must be unsafe. Incr and safe operations are not supported")
	}

	memB = cu.DevicePtr(bMem.Uintptr())
	if !leftTensor {
		mem, memB = memB, mem
	}

	fn := e.f[name]
	gridDimX, gridDimY, gridDimZ, blockDimX, blockDimY, blockDimZ := e.ElemGridSize(int(size))
	args := []unsafe.Pointer{
		unsafe.Pointer(&mem),
		unsafe.Pointer(&memB),
		unsafe.Pointer(&size),
	}
	logf("gx %d, gy %d, gz %d | bx %d by %d, bz %d", gridDimX, gridDimY, gridDimZ, blockDimX, blockDimY, blockDimZ)
	logf("CUDADO %q, Mem: %v size %v, args %v", name, mem, size, args)
	logf("LaunchKernel Params. mem: %v. Size %v", mem, size)
	e.c.LaunchAndSync(fn, gridDimX, gridDimY, gridDimZ, blockDimX, blockDimY, blockDimZ, 0, cu.NoStream, args)
	return
}
// ElNe implements tensor.ElNeer. It does not support safe or increment operation options and will return an error if those options are passed in.
func (e *Engine) ElNe(a tensor.Tensor, b tensor.Tensor, opts ...tensor.FuncOpt) (retVal tensor.Tensor, err error) {
	name := constructName2(a, b, "ne")

	if !e.HasFunc(name) {
		return nil, errors.Errorf("Unable to perform ElNe(). The tensor engine does not have the function %q", name)
	}

	if err = binaryCheck(a, b); err != nil {
		return nil, errors.Wrap(err, "Basic checks failed for ElNe")
	}

	var reuse tensor.DenseTensor
	var safe, toReuse bool
	if reuse, safe, toReuse, _, _, err = handleFuncOpts(a.Shape(), a.Dtype(), a.DataOrder(), true, opts...); err != nil {
		return nil, errors.Wrap(err, "Unable to handle funcOpts")
	}

	var mem, memB cu.DevicePtr
	var size int64

	switch {
	case toReuse:
		mem = cu.DevicePtr(reuse.Uintptr())
		memA := cu.DevicePtr(a.Uintptr())
		memSize := int64(a.MemSize())
		e.memcpy(mem, memA, memSize)

		size = int64(logicalSize(reuse.Shape()))
		retVal = reuse
	case !safe:
		mem = cu.DevicePtr(a.Uintptr())
		retVal = a
		size = int64(logicalSize(a.Shape()))
	default:
		return nil, errors.New("Impossible state: A reuse tensor must be passed in, or the operation must be unsafe. Incr and safe operations are not supported")
	}

	memB = cu.DevicePtr(b.Uintptr())
	fn := e.f[name]
	gridDimX, gridDimY, gridDimZ, blockDimX, blockDimY, blockDimZ := e.ElemGridSize(int(size))
	args := []unsafe.Pointer{
		unsafe.Pointer(&mem),
		unsafe.Pointer(&memB),
		unsafe.Pointer(&size),
	}
	logf("gx %d, gy %d, gz %d | bx %d by %d, bz %d", gridDimX, gridDimY, gridDimZ, blockDimX, blockDimY, blockDimZ)
	logf("CUDADO %q, Mem: %v MemB: %v size %v, args %v", name, mem, memB, size, args)
	logf("LaunchKernel Params. mem: %v. Size %v", mem, size)
	e.c.LaunchAndSync(fn, gridDimX, gridDimY, gridDimZ, blockDimX, blockDimY, blockDimZ, 0, cu.NoStream, args)
	return
}
// NeScalar implements tensor.ElNeer. It does not support safe or increment operation options and will return an error if those options are passed in.
func (e *Engine) NeScalar(a tensor.Tensor, b interface{}, leftTensor bool, opts ...tensor.FuncOpt) (retVal tensor.Tensor, err error) {
	name := constructName1(a, leftTensor, "ne")
	if !e.HasFunc(name) {
		return nil, errors.Errorf("Unable to perform NeScalar(). The tensor engine does not have the function %q", name)
	}

	var bMem tensor.Memory
	var ok bool
	if bMem, ok = b.(tensor.Memory); !ok {
		return nil, errors.Errorf("b has to be a tensor.Memory. Got %T instead", b)
	}

	if err = unaryCheck(a); err != nil {
		return nil, errors.Wrap(err, "Basic checks failed for NeScalar")
	}

	var reuse tensor.DenseTensor
	var safe, toReuse bool
	if reuse, safe, toReuse, _, _, err = handleFuncOpts(a.Shape(), a.Dtype(), a.DataOrder(), true, opts...); err != nil {
		return nil, errors.Wrap(err, "Unable to handle funcOpts")
	}

	var mem, memB cu.DevicePtr
	var size int64

	switch {
	case toReuse:
		mem = cu.DevicePtr(reuse.Uintptr())
		memA := cu.DevicePtr(a.Uintptr())
		memSize := int64(a.MemSize())
		e.memcpy(mem, memA, memSize)

		size = int64(logicalSize(reuse.Shape()))
		retVal = reuse
	case !safe:
		mem = cu.DevicePtr(a.Uintptr())
		retVal = a
		size = int64(logicalSize(a.Shape()))
	default:
		return nil, errors.New("Impossible state: A reuse tensor must be passed in, or the operation must be unsafe. Incr and safe operations are not supported")
	}

	memB = cu.DevicePtr(bMem.Uintptr())
	if !leftTensor {
		mem, memB = memB, mem
	}

	fn := e.f[name]
	gridDimX, gridDimY, gridDimZ, blockDimX, blockDimY, blockDimZ := e.ElemGridSize(int(size))
	args := []unsafe.Pointer{
		unsafe.Pointer(&mem),
		unsafe.Pointer(&memB),
		unsafe.Pointer(&size),
	}
	logf("gx %d, gy %d, gz %d | bx %d by %d, bz %d", gridDimX, gridDimY, gridDimZ, blockDimX, blockDimY, blockDimZ)
	logf("CUDADO %q, Mem: %v size %v, args %v", name, mem, size, args)
	logf("LaunchKernel Params. mem: %v. Size %v", mem, size)
	e.c.LaunchAndSync(fn, gridDimX, gridDimY, gridDimZ, blockDimX, blockDimY, blockDimZ, 0, cu.NoStream, args)
	return
}
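// Editorial note on launch sizing (a sketch assuming typical CUDA practice;
// the real ElemGridSize may differ in detail): every method above asks
// ElemGridSize to spread the element count across a grid, then passes the
// exact count as the final kernel argument so out-of-range threads can bail
// out. The common 1D shape of such a computation is:
//
//	const blockDimX = 1024                        // threads per block (assumed)
//	n := int(size)                                // elements to cover
//	gridDimX := (n + blockDimX - 1) / blockDimX   // ceil-divide so all n elements are covered
//
// with gridDimY/gridDimZ growing past 1 only when the count exceeds what a
// one-dimensional grid can address.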