github.com/rohankumardubey/aresdb@v0.0.2-0.20190517170215-e54e3ca06b9c/memutils/memory/cuda_malloc.cu (about)

     1  //  Copyright (c) 2017-2018 Uber Technologies, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
#include <cuda_runtime.h>
#include <cuda_profiler_api.h>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include "../memory.h"
    20  
    21  const int MAX_ERROR_LEN = 100;
    22  
    23  // checkCUDAError checks the cuda error of last runtime calls and returns the
    24  // pointer to the buffer of error message. This buffer needs to be released
    25  // by caller or upper callers.
    26  char *checkCUDAError(const char *message) {
    27    cudaError_t error = cudaGetLastError();
    28    if (error != cudaSuccess) {
    29      char *buffer = reinterpret_cast<char *>(malloc(MAX_ERROR_LEN));
    30      snprintf(buffer, MAX_ERROR_LEN,
    31               "ERROR when calling CUDA functions: %s: %s\n",
    32               message, cudaGetErrorString(error));
    33      return buffer;
    34    }
    35    return NULL;
    36  }
    37  
    38  DeviceMemoryFlags GetFlags() {
    39    return DEVICE_MEMORY_IMPLEMENTATION_FLAG;
    40  }
    41  
    42  CGoCallResHandle Init() {
    43    CGoCallResHandle resHandle = {NULL, NULL};
    44    return resHandle;
    45  }
    46  
    47  CGoCallResHandle HostAlloc(size_t bytes) {
    48    CGoCallResHandle resHandle = {NULL, NULL};
    49    // cudaHostAllocPortable makes sure that the allocation is associated with all
    50    // devices, not just the current device.
    51    cudaHostAlloc(&resHandle.res, bytes, cudaHostAllocPortable);
    52    memset(resHandle.res, 0, bytes);
    53    resHandle.pStrErr = checkCUDAError("Allocate");
    54    return resHandle;
    55  }
    56  
    57  CGoCallResHandle HostFree(void *p) {
    58    CGoCallResHandle resHandle = {NULL, NULL};
    59    cudaFreeHost(p);
    60    resHandle.pStrErr = checkCUDAError("Free");
    61    return resHandle;
    62  }
    63  
    64  CGoCallResHandle CreateCudaStream(int device) {
    65    CGoCallResHandle resHandle = {NULL, NULL};
    66    cudaSetDevice(device);
    67    cudaStream_t s = NULL;
    68    cudaStreamCreate(&s);
    69    resHandle.res = reinterpret_cast<void *>(s);
    70    resHandle.pStrErr = checkCUDAError("CreateCudaStream");
    71    return resHandle;
    72  }
    73  
    74  CGoCallResHandle WaitForCudaStream(void *s, int device) {
    75    CGoCallResHandle resHandle = {NULL, NULL};
    76    cudaSetDevice(device);
    77    cudaStreamSynchronize((cudaStream_t) s);
    78    resHandle.pStrErr = checkCUDAError("WaitForCudaStream");
    79    return resHandle;
    80  }
    81  
    82  CGoCallResHandle DestroyCudaStream(void *s, int device) {
    83    CGoCallResHandle resHandle = {NULL, NULL};
    84    cudaSetDevice(device);
    85    cudaStreamDestroy((cudaStream_t) s);
    86    resHandle.pStrErr = checkCUDAError("DestroyCudaStream");
    87    return resHandle;
    88  }
    89  
    90  CGoCallResHandle DeviceAllocate(size_t bytes, int device) {
    91    CGoCallResHandle resHandle = {NULL, NULL};
    92    cudaSetDevice(device);
    93    cudaMalloc(&resHandle.res, bytes);
    94    cudaMemset(resHandle.res, 0, bytes);
    95    resHandle.pStrErr = checkCUDAError("DeviceAllocate");
    96    return resHandle;
    97  }
    98  
    99  CGoCallResHandle DeviceFree(void *p, int device) {
   100    CGoCallResHandle resHandle = {NULL, NULL};
   101    cudaSetDevice(device);
   102    cudaFree(p);
   103    resHandle.pStrErr = checkCUDAError("DeviceFree");
   104    return resHandle;
   105  }
   106  
   107  CGoCallResHandle AsyncCopyHostToDevice(
   108      void *dst, void *src, size_t bytes, void *stream, int device) {
   109    CGoCallResHandle resHandle = {NULL, NULL};
   110    cudaSetDevice(device);
   111    cudaMemcpyAsync(dst, src, bytes,
   112                    cudaMemcpyHostToDevice, (cudaStream_t) stream);
   113    resHandle.pStrErr = checkCUDAError("AsyncCopyHostToDevice");
   114    return resHandle;
   115  }
   116  
   117  CGoCallResHandle AsyncCopyDeviceToDevice(
   118      void *dst, void *src, size_t bytes, void *stream, int device) {
   119    CGoCallResHandle resHandle = {NULL, NULL};
   120    cudaSetDevice(device);
   121    cudaMemcpyAsync(dst, src, bytes,
   122                    cudaMemcpyDeviceToDevice, (cudaStream_t) stream);
   123    resHandle.pStrErr = checkCUDAError("AsyncCopyDeviceToDevice");
   124    return resHandle;
   125  }
   126  
   127  CGoCallResHandle AsyncCopyDeviceToHost(
   128      void *dst, void *src, size_t bytes, void *stream, int device) {
   129    CGoCallResHandle resHandle = {NULL, NULL};
   130    cudaSetDevice(device);
   131    cudaMemcpyAsync(dst, src, bytes,
   132                    cudaMemcpyDeviceToHost, (cudaStream_t) stream);
   133    resHandle.pStrErr = checkCUDAError("AsyncCopyDeviceToHost");
   134    return resHandle;
   135  }
   136  
   137  CGoCallResHandle GetDeviceCount() {
   138    CGoCallResHandle resHandle = {NULL, NULL};
   139    cudaGetDeviceCount(reinterpret_cast<int *>(&resHandle.res));
   140    resHandle.pStrErr = checkCUDAError("GetDeviceCount");
   141    return resHandle;
   142  }
   143  
   144  CGoCallResHandle GetDeviceGlobalMemoryInMB(int device) {
   145    CGoCallResHandle resHandle = {NULL, NULL};
   146    cudaDeviceProp prop;
   147    cudaGetDeviceProperties(&prop, device);
   148    resHandle.res = reinterpret_cast<void *>(prop.totalGlobalMem / (1024 * 1024));
   149    resHandle.pStrErr = checkCUDAError("GetDeviceGlobalMemoryInMB");
   150    return resHandle;
   151  }
   152  
   153  CGoCallResHandle CudaProfilerStart() {
   154    CGoCallResHandle resHandle = {NULL, NULL};
   155    cudaProfilerStart();
   156    resHandle.pStrErr = checkCUDAError("cudaProfilerStart");
   157    return resHandle;
   158  }
   159  
   160  CGoCallResHandle CudaProfilerStop() {
   161    CGoCallResHandle resHandle = {NULL, NULL};
   162    cudaDeviceSynchronize();
   163    cudaProfilerStop();
   164    resHandle.pStrErr = checkCUDAError("cudaProfilerStop");
   165    return resHandle;
   166  }
   167  
   168  CGoCallResHandle GetDeviceMemoryInfo(size_t *freeSize, size_t *totalSize,
   169                                       int device) {
   170    char* pStrErr = reinterpret_cast<char *>(
   171        malloc(sizeof(NOT_SUPPORTED_ERR_MSG)));
   172    snprintf(pStrErr, sizeof(NOT_SUPPORTED_ERR_MSG),
   173        NOT_SUPPORTED_ERR_MSG);
   174    CGoCallResHandle resHandle = {NULL, pStrErr};
   175    return resHandle;
   176  }
   177  
   178  CGoCallResHandle deviceMalloc(void **devPtr, size_t size) {
   179    CGoCallResHandle resHandle = {NULL, NULL};
   180    cudaMalloc(devPtr, size);
   181    resHandle.pStrErr = checkCUDAError("deviceMalloc");
   182    return resHandle;
   183  }
   184  
   185  CGoCallResHandle deviceFree(void *devPtr) {
   186    CGoCallResHandle resHandle = {NULL, NULL};
   187    cudaFree(devPtr);
   188    resHandle.pStrErr = checkCUDAError("deviceFree");
   189    return resHandle;
   190  }
   191  
   192  CGoCallResHandle deviceMemset(void *devPtr, int value, size_t count) {
   193    CGoCallResHandle resHandle = {NULL, NULL};
   194    cudaMemset(devPtr, value, count);
   195    resHandle.pStrErr = checkCUDAError("deviceMemset");
   196    return resHandle;
   197  }
   198  
   199  CGoCallResHandle asyncCopyHostToDevice(void* dst, const void* src,
   200      size_t count, void* stream) {
   201    CGoCallResHandle resHandle = {NULL, NULL};
   202    cudaMemcpyAsync(dst, src, count,
   203                    cudaMemcpyHostToDevice, (cudaStream_t) stream);
   204    resHandle.pStrErr = checkCUDAError("asyncCopyHostToDevice");
   205    return resHandle;
   206  }