// Copyright (c) 2017-2018 Uber Technologies, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <cuda_runtime.h>
#include <cuda_profiler_api.h>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include "../memory.h"

// Maximum size in bytes (including the NUL terminator) of a formatted CUDA
// error message handed back to the Go side.
const int MAX_ERROR_LEN = 100;

// checkCUDAError checks the cuda error of last runtime calls and returns the
// pointer to the buffer of error message. This buffer needs to be released
// by caller or upper callers. Returns NULL when no error is pending.
// Note: cudaGetLastError both reads AND clears the sticky error state, so
// each exported function below calls this exactly once, after its CUDA calls.
char *checkCUDAError(const char *message) {
  cudaError_t error = cudaGetLastError();
  if (error != cudaSuccess) {
    char *buffer = reinterpret_cast<char *>(malloc(MAX_ERROR_LEN));
    // Guard against malloc failure; snprintf on a NULL buffer is undefined
    // behavior. (A NULL return then looks like success to the caller, but
    // a 100-byte allocation failing means the process is already doomed.)
    if (buffer != NULL) {
      snprintf(buffer, MAX_ERROR_LEN,
               "ERROR when calling CUDA functions: %s: %s\n",
               message, cudaGetErrorString(error));
    }
    return buffer;
  }
  return NULL;
}

// GetFlags reports which memory implementation this build provides
// (the device-memory flag, since this file is the CUDA-backed variant).
DeviceMemoryFlags GetFlags() {
  return DEVICE_MEMORY_IMPLEMENTATION_FLAG;
}

// Init performs implementation-specific setup. The CUDA runtime is
// initialized lazily on first use, so nothing is needed here.
CGoCallResHandle Init() {
  CGoCallResHandle resHandle = {NULL, NULL};
  return resHandle;
}

// HostAlloc allocates `bytes` of pinned (page-locked) host memory, zeroed.
// On failure res stays NULL and pStrErr carries the CUDA error message.
CGoCallResHandle HostAlloc(size_t bytes) {
  CGoCallResHandle resHandle = {NULL, NULL};
  // cudaHostAllocPortable makes sure that the allocation is associated with
  // all devices, not just the current device.
  cudaHostAlloc(&resHandle.res, bytes, cudaHostAllocPortable);
  // Only zero the buffer when the allocation succeeded: memset on a NULL
  // host pointer would crash the process instead of reporting the error.
  if (resHandle.res != NULL) {
    memset(resHandle.res, 0, bytes);
  }
  resHandle.pStrErr = checkCUDAError("Allocate");
  return resHandle;
}

// HostFree releases pinned host memory previously returned by HostAlloc.
CGoCallResHandle HostFree(void *p) {
  CGoCallResHandle resHandle = {NULL, NULL};
  cudaFreeHost(p);
  resHandle.pStrErr = checkCUDAError("Free");
  return resHandle;
}

// CreateCudaStream creates a new stream on `device` and returns it in res.
CGoCallResHandle CreateCudaStream(int device) {
  CGoCallResHandle resHandle = {NULL, NULL};
  cudaSetDevice(device);
  cudaStream_t s = NULL;
  cudaStreamCreate(&s);
  resHandle.res = reinterpret_cast<void *>(s);
  resHandle.pStrErr = checkCUDAError("CreateCudaStream");
  return resHandle;
}

// WaitForCudaStream blocks the calling host thread until all work queued on
// stream `s` of `device` has completed.
CGoCallResHandle WaitForCudaStream(void *s, int device) {
  CGoCallResHandle resHandle = {NULL, NULL};
  cudaSetDevice(device);
  cudaStreamSynchronize((cudaStream_t) s);
  resHandle.pStrErr = checkCUDAError("WaitForCudaStream");
  return resHandle;
}

// DestroyCudaStream destroys stream `s` on `device`. Pending work on the
// stream is allowed to drain per cudaStreamDestroy semantics.
CGoCallResHandle DestroyCudaStream(void *s, int device) {
  CGoCallResHandle resHandle = {NULL, NULL};
  cudaSetDevice(device);
  cudaStreamDestroy((cudaStream_t) s);
  resHandle.pStrErr = checkCUDAError("DestroyCudaStream");
  return resHandle;
}

// DeviceAllocate allocates `bytes` of zeroed global memory on `device`.
// On failure res stays NULL and pStrErr carries the CUDA error message.
CGoCallResHandle DeviceAllocate(size_t bytes, int device) {
  CGoCallResHandle resHandle = {NULL, NULL};
  cudaSetDevice(device);
  cudaMalloc(&resHandle.res, bytes);
  // Skip the memset when cudaMalloc failed so the memset's own error does
  // not replace the allocation failure reported by checkCUDAError below.
  if (resHandle.res != NULL) {
    cudaMemset(resHandle.res, 0, bytes);
  }
  resHandle.pStrErr = checkCUDAError("DeviceAllocate");
  return resHandle;
}

// DeviceFree releases device memory previously returned by DeviceAllocate.
CGoCallResHandle DeviceFree(void *p, int device) {
  CGoCallResHandle resHandle = {NULL, NULL};
  cudaSetDevice(device);
  cudaFree(p);
  resHandle.pStrErr = checkCUDAError("DeviceFree");
  return resHandle;
}

// AsyncCopyHostToDevice enqueues a host-to-device copy of `bytes` on
// `stream`. The copy is asynchronous: the caller must keep `src` valid
// (and should use pinned memory) until the stream is synchronized.
CGoCallResHandle AsyncCopyHostToDevice(
    void *dst, void *src, size_t bytes, void *stream, int device) {
  CGoCallResHandle resHandle = {NULL, NULL};
  cudaSetDevice(device);
  cudaMemcpyAsync(dst, src, bytes,
                  cudaMemcpyHostToDevice, (cudaStream_t) stream);
  resHandle.pStrErr = checkCUDAError("AsyncCopyHostToDevice");
  return resHandle;
}

// AsyncCopyDeviceToDevice enqueues a device-to-device copy of `bytes`
// on `stream`.
CGoCallResHandle AsyncCopyDeviceToDevice(
    void *dst, void *src, size_t bytes, void *stream, int device) {
  CGoCallResHandle resHandle = {NULL, NULL};
  cudaSetDevice(device);
  cudaMemcpyAsync(dst, src, bytes,
                  cudaMemcpyDeviceToDevice, (cudaStream_t) stream);
  resHandle.pStrErr = checkCUDAError("AsyncCopyDeviceToDevice");
  return resHandle;
}

// AsyncCopyDeviceToHost enqueues a device-to-host copy of `bytes` on
// `stream`. The result in `dst` is only valid after stream synchronization.
CGoCallResHandle AsyncCopyDeviceToHost(
    void *dst, void *src, size_t bytes, void *stream, int device) {
  CGoCallResHandle resHandle = {NULL, NULL};
  cudaSetDevice(device);
  cudaMemcpyAsync(dst, src, bytes,
                  cudaMemcpyDeviceToHost, (cudaStream_t) stream);
  resHandle.pStrErr = checkCUDAError("AsyncCopyDeviceToHost");
  return resHandle;
}

// GetDeviceCount returns the number of CUDA devices, smuggled through the
// void* res slot. res is zero-initialized, so writing an int into its low
// bytes yields a correct small integer value on this platform.
CGoCallResHandle GetDeviceCount() {
  CGoCallResHandle resHandle = {NULL, NULL};
  cudaGetDeviceCount(reinterpret_cast<int *>(&resHandle.res));
  resHandle.pStrErr = checkCUDAError("GetDeviceCount");
  return resHandle;
}

// GetDeviceGlobalMemoryInMB returns `device`'s total global memory in MB,
// carried as an integer in the void* res slot.
CGoCallResHandle GetDeviceGlobalMemoryInMB(int device) {
  CGoCallResHandle resHandle = {NULL, NULL};
  cudaDeviceProp prop;
  cudaGetDeviceProperties(&prop, device);
  resHandle.res =
      reinterpret_cast<void *>(prop.totalGlobalMem / (1024 * 1024));
  resHandle.pStrErr = checkCUDAError("GetDeviceGlobalMemoryInMB");
  return resHandle;
}

// CudaProfilerStart enables profiler data collection for this process.
CGoCallResHandle CudaProfilerStart() {
  CGoCallResHandle resHandle = {NULL, NULL};
  cudaProfilerStart();
  resHandle.pStrErr = checkCUDAError("cudaProfilerStart");
  return resHandle;
}

// CudaProfilerStop flushes outstanding GPU work, then disables profiler
// data collection so all activity is captured in the profile.
CGoCallResHandle CudaProfilerStop() {
  CGoCallResHandle resHandle = {NULL, NULL};
  cudaDeviceSynchronize();
  cudaProfilerStop();
  resHandle.pStrErr = checkCUDAError("cudaProfilerStop");
  return resHandle;
}

// GetDeviceMemoryInfo is not supported by this (plain CUDA) implementation;
// it always returns a freshly allocated NOT_SUPPORTED_ERR_MSG. Parameters
// are accepted only to satisfy the shared interface.
CGoCallResHandle GetDeviceMemoryInfo(size_t *freeSize, size_t *totalSize,
                                     int device) {
  char *pStrErr = reinterpret_cast<char *>(
      malloc(sizeof(NOT_SUPPORTED_ERR_MSG)));
  if (pStrErr != NULL) {
    // Pass the message through "%s" rather than as the format string, so a
    // literal '%' in the message can never trigger undefined behavior.
    snprintf(pStrErr, sizeof(NOT_SUPPORTED_ERR_MSG), "%s",
             NOT_SUPPORTED_ERR_MSG);
  }
  CGoCallResHandle resHandle = {NULL, pStrErr};
  return resHandle;
}

// deviceMalloc allocates `size` bytes on the current device into *devPtr.
// Internal (lower-case) variant used by device code paths; no zeroing.
CGoCallResHandle deviceMalloc(void **devPtr, size_t size) {
  CGoCallResHandle resHandle = {NULL, NULL};
  cudaMalloc(devPtr, size);
  resHandle.pStrErr = checkCUDAError("deviceMalloc");
  return resHandle;
}

// deviceFree releases memory previously allocated with deviceMalloc.
CGoCallResHandle deviceFree(void *devPtr) {
  CGoCallResHandle resHandle = {NULL, NULL};
  cudaFree(devPtr);
  resHandle.pStrErr = checkCUDAError("deviceFree");
  return resHandle;
}

// deviceMemset fills `count` bytes of device memory at devPtr with `value`
// (byte-wise, per cudaMemset semantics).
CGoCallResHandle deviceMemset(void *devPtr, int value, size_t count) {
  CGoCallResHandle resHandle = {NULL, NULL};
  cudaMemset(devPtr, value, count);
  resHandle.pStrErr = checkCUDAError("deviceMemset");
  return resHandle;
}

// asyncCopyHostToDevice enqueues a host-to-device copy of `count` bytes on
// `stream` without switching devices (uses whichever device is current).
CGoCallResHandle asyncCopyHostToDevice(void *dst, const void *src,
                                       size_t count, void *stream) {
  CGoCallResHandle resHandle = {NULL, NULL};
  cudaMemcpyAsync(dst, src, count,
                  cudaMemcpyHostToDevice, (cudaStream_t) stream);
  resHandle.pStrErr = checkCUDAError("asyncCopyHostToDevice");
  return resHandle;
}