github.com/rohankumardubey/aresdb@v0.0.2-0.20190517170215-e54e3ca06b9c/query/utils.cu (about) 1 // Copyright (c) 2017-2018 Uber Technologies, Inc. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 #include "query/utils.hpp" 16 #include <cuda_runtime.h> 17 #include <iostream> 18 #include <string> 19 20 const int MAX_CUDA_ERROR_LEN = 80; 21 22 uint16_t DAYS_BEFORE_MONTH_HOST[13] = { 23 0, 24 31, 25 31 + 28, 26 31 + 28 + 31, 27 31 + 28 + 31 + 30, 28 31 + 28 + 31 + 30 + 31, 29 31 + 28 + 31 + 30 + 31 + 30, 30 31 + 28 + 31 + 30 + 31 + 30 + 31, 31 31 + 28 + 31 + 30 + 31 + 30 + 31 + 31, 32 31 + 28 + 31 + 30 + 31 + 30 + 31 + 31 + 30, 33 31 + 28 + 31 + 30 + 31 + 30 + 31 + 31 + 30 + 31, 34 31 + 28 + 31 + 30 + 31 + 30 + 31 + 31 + 30 + 31 + 30, 35 31 + 28 + 31 + 30 + 31 + 30 + 31 + 31 + 30 + 31 + 30 + 31, 36 }; 37 38 __constant__ uint16_t DAYS_BEFORE_MONTH_DEVICE[13]; 39 40 // CheckCUDAError implementation. Notes for host we don't throw the exception 41 // on purpose since we will always receive error messages like "insufficient 42 // driver version". 43 void CheckCUDAError(const char *message) { 44 cudaError_t error = cudaGetLastError(); 45 if (error != cudaSuccess) { 46 char buf[MAX_CUDA_ERROR_LEN]; 47 snprintf(buf, 48 sizeof(buf), 49 "ERROR: %s: %s", 50 message, 51 cudaGetErrorString(error)); 52 #ifdef RUN_ON_DEVICE 53 throw AlgorithmError(buf); 54 #else 55 printf("%s\n", buf); 56 #endif 57 } 58 } 59 60 // BootstrapDevice implementation. It should be only called by unit tests 61 // and golang code. 62 CGoCallResHandle BootstrapDevice() { 63 CGoCallResHandle resHandle = {nullptr, nullptr}; 64 try { 65 #ifdef RUN_ON_DEVICE 66 int deviceCount; 67 cudaGetDeviceCount(&deviceCount); 68 CheckCUDAError("cudaGetDeviceCount"); 69 for (int device = 0; device < deviceCount; device++) { 70 cudaSetDevice(device); 71 CheckCUDAError("cudaSetDevice"); 72 cudaMemcpyToSymbol(DAYS_BEFORE_MONTH_DEVICE, DAYS_BEFORE_MONTH_HOST, 73 sizeof(DAYS_BEFORE_MONTH_HOST)); 74 CheckCUDAError("cudaMemcpyToSymbol"); 75 } 76 #endif 77 } 78 catch (std::exception &e) { 79 std::cerr << "Exception happened when bootstraping device:" << e.what() 80 << std::endl; 81 resHandle.pStrErr = strdup(e.what()); 82 } 83 return resHandle; 84 } 85 86 const char * AlgorithmError::what() const throw() { 87 return message_.c_str(); 88 } 89 90 AlgorithmError::AlgorithmError(const std::string &message) { 91 message_ = message; 92 } 93 94 namespace ares { 95 96 __host__ __device__ uint64_t rotl64(uint64_t x, int8_t r) { 97 return (x << r) | (x >> (64 - r)); 98 } 99 100 __host__ __device__ uint64_t fmix64(uint64_t k) { 101 k ^= k >> 33; 102 k *= 0xff51afd7ed558ccdLLU; 103 k ^= k >> 33; 104 k *= 0xc4ceb9fe1a85ec53LLU; 105 k ^= k >> 33; 106 return k; 107 } 108 109 // Murmur3Sum32 implements Murmur3Sum32 hash algorithm. 110 __host__ __device__ uint32_t murmur3sum32(const uint8_t *key, int bytes, 111 uint32_t seed) { 112 uint32_t h1 = seed; 113 int nBlocks = bytes / 4; 114 const uint8_t *p = key; 115 const uint8_t *p1 = p + 4 * nBlocks; 116 117 for (; p < p1; p += 4) { 118 uint32_t k1 = *reinterpret_cast<const uint32_t *>(p); 119 k1 *= 0xcc9e2d51; 120 k1 = (k1 << 15) | (k1 >> 17); 121 k1 *= 0x1b873593; 122 123 h1 ^= k1; 124 h1 = (h1 << 13) | (h1 >> 19); 125 h1 = h1 * 5 + 0xe6546b64; 126 } 127 128 int tailBytes = bytes - nBlocks * 4; 129 const uint8_t *tail = p1; 130 131 uint32_t k1 = 0; 132 switch (tailBytes & 3) { 133 case 3:k1 ^= (uint32_t) tail[2] << 16; 134 case 2:k1 ^= (uint32_t) tail[1] << 8; 135 case 1:k1 ^= (uint32_t) tail[0]; 136 k1 *= 0xcc9e2d51; 137 k1 = (k1 << 15) | (k1 >> 17); 138 k1 *= 0x1b873593; 139 h1 ^= k1; 140 break; 141 } 142 143 h1 ^= bytes; 144 h1 ^= h1 >> 16; 145 h1 *= 0x85ebca6b; 146 h1 ^= h1 >> 13; 147 h1 *= 0xc2b2ae35; 148 h1 ^= h1 >> 16; 149 150 return h1; 151 } 152 153 __host__ __device__ void murmur3sum128(const uint8_t *key, int len, 154 uint32_t seed, uint64_t *out) { 155 const uint8_t *data = key; 156 const int nblocks = len / 16; 157 int i; 158 159 uint64_t h1 = seed; 160 uint64_t h2 = seed; 161 162 uint64_t c1 = 0x87c37b91114253d5LLU; 163 uint64_t c2 = 0x4cf5ad432745937fLLU; 164 165 const uint64_t *blocks = reinterpret_cast<const uint64_t *>(data); 166 167 for (i = 0; i < nblocks; i++) { 168 uint64_t k1 = blocks[i * 2]; 169 uint64_t k2 = blocks[i * 2 + 1]; 170 171 k1 *= c1; 172 k1 = rotl64(k1, 31); 173 k1 *= c2; 174 h1 ^= k1; 175 176 h1 = rotl64(h1, 27); 177 h1 += h2; 178 h1 = h1 * 5 + 0x52dce729; 179 180 k2 *= c2; 181 k2 = rotl64(k2, 33); 182 k2 *= c1; 183 h2 ^= k2; 184 185 h2 = rotl64(h2, 31); 186 h2 += h1; 187 h2 = h2 * 5 + 0x38495ab5; 188 } 189 190 const uint8_t *tail = reinterpret_cast<const uint8_t *>(data + nblocks * 16); 191 192 uint64_t k1 = 0; 193 uint64_t k2 = 0; 194 195 switch (len & 15) { 196 case 15:k2 ^= (uint64_t) (tail[14]) << 48; 197 case 14:k2 ^= (uint64_t) (tail[13]) << 40; 198 case 13:k2 ^= (uint64_t) (tail[12]) << 32; 199 case 12:k2 ^= (uint64_t) (tail[11]) << 24; 200 case 11:k2 ^= (uint64_t) (tail[10]) << 16; 201 case 10:k2 ^= (uint64_t) (tail[9]) << 8; 202 case 9:k2 ^= (uint64_t) (tail[8]) << 0; 203 k2 *= c2; 204 k2 = rotl64(k2, 33); 205 k2 *= c1; 206 h2 ^= k2; 207 208 case 8:k1 ^= (uint64_t) (tail[7]) << 56; 209 case 7:k1 ^= (uint64_t) (tail[6]) << 48; 210 case 6:k1 ^= (uint64_t) (tail[5]) << 40; 211 case 5:k1 ^= (uint64_t) (tail[4]) << 32; 212 case 4:k1 ^= (uint64_t) (tail[3]) << 24; 213 case 3:k1 ^= (uint64_t) (tail[2]) << 16; 214 case 2:k1 ^= (uint64_t) (tail[1]) << 8; 215 case 1:k1 ^= (uint64_t) (tail[0]) << 0; 216 k1 *= c1; 217 k1 = rotl64(k1, 31); 218 k1 *= c2; 219 h1 ^= k1; 220 } 221 222 h1 ^= len; 223 h2 ^= len; 224 225 h1 += h2; 226 h2 += h1; 227 228 h1 = fmix64(h1); 229 h2 = fmix64(h2); 230 231 h1 += h2; 232 h2 += h1; 233 234 out[0] = h1; 235 out[1] = h2; 236 } 237 238 } // namespace ares