github.com/rohankumardubey/aresdb@v0.0.2-0.20190517170215-e54e3ca06b9c/query/utils.cu (about)

     1  //  Copyright (c) 2017-2018 Uber Technologies, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  #include "query/utils.hpp"
    16  #include <cuda_runtime.h>
    17  #include <iostream>
    18  #include <string>
    19  
    20  const int MAX_CUDA_ERROR_LEN = 80;
    21  
    22  uint16_t DAYS_BEFORE_MONTH_HOST[13] = {
    23      0,
    24      31,
    25      31 + 28,
    26      31 + 28 + 31,
    27      31 + 28 + 31 + 30,
    28      31 + 28 + 31 + 30 + 31,
    29      31 + 28 + 31 + 30 + 31 + 30,
    30      31 + 28 + 31 + 30 + 31 + 30 + 31,
    31      31 + 28 + 31 + 30 + 31 + 30 + 31 + 31,
    32      31 + 28 + 31 + 30 + 31 + 30 + 31 + 31 + 30,
    33      31 + 28 + 31 + 30 + 31 + 30 + 31 + 31 + 30 + 31,
    34      31 + 28 + 31 + 30 + 31 + 30 + 31 + 31 + 30 + 31 + 30,
    35      31 + 28 + 31 + 30 + 31 + 30 + 31 + 31 + 30 + 31 + 30 + 31,
    36  };
    37  
    38  __constant__ uint16_t DAYS_BEFORE_MONTH_DEVICE[13];
    39  
    40  // CheckCUDAError implementation. Notes for host we don't throw the exception
    41  // on purpose since we will always receive error messages like "insufficient
    42  // driver version".
    43  void CheckCUDAError(const char *message) {
    44    cudaError_t error = cudaGetLastError();
    45    if (error != cudaSuccess) {
    46      char buf[MAX_CUDA_ERROR_LEN];
    47      snprintf(buf,
    48               sizeof(buf),
    49               "ERROR: %s: %s",
    50               message,
    51               cudaGetErrorString(error));
    52  #ifdef RUN_ON_DEVICE
    53      throw AlgorithmError(buf);
    54  #else
    55      printf("%s\n", buf);
    56  #endif
    57    }
    58  }
    59  
    60  // BootstrapDevice implementation. It should be only called by unit tests
    61  // and golang code.
    62  CGoCallResHandle BootstrapDevice() {
    63    CGoCallResHandle resHandle = {nullptr, nullptr};
    64    try {
    65  #ifdef RUN_ON_DEVICE
    66      int deviceCount;
    67      cudaGetDeviceCount(&deviceCount);
    68      CheckCUDAError("cudaGetDeviceCount");
    69      for (int device = 0; device < deviceCount; device++) {
    70        cudaSetDevice(device);
    71        CheckCUDAError("cudaSetDevice");
    72        cudaMemcpyToSymbol(DAYS_BEFORE_MONTH_DEVICE, DAYS_BEFORE_MONTH_HOST,
    73                           sizeof(DAYS_BEFORE_MONTH_HOST));
    74        CheckCUDAError("cudaMemcpyToSymbol");
    75      }
    76  #endif
    77    }
    78    catch (std::exception &e) {
    79      std::cerr << "Exception happened when bootstraping device:" << e.what()
    80                << std::endl;
    81      resHandle.pStrErr = strdup(e.what());
    82    }
    83    return resHandle;
    84  }
    85  
    86  const char * AlgorithmError::what() const throw() {
    87      return message_.c_str();
    88  }
    89  
    90  AlgorithmError::AlgorithmError(const std::string &message) {
    91    message_ = message;
    92  }
    93  
    94  namespace ares {
    95  
    96  __host__ __device__ uint64_t rotl64(uint64_t x, int8_t r) {
    97    return (x << r) | (x >> (64 - r));
    98  }
    99  
   100  __host__ __device__ uint64_t fmix64(uint64_t k) {
   101    k ^= k >> 33;
   102    k *= 0xff51afd7ed558ccdLLU;
   103    k ^= k >> 33;
   104    k *= 0xc4ceb9fe1a85ec53LLU;
   105    k ^= k >> 33;
   106    return k;
   107  }
   108  
   109  // Murmur3Sum32 implements Murmur3Sum32 hash algorithm.
   110  __host__ __device__ uint32_t murmur3sum32(const uint8_t *key, int bytes,
   111                                            uint32_t seed) {
   112    uint32_t h1 = seed;
   113    int nBlocks = bytes / 4;
   114    const uint8_t *p = key;
   115    const uint8_t *p1 = p + 4 * nBlocks;
   116  
   117    for (; p < p1; p += 4) {
   118      uint32_t k1 = *reinterpret_cast<const uint32_t *>(p);
   119      k1 *= 0xcc9e2d51;
   120      k1 = (k1 << 15) | (k1 >> 17);
   121      k1 *= 0x1b873593;
   122  
   123      h1 ^= k1;
   124      h1 = (h1 << 13) | (h1 >> 19);
   125      h1 = h1 * 5 + 0xe6546b64;
   126    }
   127  
   128    int tailBytes = bytes - nBlocks * 4;
   129    const uint8_t *tail = p1;
   130  
   131    uint32_t k1 = 0;
   132    switch (tailBytes & 3) {
   133      case 3:k1 ^= (uint32_t) tail[2] << 16;
   134      case 2:k1 ^= (uint32_t) tail[1] << 8;
   135      case 1:k1 ^= (uint32_t) tail[0];
   136        k1 *= 0xcc9e2d51;
   137        k1 = (k1 << 15) | (k1 >> 17);
   138        k1 *= 0x1b873593;
   139        h1 ^= k1;
   140        break;
   141    }
   142  
   143    h1 ^= bytes;
   144    h1 ^= h1 >> 16;
   145    h1 *= 0x85ebca6b;
   146    h1 ^= h1 >> 13;
   147    h1 *= 0xc2b2ae35;
   148    h1 ^= h1 >> 16;
   149  
   150    return h1;
   151  }
   152  
   153  __host__ __device__ void murmur3sum128(const uint8_t *key, int len,
   154                                         uint32_t seed, uint64_t *out) {
   155    const uint8_t *data = key;
   156    const int nblocks = len / 16;
   157    int i;
   158  
   159    uint64_t h1 = seed;
   160    uint64_t h2 = seed;
   161  
   162    uint64_t c1 = 0x87c37b91114253d5LLU;
   163    uint64_t c2 = 0x4cf5ad432745937fLLU;
   164  
   165    const uint64_t *blocks = reinterpret_cast<const uint64_t *>(data);
   166  
   167    for (i = 0; i < nblocks; i++) {
   168      uint64_t k1 = blocks[i * 2];
   169      uint64_t k2 = blocks[i * 2 + 1];
   170  
   171      k1 *= c1;
   172      k1 = rotl64(k1, 31);
   173      k1 *= c2;
   174      h1 ^= k1;
   175  
   176      h1 = rotl64(h1, 27);
   177      h1 += h2;
   178      h1 = h1 * 5 + 0x52dce729;
   179  
   180      k2 *= c2;
   181      k2 = rotl64(k2, 33);
   182      k2 *= c1;
   183      h2 ^= k2;
   184  
   185      h2 = rotl64(h2, 31);
   186      h2 += h1;
   187      h2 = h2 * 5 + 0x38495ab5;
   188    }
   189  
   190    const uint8_t *tail = reinterpret_cast<const uint8_t *>(data + nblocks * 16);
   191  
   192    uint64_t k1 = 0;
   193    uint64_t k2 = 0;
   194  
   195    switch (len & 15) {
   196      case 15:k2 ^= (uint64_t) (tail[14]) << 48;
   197      case 14:k2 ^= (uint64_t) (tail[13]) << 40;
   198      case 13:k2 ^= (uint64_t) (tail[12]) << 32;
   199      case 12:k2 ^= (uint64_t) (tail[11]) << 24;
   200      case 11:k2 ^= (uint64_t) (tail[10]) << 16;
   201      case 10:k2 ^= (uint64_t) (tail[9]) << 8;
   202      case 9:k2 ^= (uint64_t) (tail[8]) << 0;
   203        k2 *= c2;
   204        k2 = rotl64(k2, 33);
   205        k2 *= c1;
   206        h2 ^= k2;
   207  
   208      case 8:k1 ^= (uint64_t) (tail[7]) << 56;
   209      case 7:k1 ^= (uint64_t) (tail[6]) << 48;
   210      case 6:k1 ^= (uint64_t) (tail[5]) << 40;
   211      case 5:k1 ^= (uint64_t) (tail[4]) << 32;
   212      case 4:k1 ^= (uint64_t) (tail[3]) << 24;
   213      case 3:k1 ^= (uint64_t) (tail[2]) << 16;
   214      case 2:k1 ^= (uint64_t) (tail[1]) << 8;
   215      case 1:k1 ^= (uint64_t) (tail[0]) << 0;
   216        k1 *= c1;
   217        k1 = rotl64(k1, 31);
   218        k1 *= c2;
   219        h1 ^= k1;
   220    }
   221  
   222    h1 ^= len;
   223    h2 ^= len;
   224  
   225    h1 += h2;
   226    h2 += h1;
   227  
   228    h1 = fmix64(h1);
   229    h2 = fmix64(h2);
   230  
   231    h1 += h2;
   232    h2 += h1;
   233  
   234    out[0] = h1;
   235    out[1] = h2;
   236  }
   237  
   238  }  // namespace ares