github.com/rohankumardubey/aresdb@v0.0.2-0.20190517170215-e54e3ca06b9c/query/utils.hpp (about)

     1  //  Copyright (c) 2017-2018 Uber Technologies, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  #ifndef QUERY_UTILS_HPP_
    16  #define QUERY_UTILS_HPP_
    17  #include <cuda_runtime.h>
    18  #include <cfloat>
    19  #include <cmath>
    20  #include <cstdint>
    21  #include <exception>
    22  #include <type_traits>
    23  #include <stdexcept>
    24  #include <string>
    25  #include "query/time_series_aggregate.h"
    26  #ifdef USE_RMM
    27  #include "query/thrust_rmm_allocator.hpp"
    28  #endif
    29  
// We need this macro to define functions that can only be called in host
// mode or device mode, but not both. The reason to have this mode is because
// a "device and host" function can only call "device and host" function. They
// cannot call device-only functions like "atomicAdd" even if we call them
// under the RUN_ON_DEVICE macro.
#ifdef RUN_ON_DEVICE
#define __host_or_device__ __device__
#else
#define __host_or_device__ __host__
#endif

// This macro selects the correct thrust execution policy given whether
// RUN_ON_DEVICE and USE_RMM are defined:
//   device + RMM : rmm::exec_policy bound to the given stream (allocations
//                  go through the RAPIDS memory manager)
//   device       : thrust::cuda::par bound to the given stream
//   host         : thrust::host (cudaStream argument is ignored)
#ifdef RUN_ON_DEVICE
#  ifdef USE_RMM
#    define GET_EXECUTION_POLICY(cudaStream) \
       rmm::exec_policy(cudaStream)->on(cudaStream)
#  else
#    define GET_EXECUTION_POLICY(cudaStream) \
        thrust::cuda::par.on(cudaStream)
#  endif
#else
#  define GET_EXECUTION_POLICY(cudaStream) thrust::host
#endif
    54  
    55  
// CheckCUDAError checks the CUDA error state of the current thread and throws
// an exception if an error has occurred. `message` is context describing the
// operation being checked (presumably included in the exception text —
// implementation is in the corresponding .cu file; confirm there).
void CheckCUDAError(const char *message);
    59  
// AlgorithmError is an exception class that carries an error message,
// retrievable via what(). Used to surface query algorithm failures to callers.
class AlgorithmError : public std::exception {
 protected:
  // Owned copy of the error text returned by what().
  std::string message_;
 public:
  explicit AlgorithmError(const std::string &message);
  virtual const char *what() const throw();
};
    68  
namespace ares {

// Launch parameters for custom kernels.
const unsigned int WARP_SIZE = 32;    // threads per warp on all current NVIDIA GPUs
const unsigned int STEP_SIZE = 64;    // elements processed per thread step
const unsigned int BLOCK_SIZE = 512;  // threads per block for custom kernel launches
    75  
// common_type determines the common type between type A and B,
// that is the type both types can be implicitly converted to:
//   - any floating-point operand  -> float_t (from <cmath>; typically float,
//     so double operands are narrowed — NOTE(review): confirm this is intended)
//   - any int64_t operand         -> int64_t
//   - any signed integer operand  -> int32_t
//   - otherwise (both unsigned)   -> uint32_t
// NOTE(review): uint64_t operands fall through to the uint32_t branch
// (is_same<.., int64_t> is false and is_signed is false), which would
// truncate — verify Uint64 columns never reach this trait.
template <typename A, typename B>
struct common_type {
  typedef typename std::conditional<
      std::is_floating_point<A>::value || std::is_floating_point<B>::value,
      float_t,
      typename std::conditional<
          std::is_same<A, int64_t>::value || std::is_same<B, int64_t>::value,
          int64_t,
          typename std::conditional<std::is_signed<A>::value ||
                                        std::is_signed<B>::value,
                                    int32_t, uint32_t>::type>::type>::type type;
};
    90  
// Specialization of common_type for GeoPointT: geo points only combine with
// other geo points, so the common type is GeoPointT itself (the generic
// integral/floating-point logic above does not apply to it).
template<>
struct common_type<GeoPointT, GeoPointT> {
  typedef GeoPointT type;
};
    96  
    97  // get_identity_value returns the identity value for the aggregation function.
    98  // Identity value is a special type of element of a set with respect to a
    99  // binary operation on that set, which leaves other elements unchanged when
   100  // combined with them.
   101  template <typename Value>
   102  __host__ __device__ Value get_identity_value(AggregateFunction aggFunc) {
   103    switch (aggFunc) {
   104      case AGGR_AVG_FLOAT:return 0;  // zero avg and zero count.
   105      case AGGR_SUM_UNSIGNED:
   106      case AGGR_SUM_SIGNED:
   107      case AGGR_SUM_FLOAT:return 0;
   108      case AGGR_MIN_UNSIGNED:return UINT32_MAX;
   109      case AGGR_MIN_SIGNED:return INT32_MAX;
   110      case AGGR_MIN_FLOAT:return FLT_MAX;
   111      case AGGR_MAX_UNSIGNED:return 0;
   112      case AGGR_MAX_SIGNED:return INT32_MIN;
   113      case AGGR_MAX_FLOAT:return FLT_MIN;
   114      default:return 0;
   115    }
   116  }
   117  
   118  inline uint8_t getStepInBytes(DataType dataType) {
   119    switch (dataType) {
   120      case Bool:
   121      case Int8:
   122      case Uint8:return 1;
   123      case Int16:
   124      case Uint16:return 2;
   125      case Int32:
   126      case Uint32:
   127      case Float32:return 4;
   128      case GeoPoint:
   129      case Int64:
   130      case Uint64: return 8;
   131      case UUID: return 16;
   132      default:
   133        throw std::invalid_argument(
   134            "Unsupported data type for VectorPartyInput");
   135    }
   136  }
   137  
   138  // GPU memory access has to be aligned to 1, 2, 4, 8, 16 bytes
   139  // http://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#device-memory-accesses
   140  // therefore we do byte to byte comparison here
   141  inline __host__ __device__ bool memequal(const uint8_t *lhs, const uint8_t *rhs,
   142                                           int bytes) {
   143    for (int i = 0; i < bytes; i++) {
   144      if (lhs[i] != rhs[i]) {
   145        return false;
   146      }
   147    }
   148    return true;
   149  }
   150  
// murmur3sum32 hashes the first `bytes` bytes of `key` with the given seed
// and returns a 32-bit hash. Callable from both host and device code.
// (Presumably MurmurHash3 x86_32 per the name — implementation lives in the
// corresponding .cu file; confirm there.)
__host__ __device__ uint32_t murmur3sum32(const uint8_t *key, int bytes,
                                          uint32_t seed);

// murmur3sum128 hashes the first `len` bytes of `key` with the given seed
// and writes a 128-bit hash into `out`, which must point to at least two
// uint64_t slots. Callable from both host and device code. (Presumably
// MurmurHash3 128-bit per the name — implementation in the .cu file.)
__host__ __device__ void murmur3sum128(const uint8_t *key, int len,
                                       uint32_t seed, uint64_t *out);
   155  }  // namespace ares
   156  #endif  // QUERY_UTILS_HPP_