github.com/rohankumardubey/aresdb@v0.0.2-0.20190517170215-e54e3ca06b9c/query/utils.hpp

// Copyright (c) 2017-2018 Uber Technologies, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#ifndef QUERY_UTILS_HPP_
#define QUERY_UTILS_HPP_
#include <cuda_runtime.h>
#include <cfloat>
#include <cmath>
#include <cstdint>
#include <exception>
#include <type_traits>
#include <stdexcept>
#include <string>
#include "query/time_series_aggregate.h"
#ifdef USE_RMM
#include "query/thrust_rmm_allocator.hpp"
#endif

// We need this macro to define functions that can be called only in host
// mode or only in device mode, but not both. The reason for this mode is
// that a "device and host" function can only call other "device and host"
// functions; it cannot call device-only functions such as atomicAdd, even
// if the call site is guarded by the RUN_ON_DEVICE macro.
#ifdef RUN_ON_DEVICE
#define __host_or_device__ __device__
#else
#define __host_or_device__ __host__
#endif

// This macro selects the correct thrust execution policy depending on
// whether RUN_ON_DEVICE and USE_RMM are defined.
#ifdef RUN_ON_DEVICE
#  ifdef USE_RMM
#    define GET_EXECUTION_POLICY(cudaStream) \
       rmm::exec_policy(cudaStream)->on(cudaStream)
#  else
#    define GET_EXECUTION_POLICY(cudaStream) \
       thrust::cuda::par.on(cudaStream)
#  endif
#else
#  define GET_EXECUTION_POLICY(cudaStream) thrust::host
#endif

// Checks the CUDA error state of the current thread and throws an
// exception if an error has occurred.
void CheckCUDAError(const char *message);

// AlgorithmError is an exception class that carries an error message.
class AlgorithmError : public std::exception {
 protected:
  std::string message_;

 public:
  explicit AlgorithmError(const std::string &message);
  virtual const char *what() const throw();
};

namespace ares {

// Parameters for custom kernels.
const unsigned int WARP_SIZE = 32;
const unsigned int STEP_SIZE = 64;
const unsigned int BLOCK_SIZE = 512;

// common_type determines the common type between types A and B, that is,
// the type to which both can be implicitly converted.
template <typename A, typename B>
struct common_type {
  typedef typename std::conditional<
      std::is_floating_point<A>::value || std::is_floating_point<B>::value,
      float_t,
      typename std::conditional<
          std::is_same<A, int64_t>::value || std::is_same<B, int64_t>::value,
          int64_t,
          typename std::conditional<std::is_signed<A>::value ||
                                        std::is_signed<B>::value,
                                    int32_t,
                                    uint32_t>::type>::type>::type type;
};

// Specialized common_type for GeoPointT.
template<>
struct common_type<GeoPointT, GeoPointT> {
  typedef GeoPointT type;
};
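// A minimal compile-time sketch (editorial addition, not in the original
// header) of how common_type resolves: a floating-point operand wins first,
// then int64_t, then signedness decides between int32_t and uint32_t.
static_assert(std::is_same<common_type<float, int64_t>::type,
                           float_t>::value,
              "any floating-point operand promotes the pair to float_t");
static_assert(std::is_same<common_type<int64_t, uint32_t>::type,
                           int64_t>::value,
              "int64_t dominates the remaining integral types");
static_assert(std::is_same<common_type<int8_t, uint16_t>::type,
                           int32_t>::value,
              "a signed operand widens the pair to int32_t");
static_assert(std::is_same<common_type<uint8_t, uint16_t>::type,
                           uint32_t>::value,
              "two unsigned operands widen to uint32_t");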
// get_identity_value returns the identity value for the aggregation
// function. The identity value is the element of a set that, when combined
// with any other element under the set's binary operation, leaves that
// element unchanged (e.g., 0 for sum, UINT32_MAX for unsigned min).
template <typename Value>
__host__ __device__ Value get_identity_value(AggregateFunction aggFunc) {
  switch (aggFunc) {
    case AGGR_AVG_FLOAT: return 0;  // zero avg and zero count.
    case AGGR_SUM_UNSIGNED:
    case AGGR_SUM_SIGNED:
    case AGGR_SUM_FLOAT: return 0;
    case AGGR_MIN_UNSIGNED: return UINT32_MAX;
    case AGGR_MIN_SIGNED: return INT32_MAX;
    case AGGR_MIN_FLOAT: return FLT_MAX;
    case AGGR_MAX_UNSIGNED: return 0;
    case AGGR_MAX_SIGNED: return INT32_MIN;
    // -FLT_MAX is the lowest finite float; FLT_MIN (the smallest positive
    // normalized float) would be a wrong identity for max over negatives.
    case AGGR_MAX_FLOAT: return -FLT_MAX;
    default: return 0;
  }
}

// getStepInBytes returns the width in bytes of one element of the given
// data type.
inline uint8_t getStepInBytes(DataType dataType) {
  switch (dataType) {
    case Bool:
    case Int8:
    case Uint8: return 1;
    case Int16:
    case Uint16: return 2;
    case Int32:
    case Uint32:
    case Float32: return 4;
    case GeoPoint:
    case Int64:
    case Uint64: return 8;
    case UUID: return 16;
    default:
      throw std::invalid_argument(
          "Unsupported data type for VectorPartyInput");
  }
}

// GPU memory accesses must be aligned to 1, 2, 4, 8, or 16 bytes:
// http://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#device-memory-accesses
// Since the inputs may not satisfy any wider alignment, we compare byte by
// byte here.
inline __host__ __device__ bool memequal(const uint8_t *lhs,
                                         const uint8_t *rhs,
                                         int bytes) {
  for (int i = 0; i < bytes; i++) {
    if (lhs[i] != rhs[i]) {
      return false;
    }
  }
  return true;
}

__host__ __device__ uint32_t murmur3sum32(const uint8_t *key, int bytes,
                                          uint32_t seed);
__host__ __device__ void murmur3sum128(const uint8_t *key, int len,
                                       uint32_t seed, uint64_t *out);
}  // namespace ares
#endif  // QUERY_UTILS_HPP_
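// Usage sketch (editorial addition, not part of the original header).
// Assuming a thrust device_vector named `values` and a CUDA stream
// `cudaStream`, an aggregation could seed a reduction with
// get_identity_value and pick the execution policy via GET_EXECUTION_POLICY:
//
//   float init = ares::get_identity_value<float>(AGGR_MIN_FLOAT);  // FLT_MAX
//   float minVal = thrust::reduce(GET_EXECUTION_POLICY(cudaStream),
//                                 values.begin(), values.end(),
//                                 init, thrust::minimum<float>());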