github.com/rohankumardubey/aresdb@v0.0.2-0.20190517170215-e54e3ca06b9c/query/geo_intersects.cu (about) 1 // Copyright (c) 2017-2018 Uber Technologies, Inc. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 #include <cuda_runtime.h> 16 #include <thrust/iterator/discard_iterator.h> 17 #include <thrust/transform.h> 18 #include <algorithm> 19 #include <vector> 20 #include "query/algorithm.hpp" 21 #include "query/binder.hpp" 22 #include "query/memory.hpp" 23 24 namespace ares { 25 class GeoIntersectionContext { 26 public: 27 GeoIntersectionContext(GeoShapeBatch geoShapes, 28 int indexVectorLength, 29 uint32_t startCount, 30 RecordID **recordIDVectors, 31 int numForeignTables, 32 uint32_t *outputPredicate, 33 bool inOrOut, 34 void *cudaStream) 35 : geoShapes(geoShapes), 36 indexVectorLength(indexVectorLength), 37 startCount(startCount), 38 recordIDVectors(recordIDVectors), 39 numForeignTables(numForeignTables), 40 outputPredicate(outputPredicate), 41 inOrOut(inOrOut), 42 cudaStream(reinterpret_cast<cudaStream_t>(cudaStream)) {} 43 44 private: 45 GeoShapeBatch geoShapes; 46 int indexVectorLength; 47 uint32_t startCount; 48 RecordID **recordIDVectors; 49 int numForeignTables; 50 uint32_t *outputPredicate; 51 bool inOrOut; 52 cudaStream_t cudaStream; 53 54 template<typename IndexZipIterator> 55 int executeRemoveIf(IndexZipIterator indexZipIterator); 56 57 public: 58 cudaStream_t getStream() const { 59 return cudaStream; 60 } 61 62 template<typename InputIterator> 63 int run(uint32_t *indexVector, InputIterator inputIterator); 64 }; 65 66 // Specialized for GeoIntersectionContext. 67 template <> 68 class InputVectorBinder<GeoIntersectionContext, 1> : 69 public InputVectorBinderBase<GeoIntersectionContext, 1, 1> { 70 typedef InputVectorBinderBase<GeoIntersectionContext, 1, 1> super_t; 71 72 public: 73 explicit InputVectorBinder(GeoIntersectionContext context, 74 std::vector<InputVector> inputVectors, 75 uint32_t *indexVector, uint32_t *baseCounts, 76 uint32_t startCount) : super_t(context, 77 inputVectors, 78 indexVector, 79 baseCounts, 80 startCount) { 81 } 82 public: 83 template<typename ...InputIterators> 84 int bind(InputIterators... boundInputIterators); 85 }; 86 87 } // namespace ares 88 89 CGoCallResHandle GeoBatchIntersects( 90 GeoShapeBatch geoShapes, InputVector points, uint32_t *indexVector, 91 int indexVectorLength, uint32_t startCount, RecordID **recordIDVectors, 92 int numForeignTables, uint32_t *outputPredicate, bool inOrOut, 93 void *cudaStream, int device) { 94 CGoCallResHandle resHandle = {0, nullptr}; 95 try { 96 #ifdef RUN_ON_DEVICE 97 cudaSetDevice(device); 98 #endif 99 ares::GeoIntersectionContext 100 ctx(geoShapes, indexVectorLength, startCount, 101 recordIDVectors, numForeignTables, outputPredicate, inOrOut, 102 cudaStream); 103 std::vector<InputVector> inputVectors = {points}; 104 ares::InputVectorBinder<ares::GeoIntersectionContext, 1> 105 binder(ctx, inputVectors, indexVector, nullptr, startCount); 106 resHandle.res = reinterpret_cast<void *>(binder.bind()); 107 CheckCUDAError("GeoBatchIntersects"); 108 return resHandle; 109 } catch (const std::exception &e) { 110 std::cerr << "Exception happened when doing GeoBatchIntersects:" << e.what() 111 << std::endl; 112 resHandle.pStrErr = strdup(e.what()); 113 } 114 return resHandle; 115 } 116 117 CGoCallResHandle WriteGeoShapeDim( 118 int shapeTotalWords, DimensionOutputVector dimOut, 119 int indexVectorLengthBeforeGeo, 120 uint32_t *outputPredicate, void *cudaStream, int device) { 121 CGoCallResHandle resHandle = {nullptr, nullptr}; 122 try { 123 #ifdef RUN_ON_DEVICE 124 cudaSetDevice(device); 125 #endif 126 ares::write_geo_shape_dim(shapeTotalWords, dimOut, 127 indexVectorLengthBeforeGeo, 128 outputPredicate, 129 reinterpret_cast<cudaStream_t>(cudaStream)); 130 CheckCUDAError("WriteGeoShapeDim"); 131 return resHandle; 132 } catch (const std::exception &e) { 133 std::cerr << "Exception happened when doing GeoIntersectsJoin:" << e.what() 134 << std::endl; 135 resHandle.pStrErr = strdup(e.what()); 136 } 137 return resHandle; 138 } 139 140 namespace ares { 141 142 template<typename ...InputIterators> 143 int InputVectorBinder<GeoIntersectionContext, 1>::bind( 144 InputIterators... boundInputIterators) { 145 InputVector input = super_t::inputVectors[0]; 146 uint32_t *indexVector = super_t::indexVector; 147 uint32_t startCount = super_t::startCount; 148 GeoIntersectionContext context = super_t::context; 149 150 if (input.Type == VectorPartyInput) { 151 VectorPartySlice points = input.Vector.VP; 152 if (points.DataType != GeoPoint) { 153 throw std::invalid_argument( 154 "only geo point column are allowed in geo_intersects"); 155 } 156 157 if (points.BasePtr == nullptr) { 158 return 0; 159 } 160 161 uint8_t *basePtr = points.BasePtr; 162 uint32_t nullsOffset = points.NullsOffset; 163 uint32_t valueOffset = points.ValuesOffset; 164 uint8_t startingIndex = points.StartingIndex; 165 uint8_t stepInBytes = 8; 166 uint32_t length = points.Length; 167 auto columnIter = make_column_iterator<GeoPointT>( 168 indexVector, nullptr, startCount, basePtr, nullsOffset, valueOffset, 169 length, stepInBytes, startingIndex); 170 return context.run(indexVector, columnIter); 171 } else if (input.Type == ForeignColumnInput) { 172 DataType dataType = input.Vector.ForeignVP.DataType; 173 174 if (dataType != GeoPoint) { 175 throw std::invalid_argument( 176 "only geo point column are allowed in geo_intersects"); 177 } 178 // Note: for now foreign vectors are dimension table columns 179 // that are not compressed nor pre sliced 180 RecordID *recordIDs = input.Vector.ForeignVP.RecordIDs; 181 const int32_t numBatches = input.Vector.ForeignVP.NumBatches; 182 const int32_t baseBatchID = input.Vector.ForeignVP.BaseBatchID; 183 VectorPartySlice *vpSlices = input.Vector.ForeignVP.Batches; 184 const int32_t numRecordsInLastBatch = 185 input.Vector.ForeignVP.NumRecordsInLastBatch; 186 bool hasDefault = input.Vector.ForeignVP.DefaultValue.HasDefault; 187 DefaultValue defaultValueStruct = input.Vector.ForeignVP.DefaultValue; 188 uint8_t stepInBytes = getStepInBytes(dataType); 189 190 ForeignTableIterator<GeoPointT> *vpIters = prepareForeignTableIterators( 191 numBatches, 192 vpSlices, 193 stepInBytes, 194 hasDefault, 195 defaultValueStruct.Value.GeoPointVal, 196 context.getStream()); 197 int res = 198 context.run(indexVector, RecordIDJoinIterator<GeoPointT>( 199 recordIDs, 200 numBatches, 201 baseBatchID, 202 vpIters, 203 numRecordsInLastBatch, 204 nullptr, 0)); 205 deviceFree(vpIters); 206 return res; 207 } 208 throw std::invalid_argument( 209 "Unsupported data type " + std::to_string(__LINE__) 210 + "for geo intersection contexts"); 211 } 212 213 // GeoRemoveFilter 214 template<typename Value> 215 struct GeoRemoveFilter { 216 explicit GeoRemoveFilter(GeoPredicateIterator predicates, bool inOrOut) 217 : predicates(predicates), inOrOut(inOrOut) {} 218 219 GeoPredicateIterator predicates; 220 bool inOrOut; 221 222 __host__ __device__ 223 bool operator()(const Value &index) { 224 return inOrOut == predicates[thrust::get<0>(index)] < 0; 225 } 226 }; 227 228 // actual function for executing geo filter in batch. 229 template<typename IndexZipIterator> 230 int GeoIntersectionContext::executeRemoveIf(IndexZipIterator indexZipIterator) { 231 GeoPredicateIterator predIter(outputPredicate, geoShapes.TotalWords); 232 GeoRemoveFilter< 233 typename IndexZipIterator::value_type> removeFilter(predIter, inOrOut); 234 235 return thrust::remove_if(GET_EXECUTION_POLICY(cudaStream), indexZipIterator, 236 indexZipIterator + indexVectorLength, removeFilter) - 237 indexZipIterator; 238 } 239 240 template<typename GeoIter> 241 __global__ 242 void geo_for_each_kernel(GeoIter iter, int64_t count) { 243 int64_t start = threadIdx.x + blockIdx.x * blockDim.x; 244 int64_t step = blockDim.x * gridDim.x; 245 iter += start; 246 for (int64_t i = start; i < count; i += step, iter += step) { 247 // call dereference of GeoIter will do the actual geo intersection 248 // algorithm. 249 *iter; 250 } 251 } 252 253 // run intersection algorithm for points and 1 geoshape, side effect is 254 // modifying output predicate vector 255 template<typename InputIterator> 256 void calculateBatchIntersection(GeoShapeBatch geoShapes, 257 InputIterator geoPoints, uint32_t *indexVector, 258 int indexVectorLength, uint32_t startCount, 259 uint32_t *outputPredicate, bool inOrOut, 260 cudaStream_t cudaStream) { 261 auto geoIter = make_geo_batch_intersect_iterator(geoPoints, geoShapes, 262 outputPredicate, inOrOut); 263 int64_t iterLength = (int64_t) indexVectorLength * geoShapes.TotalNumPoints; 264 265 #ifdef RUN_ON_DEVICE 266 int min_grid_size, block_size; 267 cudaOccupancyMaxPotentialBlockSize(&min_grid_size, 268 &block_size, 269 geo_for_each_kernel<decltype(geoIter)>); 270 CheckCUDAError("cudaOccupancyMaxPotentialBlockSize"); 271 // find needed gridsize 272 int64_t needed_grid_size = (iterLength + block_size - 1) / block_size; 273 int64_t grid_size = std::min(static_cast<int64_t>(min_grid_size), 274 needed_grid_size); 275 geo_for_each_kernel<<<grid_size, block_size, 0, cudaStream>>>( 276 geoIter, iterLength); 277 CheckCUDAError("geo_for_each_kernel"); 278 // Wait for kernel to finish. 279 cudaStreamSynchronize(cudaStream); 280 CheckCUDAError("cudaStreamSynchronize"); 281 #else 282 // In host mode, thrust for_each is just doing a sequential 283 // loop, so there is no overflow issue as in device mode. 284 thrust::for_each(GET_EXECUTION_POLICY(cudaStream), geoIter, 285 geoIter + iterLength, VoidFunctor()); 286 #endif 287 } 288 289 template<typename InputIterator> 290 int GeoIntersectionContext::run(uint32_t *indexVector, 291 InputIterator inputIterator) { 292 calculateBatchIntersection(geoShapes, inputIterator, 293 indexVector, indexVectorLength, 294 startCount, outputPredicate, 295 inOrOut, cudaStream); 296 297 switch (numForeignTables) { 298 #define EXECUTE_GEO_REMOVE_IF(NumTotalForeignTables) \ 299 case NumTotalForeignTables: { \ 300 IndexZipIteratorMaker<NumTotalForeignTables> maker; \ 301 return executeRemoveIf(maker.make(indexVector, recordIDVectors)); \ 302 } 303 304 EXECUTE_GEO_REMOVE_IF(0) 305 EXECUTE_GEO_REMOVE_IF(1) 306 EXECUTE_GEO_REMOVE_IF(2) 307 EXECUTE_GEO_REMOVE_IF(3) 308 EXECUTE_GEO_REMOVE_IF(4) 309 EXECUTE_GEO_REMOVE_IF(5) 310 EXECUTE_GEO_REMOVE_IF(6) 311 EXECUTE_GEO_REMOVE_IF(7) 312 EXECUTE_GEO_REMOVE_IF(8) 313 default:throw std::invalid_argument("only support up to 8 foreign tables"); 314 } 315 } 316 317 struct is_non_negative { 318 __host__ __device__ 319 bool operator()(const int val) { 320 return val >= 0; 321 } 322 }; 323 324 void write_geo_shape_dim( 325 int shapeTotalWords, 326 DimensionOutputVector dimOut, int indexVectorLengthBeforeGeo, 327 uint32_t *outputPredicate, cudaStream_t cudaStream) { 328 typedef thrust::tuple<int8_t, uint8_t> DimensionOutputIterValue; 329 GeoPredicateIterator geoPredicateIter(outputPredicate, shapeTotalWords); 330 331 auto zippedShapeIndexIter = thrust::make_zip_iterator(thrust::make_tuple( 332 geoPredicateIter, thrust::constant_iterator<uint8_t>(1))); 333 334 thrust::copy_if( 335 GET_EXECUTION_POLICY(cudaStream), 336 zippedShapeIndexIter, zippedShapeIndexIter + indexVectorLengthBeforeGeo, 337 geoPredicateIter, 338 ares::make_dimension_output_iterator<uint8_t>(dimOut.DimValues, 339 dimOut.DimNulls), 340 is_non_negative()); 341 } 342 343 } // namespace ares