github.com/rohankumardubey/aresdb@v0.0.2-0.20190517170215-e54e3ca06b9c/query/geo_intersects.cu (about)

     1  //  Copyright (c) 2017-2018 Uber Technologies, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  #include <cuda_runtime.h>
    16  #include <thrust/iterator/discard_iterator.h>
    17  #include <thrust/transform.h>
    18  #include <algorithm>
    19  #include <vector>
    20  #include "query/algorithm.hpp"
    21  #include "query/binder.hpp"
    22  #include "query/memory.hpp"
    23  
    24  namespace ares {
    25  class GeoIntersectionContext {
    26   public:
    27    GeoIntersectionContext(GeoShapeBatch geoShapes,
    28               int indexVectorLength,
    29               uint32_t startCount,
    30               RecordID **recordIDVectors,
    31               int numForeignTables,
    32               uint32_t *outputPredicate,
    33               bool inOrOut,
    34               void *cudaStream)
    35        : geoShapes(geoShapes),
    36            indexVectorLength(indexVectorLength),
    37            startCount(startCount),
    38            recordIDVectors(recordIDVectors),
    39            numForeignTables(numForeignTables),
    40            outputPredicate(outputPredicate),
    41            inOrOut(inOrOut),
    42            cudaStream(reinterpret_cast<cudaStream_t>(cudaStream)) {}
    43  
    44   private:
    45    GeoShapeBatch geoShapes;
    46    int indexVectorLength;
    47    uint32_t startCount;
    48    RecordID **recordIDVectors;
    49    int numForeignTables;
    50    uint32_t *outputPredicate;
    51    bool inOrOut;
    52    cudaStream_t cudaStream;
    53  
    54    template<typename IndexZipIterator>
    55    int executeRemoveIf(IndexZipIterator indexZipIterator);
    56  
    57   public:
    58    cudaStream_t getStream() const {
    59      return cudaStream;
    60    }
    61  
    62    template<typename InputIterator>
    63    int run(uint32_t *indexVector, InputIterator inputIterator);
    64  };
    65  
    66  // Specialized for GeoIntersectionContext.
    67  template <>
    68  class InputVectorBinder<GeoIntersectionContext, 1> :
    69      public InputVectorBinderBase<GeoIntersectionContext, 1, 1> {
    70    typedef InputVectorBinderBase<GeoIntersectionContext, 1, 1> super_t;
    71  
    72   public:
    73    explicit InputVectorBinder(GeoIntersectionContext context,
    74                                  std::vector<InputVector> inputVectors,
    75                                  uint32_t *indexVector, uint32_t *baseCounts,
    76                                  uint32_t startCount) : super_t(context,
    77                                                                 inputVectors,
    78                                                                 indexVector,
    79                                                                 baseCounts,
    80                                                                 startCount) {
    81    }
    82   public:
    83    template<typename ...InputIterators>
    84    int bind(InputIterators... boundInputIterators);
    85  };
    86  
    87  }  // namespace ares
    88  
    89  CGoCallResHandle GeoBatchIntersects(
    90      GeoShapeBatch geoShapes, InputVector points, uint32_t *indexVector,
    91      int indexVectorLength, uint32_t startCount, RecordID **recordIDVectors,
    92      int numForeignTables, uint32_t *outputPredicate, bool inOrOut,
    93      void *cudaStream, int device) {
    94    CGoCallResHandle resHandle = {0, nullptr};
    95    try {
    96  #ifdef RUN_ON_DEVICE
    97      cudaSetDevice(device);
    98  #endif
    99      ares::GeoIntersectionContext
   100          ctx(geoShapes, indexVectorLength, startCount,
   101              recordIDVectors, numForeignTables, outputPredicate, inOrOut,
   102              cudaStream);
   103      std::vector<InputVector> inputVectors = {points};
   104      ares::InputVectorBinder<ares::GeoIntersectionContext, 1>
   105          binder(ctx, inputVectors, indexVector, nullptr, startCount);
   106      resHandle.res = reinterpret_cast<void *>(binder.bind());
   107      CheckCUDAError("GeoBatchIntersects");
   108      return resHandle;
   109    } catch (const std::exception &e) {
   110      std::cerr << "Exception happened when doing GeoBatchIntersects:" << e.what()
   111                << std::endl;
   112      resHandle.pStrErr = strdup(e.what());
   113    }
   114    return resHandle;
   115  }
   116  
   117  CGoCallResHandle WriteGeoShapeDim(
   118      int shapeTotalWords, DimensionOutputVector dimOut,
   119      int indexVectorLengthBeforeGeo,
   120      uint32_t *outputPredicate, void *cudaStream, int device) {
   121    CGoCallResHandle resHandle = {nullptr, nullptr};
   122    try {
   123  #ifdef RUN_ON_DEVICE
   124      cudaSetDevice(device);
   125  #endif
   126      ares::write_geo_shape_dim(shapeTotalWords, dimOut,
   127                                indexVectorLengthBeforeGeo,
   128                                outputPredicate,
   129                                reinterpret_cast<cudaStream_t>(cudaStream));
   130      CheckCUDAError("WriteGeoShapeDim");
   131      return resHandle;
   132    } catch (const std::exception &e) {
   133      std::cerr << "Exception happened when doing GeoIntersectsJoin:" << e.what()
   134                << std::endl;
   135      resHandle.pStrErr = strdup(e.what());
   136    }
   137    return resHandle;
   138  }
   139  
   140  namespace ares {
   141  
   142  template<typename ...InputIterators>
   143  int InputVectorBinder<GeoIntersectionContext, 1>::bind(
   144      InputIterators... boundInputIterators) {
   145    InputVector input = super_t::inputVectors[0];
   146    uint32_t *indexVector = super_t::indexVector;
   147    uint32_t startCount = super_t::startCount;
   148    GeoIntersectionContext context = super_t::context;
   149  
   150    if (input.Type == VectorPartyInput) {
   151      VectorPartySlice points = input.Vector.VP;
   152      if (points.DataType != GeoPoint) {
   153        throw std::invalid_argument(
   154            "only geo point column are allowed in geo_intersects");
   155      }
   156  
   157      if (points.BasePtr == nullptr) {
   158        return 0;
   159      }
   160  
   161      uint8_t *basePtr = points.BasePtr;
   162      uint32_t nullsOffset = points.NullsOffset;
   163      uint32_t valueOffset = points.ValuesOffset;
   164      uint8_t startingIndex = points.StartingIndex;
   165      uint8_t stepInBytes = 8;
   166      uint32_t length = points.Length;
   167      auto columnIter = make_column_iterator<GeoPointT>(
   168          indexVector, nullptr, startCount, basePtr, nullsOffset, valueOffset,
   169          length, stepInBytes, startingIndex);
   170      return context.run(indexVector, columnIter);
   171    } else if (input.Type == ForeignColumnInput) {
   172      DataType dataType = input.Vector.ForeignVP.DataType;
   173  
   174      if (dataType != GeoPoint) {
   175        throw std::invalid_argument(
   176            "only geo point column are allowed in geo_intersects");
   177      }
   178      // Note: for now foreign vectors are dimension table columns
   179      // that are not compressed nor pre sliced
   180      RecordID *recordIDs = input.Vector.ForeignVP.RecordIDs;
   181      const int32_t numBatches = input.Vector.ForeignVP.NumBatches;
   182      const int32_t baseBatchID = input.Vector.ForeignVP.BaseBatchID;
   183      VectorPartySlice *vpSlices = input.Vector.ForeignVP.Batches;
   184      const int32_t numRecordsInLastBatch =
   185          input.Vector.ForeignVP.NumRecordsInLastBatch;
   186      bool hasDefault = input.Vector.ForeignVP.DefaultValue.HasDefault;
   187      DefaultValue defaultValueStruct = input.Vector.ForeignVP.DefaultValue;
   188      uint8_t stepInBytes = getStepInBytes(dataType);
   189  
   190      ForeignTableIterator<GeoPointT> *vpIters = prepareForeignTableIterators(
   191          numBatches,
   192          vpSlices,
   193          stepInBytes,
   194          hasDefault,
   195          defaultValueStruct.Value.GeoPointVal,
   196          context.getStream());
   197      int res =
   198          context.run(indexVector, RecordIDJoinIterator<GeoPointT>(
   199              recordIDs,
   200              numBatches,
   201              baseBatchID,
   202              vpIters,
   203              numRecordsInLastBatch,
   204              nullptr, 0));
   205      deviceFree(vpIters);
   206      return res;
   207    }
   208    throw std::invalid_argument(
   209        "Unsupported data type " + std::to_string(__LINE__)
   210            + "for geo intersection contexts");
   211  }
   212  
   213  // GeoRemoveFilter
   214  template<typename Value>
   215  struct GeoRemoveFilter {
   216    explicit GeoRemoveFilter(GeoPredicateIterator predicates, bool inOrOut)
   217        : predicates(predicates), inOrOut(inOrOut) {}
   218  
   219    GeoPredicateIterator predicates;
   220    bool inOrOut;
   221  
   222    __host__ __device__
   223    bool operator()(const Value &index) {
   224      return inOrOut == predicates[thrust::get<0>(index)] < 0;
   225    }
   226  };
   227  
   228  // actual function for executing geo filter in batch.
   229  template<typename IndexZipIterator>
   230  int GeoIntersectionContext::executeRemoveIf(IndexZipIterator indexZipIterator) {
   231    GeoPredicateIterator predIter(outputPredicate, geoShapes.TotalWords);
   232    GeoRemoveFilter<
   233        typename IndexZipIterator::value_type> removeFilter(predIter, inOrOut);
   234  
   235    return thrust::remove_if(GET_EXECUTION_POLICY(cudaStream), indexZipIterator,
   236                             indexZipIterator + indexVectorLength, removeFilter) -
   237        indexZipIterator;
   238  }
   239  
   240  template<typename GeoIter>
   241  __global__
   242  void geo_for_each_kernel(GeoIter iter, int64_t count) {
   243    int64_t start = threadIdx.x + blockIdx.x * blockDim.x;
   244    int64_t step = blockDim.x * gridDim.x;
   245    iter += start;
   246    for (int64_t i = start; i < count; i += step, iter += step) {
   247      // call dereference of GeoIter will do the actual geo intersection
   248      // algorithm.
   249      *iter;
   250    }
   251  }
   252  
   253  // run intersection algorithm for points and 1 geoshape, side effect is
   254  // modifying output predicate vector
   255  template<typename InputIterator>
   256  void calculateBatchIntersection(GeoShapeBatch geoShapes,
   257                                  InputIterator geoPoints, uint32_t *indexVector,
   258                                  int indexVectorLength, uint32_t startCount,
   259                                  uint32_t *outputPredicate, bool inOrOut,
   260                                  cudaStream_t cudaStream) {
   261    auto geoIter = make_geo_batch_intersect_iterator(geoPoints, geoShapes,
   262                                                     outputPredicate, inOrOut);
   263    int64_t iterLength = (int64_t) indexVectorLength * geoShapes.TotalNumPoints;
   264  
   265  #ifdef RUN_ON_DEVICE
   266    int min_grid_size, block_size;
   267    cudaOccupancyMaxPotentialBlockSize(&min_grid_size,
   268                                       &block_size,
   269                                       geo_for_each_kernel<decltype(geoIter)>);
   270    CheckCUDAError("cudaOccupancyMaxPotentialBlockSize");
   271    // find needed gridsize
   272    int64_t needed_grid_size = (iterLength + block_size - 1) / block_size;
   273    int64_t grid_size = std::min(static_cast<int64_t>(min_grid_size),
   274        needed_grid_size);
   275    geo_for_each_kernel<<<grid_size, block_size, 0, cudaStream>>>(
   276        geoIter, iterLength);
   277    CheckCUDAError("geo_for_each_kernel");
   278    // Wait for kernel to finish.
   279    cudaStreamSynchronize(cudaStream);
   280    CheckCUDAError("cudaStreamSynchronize");
   281  #else
   282    // In host mode, thrust for_each is just doing a sequential
   283    // loop, so there is no overflow issue as in device mode.
   284    thrust::for_each(GET_EXECUTION_POLICY(cudaStream), geoIter,
   285        geoIter + iterLength, VoidFunctor());
   286  #endif
   287  }
   288  
   289  template<typename InputIterator>
   290  int GeoIntersectionContext::run(uint32_t *indexVector,
   291                                  InputIterator inputIterator) {
   292    calculateBatchIntersection(geoShapes, inputIterator,
   293                               indexVector, indexVectorLength,
   294                               startCount, outputPredicate,
   295                               inOrOut, cudaStream);
   296  
   297    switch (numForeignTables) {
   298      #define EXECUTE_GEO_REMOVE_IF(NumTotalForeignTables) \
   299      case NumTotalForeignTables: { \
   300        IndexZipIteratorMaker<NumTotalForeignTables> maker; \
   301        return executeRemoveIf(maker.make(indexVector, recordIDVectors)); \
   302      }
   303  
   304      EXECUTE_GEO_REMOVE_IF(0)
   305      EXECUTE_GEO_REMOVE_IF(1)
   306      EXECUTE_GEO_REMOVE_IF(2)
   307      EXECUTE_GEO_REMOVE_IF(3)
   308      EXECUTE_GEO_REMOVE_IF(4)
   309      EXECUTE_GEO_REMOVE_IF(5)
   310      EXECUTE_GEO_REMOVE_IF(6)
   311      EXECUTE_GEO_REMOVE_IF(7)
   312      EXECUTE_GEO_REMOVE_IF(8)
   313      default:throw std::invalid_argument("only support up to 8 foreign tables");
   314    }
   315  }
   316  
   317  struct is_non_negative {
   318    __host__ __device__
   319    bool operator()(const int val) {
   320      return val >= 0;
   321    }
   322  };
   323  
   324  void write_geo_shape_dim(
   325      int shapeTotalWords,
   326      DimensionOutputVector dimOut, int indexVectorLengthBeforeGeo,
   327      uint32_t *outputPredicate, cudaStream_t cudaStream) {
   328    typedef thrust::tuple<int8_t, uint8_t> DimensionOutputIterValue;
   329    GeoPredicateIterator geoPredicateIter(outputPredicate, shapeTotalWords);
   330  
   331    auto zippedShapeIndexIter = thrust::make_zip_iterator(thrust::make_tuple(
   332        geoPredicateIter, thrust::constant_iterator<uint8_t>(1)));
   333  
   334    thrust::copy_if(
   335        GET_EXECUTION_POLICY(cudaStream),
   336        zippedShapeIndexIter, zippedShapeIndexIter + indexVectorLengthBeforeGeo,
   337        geoPredicateIter,
   338        ares::make_dimension_output_iterator<uint8_t>(dimOut.DimValues,
   339                                                      dimOut.DimNulls),
   340                                                      is_non_negative());
   341  }
   342  
   343  }  // namespace ares