github.com/rohankumardubey/aresdb@v0.0.2-0.20190517170215-e54e3ca06b9c/query/iterator.hpp

// Copyright (c) 2017-2018 Uber Technologies, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#ifndef QUERY_ITERATOR_HPP_
#define QUERY_ITERATOR_HPP_

#include <cuda_runtime.h>
#include <thrust/detail/internal_functional.h>
#include <thrust/iterator/constant_iterator.h>
#include <thrust/iterator/iterator_adaptor.h>
#include <thrust/iterator/permutation_iterator.h>
#include <thrust/iterator/zip_iterator.h>
#include <thrust/tuple.h>
#include <thrust/iterator/detail/normal_iterator.h>
#include <cfloat>
#include <cmath>
#include <tuple>
#include <type_traits>
#include "query/time_series_aggregate.h"
#include "query/utils.hpp"

namespace ares {

// VectorPartyIterator iterates <value, validity> tuples against a column
// (vector party) under 3 modes:
//   1: all values are present
//   2: uncompressed
//   3: compressed
// based on the presence of 3 vectors (values, nulls, counts).
// Only 4 types of values vectors are supported here:
// Uint32, Int32, Float32 and Bool. A Bool vector is packed into 1 bit per
// value. More details can be found here:
// https://github.com/uber/aresdb/wiki/VectorStore
//
// This struct aims at reducing the overall struct size so that we use as few
// registers per thread as possible when launching kernels.
// We made the following efforts to reduce the overall size:
// 1. Put everything in a single struct to avoid too much padding.
// 2. Pass the values pointer only, and pass nulls and counts as offsets to
//    the values pointer. When allocating device space, we store values,
//    nulls and counts consecutively.
// 3. Base counts and start count share the same memory address.
//
// A mode 0 vp is modeled as a constant vector, so the values pointer is
// always present. Length is only used by compressed columns.
//
// The mode of the column is judged by the following logic:
// If valuesOffset == 0, it's a mode 1 vector.
// Otherwise, if nullsOffset == 0, it's a mode 2 vector.
// Otherwise, it's a mode 3 vector.
template <typename Value>
class VectorPartyIterator
    : public thrust::iterator_adaptor<
          VectorPartyIterator<Value>, uint32_t *, thrust::tuple<Value, bool>,
          thrust::use_default, thrust::use_default, thrust::tuple<Value, bool>,
          thrust::use_default> {
 public:
  friend class thrust::iterator_core_access;

  typedef thrust::iterator_adaptor<VectorPartyIterator<Value>,
                                   uint32_t *, thrust::tuple<Value, bool>,
                                   thrust::use_default, thrust::use_default,
                                   thrust::tuple<Value, bool>,
                                   thrust::use_default> super_t;

  __host__ __device__ VectorPartyIterator() {}
  // The base iterator stores the base counts vector when it is present;
  // otherwise the start count itself is stored in the pointer value.
  __host__ __device__
  VectorPartyIterator(
      uint32_t *baseCounts,
      const uint32_t startCount,
      uint8_t *basePtr,
      uint32_t nullsOffset,
      uint32_t valuesOffset,
      uint32_t length,
      uint8_t stepInBytes,
      uint8_t nullBitOffset)
      : super_t(baseCounts != nullptr
                ? baseCounts : reinterpret_cast<uint32_t *>(startCount)),
        basePtr(basePtr),
        nullsOffset(nullsOffset),
        valuesOffset(valuesOffset),
        length(length),
        currentPtrIndex(0),
        stepInBytes(stepInBytes),
        nullBitOffset(nullBitOffset) {
    mode = judge_mode();
    hasBaseCounts = baseCounts != nullptr;
  }

 private:
  uint8_t *basePtr;
  uint32_t nullsOffset;   // Offset of nullsPtr relative to basePtr.
  uint32_t valuesOffset;  // Offset of valuesPtr relative to basePtr.
  uint32_t currentPtrIndex;
  uint32_t length;
  uint8_t stepInBytes;
  uint8_t hasBaseCounts;  // Whether the base iterator is a base count vector
                          // or a start count.
  uint8_t nullBitOffset;  // Starting bit offset in the null vector.
  uint8_t mode;  // Stored for convenience; this does not add to the final
                 // space of this iterator.

  __host__ __device__
  uint8_t judge_mode() const {
    if (valuesOffset == 0) {
      return 1;
    }
    if (nullsOffset == 0) {
      return 2;
    }
    return 3;
  }

  // offset must be greater than 0, otherwise it may lead to access to an
  // undesired location.
  __host__ __device__
  bool get_null() const {
    if (mode >= 2) {
      return get_bool(basePtr + nullsOffset);
    }
    return true;
  }

  __host__ __device__
  bool get_bool(uint8_t *ptr) const {
    uint32_t nullIndex = currentPtrIndex + nullBitOffset;
    return ptr[nullIndex / 8] & (1 << nullIndex % 8);
  }

  // Overloaded functions for different value types.
  __host__ __device__
  uint32_t get_value(uint32_t *values) const {
    uint8_t *ptr =
        reinterpret_cast<uint8_t *>(values) + currentPtrIndex * stepInBytes;
    switch (stepInBytes) {
      case 2: return *reinterpret_cast<uint16_t *>(ptr);
      case 4: return *reinterpret_cast<uint32_t *>(ptr);
      default: return *ptr;
    }
  }

  __host__ __device__
  int32_t get_value(int32_t *values) const {
    int8_t *ptr =
        reinterpret_cast<int8_t *>(values) + currentPtrIndex * stepInBytes;
    switch (stepInBytes) {
      case 2: return *reinterpret_cast<int16_t *>(ptr);
      case 4: return *reinterpret_cast<int32_t *>(ptr);
      default: return *ptr;
    }
  }

  __host__ __device__
  float_t get_value(float_t *values) const {
    return values[currentPtrIndex];
  }

  __host__ __device__
  GeoPointT get_value(GeoPointT *values) const {
    return values[currentPtrIndex];
  }

  __host__ __device__
  int64_t get_value(int64_t *values) const {
    return values[currentPtrIndex];
  }

  __host__ __device__
  UUIDT get_value(UUIDT *values) const {
    return values[currentPtrIndex];
  }

  __host__ __device__
  bool get_value(bool *values) const {
    return get_bool(reinterpret_cast<uint8_t *>(values));
  }

  __host__ __device__
  Value get_value() const {
    return get_value(reinterpret_cast<Value *>(basePtr + valuesOffset));
  }
  // Should be called only if it's a mode 3 vector.
  __host__ __device__
  uint32_t get_count(uint32_t index) const {
    return reinterpret_cast<uint32_t *>(basePtr)[currentPtrIndex + index];
  }

  __host__ __device__
  thrust::tuple<Value, bool> dereference() const {
    return thrust::make_tuple(get_value(), get_null());
  }

  // For a compressed vp, the moving strategy follows these steps:
  // 1. If the offset still falls within the current count range, we do not
  //    move the iterator.
  // 2. If the offset is less than the warp size (32 or 64), we do a linear
  //    search.
  // 3. Otherwise, we do a binary search to go to the correct position.
  // It works best if n is larger than 0 since we can do the quick check and
  // the linear check; for negative n we always fall back to binary search.
  __host__ __device__
  void advance(typename super_t::difference_type n) {
    if (hasBaseCounts) {
      this->base_reference() += n;
    } else {
      this->base_reference() = reinterpret_cast<uint32_t *>(
          reinterpret_cast<uintptr_t>(this->base_reference()) + n);
    }

    if (mode == 3) {
      uint32_t newIndex;
      if (hasBaseCounts) {
        newIndex = *this->base_reference();
      } else {
        newIndex = reinterpret_cast<uintptr_t>(this->base_reference());
      }

      // quick check to avoid binary search
      if (newIndex >= get_count(0)
          && newIndex < get_count(1)) {
        return;
      }

      // do linear search if n is less than or equal to the warp size.
      if (n <= WARP_SIZE && n >= 0) {
        for (int i = 1; i < length - currentPtrIndex; i++) {
          if (newIndex >= get_count(i)
              && newIndex < get_count(i + 1)) {
            currentPtrIndex += i;
            return;
          }
        }
      }

      uint32_t first = currentPtrIndex;
      uint32_t last = length;

      if (n < 0) {
        first = 0;
        last = currentPtrIndex;
      }

      // This algorithm computes upper_bound - 1 of [first, last),
      // which is the last element in [first, last) that is
      // smaller than or equal to the given index.
      while (first < last) {
        uint32_t mid = first + (last - first) / 2;
        if (get_count(mid) > newIndex) {
          last = mid;
        } else {
          first = mid + 1;
        }
      }
      currentPtrIndex = first - 1;
    } else {
      // If this column is not compressed, the counting iterator is simply a
      // thrust counting iterator, since the base count column should be
      // uncompressed as well. We can safely advance our zip iterator by n.
      currentPtrIndex += n;
    }
  }

  __host__ __device__
  void increment() {
    advance(1);
  }

  __host__ __device__
  void decrement() {
    advance(-1);
  }
};

template <>
class VectorPartyIterator<GeoPointT>
    : public thrust::iterator_adaptor<
          VectorPartyIterator<GeoPointT>, uint32_t *,
          thrust::tuple<GeoPointT, bool>, thrust::use_default,
          thrust::use_default, thrust::tuple<GeoPointT, bool>,
          thrust::use_default> {
 public:
  friend class thrust::iterator_core_access;

  typedef thrust::iterator_adaptor<
      VectorPartyIterator<GeoPointT>, uint32_t *,
      thrust::tuple<GeoPointT, bool>, thrust::use_default, thrust::use_default,
      thrust::tuple<GeoPointT, bool>, thrust::use_default>
      super_t;

  __host__ __device__ VectorPartyIterator() {}

  __host__ __device__ VectorPartyIterator(
      uint32_t *baseCounts, const uint32_t startCount, uint8_t *basePtr,
      uint32_t nullsOffset, uint32_t valuesOffset, uint32_t length,
      uint8_t stepInBytes, uint8_t nullBitOffset)
      : super_t(reinterpret_cast<uint32_t *>(startCount)),
        basePtr(basePtr),
        valuesOffset(valuesOffset),
        currentPtrIndex(0),
        nullBitOffset(nullBitOffset) {
  }

 private:
  uint8_t *basePtr;
  uint32_t currentPtrIndex;
  uint32_t valuesOffset;
  uint8_t nullBitOffset;

  __host__ __device__ bool get_null() const {
    // mode 1
    if (valuesOffset == 0) {
      return true;
    }
    return get_bool(basePtr);
  }

  __host__ __device__ GeoPointT get_value() const {
    return reinterpret_cast<GeoPointT *>(basePtr + valuesOffset)[currentPtrIndex];
  }

  __host__ __device__ bool get_bool(uint8_t *ptr) const {
    uint32_t nullIndex = currentPtrIndex + nullBitOffset;
    return ptr[nullIndex / 8] & (1 << nullIndex % 8);
  }

  __host__ __device__ thrust::tuple<GeoPointT, bool> dereference() const {
    return thrust::make_tuple(get_value(), get_null());
  }

  __host__ __device__ void advance(typename super_t::difference_type n) {
    this->base_reference() = reinterpret_cast<uint32_t *>(
        reinterpret_cast<uintptr_t>(this->base_reference()) + n);
    currentPtrIndex += n;
  }

  __host__ __device__ void increment() { advance(1); }

  __host__ __device__ void decrement() { advance(-1); }
};

template<typename Value>
using ColumnIterator =
    thrust::permutation_iterator<VectorPartyIterator<Value>, uint32_t *>;

// Helper function for creating a ColumnIterator without spelling out the
// full iterator type.
template <class Value>
ColumnIterator<Value> make_column_iterator(
    uint32_t *indexVector, uint32_t *baseCounts, uint32_t startCount,
    uint8_t *basePtr, uint32_t nullsOffset, uint32_t valuesOffset,
    uint32_t length, uint8_t stepInBytes, uint8_t nullBitOffset) {
  return thrust::make_permutation_iterator(
      VectorPartyIterator<Value>(baseCounts, startCount, basePtr, nullsOffset,
                                 valuesOffset, length, stepInBytes,
                                 nullBitOffset),
      indexVector);
}
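// Usage sketch for make_column_iterator (illustrative only; the buffers,
// offsets and sizes below are assumptions for the example, not values used
// elsewhere in this file). With nullsOffset == 0 and valuesOffset != 0 the
// iterator judges the column as mode 2 (uncompressed with nulls):
//
//   uint32_t indexVector[] = {0, 1, 2, 3};
//   auto col = ares::make_column_iterator<uint32_t>(
//       indexVector, /*baseCounts=*/nullptr, /*startCount=*/0, basePtr,
//       /*nullsOffset=*/0, /*valuesOffset=*/64, /*length=*/4,
//       /*stepInBytes=*/4, /*nullBitOffset=*/0);
//   thrust::tuple<uint32_t, bool> row2 = col[2];  // <value, validity> of row 2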
// SimpleIterator combines iterators for 3 different cases:
// 1. Constant value.
// 2. Mode 0 column, equivalent to a constant value.
// 3. Scratch space.
//
// We use a value pointer and a null offset to represent values and nulls.
// If values or nulls are missing, we reuse the space to store constant
// values.
// Only 4 Value types are supported: Uint32, Int32, Float32 and Bool.
// Each bool value takes 1 byte.
// For constants, we use the upper 32 bits of the value pointer to store the
// default value and the lower 32 bits as the position of the iterator.
template<typename Value>
class SimpleIterator :
    public thrust::iterator_adaptor<SimpleIterator<Value>,
                                    Value *, thrust::tuple<Value, bool>,
                                    thrust::use_default, thrust::use_default,
                                    thrust::tuple<Value, bool>,
                                    thrust::use_default> {
 public:
  friend class thrust::iterator_core_access;

  typedef thrust::iterator_adaptor<SimpleIterator<Value>,
                                   Value *, thrust::tuple<Value, bool>,
                                   thrust::use_default, thrust::use_default,
                                   thrust::tuple<Value, bool>,
                                   thrust::use_default> super_t;

  __host__ __device__ SimpleIterator() {}

  SimpleIterator(
      Value *values,
      uint32_t nullsOffset,
      Value defaultValue,
      bool defaultNull
  )
      : super_t(values != nullptr
                ? values : reinterpret_cast<Value *>(
                    *reinterpret_cast<uintptr_t *>(&defaultValue) << 32)),
        useDefault(values == nullptr) {
    if (useDefault) {
      nulls.defaultNull = defaultNull;
    } else {
      nulls.nullsOffset = nullsOffset;
    }
  }

 private:
  union {
    uint32_t nullsOffset;
    bool defaultNull;
  } nulls;
  bool useDefault;

  __host__ __device__
  thrust::tuple<Value, bool> dereference() const {
    if (useDefault) {
      uintptr_t val = reinterpret_cast<uintptr_t>(this->base_reference()) >> 32;
      return thrust::make_tuple(*reinterpret_cast<Value *>(&val),
                                nulls.defaultNull);
    }
    return thrust::make_tuple(*this->base_reference(),
                              *(reinterpret_cast<bool *>(this->base_reference())
                                  + nulls.nullsOffset));
  }

  __host__ __device__
  void advance(typename super_t::difference_type n) {
    this->base_reference() += n;
    if (!useDefault) {
      nulls.nullsOffset += (1 - sizeof(Value)) * n;
    }
  }

  __host__ __device__
  void increment() {
    advance(1);
  }

  __host__ __device__
  void decrement() {
    advance(-1);
  }
};

template <typename Value>
struct ConstantIterator {
  typedef typename std::conditional<
      std::is_same<Value, UUIDT>::value || std::is_same<Value, int64_t>::value,
      thrust::constant_iterator<thrust::tuple<Value, bool>>,
      SimpleIterator<Value>>::type type;
};

template <typename Value>
inline typename ConstantIterator<Value>::type make_constant_iterator(
    Value defaultValue, bool defaultNull) {
  return SimpleIterator<Value>(nullptr, 0, defaultValue, defaultNull);
}

template <>
inline typename ConstantIterator<int64_t>::type make_constant_iterator(
    int64_t defaultValue, bool defaultNull) {
  return thrust::make_constant_iterator(
      thrust::make_tuple<int64_t, bool>(defaultValue, defaultNull));
}

template <>
inline typename ConstantIterator<UUIDT>::type make_constant_iterator(
    UUIDT defaultValue, bool defaultNull) {
  return thrust::make_constant_iterator(
      thrust::make_tuple<UUIDT, bool>(defaultValue, defaultNull));
}

using GeoSimpleIterator = thrust::constant_iterator<thrust::tuple<GeoPointT,
                                                                  bool>>;
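// Usage sketch for make_constant_iterator (an illustrative assumption of how
// a query literal or missing column could be fed into a kernel). For int64_t
// and UUIDT this resolves to a thrust::constant_iterator; for the other
// types it is a SimpleIterator that packs the default value into the upper
// 32 bits of its base pointer:
//
//   auto lit = ares::make_constant_iterator<int32_t>(5, true);
//   // *lit yields thrust::make_tuple(5, true) at every position, where the
//   // second element is the validity flag.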
// DimensionOutputIterator is for writing individual dimension transform
// results into a consecutive chunk of memory which will later be used
// as the key for sort and reduce.
template<typename Value>
using DimensionOutputIterator = thrust::zip_iterator<
    thrust::tuple<thrust::detail::normal_iterator<Value *>,
                  thrust::detail::normal_iterator<bool *> > >;

template<typename Value>
DimensionOutputIterator<Value> make_dimension_output_iterator(
    uint8_t *dimValues, uint8_t *dimNulls) {
  return thrust::make_zip_iterator(
      thrust::make_tuple(reinterpret_cast<Value *>(dimValues),
                         reinterpret_cast<bool *>(dimNulls)));
}

template<typename Value>
SimpleIterator<Value> make_scratch_space_input_iterator(
    uint8_t *valueIter, uint32_t nullOffset) {
  return SimpleIterator<Value>(reinterpret_cast<Value *>(valueIter),
                               nullOffset, 0, 0);
}

template<typename Value>
using ScratchSpaceOutputIterator = thrust::zip_iterator<
    thrust::tuple<
        thrust::detail::normal_iterator<Value *>,
        thrust::detail::normal_iterator<bool *> > >;

// Helper function for creating a ScratchSpaceOutputIterator without
// spelling out the full iterator type.
template<typename Value>
ScratchSpaceOutputIterator<Value> make_scratch_space_output_iterator(
    Value *v, uint32_t nullOffset) {
  return thrust::make_zip_iterator(
      thrust::make_tuple(v, reinterpret_cast<bool *>(v) + nullOffset));
}

template<typename Value>
class MeasureProxy {
 public:
  __host__ __device__
  MeasureProxy(Value *outputIter, Value identity, uint32_t count, bool isAvg)
      : outputIter(outputIter),
        count(count),
        identity(identity),
        isAvg(isAvg) {
  }

  __host__ __device__
  MeasureProxy operator=(thrust::tuple<Value, bool> t) const {
    if (!thrust::get<1>(t)) {
      *outputIter = identity;
    } else if (isAvg) {
      return assignAvg(t);
    } else {
      *outputIter = thrust::get<0>(t) * count;
    }
    return *this;
  }

  __host__ __device__
  MeasureProxy assignAvg(thrust::tuple<Value, bool> t) const {
    // Each operand is 64 bits where the higher 32 bits are used for the
    // count and the lower 32 bits for the intermediate average.
    float_t v = thrust::get<0>(t);
    *reinterpret_cast<float_t *>(outputIter) = v;
    *(reinterpret_cast<uint32_t *>(outputIter) + 1) = count;
    return *this;
  }

 private:
  Value *outputIter;
  Value identity;
  uint32_t count;
  bool isAvg;
};
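// Behavioral sketch of MeasureProxy::operator= above (illustrative only):
// nulls collapse to the aggregation identity, and for sum the value is
// multiplied by the row count recovered from the compressed column.
//
//   float out;
//   ares::MeasureProxy<float> p(&out, /*identity=*/0.0f, /*count=*/3,
//                               /*isAvg=*/false);
//   p = thrust::make_tuple(2.0f, true);   // out == 6.0f (value * count)
//   p = thrust::make_tuple(2.0f, false);  // out == 0.0f (identity)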
// MeasureOutputIterator is the output iterator for the final step of measure
// transformation and the input for aggregation. It needs to take care of
// 2 things:
// 1. Given a <value, validity> tuple, write to a single value vector,
//    writing null values as the identity value of the aggregation function.
// 2. If the base column is a compressed column, recover the original value
//    instead of the compressed value: e.g. if we have the value 3 with
//    count 3 and the aggregation function is sum, we need to write 3 * 3 to
//    the measure output iterator.
template<typename Value>
class MeasureOutputIterator : public thrust::iterator_adaptor<
    MeasureOutputIterator<Value>, Value *, thrust::tuple<Value, bool>,
    thrust::use_default, thrust::use_default, MeasureProxy<Value>,
    thrust::use_default> {
 public:
  friend class thrust::iterator_core_access;

  // Shorthand for the name of the iterator_adaptor we're deriving from.
  typedef thrust::iterator_adaptor<MeasureOutputIterator<Value>,
                                   Value *,
                                   thrust::tuple<Value, bool>,
                                   thrust::use_default,
                                   thrust::use_default,
                                   MeasureProxy<Value>,
                                   thrust::use_default> super_t;

  __host__ __device__
  MeasureOutputIterator(Value *base, const uint32_t *baseCounts,
                        const uint32_t *indexVector,
                        AggregateFunction aggFunc)
      : super_t(base),
        baseCounts(baseCounts),
        indexVector(indexVector) {
    // We don't need to apply the count for this value unless the aggregation
    // is a sum or a float average.
    if (!((aggFunc >= AGGR_SUM_UNSIGNED && aggFunc <= AGGR_SUM_FLOAT)
        || (aggFunc == AGGR_AVG_FLOAT))) {
      skipCount = true;
    }
    isAvg = aggFunc == AGGR_AVG_FLOAT;
    identity = get_identity_value<Value>(aggFunc);
  }

 private:
  const uint32_t *baseCounts;
  const uint32_t *indexVector;
  Value identity;
  bool skipCount = false;
  bool isAvg = false;

  __host__ __device__
  typename super_t::reference dereference() const {
    uint32_t index = *indexVector;
    uint32_t count = !skipCount && baseCounts != nullptr
        ? baseCounts[index + 1] - baseCounts[index] : 1;
    return MeasureProxy<Value>(this->base_reference(), identity, count, isAvg);
  }

  __host__ __device__
  void advance(typename super_t::difference_type n) {
    this->base_reference() += n;
    indexVector += n;
  }

  __host__ __device__
  void increment() {
    advance(1);
  }

  __host__ __device__
  void decrement() {
    advance(-1);
  }
};

// Helper function for creating a MeasureOutputIterator without specifying
// the Value template argument.
template<typename Value>
MeasureOutputIterator<Value> make_measure_output_iterator(
    Value *v, uint32_t *indexVector, uint32_t *baseCounts,
    AggregateFunction aggFunc) {
  return MeasureOutputIterator<Value>(v, baseCounts, indexVector, aggFunc);
}
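// Wiring sketch (an assumption about typical use, not code from this file):
// the measure output iterator is handed to a transform/copy step as the
// destination, so each written <value, validity> tuple lands in the plain
// measure vector with count expansion applied. measureValues, indexVector,
// baseCounts and tupleIter are hypothetical:
//
//   auto out = ares::make_measure_output_iterator<uint32_t>(
//       measureValues, indexVector, baseCounts, AGGR_SUM_UNSIGNED);
//   thrust::copy(tupleIter, tupleIter + n, out);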
class IndexProxy {
 public:
  __host__ __device__
  explicit IndexProxy(uint32_t *outputIndex)
      : outputIndex(outputIndex) {
  }

  // Parameter is a tuple of <index row number, index value>.
  __host__ __device__
  IndexProxy operator=(thrust::tuple<uint32_t, uint32_t> t) const {
    *outputIndex = thrust::get<1>(t);
    return *this;
  }

 private:
  uint32_t *outputIndex;
};

// IndexOutputIterator is the output iterator to write filtered indexes to.
// The input is a tuple of <index row number, index value>. We discard the
// index row number and only write the filtered index value into the output
// index value vector, which is a uint32_t pointer.
class IndexOutputIterator :
    public thrust::iterator_adaptor<IndexOutputIterator,
                                    uint32_t *,
                                    thrust::tuple<uint32_t, uint32_t>,
                                    thrust::use_default,
                                    thrust::use_default,
                                    IndexProxy,
                                    thrust::use_default> {
 public:
  friend class thrust::iterator_core_access;

  // Shorthand for the name of the iterator_adaptor we're deriving from.
  typedef thrust::iterator_adaptor<IndexOutputIterator,
                                   uint32_t *,
                                   thrust::tuple<uint32_t, uint32_t>,
                                   thrust::use_default,
                                   thrust::use_default,
                                   IndexProxy,
                                   thrust::use_default> super_t;

  __host__ __device__
  explicit IndexOutputIterator(uint32_t *base)
      : super_t(base) {
  }

 private:
  __host__ __device__
  typename super_t::reference dereference() const {
    return IndexProxy(this->base_reference());
  }
};

// The column mode for foreign table columns is either 0 or 2, which is
// represented by two different types of iterators: SimpleIterator and
// VectorPartyIterator; the modes of batch columns can interleave.
// To make all iterators have the same size, we put both cases into a
// single union and keep a boolean flag telling which iterator it is.
template <typename Value>
struct ForeignTableIterator {
  __host__ __device__ ForeignTableIterator() {}

  __host__ __device__
  ForeignTableIterator(const ForeignTableIterator<Value> &other) {
    this->isConst = other.isConst;
    if (this->isConst) {
      this->iter.constantIter = other.iter.constantIter;
    } else {
      this->iter.columnIter = other.iter.columnIter;
    }
  }

  __host__ __device__ ForeignTableIterator &operator=(
      const ForeignTableIterator<Value> &other) {
    this->isConst = other.isConst;
    if (this->isConst) {
      this->iter.constantIter = other.iter.constantIter;
    } else {
      this->iter.columnIter = other.iter.columnIter;
    }
    return *this;
  }

  explicit ForeignTableIterator(VectorPartyIterator<Value> columnIter)
      : isConst(false) {
    iter.columnIter = columnIter;
  }

  explicit ForeignTableIterator(
      typename ConstantIterator<Value>::type constantIter)
      : isConst(true) {
    iter.constantIter = constantIter;
  }

  union ForeignBatchIter {
    VectorPartyIterator<Value> columnIter;
    typename ConstantIterator<Value>::type constantIter;
    __host__ __device__ ForeignBatchIter() {}
  } iter;
  bool isConst;
};
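// Behavioral sketch of IndexOutputIterator (illustrative): assigning a
// <index row number, index value> tuple through the iterator keeps only the
// index value.
//
//   ares::IndexOutputIterator out(indexVector);  // indexVector: uint32_t *
//   *out = thrust::make_tuple(7u, 42u);          // indexVector[0] == 42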
// RecordIDJoinIterator reads the foreign table column.
template<typename Value>
class RecordIDJoinIterator
    : public thrust::iterator_adaptor<
          RecordIDJoinIterator<Value>, RecordID *,
          thrust::tuple<Value, bool>, thrust::use_default, thrust::use_default,
          thrust::tuple<Value, bool>,
          thrust::use_default> {
 public:
  friend class thrust::iterator_core_access;

  typedef ForeignTableIterator<Value> ValueIter;

  typedef thrust::iterator_adaptor<
      RecordIDJoinIterator<Value>, RecordID *,
      thrust::tuple<Value, bool>, thrust::use_default, thrust::use_default,
      thrust::tuple<Value, bool>, thrust::use_default>
      super_t;

  __host__ __device__ RecordIDJoinIterator(
      RecordID *base,
      int32_t numBatches,
      int32_t baseBatchID,
      ValueIter *batches,
      int32_t numRecordsInLastBatch,
      int16_t *timezoneLookupTable,
      int16_t timezoneLookupSize)
      : super_t(base),
        batches(batches),
        baseBatchID(baseBatchID),
        numBatches(numBatches),
        numRecordsInLastBatch(numRecordsInLastBatch),
        timezoneLookupTable(timezoneLookupTable),
        timezoneLookupSize(timezoneLookupSize) {
  }

 private:
  ValueIter *batches;
  int32_t baseBatchID;
  int32_t numBatches;
  int32_t numRecordsInLastBatch;
  int16_t *timezoneLookupTable;
  int16_t timezoneLookupSize;

  __host__ __device__
  thrust::tuple<GeoPointT, bool> timezoneLookup(
      thrust::tuple<GeoPointT, bool> val) const {
    return val;
  }

  __host__ __device__ thrust::tuple<UUIDT, bool> timezoneLookup(
      thrust::tuple<UUIDT, bool> val) const {
    return val;
  }

  template<typename V>
  __host__ __device__
  thrust::tuple<V, bool> timezoneLookup(
      thrust::tuple<V, bool> val) const {
    if (timezoneLookupTable) {
      // If the timezone lookup table is not null, we must be iterating
      // an enum column.
      int enumVal = static_cast<int>(thrust::get<0>(val));
      if (enumVal < timezoneLookupSize) {
        return thrust::make_tuple(
            timezoneLookupTable[enumVal], thrust::get<1>(val));
      } else {
        return thrust::make_tuple(0, thrust::get<1>(val));
      }
    }
    return val;
  }

  __host__ __device__ typename super_t::reference dereference() const {
    RecordID recordID = *this->base_reference();
    if (recordID.batchID && (recordID.batchID - baseBatchID < numBatches - 1 ||
                             recordID.index < numRecordsInLastBatch)) {
      ForeignTableIterator<Value>
          iter = batches[recordID.batchID - baseBatchID];
      if (iter.isConst) {
        return iter.iter.constantIter[recordID.index];
      }
      thrust::tuple<Value, bool> val = iter.iter.columnIter[recordID.index];
      return timezoneLookup(val);
    }
    Value defaultValue;
    return thrust::make_tuple(defaultValue, false);
  }
};
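// Lookup sketch for RecordIDJoinIterator::dereference above (illustrative):
// each RecordID names a batch and a row inside it, so reading position i
// roughly does
//
//   RecordID r = recordIDs[i];
//   thrust::tuple<Value, bool> v = batches[r.batchID - baseBatchID][r.index];
//
// falling back to <defaultValue, false> when the record points past the
// rows of the last batch.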
// DimensionHashIterator reads a DimensionColumnVector and produces hashed
// values.
class DimensionHashIterator
    : public thrust::iterator_adaptor<
          DimensionHashIterator, uint32_t *, uint64_t, thrust::use_default,
          thrust::use_default, uint64_t, thrust::use_default> {
 public:
  friend class thrust::iterator_core_access;
  typedef thrust::iterator_adaptor<DimensionHashIterator, uint32_t *, uint64_t,
                                   thrust::use_default, thrust::use_default,
                                   uint64_t, thrust::use_default>
      super_t;

  __host__ __device__
  DimensionHashIterator(uint8_t *dimValues, uint32_t *indexVector,
                        uint8_t _numDimsPerDimWidth[NUM_DIM_WIDTH], int length)
      : super_t(indexVector), dimValues(dimValues), length(length) {
    totalNumDims = 0;
    rowBytes = 0;
    for (int i = 0; i < NUM_DIM_WIDTH; i++) {
      numDimsPerDimWidth[i] = _numDimsPerDimWidth[i];
      totalNumDims += _numDimsPerDimWidth[i];
      uint8_t dimBytes = 1 << (NUM_DIM_WIDTH - 1 - i);
      rowBytes += dimBytes * numDimsPerDimWidth[i];
    }
    nullValues = dimValues + rowBytes * length;
    // Include null values, 1 byte per dim.
    rowBytes += totalNumDims;
  }

 private:
  uint8_t *dimValues;
  uint8_t *nullValues;
  uint8_t numDimsPerDimWidth[NUM_DIM_WIDTH];
  uint8_t totalNumDims;
  uint8_t rowBytes;
  int length;

  __host__ __device__ typename super_t::reference dereference() const {
    uint32_t index = *this->base_reference();
    uint8_t dimRow[MAX_DIMENSION_BYTES] = {0};
    uint64_t hashedOutput[2];
    // read from
    uint8_t *inputValueStart = dimValues;
    uint8_t *inputNullStart = nullValues;
    // write to
    uint8_t *outputValuePtr = dimRow;
    uint8_t *outputNullPtr = outputValuePtr + (rowBytes - totalNumDims);
    uint8_t numDims = 0;
    for (int i = 0; i < NUM_DIM_WIDTH; i++) {
      uint8_t dimBytes = 1 << (NUM_DIM_WIDTH - 1 - i);
      for (int j = numDims; j < numDims + numDimsPerDimWidth[i]; j++) {
        switch (dimBytes) {
          case 16:
            *reinterpret_cast<UUIDT *>(outputValuePtr) =
                reinterpret_cast<UUIDT *>(inputValueStart)[index];
            break;
          case 8:
            *reinterpret_cast<uint64_t *>(outputValuePtr) =
                reinterpret_cast<uint64_t *>(inputValueStart)[index];
            break;
          case 4:
            *reinterpret_cast<uint32_t *>(outputValuePtr) =
                reinterpret_cast<uint32_t *>(inputValueStart)[index];
            break;
          case 2:
            *reinterpret_cast<uint16_t *>(outputValuePtr) =
                reinterpret_cast<uint16_t *>(inputValueStart)[index];
            break;
          case 1:
            *outputValuePtr = inputValueStart[index];
            break;
        }
        outputValuePtr += dimBytes;
        inputValueStart += dimBytes * length;
        *outputNullPtr = inputNullStart[index];
        outputNullPtr++;
        inputNullStart += length;
      }
      numDims += numDimsPerDimWidth[i];
    }
    murmur3sum128(dimRow, rowBytes, 0, hashedOutput);
    // Only use the first 64 bits of the 128-bit hash.
    return hashedOutput[0];
  }
};

class DimValueProxy {
 public:
  __host__ __device__ DimValueProxy(uint8_t *ptr, int dimBytes)
      : ptr(ptr), dimBytes(dimBytes) {}

  __host__ __device__ DimValueProxy operator=(DimValueProxy t) {
    switch (dimBytes) {
      case 16:
        *reinterpret_cast<UUIDT *>(ptr) = *reinterpret_cast<UUIDT *>(t.ptr);
        break;
      case 8:
        *reinterpret_cast<uint64_t *>(ptr) =
            *reinterpret_cast<uint64_t *>(t.ptr);
        break;
      case 4:
        *reinterpret_cast<uint32_t *>(ptr) =
            *reinterpret_cast<uint32_t *>(t.ptr);
        break;
      case 2:
        *reinterpret_cast<uint16_t *>(ptr) =
            *reinterpret_cast<uint16_t *>(t.ptr);
        break;
      case 1:
        *ptr = *t.ptr;
    }
    return *this;
  }

 private:
  uint8_t *ptr;
  int dimBytes;
};
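// Behavioral sketch of DimValueProxy (illustrative): it copies one dimension
// value of dimBytes width between two dimension buffers, which is how the
// permutation/output iterators below move whole dimension columns around.
//
//   uint8_t src[4] = {1, 2, 3, 4};
//   uint8_t dst[4] = {0};
//   ares::DimValueProxy(dst, 4) = ares::DimValueProxy(src, 4);
//   // dst now holds the same 4-byte value as src.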
class DimensionColumnPermutateIterator
    : public thrust::iterator_adaptor<DimensionColumnPermutateIterator,
                                      uint32_t *, DimValueProxy,
                                      thrust::use_default, thrust::use_default,
                                      DimValueProxy, thrust::use_default> {
 public:
  friend class thrust::iterator_core_access;
  typedef thrust::iterator_adaptor<DimensionColumnPermutateIterator,
                                   uint32_t *, DimValueProxy,
                                   thrust::use_default, thrust::use_default,
                                   DimValueProxy, thrust::use_default>
      super_t;

  __host__ __device__ DimensionColumnPermutateIterator(
      uint8_t *values, uint32_t *indexVector, int dimInputLength,
      int dimOutputLength, uint8_t _numDimsPerDimWidth[NUM_DIM_WIDTH])
      : super_t(indexVector),
        begin(indexVector),
        values(values),
        dimInputLength(dimInputLength),
        dimOutputLength(dimOutputLength) {
    for (int i = 0; i < NUM_DIM_WIDTH; i++) {
      numDimsPerDimWidth[i] = _numDimsPerDimWidth[i];
    }
  }

 private:
  uint8_t *values;
  uint32_t *begin;
  int dimInputLength;
  int dimOutputLength;
  uint8_t numDimsPerDimWidth[NUM_DIM_WIDTH];

  __host__ __device__ typename super_t::reference dereference() const {
    int baseIndex = this->base_reference() - begin;
    int dimIndex = baseIndex / dimOutputLength;
    // Index in the current dimension vector.
    int localIndex = *(begin + (baseIndex % dimOutputLength));
    int bytes = 0;
    uint8_t numDims = 0;
    uint8_t dimBytes = 0;
    int i = 0;
    for (; i < NUM_DIM_WIDTH; i++) {
      dimBytes = 1 << (NUM_DIM_WIDTH - i - 1);
      if (dimIndex < numDims + numDimsPerDimWidth[i]) {
        bytes +=
            ((dimIndex - numDims) * dimInputLength + localIndex) * dimBytes;
        break;
      } else {
        bytes += numDimsPerDimWidth[i] * dimInputLength * dimBytes;
      }
      numDims += numDimsPerDimWidth[i];
    }
    // dimIndex past all value dims means we are in the null vectors
    // (1 byte per dim), addressed with the dimBytes left from the loop.
    if (i == NUM_DIM_WIDTH) {
      bytes += ((dimIndex - numDims) * dimInputLength + localIndex) * dimBytes;
      return DimValueProxy(values + bytes, dimBytes);
    }
    return DimValueProxy(values + bytes, dimBytes);
  }
};
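// Wiring sketch (an assumption about typical use): paired with
// DimensionColumnOutputIterator below, the permutation iterator gathers
// dimension values (and their null bytes) in sorted index order so a single
// thrust::copy compacts every dimension column at once. permIter, outIter
// and numDimCols are hypothetical:
//
//   thrust::copy(permIter, permIter + numDimCols * dimOutputLength, outIter);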
// DimensionColumnOutputIterator outputs dimensions of dimOutputLength rows
// into a bigger dimension space which can hold capacity rows; the smallest
// unit of the output is one single dimension column.
class DimensionColumnOutputIterator
    : public thrust::iterator_adaptor<
          DimensionColumnOutputIterator, thrust::counting_iterator<int>,
          DimValueProxy, thrust::use_default, thrust::use_default,
          DimValueProxy, thrust::use_default> {
 public:
  friend class thrust::iterator_core_access;
  typedef thrust::iterator_adaptor<
      DimensionColumnOutputIterator, thrust::counting_iterator<int>,
      DimValueProxy, thrust::use_default, thrust::use_default, DimValueProxy,
      thrust::use_default>
      super_t;

  __host__ __device__ DimensionColumnOutputIterator(
      uint8_t *values, int capacity, int dimOutputLength,
      uint8_t _numDimsPerDimWidth[NUM_DIM_WIDTH],
      int offset)
      : super_t(thrust::make_counting_iterator<int>(0)),
        values(values),
        dimOutputLength(dimOutputLength),
        capacity(capacity),
        offset(offset) {
    for (int i = 0; i < NUM_DIM_WIDTH; i++) {
      numDimsPerDimWidth[i] = _numDimsPerDimWidth[i];
    }
  }

 private:
  uint8_t *values;
  int capacity;
  int dimOutputLength;
  uint8_t numDimsPerDimWidth[NUM_DIM_WIDTH];
  // The start row to write output data to; this is used for the append
  // operation.
  int offset;

  __host__ __device__ typename super_t::reference dereference() const {
    int baseIndex = *this->base_reference();
    int dimIndex = baseIndex / dimOutputLength;
    int globalIndex = baseIndex + (capacity - dimOutputLength) * dimIndex
        + offset;
    uint8_t numDims = 0;
    int bytes = 0;
    uint8_t dimBytes = 0;
    int i = 0;
    for (; i < NUM_DIM_WIDTH; i++) {
      dimBytes = 1 << (NUM_DIM_WIDTH - i - 1);
      if (dimIndex < numDims + numDimsPerDimWidth[i]) {
        bytes += (globalIndex - numDims * capacity) * dimBytes;
        break;
      } else {
        bytes += numDimsPerDimWidth[i] * capacity * dimBytes;
      }
      numDims += numDimsPerDimWidth[i];
    }
    if (i == NUM_DIM_WIDTH) {
      bytes += (globalIndex - numDims * capacity) * dimBytes;
      return DimValueProxy(values + bytes, dimBytes);
    }
    return DimValueProxy(values + bytes, dimBytes);
  }
};

// HLLRegIDHeadFlagIterator flags the first element of each reg-id group
// (detected by a change in the hash value).
class HLLRegIDHeadFlagIterator
    : public thrust::iterator_adaptor<
          HLLRegIDHeadFlagIterator, thrust::counting_iterator<int>,
          unsigned int, thrust::use_default, thrust::use_default, unsigned int,
          thrust::use_default> {
 public:
  friend class thrust::iterator_core_access;
  typedef thrust::iterator_adaptor<HLLRegIDHeadFlagIterator,
                                   thrust::counting_iterator<int>,
                                   unsigned int,
                                   thrust::use_default, thrust::use_default,
                                   unsigned int, thrust::use_default>
      super_t;

  __host__ __device__ HLLRegIDHeadFlagIterator(uint64_t *hashValues)
      : super_t(thrust::make_counting_iterator(0)), hashValues(hashValues) {}

 private:
  uint64_t *hashValues;

  __host__ __device__ typename super_t::reference dereference() const {
    int index = *this->base_reference();
    return (index == 0 || hashValues[index] != hashValues[index - 1]);
  }
};

// HLLValueOutputIterator produces <hll vector offset, measure value, width>
// tuples for writing HLL aggregation results.
class HLLValueOutputIterator
    : public thrust::iterator_adaptor<
          HLLValueOutputIterator, thrust::counting_iterator<int>,
          thrust::tuple<uint64_t, uint32_t, int>, thrust::use_default,
          thrust::use_default, thrust::tuple<uint64_t, uint32_t, int>,
          thrust::use_default> {
 public:
  friend class thrust::iterator_core_access;
  typedef thrust::iterator_adaptor<
      HLLValueOutputIterator, thrust::counting_iterator<int>,
      thrust::tuple<uint64_t, uint32_t, int>, thrust::use_default,
      thrust::use_default, thrust::tuple<uint64_t, uint32_t, int>,
      thrust::use_default>
      super_t;

  __host__ __device__ HLLValueOutputIterator(unsigned int *dimCount,
                                             uint32_t *hllMeasureValues,
                                             uint64_t *hllRegIDCumCount,
                                             uint64_t *hllDimRegIDCumCount,
                                             uint64_t *hllVectorOffsets)
      : super_t(thrust::make_counting_iterator(0)),
        dimCount(dimCount),
        hllMeasureValues(hllMeasureValues),
        hllRegIDCumCount(hllRegIDCumCount),
        hllDimRegIDCumCount(hllDimRegIDCumCount),
        hllVectorOffsets(hllVectorOffsets) {}

 private:
  unsigned int *dimCount;
  uint32_t *hllMeasureValues;
  uint64_t *hllRegIDCumCount;
  uint64_t *hllDimRegIDCumCount;
  uint64_t *hllVectorOffsets;

  // Get the reg id within the dim group:
  //   index:               0 1 2 3 4 5 6 7 8 9 10 11
  //   regHeadIter:         1 0 0 0 1 0 0 0 1 0 0  0
  //   hllRegIDCumCount:    1 1 1 1 2 2 2 2 3 3 3  3
  //   dimCount:            1 1 1 1 1 1 1 1 2 2 2  2
  //   hllDimRegIDCumCount: 0 2 3
  //   hllVectorOffsets:    0 8 12
  __host__ __device__ typename super_t::reference dereference() const {
    int index = *this->base_reference();
    unsigned int dimIndex = dimCount[index] - 1;
    int dimRegIDCount =
        hllDimRegIDCumCount[dimIndex + 1] - hllDimRegIDCumCount[dimIndex];
    uint64_t hllVectorOffset = hllVectorOffsets[dimIndex];
    if (dimRegIDCount < HLL_DENSE_THRESHOLD) {
      // sparse mode
      uint64_t regIDCumCount = hllRegIDCumCount[index];
      uint64_t dimRegIDcumCount = hllDimRegIDCumCount[dimIndex];
      int regIDIndexWithinDim = regIDCumCount - dimRegIDcumCount - 1;
      return thrust::make_tuple(hllVectorOffset + regIDIndexWithinDim * 4,
                                hllMeasureValues[index], 4);
    } else {
      // dense mode
      int regID = hllMeasureValues[index] & 0x3FFF;
      return thrust::make_tuple(hllVectorOffset + regID,
                                hllMeasureValues[index], 1);
    }
  }
};

// GeoPredicateIterator reads the geo intersection output predicate vector.
// It outputs the first intersected geoshape index, or -1 if no intersected
// geoshape is found.
class GeoPredicateIterator
    : public thrust::iterator_adaptor<GeoPredicateIterator, uint32_t *, int8_t,
                                      thrust::use_default, thrust::use_default,
                                      int8_t, thrust::use_default> {
 public:
  friend class thrust::iterator_core_access;

  typedef thrust::iterator_adaptor<GeoPredicateIterator, uint32_t *, int8_t,
                                   thrust::use_default, thrust::use_default,
                                   int8_t, thrust::use_default>
      super_t;

  __host__ __device__ GeoPredicateIterator(uint32_t *predicateIter,
                                           uint8_t stepInWords)
      : super_t(predicateIter), stepInWords(stepInWords) {}

 private:
  uint8_t stepInWords;

  __host__ __device__ int8_t get_first_none_zero(uint32_t word) const {
    int i = 0;
    while (i < 32) {
      if ((word >> i) & 1) {
        return i;
      }
      i++;
    }
    return -1;
  }

  __host__ __device__ typename super_t::reference dereference() const {
    for (int i = 0; i < stepInWords; i++) {
      int8_t index = get_first_none_zero(this->base_reference()[i]);
      if (index >= 0) {
        return (int8_t)(i * 32 + index);
      }
    }
    return -1;
  }

  __host__ __device__ void advance(typename super_t::difference_type n) {
    this->base_reference() += n * stepInWords;
  }

  __host__ __device__ void increment() { advance(1); }

  __host__ __device__ void decrement() { advance(-1); }

  __host__ __device__ typename super_t::difference_type distance_to(
      const GeoPredicateIterator &other) const {
    typename super_t::difference_type dist =
        other.base_reference() - this->base_reference();
    return dist / stepInWords;
  }
};

// Used as void value and reference type.
struct EmptyStruct {};
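// Reading sketch for GeoPredicateIterator above (illustrative): each step
// consumes stepInWords 32-bit predicate words for one row and reports the
// first set bit as the matched shape index.
//
//   // Row words: {0b00000100, 0x00000000} with stepInWords == 2
//   // => dereference() == 2 (third shape); all-zero words => -1.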
// GeoBatchIntersectIterator is the iterator that computes whether the
// semi-infinite ray horizontally emitted from the geo point crosses a single
// edge of the geoshape. Note that the value and dereference types of this
// iterator are all defaults, meaning that accessing and assigning values to
// this iterator has no meaning. The dereference function directly writes to
// the output predicate vector using atomicXor.
template <typename GeoInputIterator>
class GeoBatchIntersectIterator
    : public thrust::iterator_adaptor<
          GeoBatchIntersectIterator<GeoInputIterator>,
          GeoInputIterator, EmptyStruct,
          thrust::use_default, thrust::use_default,
          EmptyStruct, thrust::use_default> {
 public:
  friend class thrust::iterator_core_access;

  typedef thrust::iterator_adaptor<GeoBatchIntersectIterator<GeoInputIterator>,
                                   GeoInputIterator, EmptyStruct,
                                   thrust::use_default, thrust::use_default,
                                   EmptyStruct, thrust::use_default>
      super_t;

  __host__ __device__ GeoBatchIntersectIterator() {}

  __host__ __device__ GeoBatchIntersectIterator(
      GeoInputIterator geoPoints, GeoShapeBatch geoShapes,
      uint32_t *outputPredicate, bool inOrOut)
      : super_t(geoPoints),
        geoShapes(geoShapes),
        outputPredicate(outputPredicate),
        pointIndex(0),
        inOrOut(inOrOut) {}

 private:
  GeoShapeBatch geoShapes;
  uint32_t *outputPredicate;
  int32_t pointIndex;
  bool inOrOut;

  __host_or_device__
  typename super_t::reference dereference() const {
    // The offset to shapeVector in bytes is totalNumberPoints * 2 * 4.
    uint8_t shapeIndex =
        geoShapes.LatLongs[geoShapes.TotalNumPoints * 2 * 4 + pointIndex];
    EmptyStruct emptyRes;
    // Nothing needs to be done for the last point of a shape.
    if (pointIndex >= geoShapes.TotalNumPoints - 1) {
      return emptyRes;
    }

    // A shapeIndex change marks the last point of a shape, therefore nothing
    // needs to be done either.
    if (shapeIndex !=
        geoShapes.LatLongs[geoShapes.TotalNumPoints * 2 * 4 + pointIndex + 1]) {
      return emptyRes;
    }

    auto testPoint = *this->base_reference();
    // If testPoint is null, we just write false to the output predicate.
    if (!thrust::get<1>(testPoint)) {
      // Only the first point is responsible for the write.
      if (pointIndex == 0) {
        for (int i = 0; i < geoShapes.TotalWords; i++) {
          outputPredicate[i] = !inOrOut;
        }
      }
      return emptyRes;
    }

    float testLat = thrust::get<0>(testPoint).Lat;
    float testLong = thrust::get<0>(testPoint).Long;
    // The latitude of the first point of the edge.
    float edgeLat1 = reinterpret_cast<float *>(geoShapes.LatLongs)[pointIndex];
    // The latitude of the second point of the edge.
    float edgeLat2 =
        reinterpret_cast<float *>(geoShapes.LatLongs)[pointIndex + 1];
    if (edgeLat1 < FLT_MAX && edgeLat2 < FLT_MAX) {
      float edgeLong1 = reinterpret_cast<float *>(
          geoShapes.LatLongs)[geoShapes.TotalNumPoints + pointIndex];
      float edgeLong2 = reinterpret_cast<float *>(
          geoShapes.LatLongs)[geoShapes.TotalNumPoints + pointIndex + 1];
      if (((edgeLong1 > testLong) != (edgeLong2 > testLong)) &&
          (testLat < (edgeLat2 - edgeLat1) * (testLong - edgeLong1) /
                         (edgeLong2 - edgeLong1) +
                     edgeLat1)) {
#ifdef RUN_ON_DEVICE
        atomicXor(outputPredicate + (shapeIndex / 32),
                  (1 << (shapeIndex % 32)));
#else
        // When we are running in host mode, we run sequentially, so
        // non-atomic access is OK for now. If we switch to parallel host
        // execution in the future, we need to find out how to do an atomic
        // operation on an existing pointer in C++.
        outputPredicate[shapeIndex / 32] ^= (1 << (shapeIndex % 32));
#endif
      }
    }
    return emptyRes;
  }

  __host__ __device__ void advance(typename super_t::difference_type n) {
    int64_t newPointIndex = (int64_t)pointIndex + n;
    if (newPointIndex >= geoShapes.TotalNumPoints) {
      int steps = newPointIndex / geoShapes.TotalNumPoints;
      this->base_reference() += steps;
      pointIndex = newPointIndex %= geoShapes.TotalNumPoints;
      outputPredicate += steps * geoShapes.TotalWords;
    } else if (newPointIndex < 0) {
      int steps = (newPointIndex - geoShapes.TotalNumPoints + 1) /
                  geoShapes.TotalNumPoints;
      this->base_reference() += steps;
      pointIndex = newPointIndex -= steps * geoShapes.TotalNumPoints;
      outputPredicate += steps * geoShapes.TotalWords;
    } else {
      pointIndex = (int32_t)newPointIndex;
    }
  }

  __host__ __device__ void increment() { advance(1); }

  __host__ __device__ void decrement() { advance(-1); }

  __host__ __device__ typename super_t::difference_type distance_to(
      const GeoBatchIntersectIterator &other) const {
    typename super_t::difference_type dist =
        other.base_reference() - this->base_reference();
    return dist * geoShapes.TotalNumPoints +
           (other.pointIndex - this->pointIndex);
  }
};

template<typename GeoInputIterator>
GeoBatchIntersectIterator<GeoInputIterator> make_geo_batch_intersect_iterator(
    GeoInputIterator points, GeoShapeBatch geoShape,
    uint32_t *outputPredicate, bool inOrOut) {
  return GeoBatchIntersectIterator<GeoInputIterator>(points,
                                                     geoShape,
                                                     outputPredicate,
                                                     inOrOut);
}

// Iterator to retrieve counts on index, which is usually used for a mode 3
// archive vector party.
class IndexCountIterator :
    public thrust::iterator_adaptor<IndexCountIterator,
                                    uint32_t *, uint32_t,
                                    thrust::use_default, thrust::use_default,
                                    uint32_t,
                                    thrust::use_default> {
 public:
  friend class thrust::iterator_core_access;

  typedef thrust::iterator_adaptor<IndexCountIterator,
                                   uint32_t *, uint32_t,
                                   thrust::use_default, thrust::use_default,
                                   uint32_t,
                                   thrust::use_default> super_t;

  __host__ __device__ IndexCountIterator() {}

  IndexCountIterator(uint32_t *baseCount, uint32_t *indexVector) :
      super_t(indexVector), baseCount(baseCount) {}

 private:
  uint32_t *baseCount;

  __host__ __device__ uint32_t dereference() const {
    return baseCount[*this->base_reference() + 1]
        - baseCount[*this->base_reference()];
  }
};

}  // namespace ares
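// Usage sketch for IndexCountIterator (an assumption about typical use):
// reducing over it sums the matched row counts of a mode 3 archive column.
// baseCount, indexVector and numRows are hypothetical:
//
//   uint32_t total = thrust::reduce(
//       ares::IndexCountIterator(baseCount, indexVector),
//       ares::IndexCountIterator(baseCount, indexVector) + numRows, 0u);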
namespace thrust {
namespace detail {
// For execution policies with CUDA, the output iterator's reference type is
// required to be an actual reference. However, MeasureProxy, IndexProxy and
// DimValueProxy are value types, so we specialize the is_non_const_reference
// traits to treat all of these proxy classes as reference types.
template<>
struct is_non_const_reference<
    ares::MeasureProxy<int32_t> > : public true_type {
};

template<>
struct is_non_const_reference<
    ares::MeasureProxy<uint32_t> > : public true_type {
};

template<>
struct is_non_const_reference<
    ares::MeasureProxy<float_t> > : public true_type {
};

template<>
struct is_non_const_reference<
    ares::MeasureProxy<int64_t> > : public true_type {
};

template<>
struct is_non_const_reference<
    ares::MeasureProxy<double_t> > : public true_type {
};

template<>
struct is_non_const_reference<ares::IndexProxy> : public true_type {
};

template<>
struct is_non_const_reference<ares::DimValueProxy> : public true_type {
};
}  // namespace detail
}  // namespace thrust
#endif  // QUERY_ITERATOR_HPP_