github.com/kaydxh/golang@v0.0.131/pkg/gocv/cgo/third_path/opencv4/include/opencv2/flann/lsh_table.h (about) 1 /*********************************************************************** 2 * Software License Agreement (BSD License) 3 * 4 * Copyright 2008-2009 Marius Muja (mariusm@cs.ubc.ca). All rights reserved. 5 * Copyright 2008-2009 David G. Lowe (lowe@cs.ubc.ca). All rights reserved. 6 * 7 * THE BSD LICENSE 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 20 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 21 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 22 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 23 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 24 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 28 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 29 *************************************************************************/ 30 31 /*********************************************************************** 32 * Author: Vincent Rabaud 33 *************************************************************************/ 34 35 #ifndef OPENCV_FLANN_LSH_TABLE_H_ 36 #define OPENCV_FLANN_LSH_TABLE_H_ 37 38 //! @cond IGNORED 39 40 #include <algorithm> 41 #include <iostream> 42 #include <iomanip> 43 #include <limits.h> 44 // TODO as soon as we use C++0x, use the code in USE_UNORDERED_MAP 45 #ifdef __GXX_EXPERIMENTAL_CXX0X__ 46 # define USE_UNORDERED_MAP 1 47 #else 48 # define USE_UNORDERED_MAP 0 49 #endif 50 #if USE_UNORDERED_MAP 51 #include <unordered_map> 52 #else 53 #include <map> 54 #endif 55 #include <math.h> 56 #include <stddef.h> 57 58 #include "dynamic_bitset.h" 59 #include "matrix.h" 60 61 #ifdef _MSC_VER 62 #pragma warning(push) 63 #pragma warning(disable: 4702) //disable unreachable code 64 #endif 65 66 67 namespace cvflann 68 { 69 70 namespace lsh 71 { 72 73 //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// 74 75 /** What is stored in an LSH bucket 76 */ 77 typedef uint32_t FeatureIndex; 78 /** The id from which we can get a bucket back in an LSH table 79 */ 80 typedef unsigned int BucketKey; 81 82 /** A bucket in an LSH table 83 */ 84 typedef std::vector<FeatureIndex> Bucket; 85 86 //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// 87 88 /** POD for stats about an LSH table 89 */ 90 struct LshStats 91 { 92 std::vector<unsigned int> bucket_sizes_; 93 size_t n_buckets_; 94 size_t bucket_size_mean_; 95 size_t bucket_size_median_; 96 size_t bucket_size_min_; 97 size_t bucket_size_max_; 98 size_t bucket_size_std_dev; 99 /** Each contained vector contains three value: beginning/end for interval, number of elements in the bin 100 */ 101 std::vector<std::vector<unsigned int> > size_histogram_; 102 }; 103 104 /** Overload the << operator for LshStats 105 * @param out the streams 106 * @param stats the stats to display 107 * @return the streams 108 */ 109 inline std::ostream& operator <<(std::ostream& out, const LshStats& stats) 110 { 111 int w = 20; 112 out << "Lsh Table Stats:\n" << std::setw(w) << std::setiosflags(std::ios::right) << "N buckets : " 113 << stats.n_buckets_ << "\n" << std::setw(w) << std::setiosflags(std::ios::right) << "mean size : " 114 << std::setiosflags(std::ios::left) << stats.bucket_size_mean_ << "\n" << std::setw(w) 115 << std::setiosflags(std::ios::right) << "median size : " << stats.bucket_size_median_ << "\n" << std::setw(w) 116 << std::setiosflags(std::ios::right) << "min size : " << std::setiosflags(std::ios::left) 117 << stats.bucket_size_min_ << "\n" << std::setw(w) << std::setiosflags(std::ios::right) << "max size : " 118 << std::setiosflags(std::ios::left) << stats.bucket_size_max_; 119 120 // Display the histogram 121 out << std::endl << std::setw(w) << std::setiosflags(std::ios::right) << "histogram : " 122 << std::setiosflags(std::ios::left); 123 for (std::vector<std::vector<unsigned int> >::const_iterator iterator = stats.size_histogram_.begin(), end = 124 stats.size_histogram_.end(); iterator != end; ++iterator) out << (*iterator)[0] << "-" << (*iterator)[1] << ": " << (*iterator)[2] << ", "; 125 126 return out; 127 } 128 129 130 //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// 131 132 /** Lsh hash table. As its key is a sub-feature, and as usually 133 * the size of it is pretty small, we keep it as a continuous memory array. 134 * The value is an index in the corpus of features (we keep it as an unsigned 135 * int for pure memory reasons, it could be a size_t) 136 */ 137 template<typename ElementType> 138 class LshTable 139 { 140 public: 141 /** A container of all the feature indices. Optimized for space 142 */ 143 #if USE_UNORDERED_MAP 144 typedef std::unordered_map<BucketKey, Bucket> BucketsSpace; 145 #else 146 typedef std::map<BucketKey, Bucket> BucketsSpace; 147 #endif 148 149 /** A container of all the feature indices. Optimized for speed 150 */ 151 typedef std::vector<Bucket> BucketsSpeed; 152 153 /** Default constructor 154 */ 155 LshTable() 156 { 157 key_size_ = 0; 158 feature_size_ = 0; 159 speed_level_ = kArray; 160 } 161 162 /** Default constructor 163 * Create the mask and allocate the memory 164 * @param feature_size is the size of the feature (considered as a ElementType[]) 165 * @param key_size is the number of bits that are turned on in the feature 166 */ 167 LshTable(unsigned int feature_size, unsigned int key_size) 168 { 169 feature_size_ = feature_size; 170 CV_UNUSED(key_size); 171 CV_Error(cv::Error::StsUnsupportedFormat, "LSH is not implemented for that type" ); 172 } 173 174 /** Add a feature to the table 175 * @param value the value to store for that feature 176 * @param feature the feature itself 177 */ 178 void add(unsigned int value, const ElementType* feature) 179 { 180 // Add the value to the corresponding bucket 181 BucketKey key = (lsh::BucketKey)getKey(feature); 182 183 switch (speed_level_) { 184 case kArray: 185 // That means we get the buckets from an array 186 buckets_speed_[key].push_back(value); 187 break; 188 case kBitsetHash: 189 // That means we can check the bitset for the presence of a key 190 key_bitset_.set(key); 191 buckets_space_[key].push_back(value); 192 break; 193 case kHash: 194 { 195 // That means we have to check for the hash table for the presence of a key 196 buckets_space_[key].push_back(value); 197 break; 198 } 199 } 200 } 201 202 /** Add a set of features to the table 203 * @param dataset the values to store 204 */ 205 void add(Matrix<ElementType> dataset) 206 { 207 #if USE_UNORDERED_MAP 208 buckets_space_.rehash((buckets_space_.size() + dataset.rows) * 1.2); 209 #endif 210 // Add the features to the table 211 for (unsigned int i = 0; i < dataset.rows; ++i) add(i, dataset[i]); 212 // Now that the table is full, optimize it for speed/space 213 optimize(); 214 } 215 216 /** Get a bucket given the key 217 * @param key 218 * @return 219 */ 220 inline const Bucket* getBucketFromKey(BucketKey key) const 221 { 222 // Generate other buckets 223 switch (speed_level_) { 224 case kArray: 225 // That means we get the buckets from an array 226 return &buckets_speed_[key]; 227 break; 228 case kBitsetHash: 229 // That means we can check the bitset for the presence of a key 230 if (key_bitset_.test(key)) return &buckets_space_.find(key)->second; 231 else return 0; 232 break; 233 case kHash: 234 { 235 // That means we have to check for the hash table for the presence of a key 236 BucketsSpace::const_iterator bucket_it, bucket_end = buckets_space_.end(); 237 bucket_it = buckets_space_.find(key); 238 // Stop here if that bucket does not exist 239 if (bucket_it == bucket_end) return 0; 240 else return &bucket_it->second; 241 break; 242 } 243 } 244 return 0; 245 } 246 247 /** Compute the sub-signature of a feature 248 */ 249 size_t getKey(const ElementType* /*feature*/) const 250 { 251 CV_Error(cv::Error::StsUnsupportedFormat, "LSH is not implemented for that type" ); 252 return 0; 253 } 254 255 /** Get statistics about the table 256 * @return 257 */ 258 LshStats getStats() const; 259 260 private: 261 /** defines the speed fo the implementation 262 * kArray uses a vector for storing data 263 * kBitsetHash uses a hash map but checks for the validity of a key with a bitset 264 * kHash uses a hash map only 265 */ 266 enum SpeedLevel 267 { 268 kArray, kBitsetHash, kHash 269 }; 270 271 /** Initialize some variables 272 */ 273 void initialize(size_t key_size) 274 { 275 const size_t key_size_lower_bound = 1; 276 //a value (size_t(1) << key_size) must fit the size_t type so key_size has to be strictly less than size of size_t 277 const size_t key_size_upper_bound = (std::min)(sizeof(BucketKey) * CHAR_BIT + 1, sizeof(size_t) * CHAR_BIT); 278 if (key_size < key_size_lower_bound || key_size >= key_size_upper_bound) 279 { 280 CV_Error(cv::Error::StsBadArg, cv::format("Invalid key_size (=%d). Valid values for your system are %d <= key_size < %d.", (int)key_size, (int)key_size_lower_bound, (int)key_size_upper_bound)); 281 } 282 283 speed_level_ = kHash; 284 key_size_ = (unsigned)key_size; 285 } 286 287 /** Optimize the table for speed/space 288 */ 289 void optimize() 290 { 291 // If we are already using the fast storage, no need to do anything 292 if (speed_level_ == kArray) return; 293 294 // Use an array if it will be more than half full 295 if (buckets_space_.size() > ((size_t(1) << key_size_) / 2)) { 296 speed_level_ = kArray; 297 // Fill the array version of it 298 buckets_speed_.resize(size_t(1) << key_size_); 299 for (BucketsSpace::const_iterator key_bucket = buckets_space_.begin(); key_bucket != buckets_space_.end(); ++key_bucket) buckets_speed_[key_bucket->first] = key_bucket->second; 300 301 // Empty the hash table 302 buckets_space_.clear(); 303 return; 304 } 305 306 // If the bitset is going to use less than 10% of the RAM of the hash map (at least 1 size_t for the key and two 307 // for the vector) or less than 512MB (key_size_ <= 30) 308 if (((std::max(buckets_space_.size(), buckets_speed_.size()) * CHAR_BIT * 3 * sizeof(BucketKey)) / 10 309 >= (size_t(1) << key_size_)) || (key_size_ <= 32)) { 310 speed_level_ = kBitsetHash; 311 key_bitset_.resize(size_t(1) << key_size_); 312 key_bitset_.reset(); 313 // Try with the BucketsSpace 314 for (BucketsSpace::const_iterator key_bucket = buckets_space_.begin(); key_bucket != buckets_space_.end(); ++key_bucket) key_bitset_.set(key_bucket->first); 315 } 316 else { 317 speed_level_ = kHash; 318 key_bitset_.clear(); 319 } 320 } 321 322 /** The vector of all the buckets if they are held for speed 323 */ 324 BucketsSpeed buckets_speed_; 325 326 /** The hash table of all the buckets in case we cannot use the speed version 327 */ 328 BucketsSpace buckets_space_; 329 330 /** What is used to store the data */ 331 SpeedLevel speed_level_; 332 333 /** If the subkey is small enough, it will keep track of which subkeys are set through that bitset 334 * That is just a speedup so that we don't look in the hash table (which can be mush slower that checking a bitset) 335 */ 336 DynamicBitset key_bitset_; 337 338 /** The size of the sub-signature in bits 339 */ 340 unsigned int key_size_; 341 342 unsigned int feature_size_; 343 344 // Members only used for the unsigned char specialization 345 /** The mask to apply to a feature to get the hash key 346 * Only used in the unsigned char case 347 */ 348 std::vector<size_t> mask_; 349 }; 350 351 //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// 352 // Specialization for unsigned char 353 354 template<> 355 inline LshTable<unsigned char>::LshTable(unsigned int feature_size, unsigned int subsignature_size) 356 { 357 feature_size_ = feature_size; 358 initialize(subsignature_size); 359 // Allocate the mask 360 mask_ = std::vector<size_t>((feature_size * sizeof(char) + sizeof(size_t) - 1) / sizeof(size_t), 0); 361 362 // A bit brutal but fast to code 363 std::vector<int> indices(feature_size * CHAR_BIT); 364 for (size_t i = 0; i < feature_size * CHAR_BIT; ++i) indices[i] = (int)i; 365 #ifndef OPENCV_FLANN_USE_STD_RAND 366 cv::randShuffle(indices); 367 #else 368 std::random_shuffle(indices.begin(), indices.end()); 369 #endif 370 371 // Generate a random set of order of subsignature_size_ bits 372 for (unsigned int i = 0; i < key_size_; ++i) { 373 size_t index = indices[i]; 374 375 // Set that bit in the mask 376 size_t divisor = CHAR_BIT * sizeof(size_t); 377 size_t idx = index / divisor; //pick the right size_t index 378 mask_[idx] |= size_t(1) << (index % divisor); //use modulo to find the bit offset 379 } 380 381 // Set to 1 if you want to display the mask for debug 382 #if 0 383 { 384 size_t bcount = 0; 385 BOOST_FOREACH(size_t mask_block, mask_){ 386 out << std::setw(sizeof(size_t) * CHAR_BIT / 4) << std::setfill('0') << std::hex << mask_block 387 << std::endl; 388 bcount += __builtin_popcountll(mask_block); 389 } 390 out << "bit count : " << std::dec << bcount << std::endl; 391 out << "mask size : " << mask_.size() << std::endl; 392 return out; 393 } 394 #endif 395 } 396 397 /** Return the Subsignature of a feature 398 * @param feature the feature to analyze 399 */ 400 template<> 401 inline size_t LshTable<unsigned char>::getKey(const unsigned char* feature) const 402 { 403 // no need to check if T is dividable by sizeof(size_t) like in the Hamming 404 // distance computation as we have a mask 405 // FIXIT: This is bad assumption, because we reading tail bytes after of the allocated features buffer 406 const size_t* feature_block_ptr = reinterpret_cast<const size_t*> ((const void*)feature); 407 408 // Figure out the subsignature of the feature 409 // Given the feature ABCDEF, and the mask 001011, the output will be 410 // 000CEF 411 size_t subsignature = 0; 412 size_t bit_index = 1; 413 414 for (unsigned i = 0; i < feature_size_; i += sizeof(size_t)) { 415 // get the mask and signature blocks 416 size_t feature_block; 417 if (i <= feature_size_ - sizeof(size_t)) 418 { 419 feature_block = *feature_block_ptr; 420 } 421 else 422 { 423 size_t tmp = 0; 424 memcpy(&tmp, feature_block_ptr, feature_size_ - i); // preserve bytes order 425 feature_block = tmp; 426 } 427 size_t mask_block = mask_[i / sizeof(size_t)]; 428 while (mask_block) { 429 // Get the lowest set bit in the mask block 430 size_t lowest_bit = mask_block & (-(ptrdiff_t)mask_block); 431 // Add it to the current subsignature if necessary 432 subsignature += (feature_block & lowest_bit) ? bit_index : 0; 433 // Reset the bit in the mask block 434 mask_block ^= lowest_bit; 435 // increment the bit index for the subsignature 436 bit_index <<= 1; 437 } 438 // Check the next feature block 439 ++feature_block_ptr; 440 } 441 return subsignature; 442 } 443 444 template<> 445 inline LshStats LshTable<unsigned char>::getStats() const 446 { 447 LshStats stats; 448 stats.bucket_size_mean_ = 0; 449 if ((buckets_speed_.empty()) && (buckets_space_.empty())) { 450 stats.n_buckets_ = 0; 451 stats.bucket_size_median_ = 0; 452 stats.bucket_size_min_ = 0; 453 stats.bucket_size_max_ = 0; 454 return stats; 455 } 456 457 if (!buckets_speed_.empty()) { 458 for (BucketsSpeed::const_iterator pbucket = buckets_speed_.begin(); pbucket != buckets_speed_.end(); ++pbucket) { 459 stats.bucket_sizes_.push_back((lsh::FeatureIndex)pbucket->size()); 460 stats.bucket_size_mean_ += pbucket->size(); 461 } 462 stats.bucket_size_mean_ /= buckets_speed_.size(); 463 stats.n_buckets_ = buckets_speed_.size(); 464 } 465 else { 466 for (BucketsSpace::const_iterator x = buckets_space_.begin(); x != buckets_space_.end(); ++x) { 467 stats.bucket_sizes_.push_back((lsh::FeatureIndex)x->second.size()); 468 stats.bucket_size_mean_ += x->second.size(); 469 } 470 stats.bucket_size_mean_ /= buckets_space_.size(); 471 stats.n_buckets_ = buckets_space_.size(); 472 } 473 474 std::sort(stats.bucket_sizes_.begin(), stats.bucket_sizes_.end()); 475 476 // BOOST_FOREACH(int size, stats.bucket_sizes_) 477 // std::cout << size << " "; 478 // std::cout << std::endl; 479 stats.bucket_size_median_ = stats.bucket_sizes_[stats.bucket_sizes_.size() / 2]; 480 stats.bucket_size_min_ = stats.bucket_sizes_.front(); 481 stats.bucket_size_max_ = stats.bucket_sizes_.back(); 482 483 // TODO compute mean and std 484 /*float mean, stddev; 485 stats.bucket_size_mean_ = mean; 486 stats.bucket_size_std_dev = stddev;*/ 487 488 // Include a histogram of the buckets 489 unsigned int bin_start = 0; 490 unsigned int bin_end = 20; 491 bool is_new_bin = true; 492 for (std::vector<unsigned int>::iterator iterator = stats.bucket_sizes_.begin(), end = stats.bucket_sizes_.end(); iterator 493 != end; ) 494 if (*iterator < bin_end) { 495 if (is_new_bin) { 496 stats.size_histogram_.push_back(std::vector<unsigned int>(3, 0)); 497 stats.size_histogram_.back()[0] = bin_start; 498 stats.size_histogram_.back()[1] = bin_end - 1; 499 is_new_bin = false; 500 } 501 ++stats.size_histogram_.back()[2]; 502 ++iterator; 503 } 504 else { 505 bin_start += 20; 506 bin_end += 20; 507 is_new_bin = true; 508 } 509 510 return stats; 511 } 512 513 // End the two namespaces 514 } 515 } 516 517 #ifdef _MSC_VER 518 #pragma warning(pop) 519 #endif 520 521 //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// 522 523 //! @endcond 524 525 #endif /* OPENCV_FLANN_LSH_TABLE_H_ */