github.com/kaydxh/golang@v0.0.131/pkg/gocv/cgo/third_path/opencv4/include/opencv2/flann/lsh_table.h (about)

     1  /***********************************************************************
     2   * Software License Agreement (BSD License)
     3   *
     4   * Copyright 2008-2009  Marius Muja (mariusm@cs.ubc.ca). All rights reserved.
     5   * Copyright 2008-2009  David G. Lowe (lowe@cs.ubc.ca). All rights reserved.
     6   *
     7   * THE BSD LICENSE
     8   *
     9   * Redistribution and use in source and binary forms, with or without
    10   * modification, are permitted provided that the following conditions
    11   * are met:
    12   *
    13   * 1. Redistributions of source code must retain the above copyright
    14   *    notice, this list of conditions and the following disclaimer.
    15   * 2. Redistributions in binary form must reproduce the above copyright
    16   *    notice, this list of conditions and the following disclaimer in the
    17   *    documentation and/or other materials provided with the distribution.
    18   *
    19   * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
    20   * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
    21   * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
    22   * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
    23   * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
    24   * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
    25   * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
    26   * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
    27   * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
    28   * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
    29   *************************************************************************/
    30  
    31  /***********************************************************************
    32   * Author: Vincent Rabaud
    33   *************************************************************************/
    34  
    35  #ifndef OPENCV_FLANN_LSH_TABLE_H_
    36  #define OPENCV_FLANN_LSH_TABLE_H_
    37  
    38  //! @cond IGNORED
    39  
    40  #include <algorithm>
    41  #include <iostream>
    42  #include <iomanip>
    43  #include <limits.h>
    44  // TODO as soon as we use C++0x, use the code in USE_UNORDERED_MAP
    45  #ifdef __GXX_EXPERIMENTAL_CXX0X__
    46  #  define USE_UNORDERED_MAP 1
    47  #else
    48  #  define USE_UNORDERED_MAP 0
    49  #endif
    50  #if USE_UNORDERED_MAP
    51  #include <unordered_map>
    52  #else
    53  #include <map>
    54  #endif
    55  #include <math.h>
    56  #include <stddef.h>
    57  
    58  #include "dynamic_bitset.h"
    59  #include "matrix.h"
    60  
    61  #ifdef _MSC_VER
    62  #pragma warning(push)
    63  #pragma warning(disable: 4702) //disable unreachable code
    64  #endif
    65  
    66  
    67  namespace cvflann
    68  {
    69  
    70  namespace lsh
    71  {
    72  
    73  ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
    74  
    75  /** What is stored in an LSH bucket
    76   */
    77  typedef uint32_t FeatureIndex;
    78  /** The id from which we can get a bucket back in an LSH table
    79   */
    80  typedef unsigned int BucketKey;
    81  
    82  /** A bucket in an LSH table
    83   */
    84  typedef std::vector<FeatureIndex> Bucket;
    85  
    86  ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
    87  
    88  /** POD for stats about an LSH table
    89   */
    90  struct LshStats
    91  {
    92      std::vector<unsigned int> bucket_sizes_;
    93      size_t n_buckets_;
    94      size_t bucket_size_mean_;
    95      size_t bucket_size_median_;
    96      size_t bucket_size_min_;
    97      size_t bucket_size_max_;
    98      size_t bucket_size_std_dev;
    99      /** Each contained vector contains three value: beginning/end for interval, number of elements in the bin
   100       */
   101      std::vector<std::vector<unsigned int> > size_histogram_;
   102  };
   103  
   104  /** Overload the << operator for LshStats
   105   * @param out the streams
   106   * @param stats the stats to display
   107   * @return the streams
   108   */
   109  inline std::ostream& operator <<(std::ostream& out, const LshStats& stats)
   110  {
   111      int w = 20;
   112      out << "Lsh Table Stats:\n" << std::setw(w) << std::setiosflags(std::ios::right) << "N buckets : "
   113      << stats.n_buckets_ << "\n" << std::setw(w) << std::setiosflags(std::ios::right) << "mean size : "
   114      << std::setiosflags(std::ios::left) << stats.bucket_size_mean_ << "\n" << std::setw(w)
   115      << std::setiosflags(std::ios::right) << "median size : " << stats.bucket_size_median_ << "\n" << std::setw(w)
   116      << std::setiosflags(std::ios::right) << "min size : " << std::setiosflags(std::ios::left)
   117      << stats.bucket_size_min_ << "\n" << std::setw(w) << std::setiosflags(std::ios::right) << "max size : "
   118      << std::setiosflags(std::ios::left) << stats.bucket_size_max_;
   119  
   120      // Display the histogram
   121      out << std::endl << std::setw(w) << std::setiosflags(std::ios::right) << "histogram : "
   122      << std::setiosflags(std::ios::left);
   123      for (std::vector<std::vector<unsigned int> >::const_iterator iterator = stats.size_histogram_.begin(), end =
   124               stats.size_histogram_.end(); iterator != end; ++iterator) out << (*iterator)[0] << "-" << (*iterator)[1] << ": " << (*iterator)[2] << ",  ";
   125  
   126      return out;
   127  }
   128  
   129  
   130  ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
   131  
   132  /** Lsh hash table. As its key is a sub-feature, and as usually
   133   * the size of it is pretty small, we keep it as a continuous memory array.
   134   * The value is an index in the corpus of features (we keep it as an unsigned
   135   * int for pure memory reasons, it could be a size_t)
   136   */
   137  template<typename ElementType>
   138  class LshTable
   139  {
   140  public:
   141      /** A container of all the feature indices. Optimized for space
   142       */
   143  #if USE_UNORDERED_MAP
   144      typedef std::unordered_map<BucketKey, Bucket> BucketsSpace;
   145  #else
   146      typedef std::map<BucketKey, Bucket> BucketsSpace;
   147  #endif
   148  
   149      /** A container of all the feature indices. Optimized for speed
   150       */
   151      typedef std::vector<Bucket> BucketsSpeed;
   152  
   153      /** Default constructor
   154       */
   155      LshTable()
   156      {
   157          key_size_ = 0;
   158          feature_size_ = 0;
   159          speed_level_ = kArray;
   160      }
   161  
   162      /** Default constructor
   163       * Create the mask and allocate the memory
   164       * @param feature_size is the size of the feature (considered as a ElementType[])
   165       * @param key_size is the number of bits that are turned on in the feature
   166       */
   167      LshTable(unsigned int feature_size, unsigned int key_size)
   168      {
   169          feature_size_ = feature_size;
   170          CV_UNUSED(key_size);
   171          CV_Error(cv::Error::StsUnsupportedFormat, "LSH is not implemented for that type" );
   172      }
   173  
   174      /** Add a feature to the table
   175       * @param value the value to store for that feature
   176       * @param feature the feature itself
   177       */
   178      void add(unsigned int value, const ElementType* feature)
   179      {
   180          // Add the value to the corresponding bucket
   181          BucketKey key = (lsh::BucketKey)getKey(feature);
   182  
   183          switch (speed_level_) {
   184          case kArray:
   185              // That means we get the buckets from an array
   186              buckets_speed_[key].push_back(value);
   187              break;
   188          case kBitsetHash:
   189              // That means we can check the bitset for the presence of a key
   190              key_bitset_.set(key);
   191              buckets_space_[key].push_back(value);
   192              break;
   193          case kHash:
   194          {
   195              // That means we have to check for the hash table for the presence of a key
   196              buckets_space_[key].push_back(value);
   197              break;
   198          }
   199          }
   200      }
   201  
   202      /** Add a set of features to the table
   203       * @param dataset the values to store
   204       */
   205      void add(Matrix<ElementType> dataset)
   206      {
   207  #if USE_UNORDERED_MAP
   208          buckets_space_.rehash((buckets_space_.size() + dataset.rows) * 1.2);
   209  #endif
   210          // Add the features to the table
   211          for (unsigned int i = 0; i < dataset.rows; ++i) add(i, dataset[i]);
   212          // Now that the table is full, optimize it for speed/space
   213          optimize();
   214      }
   215  
   216      /** Get a bucket given the key
   217       * @param key
   218       * @return
   219       */
   220      inline const Bucket* getBucketFromKey(BucketKey key) const
   221      {
   222          // Generate other buckets
   223          switch (speed_level_) {
   224          case kArray:
   225              // That means we get the buckets from an array
   226              return &buckets_speed_[key];
   227              break;
   228          case kBitsetHash:
   229              // That means we can check the bitset for the presence of a key
   230              if (key_bitset_.test(key)) return &buckets_space_.find(key)->second;
   231              else return 0;
   232              break;
   233          case kHash:
   234          {
   235              // That means we have to check for the hash table for the presence of a key
   236              BucketsSpace::const_iterator bucket_it, bucket_end = buckets_space_.end();
   237              bucket_it = buckets_space_.find(key);
   238              // Stop here if that bucket does not exist
   239              if (bucket_it == bucket_end) return 0;
   240              else return &bucket_it->second;
   241              break;
   242          }
   243          }
   244          return 0;
   245      }
   246  
   247      /** Compute the sub-signature of a feature
   248       */
   249      size_t getKey(const ElementType* /*feature*/) const
   250      {
   251          CV_Error(cv::Error::StsUnsupportedFormat, "LSH is not implemented for that type" );
   252          return 0;
   253      }
   254  
   255      /** Get statistics about the table
   256       * @return
   257       */
   258      LshStats getStats() const;
   259  
   260  private:
   261      /** defines the speed fo the implementation
   262       * kArray uses a vector for storing data
   263       * kBitsetHash uses a hash map but checks for the validity of a key with a bitset
   264       * kHash uses a hash map only
   265       */
   266      enum SpeedLevel
   267      {
   268          kArray, kBitsetHash, kHash
   269      };
   270  
   271      /** Initialize some variables
   272       */
   273      void initialize(size_t key_size)
   274      {
   275          const size_t key_size_lower_bound = 1;
   276          //a value (size_t(1) << key_size) must fit the size_t type so key_size has to be strictly less than size of size_t
   277          const size_t key_size_upper_bound = (std::min)(sizeof(BucketKey) * CHAR_BIT + 1, sizeof(size_t) * CHAR_BIT);
   278          if (key_size < key_size_lower_bound || key_size >= key_size_upper_bound)
   279          {
   280              CV_Error(cv::Error::StsBadArg, cv::format("Invalid key_size (=%d). Valid values for your system are %d <= key_size < %d.", (int)key_size, (int)key_size_lower_bound, (int)key_size_upper_bound));
   281          }
   282  
   283          speed_level_ = kHash;
   284          key_size_ = (unsigned)key_size;
   285      }
   286  
   287      /** Optimize the table for speed/space
   288       */
   289      void optimize()
   290      {
   291          // If we are already using the fast storage, no need to do anything
   292          if (speed_level_ == kArray) return;
   293  
   294          // Use an array if it will be more than half full
   295          if (buckets_space_.size() > ((size_t(1) << key_size_) / 2)) {
   296              speed_level_ = kArray;
   297              // Fill the array version of it
   298              buckets_speed_.resize(size_t(1) << key_size_);
   299              for (BucketsSpace::const_iterator key_bucket = buckets_space_.begin(); key_bucket != buckets_space_.end(); ++key_bucket) buckets_speed_[key_bucket->first] = key_bucket->second;
   300  
   301              // Empty the hash table
   302              buckets_space_.clear();
   303              return;
   304          }
   305  
   306          // If the bitset is going to use less than 10% of the RAM of the hash map (at least 1 size_t for the key and two
   307          // for the vector) or less than 512MB (key_size_ <= 30)
   308          if (((std::max(buckets_space_.size(), buckets_speed_.size()) * CHAR_BIT * 3 * sizeof(BucketKey)) / 10
   309               >= (size_t(1) << key_size_)) || (key_size_ <= 32)) {
   310              speed_level_ = kBitsetHash;
   311              key_bitset_.resize(size_t(1) << key_size_);
   312              key_bitset_.reset();
   313              // Try with the BucketsSpace
   314              for (BucketsSpace::const_iterator key_bucket = buckets_space_.begin(); key_bucket != buckets_space_.end(); ++key_bucket) key_bitset_.set(key_bucket->first);
   315          }
   316          else {
   317              speed_level_ = kHash;
   318              key_bitset_.clear();
   319          }
   320      }
   321  
   322      /** The vector of all the buckets if they are held for speed
   323       */
   324      BucketsSpeed buckets_speed_;
   325  
   326      /** The hash table of all the buckets in case we cannot use the speed version
   327       */
   328      BucketsSpace buckets_space_;
   329  
   330      /** What is used to store the data */
   331      SpeedLevel speed_level_;
   332  
   333      /** If the subkey is small enough, it will keep track of which subkeys are set through that bitset
   334       * That is just a speedup so that we don't look in the hash table (which can be mush slower that checking a bitset)
   335       */
   336      DynamicBitset key_bitset_;
   337  
   338      /** The size of the sub-signature in bits
   339       */
   340      unsigned int key_size_;
   341  
   342      unsigned int feature_size_;
   343  
   344      // Members only used for the unsigned char specialization
   345      /** The mask to apply to a feature to get the hash key
   346       * Only used in the unsigned char case
   347       */
   348      std::vector<size_t> mask_;
   349  };
   350  
   351  ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
   352  // Specialization for unsigned char
   353  
   354  template<>
   355  inline LshTable<unsigned char>::LshTable(unsigned int feature_size, unsigned int subsignature_size)
   356  {
   357      feature_size_ = feature_size;
   358      initialize(subsignature_size);
   359      // Allocate the mask
   360      mask_ = std::vector<size_t>((feature_size * sizeof(char) + sizeof(size_t) - 1) / sizeof(size_t), 0);
   361  
   362      // A bit brutal but fast to code
   363      std::vector<int> indices(feature_size * CHAR_BIT);
   364      for (size_t i = 0; i < feature_size * CHAR_BIT; ++i) indices[i] = (int)i;
   365  #ifndef OPENCV_FLANN_USE_STD_RAND
   366      cv::randShuffle(indices);
   367  #else
   368      std::random_shuffle(indices.begin(), indices.end());
   369  #endif
   370  
   371      // Generate a random set of order of subsignature_size_ bits
   372      for (unsigned int i = 0; i < key_size_; ++i) {
   373          size_t index = indices[i];
   374  
   375          // Set that bit in the mask
   376          size_t divisor = CHAR_BIT * sizeof(size_t);
   377          size_t idx = index / divisor; //pick the right size_t index
   378          mask_[idx] |= size_t(1) << (index % divisor); //use modulo to find the bit offset
   379      }
   380  
   381      // Set to 1 if you want to display the mask for debug
   382  #if 0
   383      {
   384          size_t bcount = 0;
   385          BOOST_FOREACH(size_t mask_block, mask_){
   386              out << std::setw(sizeof(size_t) * CHAR_BIT / 4) << std::setfill('0') << std::hex << mask_block
   387                  << std::endl;
   388              bcount += __builtin_popcountll(mask_block);
   389          }
   390          out << "bit count : " << std::dec << bcount << std::endl;
   391          out << "mask size : " << mask_.size() << std::endl;
   392          return out;
   393      }
   394  #endif
   395  }
   396  
   397  /** Return the Subsignature of a feature
   398   * @param feature the feature to analyze
   399   */
   400  template<>
   401  inline size_t LshTable<unsigned char>::getKey(const unsigned char* feature) const
   402  {
   403      // no need to check if T is dividable by sizeof(size_t) like in the Hamming
   404      // distance computation as we have a mask
   405      // FIXIT: This is bad assumption, because we reading tail bytes after of the allocated features buffer
   406      const size_t* feature_block_ptr = reinterpret_cast<const size_t*> ((const void*)feature);
   407  
   408      // Figure out the subsignature of the feature
   409      // Given the feature ABCDEF, and the mask 001011, the output will be
   410      // 000CEF
   411      size_t subsignature = 0;
   412      size_t bit_index = 1;
   413  
   414      for (unsigned i = 0; i < feature_size_; i += sizeof(size_t)) {
   415          // get the mask and signature blocks
   416          size_t feature_block;
   417          if (i <= feature_size_ - sizeof(size_t))
   418          {
   419              feature_block = *feature_block_ptr;
   420          }
   421          else
   422          {
   423              size_t tmp = 0;
   424              memcpy(&tmp, feature_block_ptr, feature_size_ - i); // preserve bytes order
   425              feature_block = tmp;
   426          }
   427          size_t mask_block = mask_[i / sizeof(size_t)];
   428          while (mask_block) {
   429              // Get the lowest set bit in the mask block
   430              size_t lowest_bit = mask_block & (-(ptrdiff_t)mask_block);
   431              // Add it to the current subsignature if necessary
   432              subsignature += (feature_block & lowest_bit) ? bit_index : 0;
   433              // Reset the bit in the mask block
   434              mask_block ^= lowest_bit;
   435              // increment the bit index for the subsignature
   436              bit_index <<= 1;
   437          }
   438          // Check the next feature block
   439          ++feature_block_ptr;
   440      }
   441      return subsignature;
   442  }
   443  
   444  template<>
   445  inline LshStats LshTable<unsigned char>::getStats() const
   446  {
   447      LshStats stats;
   448      stats.bucket_size_mean_ = 0;
   449      if ((buckets_speed_.empty()) && (buckets_space_.empty())) {
   450          stats.n_buckets_ = 0;
   451          stats.bucket_size_median_ = 0;
   452          stats.bucket_size_min_ = 0;
   453          stats.bucket_size_max_ = 0;
   454          return stats;
   455      }
   456  
   457      if (!buckets_speed_.empty()) {
   458          for (BucketsSpeed::const_iterator pbucket = buckets_speed_.begin(); pbucket != buckets_speed_.end(); ++pbucket) {
   459              stats.bucket_sizes_.push_back((lsh::FeatureIndex)pbucket->size());
   460              stats.bucket_size_mean_ += pbucket->size();
   461          }
   462          stats.bucket_size_mean_ /= buckets_speed_.size();
   463          stats.n_buckets_ = buckets_speed_.size();
   464      }
   465      else {
   466          for (BucketsSpace::const_iterator x = buckets_space_.begin(); x != buckets_space_.end(); ++x) {
   467              stats.bucket_sizes_.push_back((lsh::FeatureIndex)x->second.size());
   468              stats.bucket_size_mean_ += x->second.size();
   469          }
   470          stats.bucket_size_mean_ /= buckets_space_.size();
   471          stats.n_buckets_ = buckets_space_.size();
   472      }
   473  
   474      std::sort(stats.bucket_sizes_.begin(), stats.bucket_sizes_.end());
   475  
   476      //  BOOST_FOREACH(int size, stats.bucket_sizes_)
   477      //          std::cout << size << " ";
   478      //  std::cout << std::endl;
   479      stats.bucket_size_median_ = stats.bucket_sizes_[stats.bucket_sizes_.size() / 2];
   480      stats.bucket_size_min_ = stats.bucket_sizes_.front();
   481      stats.bucket_size_max_ = stats.bucket_sizes_.back();
   482  
   483      // TODO compute mean and std
   484      /*float mean, stddev;
   485         stats.bucket_size_mean_ = mean;
   486         stats.bucket_size_std_dev = stddev;*/
   487  
   488      // Include a histogram of the buckets
   489      unsigned int bin_start = 0;
   490      unsigned int bin_end = 20;
   491      bool is_new_bin = true;
   492      for (std::vector<unsigned int>::iterator iterator = stats.bucket_sizes_.begin(), end = stats.bucket_sizes_.end(); iterator
   493           != end; )
   494          if (*iterator < bin_end) {
   495              if (is_new_bin) {
   496                  stats.size_histogram_.push_back(std::vector<unsigned int>(3, 0));
   497                  stats.size_histogram_.back()[0] = bin_start;
   498                  stats.size_histogram_.back()[1] = bin_end - 1;
   499                  is_new_bin = false;
   500              }
   501              ++stats.size_histogram_.back()[2];
   502              ++iterator;
   503          }
   504          else {
   505              bin_start += 20;
   506              bin_end += 20;
   507              is_new_bin = true;
   508          }
   509  
   510      return stats;
   511  }
   512  
   513  // End the two namespaces
   514  }
   515  }
   516  
   517  #ifdef _MSC_VER
   518  #pragma warning(pop)
   519  #endif
   520  
   521  ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
   522  
   523  //! @endcond
   524  
   525  #endif /* OPENCV_FLANN_LSH_TABLE_H_ */