github.com/kaydxh/golang@v0.0.131/pkg/gocv/cgo/third_path/opencv4/include/opencv2/flann/autotuned_index.h

/***********************************************************************
 * Software License Agreement (BSD License)
 *
 * Copyright 2008-2009  Marius Muja (mariusm@cs.ubc.ca). All rights reserved.
 * Copyright 2008-2009  David G. Lowe (lowe@cs.ubc.ca). All rights reserved.
 *
 * THE BSD LICENSE
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *************************************************************************/

#ifndef OPENCV_FLANN_AUTOTUNED_INDEX_H_
#define OPENCV_FLANN_AUTOTUNED_INDEX_H_

//! @cond IGNORED

#include <sstream>

#include "nn_index.h"
#include "ground_truth.h"
#include "index_testing.h"
#include "sampling.h"
#include "kdtree_index.h"
#include "kdtree_single_index.h"
#include "kmeans_index.h"
#include "composite_index.h"
#include "linear_index.h"
#include "logger.h"

namespace cvflann
{

template<typename Distance>
NNIndex<Distance>* create_index_by_type(const Matrix<typename Distance::ElementType>& dataset, const IndexParams& params, const Distance& distance);


struct AutotunedIndexParams : public IndexParams
{
    AutotunedIndexParams(float target_precision = 0.8, float build_weight = 0.01, float memory_weight = 0, float sample_fraction = 0.1)
    {
        (*this)["algorithm"] = FLANN_INDEX_AUTOTUNED;
        // precision desired (used for autotuning, -1 otherwise)
        (*this)["target_precision"] = target_precision;
        // build tree time weighting factor
        (*this)["build_weight"] = build_weight;
        // index memory weighting factor
        (*this)["memory_weight"] = memory_weight;
        // what fraction of the dataset to use for autotuning
        (*this)["sample_fraction"] = sample_fraction;
    }
};
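
// Example: a minimal usage sketch of the class defined below. The buffer
// `data` and the dimensions `rows`/`cols` are hypothetical placeholders for
// caller-supplied values.
//
//   cvflann::Matrix<float> dataset(data, rows, cols);
//   cvflann::AutotunedIndex< cvflann::L2<float> > index(
//       dataset, cvflann::AutotunedIndexParams(0.9f, 0.01f, 0.0f, 0.1f));
//   index.buildIndex();  // runs the autotuning procedure implemented below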


template <typename Distance>
class AutotunedIndex : public NNIndex<Distance>
{
public:
    typedef typename Distance::ElementType ElementType;
    typedef typename Distance::ResultType DistanceType;

    AutotunedIndex(const Matrix<ElementType>& inputData, const IndexParams& params = AutotunedIndexParams(), Distance d = Distance()) :
        dataset_(inputData), distance_(d)
    {
        target_precision_ = get_param(params, "target_precision", 0.8f);
        build_weight_ = get_param(params, "build_weight", 0.01f);
        memory_weight_ = get_param(params, "memory_weight", 0.0f);
        sample_fraction_ = get_param(params, "sample_fraction", 0.1f);
        bestIndex_ = NULL;
        speedup_ = 0;
    }

    // Copying is intentionally disabled: declared but not defined.
    AutotunedIndex(const AutotunedIndex&);
    AutotunedIndex& operator=(const AutotunedIndex&);

    virtual ~AutotunedIndex()
    {
        if (bestIndex_ != NULL) {
            delete bestIndex_;
            bestIndex_ = NULL;
        }
    }

    /**
     * Method responsible for building the index.
     */
    virtual void buildIndex() CV_OVERRIDE
    {
        std::ostringstream stream;
        bestParams_ = estimateBuildParams();
        print_params(bestParams_, stream);
        Logger::info("----------------------------------------------------\n");
        Logger::info("Autotuned parameters:\n");
        Logger::info("%s", stream.str().c_str());
        Logger::info("----------------------------------------------------\n");

        bestIndex_ = create_index_by_type(dataset_, bestParams_, distance_);
        bestIndex_->buildIndex();
        speedup_ = estimateSearchParams(bestSearchParams_);
        stream.str(std::string());
        print_params(bestSearchParams_, stream);
        Logger::info("----------------------------------------------------\n");
        Logger::info("Search parameters:\n");
        Logger::info("%s", stream.str().c_str());
        Logger::info("----------------------------------------------------\n");
    }

    /**
     * Saves the index to a stream
     */
    virtual void saveIndex(FILE* stream) CV_OVERRIDE
    {
        save_value(stream, (int)bestIndex_->getType());
        bestIndex_->saveIndex(stream);
        save_value(stream, get_param<int>(bestSearchParams_, "checks"));
    }

    /**
     * Loads the index from a stream
     */
    virtual void loadIndex(FILE* stream) CV_OVERRIDE
    {
        int index_type;

        load_value(stream, index_type);
        IndexParams params;
        params["algorithm"] = (flann_algorithm_t)index_type;
        bestIndex_ = create_index_by_type<Distance>(dataset_, params, distance_);
        bestIndex_->loadIndex(stream);
        int checks;
        load_value(stream, checks);
        bestSearchParams_["checks"] = checks;
    }

    /**
     * Method that searches for nearest-neighbors
     */
    virtual void findNeighbors(ResultSet<DistanceType>& result, const ElementType* vec, const SearchParams& searchParams) CV_OVERRIDE
    {
        int checks = get_param<int>(searchParams, "checks", FLANN_CHECKS_AUTOTUNED);
        if (checks == FLANN_CHECKS_AUTOTUNED) {
            bestIndex_->findNeighbors(result, vec, bestSearchParams_);
        }
        else {
            bestIndex_->findNeighbors(result, vec, searchParams);
        }
    }
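
    // Example: a minimal search sketch. Passing FLANN_CHECKS_AUTOTUNED makes
    // findNeighbors() fall back to the tuned bestSearchParams_. The names
    // `knn`, `query`, `indices` and `dists` are hypothetical caller-supplied
    // values and buffers.
    //
    //   cvflann::KNNResultSet<float> resultSet(knn);
    //   resultSet.init(indices, dists);
    //   index.findNeighbors(resultSet, query,
    //                       cvflann::SearchParams(cvflann::FLANN_CHECKS_AUTOTUNED));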


    IndexParams getParameters() const CV_OVERRIDE
    {
        return bestIndex_->getParameters();
    }

    SearchParams getSearchParameters() const
    {
        return bestSearchParams_;
    }

    float getSpeedup() const
    {
        return speedup_;
    }


    /**
     * Number of features in this index.
     */
    virtual size_t size() const CV_OVERRIDE
    {
        return bestIndex_->size();
    }

    /**
     * The length of each vector in this index.
     */
    virtual size_t veclen() const CV_OVERRIDE
    {
        return bestIndex_->veclen();
    }

    /**
     * The amount of memory (in bytes) this index uses.
     */
    virtual int usedMemory() const CV_OVERRIDE
    {
        return bestIndex_->usedMemory();
    }

    /**
     * Algorithm name
     */
    virtual flann_algorithm_t getType() const CV_OVERRIDE
    {
        return FLANN_INDEX_AUTOTUNED;
    }

private:

    struct CostData
    {
        float searchTimeCost;
        float buildTimeCost;
        float memoryCost;
        float totalCost;
        IndexParams params;
    };
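
    // Note: estimateBuildParams() below compares candidate indexes using
    //
    //   timeCost  = buildTimeCost * build_weight_ + searchTimeCost
    //   totalCost = timeCost / bestTimeCost + memory_weight_ * memoryCost
    //
    // so build_weight_ trades index build time against search time, and
    // memory_weight_ weighs the index memory footprint, measured relative to
    // the memory taken by the dataset itself.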

    void evaluate_kmeans(CostData& cost)
    {
        StartStopTimer t;
        int checks;
        const int nn = 1;

        Logger::info("KMeansTree using params: max_iterations=%d, branching=%d\n",
                     get_param<int>(cost.params,"iterations"),
                     get_param<int>(cost.params,"branching"));
        KMeansIndex<Distance> kmeans(sampledDataset_, cost.params, distance_);
        // measure index build time
        t.start();
        kmeans.buildIndex();
        t.stop();
        float buildTime = (float)t.value;

        // measure search time
        float searchTime = test_index_precision(kmeans, sampledDataset_, testDataset_, gt_matches_, target_precision_, checks, distance_, nn);

        float datasetMemory = float(sampledDataset_.rows * sampledDataset_.cols * sizeof(float));
        cost.memoryCost = (kmeans.usedMemory() + datasetMemory) / datasetMemory;
        cost.searchTimeCost = searchTime;
        cost.buildTimeCost = buildTime;
        Logger::info("KMeansTree buildTime=%g, searchTime=%g, build_weight=%g\n", buildTime, searchTime, build_weight_);
    }


    void evaluate_kdtree(CostData& cost)
    {
        StartStopTimer t;
        int checks;
        const int nn = 1;

        Logger::info("KDTree using params: trees=%d\n", get_param<int>(cost.params,"trees"));
        KDTreeIndex<Distance> kdtree(sampledDataset_, cost.params, distance_);

        // measure index build time
        t.start();
        kdtree.buildIndex();
        t.stop();
        float buildTime = (float)t.value;

        // measure search time
        float searchTime = test_index_precision(kdtree, sampledDataset_, testDataset_, gt_matches_, target_precision_, checks, distance_, nn);

        float datasetMemory = float(sampledDataset_.rows * sampledDataset_.cols * sizeof(float));
        cost.memoryCost = (kdtree.usedMemory() + datasetMemory) / datasetMemory;
        cost.searchTimeCost = searchTime;
        cost.buildTimeCost = buildTime;
        Logger::info("KDTree buildTime=%g, searchTime=%g\n", buildTime, searchTime);
    }


    // The simplex-downhill refinement below is disabled in this version of the
    // file; only the grid search in optimizeKMeans()/optimizeKDTree() is used.
    //
    //    struct KMeansSimpleDownhillFunctor {
    //
    //        Autotune& autotuner;
    //        KMeansSimpleDownhillFunctor(Autotune& autotuner_) : autotuner(autotuner_) {}
    //
    //        float operator()(int* params) {
    //
    //            float maxFloat = numeric_limits<float>::max();
    //
    //            if (params[0]<2) return maxFloat;
    //            if (params[1]<0) return maxFloat;
    //
    //            CostData c;
    //            c.params["algorithm"] = KMEANS;
    //            c.params["centers-init"] = CENTERS_RANDOM;
    //            c.params["branching"] = params[0];
    //            c.params["max-iterations"] = params[1];
    //
    //            autotuner.evaluate_kmeans(c);
    //
    //            return c.timeCost;
    //
    //        }
    //    };
    //
    //    struct KDTreeSimpleDownhillFunctor {
    //
    //        Autotune& autotuner;
    //        KDTreeSimpleDownhillFunctor(Autotune& autotuner_) : autotuner(autotuner_) {}
    //
    //        float operator()(int* params) {
    //            float maxFloat = numeric_limits<float>::max();
    //
    //            if (params[0]<1) return maxFloat;
    //
    //            CostData c;
    //            c.params["algorithm"] = KDTREE;
    //            c.params["trees"] = params[0];
    //
    //            autotuner.evaluate_kdtree(c);
    //
    //            return c.timeCost;
    //
    //        }
    //    };


    void optimizeKMeans(std::vector<CostData>& costs)
    {
        Logger::info("KMEANS, Step 1: Exploring parameter space\n");

        // explore kmeans parameters space using combinations of the parameters below
        int maxIterations[] = { 1, 5, 10, 15 };
        int branchingFactors[] = { 16, 32, 64, 128, 256 };

        int kmeansParamSpaceSize = FLANN_ARRAY_LEN(maxIterations) * FLANN_ARRAY_LEN(branchingFactors);
        costs.reserve(costs.size() + kmeansParamSpaceSize);

        // evaluate kmeans for all parameter combinations
        for (size_t i = 0; i < FLANN_ARRAY_LEN(maxIterations); ++i) {
            for (size_t j = 0; j < FLANN_ARRAY_LEN(branchingFactors); ++j) {
                CostData cost;
                cost.params["algorithm"] = FLANN_INDEX_KMEANS;
                cost.params["centers_init"] = FLANN_CENTERS_RANDOM;
                cost.params["iterations"] = maxIterations[i];
                cost.params["branching"] = branchingFactors[j];

                evaluate_kmeans(cost);
                costs.push_back(cost);
            }
        }

        //        Logger::info("KMEANS, Step 2: simplex-downhill optimization\n");
        //
        //        const int n = 2;
        //        // choose initial simplex points as the best parameters so far
        //        int kmeansNMPoints[n*(n+1)];
        //        float kmeansVals[n+1];
        //        for (int i=0;i<n+1;++i) {
        //            kmeansNMPoints[i*n] = (int)kmeansCosts[i].params["branching"];
        //            kmeansNMPoints[i*n+1] = (int)kmeansCosts[i].params["max-iterations"];
        //            kmeansVals[i] = kmeansCosts[i].timeCost;
        //        }
        //        KMeansSimpleDownhillFunctor kmeans_cost_func(*this);
        //        // run optimization
        //        optimizeSimplexDownhill(kmeansNMPoints,n,kmeans_cost_func,kmeansVals);
        //        // store results
        //        for (int i=0;i<n+1;++i) {
        //            kmeansCosts[i].params["branching"] = kmeansNMPoints[i*2];
        //            kmeansCosts[i].params["max-iterations"] = kmeansNMPoints[i*2+1];
        //            kmeansCosts[i].timeCost = kmeansVals[i];
        //        }
    }


    void optimizeKDTree(std::vector<CostData>& costs)
    {
        Logger::info("KD-TREE, Step 1: Exploring parameter space\n");

        // explore kd-tree parameters space using the parameters below
        int testTrees[] = { 1, 4, 8, 16, 32 };

        // evaluate kdtree for all parameter combinations
        for (size_t i = 0; i < FLANN_ARRAY_LEN(testTrees); ++i) {
            CostData cost;
            cost.params["algorithm"] = FLANN_INDEX_KDTREE;
            cost.params["trees"] = testTrees[i];

            evaluate_kdtree(cost);
            costs.push_back(cost);
        }

        //        Logger::info("KD-TREE, Step 2: simplex-downhill optimization\n");
        //
        //        const int n = 1;
        //        // choose initial simplex points as the best parameters so far
        //        int kdtreeNMPoints[n*(n+1)];
        //        float kdtreeVals[n+1];
        //        for (int i=0;i<n+1;++i) {
        //            kdtreeNMPoints[i] = (int)kdtreeCosts[i].params["trees"];
        //            kdtreeVals[i] = kdtreeCosts[i].timeCost;
        //        }
        //        KDTreeSimpleDownhillFunctor kdtree_cost_func(*this);
        //        // run optimization
        //        optimizeSimplexDownhill(kdtreeNMPoints,n,kdtree_cost_func,kdtreeVals);
        //        // store results
        //        for (int i=0;i<n+1;++i) {
        //            kdtreeCosts[i].params["trees"] = kdtreeNMPoints[i];
        //            kdtreeCosts[i].timeCost = kdtreeVals[i];
        //        }
    }

    /**
     * Chooses the best nearest-neighbor algorithm and estimates the optimal
     * parameters to use when building the index (for a given precision).
     * Returns a dictionary with the optimal parameters.
     */
    IndexParams estimateBuildParams()
    {
        std::vector<CostData> costs;

        int sampleSize = int(sample_fraction_ * dataset_.rows);
        int testSampleSize = std::min(sampleSize / 10, 1000);

        Logger::info("Entering autotuning, dataset size: %d, sampleSize: %d, testSampleSize: %d, target precision: %g\n", dataset_.rows, sampleSize, testSampleSize, target_precision_);

        // For a very small dataset, it makes no sense to build any fancy index, just
        // use linear search
        if (testSampleSize < 10) {
            Logger::info("Choosing linear, dataset too small\n");
            return LinearIndexParams();
        }

        // We use a fraction of the original dataset to speed up the autotune algorithm
        sampledDataset_ = random_sample(dataset_, sampleSize);
        // We use a cross-validation approach: first we sample a test set from the dataset
        testDataset_ = random_sample(sampledDataset_, testSampleSize, true);

        // We compute the ground truth using linear search
        Logger::info("Computing ground truth... \n");
        gt_matches_ = Matrix<int>(new int[testDataset_.rows], testDataset_.rows, 1);
        StartStopTimer t;
        t.start();
        compute_ground_truth<Distance>(sampledDataset_, testDataset_, gt_matches_, 0, distance_);
        t.stop();

        CostData linear_cost;
        linear_cost.searchTimeCost = (float)t.value;
        linear_cost.buildTimeCost = 0;
        linear_cost.memoryCost = 0;
        linear_cost.params["algorithm"] = FLANN_INDEX_LINEAR;

        costs.push_back(linear_cost);

        // Start parameter autotune process
        Logger::info("Autotuning parameters...\n");

        optimizeKMeans(costs);
        optimizeKDTree(costs);

        // find the lowest weighted time cost over all candidates
        float bestTimeCost = costs[0].searchTimeCost;
        for (size_t i = 0; i < costs.size(); ++i) {
            float timeCost = costs[i].buildTimeCost * build_weight_ + costs[i].searchTimeCost;
            if (timeCost < bestTimeCost) {
                bestTimeCost = timeCost;
            }
        }

        // pick the candidate with the lowest total (time + memory) cost
        float bestCost = costs[0].searchTimeCost / bestTimeCost;
        IndexParams bestParams = costs[0].params;
        if (bestTimeCost > 0) {
            for (size_t i = 0; i < costs.size(); ++i) {
                float crtCost = (costs[i].buildTimeCost * build_weight_ + costs[i].searchTimeCost) / bestTimeCost +
                                memory_weight_ * costs[i].memoryCost;
                if (crtCost < bestCost) {
                    bestCost = crtCost;
                    bestParams = costs[i].params;
                }
            }
        }

        delete[] gt_matches_.data;
        delete[] testDataset_.data;
        delete[] sampledDataset_.data;

        return bestParams;
    }
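
    // Example: after buildIndex(), the tuned configuration estimated here and
    // in estimateSearchParams() below can be read back through the public
    // accessors (`index` is a hypothetical AutotunedIndex instance):
    //
    //   cvflann::IndexParams chosen = index.getParameters();
    //   cvflann::SearchParams tuned = index.getSearchParameters();
    //   float speedup = index.getSpeedup();  // estimated speedup over linear search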


    /**
     * Estimates the search time parameters needed to get the desired precision.
     * Precondition: the index is built.
     * Postcondition: searchParams will have the optimum params set; returns the
     * speedup obtained over linear search.
     */
    float estimateSearchParams(SearchParams& searchParams)
    {
        const int nn = 1;
        const size_t SAMPLE_COUNT = 1000;

        CV_Assert(bestIndex_ != NULL && "Requires a valid index"); // must have a valid index

        float speedup = 0;

        int samples = (int)std::min(dataset_.rows / 10, SAMPLE_COUNT);
        if (samples > 0) {
            Matrix<ElementType> testDataset = random_sample(dataset_, samples);

            Logger::info("Computing ground truth\n");

            // we need to compute the ground truth first
            Matrix<int> gt_matches(new int[testDataset.rows], testDataset.rows, 1);
            StartStopTimer t;
            t.start();
            compute_ground_truth<Distance>(dataset_, testDataset, gt_matches, 1, distance_);
            t.stop();
            float linear = (float)t.value;

            int checks;
            Logger::info("Estimating number of checks\n");

            float searchTime;
            float cb_index;
            if (bestIndex_->getType() == FLANN_INDEX_KMEANS) {
                Logger::info("KMeans algorithm, estimating cluster border factor\n");
                KMeansIndex<Distance>* kmeans = (KMeansIndex<Distance>*)bestIndex_;
                float bestSearchTime = -1;
                float best_cb_index = -1;
                int best_checks = -1;
                for (cb_index = 0; cb_index < 1.1f; cb_index += 0.2f) {
                    kmeans->set_cb_index(cb_index);
                    searchTime = test_index_precision(*kmeans, dataset_, testDataset, gt_matches, target_precision_, checks, distance_, nn, 1);
                    if ((searchTime < bestSearchTime) || (bestSearchTime == -1)) {
                        bestSearchTime = searchTime;
                        best_cb_index = cb_index;
                        best_checks = checks;
                    }
                }
                searchTime = bestSearchTime;
                cb_index = best_cb_index;
                checks = best_checks;

                kmeans->set_cb_index(best_cb_index);
                Logger::info("Optimum cb_index: %g\n", cb_index);
                bestParams_["cb_index"] = cb_index;
            }
            else {
                searchTime = test_index_precision(*bestIndex_, dataset_, testDataset, gt_matches, target_precision_, checks, distance_, nn, 1);
            }

            Logger::info("Required number of checks: %d \n", checks);
            searchParams["checks"] = checks;

            speedup = linear / searchTime;

            delete[] gt_matches.data;
            delete[] testDataset.data;
        }

        return speedup;
    }

private:
    NNIndex<Distance>* bestIndex_;

    IndexParams bestParams_;
    SearchParams bestSearchParams_;

    Matrix<ElementType> sampledDataset_;
    Matrix<ElementType> testDataset_;
    Matrix<int> gt_matches_;

    float speedup_;

    /**
     * The dataset used by this index
     */
    const Matrix<ElementType> dataset_;

    /**
     * Index parameters
     */
    float target_precision_;
    float build_weight_;
    float memory_weight_;
    float sample_fraction_;

    Distance distance_;


};
}

//! @endcond

#endif /* OPENCV_FLANN_AUTOTUNED_INDEX_H_ */