github.com/weaviate/weaviate@v1.24.6/adapters/repos/db/vector/hnsw/test_recall_hnswlib.py (about)

     1  import hnswlib
     2  import numpy as np
     3  import time
     4  import json
     5  
     6  data=None
     7  queries=None
     8  truths=None
     9  
    10  with open("recall_vectors.json", 'r') as f:
    11      data = json.load(f)
    12  
    13  with open("recall_queries.json", 'r') as f:
    14      queries = json.load(f)
    15  
    16  with open("recall_truths.json", 'r') as f:
    17      truths = json.load(f)
    18  
    19  num_elements = len(data)
    20  dim = len(data[0])
    21  data_labels = np.arange(num_elements)
    22  
    23  # Declaring index
    24  p = hnswlib.Index(space = 'cosine', dim = dim) # possible options are l2, cosine or ip
    25  
    26  # Initializing index - the maximum number of elements should be known beforehand
    27  p.init_index(max_elements = num_elements, ef_construction = 2000, M = 100)
    28  
    29  before = time.time()
    30  # Element insertion (can be called several times):
    31  p.add_items(data, data_labels)
    32  print("import took {}".format(time.time() - before))
    33  
    34  # Controlling the recall by setting ef:
    35  p.set_ef(100) # ef should always be > k
    36  
    37  # Query dataset, k - number of closest elements (returns 2 numpy arrays)
    38  results, distances = p.knn_query(queries, k = 1)
    39  
    40  relevant=0
    41  retrieved=0
    42  
    43  for i, res in enumerate(results):
    44      retrieved+=1
    45  
    46      # take elem 0 because k==1
    47      if res[0] == truths[i][0]:
    48          relevant+=1
    49  
    50  print("Recall: {}".format(relevant/retrieved))