# Source: github.com/weaviate/weaviate@v1.24.6/adapters/repos/db/vector/hnsw/test_recall_hnswlib.py
"""Measure recall@1 of an hnswlib index against precomputed ground truth.

The script reads three JSON files from the current working directory:

* ``recall_vectors.json`` - the vectors that are inserted into the index
* ``recall_queries.json`` - the vectors that are used as queries
* ``recall_truths.json``  - per query, a list whose first element is compared
  with the label of the nearest neighbour returned by the index

Labels are assigned as ``0 .. len(vectors) - 1``, so the ground truth is
presumably expressed as positions in ``recall_vectors.json`` (confirm against
the generator of these fixtures).
"""
import json
import time

import numpy as np

VECTORS_PATH = "recall_vectors.json"
QUERIES_PATH = "recall_queries.json"
TRUTHS_PATH = "recall_truths.json"

# Index parameters, unchanged from the original script.
SPACE = "cosine"  # possible options are l2, cosine or ip
EF_CONSTRUCTION = 2000
MAX_CONNECTIONS = 100  # passed to hnswlib as ``M``
EF_SEARCH = 100  # ef should always be > k
K = 1  # number of closest elements requested per query


def load_json(path):
    """Return the parsed content of the JSON file at *path*."""
    with open(path, "r", encoding="utf-8") as f:
        return json.load(f)


def compute_recall(results, truths):
    """Return the fraction of queries whose top result equals the ground truth.

    Args:
        results: one sequence of returned labels per query; only element 0 is
            inspected because the script queries with k == 1.
        truths: one sequence of expected labels per query; only element 0 is
            inspected.

    Returns:
        ``hits / len(results)`` as a float, or ``0.0`` when ``results`` is
        empty (the original script divided by zero in that case).

    Raises:
        ValueError: if there are fewer ground-truth entries than results.
    """
    total = len(results)
    if total == 0:
        return 0.0
    if len(truths) < total:
        raise ValueError(
            "got {} results but only {} ground-truth entries".format(
                total, len(truths)
            )
        )

    # zip stops at the shorter input; the length check above guarantees that
    # every result is paired with a truth entry.
    hits = sum(1 for res, truth in zip(results, truths) if res[0] == truth[0])
    return hits / total


def main():
    """Build the index, run all queries and print import time and recall."""
    # Imported here so the helpers above can be imported (and tested) on
    # machines that do not have the native hnswlib bindings installed.
    import hnswlib

    data = load_json(VECTORS_PATH)
    queries = load_json(QUERIES_PATH)
    truths = load_json(TRUTHS_PATH)

    if not data:
        raise ValueError("{} contains no vectors".format(VECTORS_PATH))

    num_elements = len(data)
    dim = len(data[0])
    data_labels = np.arange(num_elements)

    # Declaring index
    p = hnswlib.Index(space=SPACE, dim=dim)

    # Initializing index - the maximum number of elements should be known
    # beforehand
    p.init_index(
        max_elements=num_elements,
        ef_construction=EF_CONSTRUCTION,
        M=MAX_CONNECTIONS,
    )

    before = time.time()
    # Element insertion (can be called several times):
    p.add_items(data, data_labels)
    print("import took {}".format(time.time() - before))

    # Controlling the recall by setting ef:
    p.set_ef(EF_SEARCH)

    # Query dataset (returns 2 numpy arrays: labels and distances); the
    # distances are not needed for the recall computation.
    results, _ = p.knn_query(queries, k=K)

    print("Recall: {}".format(compute_recall(results, truths)))


if __name__ == "__main__":
    main()