github.com/weaviate/weaviate@v1.24.6/adapters/repos/db/inverted/merge_benchmarks_test.go (about)

     1  //                           _       _
     2  // __      _____  __ ___   ___  __ _| |_ ___
     3  // \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \
     4  //  \ V  V /  __/ (_| |\ V /| | (_| | ||  __/
     5  //   \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___|
     6  //
     7  //  Copyright © 2016 - 2024 Weaviate B.V. All rights reserved.
     8  //
     9  //  CONTACT: hello@weaviate.io
    10  //
    11  
    12  package inverted
    13  
    14  import (
    15  	"math"
    16  	"math/rand"
    17  	"sort"
    18  	"testing"
    19  )
    20  
    21  // func BenchmarkAnd10k1m_Old(b *testing.B) {
    22  // 	b.StopTimer()
    23  
    24  // 	list1 := propValuePair{
    25  // 		docIDs: docPointers{
    26  // 			docIDs:   randomIDs(1e4),
    27  // 			checksum: []byte{0x01},
    28  // 		},
    29  // 		operator: filters.OperatorEqual,
    30  // 	}
    31  
    32  // 	list2 := propValuePair{
    33  // 		docIDs: docPointers{
    34  // 			docIDs:   randomIDs(1e6),
    35  // 			checksum: []byte{0x02},
    36  // 		},
    37  // 		operator: filters.OperatorEqual,
    38  // 	}
    39  
    40  // 	b.StartTimer()
    41  // 	for i := 0; i < b.N; i++ {
    42  // 		mergeAnd([]*propValuePair{&list1, &list2}, false)
    43  // 	}
    44  // }
    45  
    46  // func BenchmarkAnd10k1m_Optimized(b *testing.B) {
    47  // 	b.StopTimer()
    48  
    49  // 	list1 := propValuePair{
    50  // 		docIDs: docPointers{
    51  // 			docIDs:   randomIDs(1e4),
    52  // 			checksum: []byte{0x01},
    53  // 		},
    54  // 		operator: filters.OperatorEqual,
    55  // 	}
    56  
    57  // 	list2 := propValuePair{
    58  // 		docIDs: docPointers{
    59  // 			docIDs:   randomIDs(1e6),
    60  // 			checksum: []byte{0x02},
    61  // 		},
    62  // 		operator: filters.OperatorEqual,
    63  // 	}
    64  
    65  // 	b.StartTimer()
    66  // 	for i := 0; i < b.N; i++ {
    67  // 		mergeAndOptimized([]*propValuePair{&list1, &list2}, false)
    68  // 	}
    69  // }
    70  
    71  // func BenchmarkMultipleListsOf20k_Old(b *testing.B) {
    72  // 	b.StopTimer()
    73  
    74  // 	lists := make([]*propValuePair, 10)
    75  // 	for i := range lists {
    76  // 		lists[i] = &propValuePair{
    77  // 			docIDs: docPointers{
    78  // 				docIDs:   randomIDs(2e4),
    79  // 				checksum: []byte{uint8(i)},
    80  // 			},
    81  // 			operator: filters.OperatorEqual,
    82  // 		}
    83  // 	}
    84  
    85  // 	b.StartTimer()
    86  // 	for i := 0; i < b.N; i++ {
    87  // 		mergeAnd(lists, false)
    88  // 	}
    89  // }
    90  
    91  // func BenchmarkMultipleListsOf20k_Optimized(b *testing.B) {
    92  // 	b.StopTimer()
    93  
    94  // 	lists := make([]*propValuePair, 10)
    95  // 	for i := range lists {
    96  // 		lists[i] = &propValuePair{
    97  // 			docIDs: docPointers{
    98  // 				docIDs:   randomIDs(2e4),
    99  // 				checksum: []byte{uint8(i)},
   100  // 			},
   101  // 			operator: filters.OperatorEqual,
   102  // 		}
   103  // 	}
   104  
   105  // 	b.StartTimer()
   106  // 	for i := 0; i < b.N; i++ {
   107  // 		mergeAndOptimized(lists, false)
   108  // 	}
   109  // }
   110  
   111  func BenchmarkSort10k(b *testing.B) {
   112  	for i := 0; i < b.N; i++ {
   113  		b.StopTimer()
   114  		list := randomIDs(1e4)
   115  		b.StartTimer()
   116  
   117  		sort.Slice(list, func(a, b int) bool {
   118  			return list[a] < list[b]
   119  		})
   120  	}
   121  }
   122  
   123  func BenchmarkUnsortedLinearSearch(b *testing.B) {
   124  	searchTargets := randomIDs(1e5)
   125  
   126  	for i := 0; i < b.N; i++ {
   127  		b.StopTimer()
   128  		list := randomIDs(1e5)
   129  		b.StartTimer()
   130  
   131  		for i := range searchTargets {
   132  			linearSearchUnsorted(list, searchTargets[i])
   133  		}
   134  	}
   135  }
   136  
   137  func BenchmarkSortedBinarySearch(b *testing.B) {
   138  	searchTargets := randomIDs(1e6)
   139  
   140  	for i := 0; i < b.N; i++ {
   141  		b.StopTimer()
   142  		list := randomIDs(1e4)
   143  		b.StartTimer()
   144  
   145  		sort.Slice(list, func(a, b int) bool {
   146  			return list[a] < list[b]
   147  		})
   148  
   149  		for i := range searchTargets {
   150  			binarySearch(list, searchTargets[i])
   151  		}
   152  	}
   153  }
   154  
   155  func BenchmarkHashmap(b *testing.B) {
   156  	searchTargets := randomIDs(1e6)
   157  
   158  	for i := 0; i < b.N; i++ {
   159  		b.StopTimer()
   160  		list := randomIDs(1e4)
   161  		b.StartTimer()
   162  
   163  		lookup := make(map[uint64]struct{}, len(list))
   164  		for i := range list {
   165  			lookup[list[i]] = struct{}{}
   166  		}
   167  
   168  		for i := range searchTargets {
   169  			_, ok := lookup[searchTargets[i]]
   170  			_ = ok
   171  		}
   172  	}
   173  }
   174  
   175  func randomIDs(count int) []uint64 {
   176  	out := make([]uint64, count)
   177  	for i := range out {
   178  		out[i] = rand.Uint64()
   179  	}
   180  
   181  	return out
   182  }
   183  
   184  func linearSearchUnsorted(in []uint64, needle uint64) bool {
   185  	for i := range in {
   186  		if in[i] == needle {
   187  			return true
   188  		}
   189  	}
   190  
   191  	return false
   192  }
   193  
   194  // function binary_search(A, n, T) is
   195  //     L := 0
   196  //     R := n − 1
   197  //     while L ≤ R do
   198  //         m := floor((L + R) / 2)
   199  //         if A[m] < T then
   200  //             L := m + 1
   201  //         else if A[m] > T then
   202  //             R := m − 1
   203  //         else:
   204  //             return m
   205  //     return unsuccessful
   206  
   207  func binarySearch(in []uint64, needle uint64) bool {
   208  	left := 0
   209  	right := len(in) - 1
   210  
   211  	for left <= right {
   212  		m := int(math.Floor(float64((left + right)) / float64(2)))
   213  		if in[m] < needle {
   214  			left = m + 1
   215  		} else if in[m] > needle {
   216  			right = m - 1
   217  		} else {
   218  			return true
   219  		}
   220  	}
   221  
   222  	return false
   223  }