github.com/yankunsam/loki/v2@v2.6.3-0.20220817130409-389df5235c27/pkg/storage/stores/shipper/util/queries_test.go (about) 1 package util 2 3 import ( 4 "context" 5 "strconv" 6 "sync" 7 "testing" 8 9 "github.com/stretchr/testify/require" 10 11 "github.com/grafana/loki/pkg/storage/stores/series/index" 12 ) 13 14 type mockTableQuerier struct { 15 sync.Mutex 16 queries map[string]index.Query 17 } 18 19 func (m *mockTableQuerier) MultiQueries(ctx context.Context, queries []index.Query, callback index.QueryPagesCallback) error { 20 m.Lock() 21 defer m.Unlock() 22 23 for _, query := range queries { 24 m.queries[query.HashValue] = query 25 } 26 27 return nil 28 } 29 30 func (m *mockTableQuerier) hasQueries(t *testing.T, count int) { 31 require.Len(t, m.queries, count) 32 for i := 0; i < count; i++ { 33 idx := strconv.Itoa(i) 34 35 require.Equal(t, m.queries[idx], index.Query{ 36 HashValue: idx, 37 ValueEqual: []byte(idx), 38 }) 39 } 40 } 41 42 func TestDoParallelQueries(t *testing.T) { 43 for _, tc := range []struct { 44 name string 45 queryCount int 46 }{ 47 { 48 name: "queries < maxQueriesPerGoroutine", 49 queryCount: maxQueriesBatch / 2, 50 }, 51 { 52 name: "queries = maxQueriesPerGoroutine", 53 queryCount: maxQueriesBatch, 54 }, 55 { 56 name: "queries > maxQueriesPerGoroutine", 57 queryCount: maxQueriesBatch * 2, 58 }, 59 } { 60 t.Run(tc.name, func(t *testing.T) { 61 queries := buildQueries(tc.queryCount) 62 63 tableQuerier := mockTableQuerier{ 64 queries: map[string]index.Query{}, 65 } 66 67 err := DoParallelQueries(context.Background(), tableQuerier.MultiQueries, queries, func(query index.Query, batch index.ReadBatchResult) bool { 68 return false 69 }) 70 require.NoError(t, err) 71 72 tableQuerier.hasQueries(t, tc.queryCount) 73 }) 74 } 75 } 76 77 func buildQueries(n int) []index.Query { 78 queries := make([]index.Query, 0, n) 79 for i := 0; i < n; i++ { 80 idx := strconv.Itoa(i) 81 queries = append(queries, index.Query{ 82 HashValue: idx, 83 ValueEqual: []byte(idx), 84 }) 85 } 86 87 return queries 88 } 89 90 func TestIndexDeduper(t *testing.T) { 91 for _, tc := range []struct { 92 name string 93 batches []batch 94 expectedValues map[string][][]byte 95 }{ 96 { 97 name: "single batch", 98 batches: []batch{ 99 { 100 hashValue: "1", 101 rangeValues: [][]byte{[]byte("a"), []byte("b")}, 102 }, 103 }, 104 expectedValues: map[string][][]byte{ 105 "1": {[]byte("a"), []byte("b")}, 106 }, 107 }, 108 { 109 name: "multiple batches, no duplicates", 110 batches: []batch{ 111 { 112 hashValue: "1", 113 rangeValues: [][]byte{[]byte("a"), []byte("b")}, 114 }, 115 { 116 hashValue: "2", 117 rangeValues: [][]byte{[]byte("c"), []byte("d")}, 118 }, 119 }, 120 expectedValues: map[string][][]byte{ 121 "1": {[]byte("a"), []byte("b")}, 122 "2": {[]byte("c"), []byte("d")}, 123 }, 124 }, 125 { 126 name: "duplicate rangeValues but different hashValues", 127 batches: []batch{ 128 { 129 hashValue: "1", 130 rangeValues: [][]byte{[]byte("a"), []byte("b"), []byte("c")}, 131 }, 132 { 133 hashValue: "2", 134 rangeValues: [][]byte{[]byte("a"), []byte("b")}, 135 }, 136 }, 137 expectedValues: map[string][][]byte{ 138 "1": {[]byte("a"), []byte("b"), []byte("c")}, 139 "2": {[]byte("a"), []byte("b")}, 140 }, 141 }, 142 { 143 name: "duplicate rangeValues in same hashValues", 144 batches: []batch{ 145 { 146 hashValue: "1", 147 rangeValues: [][]byte{[]byte("a"), []byte("b"), []byte("c")}, 148 }, 149 { 150 hashValue: "1", 151 rangeValues: [][]byte{[]byte("a"), []byte("b"), []byte("d")}, 152 }, 153 }, 154 expectedValues: map[string][][]byte{ 155 "1": {[]byte("a"), []byte("b"), []byte("c"), []byte("d")}, 156 }, 157 }, 158 } { 159 t.Run(tc.name, func(t *testing.T) { 160 t.Run("sync", func(t *testing.T) { 161 actualValues := map[string][][]byte{} 162 deduper := NewSyncCallbackDeduper(func(query index.Query, readBatch index.ReadBatchResult) bool { 163 itr := readBatch.Iterator() 164 for itr.Next() { 165 actualValues[query.HashValue] = append(actualValues[query.HashValue], itr.RangeValue()) 166 } 167 return true 168 }, 0) 169 170 for _, batch := range tc.batches { 171 deduper(index.Query{HashValue: batch.hashValue}, batch) 172 } 173 174 require.Equal(t, tc.expectedValues, actualValues) 175 }) 176 177 t.Run("nosync", func(t *testing.T) { 178 actualValues := map[string][][]byte{} 179 deduper := NewCallbackDeduper(func(query index.Query, readBatch index.ReadBatchResult) bool { 180 itr := readBatch.Iterator() 181 for itr.Next() { 182 actualValues[query.HashValue] = append(actualValues[query.HashValue], itr.RangeValue()) 183 } 184 return true 185 }, 0) 186 187 for _, batch := range tc.batches { 188 deduper(index.Query{HashValue: batch.hashValue}, batch) 189 } 190 191 require.Equal(t, tc.expectedValues, actualValues) 192 }) 193 }) 194 } 195 } 196 197 type batch struct { 198 hashValue string 199 rangeValues [][]byte 200 } 201 202 func (b batch) Iterator() index.ReadBatchIterator { 203 return &batchIterator{ 204 rangeValues: b.rangeValues, 205 } 206 } 207 208 type batchIterator struct { 209 rangeValues [][]byte 210 idx int 211 } 212 213 func (b *batchIterator) Next() bool { 214 if b.idx >= len(b.rangeValues) { 215 return false 216 } 217 218 b.idx++ 219 return true 220 } 221 222 func (b batchIterator) RangeValue() []byte { 223 return b.rangeValues[b.idx-1] 224 } 225 226 func (b batchIterator) Value() []byte { 227 panic("implement me") 228 } 229 230 func Benchmark_DedupeCallback(b *testing.B) { 231 deduper := NewCallbackDeduper(func(_ index.Query, readBatch index.ReadBatchResult) bool { 232 itr := readBatch.Iterator() 233 for itr.Next() { 234 _ = itr.RangeValue() 235 } 236 return true 237 }, 1) 238 q := index.Query{HashValue: "1"} 239 batch1 := batch{ 240 hashValue: "1", 241 rangeValues: [][]byte{[]byte("a"), []byte("b"), []byte("c")}, 242 } 243 244 b.ReportAllocs() 245 b.ResetTimer() 246 for i := 0; i < b.N; i++ { 247 deduper(q, batch1) 248 } 249 } 250 251 type TableQuerierFunc func(ctx context.Context, queries []index.Query, callback index.QueryPagesCallback) error 252 253 func (f TableQuerierFunc) MultiQueries(ctx context.Context, queries []index.Query, callback index.QueryPagesCallback) error { 254 return f(ctx, queries, callback) 255 } 256 257 func Benchmark_MultiQueries(b *testing.B) { 258 benchmarkMultiQueries(b, 50) 259 benchmarkMultiQueries(b, 100) 260 benchmarkMultiQueries(b, 1000) 261 benchmarkMultiQueries(b, 10000) 262 benchmarkMultiQueries(b, 50000) 263 } 264 265 func benchmarkMultiQueries(b *testing.B, n int) { 266 b.Run(strconv.Itoa(n), func(b *testing.B) { 267 callback := index.QueryPagesCallback(func(_ index.Query, readBatch index.ReadBatchResult) bool { 268 itr := readBatch.Iterator() 269 for itr.Next() { 270 _ = itr.RangeValue() 271 } 272 return true 273 }) 274 queries := make([]index.Query, n) 275 for i := range queries { 276 queries[i] = index.Query{HashValue: strconv.Itoa(i)} 277 } 278 ranges := [][]byte{[]byte("a"), []byte("b"), []byte("c")} 279 ctx := context.Background() 280 b.ReportAllocs() 281 b.ResetTimer() 282 for i := 0; i < b.N; i++ { 283 _ = DoParallelQueries(ctx, func(_ context.Context, queries []index.Query, callback index.QueryPagesCallback) error { 284 for _, query := range queries { 285 callback(query, batch{ 286 hashValue: query.HashValue, 287 rangeValues: ranges, 288 }) 289 callback(query, batch{ 290 hashValue: query.HashValue, 291 rangeValues: ranges, 292 }) 293 callback(query, batch{ 294 hashValue: query.HashValue, 295 rangeValues: ranges, 296 }) 297 callback(query, batch{ 298 hashValue: query.HashValue, 299 rangeValues: ranges, 300 }) 301 } 302 return nil 303 }, queries, callback) 304 } 305 }) 306 }