github.com/yankunsam/loki/v2@v2.6.3-0.20220817130409-389df5235c27/pkg/storage/stores/shipper/util/queries_test.go (about)

     1  package util
     2  
     3  import (
     4  	"context"
     5  	"strconv"
     6  	"sync"
     7  	"testing"
     8  
     9  	"github.com/stretchr/testify/require"
    10  
    11  	"github.com/grafana/loki/pkg/storage/stores/series/index"
    12  )
    13  
    14  type mockTableQuerier struct {
    15  	sync.Mutex
    16  	queries map[string]index.Query
    17  }
    18  
    19  func (m *mockTableQuerier) MultiQueries(ctx context.Context, queries []index.Query, callback index.QueryPagesCallback) error {
    20  	m.Lock()
    21  	defer m.Unlock()
    22  
    23  	for _, query := range queries {
    24  		m.queries[query.HashValue] = query
    25  	}
    26  
    27  	return nil
    28  }
    29  
    30  func (m *mockTableQuerier) hasQueries(t *testing.T, count int) {
    31  	require.Len(t, m.queries, count)
    32  	for i := 0; i < count; i++ {
    33  		idx := strconv.Itoa(i)
    34  
    35  		require.Equal(t, m.queries[idx], index.Query{
    36  			HashValue:  idx,
    37  			ValueEqual: []byte(idx),
    38  		})
    39  	}
    40  }
    41  
    42  func TestDoParallelQueries(t *testing.T) {
    43  	for _, tc := range []struct {
    44  		name       string
    45  		queryCount int
    46  	}{
    47  		{
    48  			name:       "queries < maxQueriesPerGoroutine",
    49  			queryCount: maxQueriesBatch / 2,
    50  		},
    51  		{
    52  			name:       "queries = maxQueriesPerGoroutine",
    53  			queryCount: maxQueriesBatch,
    54  		},
    55  		{
    56  			name:       "queries > maxQueriesPerGoroutine",
    57  			queryCount: maxQueriesBatch * 2,
    58  		},
    59  	} {
    60  		t.Run(tc.name, func(t *testing.T) {
    61  			queries := buildQueries(tc.queryCount)
    62  
    63  			tableQuerier := mockTableQuerier{
    64  				queries: map[string]index.Query{},
    65  			}
    66  
    67  			err := DoParallelQueries(context.Background(), tableQuerier.MultiQueries, queries, func(query index.Query, batch index.ReadBatchResult) bool {
    68  				return false
    69  			})
    70  			require.NoError(t, err)
    71  
    72  			tableQuerier.hasQueries(t, tc.queryCount)
    73  		})
    74  	}
    75  }
    76  
    77  func buildQueries(n int) []index.Query {
    78  	queries := make([]index.Query, 0, n)
    79  	for i := 0; i < n; i++ {
    80  		idx := strconv.Itoa(i)
    81  		queries = append(queries, index.Query{
    82  			HashValue:  idx,
    83  			ValueEqual: []byte(idx),
    84  		})
    85  	}
    86  
    87  	return queries
    88  }
    89  
    90  func TestIndexDeduper(t *testing.T) {
    91  	for _, tc := range []struct {
    92  		name           string
    93  		batches        []batch
    94  		expectedValues map[string][][]byte
    95  	}{
    96  		{
    97  			name: "single batch",
    98  			batches: []batch{
    99  				{
   100  					hashValue:   "1",
   101  					rangeValues: [][]byte{[]byte("a"), []byte("b")},
   102  				},
   103  			},
   104  			expectedValues: map[string][][]byte{
   105  				"1": {[]byte("a"), []byte("b")},
   106  			},
   107  		},
   108  		{
   109  			name: "multiple batches, no duplicates",
   110  			batches: []batch{
   111  				{
   112  					hashValue:   "1",
   113  					rangeValues: [][]byte{[]byte("a"), []byte("b")},
   114  				},
   115  				{
   116  					hashValue:   "2",
   117  					rangeValues: [][]byte{[]byte("c"), []byte("d")},
   118  				},
   119  			},
   120  			expectedValues: map[string][][]byte{
   121  				"1": {[]byte("a"), []byte("b")},
   122  				"2": {[]byte("c"), []byte("d")},
   123  			},
   124  		},
   125  		{
   126  			name: "duplicate rangeValues but different hashValues",
   127  			batches: []batch{
   128  				{
   129  					hashValue:   "1",
   130  					rangeValues: [][]byte{[]byte("a"), []byte("b"), []byte("c")},
   131  				},
   132  				{
   133  					hashValue:   "2",
   134  					rangeValues: [][]byte{[]byte("a"), []byte("b")},
   135  				},
   136  			},
   137  			expectedValues: map[string][][]byte{
   138  				"1": {[]byte("a"), []byte("b"), []byte("c")},
   139  				"2": {[]byte("a"), []byte("b")},
   140  			},
   141  		},
   142  		{
   143  			name: "duplicate rangeValues in same hashValues",
   144  			batches: []batch{
   145  				{
   146  					hashValue:   "1",
   147  					rangeValues: [][]byte{[]byte("a"), []byte("b"), []byte("c")},
   148  				},
   149  				{
   150  					hashValue:   "1",
   151  					rangeValues: [][]byte{[]byte("a"), []byte("b"), []byte("d")},
   152  				},
   153  			},
   154  			expectedValues: map[string][][]byte{
   155  				"1": {[]byte("a"), []byte("b"), []byte("c"), []byte("d")},
   156  			},
   157  		},
   158  	} {
   159  		t.Run(tc.name, func(t *testing.T) {
   160  			t.Run("sync", func(t *testing.T) {
   161  				actualValues := map[string][][]byte{}
   162  				deduper := NewSyncCallbackDeduper(func(query index.Query, readBatch index.ReadBatchResult) bool {
   163  					itr := readBatch.Iterator()
   164  					for itr.Next() {
   165  						actualValues[query.HashValue] = append(actualValues[query.HashValue], itr.RangeValue())
   166  					}
   167  					return true
   168  				}, 0)
   169  
   170  				for _, batch := range tc.batches {
   171  					deduper(index.Query{HashValue: batch.hashValue}, batch)
   172  				}
   173  
   174  				require.Equal(t, tc.expectedValues, actualValues)
   175  			})
   176  
   177  			t.Run("nosync", func(t *testing.T) {
   178  				actualValues := map[string][][]byte{}
   179  				deduper := NewCallbackDeduper(func(query index.Query, readBatch index.ReadBatchResult) bool {
   180  					itr := readBatch.Iterator()
   181  					for itr.Next() {
   182  						actualValues[query.HashValue] = append(actualValues[query.HashValue], itr.RangeValue())
   183  					}
   184  					return true
   185  				}, 0)
   186  
   187  				for _, batch := range tc.batches {
   188  					deduper(index.Query{HashValue: batch.hashValue}, batch)
   189  				}
   190  
   191  				require.Equal(t, tc.expectedValues, actualValues)
   192  			})
   193  		})
   194  	}
   195  }
   196  
   197  type batch struct {
   198  	hashValue   string
   199  	rangeValues [][]byte
   200  }
   201  
   202  func (b batch) Iterator() index.ReadBatchIterator {
   203  	return &batchIterator{
   204  		rangeValues: b.rangeValues,
   205  	}
   206  }
   207  
   208  type batchIterator struct {
   209  	rangeValues [][]byte
   210  	idx         int
   211  }
   212  
   213  func (b *batchIterator) Next() bool {
   214  	if b.idx >= len(b.rangeValues) {
   215  		return false
   216  	}
   217  
   218  	b.idx++
   219  	return true
   220  }
   221  
   222  func (b batchIterator) RangeValue() []byte {
   223  	return b.rangeValues[b.idx-1]
   224  }
   225  
   226  func (b batchIterator) Value() []byte {
   227  	panic("implement me")
   228  }
   229  
   230  func Benchmark_DedupeCallback(b *testing.B) {
   231  	deduper := NewCallbackDeduper(func(_ index.Query, readBatch index.ReadBatchResult) bool {
   232  		itr := readBatch.Iterator()
   233  		for itr.Next() {
   234  			_ = itr.RangeValue()
   235  		}
   236  		return true
   237  	}, 1)
   238  	q := index.Query{HashValue: "1"}
   239  	batch1 := batch{
   240  		hashValue:   "1",
   241  		rangeValues: [][]byte{[]byte("a"), []byte("b"), []byte("c")},
   242  	}
   243  
   244  	b.ReportAllocs()
   245  	b.ResetTimer()
   246  	for i := 0; i < b.N; i++ {
   247  		deduper(q, batch1)
   248  	}
   249  }
   250  
   251  type TableQuerierFunc func(ctx context.Context, queries []index.Query, callback index.QueryPagesCallback) error
   252  
   253  func (f TableQuerierFunc) MultiQueries(ctx context.Context, queries []index.Query, callback index.QueryPagesCallback) error {
   254  	return f(ctx, queries, callback)
   255  }
   256  
   257  func Benchmark_MultiQueries(b *testing.B) {
   258  	benchmarkMultiQueries(b, 50)
   259  	benchmarkMultiQueries(b, 100)
   260  	benchmarkMultiQueries(b, 1000)
   261  	benchmarkMultiQueries(b, 10000)
   262  	benchmarkMultiQueries(b, 50000)
   263  }
   264  
   265  func benchmarkMultiQueries(b *testing.B, n int) {
   266  	b.Run(strconv.Itoa(n), func(b *testing.B) {
   267  		callback := index.QueryPagesCallback(func(_ index.Query, readBatch index.ReadBatchResult) bool {
   268  			itr := readBatch.Iterator()
   269  			for itr.Next() {
   270  				_ = itr.RangeValue()
   271  			}
   272  			return true
   273  		})
   274  		queries := make([]index.Query, n)
   275  		for i := range queries {
   276  			queries[i] = index.Query{HashValue: strconv.Itoa(i)}
   277  		}
   278  		ranges := [][]byte{[]byte("a"), []byte("b"), []byte("c")}
   279  		ctx := context.Background()
   280  		b.ReportAllocs()
   281  		b.ResetTimer()
   282  		for i := 0; i < b.N; i++ {
   283  			_ = DoParallelQueries(ctx, func(_ context.Context, queries []index.Query, callback index.QueryPagesCallback) error {
   284  				for _, query := range queries {
   285  					callback(query, batch{
   286  						hashValue:   query.HashValue,
   287  						rangeValues: ranges,
   288  					})
   289  					callback(query, batch{
   290  						hashValue:   query.HashValue,
   291  						rangeValues: ranges,
   292  					})
   293  					callback(query, batch{
   294  						hashValue:   query.HashValue,
   295  						rangeValues: ranges,
   296  					})
   297  					callback(query, batch{
   298  						hashValue:   query.HashValue,
   299  						rangeValues: ranges,
   300  					})
   301  				}
   302  				return nil
   303  			}, queries, callback)
   304  		}
   305  	})
   306  }