github.com/yankunsam/loki/v2@v2.6.3-0.20220817130409-389df5235c27/pkg/storage/stores/shipper/util/queries.go (about) 1 package util 2 3 import ( 4 "context" 5 "sync" 6 7 "github.com/grafana/dskit/concurrency" 8 9 "github.com/grafana/loki/pkg/storage/stores/series/index" 10 util_math "github.com/grafana/loki/pkg/util/math" 11 ) 12 13 const ( 14 maxQueriesBatch = 100 15 maxConcurrency = 10 16 ) 17 18 type QueryIndexFunc func(ctx context.Context, queries []index.Query, callback index.QueryPagesCallback) error 19 20 // QueriesByTable groups and returns queries by tables. 21 func QueriesByTable(queries []index.Query) map[string][]index.Query { 22 queriesByTable := make(map[string][]index.Query) 23 for _, query := range queries { 24 if _, ok := queriesByTable[query.TableName]; !ok { 25 queriesByTable[query.TableName] = []index.Query{} 26 } 27 28 queriesByTable[query.TableName] = append(queriesByTable[query.TableName], query) 29 } 30 31 return queriesByTable 32 } 33 34 func DoParallelQueries(ctx context.Context, queryIndex QueryIndexFunc, queries []index.Query, callback index.QueryPagesCallback) error { 35 if len(queries) == 0 { 36 return nil 37 } 38 if len(queries) <= maxQueriesBatch { 39 return queryIndex(ctx, queries, NewCallbackDeduper(callback, len(queries))) 40 } 41 42 jobsCount := len(queries) / maxQueriesBatch 43 if len(queries)%maxQueriesBatch != 0 { 44 jobsCount++ 45 } 46 callback = NewSyncCallbackDeduper(callback, len(queries)) 47 return concurrency.ForEachJob(ctx, jobsCount, maxConcurrency, func(ctx context.Context, idx int) error { 48 return queryIndex(ctx, queries[idx*maxQueriesBatch:util_math.Min((idx+1)*maxQueriesBatch, len(queries))], callback) 49 }) 50 } 51 52 // NewSyncCallbackDeduper should always be used on table level not the whole query level because it just looks at range values which can be repeated across tables 53 // NewSyncCallbackDeduper is safe to used by multiple goroutines 54 // Cortex anyways dedupes entries across tables 55 func NewSyncCallbackDeduper(callback index.QueryPagesCallback, queries int) index.QueryPagesCallback { 56 syncMap := &syncMap{ 57 seen: make(map[string]map[string]struct{}, queries), 58 } 59 return func(q index.Query, rbr index.ReadBatchResult) bool { 60 return callback(q, &readBatchDeduperSync{ 61 syncMap: syncMap, 62 hashValue: q.HashValue, 63 ReadBatchIterator: rbr.Iterator(), 64 }) 65 } 66 } 67 68 // NewCallbackDeduper should always be used on table level not the whole query level because it just looks at range values which can be repeated across tables 69 // NewCallbackDeduper is safe not to used by multiple goroutines 70 // Cortex anyways dedupes entries across tables 71 func NewCallbackDeduper(callback index.QueryPagesCallback, queries int) index.QueryPagesCallback { 72 f := &readBatchDeduper{ 73 seen: make(map[string]map[string]struct{}, queries), 74 } 75 return func(q index.Query, rbr index.ReadBatchResult) bool { 76 f.hashValue = q.HashValue 77 f.ReadBatchIterator = rbr.Iterator() 78 return callback(q, f) 79 } 80 } 81 82 type readBatchDeduper struct { 83 index.ReadBatchIterator 84 hashValue string 85 seen map[string]map[string]struct{} 86 } 87 88 func (f *readBatchDeduper) Iterator() index.ReadBatchIterator { 89 return f 90 } 91 92 func (f *readBatchDeduper) Next() bool { 93 for f.ReadBatchIterator.Next() { 94 rangeValue := f.RangeValue() 95 hashes, ok := f.seen[f.hashValue] 96 if !ok { 97 hashes = map[string]struct{}{} 98 hashes[GetUnsafeString(rangeValue)] = struct{}{} 99 f.seen[f.hashValue] = hashes 100 return true 101 } 102 h := GetUnsafeString(rangeValue) 103 if _, loaded := hashes[h]; loaded { 104 continue 105 } 106 hashes[h] = struct{}{} 107 return true 108 } 109 110 return false 111 } 112 113 type syncMap struct { 114 seen map[string]map[string]struct{} 115 rw sync.RWMutex // nolint: structcheck 116 } 117 118 type readBatchDeduperSync struct { 119 index.ReadBatchIterator 120 hashValue string 121 *syncMap 122 } 123 124 func (f *readBatchDeduperSync) Iterator() index.ReadBatchIterator { 125 return f 126 } 127 128 func (f *readBatchDeduperSync) Next() bool { 129 for f.ReadBatchIterator.Next() { 130 rangeValue := f.RangeValue() 131 f.rw.RLock() 132 hashes, ok := f.seen[f.hashValue] 133 if ok { 134 h := GetUnsafeString(rangeValue) 135 if _, loaded := hashes[h]; loaded { 136 f.rw.RUnlock() 137 continue 138 } 139 f.rw.RUnlock() 140 f.rw.Lock() 141 if _, loaded := hashes[h]; loaded { 142 f.rw.Unlock() 143 continue 144 } 145 hashes[h] = struct{}{} 146 f.rw.Unlock() 147 return true 148 } 149 f.rw.RUnlock() 150 f.rw.Lock() 151 if _, ok := f.seen[f.hashValue]; ok { 152 f.rw.Unlock() 153 continue 154 } 155 f.seen[f.hashValue] = map[string]struct{}{ 156 GetUnsafeString(rangeValue): {}, 157 } 158 f.rw.Unlock() 159 return true 160 } 161 162 return false 163 }