github.com/yankunsam/loki/v2@v2.6.3-0.20220817130409-389df5235c27/pkg/storage/stores/shipper/util/queries.go (about)

     1  package util
     2  
     3  import (
     4  	"context"
     5  	"sync"
     6  
     7  	"github.com/grafana/dskit/concurrency"
     8  
     9  	"github.com/grafana/loki/pkg/storage/stores/series/index"
    10  	util_math "github.com/grafana/loki/pkg/util/math"
    11  )
    12  
const (
	// maxQueriesBatch caps how many index queries are passed to a single
	// queryIndex call; larger inputs are split into batches of this size.
	maxQueriesBatch = 100
	// maxConcurrency bounds how many batches DoParallelQueries runs in flight.
	maxConcurrency  = 10
)
    17  
// QueryIndexFunc is the signature of an index-querying function: it executes
// the given queries and streams result pages to callback.
type QueryIndexFunc func(ctx context.Context, queries []index.Query, callback index.QueryPagesCallback) error
    19  
    20  // QueriesByTable groups and returns queries by tables.
    21  func QueriesByTable(queries []index.Query) map[string][]index.Query {
    22  	queriesByTable := make(map[string][]index.Query)
    23  	for _, query := range queries {
    24  		if _, ok := queriesByTable[query.TableName]; !ok {
    25  			queriesByTable[query.TableName] = []index.Query{}
    26  		}
    27  
    28  		queriesByTable[query.TableName] = append(queriesByTable[query.TableName], query)
    29  	}
    30  
    31  	return queriesByTable
    32  }
    33  
    34  func DoParallelQueries(ctx context.Context, queryIndex QueryIndexFunc, queries []index.Query, callback index.QueryPagesCallback) error {
    35  	if len(queries) == 0 {
    36  		return nil
    37  	}
    38  	if len(queries) <= maxQueriesBatch {
    39  		return queryIndex(ctx, queries, NewCallbackDeduper(callback, len(queries)))
    40  	}
    41  
    42  	jobsCount := len(queries) / maxQueriesBatch
    43  	if len(queries)%maxQueriesBatch != 0 {
    44  		jobsCount++
    45  	}
    46  	callback = NewSyncCallbackDeduper(callback, len(queries))
    47  	return concurrency.ForEachJob(ctx, jobsCount, maxConcurrency, func(ctx context.Context, idx int) error {
    48  		return queryIndex(ctx, queries[idx*maxQueriesBatch:util_math.Min((idx+1)*maxQueriesBatch, len(queries))], callback)
    49  	})
    50  }
    51  
    52  // NewSyncCallbackDeduper should always be used on table level not the whole query level because it just looks at range values which can be repeated across tables
    53  // NewSyncCallbackDeduper is safe to used by multiple goroutines
    54  // Cortex anyways dedupes entries across tables
    55  func NewSyncCallbackDeduper(callback index.QueryPagesCallback, queries int) index.QueryPagesCallback {
    56  	syncMap := &syncMap{
    57  		seen: make(map[string]map[string]struct{}, queries),
    58  	}
    59  	return func(q index.Query, rbr index.ReadBatchResult) bool {
    60  		return callback(q, &readBatchDeduperSync{
    61  			syncMap:           syncMap,
    62  			hashValue:         q.HashValue,
    63  			ReadBatchIterator: rbr.Iterator(),
    64  		})
    65  	}
    66  }
    67  
    68  // NewCallbackDeduper should always be used on table level not the whole query level because it just looks at range values which can be repeated across tables
    69  // NewCallbackDeduper is safe not to used by multiple goroutines
    70  // Cortex anyways dedupes entries across tables
    71  func NewCallbackDeduper(callback index.QueryPagesCallback, queries int) index.QueryPagesCallback {
    72  	f := &readBatchDeduper{
    73  		seen: make(map[string]map[string]struct{}, queries),
    74  	}
    75  	return func(q index.Query, rbr index.ReadBatchResult) bool {
    76  		f.hashValue = q.HashValue
    77  		f.ReadBatchIterator = rbr.Iterator()
    78  		return callback(q, f)
    79  	}
    80  }
    81  
// readBatchDeduper filters an index read batch, skipping entries whose range
// value has already been emitted for the current hash value. It is not safe
// for concurrent use; see readBatchDeduperSync for the synchronized variant.
type readBatchDeduper struct {
	index.ReadBatchIterator
	// hashValue identifies the row the current batch belongs to.
	hashValue string
	// seen maps hash value -> set of range values already emitted.
	seen      map[string]map[string]struct{}
}
    87  
// Iterator returns the deduper itself, letting it act as an
// index.ReadBatchResult whose iteration skips duplicate range values.
func (f *readBatchDeduper) Iterator() index.ReadBatchIterator {
	return f
}
    91  
    92  func (f *readBatchDeduper) Next() bool {
    93  	for f.ReadBatchIterator.Next() {
    94  		rangeValue := f.RangeValue()
    95  		hashes, ok := f.seen[f.hashValue]
    96  		if !ok {
    97  			hashes = map[string]struct{}{}
    98  			hashes[GetUnsafeString(rangeValue)] = struct{}{}
    99  			f.seen[f.hashValue] = hashes
   100  			return true
   101  		}
   102  		h := GetUnsafeString(rangeValue)
   103  		if _, loaded := hashes[h]; loaded {
   104  			continue
   105  		}
   106  		hashes[h] = struct{}{}
   107  		return true
   108  	}
   109  
   110  	return false
   111  }
   112  
// syncMap is the RWMutex-guarded seen-set shared by all readBatchDeduperSync
// instances produced by one NewSyncCallbackDeduper call.
type syncMap struct {
	// seen maps hash value -> set of range values already emitted.
	seen map[string]map[string]struct{}
	rw   sync.RWMutex // nolint: structcheck
}
   117  
// readBatchDeduperSync is the goroutine-safe variant of readBatchDeduper: it
// dedupes range values per hash value against a shared, lock-guarded syncMap.
type readBatchDeduperSync struct {
	index.ReadBatchIterator
	// hashValue identifies the row the current batch belongs to.
	hashValue string
	*syncMap
}
   123  
// Iterator returns the deduper itself, letting it act as an
// index.ReadBatchResult whose iteration skips duplicate range values.
func (f *readBatchDeduperSync) Iterator() index.ReadBatchIterator {
	return f
}
   127  
   128  func (f *readBatchDeduperSync) Next() bool {
   129  	for f.ReadBatchIterator.Next() {
   130  		rangeValue := f.RangeValue()
   131  		f.rw.RLock()
   132  		hashes, ok := f.seen[f.hashValue]
   133  		if ok {
   134  			h := GetUnsafeString(rangeValue)
   135  			if _, loaded := hashes[h]; loaded {
   136  				f.rw.RUnlock()
   137  				continue
   138  			}
   139  			f.rw.RUnlock()
   140  			f.rw.Lock()
   141  			if _, loaded := hashes[h]; loaded {
   142  				f.rw.Unlock()
   143  				continue
   144  			}
   145  			hashes[h] = struct{}{}
   146  			f.rw.Unlock()
   147  			return true
   148  		}
   149  		f.rw.RUnlock()
   150  		f.rw.Lock()
   151  		if _, ok := f.seen[f.hashValue]; ok {
   152  			f.rw.Unlock()
   153  			continue
   154  		}
   155  		f.seen[f.hashValue] = map[string]struct{}{
   156  			GetUnsafeString(rangeValue): {},
   157  		}
   158  		f.rw.Unlock()
   159  		return true
   160  	}
   161  
   162  	return false
   163  }