github.com/thanos-io/thanos@v0.32.5/pkg/store/proxy_heap.go (about)

     1  // Copyright (c) The Thanos Authors.
     2  // Licensed under the Apache License 2.0.
     3  
     4  package store
     5  
     6  import (
     7  	"container/heap"
     8  	"context"
     9  	"fmt"
    10  	"io"
    11  	"sort"
    12  	"sync"
    13  	"time"
    14  
    15  	"github.com/cespare/xxhash/v2"
    16  	"github.com/go-kit/log"
    17  	"github.com/go-kit/log/level"
    18  	grpc_opentracing "github.com/grpc-ecosystem/go-grpc-middleware/v2/interceptors/tracing"
    19  	"github.com/opentracing/opentracing-go"
    20  	"github.com/pkg/errors"
    21  	"github.com/prometheus/client_golang/prometheus"
    22  	"github.com/prometheus/prometheus/model/labels"
    23  
    24  	"github.com/thanos-io/thanos/pkg/store/labelpb"
    25  	"github.com/thanos-io/thanos/pkg/store/storepb"
    26  	"github.com/thanos-io/thanos/pkg/tracing"
    27  )
    28  
// dedupResponseHeap wraps a ProxyResponseHeap and merges consecutive series
// responses that carry equal label sets into a single response.
type dedupResponseHeap struct {
	// h is the underlying heap that responses are pulled from.
	h *ProxyResponseHeap

	// bufferedSameSeries collects consecutive series responses with equal labels
	// until a response with different labels arrives or h is exhausted.
	bufferedSameSeries []*storepb.SeriesResponse

	// bufferedResp holds the responses of the current batch that At returns;
	// buffRespI is the index of the current one.
	bufferedResp []*storepb.SeriesResponse
	buffRespI    int

	// prev is a response that was already taken from h but not processed yet;
	// ok is the result of the most recent h.Next() call.
	prev *storepb.SeriesResponse
	ok   bool
}
    40  
    41  // NewDedupResponseHeap returns a wrapper around ProxyResponseHeap that merged duplicated series messages into one.
    42  // It also deduplicates identical chunks identified by the same checksum from each series message.
    43  func NewDedupResponseHeap(h *ProxyResponseHeap) *dedupResponseHeap {
    44  	ok := h.Next()
    45  	var prev *storepb.SeriesResponse
    46  	if ok {
    47  		prev = h.At()
    48  	}
    49  	return &dedupResponseHeap{
    50  		h:    h,
    51  		ok:   ok,
    52  		prev: prev,
    53  	}
    54  }
    55  
// Next advances to the next response and reports whether one is available through At.
//
// Responses are produced in batches. A refill drains the underlying heap until the labels of the
// series change (or the heap is exhausted), merges all series responses that share the same labels
// via chainSeriesAndRemIdenticalChunks and stores the result in bufferedResp. Non-series responses
// encountered during a refill are appended to bufferedResp as they are seen.
func (d *dedupResponseHeap) Next() bool {
	// The current batch still has unread responses.
	if d.buffRespI+1 < len(d.bufferedResp) {
		d.buffRespI++
		return true
	}

	// The underlying heap is exhausted and there is no pending response left.
	if !d.ok && d.prev == nil {
		return false
	}

	// Start a new batch, reusing the backing arrays of the buffers.
	d.buffRespI = 0
	d.bufferedResp = d.bufferedResp[:0]
	d.bufferedSameSeries = d.bufferedSameSeries[:0]

	var s *storepb.SeriesResponse
	for {
		if d.prev == nil {
			d.ok = d.h.Next()
			if !d.ok {
				// End of input: flush the series collected so far.
				if len(d.bufferedSameSeries) > 0 {
					d.bufferedResp = append(d.bufferedResp, chainSeriesAndRemIdenticalChunks(d.bufferedSameSeries))
				}
				return len(d.bufferedResp) > 0
			}
			s = d.h.At()
		} else {
			// Consume the response that was carried over from the previous call.
			s = d.prev
			d.prev = nil
		}

		// Non-series responses are passed through unchanged.
		if s.GetSeries() == nil {
			d.bufferedResp = append(d.bufferedResp, s)
			continue
		}

		// First series of a new group.
		if len(d.bufferedSameSeries) == 0 {
			d.bufferedSameSeries = append(d.bufferedSameSeries, s)
			continue
		}

		lbls := d.bufferedSameSeries[0].GetSeries().Labels
		atLbls := s.GetSeries().Labels

		// Same labels as the current group: keep collecting.
		if labels.Compare(labelpb.ZLabelsToPromLabels(lbls), labelpb.ZLabelsToPromLabels(atLbls)) == 0 {
			d.bufferedSameSeries = append(d.bufferedSameSeries, s)
			continue
		}

		// Labels changed: emit the merged group and remember s for the next call.
		d.bufferedResp = append(d.bufferedResp, chainSeriesAndRemIdenticalChunks(d.bufferedSameSeries))
		d.prev = s

		return true
	}
}
   110  
   111  func chainSeriesAndRemIdenticalChunks(series []*storepb.SeriesResponse) *storepb.SeriesResponse {
   112  	chunkDedupMap := map[uint64]*storepb.AggrChunk{}
   113  
   114  	for _, s := range series {
   115  		for _, chk := range s.GetSeries().Chunks {
   116  			for _, field := range []*storepb.Chunk{
   117  				chk.Raw, chk.Count, chk.Max, chk.Min, chk.Sum, chk.Counter,
   118  			} {
   119  				if field == nil {
   120  					continue
   121  				}
   122  				hash := field.Hash
   123  				if hash == 0 {
   124  					hash = xxhash.Sum64(field.Data)
   125  				}
   126  
   127  				if _, ok := chunkDedupMap[hash]; !ok {
   128  					chk := chk
   129  					chunkDedupMap[hash] = &chk
   130  					break
   131  				}
   132  			}
   133  		}
   134  	}
   135  
   136  	// If no chunks were requested.
   137  	if len(chunkDedupMap) == 0 {
   138  		return series[0]
   139  	}
   140  
   141  	finalChunks := make([]storepb.AggrChunk, 0, len(chunkDedupMap))
   142  	for _, chk := range chunkDedupMap {
   143  		finalChunks = append(finalChunks, *chk)
   144  	}
   145  
   146  	sort.Slice(finalChunks, func(i, j int) bool {
   147  		return finalChunks[i].Compare(finalChunks[j]) > 0
   148  	})
   149  
   150  	return storepb.NewSeriesResponse(&storepb.Series{
   151  		Labels: series[0].GetSeries().Labels,
   152  		Chunks: finalChunks,
   153  	})
   154  }
   155  
   156  func (d *dedupResponseHeap) At() *storepb.SeriesResponse {
   157  	return d.bufferedResp[d.buffRespI]
   158  }
   159  
// ProxyResponseHeap is a heap for storepb.SeriesSets.
// It performs k-way merge between all of those sets.
// TODO(GiedriusS): can be improved with a tournament tree.
// This is O(n*logk) but can be Theta(n*logk). However,
// tournament trees need n-1 auxiliary nodes so there
// might not be much of a difference.
type ProxyResponseHeap struct {
	// nodes is the heap storage; each node wraps one response set and is
	// ordered by the response that set currently points at (see Less).
	nodes []ProxyResponseHeapNode
}
   169  
   170  func (h *ProxyResponseHeap) Less(i, j int) bool {
   171  	iResp := h.nodes[i].rs.At()
   172  	jResp := h.nodes[j].rs.At()
   173  
   174  	if iResp.GetSeries() != nil && jResp.GetSeries() != nil {
   175  		iLbls := labelpb.ZLabelsToPromLabels(iResp.GetSeries().Labels)
   176  		jLbls := labelpb.ZLabelsToPromLabels(jResp.GetSeries().Labels)
   177  
   178  		return labels.Compare(iLbls, jLbls) < 0
   179  	} else if iResp.GetSeries() == nil && jResp.GetSeries() != nil {
   180  		return true
   181  	} else if iResp.GetSeries() != nil && jResp.GetSeries() == nil {
   182  		return false
   183  	}
   184  
   185  	// If it is not a series then the order does not matter. What matters
   186  	// is that we get different types of responses one after another.
   187  	return false
   188  }
   189  
   190  func (h *ProxyResponseHeap) Len() int {
   191  	return len(h.nodes)
   192  }
   193  
   194  func (h *ProxyResponseHeap) Swap(i, j int) {
   195  	h.nodes[i], h.nodes[j] = h.nodes[j], h.nodes[i]
   196  }
   197  
   198  func (h *ProxyResponseHeap) Push(x interface{}) {
   199  	h.nodes = append(h.nodes, x.(ProxyResponseHeapNode))
   200  }
   201  
   202  func (h *ProxyResponseHeap) Pop() (v interface{}) {
   203  	h.nodes, v = h.nodes[:h.Len()-1], h.nodes[h.Len()-1]
   204  	return
   205  }
   206  
   207  func (h *ProxyResponseHeap) Empty() bool {
   208  	return h.Len() == 0
   209  }
   210  
   211  func (h *ProxyResponseHeap) Min() *ProxyResponseHeapNode {
   212  	return &h.nodes[0]
   213  }
   214  
// ProxyResponseHeapNode is a single element of ProxyResponseHeap.
type ProxyResponseHeapNode struct {
	// rs is the response set this node reads from.
	rs respSet
}
   218  
   219  // NewProxyResponseHeap returns heap that k-way merge series together.
   220  // It's agnostic to duplicates and overlaps, it forwards all duplicated series in random order.
   221  func NewProxyResponseHeap(seriesSets ...respSet) *ProxyResponseHeap {
   222  	ret := ProxyResponseHeap{
   223  		nodes: make([]ProxyResponseHeapNode, 0, len(seriesSets)),
   224  	}
   225  
   226  	for _, ss := range seriesSets {
   227  		if ss.Empty() {
   228  			continue
   229  		}
   230  		ss := ss
   231  		ret.Push(ProxyResponseHeapNode{rs: ss})
   232  	}
   233  
   234  	heap.Init(&ret)
   235  
   236  	return &ret
   237  }
   238  
   239  func (h *ProxyResponseHeap) Next() bool {
   240  	return !h.Empty()
   241  }
   242  
   243  func (h *ProxyResponseHeap) At() *storepb.SeriesResponse {
   244  	min := h.Min().rs
   245  
   246  	atResp := min.At()
   247  
   248  	if min.Next() {
   249  		heap.Fix(h, 0)
   250  	} else {
   251  		heap.Remove(h, 0)
   252  	}
   253  
   254  	return atResp
   255  }
   256  
   257  func (l *lazyRespSet) StoreID() string {
   258  	return l.storeName
   259  }
   260  
   261  func (l *lazyRespSet) Labelset() string {
   262  	return labelpb.PromLabelSetsToString(l.storeLabelSets)
   263  }
   264  
   265  func (l *lazyRespSet) StoreLabels() map[string]struct{} {
   266  	return l.storeLabels
   267  }
   268  
// lazyRespSet is a lazy storepb.SeriesSet that buffers
// everything as fast as possible while at the same time it permits
// reading response-by-response. It blocks if there is no data
// in Next().
type lazyRespSet struct {
	// Generic parameters.
	span           opentracing.Span
	cl             storepb.Store_SeriesClient
	closeSeries    context.CancelFunc
	storeName      string
	storeLabelSets []labels.Labels
	storeLabels    map[string]struct{}
	frameTimeout   time.Duration
	ctx            context.Context

	// Internal bookkeeping.
	// bufferedResponses, lastResp and noMoreData are accessed under bufferedResponsesMtx;
	// dataOrFinishEvent is signalled whenever a response is buffered or the stream ends.
	dataOrFinishEvent    *sync.Cond
	bufferedResponses    []*storepb.SeriesResponse
	bufferedResponsesMtx *sync.Mutex
	lastResp             *storepb.SeriesResponse

	// noMoreData is set once the receiving goroutine has stopped or Close was called.
	// initialized is set by the first call to At.
	noMoreData  bool
	initialized bool

	shardMatcher *storepb.ShardMatcher
}
   295  
   296  func (l *lazyRespSet) Empty() bool {
   297  	l.bufferedResponsesMtx.Lock()
   298  	defer l.bufferedResponsesMtx.Unlock()
   299  
   300  	// NOTE(GiedriusS): need to wait here for at least one
   301  	// response so that we could build the heap properly.
   302  	if l.noMoreData && len(l.bufferedResponses) == 0 {
   303  		return true
   304  	}
   305  
   306  	for len(l.bufferedResponses) == 0 {
   307  		l.dataOrFinishEvent.Wait()
   308  		if l.noMoreData && len(l.bufferedResponses) == 0 {
   309  			break
   310  		}
   311  	}
   312  
   313  	return len(l.bufferedResponses) == 0 && l.noMoreData
   314  }
   315  
   316  // Next either blocks until more data is available or reads
   317  // the next response. If it is not lazy then it waits for everything
   318  // to finish.
   319  func (l *lazyRespSet) Next() bool {
   320  	l.bufferedResponsesMtx.Lock()
   321  	defer l.bufferedResponsesMtx.Unlock()
   322  
   323  	if l.noMoreData && len(l.bufferedResponses) == 0 {
   324  		l.lastResp = nil
   325  
   326  		return false
   327  	}
   328  
   329  	for len(l.bufferedResponses) == 0 {
   330  		l.dataOrFinishEvent.Wait()
   331  		if l.noMoreData && len(l.bufferedResponses) == 0 {
   332  			break
   333  		}
   334  	}
   335  
   336  	if len(l.bufferedResponses) > 0 {
   337  		l.lastResp = l.bufferedResponses[0]
   338  		l.bufferedResponses = l.bufferedResponses[1:]
   339  		return true
   340  	}
   341  
   342  	l.lastResp = nil
   343  	return false
   344  }
   345  
   346  func (l *lazyRespSet) At() *storepb.SeriesResponse {
   347  	// We need to wait for at least one response so that we would be able to properly build the heap.
   348  	if !l.initialized {
   349  		l.Next()
   350  		l.initialized = true
   351  		return l.lastResp
   352  	}
   353  
   354  	// Next() was called previously.
   355  	return l.lastResp
   356  }
   357  
// newLazyRespSet creates a lazyRespSet for the given Series client and starts a goroutine
// that receives responses from cl and appends them to the set's buffer, signalling the
// reader after every append. The goroutine stops on io.EOF, on a receive error or when ctx
// is done; in the latter two cases a warning response is buffered. If frameTimeout is
// positive, closeSeries is called when a single receive step takes longer than frameTimeout.
// When applySharding is true, series that do not match shardMatcher are dropped.
// emptyStreamResponses is incremented if the stream delivered no response at all.
func newLazyRespSet(
	ctx context.Context,
	span opentracing.Span,
	frameTimeout time.Duration,
	storeName string,
	storeLabelSets []labels.Labels,
	closeSeries context.CancelFunc,
	cl storepb.Store_SeriesClient,
	shardMatcher *storepb.ShardMatcher,
	applySharding bool,
	emptyStreamResponses prometheus.Counter,
) respSet {
	bufferedResponses := []*storepb.SeriesResponse{}
	bufferedResponsesMtx := &sync.Mutex{}
	// The condition variable shares the mutex that guards the buffer.
	dataAvailable := sync.NewCond(bufferedResponsesMtx)

	respSet := &lazyRespSet{
		frameTimeout:         frameTimeout,
		cl:                   cl,
		storeName:            storeName,
		storeLabelSets:       storeLabelSets,
		closeSeries:          closeSeries,
		span:                 span,
		ctx:                  ctx,
		dataOrFinishEvent:    dataAvailable,
		bufferedResponsesMtx: bufferedResponsesMtx,
		bufferedResponses:    bufferedResponses,
		shardMatcher:         shardMatcher,
	}
	// Collect the names of all labels that appear in the store's label sets.
	respSet.storeLabels = make(map[string]struct{})
	for _, ls := range storeLabelSets {
		for _, l := range ls {
			respSet.storeLabels[l.Name] = struct{}{}
		}
	}

	go func(st string, l *lazyRespSet) {
		bytesProcessed := 0
		seriesStats := &storepb.SeriesStatsCounter{}

		// Record the processing statistics on the span and finish it when the goroutine exits.
		defer func() {
			l.span.SetTag("processed.series", seriesStats.Series)
			l.span.SetTag("processed.chunks", seriesStats.Chunks)
			l.span.SetTag("processed.samples", seriesStats.Samples)
			l.span.SetTag("processed.bytes", bytesProcessed)
			l.span.Finish()
		}()

		numResponses := 0
		defer func() {
			if numResponses == 0 {
				emptyStreamResponses.Inc()
			}
		}()

		// handleRecvResponse performs one receive step and returns false when the
		// goroutine should stop.
		handleRecvResponse := func(t *time.Timer) bool {
			if t != nil {
				// Re-arm the frame timeout once this step is done.
				defer t.Reset(frameTimeout)
			}

			select {
			case <-l.ctx.Done():
				err := errors.Wrapf(l.ctx.Err(), "failed to receive any data from %s", st)
				l.span.SetTag("err", err.Error())

				l.bufferedResponsesMtx.Lock()
				l.bufferedResponses = append(l.bufferedResponses, storepb.NewWarnSeriesResponse(err))
				l.noMoreData = true
				l.dataOrFinishEvent.Signal()
				l.bufferedResponsesMtx.Unlock()
				return false
			default:
				resp, err := cl.Recv()
				if err == io.EOF {
					// Regular end of the stream: mark the set as finished and wake the reader.
					l.bufferedResponsesMtx.Lock()
					l.noMoreData = true
					l.dataOrFinishEvent.Signal()
					l.bufferedResponsesMtx.Unlock()
					return false
				}

				if err != nil {
					// TODO(bwplotka): Return early on error. Don't wait of dedup, merge and sort if partial response is disabled.
					var rerr error
					if t != nil && !t.Stop() && errors.Is(err, context.Canceled) {
						// Most likely the per-Recv timeout has been reached.
						// There's a small race between canceling and the Recv()
						// but this is most likely true.
						rerr = errors.Wrapf(err, "failed to receive any data in %s from %s", l.frameTimeout, st)
					} else {
						rerr = errors.Wrapf(err, "receive series from %s", st)
					}

					l.span.SetTag("err", rerr.Error())

					// Surface the error to the reader as a warning response and finish the set.
					l.bufferedResponsesMtx.Lock()
					l.bufferedResponses = append(l.bufferedResponses, storepb.NewWarnSeriesResponse(rerr))
					l.noMoreData = true
					l.dataOrFinishEvent.Signal()
					l.bufferedResponsesMtx.Unlock()
					return false
				}

				numResponses++
				bytesProcessed += resp.Size()

				// Drop series that do not belong to the requested shard. They still count
				// towards numResponses and bytesProcessed above.
				if resp.GetSeries() != nil && applySharding && !shardMatcher.MatchesZLabels(resp.GetSeries().Labels) {
					return true
				}

				if resp.GetSeries() != nil {
					seriesStats.Count(resp.GetSeries())
				}

				l.bufferedResponsesMtx.Lock()
				l.bufferedResponses = append(l.bufferedResponses, resp)
				l.dataOrFinishEvent.Signal()
				l.bufferedResponsesMtx.Unlock()
				return true
			}
		}

		// With a positive frame timeout, closeSeries is called when the timer fires.
		var t *time.Timer
		if frameTimeout > 0 {
			t = time.AfterFunc(frameTimeout, closeSeries)
			defer t.Stop()
		}
		for {
			if !handleRecvResponse(t) {
				return
			}
		}
	}(storeName, respSet)

	return respSet
}
   494  
// RetrievalStrategy stores what kind of retrieval strategy
// shall be used for the async response set.
type RetrievalStrategy string

const (
	// LazyRetrieval allows readers (e.g. PromQL engine) to use (stream) data as soon as possible.
	LazyRetrieval RetrievalStrategy = "lazy"
	// EagerRetrieval is optimized to read all into internal buffer before returning to readers (e.g. PromQL engine).
	// This is currently preferred because:
	// * Both PromQL engines (old and new) want all series ASAP to make decisions.
	// * Querier buffers all responses when using StoreAPI internally.
	EagerRetrieval RetrievalStrategy = "eager"
)
   508  
   509  func newAsyncRespSet(
   510  	ctx context.Context,
   511  	st Client,
   512  	req *storepb.SeriesRequest,
   513  	frameTimeout time.Duration,
   514  	retrievalStrategy RetrievalStrategy,
   515  	buffers *sync.Pool,
   516  	shardInfo *storepb.ShardInfo,
   517  	logger log.Logger,
   518  	emptyStreamResponses prometheus.Counter,
   519  ) (respSet, error) {
   520  
   521  	var span opentracing.Span
   522  	var closeSeries context.CancelFunc
   523  
   524  	storeAddr, isLocalStore := st.Addr()
   525  	storeID := labelpb.PromLabelSetsToString(st.LabelSets())
   526  	if storeID == "" {
   527  		storeID = "Store Gateway"
   528  	}
   529  
   530  	seriesCtx := grpc_opentracing.ClientAddContextTags(ctx, opentracing.Tags{
   531  		"target": storeAddr,
   532  	})
   533  
   534  	span, seriesCtx = tracing.StartSpan(seriesCtx, "proxy.series", tracing.Tags{
   535  		"store.id":       storeID,
   536  		"store.is_local": isLocalStore,
   537  		"store.addr":     storeAddr,
   538  	})
   539  
   540  	seriesCtx, closeSeries = context.WithCancel(seriesCtx)
   541  
   542  	shardMatcher := shardInfo.Matcher(buffers)
   543  
   544  	applySharding := shardInfo != nil && !st.SupportsSharding()
   545  	if applySharding {
   546  		level.Debug(logger).Log("msg", "Applying series sharding in the proxy since there is not support in the underlying store", "store", st.String())
   547  	}
   548  
   549  	cl, err := st.Series(seriesCtx, req)
   550  	if err != nil {
   551  		err = errors.Wrapf(err, "fetch series for %s %s", storeID, st)
   552  
   553  		span.SetTag("err", err.Error())
   554  		span.Finish()
   555  		closeSeries()
   556  		return nil, err
   557  	}
   558  
   559  	var labelsToRemove map[string]struct{}
   560  	if !st.SupportsWithoutReplicaLabels() && len(req.WithoutReplicaLabels) > 0 {
   561  		level.Warn(logger).Log("msg", "detecting store that does not support without replica label setting. "+
   562  			"Falling back to eager retrieval with additional sort. Make sure your storeAPI supports it to speed up your queries", "store", st.String())
   563  		retrievalStrategy = EagerRetrieval
   564  
   565  		labelsToRemove = make(map[string]struct{})
   566  		for _, replicaLabel := range req.WithoutReplicaLabels {
   567  			labelsToRemove[replicaLabel] = struct{}{}
   568  		}
   569  	}
   570  
   571  	switch retrievalStrategy {
   572  	case LazyRetrieval:
   573  		return newLazyRespSet(
   574  			seriesCtx,
   575  			span,
   576  			frameTimeout,
   577  			st.String(),
   578  			st.LabelSets(),
   579  			closeSeries,
   580  			cl,
   581  			shardMatcher,
   582  			applySharding,
   583  			emptyStreamResponses,
   584  		), nil
   585  	case EagerRetrieval:
   586  		return newEagerRespSet(
   587  			seriesCtx,
   588  			span,
   589  			frameTimeout,
   590  			st.String(),
   591  			st.LabelSets(),
   592  			closeSeries,
   593  			cl,
   594  			shardMatcher,
   595  			applySharding,
   596  			emptyStreamResponses,
   597  			labelsToRemove,
   598  		), nil
   599  	default:
   600  		panic(fmt.Sprintf("unsupported retrieval strategy %s", retrievalStrategy))
   601  	}
   602  }
   603  
   604  func (l *lazyRespSet) Close() {
   605  	l.bufferedResponsesMtx.Lock()
   606  	defer l.bufferedResponsesMtx.Unlock()
   607  
   608  	l.closeSeries()
   609  	l.noMoreData = true
   610  	l.dataOrFinishEvent.Signal()
   611  
   612  	l.shardMatcher.Close()
   613  }
   614  
// eagerRespSet is a SeriesSet that blocks until all data is retrieved from
// the StoreAPI.
// NOTE(bwplotka): It also resorts the series (and emits warning) if the client.SupportsWithoutReplicaLabels() is false.
type eagerRespSet struct {
	// Generic parameters.
	span opentracing.Span
	cl   storepb.Store_SeriesClient
	ctx  context.Context

	closeSeries  context.CancelFunc
	frameTimeout time.Duration

	// removeLabels holds the label names that are stripped from every series
	// before the buffered responses are re-sorted.
	shardMatcher *storepb.ShardMatcher
	removeLabels map[string]struct{}

	storeName      string
	storeLabels    map[string]struct{}
	storeLabelSets []labels.Labels

	// Internal bookkeeping.
	// wg is released when the buffering goroutine has finished; readers wait on it.
	// i is the index of the current response in bufferedResponses.
	bufferedResponses []*storepb.SeriesResponse
	wg                *sync.WaitGroup
	i                 int
}
   639  
// newEagerRespSet creates an eagerRespSet for the given Series client and starts a goroutine
// that receives every response from cl into the set's buffer. Readers block on the set's
// WaitGroup until that goroutine has finished. The goroutine stops on io.EOF, on a receive
// error or when ctx is done; in the latter two cases a warning response is buffered. If
// frameTimeout is positive, closeSeries is called when a single receive step takes longer
// than frameTimeout. When applySharding is true, series that do not match shardMatcher are
// dropped. After receiving, removeLabels are stripped from all series and the buffer is
// re-sorted. emptyStreamResponses is incremented if the stream delivered no response at all.
func newEagerRespSet(
	ctx context.Context,
	span opentracing.Span,
	frameTimeout time.Duration,
	storeName string,
	storeLabelSets []labels.Labels,
	closeSeries context.CancelFunc,
	cl storepb.Store_SeriesClient,
	shardMatcher *storepb.ShardMatcher,
	applySharding bool,
	emptyStreamResponses prometheus.Counter,
	removeLabels map[string]struct{},
) respSet {
	ret := &eagerRespSet{
		span:              span,
		closeSeries:       closeSeries,
		cl:                cl,
		frameTimeout:      frameTimeout,
		ctx:               ctx,
		bufferedResponses: []*storepb.SeriesResponse{},
		wg:                &sync.WaitGroup{},
		shardMatcher:      shardMatcher,
		removeLabels:      removeLabels,
		storeName:         storeName,
		storeLabelSets:    storeLabelSets,
	}
	// Collect the names of all labels that appear in the store's label sets.
	ret.storeLabels = make(map[string]struct{})
	for _, ls := range storeLabelSets {
		for _, l := range ls {
			ret.storeLabels[l.Name] = struct{}{}
		}
	}

	ret.wg.Add(1)

	// Start a goroutine and immediately buffer everything.
	go func(l *eagerRespSet) {
		seriesStats := &storepb.SeriesStatsCounter{}
		bytesProcessed := 0

		// Record the processing statistics on the span, finish it and release the readers
		// when the goroutine exits.
		defer func() {
			l.span.SetTag("processed.series", seriesStats.Series)
			l.span.SetTag("processed.chunks", seriesStats.Chunks)
			l.span.SetTag("processed.samples", seriesStats.Samples)
			l.span.SetTag("processed.bytes", bytesProcessed)
			l.span.Finish()
			ret.wg.Done()
		}()

		numResponses := 0
		defer func() {
			if numResponses == 0 {
				emptyStreamResponses.Inc()
			}
		}()

		// TODO(bwplotka): Consider improving readability by getting rid of anonymous functions and merging eager and
		// lazyResponse into one struct.
		// handleRecvResponse performs one receive step and returns false when receiving should stop.
		handleRecvResponse := func(t *time.Timer) bool {
			if t != nil {
				// Re-arm the frame timeout once this step is done.
				defer t.Reset(frameTimeout)
			}

			select {
			case <-l.ctx.Done():
				err := errors.Wrapf(l.ctx.Err(), "failed to receive any data from %s", storeName)
				l.bufferedResponses = append(l.bufferedResponses, storepb.NewWarnSeriesResponse(err))
				l.span.SetTag("err", err.Error())
				return false
			default:
				resp, err := cl.Recv()
				if err == io.EOF {
					return false
				}
				if err != nil {
					// TODO(bwplotka): Return early on error. Don't wait of dedup, merge and sort if partial response is disabled.
					var rerr error
					if t != nil && !t.Stop() && errors.Is(err, context.Canceled) {
						// Most likely the per-Recv timeout has been reached.
						// There's a small race between canceling and the Recv()
						// but this is most likely true.
						rerr = errors.Wrapf(err, "failed to receive any data in %s from %s", l.frameTimeout, storeName)
					} else {
						rerr = errors.Wrapf(err, "receive series from %s", storeName)
					}
					// Surface the error to the reader as a warning response.
					l.bufferedResponses = append(l.bufferedResponses, storepb.NewWarnSeriesResponse(rerr))
					l.span.SetTag("err", rerr.Error())
					return false
				}

				numResponses++
				bytesProcessed += resp.Size()

				// Drop series that do not belong to the requested shard. They still count
				// towards numResponses and bytesProcessed above.
				if resp.GetSeries() != nil && applySharding && !shardMatcher.MatchesZLabels(resp.GetSeries().Labels) {
					return true
				}

				if resp.GetSeries() != nil {
					seriesStats.Count(resp.GetSeries())
				}

				l.bufferedResponses = append(l.bufferedResponses, resp)
				return true
			}
		}
		// With a positive frame timeout, closeSeries is called when the timer fires.
		var t *time.Timer
		if frameTimeout > 0 {
			t = time.AfterFunc(frameTimeout, closeSeries)
			defer t.Stop()
		}

		for {
			if !handleRecvResponse(t) {
				break
			}
		}

		// This should be used only for stores that does not support doing this on server side.
		// See docs/proposals-accepted/20221129-avoid-global-sort.md for details.
		// NOTE. Client is not guaranteed to give a sorted response when extLset is added
		// Generally we need to resort here.
		sortWithoutLabels(l.bufferedResponses, l.removeLabels)

	}(ret)

	return ret
}
   767  
   768  func rmLabels(l labels.Labels, labelsToRemove map[string]struct{}) labels.Labels {
   769  	for i := 0; i < len(l); i++ {
   770  		if _, ok := labelsToRemove[l[i].Name]; !ok {
   771  			continue
   772  		}
   773  		l = append(l[:i], l[i+1:]...)
   774  		i--
   775  	}
   776  	return l
   777  }
   778  
   779  // sortWithoutLabels removes given labels from series and re-sorts the series responses that the same
   780  // series with different labels are coming right after each other. Other types of responses are moved to front.
   781  func sortWithoutLabels(set []*storepb.SeriesResponse, labelsToRemove map[string]struct{}) {
   782  	for _, s := range set {
   783  		ser := s.GetSeries()
   784  		if ser == nil {
   785  			continue
   786  		}
   787  
   788  		if len(labelsToRemove) > 0 {
   789  			ser.Labels = labelpb.ZLabelsFromPromLabels(rmLabels(labelpb.ZLabelsToPromLabels(ser.Labels), labelsToRemove))
   790  		}
   791  	}
   792  
   793  	// With the re-ordered label sets, re-sorting all series aligns the same series
   794  	// from different replicas sequentially.
   795  	sort.Slice(set, func(i, j int) bool {
   796  		si := set[i].GetSeries()
   797  		if si == nil {
   798  			return true
   799  		}
   800  		sj := set[j].GetSeries()
   801  		if sj == nil {
   802  			return false
   803  		}
   804  		return labels.Compare(labelpb.ZLabelsToPromLabels(si.Labels), labelpb.ZLabelsToPromLabels(sj.Labels)) < 0
   805  	})
   806  }
   807  
   808  func (l *eagerRespSet) Close() {
   809  	l.closeSeries()
   810  	l.shardMatcher.Close()
   811  }
   812  
   813  func (l *eagerRespSet) At() *storepb.SeriesResponse {
   814  	l.wg.Wait()
   815  
   816  	if len(l.bufferedResponses) == 0 {
   817  		return nil
   818  	}
   819  
   820  	return l.bufferedResponses[l.i]
   821  }
   822  
   823  func (l *eagerRespSet) Next() bool {
   824  	l.wg.Wait()
   825  
   826  	l.i++
   827  
   828  	return l.i < len(l.bufferedResponses)
   829  }
   830  
   831  func (l *eagerRespSet) Empty() bool {
   832  	l.wg.Wait()
   833  
   834  	return len(l.bufferedResponses) == 0
   835  }
   836  
   837  func (l *eagerRespSet) StoreID() string {
   838  	return l.storeName
   839  }
   840  
   841  func (l *eagerRespSet) Labelset() string {
   842  	return labelpb.PromLabelSetsToString(l.storeLabelSets)
   843  }
   844  
   845  func (l *eagerRespSet) StoreLabels() map[string]struct{} {
   846  	return l.storeLabels
   847  }
   848  
// respSet is a set of series responses from a single store that can be
// iterated response by response. It is implemented by lazyRespSet and eagerRespSet.
type respSet interface {
	// Close cancels the underlying Series call and closes the shard matcher.
	Close()
	// At returns the current response.
	At() *storepb.SeriesResponse
	// Next advances to the next response and reports whether there is one.
	Next() bool
	// StoreID returns the name of the store the responses come from.
	StoreID() string
	// Labelset returns the string form of the store's label sets.
	Labelset() string
	// StoreLabels returns the set of label names found in the store's label sets.
	StoreLabels() map[string]struct{}
	// Empty reports whether the set has no responses; implementations block
	// until this can be determined.
	Empty() bool
}