github.com/grafana/pyroscope@v1.18.0/pkg/phlaredb/tsdb/index/postings.go (about)

     1  // Copyright 2017 The Prometheus Authors
     2  // Licensed under the Apache License, Version 2.0 (the "License");
     3  // you may not use this file except in compliance with the License.
     4  // You may obtain a copy of the License at
     5  //
     6  // http://www.apache.org/licenses/LICENSE-2.0
     7  //
     8  // Unless required by applicable law or agreed to in writing, software
     9  // distributed under the License is distributed on an "AS IS" BASIS,
    10  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    11  // See the License for the specific language governing permissions and
    12  // limitations under the License.
    13  
    14  package index
    15  
    16  import (
    17  	"container/heap"
    18  	"encoding/binary"
    19  	"runtime"
    20  	"sort"
    21  	"sync"
    22  
    23  	"github.com/prometheus/prometheus/model/labels"
    24  	"github.com/prometheus/prometheus/storage"
    25  
    26  	typesv1 "github.com/grafana/pyroscope/api/gen/proto/go/types/v1"
    27  	"github.com/grafana/pyroscope/pkg/iter"
    28  	phlaremodel "github.com/grafana/pyroscope/pkg/model"
    29  )
    30  
// allPostingsKey is the empty label pair under which the postings list of
// all series IDs is stored (see AllPostingsKey).
var allPostingsKey = &typesv1.LabelPair{}
    32  
    33  // AllPostingsKey returns the label key that is used to store the postings list of all existing IDs.
    34  func AllPostingsKey() (name, value string) {
    35  	return allPostingsKey.Name, allPostingsKey.Value
    36  }
    37  
    38  // ensureOrderBatchSize is the max number of postings passed to a worker in a single batch in MemPostings.EnsureOrder().
    39  const ensureOrderBatchSize = 1024
    40  
    41  // ensureOrderBatchPool is a pool used to recycle batches passed to workers in MemPostings.EnsureOrder().
    42  var ensureOrderBatchPool = sync.Pool{
    43  	New: func() interface{} {
    44  		return make([][]storage.SeriesRef, 0, ensureOrderBatchSize)
    45  	},
    46  }
    47  
// MemPostings holds postings list for series ID per label pair. They may be written
// to out of order.
// EnsureOrder() must be called once before any reads are done. This allows for quick
// unordered batch fills on startup.
type MemPostings struct {
	mtx     sync.RWMutex // guards m and ordered
	m       map[string]map[string][]storage.SeriesRef // label name -> label value -> series refs
	ordered bool // true once all postings lists are known to be sorted
}
    57  
    58  // NewMemPostings returns a memPostings that's ready for reads and writes.
    59  func NewMemPostings() *MemPostings {
    60  	return &MemPostings{
    61  		m:       make(map[string]map[string][]storage.SeriesRef, 512),
    62  		ordered: true,
    63  	}
    64  }
    65  
    66  // NewUnorderedMemPostings returns a memPostings that is not safe to be read from
    67  // until EnsureOrder() was called once.
    68  func NewUnorderedMemPostings() *MemPostings {
    69  	return &MemPostings{
    70  		m:       make(map[string]map[string][]storage.SeriesRef, 512),
    71  		ordered: false,
    72  	}
    73  }
    74  
    75  // Symbols returns an iterator over all unique name and value strings, in order.
    76  func (p *MemPostings) Symbols() iter.Iterator[string] {
    77  	p.mtx.RLock()
    78  
    79  	// Add all the strings to a map to de-duplicate.
    80  	symbols := make(map[string]struct{}, 512)
    81  	for n, e := range p.m {
    82  		symbols[n] = struct{}{}
    83  		for v := range e {
    84  			symbols[v] = struct{}{}
    85  		}
    86  	}
    87  	p.mtx.RUnlock()
    88  
    89  	res := make([]string, 0, len(symbols))
    90  	for k := range symbols {
    91  		res = append(res, k)
    92  	}
    93  
    94  	sort.Strings(res)
    95  	return iter.NewSliceIterator(res)
    96  }
    97  
// SortedKeys returns a list of sorted label keys of the postings.
func (p *MemPostings) SortedKeys() []labels.Label {
	p.mtx.RLock()
	// NOTE(review): capacity is the number of label names, not name/value
	// pairs, so append may still grow the slice below.
	keys := make([]labels.Label, 0, len(p.m))

	for n, e := range p.m {
		for v := range e {
			keys = append(keys, labels.Label{Name: n, Value: v})
		}
	}
	p.mtx.RUnlock()

	// Sort outside the lock: by name first, then by value.
	sort.Slice(keys, func(i, j int) bool {
		if keys[i].Name != keys[j].Name {
			return keys[i].Name < keys[j].Name
		}
		return keys[i].Value < keys[j].Value
	})
	return keys
}
   118  
   119  // LabelNames returns all the unique label names.
   120  func (p *MemPostings) LabelNames() []string {
   121  	p.mtx.RLock()
   122  	defer p.mtx.RUnlock()
   123  	n := len(p.m)
   124  	if n == 0 {
   125  		return nil
   126  	}
   127  
   128  	names := make([]string, 0, n-1)
   129  	for name := range p.m {
   130  		if name != allPostingsKey.Name {
   131  			names = append(names, name)
   132  		}
   133  	}
   134  	return names
   135  }
   136  
   137  // LabelValues returns label values for the given name.
   138  func (p *MemPostings) LabelValues(name string) []string {
   139  	p.mtx.RLock()
   140  	defer p.mtx.RUnlock()
   141  
   142  	values := make([]string, 0, len(p.m[name]))
   143  	for v := range p.m[name] {
   144  		values = append(values, v)
   145  	}
   146  	return values
   147  }
   148  
// PostingsStats contains cardinality based statistics for postings.
type PostingsStats struct {
	CardinalityMetricsStats []Stat // per-value counts for the label selected in Stats()
	CardinalityLabelStats   []Stat // number of distinct values per label name
	LabelValueStats         []Stat // total length of value strings per label name
	LabelValuePairsStats    []Stat // postings list length per name=value pair
	NumLabelPairs           int    // total number of distinct name/value pairs
}
   157  
   158  // Stats calculates the cardinality statistics from postings.
   159  func (p *MemPostings) Stats(label string) *PostingsStats {
   160  	const maxNumOfRecords = 10
   161  	var size uint64
   162  
   163  	p.mtx.RLock()
   164  
   165  	metrics := &maxHeap{}
   166  	labels := &maxHeap{}
   167  	labelValueLength := &maxHeap{}
   168  	labelValuePairs := &maxHeap{}
   169  	numLabelPairs := 0
   170  
   171  	metrics.init(maxNumOfRecords)
   172  	labels.init(maxNumOfRecords)
   173  	labelValueLength.init(maxNumOfRecords)
   174  	labelValuePairs.init(maxNumOfRecords)
   175  
   176  	for n, e := range p.m {
   177  		if n == "" {
   178  			continue
   179  		}
   180  		labels.push(Stat{Name: n, Count: uint64(len(e))})
   181  		numLabelPairs += len(e)
   182  		size = 0
   183  		for name, values := range e {
   184  			if n == label {
   185  				metrics.push(Stat{Name: name, Count: uint64(len(values))})
   186  			}
   187  			labelValuePairs.push(Stat{Name: n + "=" + name, Count: uint64(len(values))})
   188  			size += uint64(len(name))
   189  		}
   190  		labelValueLength.push(Stat{Name: n, Count: size})
   191  	}
   192  
   193  	p.mtx.RUnlock()
   194  
   195  	return &PostingsStats{
   196  		CardinalityMetricsStats: metrics.get(),
   197  		CardinalityLabelStats:   labels.get(),
   198  		LabelValueStats:         labelValueLength.get(),
   199  		LabelValuePairsStats:    labelValuePairs.get(),
   200  		NumLabelPairs:           numLabelPairs,
   201  	}
   202  }
   203  
   204  // Get returns a postings list for the given label pair.
   205  func (p *MemPostings) Get(name, value string) Postings {
   206  	var lp []storage.SeriesRef
   207  	p.mtx.RLock()
   208  	l := p.m[name]
   209  	if l != nil {
   210  		lp = l[value]
   211  	}
   212  	p.mtx.RUnlock()
   213  
   214  	if lp == nil {
   215  		return EmptyPostings()
   216  	}
   217  	return iter.NewSliceSeekIterator(lp)
   218  }
   219  
   220  // All returns a postings list over all documents ever added.
   221  func (p *MemPostings) All() Postings {
   222  	return p.Get(AllPostingsKey())
   223  }
   224  
// EnsureOrder ensures that all postings lists are sorted. After it returns all further
// calls to add and addFor will insert new IDs in a sorted manner.
// Sorting is fanned out to GOMAXPROCS worker goroutines in batches of
// ensureOrderBatchSize lists; batch slices are recycled via ensureOrderBatchPool.
func (p *MemPostings) EnsureOrder() {
	p.mtx.Lock()
	defer p.mtx.Unlock()

	// Already sorted: nothing to do.
	if p.ordered {
		return
	}

	n := runtime.GOMAXPROCS(0)
	workc := make(chan [][]storage.SeriesRef)

	var wg sync.WaitGroup
	wg.Add(n)

	for i := 0; i < n; i++ {
		go func() {
			for job := range workc {
				for _, l := range job {
					sort.Sort(seriesRefSlice(l))
				}

				// Reset length (keep capacity) and return the batch to the pool.
				job = job[:0]
				ensureOrderBatchPool.Put(job) //nolint:staticcheck // Ignore SA6002 safe to ignore and actually fixing it has some performance penalty.
			}
			wg.Done()
		}()
	}

	nextJob := ensureOrderBatchPool.Get().([][]storage.SeriesRef)
	for _, e := range p.m {
		for _, l := range e {
			nextJob = append(nextJob, l)

			if len(nextJob) >= ensureOrderBatchSize {
				workc <- nextJob
				nextJob = ensureOrderBatchPool.Get().([][]storage.SeriesRef)
			}
		}
	}

	// If the last job was partially filled, we need to push it to workers too.
	if len(nextJob) > 0 {
		workc <- nextJob
	}

	// Closing workc terminates the worker loops; wait for all sorts to finish
	// before publishing the ordered flag.
	close(workc)
	wg.Wait()

	p.ordered = true
}
   277  
// Delete removes all ids in the given map from the postings lists.
// Locking strategy: the key and value sets are snapshot under short read
// locks, then each postings list is rewritten under its own short write lock
// so concurrent readers are never blocked for the whole deletion.
func (p *MemPostings) Delete(deleted map[storage.SeriesRef]struct{}) {
	var keys, vals []string

	// Collect all keys relevant for deletion once. New keys added afterwards
	// can by definition not be affected by any of the given deletes.
	p.mtx.RLock()
	for n := range p.m {
		keys = append(keys, n)
	}
	p.mtx.RUnlock()

	for _, n := range keys {
		p.mtx.RLock()
		vals = vals[:0]
		for v := range p.m[n] {
			vals = append(vals, v)
		}
		p.mtx.RUnlock()

		// For each posting we first analyse whether the postings list is affected by the deletes.
		// If yes, we actually reallocate a new postings list.
		for _, l := range vals {
			// Only lock for processing one postings list so we don't block reads for too long.
			p.mtx.Lock()

			// First pass: check whether this list contains any deleted ID.
			found := false
			for _, id := range p.m[n][l] {
				if _, ok := deleted[id]; ok {
					found = true
					break
				}
			}
			if !found {
				p.mtx.Unlock()
				continue
			}
			// Second pass: rebuild the list without the deleted IDs.
			repl := make([]storage.SeriesRef, 0, len(p.m[n][l]))

			for _, id := range p.m[n][l] {
				if _, ok := deleted[id]; !ok {
					repl = append(repl, id)
				}
			}
			if len(repl) > 0 {
				p.m[n][l] = repl
			} else {
				delete(p.m[n], l)
			}
			p.mtx.Unlock()
		}
		// Drop the label name entirely once it has no values left.
		p.mtx.Lock()
		if len(p.m[n]) == 0 {
			delete(p.m, n)
		}
		p.mtx.Unlock()
	}
}
   336  
   337  // Iter calls f for each postings list. It aborts if f returns an error and returns it.
   338  func (p *MemPostings) Iter(f func(labels.Label, Postings) error) error {
   339  	p.mtx.RLock()
   340  	defer p.mtx.RUnlock()
   341  
   342  	for n, e := range p.m {
   343  		for v, p := range e {
   344  			if err := f(labels.Label{Name: n, Value: v}, iter.NewSliceSeekIterator(p)); err != nil {
   345  				return err
   346  			}
   347  		}
   348  	}
   349  	return nil
   350  }
   351  
   352  // Add a label set to the postings index.
   353  func (p *MemPostings) Add(id storage.SeriesRef, lset phlaremodel.Labels) {
   354  	p.mtx.Lock()
   355  
   356  	for _, l := range lset {
   357  		p.addFor(id, l)
   358  	}
   359  	p.addFor(id, allPostingsKey)
   360  
   361  	p.mtx.Unlock()
   362  }
   363  
// addFor appends id to the postings list of label pair l, creating the
// per-name map on first use. Callers must hold p.mtx for writing.
// When the index is ordered, a single insertion-sort pass bubbles the new ID
// into place, preserving sorted order.
func (p *MemPostings) addFor(id storage.SeriesRef, l *typesv1.LabelPair) {
	nm, ok := p.m[l.Name]
	if !ok {
		nm = map[string][]storage.SeriesRef{}
		p.m[l.Name] = nm
	}
	list := append(nm[l.Value], id)
	nm[l.Value] = list

	if !p.ordered {
		return
	}
	// There is no guarantee that no higher ID was inserted before as they may
	// be generated independently before adding them to postings.
	// We repair order violations on insert. The invariant is that the first n-1
	// items in the list are already sorted.
	for i := len(list) - 1; i >= 1; i-- {
		if list[i] >= list[i-1] {
			break
		}
		list[i], list[i-1] = list[i-1], list[i]
	}
}
   387  
   388  // ExpandPostings returns the postings expanded as a slice.
   389  func ExpandPostings(p Postings) (res []storage.SeriesRef, err error) {
   390  	for p.Next() {
   391  		res = append(res, p.At())
   392  	}
   393  	return res, p.Err()
   394  }
   395  
// Postings provides iterative access over a postings list.
// It is an alias for a seek iterator over series references.
type Postings = iter.SeekIterator[storage.SeriesRef, storage.SeriesRef]
   398  
   399  // EmptyPostings returns a postings list that's always empty.
   400  // NOTE: Returning EmptyPostings sentinel when Postings struct has no postings is recommended.
   401  // It triggers optimized flow in other functions like Intersect, Without etc.
   402  func EmptyPostings() Postings {
   403  	return iter.NewSliceSeekIterator([]storage.SeriesRef(nil))
   404  }
   405  
// ErrPostings returns new postings that immediately error.
// The behavior is delegated to iter.NewErrSeekIterator.
func ErrPostings(err error) Postings {
	return iter.NewErrSeekIterator[storage.SeriesRef, storage.SeriesRef](err)
}
   410  
// Intersect returns a new postings list over the intersection of the
// input postings.
func Intersect(its ...Postings) Postings {
	if len(its) == 0 {
		return EmptyPostings()
	}
	if len(its) == 1 {
		return its[0]
	}
	for _, p := range its {
		// NOTE(review): this identity comparison only short-circuits when
		// EmptyPostings returns a shared sentinel instance; if it constructs
		// a fresh iterator per call, the comparison is always false.
		if p == EmptyPostings() {
			return EmptyPostings()
		}
	}

	return newIntersectPostings(its...)
}
   428  
// intersectPostings iterates over the IDs common to all sub-postings in arr.
type intersectPostings struct {
	arr []Postings
	cur storage.SeriesRef // current intersection candidate / position
}
   433  
// newIntersectPostings builds an intersection iterator over its.
func newIntersectPostings(its ...Postings) *intersectPostings {
	return &intersectPostings{arr: its}
}
   437  
// At returns the current element of the intersection.
func (it *intersectPostings) At() storage.SeriesRef {
	return it.cur
}
   441  
// doNext advances all sub-postings to the smallest ID >= it.cur present in
// every one of them. It returns false once any sub-postings is exhausted.
func (it *intersectPostings) doNext() bool {
Loop:
	for {
		for _, p := range it.arr {
			if !p.Seek(it.cur) {
				return false
			}
			// p moved past the candidate: adopt its position as the new
			// candidate and restart the scan over all sub-postings.
			if p.At() > it.cur {
				it.cur = p.At()
				continue Loop
			}
		}
		// Every sub-postings sits exactly at it.cur: it is in the intersection.
		return true
	}
}
   457  
// Next advances each sub-postings once, tracks the maximum of their heads as
// the next candidate, and then converges on a common element via doNext.
func (it *intersectPostings) Next() bool {
	for _, p := range it.arr {
		if !p.Next() {
			return false
		}
		if p.At() > it.cur {
			it.cur = p.At()
		}
	}
	return it.doNext()
}
   469  
// Seek advances the intersection to the first common element >= id.
func (it *intersectPostings) Seek(id storage.SeriesRef) bool {
	it.cur = id
	return it.doNext()
}
   474  
   475  func (it *intersectPostings) Err() error {
   476  	for _, p := range it.arr {
   477  		if p.Err() != nil {
   478  			return p.Err()
   479  		}
   480  	}
   481  	return nil
   482  }
   483  
   484  func (it *intersectPostings) Close() error {
   485  	for _, p := range it.arr {
   486  		if err := p.Close(); err != nil {
   487  			return err
   488  		}
   489  	}
   490  	return nil
   491  }
   492  
   493  // Merge returns a new iterator over the union of the input iterators.
   494  func Merge(its ...Postings) Postings {
   495  	if len(its) == 0 {
   496  		return EmptyPostings()
   497  	}
   498  	if len(its) == 1 {
   499  		return its[0]
   500  	}
   501  
   502  	p, ok := newMergedPostings(its)
   503  	if !ok {
   504  		return EmptyPostings()
   505  	}
   506  	return p
   507  }
   508  
   509  type postingsHeap []Postings
   510  
   511  func (h postingsHeap) Len() int           { return len(h) }
   512  func (h postingsHeap) Less(i, j int) bool { return h[i].At() < h[j].At() }
   513  func (h *postingsHeap) Swap(i, j int)     { (*h)[i], (*h)[j] = (*h)[j], (*h)[i] }
   514  
   515  func (h *postingsHeap) Push(x interface{}) {
   516  	*h = append(*h, x.(Postings))
   517  }
   518  
   519  func (h *postingsHeap) Pop() interface{} {
   520  	old := *h
   521  	n := len(old)
   522  	x := old[n-1]
   523  	*h = old[0 : n-1]
   524  	return x
   525  }
   526  
// mergedPostings performs a k-way merge with de-duplication over a heap of
// postings iterators.
type mergedPostings struct {
	h           postingsHeap // min-heap of non-exhausted sub-postings
	initialized bool         // set after the first Next primes the heap
	cur         storage.SeriesRef
	err         error // first sub-postings error; sticky
}
   533  
// newMergedPostings primes each sub-postings with one Next call and keeps
// the non-exhausted ones. If priming fails with an error, a mergedPostings
// carrying that error is returned with nonEmpty=true so the caller surfaces
// the error instead of an empty result.
func newMergedPostings(p []Postings) (m *mergedPostings, nonEmpty bool) {
	ph := make(postingsHeap, 0, len(p))

	for _, it := range p {
		// NOTE: mergedPostings struct requires the user to issue an initial Next.
		if it.Next() {
			ph = append(ph, it)
		} else {
			if it.Err() != nil {
				return &mergedPostings{err: it.Err()}, true
			}
		}
	}

	if len(ph) == 0 {
		return nil, false
	}
	return &mergedPostings{h: ph}, true
}
   553  
// Next advances the merge to the next distinct series reference.
// On the first call the heap is initialized and the minimum head returned.
// Afterwards the minimum head is advanced (or popped when exhausted) until
// the top of the heap differs from the last value returned, which
// de-duplicates IDs present in several sub-postings.
func (it *mergedPostings) Next() bool {
	if it.h.Len() == 0 || it.err != nil {
		return false
	}

	// The user must issue an initial Next.
	if !it.initialized {
		heap.Init(&it.h)
		it.cur = it.h[0].At()
		it.initialized = true
		return true
	}

	for {
		cur := it.h[0]
		if !cur.Next() {
			heap.Pop(&it.h)
			if cur.Err() != nil {
				it.err = cur.Err()
				return false
			}
			if it.h.Len() == 0 {
				return false
			}
		} else {
			// Value of top of heap has changed, re-heapify.
			heap.Fix(&it.h, 0)
		}

		// Return only when the minimum differs from the last emitted value.
		if it.h[0].At() != it.cur {
			it.cur = it.h[0].At()
			return true
		}
	}
}
   589  
// Seek advances the merge to the first value >= id by repeatedly seeking the
// minimum head of the heap forward until the merged position reaches id.
func (it *mergedPostings) Seek(id storage.SeriesRef) bool {
	if it.h.Len() == 0 || it.err != nil {
		return false
	}
	// Priming is delegated to Next on the first call.
	if !it.initialized {
		if !it.Next() {
			return false
		}
	}
	for it.cur < id {
		cur := it.h[0]
		if !cur.Seek(id) {
			heap.Pop(&it.h)
			if cur.Err() != nil {
				it.err = cur.Err()
				return false
			}
			if it.h.Len() == 0 {
				return false
			}
		} else {
			// Value of top of heap has changed, re-heapify.
			heap.Fix(&it.h, 0)
		}

		it.cur = it.h[0].At()
	}
	return true
}
   619  
   620  func (it mergedPostings) At() storage.SeriesRef {
   621  	return it.cur
   622  }
   623  
   624  func (it mergedPostings) Err() error {
   625  	return it.err
   626  }
   627  
   628  func (it mergedPostings) Close() error {
   629  	return nil
   630  }
   631  
// Without returns a new postings list that contains all elements from the full list that
// are not in the drop list.
func Without(full, drop Postings) Postings {
	// NOTE(review): these identity comparisons only fire when EmptyPostings
	// returns a shared sentinel instance.
	if full == EmptyPostings() {
		return EmptyPostings()
	}

	if drop == EmptyPostings() {
		return full
	}
	return newRemovedPostings(full, drop)
}
   644  
// removedPostings yields the elements of full that do not appear in remove.
type removedPostings struct {
	full, remove Postings

	cur storage.SeriesRef // current element of the difference

	initialized bool // set once both iterators have been primed
	fok, rok    bool // whether full/remove still have elements
}
   653  
   654  func newRemovedPostings(full, remove Postings) *removedPostings {
   655  	return &removedPostings{
   656  		full:   full,
   657  		remove: remove,
   658  	}
   659  }
   660  
// At returns the current element of the difference.
func (rp *removedPostings) At() storage.SeriesRef {
	return rp.cur
}
   664  
// Next advances to the next element of full that is absent from remove.
// Both iterators are primed on the first call; afterwards they are advanced
// like a merge, skipping values present in both lists.
func (rp *removedPostings) Next() bool {
	if !rp.initialized {
		rp.fok = rp.full.Next()
		rp.rok = rp.remove.Next()
		rp.initialized = true
	}
	for {
		if !rp.fok {
			return false
		}

		// Remove list exhausted: everything remaining in full passes through.
		if !rp.rok {
			rp.cur = rp.full.At()
			rp.fok = rp.full.Next()
			return true
		}

		fcur, rcur := rp.full.At(), rp.remove.At()
		if fcur < rcur {
			rp.cur = fcur
			rp.fok = rp.full.Next()

			return true
		} else if rcur < fcur {
			// Forward the remove postings to the right position.
			rp.rok = rp.remove.Seek(fcur)
		} else {
			// Skip the current posting.
			rp.fok = rp.full.Next()
		}
	}
}
   697  
// Seek advances to the first remaining value >= id.
// NOTE(review): before the first Next, cur holds its zero value, so Seek(0)
// reports true without advancing either iterator — callers appear to avoid
// this (see ShardedPostings.Next's manual first advance); confirm if reused.
func (rp *removedPostings) Seek(id storage.SeriesRef) bool {
	if rp.cur >= id {
		return true
	}

	rp.fok = rp.full.Seek(id)
	rp.rok = rp.remove.Seek(id)
	rp.initialized = true

	return rp.Next()
}
   709  
   710  func (rp *removedPostings) Err() error {
   711  	if rp.full.Err() != nil {
   712  		return rp.full.Err()
   713  	}
   714  
   715  	return rp.remove.Err()
   716  }
   717  
   718  func (rp *removedPostings) Close() error {
   719  	if err := rp.full.Close(); err != nil {
   720  		return err
   721  	}
   722  
   723  	return rp.remove.Close()
   724  }
   725  
// bigEndianPostings implements the Postings interface over a byte stream of
// big endian numbers.
type bigEndianPostings struct {
	list []byte // remaining encoded uint32s, 4 bytes each
	cur  uint32 // last decoded value
}
   732  
// NewBigEndianPostings returns postings that decode big-endian uint32s from list.
func NewBigEndianPostings(list []byte) Postings {
	return newBigEndianPostings(list)
}
   736  
// newBigEndianPostings wraps list in the concrete iterator type.
func newBigEndianPostings(list []byte) *bigEndianPostings {
	return &bigEndianPostings{list: list}
}
   740  
// At returns the most recently decoded value.
func (it *bigEndianPostings) At() storage.SeriesRef {
	return storage.SeriesRef(it.cur)
}
   744  
   745  func (it *bigEndianPostings) Next() bool {
   746  	if len(it.list) >= 4 {
   747  		it.cur = binary.BigEndian.Uint32(it.list)
   748  		it.list = it.list[4:]
   749  		return true
   750  	}
   751  	return false
   752  }
   753  
// Seek advances to the first value >= x via binary search over the remaining
// encoded list. On a miss the list is dropped so later calls keep failing.
func (it *bigEndianPostings) Seek(x storage.SeriesRef) bool {
	if storage.SeriesRef(it.cur) >= x {
		return true
	}

	num := len(it.list) / 4
	// Do binary search between current position and end.
	i := sort.Search(num, func(i int) bool {
		return binary.BigEndian.Uint32(it.list[i*4:]) >= uint32(x)
	})
	if i < num {
		j := i * 4
		// Decode the found value and consume it, matching Next's convention
		// that cur holds the value just past the front of list.
		it.cur = binary.BigEndian.Uint32(it.list[j:])
		it.list = it.list[j+4:]
		return true
	}
	it.list = nil
	return false
}
   773  
// Err always returns nil; decoding cannot fail.
func (it *bigEndianPostings) Err() error {
	return nil
}
   777  
// Close is a no-op; the byte slice is owned by the caller.
func (it *bigEndianPostings) Close() error {
	return nil
}
   781  
// seriesRefSlice attaches the methods of sort.Interface to []storage.SeriesRef, sorting in increasing order.
type seriesRefSlice []storage.SeriesRef

func (x seriesRefSlice) Len() int           { return len(x) }
func (x seriesRefSlice) Less(i, j int) bool { return x[i] < x[j] }
func (x seriesRefSlice) Swap(i, j int)      { x[i], x[j] = x[j], x[i] }
   788  
// ShardedPostings restricts a postings list to the sampled offset range of a
// shard (see the note on NewShardedPostings for why the range is approximate).
type ShardedPostings struct {
	p                    Postings
	minOffset, maxOffset uint64 // half-open [minOffset, maxOffset) bound on series refs
	initialized          bool   // set after the first Next fast-forwards to minOffset
}
   794  
   795  // Note: shardedPostings can technically return more series than just those that correspond to
   796  // the requested shard. This is because we do fingperint/offset sampling in TSDB so we won't know exactly
   797  // which offsets to start/end at, but will likely buffer a little on each end, so they still need to be
   798  // checked for shard inclusiveness.
   799  // For example (below), given a shard, we'll likely return a slight superset of offsets surrounding the shard.
   800  // ---[shard0]--- # Shard membership
   801  // -[--shard0--]- # Series returned by shardedPostings
   802  func NewShardedPostings(p Postings, shard ShardAnnotation, offsets FingerprintOffsets) *ShardedPostings {
   803  	min, max := offsets.Range(shard)
   804  	return &ShardedPostings{
   805  		p:         p,
   806  		minOffset: min,
   807  		maxOffset: max,
   808  	}
   809  }
   810  
// Next advances the iterator and returns true if another value was found.
// The first call fast-forwards to the shard's sampled lower bound via Seek;
// subsequent calls stop once the sampled upper bound is reached.
func (sp *ShardedPostings) Next() bool {
	// fast forward to the point we know we'll have to start checking
	if !sp.initialized {
		sp.initialized = true
		// Underlying bigEndianPostings doesn't play nice with Seek(0)
		// so we first advance manually once
		if ok := sp.p.Next(); !ok {
			return false
		}
		// Seek clamps 0 up to minOffset, landing on the first candidate.
		return sp.Seek(0)
	}
	ok := sp.p.Next()
	if !ok {
		return false
	}

	// Past the shard's sampled upper bound: iteration is done.
	if sp.p.At() >= storage.SeriesRef(sp.maxOffset) {
		return false
	}

	return true
}
   834  
   835  // Seek advances the iterator to value v or greater and returns
   836  // true if a value was found.
   837  func (sp *ShardedPostings) Seek(v storage.SeriesRef) (res bool) {
   838  	if v >= storage.SeriesRef(sp.maxOffset) {
   839  		return false
   840  	}
   841  	if v < storage.SeriesRef(sp.minOffset) {
   842  		v = storage.SeriesRef(sp.minOffset)
   843  	}
   844  	return sp.p.Seek(v)
   845  }
   846  
// At returns the value at the current iterator position.
func (sp *ShardedPostings) At() storage.SeriesRef {
	return sp.p.At()
}
   851  
   852  // Err returns the last error of the iterator.
   853  func (sp *ShardedPostings) Err() (err error) {
   854  	return sp.p.Err()
   855  }
   856  
// Close is a no-op. NOTE(review): unlike removedPostings, this does not close
// the wrapped postings — presumably ownership stays with the caller; confirm
// against call sites before changing.
func (sp *ShardedPostings) Close() error {
	return nil
}