github.com/yankunsam/loki/v2@v2.6.3-0.20220817130409-389df5235c27/pkg/storage/stores/tsdb/index/postings.go (about)

     1  // Copyright 2017 The Prometheus Authors
     2  // Licensed under the Apache License, Version 2.0 (the "License");
     3  // you may not use this file except in compliance with the License.
     4  // You may obtain a copy of the License at
     5  //
     6  // http://www.apache.org/licenses/LICENSE-2.0
     7  //
     8  // Unless required by applicable law or agreed to in writing, software
     9  // distributed under the License is distributed on an "AS IS" BASIS,
    10  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    11  // See the License for the specific language governing permissions and
    12  // limitations under the License.
    13  
    14  package index
    15  
    16  import (
    17  	"container/heap"
    18  	"encoding/binary"
    19  	"runtime"
    20  	"sort"
    21  	"sync"
    22  
    23  	"github.com/prometheus/prometheus/model/labels"
    24  	"github.com/prometheus/prometheus/storage"
    25  )
    26  
    27  var allPostingsKey = labels.Label{}
    28  
    29  // AllPostingsKey returns the label key that is used to store the postings list of all existing IDs.
    30  func AllPostingsKey() (name, value string) {
    31  	return allPostingsKey.Name, allPostingsKey.Value
    32  }
    33  
    34  // ensureOrderBatchSize is the max number of postings passed to a worker in a single batch in MemPostings.EnsureOrder().
    35  const ensureOrderBatchSize = 1024
    36  
    37  // ensureOrderBatchPool is a pool used to recycle batches passed to workers in MemPostings.EnsureOrder().
    38  var ensureOrderBatchPool = sync.Pool{
    39  	New: func() interface{} {
    40  		return make([][]storage.SeriesRef, 0, ensureOrderBatchSize)
    41  	},
    42  }
    43  
    44  // MemPostings holds postings list for series ID per label pair. They may be written
    45  // to out of order.
    46  // EnsureOrder() must be called once before any reads are done. This allows for quick
    47  // unordered batch fills on startup.
    48  type MemPostings struct {
    49  	mtx     sync.RWMutex
    50  	m       map[string]map[string][]storage.SeriesRef
    51  	ordered bool
    52  }
    53  
    54  // NewMemPostings returns a memPostings that's ready for reads and writes.
    55  func NewMemPostings() *MemPostings {
    56  	return &MemPostings{
    57  		m:       make(map[string]map[string][]storage.SeriesRef, 512),
    58  		ordered: true,
    59  	}
    60  }
    61  
    62  // NewUnorderedMemPostings returns a memPostings that is not safe to be read from
    63  // until EnsureOrder() was called once.
    64  func NewUnorderedMemPostings() *MemPostings {
    65  	return &MemPostings{
    66  		m:       make(map[string]map[string][]storage.SeriesRef, 512),
    67  		ordered: false,
    68  	}
    69  }
    70  
    71  // Symbols returns an iterator over all unique name and value strings, in order.
    72  func (p *MemPostings) Symbols() StringIter {
    73  	p.mtx.RLock()
    74  
    75  	// Add all the strings to a map to de-duplicate.
    76  	symbols := make(map[string]struct{}, 512)
    77  	for n, e := range p.m {
    78  		symbols[n] = struct{}{}
    79  		for v := range e {
    80  			symbols[v] = struct{}{}
    81  		}
    82  	}
    83  	p.mtx.RUnlock()
    84  
    85  	res := make([]string, 0, len(symbols))
    86  	for k := range symbols {
    87  		res = append(res, k)
    88  	}
    89  
    90  	sort.Strings(res)
    91  	return NewStringListIter(res)
    92  }
    93  
    94  // SortedKeys returns a list of sorted label keys of the postings.
    95  func (p *MemPostings) SortedKeys() []labels.Label {
    96  	p.mtx.RLock()
    97  	keys := make([]labels.Label, 0, len(p.m))
    98  
    99  	for n, e := range p.m {
   100  		for v := range e {
   101  			keys = append(keys, labels.Label{Name: n, Value: v})
   102  		}
   103  	}
   104  	p.mtx.RUnlock()
   105  
   106  	sort.Slice(keys, func(i, j int) bool {
   107  		if keys[i].Name != keys[j].Name {
   108  			return keys[i].Name < keys[j].Name
   109  		}
   110  		return keys[i].Value < keys[j].Value
   111  	})
   112  	return keys
   113  }
   114  
   115  // LabelNames returns all the unique label names.
   116  func (p *MemPostings) LabelNames() []string {
   117  	p.mtx.RLock()
   118  	defer p.mtx.RUnlock()
   119  	n := len(p.m)
   120  	if n == 0 {
   121  		return nil
   122  	}
   123  
   124  	names := make([]string, 0, n-1)
   125  	for name := range p.m {
   126  		if name != allPostingsKey.Name {
   127  			names = append(names, name)
   128  		}
   129  	}
   130  	return names
   131  }
   132  
   133  // LabelValues returns label values for the given name.
   134  func (p *MemPostings) LabelValues(name string) []string {
   135  	p.mtx.RLock()
   136  	defer p.mtx.RUnlock()
   137  
   138  	values := make([]string, 0, len(p.m[name]))
   139  	for v := range p.m[name] {
   140  		values = append(values, v)
   141  	}
   142  	return values
   143  }
   144  
// PostingsStats contains cardinality based statistics for postings, as
// computed by MemPostings.Stats.
type PostingsStats struct {
	// CardinalityMetricsStats holds, for the label name passed to Stats, one
	// entry per value with the number of series IDs listed for it.
	CardinalityMetricsStats []Stat
	// CardinalityLabelStats holds one entry per label name with its number
	// of distinct values.
	CardinalityLabelStats []Stat
	// LabelValueStats holds one entry per label name with the summed byte
	// length of its values.
	LabelValueStats []Stat
	// LabelValuePairsStats holds one entry per "name=value" pair with the
	// number of series IDs listed for it.
	LabelValuePairsStats []Stat
	// NumLabelPairs is the total number of name/value pairs, not counting
	// pairs under the empty label name.
	NumLabelPairs int
}
   153  
   154  // Stats calculates the cardinality statistics from postings.
   155  func (p *MemPostings) Stats(label string) *PostingsStats {
   156  	const maxNumOfRecords = 10
   157  	var size uint64
   158  
   159  	p.mtx.RLock()
   160  
   161  	metrics := &maxHeap{}
   162  	labels := &maxHeap{}
   163  	labelValueLength := &maxHeap{}
   164  	labelValuePairs := &maxHeap{}
   165  	numLabelPairs := 0
   166  
   167  	metrics.init(maxNumOfRecords)
   168  	labels.init(maxNumOfRecords)
   169  	labelValueLength.init(maxNumOfRecords)
   170  	labelValuePairs.init(maxNumOfRecords)
   171  
   172  	for n, e := range p.m {
   173  		if n == "" {
   174  			continue
   175  		}
   176  		labels.push(Stat{Name: n, Count: uint64(len(e))})
   177  		numLabelPairs += len(e)
   178  		size = 0
   179  		for name, values := range e {
   180  			if n == label {
   181  				metrics.push(Stat{Name: name, Count: uint64(len(values))})
   182  			}
   183  			labelValuePairs.push(Stat{Name: n + "=" + name, Count: uint64(len(values))})
   184  			size += uint64(len(name))
   185  		}
   186  		labelValueLength.push(Stat{Name: n, Count: size})
   187  	}
   188  
   189  	p.mtx.RUnlock()
   190  
   191  	return &PostingsStats{
   192  		CardinalityMetricsStats: metrics.get(),
   193  		CardinalityLabelStats:   labels.get(),
   194  		LabelValueStats:         labelValueLength.get(),
   195  		LabelValuePairsStats:    labelValuePairs.get(),
   196  		NumLabelPairs:           numLabelPairs,
   197  	}
   198  }
   199  
   200  // Get returns a postings list for the given label pair.
   201  func (p *MemPostings) Get(name, value string) Postings {
   202  	var lp []storage.SeriesRef
   203  	p.mtx.RLock()
   204  	l := p.m[name]
   205  	if l != nil {
   206  		lp = l[value]
   207  	}
   208  	p.mtx.RUnlock()
   209  
   210  	if lp == nil {
   211  		return EmptyPostings()
   212  	}
   213  	return newListPostings(lp...)
   214  }
   215  
   216  // All returns a postings list over all documents ever added.
   217  func (p *MemPostings) All() Postings {
   218  	return p.Get(AllPostingsKey())
   219  }
   220  
   221  // EnsureOrder ensures that all postings lists are sorted. After it returns all further
   222  // calls to add and addFor will insert new IDs in a sorted manner.
   223  func (p *MemPostings) EnsureOrder() {
   224  	p.mtx.Lock()
   225  	defer p.mtx.Unlock()
   226  
   227  	if p.ordered {
   228  		return
   229  	}
   230  
   231  	n := runtime.GOMAXPROCS(0)
   232  	workc := make(chan [][]storage.SeriesRef)
   233  
   234  	var wg sync.WaitGroup
   235  	wg.Add(n)
   236  
   237  	for i := 0; i < n; i++ {
   238  		go func() {
   239  			for job := range workc {
   240  				for _, l := range job {
   241  					sort.Sort(seriesRefSlice(l))
   242  				}
   243  
   244  				job = job[:0]
   245  				ensureOrderBatchPool.Put(job) //nolint:staticcheck // Ignore SA6002 safe to ignore and actually fixing it has some performance penalty.
   246  			}
   247  			wg.Done()
   248  		}()
   249  	}
   250  
   251  	nextJob := ensureOrderBatchPool.Get().([][]storage.SeriesRef)
   252  	for _, e := range p.m {
   253  		for _, l := range e {
   254  			nextJob = append(nextJob, l)
   255  
   256  			if len(nextJob) >= ensureOrderBatchSize {
   257  				workc <- nextJob
   258  				nextJob = ensureOrderBatchPool.Get().([][]storage.SeriesRef)
   259  			}
   260  		}
   261  	}
   262  
   263  	// If the last job was partially filled, we need to push it to workers too.
   264  	if len(nextJob) > 0 {
   265  		workc <- nextJob
   266  	}
   267  
   268  	close(workc)
   269  	wg.Wait()
   270  
   271  	p.ordered = true
   272  }
   273  
   274  // Delete removes all ids in the given map from the postings lists.
   275  func (p *MemPostings) Delete(deleted map[storage.SeriesRef]struct{}) {
   276  	var keys, vals []string
   277  
   278  	// Collect all keys relevant for deletion once. New keys added afterwards
   279  	// can by definition not be affected by any of the given deletes.
   280  	p.mtx.RLock()
   281  	for n := range p.m {
   282  		keys = append(keys, n)
   283  	}
   284  	p.mtx.RUnlock()
   285  
   286  	for _, n := range keys {
   287  		p.mtx.RLock()
   288  		vals = vals[:0]
   289  		for v := range p.m[n] {
   290  			vals = append(vals, v)
   291  		}
   292  		p.mtx.RUnlock()
   293  
   294  		// For each posting we first analyse whether the postings list is affected by the deletes.
   295  		// If yes, we actually reallocate a new postings list.
   296  		for _, l := range vals {
   297  			// Only lock for processing one postings list so we don't block reads for too long.
   298  			p.mtx.Lock()
   299  
   300  			found := false
   301  			for _, id := range p.m[n][l] {
   302  				if _, ok := deleted[id]; ok {
   303  					found = true
   304  					break
   305  				}
   306  			}
   307  			if !found {
   308  				p.mtx.Unlock()
   309  				continue
   310  			}
   311  			repl := make([]storage.SeriesRef, 0, len(p.m[n][l]))
   312  
   313  			for _, id := range p.m[n][l] {
   314  				if _, ok := deleted[id]; !ok {
   315  					repl = append(repl, id)
   316  				}
   317  			}
   318  			if len(repl) > 0 {
   319  				p.m[n][l] = repl
   320  			} else {
   321  				delete(p.m[n], l)
   322  			}
   323  			p.mtx.Unlock()
   324  		}
   325  		p.mtx.Lock()
   326  		if len(p.m[n]) == 0 {
   327  			delete(p.m, n)
   328  		}
   329  		p.mtx.Unlock()
   330  	}
   331  }
   332  
   333  // Iter calls f for each postings list. It aborts if f returns an error and returns it.
   334  func (p *MemPostings) Iter(f func(labels.Label, Postings) error) error {
   335  	p.mtx.RLock()
   336  	defer p.mtx.RUnlock()
   337  
   338  	for n, e := range p.m {
   339  		for v, p := range e {
   340  			if err := f(labels.Label{Name: n, Value: v}, newListPostings(p...)); err != nil {
   341  				return err
   342  			}
   343  		}
   344  	}
   345  	return nil
   346  }
   347  
   348  // Add a label set to the postings index.
   349  func (p *MemPostings) Add(id storage.SeriesRef, lset labels.Labels) {
   350  	p.mtx.Lock()
   351  
   352  	for _, l := range lset {
   353  		p.addFor(id, l)
   354  	}
   355  	p.addFor(id, allPostingsKey)
   356  
   357  	p.mtx.Unlock()
   358  }
   359  
   360  func (p *MemPostings) addFor(id storage.SeriesRef, l labels.Label) {
   361  	nm, ok := p.m[l.Name]
   362  	if !ok {
   363  		nm = map[string][]storage.SeriesRef{}
   364  		p.m[l.Name] = nm
   365  	}
   366  	list := append(nm[l.Value], id)
   367  	nm[l.Value] = list
   368  
   369  	if !p.ordered {
   370  		return
   371  	}
   372  	// There is no guarantee that no higher ID was inserted before as they may
   373  	// be generated independently before adding them to postings.
   374  	// We repair order violations on insert. The invariant is that the first n-1
   375  	// items in the list are already sorted.
   376  	for i := len(list) - 1; i >= 1; i-- {
   377  		if list[i] >= list[i-1] {
   378  			break
   379  		}
   380  		list[i], list[i-1] = list[i-1], list[i]
   381  	}
   382  }
   383  
   384  // ExpandPostings returns the postings expanded as a slice.
   385  func ExpandPostings(p Postings) (res []storage.SeriesRef, err error) {
   386  	for p.Next() {
   387  		res = append(res, p.At())
   388  	}
   389  	return res, p.Err()
   390  }
   391  
// Postings provides iterative access over a postings list.
// Implementations in this file start positioned before the first value, so
// Next or Seek has to be called before At returns a meaningful value.
type Postings interface {
	// Next advances the iterator and returns true if another value was found.
	Next() bool

	// Seek advances the iterator to value v or greater and returns
	// true if a value was found.
	Seek(v storage.SeriesRef) bool

	// At returns the value at the current iterator position.
	At() storage.SeriesRef

	// Err returns the last error of the iterator.
	Err() error
}
   407  
   408  // errPostings is an empty iterator that always errors.
   409  type errPostings struct {
   410  	err error
   411  }
   412  
   413  func (e errPostings) Next() bool                  { return false }
   414  func (e errPostings) Seek(storage.SeriesRef) bool { return false }
   415  func (e errPostings) At() storage.SeriesRef       { return 0 }
   416  func (e errPostings) Err() error                  { return e.err }
   417  
   418  var emptyPostings = errPostings{}
   419  
   420  // EmptyPostings returns a postings list that's always empty.
   421  // NOTE: Returning EmptyPostings sentinel when Postings struct has no postings is recommended.
   422  // It triggers optimized flow in other functions like Intersect, Without etc.
   423  func EmptyPostings() Postings {
   424  	return emptyPostings
   425  }
   426  
   427  // ErrPostings returns new postings that immediately error.
   428  func ErrPostings(err error) Postings {
   429  	return errPostings{err}
   430  }
   431  
   432  // Intersect returns a new postings list over the intersection of the
   433  // input postings.
   434  func Intersect(its ...Postings) Postings {
   435  	if len(its) == 0 {
   436  		return EmptyPostings()
   437  	}
   438  	if len(its) == 1 {
   439  		return its[0]
   440  	}
   441  	for _, p := range its {
   442  		if p == EmptyPostings() {
   443  			return EmptyPostings()
   444  		}
   445  	}
   446  
   447  	return newIntersectPostings(its...)
   448  }
   449  
   450  type intersectPostings struct {
   451  	arr []Postings
   452  	cur storage.SeriesRef
   453  }
   454  
   455  func newIntersectPostings(its ...Postings) *intersectPostings {
   456  	return &intersectPostings{arr: its}
   457  }
   458  
   459  func (it *intersectPostings) At() storage.SeriesRef {
   460  	return it.cur
   461  }
   462  
   463  func (it *intersectPostings) doNext() bool {
   464  Loop:
   465  	for {
   466  		for _, p := range it.arr {
   467  			if !p.Seek(it.cur) {
   468  				return false
   469  			}
   470  			if p.At() > it.cur {
   471  				it.cur = p.At()
   472  				continue Loop
   473  			}
   474  		}
   475  		return true
   476  	}
   477  }
   478  
   479  func (it *intersectPostings) Next() bool {
   480  	for _, p := range it.arr {
   481  		if !p.Next() {
   482  			return false
   483  		}
   484  		if p.At() > it.cur {
   485  			it.cur = p.At()
   486  		}
   487  	}
   488  	return it.doNext()
   489  }
   490  
   491  func (it *intersectPostings) Seek(id storage.SeriesRef) bool {
   492  	it.cur = id
   493  	return it.doNext()
   494  }
   495  
   496  func (it *intersectPostings) Err() error {
   497  	for _, p := range it.arr {
   498  		if p.Err() != nil {
   499  			return p.Err()
   500  		}
   501  	}
   502  	return nil
   503  }
   504  
   505  // Merge returns a new iterator over the union of the input iterators.
   506  func Merge(its ...Postings) Postings {
   507  	if len(its) == 0 {
   508  		return EmptyPostings()
   509  	}
   510  	if len(its) == 1 {
   511  		return its[0]
   512  	}
   513  
   514  	p, ok := newMergedPostings(its)
   515  	if !ok {
   516  		return EmptyPostings()
   517  	}
   518  	return p
   519  }
   520  
   521  type postingsHeap []Postings
   522  
   523  func (h postingsHeap) Len() int           { return len(h) }
   524  func (h postingsHeap) Less(i, j int) bool { return h[i].At() < h[j].At() }
   525  func (h *postingsHeap) Swap(i, j int)     { (*h)[i], (*h)[j] = (*h)[j], (*h)[i] }
   526  
   527  func (h *postingsHeap) Push(x interface{}) {
   528  	*h = append(*h, x.(Postings))
   529  }
   530  
   531  func (h *postingsHeap) Pop() interface{} {
   532  	old := *h
   533  	n := len(old)
   534  	x := old[n-1]
   535  	*h = old[0 : n-1]
   536  	return x
   537  }
   538  
   539  type mergedPostings struct {
   540  	h           postingsHeap
   541  	initialized bool
   542  	cur         storage.SeriesRef
   543  	err         error
   544  }
   545  
   546  func newMergedPostings(p []Postings) (m *mergedPostings, nonEmpty bool) {
   547  	ph := make(postingsHeap, 0, len(p))
   548  
   549  	for _, it := range p {
   550  		// NOTE: mergedPostings struct requires the user to issue an initial Next.
   551  		if it.Next() {
   552  			ph = append(ph, it)
   553  		} else {
   554  			if it.Err() != nil {
   555  				return &mergedPostings{err: it.Err()}, true
   556  			}
   557  		}
   558  	}
   559  
   560  	if len(ph) == 0 {
   561  		return nil, false
   562  	}
   563  	return &mergedPostings{h: ph}, true
   564  }
   565  
   566  func (it *mergedPostings) Next() bool {
   567  	if it.h.Len() == 0 || it.err != nil {
   568  		return false
   569  	}
   570  
   571  	// The user must issue an initial Next.
   572  	if !it.initialized {
   573  		heap.Init(&it.h)
   574  		it.cur = it.h[0].At()
   575  		it.initialized = true
   576  		return true
   577  	}
   578  
   579  	for {
   580  		cur := it.h[0]
   581  		if !cur.Next() {
   582  			heap.Pop(&it.h)
   583  			if cur.Err() != nil {
   584  				it.err = cur.Err()
   585  				return false
   586  			}
   587  			if it.h.Len() == 0 {
   588  				return false
   589  			}
   590  		} else {
   591  			// Value of top of heap has changed, re-heapify.
   592  			heap.Fix(&it.h, 0)
   593  		}
   594  
   595  		if it.h[0].At() != it.cur {
   596  			it.cur = it.h[0].At()
   597  			return true
   598  		}
   599  	}
   600  }
   601  
   602  func (it *mergedPostings) Seek(id storage.SeriesRef) bool {
   603  	if it.h.Len() == 0 || it.err != nil {
   604  		return false
   605  	}
   606  	if !it.initialized {
   607  		if !it.Next() {
   608  			return false
   609  		}
   610  	}
   611  	for it.cur < id {
   612  		cur := it.h[0]
   613  		if !cur.Seek(id) {
   614  			heap.Pop(&it.h)
   615  			if cur.Err() != nil {
   616  				it.err = cur.Err()
   617  				return false
   618  			}
   619  			if it.h.Len() == 0 {
   620  				return false
   621  			}
   622  		} else {
   623  			// Value of top of heap has changed, re-heapify.
   624  			heap.Fix(&it.h, 0)
   625  		}
   626  
   627  		it.cur = it.h[0].At()
   628  	}
   629  	return true
   630  }
   631  
   632  func (it mergedPostings) At() storage.SeriesRef {
   633  	return it.cur
   634  }
   635  
   636  func (it mergedPostings) Err() error {
   637  	return it.err
   638  }
   639  
   640  // Without returns a new postings list that contains all elements from the full list that
   641  // are not in the drop list.
   642  func Without(full, drop Postings) Postings {
   643  	if full == EmptyPostings() {
   644  		return EmptyPostings()
   645  	}
   646  
   647  	if drop == EmptyPostings() {
   648  		return full
   649  	}
   650  	return newRemovedPostings(full, drop)
   651  }
   652  
   653  type removedPostings struct {
   654  	full, remove Postings
   655  
   656  	cur storage.SeriesRef
   657  
   658  	initialized bool
   659  	fok, rok    bool
   660  }
   661  
   662  func newRemovedPostings(full, remove Postings) *removedPostings {
   663  	return &removedPostings{
   664  		full:   full,
   665  		remove: remove,
   666  	}
   667  }
   668  
   669  func (rp *removedPostings) At() storage.SeriesRef {
   670  	return rp.cur
   671  }
   672  
   673  func (rp *removedPostings) Next() bool {
   674  	if !rp.initialized {
   675  		rp.fok = rp.full.Next()
   676  		rp.rok = rp.remove.Next()
   677  		rp.initialized = true
   678  	}
   679  	for {
   680  		if !rp.fok {
   681  			return false
   682  		}
   683  
   684  		if !rp.rok {
   685  			rp.cur = rp.full.At()
   686  			rp.fok = rp.full.Next()
   687  			return true
   688  		}
   689  
   690  		fcur, rcur := rp.full.At(), rp.remove.At()
   691  		if fcur < rcur {
   692  			rp.cur = fcur
   693  			rp.fok = rp.full.Next()
   694  
   695  			return true
   696  		} else if rcur < fcur {
   697  			// Forward the remove postings to the right position.
   698  			rp.rok = rp.remove.Seek(fcur)
   699  		} else {
   700  			// Skip the current posting.
   701  			rp.fok = rp.full.Next()
   702  		}
   703  	}
   704  }
   705  
   706  func (rp *removedPostings) Seek(id storage.SeriesRef) bool {
   707  	if rp.cur >= id {
   708  		return true
   709  	}
   710  
   711  	rp.fok = rp.full.Seek(id)
   712  	rp.rok = rp.remove.Seek(id)
   713  	rp.initialized = true
   714  
   715  	return rp.Next()
   716  }
   717  
   718  func (rp *removedPostings) Err() error {
   719  	if rp.full.Err() != nil {
   720  		return rp.full.Err()
   721  	}
   722  
   723  	return rp.remove.Err()
   724  }
   725  
   726  // ListPostings implements the Postings interface over a plain list.
   727  type ListPostings struct {
   728  	list []storage.SeriesRef
   729  	cur  storage.SeriesRef
   730  }
   731  
   732  func NewListPostings(list []storage.SeriesRef) Postings {
   733  	return newListPostings(list...)
   734  }
   735  
   736  func newListPostings(list ...storage.SeriesRef) *ListPostings {
   737  	return &ListPostings{list: list}
   738  }
   739  
   740  func (it *ListPostings) At() storage.SeriesRef {
   741  	return it.cur
   742  }
   743  
   744  func (it *ListPostings) Next() bool {
   745  	if len(it.list) > 0 {
   746  		it.cur = it.list[0]
   747  		it.list = it.list[1:]
   748  		return true
   749  	}
   750  	it.cur = 0
   751  	return false
   752  }
   753  
   754  func (it *ListPostings) Seek(x storage.SeriesRef) bool {
   755  	// If the current value satisfies, then return.
   756  	if it.cur >= x {
   757  		return true
   758  	}
   759  	if len(it.list) == 0 {
   760  		return false
   761  	}
   762  
   763  	// Do binary search between current position and end.
   764  	i := sort.Search(len(it.list), func(i int) bool {
   765  		return it.list[i] >= x
   766  	})
   767  	if i < len(it.list) {
   768  		it.cur = it.list[i]
   769  		it.list = it.list[i+1:]
   770  		return true
   771  	}
   772  	it.list = nil
   773  	return false
   774  }
   775  
   776  func (it *ListPostings) Err() error {
   777  	return nil
   778  }
   779  
   780  // bigEndianPostings implements the Postings interface over a byte stream of
   781  // big endian numbers.
   782  type bigEndianPostings struct {
   783  	list []byte
   784  	cur  uint32
   785  }
   786  
   787  func newBigEndianPostings(list []byte) *bigEndianPostings {
   788  	return &bigEndianPostings{list: list}
   789  }
   790  
   791  func (it *bigEndianPostings) At() storage.SeriesRef {
   792  	return storage.SeriesRef(it.cur)
   793  }
   794  
   795  func (it *bigEndianPostings) Next() bool {
   796  	if len(it.list) >= 4 {
   797  		it.cur = binary.BigEndian.Uint32(it.list)
   798  		it.list = it.list[4:]
   799  		return true
   800  	}
   801  	return false
   802  }
   803  
   804  func (it *bigEndianPostings) Seek(x storage.SeriesRef) bool {
   805  	if storage.SeriesRef(it.cur) >= x {
   806  		return true
   807  	}
   808  
   809  	num := len(it.list) / 4
   810  	// Do binary search between current position and end.
   811  	i := sort.Search(num, func(i int) bool {
   812  		return binary.BigEndian.Uint32(it.list[i*4:]) >= uint32(x)
   813  	})
   814  	if i < num {
   815  		j := i * 4
   816  		it.cur = binary.BigEndian.Uint32(it.list[j:])
   817  		it.list = it.list[j+4:]
   818  		return true
   819  	}
   820  	it.list = nil
   821  	return false
   822  }
   823  
   824  func (it *bigEndianPostings) Err() error {
   825  	return nil
   826  }
   827  
   828  // seriesRefSlice attaches the methods of sort.Interface to []storage.SeriesRef, sorting in increasing order.
   829  type seriesRefSlice []storage.SeriesRef
   830  
   831  func (x seriesRefSlice) Len() int           { return len(x) }
   832  func (x seriesRefSlice) Less(i, j int) bool { return x[i] < x[j] }
   833  func (x seriesRefSlice) Swap(i, j int)      { x[i], x[j] = x[j], x[i] }
   834  
   835  type ShardedPostings struct {
   836  	p                    Postings
   837  	minOffset, maxOffset uint64
   838  	initialized          bool
   839  }
   840  
   841  // Note: shardedPostings can technically return more series than just those that correspond to
   842  // the requested shard. This is because we do fingperint/offset sampling in TSDB so we won't know exactly
   843  // which offsets to start/end at, but will likely buffer a little on each end, so they still need to be
   844  // checked for shard inclusiveness.
   845  // For example (below), given a shard, we'll likely return a slight superset of offsets surrounding the shard.
   846  // ---[shard0]--- # Shard membership
   847  // -[--shard0--]- # Series returned by shardedPostings
   848  func NewShardedPostings(p Postings, shard ShardAnnotation, offsets FingerprintOffsets) *ShardedPostings {
   849  	min, max := offsets.Range(shard)
   850  	return &ShardedPostings{
   851  		p:         p,
   852  		minOffset: min,
   853  		maxOffset: max,
   854  	}
   855  }
   856  
   857  // Next advances the iterator and returns true if another value was found.
   858  func (sp *ShardedPostings) Next() bool {
   859  	// fast forward to the point we know we'll have to start checking
   860  	if !sp.initialized {
   861  		sp.initialized = true
   862  		// Underlying bigEndianPostings doesn't play nice with Seek(0)
   863  		// so we first advance manually once
   864  		if ok := sp.p.Next(); !ok {
   865  			return false
   866  		}
   867  		return sp.Seek(0)
   868  	}
   869  	ok := sp.p.Next()
   870  	if !ok {
   871  		return false
   872  	}
   873  
   874  	if sp.p.At() >= storage.SeriesRef(sp.maxOffset) {
   875  		return false
   876  	}
   877  
   878  	return true
   879  }
   880  
   881  // Seek advances the iterator to value v or greater and returns
   882  // true if a value was found.
   883  func (sp *ShardedPostings) Seek(v storage.SeriesRef) (res bool) {
   884  	if v >= storage.SeriesRef(sp.maxOffset) {
   885  		return false
   886  	}
   887  	if v < storage.SeriesRef(sp.minOffset) {
   888  		v = storage.SeriesRef(sp.minOffset)
   889  	}
   890  	return sp.p.Seek(v)
   891  }
   892  
   893  // At returns the value at the current iterator position.
   894  func (sp *ShardedPostings) At() storage.SeriesRef {
   895  	return sp.p.At()
   896  }
   897  
   898  // Err returns the last error of the iterator.
   899  func (sp *ShardedPostings) Err() (err error) {
   900  	return sp.p.Err()
   901  }