github.com/grafana/pyroscope@v1.18.0/pkg/phlaredb/tsdb/bitprefix.go (about)

     1  package tsdb
     2  
     3  import (
     4  	"fmt"
     5  	"sort"
     6  
     7  	"github.com/prometheus/common/model"
     8  	"github.com/prometheus/prometheus/model/labels"
     9  
    10  	typesv1 "github.com/grafana/pyroscope/api/gen/proto/go/types/v1"
    11  	phlaremodel "github.com/grafana/pyroscope/pkg/model"
    12  	"github.com/grafana/pyroscope/pkg/phlaredb/tsdb/index"
    13  	"github.com/grafana/pyroscope/pkg/phlaredb/tsdb/shard"
    14  )
    15  
// BitPrefixInvertedIndex is another inverted index implementation
// that uses the bit prefix sharding algorithm in tsdb/index/shard.go
// instead of a modulo approach.
// This is the standard for TSDB compatibility because
// the same series must resolve to the same shard (for each period config),
// whether it's resolved on the ingester or via the store.
type BitPrefixInvertedIndex struct {
	totalShards uint32        // shard factor the index was constructed with
	shards      []*indexShard // one entry per shard number; a fingerprint's slot is picked from its most significant bits
}
    26  
    27  func ValidateBitPrefixShardFactor(factor uint32) error {
    28  	if requiredBits := index.NewShard(0, factor).RequiredBits(); 1<<requiredBits != factor {
    29  		return fmt.Errorf("incompatible inverted index shard factor on ingester: it must be a power of two, got %d", factor)
    30  	}
    31  	return nil
    32  }
    33  
    34  func NewBitPrefixWithShards(totalShards uint32) (*BitPrefixInvertedIndex, error) {
    35  	if err := ValidateBitPrefixShardFactor(totalShards); err != nil {
    36  		return nil, err
    37  	}
    38  
    39  	shards := make([]*indexShard, totalShards)
    40  	for i := uint32(0); i < totalShards; i++ {
    41  		shards[i] = &indexShard{
    42  			idx:   map[string]indexEntry{},
    43  			shard: i,
    44  		}
    45  	}
    46  	return &BitPrefixInvertedIndex{
    47  		totalShards: totalShards,
    48  		shards:      shards,
    49  	}, nil
    50  }
    51  
    52  func (ii *BitPrefixInvertedIndex) getShards(shard *shard.Annotation) ([]*indexShard, bool) {
    53  	if shard == nil {
    54  		return ii.shards, false
    55  	}
    56  
    57  	// When comparing a higher shard factor to a lower inverted index shard factor
    58  	// we must filter resulting fingerprints as the lower shard factor in the
    59  	// inverted index is a superset of the requested factor.
    60  	//
    61  	// For instance, the 3_of_4 shard factor maps to the bit prefix 0b11.
    62  	// If the inverted index only has a factor of 2, we'll need to check the 0b1
    63  	// prefixed shard (which contains the 0b10 and 0b11 prefixes).
    64  	// Conversely, if the requested shard is 1_of_2, but the index has a factor of 4,
    65  	// we can _exactly_ match ob1 => (ob10, ob11) and know all fingerprints in those
    66  	// resulting shards have the requested ob1 prefix (don't need to filter).
    67  	var filter bool
    68  	if shard.Of > len(ii.shards) {
    69  		filter = true
    70  	}
    71  
    72  	requestedShard := shard.TSDB()
    73  	minFp, maxFp := requestedShard.Bounds()
    74  
    75  	// Determine how many bits we need to take from
    76  	// the requested shard's min/max fingerprint values
    77  	// in order to calculate the indices for the inverted index's
    78  	// shard factor.
    79  	requiredBits := index.NewShard(0, uint32(len(ii.shards))).RequiredBits()
    80  	lowerIdx := int(minFp >> (64 - requiredBits))
    81  	upperIdx := int(maxFp >> (64 - requiredBits))
    82  
    83  	// If the upper bound's shard doesn't align exactly
    84  	// with the maximum fingerprint, we must also
    85  	// check the subsequent shard.
    86  	// This happens in two cases:
    87  	// 1) When requesting the last shard of any factor.
    88  	// This accounts for zero indexing in our sharding logic
    89  	// to successfully request `shards[start:len(shards)]`
    90  	// 2) When requesting the _first_ shard of a larger factor
    91  	// than the index uses. In this case, the required_bits are not
    92  	// enough and the requested end prefix gets trimmed.
    93  	// If confused, comment out this line and see which tests fail.
    94  	if (upperIdx << (64 - requiredBits)) != int(maxFp) {
    95  		upperIdx++
    96  	}
    97  
    98  	return ii.shards[lowerIdx:upperIdx], filter
    99  }
   100  
   101  func (ii *BitPrefixInvertedIndex) shardForFP(fp model.Fingerprint) int {
   102  	localShard := index.NewShard(0, uint32(len(ii.shards)))
   103  	return int(fp >> (64 - localShard.RequiredBits()))
   104  }
   105  
   106  func (ii *BitPrefixInvertedIndex) validateShard(shard *shard.Annotation) error {
   107  	if shard == nil {
   108  		return nil
   109  	}
   110  
   111  	if 1<<(shard.TSDB().RequiredBits()) != shard.Of {
   112  		return fmt.Errorf("shard factor must be a power of two, got %d", shard.Of)
   113  	}
   114  	return nil
   115  }
   116  
   117  // Add a fingerprint under the specified labels.
   118  // NOTE: memory for `labels` is unsafe; anything retained beyond the
   119  // life of this function must be copied
   120  func (ii *BitPrefixInvertedIndex) Add(labels phlaremodel.Labels, fp model.Fingerprint) phlaremodel.Labels {
   121  	// add() returns 'interned' values so the original labels are not retained
   122  	return ii.shards[ii.shardForFP(fp)].add(labels, fp)
   123  }
   124  
   125  // Lookup all fingerprints for the provided matchers.
   126  func (ii *BitPrefixInvertedIndex) Lookup(matchers []*labels.Matcher, shard *shard.Annotation) ([]model.Fingerprint, error) {
   127  	if err := ii.validateShard(shard); err != nil {
   128  		return nil, err
   129  	}
   130  
   131  	var result []model.Fingerprint
   132  	shards, filter := ii.getShards(shard)
   133  
   134  	// if no matcher is specified, all fingerprints would be returned
   135  	if len(matchers) == 0 {
   136  		for i := range shards {
   137  			fps := shards[i].allFPs()
   138  			result = append(result, fps...)
   139  		}
   140  	} else {
   141  		for i := range shards {
   142  			fps := shards[i].lookup(matchers)
   143  			result = append(result, fps...)
   144  		}
   145  	}
   146  
   147  	// Because bit prefix order is also ascending order,
   148  	// the merged fingerprints from ascending shards are also in order.
   149  	if filter {
   150  		minFP, maxFP := shard.TSDB().Bounds()
   151  		minIdx := sort.Search(len(result), func(i int) bool {
   152  			return result[i] >= minFP
   153  		})
   154  
   155  		maxIdx := sort.Search(len(result), func(i int) bool {
   156  			return result[i] >= maxFP
   157  		})
   158  
   159  		result = result[minIdx:maxIdx]
   160  	}
   161  
   162  	return result, nil
   163  }
   164  
   165  // LabelNames returns all label names.
   166  func (ii *BitPrefixInvertedIndex) LabelNames(shard *shard.Annotation) ([]string, error) {
   167  	if err := ii.validateShard(shard); err != nil {
   168  		return nil, err
   169  	}
   170  
   171  	var extractor func(unlockIndex) []string
   172  	shards, filter := ii.getShards(shard)
   173  
   174  	// If we need to check shard inclusion, we have to do it the expensive way :(
   175  	// Therefore it's more performant to request shard factors lower or equal to the
   176  	// inverted index factor
   177  	if filter {
   178  		s := shard.TSDB()
   179  
   180  		extractor = func(x unlockIndex) (results []string) {
   181  		outer:
   182  			for name, entry := range x {
   183  				for _, valEntry := range entry.fps {
   184  					for _, fp := range valEntry.fps {
   185  						if s.Match(fp) {
   186  							results = append(results, name)
   187  							continue outer
   188  						}
   189  					}
   190  				}
   191  			}
   192  
   193  			return results
   194  		}
   195  	}
   196  
   197  	results := make([][]string, 0, len(shards))
   198  	for i := range shards {
   199  		shardResult := shards[i].labelNames(extractor)
   200  		results = append(results, shardResult)
   201  	}
   202  
   203  	return mergeStringSlices(results), nil
   204  }
   205  
   206  // LabelValues returns the values for the given label.
   207  func (ii *BitPrefixInvertedIndex) LabelValues(name string, shard *shard.Annotation) ([]string, error) {
   208  	if err := ii.validateShard(shard); err != nil {
   209  		return nil, err
   210  	}
   211  
   212  	var extractor func(indexEntry) []string
   213  	shards, filter := ii.getShards(shard)
   214  	if filter {
   215  		s := shard.TSDB()
   216  
   217  		extractor = func(x indexEntry) []string {
   218  			results := make([]string, 0, len(x.fps))
   219  
   220  		outer:
   221  			for val, valEntry := range x.fps {
   222  				for _, fp := range valEntry.fps {
   223  					if s.Match(fp) {
   224  						results = append(results, val)
   225  						continue outer
   226  					}
   227  				}
   228  			}
   229  			return results
   230  		}
   231  	}
   232  	results := make([][]string, 0, len(shards))
   233  
   234  	for i := range shards {
   235  		shardResult := shards[i].labelValues(name, extractor)
   236  		results = append(results, shardResult)
   237  	}
   238  
   239  	return mergeStringSlices(results), nil
   240  }
   241  
   242  // Delete a fingerprint with the given label pairs.
   243  func (ii *BitPrefixInvertedIndex) Delete(labels []*typesv1.LabelPair, fp model.Fingerprint) {
   244  	localShard := index.NewShard(0, uint32(len(ii.shards)))
   245  	idx := int(fp >> (64 - localShard.RequiredBits()))
   246  	ii.shards[idx].delete(labels, fp)
   247  }