github.com/grafana/pyroscope@v1.18.0/pkg/block/metadata/metadata_labels.go (about)

     1  package metadata
     2  
     3  import (
     4  	goiter "iter"
     5  	"slices"
     6  	"strings"
     7  	"unsafe"
     8  
     9  	"github.com/prometheus/prometheus/model/labels"
    10  
    11  	metastorev1 "github.com/grafana/pyroscope/api/gen/proto/go/metastore/v1"
    12  	typesv1 "github.com/grafana/pyroscope/api/gen/proto/go/types/v1"
    13  	"github.com/grafana/pyroscope/pkg/iter"
    14  )
    15  
    16  // TODO(kolesnikovae): LabelBuilder pool.
    17  
// Reserved label names and values referenced by the metadata package.
//
// NOTE(review): the meaning of each constant is inferred from its
// identifier only (a tenant-wide dataset marker, the value identifying
// the dataset TSDB index, and a marker for unsymbolized data) —
// confirm against the call sites before relying on it.
const (
	LabelNameTenantDataset     = "__tenant_dataset__"
	LabelValueDatasetTSDBIndex = "dataset_tsdb_index"
	LabelNameUnsymbolized      = "__unsymbolized__"
)
    23  
// LabelBuilder accumulates label sets in the flat, length-prefixed
// encoding used by block metadata: each set is stored as the number of
// pairs followed by that many name/value indices into a string table.
//
// Fields:
//   - strings: the table every label name and value is interned into.
//   - labels: the encoded label sets accumulated so far.
//   - seen: label sets already added through Put, keyed by the raw bytes
//     of their pairs; allocated lazily by Put and cleared by Build.
type LabelBuilder struct {
	strings *StringTable
	labels  []int32
	seen    map[string]struct{}
}
    29  
    30  func NewLabelBuilder(strings *StringTable) *LabelBuilder {
    31  	return &LabelBuilder{strings: strings}
    32  }
    33  
    34  func (lb *LabelBuilder) WithLabelSet(pairs ...string) *LabelBuilder {
    35  	if len(pairs)%2 == 1 {
    36  		panic("expected even number of values")
    37  	}
    38  	s := len(lb.labels)
    39  	lb.labels = slices.Grow(lb.labels, len(pairs)+1)[:s+len(pairs)+1]
    40  	lb.labels[s] = int32(len(pairs) / 2)
    41  	for i := range pairs {
    42  		lb.labels[s+i+1] = lb.strings.Put(pairs[i])
    43  	}
    44  	return lb
    45  }
    46  
    47  func (lb *LabelBuilder) Put(x []int32, strings []string) {
    48  	if len(x) == 0 {
    49  		return
    50  	}
    51  	if lb.seen == nil {
    52  		lb.seen = make(map[string]struct{})
    53  	}
    54  	var skip int
    55  	for i, v := range x {
    56  		if i == skip {
    57  			skip += int(v)*2 + 1
    58  			continue
    59  		}
    60  		x[i] = lb.strings.Put(strings[v])
    61  	}
    62  	lb.labels = slices.Grow(lb.labels, len(x))
    63  	pairs := LabelPairs(x)
    64  	for pairs.Next() {
    65  		lb.putPairs(pairs.At())
    66  	}
    67  }
    68  
    69  func (lb *LabelBuilder) putPairs(p []int32) {
    70  	if len(p) == 0 {
    71  		return
    72  	}
    73  	// We only copy the labels if this is the first time we see it.
    74  	// The fact that we assume that the order of labels is the same
    75  	// across all datasets is a precondition, therefore, we can
    76  	// use pairs as a key.
    77  	k := int32string(p)
    78  	if _, ok := lb.seen[k]; ok {
    79  		return
    80  	}
    81  	lb.labels = append(lb.labels, int32(len(p)/2))
    82  	lb.labels = append(lb.labels, p...)
    83  	lb.seen[strings.Clone(k)] = struct{}{}
    84  }
    85  
    86  func (lb *LabelBuilder) Build() []int32 {
    87  	c := make([]int32, len(lb.labels))
    88  	copy(c, lb.labels)
    89  	lb.labels = lb.labels[:0]
    90  	clear(lb.seen)
    91  	return c
    92  }
    93  
    94  func FindDatasets(md *metastorev1.BlockMeta, matchers ...*labels.Matcher) goiter.Seq[*metastorev1.Dataset] {
    95  	st := NewStringTable()
    96  	st.Import(md)
    97  	lm := NewLabelMatcher(st.Strings, matchers)
    98  	if !lm.IsValid() {
    99  		return func(func(*metastorev1.Dataset) bool) {}
   100  	}
   101  	return func(yield func(*metastorev1.Dataset) bool) {
   102  		for i := range md.Datasets {
   103  			ds := md.Datasets[i]
   104  			if !lm.Matches(ds.Labels) {
   105  				continue
   106  			}
   107  			if !yield(ds) {
   108  				return
   109  			}
   110  		}
   111  	}
   112  }
   113  
   114  func LabelPairs(ls []int32) iter.Iterator[[]int32] { return &labelPairs{labels: ls} }
   115  
   116  type labelPairs struct {
   117  	labels []int32
   118  	off    int
   119  	len    int
   120  }
   121  
   122  func (p *labelPairs) Err() error   { return nil }
   123  func (p *labelPairs) Close() error { return nil }
   124  
   125  func (p *labelPairs) At() []int32 { return p.labels[p.off : p.off+p.len] }
   126  
   127  func (p *labelPairs) Next() bool {
   128  	if p.len > 0 {
   129  		p.off += p.len
   130  	}
   131  	if p.off >= len(p.labels) {
   132  		return false
   133  	}
   134  	p.len = int(p.labels[p.off]) * 2
   135  	p.off++
   136  	return p.off+p.len <= len(p.labels)
   137  }
   138  
// LabelMatcher evaluates label matchers against length-prefixed label
// sets whose names and values are indices into a string table.
//
// Fields:
//   - eq: equality and regexp matchers; each must find its label in a
//     set and match the value.
//   - neq: negative (not-equal, not-regexp) matchers.
//   - keep: string-table indices of the label names to keep when
//     collecting matches; values below 1 mean the name was not found.
//   - keepStr: the keep label names as given to NewLabelMatcher.
//   - strings: the string table used to resolve label values.
//   - checked: cache of match results keyed by the raw bytes of a label
//     set's pairs.
//   - matched: number of distinct label sets that matched.
//   - nomatch: set when an equality matcher refers to a name or a
//     non-empty value that is absent from the string table, i.e. nothing
//     can match.
type LabelMatcher struct {
	eq      []matcher
	neq     []matcher
	keep    []int32
	keepStr []string

	strings []string
	checked map[string]bool
	matched int32
	nomatch bool
}
   150  
// matcher couples a label matcher with the string-table index of the
// label name it applies to. NewLabelMatcher leaves name at 0 when the
// label name has not been resolved in the string table.
type matcher struct {
	*labels.Matcher
	name int32
}
   155  
   156  func NewLabelMatcher(strings []string, matchers []*labels.Matcher, keep ...string) *LabelMatcher {
   157  	s := make(map[string]int32, len(matchers)*2+len(keep))
   158  	for _, m := range matchers {
   159  		s[m.Name] = 0
   160  		s[m.Value] = 0
   161  	}
   162  	for _, k := range keep {
   163  		s[k] = 0
   164  	}
   165  	for i, x := range strings {
   166  		if v, ok := s[x]; ok && v == 0 {
   167  			s[x] = int32(i)
   168  		}
   169  	}
   170  	lm := &LabelMatcher{
   171  		eq:      make([]matcher, 0, len(matchers)),
   172  		neq:     make([]matcher, 0, len(matchers)),
   173  		keep:    make([]int32, len(keep)),
   174  		keepStr: keep,
   175  		checked: make(map[string]bool),
   176  		strings: strings,
   177  	}
   178  	for _, m := range matchers {
   179  		if m.Name == "" {
   180  			continue
   181  		}
   182  		n := s[m.Name]
   183  		switch m.Type {
   184  		case labels.MatchEqual:
   185  			if v := s[m.Value]; m.Value != "" && (n < 1 || v < 1) {
   186  				lm.nomatch = true
   187  				return lm
   188  			}
   189  			lm.eq = append(lm.eq, matcher{Matcher: m, name: n})
   190  		case labels.MatchRegexp:
   191  			lm.eq = append(lm.eq, matcher{Matcher: m, name: n})
   192  		case labels.MatchNotEqual, labels.MatchNotRegexp:
   193  			lm.neq = append(lm.neq, matcher{Matcher: m, name: n})
   194  		}
   195  	}
   196  	// Find the indices of the labels to keep.
   197  	// If the label is not found or is an empty string,
   198  	// it will always be an empty string at the output.
   199  	for i, k := range keep {
   200  		lm.keep[i] = s[k]
   201  	}
   202  	return lm
   203  }
   204  
   205  func (lm *LabelMatcher) IsValid() bool { return !lm.nomatch }
   206  
   207  // Matches reports whether the given set of labels matches the matchers.
   208  // Note that at least one labels set must satisfy matchers to return true.
   209  // For negations, all labels sets must satisfy the matchers to return true.
   210  // TODO(kolesnikovae): This might be really confusing; it's worth relaxing it.
   211  func (lm *LabelMatcher) Matches(labels []int32) bool {
   212  	pairs := LabelPairs(labels)
   213  	var matches bool
   214  	for pairs.Next() {
   215  		if lm.MatchesPairs(pairs.At()) {
   216  			matches = true
   217  			// If no keep labels are specified, we can return early.
   218  			// Otherwise, we need to scan all the label sets to
   219  			// collect matching ones.
   220  			if len(lm.keep) == 0 {
   221  				return true
   222  			}
   223  		}
   224  	}
   225  	return matches
   226  }
   227  
   228  // CollectMatches returns a new set of labels with only the labels
   229  // that satisfy the match expressions and that are in the keep list.
   230  func (lm *LabelMatcher) CollectMatches(dst, labels []int32) ([]int32, bool) {
   231  	pairs := LabelPairs(labels)
   232  	var matches bool
   233  	for pairs.Next() {
   234  		p := pairs.At()
   235  		if lm.MatchesPairs(p) {
   236  			matches = true
   237  			// If no keep labels are specified, we can return early.
   238  			// Otherwise, we need to scan all the label sets to
   239  			// collect matching ones.
   240  			if len(lm.keep) == 0 {
   241  				return dst, true
   242  			}
   243  			dst = lm.strip(dst, p)
   244  		}
   245  	}
   246  	return dst, matches
   247  }
   248  
   249  // strip returns a new length-prefixed slice of pairs
   250  // with only the labels that are in the keep list.
   251  func (lm *LabelMatcher) strip(dst, pairs []int32) []int32 {
   252  	// Length-prefix stub: we only know it after we iterate
   253  	// over the pairs.
   254  	s := len(dst)
   255  	c := len(lm.keep) * 2
   256  	dst = slices.Grow(dst, c+1)
   257  	dst = append(dst, 0)
   258  	var m int32
   259  	for _, n := range lm.keep {
   260  		if n < 1 {
   261  			// Ignore not found labels.
   262  			continue
   263  		}
   264  		for k := 0; k < len(pairs); k += 2 {
   265  			if pairs[k] == n {
   266  				dst = append(dst, pairs[k], pairs[k+1])
   267  				m++
   268  				break
   269  			}
   270  		}
   271  	}
   272  	// Write the actual number of pairs as a prefix.
   273  	dst[s] = m
   274  	return dst
   275  }
   276  
   277  func (lm *LabelMatcher) MatchesPairs(pairs []int32) bool {
   278  	k := int32string(pairs)
   279  	m, found := lm.checked[k]
   280  	if !found {
   281  		m = lm.checkMatches(pairs)
   282  		lm.checked[strings.Clone(k)] = m
   283  		if m {
   284  			lm.matched++
   285  		}
   286  	}
   287  	return m
   288  }
   289  
   290  func (lm *LabelMatcher) checkMatches(pairs []int32) bool {
   291  	if len(pairs)%2 == 1 {
   292  		// Invalid pairs.
   293  		return false
   294  	}
   295  	for _, m := range lm.eq {
   296  		var matches bool
   297  		for k := 0; k < len(pairs); k += 2 {
   298  			if pairs[k] != m.name {
   299  				continue
   300  			}
   301  			v := lm.strings[pairs[k+1]]
   302  			matches = m.Matches(v)
   303  			break
   304  		}
   305  		if !matches {
   306  			return false
   307  		}
   308  	}
   309  	// At this point, we know that all eq matchers have matched.
   310  	for _, m := range lm.neq {
   311  		for k := 0; k < len(pairs); k += 2 {
   312  			if pairs[k] != m.name {
   313  				continue
   314  			}
   315  			v := lm.strings[pairs[k+1]]
   316  			if !m.Matches(v) {
   317  				return false
   318  			}
   319  			break
   320  		}
   321  	}
   322  	return true
   323  }
   324  
// LabelsCollector gathers the unique combinations of values of a fixed
// list of labels from the label sets matched by one or more
// LabelMatcher instances.
//
// Fields:
//   - strings: the collector's own string table; label names and
//     collected values are interned into it.
//   - dict: the unique value combinations seen so far, keyed by the raw
//     bytes of their string indices (one index per key).
//   - tmp: scratch buffer with one slot per key, reused between sets.
//   - keys: string indices of the label names to collect.
type LabelsCollector struct {
	strings *StringTable
	dict    map[string]struct{}
	tmp     []int32
	keys    []int32
}
   331  
   332  func NewLabelsCollector(labels ...string) *LabelsCollector {
   333  	s := &LabelsCollector{
   334  		dict:    make(map[string]struct{}),
   335  		strings: NewStringTable(),
   336  	}
   337  	s.keys = make([]int32, len(labels))
   338  	s.tmp = make([]int32, len(labels))
   339  	for i, k := range labels {
   340  		s.keys[i] = s.strings.Put(k)
   341  	}
   342  	return s
   343  }
   344  
   345  // CollectMatches from the given matcher.
   346  //
   347  // The matcher and collect MUST be configured to keep the same
   348  // set of labels, in the exact order.
   349  //
   350  // A single collector may collect labels from multiple matchers.
   351  func (s *LabelsCollector) CollectMatches(lm *LabelMatcher) {
   352  	if len(lm.keep) == 0 || lm.nomatch || len(lm.checked) == 0 {
   353  		return
   354  	}
   355  	for set, match := range lm.checked {
   356  		if !match {
   357  			continue
   358  		}
   359  		// Project values of the keep labels to tmp,
   360  		// and resolve their strings.
   361  		clear(s.tmp)
   362  		p := int32s(set)
   363  		// Note that we're using the matcher's keep labels
   364  		// and not local 'keys'.
   365  		for i, n := range lm.keep {
   366  			for k := 0; k < len(p); k += 2 {
   367  				if p[k] == n {
   368  					s.tmp[i] = p[k+1]
   369  					break
   370  				}
   371  			}
   372  		}
   373  		for i := range s.tmp {
   374  			s.tmp[i] = s.strings.Put(lm.strings[s.tmp[i]])
   375  		}
   376  		// Check if we already saw the label set.
   377  		x := int32string(s.tmp)
   378  		if _, ok := s.dict[x]; ok {
   379  			continue
   380  		}
   381  		s.dict[strings.Clone(x)] = struct{}{}
   382  	}
   383  }
   384  
   385  func (s *LabelsCollector) Unique() goiter.Seq[*typesv1.Labels] {
   386  	return func(yield func(*typesv1.Labels) bool) {
   387  		for k := range s.dict {
   388  			l := &typesv1.Labels{Labels: make([]*typesv1.LabelPair, len(s.keys))}
   389  			for i, v := range int32s(k) {
   390  				l.Labels[i] = &typesv1.LabelPair{
   391  					Name:  s.strings.Strings[s.keys[i]],
   392  					Value: s.strings.Strings[v],
   393  				}
   394  			}
   395  			if !yield(l) {
   396  				return
   397  			}
   398  		}
   399  	}
   400  }
   401  
   402  func int32string(data []int32) string {
   403  	if len(data) == 0 {
   404  		return ""
   405  	}
   406  	return unsafe.String((*byte)(unsafe.Pointer(&data[0])), len(data)*4)
   407  }
   408  
   409  func int32s(s string) []int32 {
   410  	if len(s) == 0 {
   411  		return nil
   412  	}
   413  	return unsafe.Slice((*int32)(unsafe.Pointer(unsafe.StringData(s))), len(s)/4)
   414  }