github.com/grafana/pyroscope@v1.18.0/pkg/block/metadata/metadata_labels.go (about) 1 package metadata 2 3 import ( 4 goiter "iter" 5 "slices" 6 "strings" 7 "unsafe" 8 9 "github.com/prometheus/prometheus/model/labels" 10 11 metastorev1 "github.com/grafana/pyroscope/api/gen/proto/go/metastore/v1" 12 typesv1 "github.com/grafana/pyroscope/api/gen/proto/go/types/v1" 13 "github.com/grafana/pyroscope/pkg/iter" 14 ) 15 16 // TODO(kolesnikovae): LabelBuilder pool. 17 18 const ( 19 LabelNameTenantDataset = "__tenant_dataset__" 20 LabelValueDatasetTSDBIndex = "dataset_tsdb_index" 21 LabelNameUnsymbolized = "__unsymbolized__" 22 ) 23 24 type LabelBuilder struct { 25 strings *StringTable 26 labels []int32 27 seen map[string]struct{} 28 } 29 30 func NewLabelBuilder(strings *StringTable) *LabelBuilder { 31 return &LabelBuilder{strings: strings} 32 } 33 34 func (lb *LabelBuilder) WithLabelSet(pairs ...string) *LabelBuilder { 35 if len(pairs)%2 == 1 { 36 panic("expected even number of values") 37 } 38 s := len(lb.labels) 39 lb.labels = slices.Grow(lb.labels, len(pairs)+1)[:s+len(pairs)+1] 40 lb.labels[s] = int32(len(pairs) / 2) 41 for i := range pairs { 42 lb.labels[s+i+1] = lb.strings.Put(pairs[i]) 43 } 44 return lb 45 } 46 47 func (lb *LabelBuilder) Put(x []int32, strings []string) { 48 if len(x) == 0 { 49 return 50 } 51 if lb.seen == nil { 52 lb.seen = make(map[string]struct{}) 53 } 54 var skip int 55 for i, v := range x { 56 if i == skip { 57 skip += int(v)*2 + 1 58 continue 59 } 60 x[i] = lb.strings.Put(strings[v]) 61 } 62 lb.labels = slices.Grow(lb.labels, len(x)) 63 pairs := LabelPairs(x) 64 for pairs.Next() { 65 lb.putPairs(pairs.At()) 66 } 67 } 68 69 func (lb *LabelBuilder) putPairs(p []int32) { 70 if len(p) == 0 { 71 return 72 } 73 // We only copy the labels if this is the first time we see it. 74 // The fact that we assume that the order of labels is the same 75 // across all datasets is a precondition, therefore, we can 76 // use pairs as a key. 77 k := int32string(p) 78 if _, ok := lb.seen[k]; ok { 79 return 80 } 81 lb.labels = append(lb.labels, int32(len(p)/2)) 82 lb.labels = append(lb.labels, p...) 83 lb.seen[strings.Clone(k)] = struct{}{} 84 } 85 86 func (lb *LabelBuilder) Build() []int32 { 87 c := make([]int32, len(lb.labels)) 88 copy(c, lb.labels) 89 lb.labels = lb.labels[:0] 90 clear(lb.seen) 91 return c 92 } 93 94 func FindDatasets(md *metastorev1.BlockMeta, matchers ...*labels.Matcher) goiter.Seq[*metastorev1.Dataset] { 95 st := NewStringTable() 96 st.Import(md) 97 lm := NewLabelMatcher(st.Strings, matchers) 98 if !lm.IsValid() { 99 return func(func(*metastorev1.Dataset) bool) {} 100 } 101 return func(yield func(*metastorev1.Dataset) bool) { 102 for i := range md.Datasets { 103 ds := md.Datasets[i] 104 if !lm.Matches(ds.Labels) { 105 continue 106 } 107 if !yield(ds) { 108 return 109 } 110 } 111 } 112 } 113 114 func LabelPairs(ls []int32) iter.Iterator[[]int32] { return &labelPairs{labels: ls} } 115 116 type labelPairs struct { 117 labels []int32 118 off int 119 len int 120 } 121 122 func (p *labelPairs) Err() error { return nil } 123 func (p *labelPairs) Close() error { return nil } 124 125 func (p *labelPairs) At() []int32 { return p.labels[p.off : p.off+p.len] } 126 127 func (p *labelPairs) Next() bool { 128 if p.len > 0 { 129 p.off += p.len 130 } 131 if p.off >= len(p.labels) { 132 return false 133 } 134 p.len = int(p.labels[p.off]) * 2 135 p.off++ 136 return p.off+p.len <= len(p.labels) 137 } 138 139 type LabelMatcher struct { 140 eq []matcher 141 neq []matcher 142 keep []int32 143 keepStr []string 144 145 strings []string 146 checked map[string]bool 147 matched int32 148 nomatch bool 149 } 150 151 type matcher struct { 152 *labels.Matcher 153 name int32 154 } 155 156 func NewLabelMatcher(strings []string, matchers []*labels.Matcher, keep ...string) *LabelMatcher { 157 s := make(map[string]int32, len(matchers)*2+len(keep)) 158 for _, m := range matchers { 159 s[m.Name] = 0 160 s[m.Value] = 0 161 } 162 for _, k := range keep { 163 s[k] = 0 164 } 165 for i, x := range strings { 166 if v, ok := s[x]; ok && v == 0 { 167 s[x] = int32(i) 168 } 169 } 170 lm := &LabelMatcher{ 171 eq: make([]matcher, 0, len(matchers)), 172 neq: make([]matcher, 0, len(matchers)), 173 keep: make([]int32, len(keep)), 174 keepStr: keep, 175 checked: make(map[string]bool), 176 strings: strings, 177 } 178 for _, m := range matchers { 179 if m.Name == "" { 180 continue 181 } 182 n := s[m.Name] 183 switch m.Type { 184 case labels.MatchEqual: 185 if v := s[m.Value]; m.Value != "" && (n < 1 || v < 1) { 186 lm.nomatch = true 187 return lm 188 } 189 lm.eq = append(lm.eq, matcher{Matcher: m, name: n}) 190 case labels.MatchRegexp: 191 lm.eq = append(lm.eq, matcher{Matcher: m, name: n}) 192 case labels.MatchNotEqual, labels.MatchNotRegexp: 193 lm.neq = append(lm.neq, matcher{Matcher: m, name: n}) 194 } 195 } 196 // Find the indices of the labels to keep. 197 // If the label is not found or is an empty string, 198 // it will always be an empty string at the output. 199 for i, k := range keep { 200 lm.keep[i] = s[k] 201 } 202 return lm 203 } 204 205 func (lm *LabelMatcher) IsValid() bool { return !lm.nomatch } 206 207 // Matches reports whether the given set of labels matches the matchers. 208 // Note that at least one labels set must satisfy matchers to return true. 209 // For negations, all labels sets must satisfy the matchers to return true. 210 // TODO(kolesnikovae): This might be really confusing; it's worth relaxing it. 211 func (lm *LabelMatcher) Matches(labels []int32) bool { 212 pairs := LabelPairs(labels) 213 var matches bool 214 for pairs.Next() { 215 if lm.MatchesPairs(pairs.At()) { 216 matches = true 217 // If no keep labels are specified, we can return early. 218 // Otherwise, we need to scan all the label sets to 219 // collect matching ones. 220 if len(lm.keep) == 0 { 221 return true 222 } 223 } 224 } 225 return matches 226 } 227 228 // CollectMatches returns a new set of labels with only the labels 229 // that satisfy the match expressions and that are in the keep list. 230 func (lm *LabelMatcher) CollectMatches(dst, labels []int32) ([]int32, bool) { 231 pairs := LabelPairs(labels) 232 var matches bool 233 for pairs.Next() { 234 p := pairs.At() 235 if lm.MatchesPairs(p) { 236 matches = true 237 // If no keep labels are specified, we can return early. 238 // Otherwise, we need to scan all the label sets to 239 // collect matching ones. 240 if len(lm.keep) == 0 { 241 return dst, true 242 } 243 dst = lm.strip(dst, p) 244 } 245 } 246 return dst, matches 247 } 248 249 // strip returns a new length-prefixed slice of pairs 250 // with only the labels that are in the keep list. 251 func (lm *LabelMatcher) strip(dst, pairs []int32) []int32 { 252 // Length-prefix stub: we only know it after we iterate 253 // over the pairs. 254 s := len(dst) 255 c := len(lm.keep) * 2 256 dst = slices.Grow(dst, c+1) 257 dst = append(dst, 0) 258 var m int32 259 for _, n := range lm.keep { 260 if n < 1 { 261 // Ignore not found labels. 262 continue 263 } 264 for k := 0; k < len(pairs); k += 2 { 265 if pairs[k] == n { 266 dst = append(dst, pairs[k], pairs[k+1]) 267 m++ 268 break 269 } 270 } 271 } 272 // Write the actual number of pairs as a prefix. 273 dst[s] = m 274 return dst 275 } 276 277 func (lm *LabelMatcher) MatchesPairs(pairs []int32) bool { 278 k := int32string(pairs) 279 m, found := lm.checked[k] 280 if !found { 281 m = lm.checkMatches(pairs) 282 lm.checked[strings.Clone(k)] = m 283 if m { 284 lm.matched++ 285 } 286 } 287 return m 288 } 289 290 func (lm *LabelMatcher) checkMatches(pairs []int32) bool { 291 if len(pairs)%2 == 1 { 292 // Invalid pairs. 293 return false 294 } 295 for _, m := range lm.eq { 296 var matches bool 297 for k := 0; k < len(pairs); k += 2 { 298 if pairs[k] != m.name { 299 continue 300 } 301 v := lm.strings[pairs[k+1]] 302 matches = m.Matches(v) 303 break 304 } 305 if !matches { 306 return false 307 } 308 } 309 // At this point, we know that all eq matchers have matched. 310 for _, m := range lm.neq { 311 for k := 0; k < len(pairs); k += 2 { 312 if pairs[k] != m.name { 313 continue 314 } 315 v := lm.strings[pairs[k+1]] 316 if !m.Matches(v) { 317 return false 318 } 319 break 320 } 321 } 322 return true 323 } 324 325 type LabelsCollector struct { 326 strings *StringTable 327 dict map[string]struct{} 328 tmp []int32 329 keys []int32 330 } 331 332 func NewLabelsCollector(labels ...string) *LabelsCollector { 333 s := &LabelsCollector{ 334 dict: make(map[string]struct{}), 335 strings: NewStringTable(), 336 } 337 s.keys = make([]int32, len(labels)) 338 s.tmp = make([]int32, len(labels)) 339 for i, k := range labels { 340 s.keys[i] = s.strings.Put(k) 341 } 342 return s 343 } 344 345 // CollectMatches from the given matcher. 346 // 347 // The matcher and collect MUST be configured to keep the same 348 // set of labels, in the exact order. 349 // 350 // A single collector may collect labels from multiple matchers. 351 func (s *LabelsCollector) CollectMatches(lm *LabelMatcher) { 352 if len(lm.keep) == 0 || lm.nomatch || len(lm.checked) == 0 { 353 return 354 } 355 for set, match := range lm.checked { 356 if !match { 357 continue 358 } 359 // Project values of the keep labels to tmp, 360 // and resolve their strings. 361 clear(s.tmp) 362 p := int32s(set) 363 // Note that we're using the matcher's keep labels 364 // and not local 'keys'. 365 for i, n := range lm.keep { 366 for k := 0; k < len(p); k += 2 { 367 if p[k] == n { 368 s.tmp[i] = p[k+1] 369 break 370 } 371 } 372 } 373 for i := range s.tmp { 374 s.tmp[i] = s.strings.Put(lm.strings[s.tmp[i]]) 375 } 376 // Check if we already saw the label set. 377 x := int32string(s.tmp) 378 if _, ok := s.dict[x]; ok { 379 continue 380 } 381 s.dict[strings.Clone(x)] = struct{}{} 382 } 383 } 384 385 func (s *LabelsCollector) Unique() goiter.Seq[*typesv1.Labels] { 386 return func(yield func(*typesv1.Labels) bool) { 387 for k := range s.dict { 388 l := &typesv1.Labels{Labels: make([]*typesv1.LabelPair, len(s.keys))} 389 for i, v := range int32s(k) { 390 l.Labels[i] = &typesv1.LabelPair{ 391 Name: s.strings.Strings[s.keys[i]], 392 Value: s.strings.Strings[v], 393 } 394 } 395 if !yield(l) { 396 return 397 } 398 } 399 } 400 } 401 402 func int32string(data []int32) string { 403 if len(data) == 0 { 404 return "" 405 } 406 return unsafe.String((*byte)(unsafe.Pointer(&data[0])), len(data)*4) 407 } 408 409 func int32s(s string) []int32 { 410 if len(s) == 0 { 411 return nil 412 } 413 return unsafe.Slice((*int32)(unsafe.Pointer(unsafe.StringData(s))), len(s)/4) 414 }